xref: /onnv-gate/usr/src/uts/common/io/bpf/bpf.c (revision 10639:368f1335a058)
1*10639SDarren.Reed@Sun.COM /*	$NetBSD: bpf.c,v 1.143 2009/03/11 05:55:22 mrg Exp $	*/
2*10639SDarren.Reed@Sun.COM 
3*10639SDarren.Reed@Sun.COM /*
4*10639SDarren.Reed@Sun.COM  * Copyright (c) 1990, 1991, 1993
5*10639SDarren.Reed@Sun.COM  *	The Regents of the University of California.  All rights reserved.
6*10639SDarren.Reed@Sun.COM  *
7*10639SDarren.Reed@Sun.COM  * This code is derived from the Stanford/CMU enet packet filter,
8*10639SDarren.Reed@Sun.COM  * (net/enet.c) distributed as part of 4.3BSD, and code contributed
9*10639SDarren.Reed@Sun.COM  * to Berkeley by Steven McCanne and Van Jacobson both of Lawrence
10*10639SDarren.Reed@Sun.COM  * Berkeley Laboratory.
11*10639SDarren.Reed@Sun.COM  *
12*10639SDarren.Reed@Sun.COM  * Redistribution and use in source and binary forms, with or without
13*10639SDarren.Reed@Sun.COM  * modification, are permitted provided that the following conditions
14*10639SDarren.Reed@Sun.COM  * are met:
15*10639SDarren.Reed@Sun.COM  * 1. Redistributions of source code must retain the above copyright
16*10639SDarren.Reed@Sun.COM  *    notice, this list of conditions and the following disclaimer.
17*10639SDarren.Reed@Sun.COM  * 2. Redistributions in binary form must reproduce the above copyright
18*10639SDarren.Reed@Sun.COM  *    notice, this list of conditions and the following disclaimer in the
19*10639SDarren.Reed@Sun.COM  *    documentation and/or other materials provided with the distribution.
20*10639SDarren.Reed@Sun.COM  * 3. Neither the name of the University nor the names of its contributors
21*10639SDarren.Reed@Sun.COM  *    may be used to endorse or promote products derived from this software
22*10639SDarren.Reed@Sun.COM  *    without specific prior written permission.
23*10639SDarren.Reed@Sun.COM  *
24*10639SDarren.Reed@Sun.COM  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25*10639SDarren.Reed@Sun.COM  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26*10639SDarren.Reed@Sun.COM  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27*10639SDarren.Reed@Sun.COM  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28*10639SDarren.Reed@Sun.COM  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29*10639SDarren.Reed@Sun.COM  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30*10639SDarren.Reed@Sun.COM  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31*10639SDarren.Reed@Sun.COM  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32*10639SDarren.Reed@Sun.COM  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33*10639SDarren.Reed@Sun.COM  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34*10639SDarren.Reed@Sun.COM  * SUCH DAMAGE.
35*10639SDarren.Reed@Sun.COM  *
36*10639SDarren.Reed@Sun.COM  *	@(#)bpf.c	8.4 (Berkeley) 1/9/95
37*10639SDarren.Reed@Sun.COM  * static char rcsid[] =
38*10639SDarren.Reed@Sun.COM  * "Header: bpf.c,v 1.67 96/09/26 22:00:52 leres Exp ";
39*10639SDarren.Reed@Sun.COM  */
40*10639SDarren.Reed@Sun.COM /*
41*10639SDarren.Reed@Sun.COM  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
42*10639SDarren.Reed@Sun.COM  * Use is subject to license terms.
43*10639SDarren.Reed@Sun.COM  */
44*10639SDarren.Reed@Sun.COM 
45*10639SDarren.Reed@Sun.COM /*
46*10639SDarren.Reed@Sun.COM  * The BPF implements the following access controls for zones attempting
47*10639SDarren.Reed@Sun.COM  * to read and write data. Writing of data requires that the net_rawaccess
48*10639SDarren.Reed@Sun.COM  * privilege is held whilst reading data requires either net_rawaccess or
49*10639SDarren.Reed@Sun.COM  * net_observability.
50*10639SDarren.Reed@Sun.COM  *
51*10639SDarren.Reed@Sun.COM  *                              | Shared |  Exclusive |   Global
52*10639SDarren.Reed@Sun.COM  * -----------------------------+--------+------------+------------+
53*10639SDarren.Reed@Sun.COM  * DLT_IPNET in local zone      |  Read  |    Read    |    Read    |
54*10639SDarren.Reed@Sun.COM  * -----------------------------+--------+------------+------------+
55*10639SDarren.Reed@Sun.COM  * Raw access to local zone NIC |  None  | Read/Write | Read/Write |
56*10639SDarren.Reed@Sun.COM  * -----------------------------+--------+------------+------------+
57*10639SDarren.Reed@Sun.COM  * Raw access to all NICs       |  None  |    None    | Read/Write |
58*10639SDarren.Reed@Sun.COM  * -----------------------------+--------+------------+------------+
59*10639SDarren.Reed@Sun.COM  *
60*10639SDarren.Reed@Sun.COM  * The BPF driver is written as a cloning driver: each call to bpfopen()
61*10639SDarren.Reed@Sun.COM  * allocates a new minor number. This provides BPF with a 1:1 relationship
62*10639SDarren.Reed@Sun.COM  * between opens and closes. There is some amount of "descriptor state"
63*10639SDarren.Reed@Sun.COM  * that is kept per open. Pointers to this data are stored in a hash table
64*10639SDarren.Reed@Sun.COM  * (bpf_hash) that is indexed by the minor device number for each open file.
65*10639SDarren.Reed@Sun.COM  */
66*10639SDarren.Reed@Sun.COM #include <sys/param.h>
67*10639SDarren.Reed@Sun.COM #include <sys/systm.h>
68*10639SDarren.Reed@Sun.COM #include <sys/time.h>
69*10639SDarren.Reed@Sun.COM #include <sys/ioctl.h>
70*10639SDarren.Reed@Sun.COM #include <sys/queue.h>
71*10639SDarren.Reed@Sun.COM #include <sys/filio.h>
72*10639SDarren.Reed@Sun.COM #include <sys/policy.h>
73*10639SDarren.Reed@Sun.COM #include <sys/cmn_err.h>
74*10639SDarren.Reed@Sun.COM #include <sys/uio.h>
75*10639SDarren.Reed@Sun.COM #include <sys/file.h>
76*10639SDarren.Reed@Sun.COM #include <sys/sysmacros.h>
77*10639SDarren.Reed@Sun.COM #include <sys/zone.h>
78*10639SDarren.Reed@Sun.COM 
79*10639SDarren.Reed@Sun.COM #include <sys/socket.h>
80*10639SDarren.Reed@Sun.COM #include <sys/errno.h>
81*10639SDarren.Reed@Sun.COM #include <sys/poll.h>
82*10639SDarren.Reed@Sun.COM #include <sys/dlpi.h>
83*10639SDarren.Reed@Sun.COM #include <sys/neti.h>
84*10639SDarren.Reed@Sun.COM 
85*10639SDarren.Reed@Sun.COM #include <net/if.h>
86*10639SDarren.Reed@Sun.COM 
87*10639SDarren.Reed@Sun.COM #include <net/bpf.h>
88*10639SDarren.Reed@Sun.COM #include <net/bpfdesc.h>
89*10639SDarren.Reed@Sun.COM #include <net/dlt.h>
90*10639SDarren.Reed@Sun.COM 
91*10639SDarren.Reed@Sun.COM #include <netinet/in.h>
92*10639SDarren.Reed@Sun.COM #include <sys/mac.h>
93*10639SDarren.Reed@Sun.COM #include <sys/mac_client.h>
94*10639SDarren.Reed@Sun.COM #include <sys/mac_impl.h>
95*10639SDarren.Reed@Sun.COM #include <sys/time_std_impl.h>
96*10639SDarren.Reed@Sun.COM #include <sys/hook.h>
97*10639SDarren.Reed@Sun.COM #include <sys/hook_event.h>
98*10639SDarren.Reed@Sun.COM 
99*10639SDarren.Reed@Sun.COM 
/*
 * BSD mbuf-style accessors expressed in terms of the b_rptr/b_wptr fields
 * of the message block passed in.
 *
 * mtod(_mp, _type)	- the block's read pointer, cast to _type.  The whole
 *			  expansion is parenthesized so that a postfix
 *			  operator written after the macro ([], ->) applies
 *			  to the cast result rather than to b_rptr.
 * M_LEN(_mp)		- number of bytes between the read and write pointers.
 *
 * Both macros may evaluate their block argument more than once.
 */
#define	mtod(_mp, _type)	((_type)((_mp)->b_rptr))
#define	M_LEN(_mp)		((_mp)->b_wptr - (_mp)->b_rptr)
102*10639SDarren.Reed@Sun.COM 
103*10639SDarren.Reed@Sun.COM /*
104*10639SDarren.Reed@Sun.COM  * 4096 is too small for FDDI frames. 8192 is too small for gigabit Ethernet
105*10639SDarren.Reed@Sun.COM  * jumbos (circa 9k), ATM, or Intel gig/10gig ethernet jumbos (16k).
106*10639SDarren.Reed@Sun.COM  */
107*10639SDarren.Reed@Sun.COM #define	BPF_BUFSIZE (32 * 1024)
108*10639SDarren.Reed@Sun.COM 
109*10639SDarren.Reed@Sun.COM typedef void *(*cp_fn_t)(void *, const void *, size_t);
110*10639SDarren.Reed@Sun.COM 
111*10639SDarren.Reed@Sun.COM /*
112*10639SDarren.Reed@Sun.COM  * The default read buffer size, and limit for BIOCSBLEN.
113*10639SDarren.Reed@Sun.COM  */
114*10639SDarren.Reed@Sun.COM int bpf_bufsize = BPF_BUFSIZE;
115*10639SDarren.Reed@Sun.COM int bpf_maxbufsize = (16 * 1024 * 1024);
116*10639SDarren.Reed@Sun.COM int bpf_debug = 0;
117*10639SDarren.Reed@Sun.COM mod_hash_t *bpf_hash = NULL;
118*10639SDarren.Reed@Sun.COM 
119*10639SDarren.Reed@Sun.COM /*
120*10639SDarren.Reed@Sun.COM  * Use a mutex to avoid a race condition between gathering the stats/peers
121*10639SDarren.Reed@Sun.COM  * and opening/closing the device.
122*10639SDarren.Reed@Sun.COM  */
123*10639SDarren.Reed@Sun.COM static kcondvar_t bpf_dlt_waiter;
124*10639SDarren.Reed@Sun.COM static kmutex_t bpf_mtx;
125*10639SDarren.Reed@Sun.COM static bpf_kstats_t ks_stats;
126*10639SDarren.Reed@Sun.COM static bpf_kstats_t bpf_kstats = {
127*10639SDarren.Reed@Sun.COM 	{ "readWait",		KSTAT_DATA_UINT64 },
128*10639SDarren.Reed@Sun.COM 	{ "writeOk",		KSTAT_DATA_UINT64 },
129*10639SDarren.Reed@Sun.COM 	{ "writeError",		KSTAT_DATA_UINT64 },
130*10639SDarren.Reed@Sun.COM 	{ "receive",		KSTAT_DATA_UINT64 },
131*10639SDarren.Reed@Sun.COM 	{ "captured",		KSTAT_DATA_UINT64 },
132*10639SDarren.Reed@Sun.COM 	{ "dropped",		KSTAT_DATA_UINT64 },
133*10639SDarren.Reed@Sun.COM };
134*10639SDarren.Reed@Sun.COM static kstat_t *bpf_ksp;
135*10639SDarren.Reed@Sun.COM 
136*10639SDarren.Reed@Sun.COM /*
137*10639SDarren.Reed@Sun.COM  *  bpf_iflist is the list of interfaces; each corresponds to an ifnet
138*10639SDarren.Reed@Sun.COM  *  bpf_dtab holds the descriptors, indexed by minor device #
139*10639SDarren.Reed@Sun.COM  */
140*10639SDarren.Reed@Sun.COM TAILQ_HEAD(, bpf_if) bpf_iflist;
141*10639SDarren.Reed@Sun.COM LIST_HEAD(, bpf_d) bpf_list;
142*10639SDarren.Reed@Sun.COM 
143*10639SDarren.Reed@Sun.COM static int	bpf_allocbufs(struct bpf_d *);
144*10639SDarren.Reed@Sun.COM static void	bpf_clear_timeout(struct bpf_d *);
145*10639SDarren.Reed@Sun.COM static void	bpf_debug_nic_action(char *, struct bpf_if *);
146*10639SDarren.Reed@Sun.COM static void	bpf_deliver(struct bpf_d *, cp_fn_t,
147*10639SDarren.Reed@Sun.COM 		    void *, uint_t, uint_t, boolean_t);
148*10639SDarren.Reed@Sun.COM static struct bpf_if *
149*10639SDarren.Reed@Sun.COM 		bpf_findif(struct bpf_d *, char *, int);
150*10639SDarren.Reed@Sun.COM static void	bpf_freed(struct bpf_d *);
151*10639SDarren.Reed@Sun.COM static int	bpf_ifname(struct bpf_d *d, char *, int);
152*10639SDarren.Reed@Sun.COM static void	*bpf_mcpy(void *, const void *, size_t);
153*10639SDarren.Reed@Sun.COM static void	bpf_attachd(struct bpf_d *, struct bpf_if *);
154*10639SDarren.Reed@Sun.COM static void	bpf_detachd(struct bpf_d *);
155*10639SDarren.Reed@Sun.COM static int	bpf_setif(struct bpf_d *, char *, int);
156*10639SDarren.Reed@Sun.COM static void	bpf_timed_out(void *);
157*10639SDarren.Reed@Sun.COM static inline void
158*10639SDarren.Reed@Sun.COM 		bpf_wakeup(struct bpf_d *);
159*10639SDarren.Reed@Sun.COM static void	catchpacket(struct bpf_d *, uchar_t *, uint_t, uint_t,
160*10639SDarren.Reed@Sun.COM 		    cp_fn_t, struct timeval *);
161*10639SDarren.Reed@Sun.COM static void	reset_d(struct bpf_d *);
162*10639SDarren.Reed@Sun.COM static int	bpf_getdltlist(struct bpf_d *, struct bpf_dltlist *);
163*10639SDarren.Reed@Sun.COM static int	bpf_setdlt(struct bpf_d *, void *);
164*10639SDarren.Reed@Sun.COM static void	bpf_dev_add(struct bpf_d *);
165*10639SDarren.Reed@Sun.COM static struct bpf_d *bpf_dev_find(minor_t);
166*10639SDarren.Reed@Sun.COM static struct bpf_d *bpf_dev_get(minor_t);
167*10639SDarren.Reed@Sun.COM static void	bpf_dev_remove(struct bpf_d *);
168*10639SDarren.Reed@Sun.COM 
169*10639SDarren.Reed@Sun.COM static int
170*10639SDarren.Reed@Sun.COM bpf_movein(struct uio *uio, int linktype, int mtu, mblk_t **mp)
171*10639SDarren.Reed@Sun.COM {
172*10639SDarren.Reed@Sun.COM 	mblk_t *m;
173*10639SDarren.Reed@Sun.COM 	int error;
174*10639SDarren.Reed@Sun.COM 	int len;
175*10639SDarren.Reed@Sun.COM 	int hlen;
176*10639SDarren.Reed@Sun.COM 	int align;
177*10639SDarren.Reed@Sun.COM 
178*10639SDarren.Reed@Sun.COM 	/*
179*10639SDarren.Reed@Sun.COM 	 * Build a sockaddr based on the data link layer type.
180*10639SDarren.Reed@Sun.COM 	 * We do this at this level because the ethernet header
181*10639SDarren.Reed@Sun.COM 	 * is copied directly into the data field of the sockaddr.
182*10639SDarren.Reed@Sun.COM 	 * In the case of SLIP, there is no header and the packet
183*10639SDarren.Reed@Sun.COM 	 * is forwarded as is.
184*10639SDarren.Reed@Sun.COM 	 * Also, we are careful to leave room at the front of the mbuf
185*10639SDarren.Reed@Sun.COM 	 * for the link level header.
186*10639SDarren.Reed@Sun.COM 	 */
187*10639SDarren.Reed@Sun.COM 	switch (linktype) {
188*10639SDarren.Reed@Sun.COM 
189*10639SDarren.Reed@Sun.COM 	case DLT_EN10MB:
190*10639SDarren.Reed@Sun.COM 		hlen = sizeof (struct ether_header);
191*10639SDarren.Reed@Sun.COM 		break;
192*10639SDarren.Reed@Sun.COM 
193*10639SDarren.Reed@Sun.COM 	case DLT_FDDI:
194*10639SDarren.Reed@Sun.COM 		hlen = 16;
195*10639SDarren.Reed@Sun.COM 		break;
196*10639SDarren.Reed@Sun.COM 
197*10639SDarren.Reed@Sun.COM 	case DLT_NULL:
198*10639SDarren.Reed@Sun.COM 		hlen = 0;
199*10639SDarren.Reed@Sun.COM 		break;
200*10639SDarren.Reed@Sun.COM 
201*10639SDarren.Reed@Sun.COM 	case DLT_IPOIB:
202*10639SDarren.Reed@Sun.COM 		hlen = 44;
203*10639SDarren.Reed@Sun.COM 		break;
204*10639SDarren.Reed@Sun.COM 
205*10639SDarren.Reed@Sun.COM 	default:
206*10639SDarren.Reed@Sun.COM 		return (EIO);
207*10639SDarren.Reed@Sun.COM 	}
208*10639SDarren.Reed@Sun.COM 
209*10639SDarren.Reed@Sun.COM 	align = 4 - (hlen & 3);
210*10639SDarren.Reed@Sun.COM 
211*10639SDarren.Reed@Sun.COM 	len = uio->uio_resid;
212*10639SDarren.Reed@Sun.COM 	/*
213*10639SDarren.Reed@Sun.COM 	 * If there aren't enough bytes for a link level header or the
214*10639SDarren.Reed@Sun.COM 	 * packet length exceeds the interface mtu, return an error.
215*10639SDarren.Reed@Sun.COM 	 */
216*10639SDarren.Reed@Sun.COM 	if (len < hlen || len - hlen > mtu)
217*10639SDarren.Reed@Sun.COM 		return (EMSGSIZE);
218*10639SDarren.Reed@Sun.COM 
219*10639SDarren.Reed@Sun.COM 	m = allocb(len + align, BPRI_MED);
220*10639SDarren.Reed@Sun.COM 	if (m == NULL) {
221*10639SDarren.Reed@Sun.COM 		error = ENOBUFS;
222*10639SDarren.Reed@Sun.COM 		goto bad;
223*10639SDarren.Reed@Sun.COM 	}
224*10639SDarren.Reed@Sun.COM 
225*10639SDarren.Reed@Sun.COM 	/* Insure the data is properly aligned */
226*10639SDarren.Reed@Sun.COM 	if (align > 0)
227*10639SDarren.Reed@Sun.COM 		m->b_rptr += align;
228*10639SDarren.Reed@Sun.COM 	m->b_wptr = m->b_rptr + len;
229*10639SDarren.Reed@Sun.COM 
230*10639SDarren.Reed@Sun.COM 	error = uiomove(mtod(m, void *), len, UIO_WRITE, uio);
231*10639SDarren.Reed@Sun.COM 	if (error)
232*10639SDarren.Reed@Sun.COM 		goto bad;
233*10639SDarren.Reed@Sun.COM 	*mp = m;
234*10639SDarren.Reed@Sun.COM 	return (0);
235*10639SDarren.Reed@Sun.COM 
236*10639SDarren.Reed@Sun.COM bad:
237*10639SDarren.Reed@Sun.COM 	if (m != NULL)
238*10639SDarren.Reed@Sun.COM 		freemsg(m);
239*10639SDarren.Reed@Sun.COM 	return (error);
240*10639SDarren.Reed@Sun.COM }
241*10639SDarren.Reed@Sun.COM 
242*10639SDarren.Reed@Sun.COM 
243*10639SDarren.Reed@Sun.COM /*
244*10639SDarren.Reed@Sun.COM  * Attach file to the bpf interface, i.e. make d listen on bp.
245*10639SDarren.Reed@Sun.COM  */
246*10639SDarren.Reed@Sun.COM static void
247*10639SDarren.Reed@Sun.COM bpf_attachd(struct bpf_d *d, struct bpf_if *bp)
248*10639SDarren.Reed@Sun.COM {
249*10639SDarren.Reed@Sun.COM 	uintptr_t mh = bp->bif_ifp;
250*10639SDarren.Reed@Sun.COM 
251*10639SDarren.Reed@Sun.COM 	ASSERT(bp != NULL);
252*10639SDarren.Reed@Sun.COM 	ASSERT(d->bd_bif == NULL);
253*10639SDarren.Reed@Sun.COM 	/*
254*10639SDarren.Reed@Sun.COM 	 * Point d at bp, and add d to the interface's list of listeners.
255*10639SDarren.Reed@Sun.COM 	 * Finally, point the driver's bpf cookie at the interface so
256*10639SDarren.Reed@Sun.COM 	 * it will divert packets to bpf.
257*10639SDarren.Reed@Sun.COM 	 *
258*10639SDarren.Reed@Sun.COM 	 * Note: Although this results in what looks like a lock order
259*10639SDarren.Reed@Sun.COM 	 * reversal (bd_lock is held), the deadlock threat is not present
260*10639SDarren.Reed@Sun.COM 	 * because the descriptor is not attached to any interface and
261*10639SDarren.Reed@Sun.COM 	 * therefore there cannot be a packet waiting on bd_lock in
262*10639SDarren.Reed@Sun.COM 	 * catchpacket.
263*10639SDarren.Reed@Sun.COM 	 */
264*10639SDarren.Reed@Sun.COM 	mutex_enter(&bp->bif_lock);
265*10639SDarren.Reed@Sun.COM 	d->bd_bif = bp;
266*10639SDarren.Reed@Sun.COM 	LIST_INSERT_HEAD(&bp->bif_dlist, d, bd_next);
267*10639SDarren.Reed@Sun.COM 	mutex_exit(&bp->bif_lock);
268*10639SDarren.Reed@Sun.COM 
269*10639SDarren.Reed@Sun.COM 	if (MBPF_CLIENT_OPEN(&bp->bif_mac, mh, &d->bd_mcip) == 0)
270*10639SDarren.Reed@Sun.COM 		(void) MBPF_PROMISC_ADD(&bp->bif_mac, d->bd_mcip, 0, d,
271*10639SDarren.Reed@Sun.COM 		    &d->bd_promisc_handle, d->bd_promisc_flags);
272*10639SDarren.Reed@Sun.COM }
273*10639SDarren.Reed@Sun.COM 
274*10639SDarren.Reed@Sun.COM /*
275*10639SDarren.Reed@Sun.COM  * Detach a file from its interface.
276*10639SDarren.Reed@Sun.COM  */
277*10639SDarren.Reed@Sun.COM static void
278*10639SDarren.Reed@Sun.COM bpf_detachd(struct bpf_d *d)
279*10639SDarren.Reed@Sun.COM {
280*10639SDarren.Reed@Sun.COM 	struct bpf_if *bp;
281*10639SDarren.Reed@Sun.COM 	uintptr_t mph;
282*10639SDarren.Reed@Sun.COM 	uintptr_t mch;
283*10639SDarren.Reed@Sun.COM 
284*10639SDarren.Reed@Sun.COM 	mch = d->bd_mcip;
285*10639SDarren.Reed@Sun.COM 	d->bd_mcip = 0;
286*10639SDarren.Reed@Sun.COM 	bp = d->bd_bif;
287*10639SDarren.Reed@Sun.COM 	ASSERT(bp != NULL);
288*10639SDarren.Reed@Sun.COM 
289*10639SDarren.Reed@Sun.COM 	/*
290*10639SDarren.Reed@Sun.COM 	 * Check if this descriptor had requested promiscuous mode.
291*10639SDarren.Reed@Sun.COM 	 * If so, turn it off. There's no need to take any action
292*10639SDarren.Reed@Sun.COM 	 * here, that is done when MBPF_PROMISC_REMOVE is used;
293*10639SDarren.Reed@Sun.COM 	 * bd_promisc is just a local flag to stop promiscuous mode
294*10639SDarren.Reed@Sun.COM 	 * from being set more than once.
295*10639SDarren.Reed@Sun.COM 	 */
296*10639SDarren.Reed@Sun.COM 	if (d->bd_promisc)
297*10639SDarren.Reed@Sun.COM 		d->bd_promisc = 0;
298*10639SDarren.Reed@Sun.COM 
299*10639SDarren.Reed@Sun.COM 	/*
300*10639SDarren.Reed@Sun.COM 	 * Take device out of "promiscuous" mode.  Since we were able to
301*10639SDarren.Reed@Sun.COM 	 * enter "promiscuous" mode, we should be able to turn it off.
302*10639SDarren.Reed@Sun.COM 	 * Note, this field stores a pointer used to support both
303*10639SDarren.Reed@Sun.COM 	 * promiscuous and non-promiscuous callbacks for packets.
304*10639SDarren.Reed@Sun.COM 	 */
305*10639SDarren.Reed@Sun.COM 	mph = d->bd_promisc_handle;
306*10639SDarren.Reed@Sun.COM 	d->bd_promisc_handle = 0;
307*10639SDarren.Reed@Sun.COM 
308*10639SDarren.Reed@Sun.COM 	/*
309*10639SDarren.Reed@Sun.COM 	 * The lock has to be dropped here because mac_promisc_remove may
310*10639SDarren.Reed@Sun.COM 	 * need to wait for mac_promisc_dispatch, which has called into
311*10639SDarren.Reed@Sun.COM 	 * bpf and catchpacket is waiting for bd_lock...
312*10639SDarren.Reed@Sun.COM 	 * i.e mac_promisc_remove() needs to be called with none of the
313*10639SDarren.Reed@Sun.COM 	 * locks held that are part of the bpf_mtap() call path.
314*10639SDarren.Reed@Sun.COM 	 */
315*10639SDarren.Reed@Sun.COM 	mutex_exit(&d->bd_lock);
316*10639SDarren.Reed@Sun.COM 	if (mph != 0)
317*10639SDarren.Reed@Sun.COM 		MBPF_PROMISC_REMOVE(&bp->bif_mac, mph);
318*10639SDarren.Reed@Sun.COM 
319*10639SDarren.Reed@Sun.COM 	if (mch != 0)
320*10639SDarren.Reed@Sun.COM 		MBPF_CLIENT_CLOSE(&bp->bif_mac, mch);
321*10639SDarren.Reed@Sun.COM 
322*10639SDarren.Reed@Sun.COM 	/*
323*10639SDarren.Reed@Sun.COM 	 * bd_lock needs to stay not held by this function until after
324*10639SDarren.Reed@Sun.COM 	 * it has finished with bif_lock, otherwise there's a lock order
325*10639SDarren.Reed@Sun.COM 	 * reversal with bpf_deliver and the system can deadlock.
326*10639SDarren.Reed@Sun.COM 	 *
327*10639SDarren.Reed@Sun.COM 	 * Remove d from the interface's descriptor list.
328*10639SDarren.Reed@Sun.COM 	 */
329*10639SDarren.Reed@Sun.COM 	mutex_enter(&bp->bif_lock);
330*10639SDarren.Reed@Sun.COM 	LIST_REMOVE(d, bd_next);
331*10639SDarren.Reed@Sun.COM 	mutex_exit(&bp->bif_lock);
332*10639SDarren.Reed@Sun.COM 
333*10639SDarren.Reed@Sun.COM 	/*
334*10639SDarren.Reed@Sun.COM 	 * Because this function is called with bd_lock held, so it must
335*10639SDarren.Reed@Sun.COM 	 * exit with it held.
336*10639SDarren.Reed@Sun.COM 	 */
337*10639SDarren.Reed@Sun.COM 	mutex_enter(&d->bd_lock);
338*10639SDarren.Reed@Sun.COM 	/*
339*10639SDarren.Reed@Sun.COM 	 * bd_bif cannot be cleared until after the promisc callback has been
340*10639SDarren.Reed@Sun.COM 	 * removed.
341*10639SDarren.Reed@Sun.COM 	 */
342*10639SDarren.Reed@Sun.COM 	d->bd_bif = 0;
343*10639SDarren.Reed@Sun.COM }
344*10639SDarren.Reed@Sun.COM 
345*10639SDarren.Reed@Sun.COM 
346*10639SDarren.Reed@Sun.COM /*
347*10639SDarren.Reed@Sun.COM  * bpfilterattach() is called at load time.
348*10639SDarren.Reed@Sun.COM  */
349*10639SDarren.Reed@Sun.COM int
350*10639SDarren.Reed@Sun.COM bpfilterattach(void)
351*10639SDarren.Reed@Sun.COM {
352*10639SDarren.Reed@Sun.COM 
353*10639SDarren.Reed@Sun.COM 	bpf_hash = mod_hash_create_idhash("bpf_dev_tab", 31,
354*10639SDarren.Reed@Sun.COM 	    mod_hash_null_keydtor);
355*10639SDarren.Reed@Sun.COM 	if (bpf_hash == NULL)
356*10639SDarren.Reed@Sun.COM 		return (ENOMEM);
357*10639SDarren.Reed@Sun.COM 
358*10639SDarren.Reed@Sun.COM 	(void) memcpy(&ks_stats, &bpf_kstats, sizeof (bpf_kstats));
359*10639SDarren.Reed@Sun.COM 
360*10639SDarren.Reed@Sun.COM 	bpf_ksp = kstat_create("bpf", 0, "global", "misc",
361*10639SDarren.Reed@Sun.COM 	    KSTAT_TYPE_NAMED, sizeof (bpf_kstats) / sizeof (kstat_named_t),
362*10639SDarren.Reed@Sun.COM 	    KSTAT_FLAG_VIRTUAL);
363*10639SDarren.Reed@Sun.COM 	if (bpf_ksp != NULL) {
364*10639SDarren.Reed@Sun.COM 		bpf_ksp->ks_data = &ks_stats;
365*10639SDarren.Reed@Sun.COM 		kstat_install(bpf_ksp);
366*10639SDarren.Reed@Sun.COM 	} else {
367*10639SDarren.Reed@Sun.COM 		mod_hash_destroy_idhash(bpf_hash);
368*10639SDarren.Reed@Sun.COM 		bpf_hash = NULL;
369*10639SDarren.Reed@Sun.COM 		return (EEXIST);
370*10639SDarren.Reed@Sun.COM 	}
371*10639SDarren.Reed@Sun.COM 
372*10639SDarren.Reed@Sun.COM 	cv_init(&bpf_dlt_waiter, NULL, CV_DRIVER, NULL);
373*10639SDarren.Reed@Sun.COM 	mutex_init(&bpf_mtx, NULL, MUTEX_DRIVER, NULL);
374*10639SDarren.Reed@Sun.COM 
375*10639SDarren.Reed@Sun.COM 	LIST_INIT(&bpf_list);
376*10639SDarren.Reed@Sun.COM 	TAILQ_INIT(&bpf_iflist);
377*10639SDarren.Reed@Sun.COM 
378*10639SDarren.Reed@Sun.COM 	return (0);
379*10639SDarren.Reed@Sun.COM }
380*10639SDarren.Reed@Sun.COM 
381*10639SDarren.Reed@Sun.COM 
382*10639SDarren.Reed@Sun.COM /*
383*10639SDarren.Reed@Sun.COM  * bpfilterdetach() is called at unload time.
384*10639SDarren.Reed@Sun.COM  */
385*10639SDarren.Reed@Sun.COM int
386*10639SDarren.Reed@Sun.COM bpfilterdetach(void)
387*10639SDarren.Reed@Sun.COM {
388*10639SDarren.Reed@Sun.COM 	struct bpf_if *bp;
389*10639SDarren.Reed@Sun.COM 
390*10639SDarren.Reed@Sun.COM 	if (bpf_ksp != NULL) {
391*10639SDarren.Reed@Sun.COM 		kstat_delete(bpf_ksp);
392*10639SDarren.Reed@Sun.COM 		bpf_ksp = NULL;
393*10639SDarren.Reed@Sun.COM 	}
394*10639SDarren.Reed@Sun.COM 
395*10639SDarren.Reed@Sun.COM 	/*
396*10639SDarren.Reed@Sun.COM 	 * When no attach/detach callbacks can arrive from mac,
397*10639SDarren.Reed@Sun.COM 	 * this is now safe without a lock.
398*10639SDarren.Reed@Sun.COM 	 */
399*10639SDarren.Reed@Sun.COM 	while ((bp = TAILQ_FIRST(&bpf_iflist)) != NULL)
400*10639SDarren.Reed@Sun.COM 		bpfdetach(bp->bif_ifp);
401*10639SDarren.Reed@Sun.COM 
402*10639SDarren.Reed@Sun.COM 	mutex_enter(&bpf_mtx);
403*10639SDarren.Reed@Sun.COM 	if (!LIST_EMPTY(&bpf_list)) {
404*10639SDarren.Reed@Sun.COM 		mutex_exit(&bpf_mtx);
405*10639SDarren.Reed@Sun.COM 		return (EBUSY);
406*10639SDarren.Reed@Sun.COM 	}
407*10639SDarren.Reed@Sun.COM 	mutex_exit(&bpf_mtx);
408*10639SDarren.Reed@Sun.COM 
409*10639SDarren.Reed@Sun.COM 	mod_hash_destroy_idhash(bpf_hash);
410*10639SDarren.Reed@Sun.COM 	bpf_hash = NULL;
411*10639SDarren.Reed@Sun.COM 
412*10639SDarren.Reed@Sun.COM 	cv_destroy(&bpf_dlt_waiter);
413*10639SDarren.Reed@Sun.COM 	mutex_destroy(&bpf_mtx);
414*10639SDarren.Reed@Sun.COM 
415*10639SDarren.Reed@Sun.COM 	return (0);
416*10639SDarren.Reed@Sun.COM }
417*10639SDarren.Reed@Sun.COM 
418*10639SDarren.Reed@Sun.COM /*
419*10639SDarren.Reed@Sun.COM  * Open ethernet device. Clones.
420*10639SDarren.Reed@Sun.COM  */
421*10639SDarren.Reed@Sun.COM /* ARGSUSED */
422*10639SDarren.Reed@Sun.COM int
423*10639SDarren.Reed@Sun.COM bpfopen(dev_t *devp, int flag, int mode, cred_t *cred)
424*10639SDarren.Reed@Sun.COM {
425*10639SDarren.Reed@Sun.COM 	struct bpf_d *d;
426*10639SDarren.Reed@Sun.COM 	uint_t dmin;
427*10639SDarren.Reed@Sun.COM 
428*10639SDarren.Reed@Sun.COM 	/*
429*10639SDarren.Reed@Sun.COM 	 * The security policy described at the top of this file is
430*10639SDarren.Reed@Sun.COM 	 * enforced here.
431*10639SDarren.Reed@Sun.COM 	 */
432*10639SDarren.Reed@Sun.COM 	if ((flag & FWRITE) != 0) {
433*10639SDarren.Reed@Sun.COM 		if (secpolicy_net_rawaccess(cred) != 0)
434*10639SDarren.Reed@Sun.COM 			return (EACCES);
435*10639SDarren.Reed@Sun.COM 	}
436*10639SDarren.Reed@Sun.COM 
437*10639SDarren.Reed@Sun.COM 	if ((flag & FREAD) != 0) {
438*10639SDarren.Reed@Sun.COM 		if ((secpolicy_net_observability(cred) != 0) &&
439*10639SDarren.Reed@Sun.COM 		    (secpolicy_net_rawaccess(cred) != 0))
440*10639SDarren.Reed@Sun.COM 			return (EACCES);
441*10639SDarren.Reed@Sun.COM 	}
442*10639SDarren.Reed@Sun.COM 
443*10639SDarren.Reed@Sun.COM 	if ((flag & (FWRITE|FREAD)) == 0)
444*10639SDarren.Reed@Sun.COM 		return (ENXIO);
445*10639SDarren.Reed@Sun.COM 
446*10639SDarren.Reed@Sun.COM 	/*
447*10639SDarren.Reed@Sun.COM 	 * If BPF is being opened from a non-global zone, trigger a call
448*10639SDarren.Reed@Sun.COM 	 * back into the driver to see if it needs to initialise local
449*10639SDarren.Reed@Sun.COM 	 * state in a zone.
450*10639SDarren.Reed@Sun.COM 	 */
451*10639SDarren.Reed@Sun.COM 	if (crgetzoneid(cred) != GLOBAL_ZONEID)
452*10639SDarren.Reed@Sun.COM 		bpf_open_zone(crgetzoneid(cred));
453*10639SDarren.Reed@Sun.COM 
454*10639SDarren.Reed@Sun.COM 	/*
455*10639SDarren.Reed@Sun.COM 	 * A structure is allocated per open file in BPF to store settings
456*10639SDarren.Reed@Sun.COM 	 * such as buffer capture size, provide private buffers, etc.
457*10639SDarren.Reed@Sun.COM 	 */
458*10639SDarren.Reed@Sun.COM 	d = (struct bpf_d *)kmem_zalloc(sizeof (*d), KM_SLEEP);
459*10639SDarren.Reed@Sun.COM 	d->bd_bufsize = bpf_bufsize;
460*10639SDarren.Reed@Sun.COM 	d->bd_fmode = flag;
461*10639SDarren.Reed@Sun.COM 	d->bd_zone = crgetzoneid(cred);
462*10639SDarren.Reed@Sun.COM 	d->bd_seesent = 1;
463*10639SDarren.Reed@Sun.COM 	d->bd_promisc_flags = MAC_PROMISC_FLAGS_NO_PHYS|
464*10639SDarren.Reed@Sun.COM 	    MAC_PROMISC_FLAGS_NO_COPY;
465*10639SDarren.Reed@Sun.COM 	mutex_init(&d->bd_lock, NULL, MUTEX_DRIVER, NULL);
466*10639SDarren.Reed@Sun.COM 	cv_init(&d->bd_wait, NULL, CV_DRIVER, NULL);
467*10639SDarren.Reed@Sun.COM 
468*10639SDarren.Reed@Sun.COM 	mutex_enter(&bpf_mtx);
469*10639SDarren.Reed@Sun.COM 	/*
470*10639SDarren.Reed@Sun.COM 	 * Find an unused minor number. Obviously this is an O(n) algorithm
471*10639SDarren.Reed@Sun.COM 	 * and doesn't scale particularly well, so if there are large numbers
472*10639SDarren.Reed@Sun.COM 	 * of open file descriptors happening in real use, this design may
473*10639SDarren.Reed@Sun.COM 	 * need to be revisited.
474*10639SDarren.Reed@Sun.COM 	 */
475*10639SDarren.Reed@Sun.COM 	for (dmin = 0; dmin < L_MAXMIN; dmin++)
476*10639SDarren.Reed@Sun.COM 		if (bpf_dev_find(dmin) == NULL)
477*10639SDarren.Reed@Sun.COM 			break;
478*10639SDarren.Reed@Sun.COM 	if (dmin == L_MAXMIN) {
479*10639SDarren.Reed@Sun.COM 		mutex_exit(&bpf_mtx);
480*10639SDarren.Reed@Sun.COM 		kmem_free(d, sizeof (*d));
481*10639SDarren.Reed@Sun.COM 		return (ENXIO);
482*10639SDarren.Reed@Sun.COM 	}
483*10639SDarren.Reed@Sun.COM 	d->bd_dev = dmin;
484*10639SDarren.Reed@Sun.COM 	LIST_INSERT_HEAD(&bpf_list, d, bd_list);
485*10639SDarren.Reed@Sun.COM 	bpf_dev_add(d);
486*10639SDarren.Reed@Sun.COM 	mutex_exit(&bpf_mtx);
487*10639SDarren.Reed@Sun.COM 
488*10639SDarren.Reed@Sun.COM 	*devp = makedevice(getmajor(*devp), dmin);
489*10639SDarren.Reed@Sun.COM 
490*10639SDarren.Reed@Sun.COM 	return (0);
491*10639SDarren.Reed@Sun.COM }
492*10639SDarren.Reed@Sun.COM 
493*10639SDarren.Reed@Sun.COM /*
494*10639SDarren.Reed@Sun.COM  * Close the descriptor by detaching it from its interface,
495*10639SDarren.Reed@Sun.COM  * deallocating its buffers, and marking it free.
496*10639SDarren.Reed@Sun.COM  *
497*10639SDarren.Reed@Sun.COM  * Because we only allow a device to be opened once, there is always a
498*10639SDarren.Reed@Sun.COM  * 1 to 1 relationship between opens and closes supporting this function.
499*10639SDarren.Reed@Sun.COM  */
500*10639SDarren.Reed@Sun.COM /* ARGSUSED */
501*10639SDarren.Reed@Sun.COM int
502*10639SDarren.Reed@Sun.COM bpfclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
503*10639SDarren.Reed@Sun.COM {
504*10639SDarren.Reed@Sun.COM 	struct bpf_d *d = bpf_dev_get(getminor(dev));
505*10639SDarren.Reed@Sun.COM 
506*10639SDarren.Reed@Sun.COM 	mutex_enter(&d->bd_lock);
507*10639SDarren.Reed@Sun.COM 	if (d->bd_state == BPF_WAITING)
508*10639SDarren.Reed@Sun.COM 		bpf_clear_timeout(d);
509*10639SDarren.Reed@Sun.COM 	d->bd_state = BPF_IDLE;
510*10639SDarren.Reed@Sun.COM 	if (d->bd_bif)
511*10639SDarren.Reed@Sun.COM 		bpf_detachd(d);
512*10639SDarren.Reed@Sun.COM 	mutex_exit(&d->bd_lock);
513*10639SDarren.Reed@Sun.COM 
514*10639SDarren.Reed@Sun.COM 	mutex_enter(&bpf_mtx);
515*10639SDarren.Reed@Sun.COM 	LIST_REMOVE(d, bd_list);
516*10639SDarren.Reed@Sun.COM 	bpf_dev_remove(d);
517*10639SDarren.Reed@Sun.COM 	mutex_exit(&bpf_mtx);
518*10639SDarren.Reed@Sun.COM 
519*10639SDarren.Reed@Sun.COM 	mutex_enter(&d->bd_lock);
520*10639SDarren.Reed@Sun.COM 	mutex_destroy(&d->bd_lock);
521*10639SDarren.Reed@Sun.COM 	cv_destroy(&d->bd_wait);
522*10639SDarren.Reed@Sun.COM 
523*10639SDarren.Reed@Sun.COM 	bpf_freed(d);
524*10639SDarren.Reed@Sun.COM 	kmem_free(d, sizeof (*d));
525*10639SDarren.Reed@Sun.COM 
526*10639SDarren.Reed@Sun.COM 	return (0);
527*10639SDarren.Reed@Sun.COM }
528*10639SDarren.Reed@Sun.COM 
/*
 * Rotate the packet buffers in descriptor d.  Move the store buffer
 * into the hold slot, and the free buffer into the store slot.
 * Zero the length of the new store buffer.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely in an unbraced if/else; the
 * caller supplies the terminating semicolon.  The argument is evaluated
 * more than once, so it must not have side effects.
 */
#define	ROTATE_BUFFERS(d) \
	do { \
		(d)->bd_hbuf = (d)->bd_sbuf; \
		(d)->bd_hlen = (d)->bd_slen; \
		(d)->bd_sbuf = (d)->bd_fbuf; \
		(d)->bd_slen = 0; \
		(d)->bd_fbuf = 0; \
	} while (0)
540*10639SDarren.Reed@Sun.COM /*
541*10639SDarren.Reed@Sun.COM  *  bpfread - read next chunk of packets from buffers
542*10639SDarren.Reed@Sun.COM  */
543*10639SDarren.Reed@Sun.COM /* ARGSUSED */
544*10639SDarren.Reed@Sun.COM int
545*10639SDarren.Reed@Sun.COM bpfread(dev_t dev, struct uio *uio, cred_t *cred)
546*10639SDarren.Reed@Sun.COM {
547*10639SDarren.Reed@Sun.COM 	struct bpf_d *d = bpf_dev_get(getminor(dev));
548*10639SDarren.Reed@Sun.COM 	int timed_out;
549*10639SDarren.Reed@Sun.COM 	ulong_t delay;
550*10639SDarren.Reed@Sun.COM 	int error;
551*10639SDarren.Reed@Sun.COM 
552*10639SDarren.Reed@Sun.COM 	if ((d->bd_fmode & FREAD) == 0)
553*10639SDarren.Reed@Sun.COM 		return (EBADF);
554*10639SDarren.Reed@Sun.COM 
555*10639SDarren.Reed@Sun.COM 	/*
556*10639SDarren.Reed@Sun.COM 	 * Restrict application to use a buffer the same size as
557*10639SDarren.Reed@Sun.COM 	 * the kernel buffers.
558*10639SDarren.Reed@Sun.COM 	 */
559*10639SDarren.Reed@Sun.COM 	if (uio->uio_resid != d->bd_bufsize)
560*10639SDarren.Reed@Sun.COM 		return (EINVAL);
561*10639SDarren.Reed@Sun.COM 
562*10639SDarren.Reed@Sun.COM 	mutex_enter(&d->bd_lock);
563*10639SDarren.Reed@Sun.COM 	if (d->bd_state == BPF_WAITING)
564*10639SDarren.Reed@Sun.COM 		bpf_clear_timeout(d);
565*10639SDarren.Reed@Sun.COM 	timed_out = (d->bd_state == BPF_TIMED_OUT);
566*10639SDarren.Reed@Sun.COM 	d->bd_state = BPF_IDLE;
567*10639SDarren.Reed@Sun.COM 	/*
568*10639SDarren.Reed@Sun.COM 	 * If the hold buffer is empty, then do a timed sleep, which
569*10639SDarren.Reed@Sun.COM 	 * ends when the timeout expires or when enough packets
570*10639SDarren.Reed@Sun.COM 	 * have arrived to fill the store buffer.
571*10639SDarren.Reed@Sun.COM 	 */
572*10639SDarren.Reed@Sun.COM 	while (d->bd_hbuf == 0) {
573*10639SDarren.Reed@Sun.COM 		if (d->bd_nonblock) {
574*10639SDarren.Reed@Sun.COM 			if (d->bd_slen == 0) {
575*10639SDarren.Reed@Sun.COM 				mutex_exit(&d->bd_lock);
576*10639SDarren.Reed@Sun.COM 				return (EWOULDBLOCK);
577*10639SDarren.Reed@Sun.COM 			}
578*10639SDarren.Reed@Sun.COM 			ROTATE_BUFFERS(d);
579*10639SDarren.Reed@Sun.COM 			break;
580*10639SDarren.Reed@Sun.COM 		}
581*10639SDarren.Reed@Sun.COM 
582*10639SDarren.Reed@Sun.COM 		if ((d->bd_immediate || timed_out) && d->bd_slen != 0) {
583*10639SDarren.Reed@Sun.COM 			/*
584*10639SDarren.Reed@Sun.COM 			 * A packet(s) either arrived since the previous
585*10639SDarren.Reed@Sun.COM 			 * read or arrived while we were asleep.
586*10639SDarren.Reed@Sun.COM 			 * Rotate the buffers and return what's here.
587*10639SDarren.Reed@Sun.COM 			 */
588*10639SDarren.Reed@Sun.COM 			ROTATE_BUFFERS(d);
589*10639SDarren.Reed@Sun.COM 			break;
590*10639SDarren.Reed@Sun.COM 		}
591*10639SDarren.Reed@Sun.COM 		ks_stats.kp_read_wait.value.ui64++;
592*10639SDarren.Reed@Sun.COM 		delay = ddi_get_lbolt() + d->bd_rtout;
593*10639SDarren.Reed@Sun.COM 		error = cv_timedwait_sig(&d->bd_wait, &d->bd_lock, delay);
594*10639SDarren.Reed@Sun.COM 		if (error == 0) {
595*10639SDarren.Reed@Sun.COM 			mutex_exit(&d->bd_lock);
596*10639SDarren.Reed@Sun.COM 			return (EINTR);
597*10639SDarren.Reed@Sun.COM 		}
598*10639SDarren.Reed@Sun.COM 		if (error == -1) {
599*10639SDarren.Reed@Sun.COM 			/*
600*10639SDarren.Reed@Sun.COM 			 * On a timeout, return what's in the buffer,
601*10639SDarren.Reed@Sun.COM 			 * which may be nothing.  If there is something
602*10639SDarren.Reed@Sun.COM 			 * in the store buffer, we can rotate the buffers.
603*10639SDarren.Reed@Sun.COM 			 */
604*10639SDarren.Reed@Sun.COM 			if (d->bd_hbuf)
605*10639SDarren.Reed@Sun.COM 				/*
606*10639SDarren.Reed@Sun.COM 				 * We filled up the buffer in between
607*10639SDarren.Reed@Sun.COM 				 * getting the timeout and arriving
608*10639SDarren.Reed@Sun.COM 				 * here, so we don't need to rotate.
609*10639SDarren.Reed@Sun.COM 				 */
610*10639SDarren.Reed@Sun.COM 				break;
611*10639SDarren.Reed@Sun.COM 
612*10639SDarren.Reed@Sun.COM 			if (d->bd_slen == 0) {
613*10639SDarren.Reed@Sun.COM 				mutex_exit(&d->bd_lock);
614*10639SDarren.Reed@Sun.COM 				return (0);
615*10639SDarren.Reed@Sun.COM 			}
616*10639SDarren.Reed@Sun.COM 			ROTATE_BUFFERS(d);
617*10639SDarren.Reed@Sun.COM 		}
618*10639SDarren.Reed@Sun.COM 	}
619*10639SDarren.Reed@Sun.COM 	/*
620*10639SDarren.Reed@Sun.COM 	 * At this point, we know we have something in the hold slot.
621*10639SDarren.Reed@Sun.COM 	 */
622*10639SDarren.Reed@Sun.COM 	mutex_exit(&d->bd_lock);
623*10639SDarren.Reed@Sun.COM 
624*10639SDarren.Reed@Sun.COM 	/*
625*10639SDarren.Reed@Sun.COM 	 * Move data from hold buffer into user space.
626*10639SDarren.Reed@Sun.COM 	 * We know the entire buffer is transferred since
627*10639SDarren.Reed@Sun.COM 	 * we checked above that the read buffer is bpf_bufsize bytes.
628*10639SDarren.Reed@Sun.COM 	 */
629*10639SDarren.Reed@Sun.COM 	error = uiomove(d->bd_hbuf, d->bd_hlen, UIO_READ, uio);
630*10639SDarren.Reed@Sun.COM 
631*10639SDarren.Reed@Sun.COM 	mutex_enter(&d->bd_lock);
632*10639SDarren.Reed@Sun.COM 	d->bd_fbuf = d->bd_hbuf;
633*10639SDarren.Reed@Sun.COM 	d->bd_hbuf = 0;
634*10639SDarren.Reed@Sun.COM 	d->bd_hlen = 0;
635*10639SDarren.Reed@Sun.COM done:
636*10639SDarren.Reed@Sun.COM 	mutex_exit(&d->bd_lock);
637*10639SDarren.Reed@Sun.COM 	return (error);
638*10639SDarren.Reed@Sun.COM }
639*10639SDarren.Reed@Sun.COM 
640*10639SDarren.Reed@Sun.COM 
641*10639SDarren.Reed@Sun.COM /*
642*10639SDarren.Reed@Sun.COM  * If there are processes sleeping on this descriptor, wake them up.
643*10639SDarren.Reed@Sun.COM  * NOTE: the lock for bd_wait is bd_lock and is held by bpf_deliver,
644*10639SDarren.Reed@Sun.COM  * so there is no code here grabbing it.
645*10639SDarren.Reed@Sun.COM  */
646*10639SDarren.Reed@Sun.COM static inline void
647*10639SDarren.Reed@Sun.COM bpf_wakeup(struct bpf_d *d)
648*10639SDarren.Reed@Sun.COM {
649*10639SDarren.Reed@Sun.COM 	cv_signal(&d->bd_wait);
650*10639SDarren.Reed@Sun.COM }
651*10639SDarren.Reed@Sun.COM 
652*10639SDarren.Reed@Sun.COM static void
653*10639SDarren.Reed@Sun.COM bpf_timed_out(void *arg)
654*10639SDarren.Reed@Sun.COM {
655*10639SDarren.Reed@Sun.COM 	struct bpf_d *d = arg;
656*10639SDarren.Reed@Sun.COM 
657*10639SDarren.Reed@Sun.COM 	mutex_enter(&d->bd_lock);
658*10639SDarren.Reed@Sun.COM 	if (d->bd_state == BPF_WAITING) {
659*10639SDarren.Reed@Sun.COM 		d->bd_state = BPF_TIMED_OUT;
660*10639SDarren.Reed@Sun.COM 		if (d->bd_slen != 0)
661*10639SDarren.Reed@Sun.COM 			cv_signal(&d->bd_wait);
662*10639SDarren.Reed@Sun.COM 	}
663*10639SDarren.Reed@Sun.COM 	mutex_exit(&d->bd_lock);
664*10639SDarren.Reed@Sun.COM }
665*10639SDarren.Reed@Sun.COM 
666*10639SDarren.Reed@Sun.COM 
667*10639SDarren.Reed@Sun.COM /* ARGSUSED */
668*10639SDarren.Reed@Sun.COM int
669*10639SDarren.Reed@Sun.COM bpfwrite(dev_t dev, struct uio *uio, cred_t *cred)
670*10639SDarren.Reed@Sun.COM {
671*10639SDarren.Reed@Sun.COM 	struct bpf_d *d = bpf_dev_get(getminor(dev));
672*10639SDarren.Reed@Sun.COM 	struct bpf_if *bp;
673*10639SDarren.Reed@Sun.COM 	uintptr_t mch;
674*10639SDarren.Reed@Sun.COM 	uintptr_t ifp;
675*10639SDarren.Reed@Sun.COM 	uint_t mtu;
676*10639SDarren.Reed@Sun.COM 	mblk_t *m;
677*10639SDarren.Reed@Sun.COM 	int error;
678*10639SDarren.Reed@Sun.COM 	int dlt;
679*10639SDarren.Reed@Sun.COM 
680*10639SDarren.Reed@Sun.COM 	if ((d->bd_fmode & FWRITE) == 0)
681*10639SDarren.Reed@Sun.COM 		return (EBADF);
682*10639SDarren.Reed@Sun.COM 
683*10639SDarren.Reed@Sun.COM 	mutex_enter(&d->bd_lock);
684*10639SDarren.Reed@Sun.COM 	if (d->bd_bif == 0 || d->bd_mcip == 0 || d->bd_bif->bif_ifp == 0) {
685*10639SDarren.Reed@Sun.COM 		mutex_exit(&d->bd_lock);
686*10639SDarren.Reed@Sun.COM 		return (EINTR);
687*10639SDarren.Reed@Sun.COM 	}
688*10639SDarren.Reed@Sun.COM 
689*10639SDarren.Reed@Sun.COM 	if (uio->uio_resid == 0) {
690*10639SDarren.Reed@Sun.COM 		mutex_exit(&d->bd_lock);
691*10639SDarren.Reed@Sun.COM 		return (0);
692*10639SDarren.Reed@Sun.COM 	}
693*10639SDarren.Reed@Sun.COM 
694*10639SDarren.Reed@Sun.COM 	while (d->bd_inuse < 0) {
695*10639SDarren.Reed@Sun.COM 		d->bd_waiting++;
696*10639SDarren.Reed@Sun.COM 		if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) {
697*10639SDarren.Reed@Sun.COM 			d->bd_waiting--;
698*10639SDarren.Reed@Sun.COM 			mutex_exit(&d->bd_lock);
699*10639SDarren.Reed@Sun.COM 			return (EINTR);
700*10639SDarren.Reed@Sun.COM 		}
701*10639SDarren.Reed@Sun.COM 		d->bd_waiting--;
702*10639SDarren.Reed@Sun.COM 	}
703*10639SDarren.Reed@Sun.COM 
704*10639SDarren.Reed@Sun.COM 	mutex_exit(&d->bd_lock);
705*10639SDarren.Reed@Sun.COM 
706*10639SDarren.Reed@Sun.COM 	bp = d->bd_bif;
707*10639SDarren.Reed@Sun.COM 	dlt = bp->bif_dlt;
708*10639SDarren.Reed@Sun.COM 	mch = d->bd_mcip;
709*10639SDarren.Reed@Sun.COM 	ifp = bp->bif_ifp;
710*10639SDarren.Reed@Sun.COM 	MBPF_SDU_GET(&bp->bif_mac, ifp, &mtu);
711*10639SDarren.Reed@Sun.COM 	d->bd_inuse++;
712*10639SDarren.Reed@Sun.COM 
713*10639SDarren.Reed@Sun.COM 	m = NULL;
714*10639SDarren.Reed@Sun.COM 	if (dlt == DLT_IPNET) {
715*10639SDarren.Reed@Sun.COM 		error = EIO;
716*10639SDarren.Reed@Sun.COM 		goto done;
717*10639SDarren.Reed@Sun.COM 	}
718*10639SDarren.Reed@Sun.COM 
719*10639SDarren.Reed@Sun.COM 	error = bpf_movein(uio, dlt, mtu, &m);
720*10639SDarren.Reed@Sun.COM 	if (error)
721*10639SDarren.Reed@Sun.COM 		goto done;
722*10639SDarren.Reed@Sun.COM 
723*10639SDarren.Reed@Sun.COM 	DTRACE_PROBE5(bpf__tx, struct bpf_d *, d, struct bpf_if *, bp,
724*10639SDarren.Reed@Sun.COM 	    int, dlt, uint_t, mtu, mblk_t *, m);
725*10639SDarren.Reed@Sun.COM 
726*10639SDarren.Reed@Sun.COM 	if (M_LEN(m) > mtu) {
727*10639SDarren.Reed@Sun.COM 		error = EMSGSIZE;
728*10639SDarren.Reed@Sun.COM 		goto done;
729*10639SDarren.Reed@Sun.COM 	}
730*10639SDarren.Reed@Sun.COM 
731*10639SDarren.Reed@Sun.COM 	error = MBPF_TX(&bp->bif_mac, mch, m);
732*10639SDarren.Reed@Sun.COM 	/*
733*10639SDarren.Reed@Sun.COM 	 * The "tx" action here is required to consume the mblk_t.
734*10639SDarren.Reed@Sun.COM 	 */
735*10639SDarren.Reed@Sun.COM 	m = NULL;
736*10639SDarren.Reed@Sun.COM 
737*10639SDarren.Reed@Sun.COM done:
738*10639SDarren.Reed@Sun.COM 	if (error == 0)
739*10639SDarren.Reed@Sun.COM 		ks_stats.kp_write_ok.value.ui64++;
740*10639SDarren.Reed@Sun.COM 	else
741*10639SDarren.Reed@Sun.COM 		ks_stats.kp_write_error.value.ui64++;
742*10639SDarren.Reed@Sun.COM 	if (m != NULL)
743*10639SDarren.Reed@Sun.COM 		freemsg(m);
744*10639SDarren.Reed@Sun.COM 
745*10639SDarren.Reed@Sun.COM 	mutex_enter(&d->bd_lock);
746*10639SDarren.Reed@Sun.COM 	d->bd_inuse--;
747*10639SDarren.Reed@Sun.COM 	if ((d->bd_inuse == 0) && (d->bd_waiting != 0))
748*10639SDarren.Reed@Sun.COM 		cv_signal(&d->bd_wait);
749*10639SDarren.Reed@Sun.COM 	mutex_exit(&d->bd_lock);
750*10639SDarren.Reed@Sun.COM 
751*10639SDarren.Reed@Sun.COM 	/*
752*10639SDarren.Reed@Sun.COM 	 * The driver frees the mbuf.
753*10639SDarren.Reed@Sun.COM 	 */
754*10639SDarren.Reed@Sun.COM 	return (error);
755*10639SDarren.Reed@Sun.COM }
756*10639SDarren.Reed@Sun.COM 
757*10639SDarren.Reed@Sun.COM 
758*10639SDarren.Reed@Sun.COM /*
759*10639SDarren.Reed@Sun.COM  * Reset a descriptor by flushing its packet buffer and clearing the
760*10639SDarren.Reed@Sun.COM  * receive and drop counts.  Should be called at splnet.
761*10639SDarren.Reed@Sun.COM  */
762*10639SDarren.Reed@Sun.COM static void
763*10639SDarren.Reed@Sun.COM reset_d(struct bpf_d *d)
764*10639SDarren.Reed@Sun.COM {
765*10639SDarren.Reed@Sun.COM 	if (d->bd_hbuf) {
766*10639SDarren.Reed@Sun.COM 		/* Free the hold buffer. */
767*10639SDarren.Reed@Sun.COM 		d->bd_fbuf = d->bd_hbuf;
768*10639SDarren.Reed@Sun.COM 		d->bd_hbuf = 0;
769*10639SDarren.Reed@Sun.COM 	}
770*10639SDarren.Reed@Sun.COM 	d->bd_slen = 0;
771*10639SDarren.Reed@Sun.COM 	d->bd_hlen = 0;
772*10639SDarren.Reed@Sun.COM 	d->bd_rcount = 0;
773*10639SDarren.Reed@Sun.COM 	d->bd_dcount = 0;
774*10639SDarren.Reed@Sun.COM 	d->bd_ccount = 0;
775*10639SDarren.Reed@Sun.COM }
776*10639SDarren.Reed@Sun.COM 
777*10639SDarren.Reed@Sun.COM /*
778*10639SDarren.Reed@Sun.COM  *  FIONREAD		Check for read packet available.
779*10639SDarren.Reed@Sun.COM  *  BIOCGBLEN		Get buffer len [for read()].
780*10639SDarren.Reed@Sun.COM  *  BIOCSETF		Set ethernet read filter.
781*10639SDarren.Reed@Sun.COM  *  BIOCFLUSH		Flush read packet buffer.
782*10639SDarren.Reed@Sun.COM  *  BIOCPROMISC		Put interface into promiscuous mode.
783*10639SDarren.Reed@Sun.COM  *  BIOCGDLT		Get link layer type.
784*10639SDarren.Reed@Sun.COM  *  BIOCGETIF		Get interface name.
785*10639SDarren.Reed@Sun.COM  *  BIOCSETIF		Set interface.
786*10639SDarren.Reed@Sun.COM  *  BIOCSRTIMEOUT	Set read timeout.
787*10639SDarren.Reed@Sun.COM  *  BIOCGRTIMEOUT	Get read timeout.
788*10639SDarren.Reed@Sun.COM  *  BIOCGSTATS		Get packet stats.
789*10639SDarren.Reed@Sun.COM  *  BIOCIMMEDIATE	Set immediate mode.
790*10639SDarren.Reed@Sun.COM  *  BIOCVERSION		Get filter language version.
791*10639SDarren.Reed@Sun.COM  *  BIOCGHDRCMPLT	Get "header already complete" flag.
792*10639SDarren.Reed@Sun.COM  *  BIOCSHDRCMPLT	Set "header already complete" flag.
793*10639SDarren.Reed@Sun.COM  */
794*10639SDarren.Reed@Sun.COM /* ARGSUSED */
795*10639SDarren.Reed@Sun.COM int
796*10639SDarren.Reed@Sun.COM bpfioctl(dev_t dev, int cmd, intptr_t addr, int mode, cred_t *cred, int *rval)
797*10639SDarren.Reed@Sun.COM {
798*10639SDarren.Reed@Sun.COM 	struct bpf_d *d = bpf_dev_get(getminor(dev));
799*10639SDarren.Reed@Sun.COM 	struct bpf_program prog;
800*10639SDarren.Reed@Sun.COM 	struct lifreq lifreq;
801*10639SDarren.Reed@Sun.COM 	struct ifreq ifreq;
802*10639SDarren.Reed@Sun.COM 	int error = 0;
803*10639SDarren.Reed@Sun.COM 	uint_t size;
804*10639SDarren.Reed@Sun.COM 
805*10639SDarren.Reed@Sun.COM 	/*
806*10639SDarren.Reed@Sun.COM 	 * Refresh the PID associated with this bpf file.
807*10639SDarren.Reed@Sun.COM 	 */
808*10639SDarren.Reed@Sun.COM 	mutex_enter(&d->bd_lock);
809*10639SDarren.Reed@Sun.COM 	if (d->bd_state == BPF_WAITING)
810*10639SDarren.Reed@Sun.COM 		bpf_clear_timeout(d);
811*10639SDarren.Reed@Sun.COM 	d->bd_state = BPF_IDLE;
812*10639SDarren.Reed@Sun.COM 	mutex_exit(&d->bd_lock);
813*10639SDarren.Reed@Sun.COM 
814*10639SDarren.Reed@Sun.COM 	switch (cmd) {
815*10639SDarren.Reed@Sun.COM 
816*10639SDarren.Reed@Sun.COM 	default:
817*10639SDarren.Reed@Sun.COM 		error = EINVAL;
818*10639SDarren.Reed@Sun.COM 		break;
819*10639SDarren.Reed@Sun.COM 
820*10639SDarren.Reed@Sun.COM 	/*
821*10639SDarren.Reed@Sun.COM 	 * Check for read packet available.
822*10639SDarren.Reed@Sun.COM 	 */
823*10639SDarren.Reed@Sun.COM 	case FIONREAD:
824*10639SDarren.Reed@Sun.COM 		{
825*10639SDarren.Reed@Sun.COM 			int n;
826*10639SDarren.Reed@Sun.COM 
827*10639SDarren.Reed@Sun.COM 			mutex_enter(&d->bd_lock);
828*10639SDarren.Reed@Sun.COM 			n = d->bd_slen;
829*10639SDarren.Reed@Sun.COM 			if (d->bd_hbuf)
830*10639SDarren.Reed@Sun.COM 				n += d->bd_hlen;
831*10639SDarren.Reed@Sun.COM 			mutex_exit(&d->bd_lock);
832*10639SDarren.Reed@Sun.COM 
833*10639SDarren.Reed@Sun.COM 			*(int *)addr = n;
834*10639SDarren.Reed@Sun.COM 			break;
835*10639SDarren.Reed@Sun.COM 		}
836*10639SDarren.Reed@Sun.COM 
837*10639SDarren.Reed@Sun.COM 	/*
838*10639SDarren.Reed@Sun.COM 	 * Get buffer len [for read()].
839*10639SDarren.Reed@Sun.COM 	 */
840*10639SDarren.Reed@Sun.COM 	case BIOCGBLEN:
841*10639SDarren.Reed@Sun.COM 		error = copyout(&d->bd_bufsize, (void *)addr,
842*10639SDarren.Reed@Sun.COM 		    sizeof (d->bd_bufsize));
843*10639SDarren.Reed@Sun.COM 		break;
844*10639SDarren.Reed@Sun.COM 
845*10639SDarren.Reed@Sun.COM 	/*
846*10639SDarren.Reed@Sun.COM 	 * Set buffer length.
847*10639SDarren.Reed@Sun.COM 	 */
848*10639SDarren.Reed@Sun.COM 	case BIOCSBLEN:
849*10639SDarren.Reed@Sun.COM 		if (copyin((void *)addr, &size, sizeof (size)) != 0) {
850*10639SDarren.Reed@Sun.COM 			error = EFAULT;
851*10639SDarren.Reed@Sun.COM 			break;
852*10639SDarren.Reed@Sun.COM 		}
853*10639SDarren.Reed@Sun.COM 
854*10639SDarren.Reed@Sun.COM 		mutex_enter(&d->bd_lock);
855*10639SDarren.Reed@Sun.COM 		if (d->bd_bif != 0) {
856*10639SDarren.Reed@Sun.COM 			error = EINVAL;
857*10639SDarren.Reed@Sun.COM 		} else {
858*10639SDarren.Reed@Sun.COM 			if (size > bpf_maxbufsize)
859*10639SDarren.Reed@Sun.COM 				size = bpf_maxbufsize;
860*10639SDarren.Reed@Sun.COM 			else if (size < BPF_MINBUFSIZE)
861*10639SDarren.Reed@Sun.COM 				size = BPF_MINBUFSIZE;
862*10639SDarren.Reed@Sun.COM 
863*10639SDarren.Reed@Sun.COM 			d->bd_bufsize = size;
864*10639SDarren.Reed@Sun.COM 		}
865*10639SDarren.Reed@Sun.COM 		mutex_exit(&d->bd_lock);
866*10639SDarren.Reed@Sun.COM 
867*10639SDarren.Reed@Sun.COM 		if (error == 0)
868*10639SDarren.Reed@Sun.COM 			error = copyout(&size, (void *)addr, sizeof (size));
869*10639SDarren.Reed@Sun.COM 		break;
870*10639SDarren.Reed@Sun.COM 
871*10639SDarren.Reed@Sun.COM 	/*
872*10639SDarren.Reed@Sun.COM 	 * Set link layer read filter.
873*10639SDarren.Reed@Sun.COM 	 */
874*10639SDarren.Reed@Sun.COM 	case BIOCSETF:
875*10639SDarren.Reed@Sun.COM 		if (ddi_copyin((void *)addr, &prog, sizeof (prog), mode)) {
876*10639SDarren.Reed@Sun.COM 			error = EFAULT;
877*10639SDarren.Reed@Sun.COM 			break;
878*10639SDarren.Reed@Sun.COM 		}
879*10639SDarren.Reed@Sun.COM 		error = bpf_setf(d, &prog);
880*10639SDarren.Reed@Sun.COM 		break;
881*10639SDarren.Reed@Sun.COM 
882*10639SDarren.Reed@Sun.COM 	/*
883*10639SDarren.Reed@Sun.COM 	 * Flush read packet buffer.
884*10639SDarren.Reed@Sun.COM 	 */
885*10639SDarren.Reed@Sun.COM 	case BIOCFLUSH:
886*10639SDarren.Reed@Sun.COM 		mutex_enter(&d->bd_lock);
887*10639SDarren.Reed@Sun.COM 		reset_d(d);
888*10639SDarren.Reed@Sun.COM 		mutex_exit(&d->bd_lock);
889*10639SDarren.Reed@Sun.COM 		break;
890*10639SDarren.Reed@Sun.COM 
891*10639SDarren.Reed@Sun.COM 	/*
892*10639SDarren.Reed@Sun.COM 	 * Put interface into promiscuous mode.
893*10639SDarren.Reed@Sun.COM 	 * This is a one-way ioctl, it is not used to turn promiscuous
894*10639SDarren.Reed@Sun.COM 	 * mode off.
895*10639SDarren.Reed@Sun.COM 	 */
896*10639SDarren.Reed@Sun.COM 	case BIOCPROMISC:
897*10639SDarren.Reed@Sun.COM 		if (d->bd_bif == 0) {
898*10639SDarren.Reed@Sun.COM 			/*
899*10639SDarren.Reed@Sun.COM 			 * No interface attached yet.
900*10639SDarren.Reed@Sun.COM 			 */
901*10639SDarren.Reed@Sun.COM 			error = EINVAL;
902*10639SDarren.Reed@Sun.COM 			break;
903*10639SDarren.Reed@Sun.COM 		}
904*10639SDarren.Reed@Sun.COM 		mutex_enter(&d->bd_lock);
905*10639SDarren.Reed@Sun.COM 		if (d->bd_promisc == 0) {
906*10639SDarren.Reed@Sun.COM 
907*10639SDarren.Reed@Sun.COM 			if (d->bd_promisc_handle) {
908*10639SDarren.Reed@Sun.COM 				uintptr_t mph;
909*10639SDarren.Reed@Sun.COM 
910*10639SDarren.Reed@Sun.COM 				mph = d->bd_promisc_handle;
911*10639SDarren.Reed@Sun.COM 				d->bd_promisc_handle = 0;
912*10639SDarren.Reed@Sun.COM 
913*10639SDarren.Reed@Sun.COM 				mutex_exit(&d->bd_lock);
914*10639SDarren.Reed@Sun.COM 				MBPF_PROMISC_REMOVE(&d->bd_bif->bif_mac, mph);
915*10639SDarren.Reed@Sun.COM 				mutex_enter(&d->bd_lock);
916*10639SDarren.Reed@Sun.COM 			}
917*10639SDarren.Reed@Sun.COM 
918*10639SDarren.Reed@Sun.COM 			d->bd_promisc_flags = MAC_PROMISC_FLAGS_NO_COPY;
919*10639SDarren.Reed@Sun.COM 			error = MBPF_PROMISC_ADD(&d->bd_bif->bif_mac,
920*10639SDarren.Reed@Sun.COM 			    d->bd_mcip, MAC_CLIENT_PROMISC_ALL, d,
921*10639SDarren.Reed@Sun.COM 			    &d->bd_promisc_handle, d->bd_promisc_flags);
922*10639SDarren.Reed@Sun.COM 			if (error == 0)
923*10639SDarren.Reed@Sun.COM 				d->bd_promisc = 1;
924*10639SDarren.Reed@Sun.COM 		}
925*10639SDarren.Reed@Sun.COM 		mutex_exit(&d->bd_lock);
926*10639SDarren.Reed@Sun.COM 		break;
927*10639SDarren.Reed@Sun.COM 
928*10639SDarren.Reed@Sun.COM 	/*
929*10639SDarren.Reed@Sun.COM 	 * Get device parameters.
930*10639SDarren.Reed@Sun.COM 	 */
931*10639SDarren.Reed@Sun.COM 	case BIOCGDLT:
932*10639SDarren.Reed@Sun.COM 		if (d->bd_bif == 0)
933*10639SDarren.Reed@Sun.COM 			error = EINVAL;
934*10639SDarren.Reed@Sun.COM 		else
935*10639SDarren.Reed@Sun.COM 			error = copyout(&d->bd_bif->bif_dlt, (void *)addr,
936*10639SDarren.Reed@Sun.COM 			    sizeof (d->bd_bif->bif_dlt));
937*10639SDarren.Reed@Sun.COM 		break;
938*10639SDarren.Reed@Sun.COM 
939*10639SDarren.Reed@Sun.COM 	/*
940*10639SDarren.Reed@Sun.COM 	 * Get a list of supported device parameters.
941*10639SDarren.Reed@Sun.COM 	 */
942*10639SDarren.Reed@Sun.COM 	case BIOCGDLTLIST:
943*10639SDarren.Reed@Sun.COM 		if (d->bd_bif == 0) {
944*10639SDarren.Reed@Sun.COM 			error = EINVAL;
945*10639SDarren.Reed@Sun.COM 		} else {
946*10639SDarren.Reed@Sun.COM 			struct bpf_dltlist list;
947*10639SDarren.Reed@Sun.COM 
948*10639SDarren.Reed@Sun.COM 			if (copyin((void *)addr, &list, sizeof (list)) != 0) {
949*10639SDarren.Reed@Sun.COM 				error = EFAULT;
950*10639SDarren.Reed@Sun.COM 				break;
951*10639SDarren.Reed@Sun.COM 			}
952*10639SDarren.Reed@Sun.COM 			error = bpf_getdltlist(d, &list);
953*10639SDarren.Reed@Sun.COM 			if ((error == 0) &&
954*10639SDarren.Reed@Sun.COM 			    copyout(&list, (void *)addr, sizeof (list)) != 0)
955*10639SDarren.Reed@Sun.COM 				error = EFAULT;
956*10639SDarren.Reed@Sun.COM 		}
957*10639SDarren.Reed@Sun.COM 		break;
958*10639SDarren.Reed@Sun.COM 
959*10639SDarren.Reed@Sun.COM 	/*
960*10639SDarren.Reed@Sun.COM 	 * Set device parameters.
961*10639SDarren.Reed@Sun.COM 	 */
962*10639SDarren.Reed@Sun.COM 	case BIOCSDLT:
963*10639SDarren.Reed@Sun.COM 		error = bpf_setdlt(d, (void *)addr);
964*10639SDarren.Reed@Sun.COM 		break;
965*10639SDarren.Reed@Sun.COM 
966*10639SDarren.Reed@Sun.COM 	/*
967*10639SDarren.Reed@Sun.COM 	 * Get interface name.
968*10639SDarren.Reed@Sun.COM 	 */
969*10639SDarren.Reed@Sun.COM 	case BIOCGETIF:
970*10639SDarren.Reed@Sun.COM 		if (copyin((void *)addr, &ifreq, sizeof (ifreq)) != 0) {
971*10639SDarren.Reed@Sun.COM 			error = EFAULT;
972*10639SDarren.Reed@Sun.COM 			break;
973*10639SDarren.Reed@Sun.COM 		}
974*10639SDarren.Reed@Sun.COM 		error = bpf_ifname(d, ifreq.ifr_name, sizeof (ifreq.ifr_name));
975*10639SDarren.Reed@Sun.COM 		if ((error == 0) &&
976*10639SDarren.Reed@Sun.COM 		    copyout(&ifreq, (void *)addr, sizeof (ifreq)) != 0) {
977*10639SDarren.Reed@Sun.COM 			error = EFAULT;
978*10639SDarren.Reed@Sun.COM 			break;
979*10639SDarren.Reed@Sun.COM 		}
980*10639SDarren.Reed@Sun.COM 		break;
981*10639SDarren.Reed@Sun.COM 
982*10639SDarren.Reed@Sun.COM 	/*
983*10639SDarren.Reed@Sun.COM 	 * Set interface.
984*10639SDarren.Reed@Sun.COM 	 */
985*10639SDarren.Reed@Sun.COM 	case BIOCSETIF:
986*10639SDarren.Reed@Sun.COM 		if (copyin((void *)addr, &ifreq, sizeof (ifreq)) != 0) {
987*10639SDarren.Reed@Sun.COM 			error = EFAULT;
988*10639SDarren.Reed@Sun.COM 			break;
989*10639SDarren.Reed@Sun.COM 		}
990*10639SDarren.Reed@Sun.COM 		error = bpf_setif(d, ifreq.ifr_name, sizeof (ifreq.ifr_name));
991*10639SDarren.Reed@Sun.COM 		break;
992*10639SDarren.Reed@Sun.COM 
993*10639SDarren.Reed@Sun.COM 	/*
994*10639SDarren.Reed@Sun.COM 	 * Get interface name.
995*10639SDarren.Reed@Sun.COM 	 */
996*10639SDarren.Reed@Sun.COM 	case BIOCGETLIF:
997*10639SDarren.Reed@Sun.COM 		if (copyin((void *)addr, &lifreq, sizeof (lifreq)) != 0) {
998*10639SDarren.Reed@Sun.COM 			error = EFAULT;
999*10639SDarren.Reed@Sun.COM 			break;
1000*10639SDarren.Reed@Sun.COM 		}
1001*10639SDarren.Reed@Sun.COM 		error = bpf_ifname(d, lifreq.lifr_name,
1002*10639SDarren.Reed@Sun.COM 		    sizeof (lifreq.lifr_name));
1003*10639SDarren.Reed@Sun.COM 		if ((error == 0) &&
1004*10639SDarren.Reed@Sun.COM 		    copyout(&lifreq, (void *)addr, sizeof (lifreq)) != 0) {
1005*10639SDarren.Reed@Sun.COM 			error = EFAULT;
1006*10639SDarren.Reed@Sun.COM 			break;
1007*10639SDarren.Reed@Sun.COM 		}
1008*10639SDarren.Reed@Sun.COM 		break;
1009*10639SDarren.Reed@Sun.COM 
1010*10639SDarren.Reed@Sun.COM 	/*
1011*10639SDarren.Reed@Sun.COM 	 * Set interface.
1012*10639SDarren.Reed@Sun.COM 	 */
1013*10639SDarren.Reed@Sun.COM 	case BIOCSETLIF:
1014*10639SDarren.Reed@Sun.COM 		if (copyin((void *)addr, &lifreq, sizeof (lifreq)) != 0) {
1015*10639SDarren.Reed@Sun.COM 			error = EFAULT;
1016*10639SDarren.Reed@Sun.COM 			break;
1017*10639SDarren.Reed@Sun.COM 		}
1018*10639SDarren.Reed@Sun.COM 		error = bpf_setif(d, lifreq.lifr_name,
1019*10639SDarren.Reed@Sun.COM 		    sizeof (lifreq.lifr_name));
1020*10639SDarren.Reed@Sun.COM 		break;
1021*10639SDarren.Reed@Sun.COM 
1022*10639SDarren.Reed@Sun.COM #ifdef _SYSCALL32_IMPL
1023*10639SDarren.Reed@Sun.COM 	/*
1024*10639SDarren.Reed@Sun.COM 	 * Set read timeout.
1025*10639SDarren.Reed@Sun.COM 	 */
1026*10639SDarren.Reed@Sun.COM 	case BIOCSRTIMEOUT32:
1027*10639SDarren.Reed@Sun.COM 		{
1028*10639SDarren.Reed@Sun.COM 			struct timeval32 tv;
1029*10639SDarren.Reed@Sun.COM 
1030*10639SDarren.Reed@Sun.COM 			if (copyin((void *)addr, &tv, sizeof (tv)) != 0) {
1031*10639SDarren.Reed@Sun.COM 				error = EFAULT;
1032*10639SDarren.Reed@Sun.COM 				break;
1033*10639SDarren.Reed@Sun.COM 			}
1034*10639SDarren.Reed@Sun.COM 
1035*10639SDarren.Reed@Sun.COM 			/* Convert the timeout in microseconds to ticks */
1036*10639SDarren.Reed@Sun.COM 			d->bd_rtout = drv_usectohz(tv.tv_sec * 1000000 +
1037*10639SDarren.Reed@Sun.COM 			    tv.tv_usec);
1038*10639SDarren.Reed@Sun.COM 			if ((d->bd_rtout == 0) && (tv.tv_usec != 0))
1039*10639SDarren.Reed@Sun.COM 				d->bd_rtout = 1;
1040*10639SDarren.Reed@Sun.COM 			break;
1041*10639SDarren.Reed@Sun.COM 		}
1042*10639SDarren.Reed@Sun.COM 
1043*10639SDarren.Reed@Sun.COM 	/*
1044*10639SDarren.Reed@Sun.COM 	 * Get read timeout.
1045*10639SDarren.Reed@Sun.COM 	 */
1046*10639SDarren.Reed@Sun.COM 	case BIOCGRTIMEOUT32:
1047*10639SDarren.Reed@Sun.COM 		{
1048*10639SDarren.Reed@Sun.COM 			struct timeval32 tv;
1049*10639SDarren.Reed@Sun.COM 			clock_t ticks;
1050*10639SDarren.Reed@Sun.COM 
1051*10639SDarren.Reed@Sun.COM 			ticks = drv_hztousec(d->bd_rtout);
1052*10639SDarren.Reed@Sun.COM 			tv.tv_sec = ticks / 1000000;
1053*10639SDarren.Reed@Sun.COM 			tv.tv_usec = ticks - (tv.tv_sec * 1000000);
1054*10639SDarren.Reed@Sun.COM 			error = copyout(&tv, (void *)addr, sizeof (tv));
1055*10639SDarren.Reed@Sun.COM 			break;
1056*10639SDarren.Reed@Sun.COM 		}
1057*10639SDarren.Reed@Sun.COM 
1058*10639SDarren.Reed@Sun.COM 	/*
1059*10639SDarren.Reed@Sun.COM 	 * Get a list of supported device parameters.
1060*10639SDarren.Reed@Sun.COM 	 */
1061*10639SDarren.Reed@Sun.COM 	case BIOCGDLTLIST32:
1062*10639SDarren.Reed@Sun.COM 		if (d->bd_bif == 0) {
1063*10639SDarren.Reed@Sun.COM 			error = EINVAL;
1064*10639SDarren.Reed@Sun.COM 		} else {
1065*10639SDarren.Reed@Sun.COM 			struct bpf_dltlist32 lst32;
1066*10639SDarren.Reed@Sun.COM 			struct bpf_dltlist list;
1067*10639SDarren.Reed@Sun.COM 
1068*10639SDarren.Reed@Sun.COM 			if (copyin((void *)addr, &lst32, sizeof (lst32)) != 0) {
1069*10639SDarren.Reed@Sun.COM 				error = EFAULT;
1070*10639SDarren.Reed@Sun.COM 				break;
1071*10639SDarren.Reed@Sun.COM 			}
1072*10639SDarren.Reed@Sun.COM 
1073*10639SDarren.Reed@Sun.COM 			list.bfl_len = lst32.bfl_len;
1074*10639SDarren.Reed@Sun.COM 			list.bfl_list = (void *)(uint64_t)lst32.bfl_list;
1075*10639SDarren.Reed@Sun.COM 			error = bpf_getdltlist(d, &list);
1076*10639SDarren.Reed@Sun.COM 			if (error == 0) {
1077*10639SDarren.Reed@Sun.COM 				lst32.bfl_len = list.bfl_len;
1078*10639SDarren.Reed@Sun.COM 
1079*10639SDarren.Reed@Sun.COM 				if (copyout(&lst32, (void *)addr,
1080*10639SDarren.Reed@Sun.COM 				    sizeof (lst32)) != 0)
1081*10639SDarren.Reed@Sun.COM 					error = EFAULT;
1082*10639SDarren.Reed@Sun.COM 			}
1083*10639SDarren.Reed@Sun.COM 		}
1084*10639SDarren.Reed@Sun.COM 		break;
1085*10639SDarren.Reed@Sun.COM 
1086*10639SDarren.Reed@Sun.COM 	/*
1087*10639SDarren.Reed@Sun.COM 	 * Set link layer read filter.
1088*10639SDarren.Reed@Sun.COM 	 */
1089*10639SDarren.Reed@Sun.COM 	case BIOCSETF32: {
1090*10639SDarren.Reed@Sun.COM 		struct bpf_program32 prog32;
1091*10639SDarren.Reed@Sun.COM 
1092*10639SDarren.Reed@Sun.COM 		if (ddi_copyin((void *)addr, &prog32, sizeof (prog), mode)) {
1093*10639SDarren.Reed@Sun.COM 			error = EFAULT;
1094*10639SDarren.Reed@Sun.COM 			break;
1095*10639SDarren.Reed@Sun.COM 		}
1096*10639SDarren.Reed@Sun.COM 		prog.bf_len = prog32.bf_len;
1097*10639SDarren.Reed@Sun.COM 		prog.bf_insns = (void *)(uint64_t)prog32.bf_insns;
1098*10639SDarren.Reed@Sun.COM 		error = bpf_setf(d, &prog);
1099*10639SDarren.Reed@Sun.COM 		break;
1100*10639SDarren.Reed@Sun.COM 	}
1101*10639SDarren.Reed@Sun.COM #endif
1102*10639SDarren.Reed@Sun.COM 
1103*10639SDarren.Reed@Sun.COM 	/*
1104*10639SDarren.Reed@Sun.COM 	 * Set read timeout.
1105*10639SDarren.Reed@Sun.COM 	 */
1106*10639SDarren.Reed@Sun.COM 	case BIOCSRTIMEOUT:
1107*10639SDarren.Reed@Sun.COM 		{
1108*10639SDarren.Reed@Sun.COM 			struct timeval tv;
1109*10639SDarren.Reed@Sun.COM 
1110*10639SDarren.Reed@Sun.COM 			if (copyin((void *)addr, &tv, sizeof (tv)) != 0) {
1111*10639SDarren.Reed@Sun.COM 				error = EFAULT;
1112*10639SDarren.Reed@Sun.COM 				break;
1113*10639SDarren.Reed@Sun.COM 			}
1114*10639SDarren.Reed@Sun.COM 
1115*10639SDarren.Reed@Sun.COM 			/* Convert the timeout in microseconds to ticks */
1116*10639SDarren.Reed@Sun.COM 			d->bd_rtout = drv_usectohz(tv.tv_sec * 1000000 +
1117*10639SDarren.Reed@Sun.COM 			    tv.tv_usec);
1118*10639SDarren.Reed@Sun.COM 			if ((d->bd_rtout == 0) && (tv.tv_usec != 0))
1119*10639SDarren.Reed@Sun.COM 				d->bd_rtout = 1;
1120*10639SDarren.Reed@Sun.COM 			break;
1121*10639SDarren.Reed@Sun.COM 		}
1122*10639SDarren.Reed@Sun.COM 
1123*10639SDarren.Reed@Sun.COM 	/*
1124*10639SDarren.Reed@Sun.COM 	 * Get read timeout.
1125*10639SDarren.Reed@Sun.COM 	 */
1126*10639SDarren.Reed@Sun.COM 	case BIOCGRTIMEOUT:
1127*10639SDarren.Reed@Sun.COM 		{
1128*10639SDarren.Reed@Sun.COM 			struct timeval tv;
1129*10639SDarren.Reed@Sun.COM 			clock_t ticks;
1130*10639SDarren.Reed@Sun.COM 
1131*10639SDarren.Reed@Sun.COM 			ticks = drv_hztousec(d->bd_rtout);
1132*10639SDarren.Reed@Sun.COM 			tv.tv_sec = ticks / 1000000;
1133*10639SDarren.Reed@Sun.COM 			tv.tv_usec = ticks - (tv.tv_sec * 1000000);
1134*10639SDarren.Reed@Sun.COM 			if (copyout(&tv, (void *)addr, sizeof (tv)) != 0)
1135*10639SDarren.Reed@Sun.COM 				error = EFAULT;
1136*10639SDarren.Reed@Sun.COM 			break;
1137*10639SDarren.Reed@Sun.COM 		}
1138*10639SDarren.Reed@Sun.COM 
1139*10639SDarren.Reed@Sun.COM 	/*
1140*10639SDarren.Reed@Sun.COM 	 * Get packet stats.
1141*10639SDarren.Reed@Sun.COM 	 */
1142*10639SDarren.Reed@Sun.COM 	case BIOCGSTATS:
1143*10639SDarren.Reed@Sun.COM 		{
1144*10639SDarren.Reed@Sun.COM 			struct bpf_stat bs;
1145*10639SDarren.Reed@Sun.COM 
1146*10639SDarren.Reed@Sun.COM 			bs.bs_recv = d->bd_rcount;
1147*10639SDarren.Reed@Sun.COM 			bs.bs_drop = d->bd_dcount;
1148*10639SDarren.Reed@Sun.COM 			bs.bs_capt = d->bd_ccount;
1149*10639SDarren.Reed@Sun.COM 			if (copyout(&bs, (void *)addr, sizeof (bs)) != 0)
1150*10639SDarren.Reed@Sun.COM 				error = EFAULT;
1151*10639SDarren.Reed@Sun.COM 			break;
1152*10639SDarren.Reed@Sun.COM 		}
1153*10639SDarren.Reed@Sun.COM 
1154*10639SDarren.Reed@Sun.COM 	/*
1155*10639SDarren.Reed@Sun.COM 	 * Set immediate mode.
1156*10639SDarren.Reed@Sun.COM 	 */
1157*10639SDarren.Reed@Sun.COM 	case BIOCIMMEDIATE:
1158*10639SDarren.Reed@Sun.COM 		if (copyin((void *)addr, &d->bd_immediate,
1159*10639SDarren.Reed@Sun.COM 		    sizeof (d->bd_immediate)) != 0)
1160*10639SDarren.Reed@Sun.COM 			error = EFAULT;
1161*10639SDarren.Reed@Sun.COM 		break;
1162*10639SDarren.Reed@Sun.COM 
1163*10639SDarren.Reed@Sun.COM 	case BIOCVERSION:
1164*10639SDarren.Reed@Sun.COM 		{
1165*10639SDarren.Reed@Sun.COM 			struct bpf_version bv;
1166*10639SDarren.Reed@Sun.COM 
1167*10639SDarren.Reed@Sun.COM 			bv.bv_major = BPF_MAJOR_VERSION;
1168*10639SDarren.Reed@Sun.COM 			bv.bv_minor = BPF_MINOR_VERSION;
1169*10639SDarren.Reed@Sun.COM 			if (copyout(&bv, (void *)addr, sizeof (bv)) != 0)
1170*10639SDarren.Reed@Sun.COM 				error = EFAULT;
1171*10639SDarren.Reed@Sun.COM 			break;
1172*10639SDarren.Reed@Sun.COM 		}
1173*10639SDarren.Reed@Sun.COM 
1174*10639SDarren.Reed@Sun.COM 	case BIOCGHDRCMPLT:	/* get "header already complete" flag */
1175*10639SDarren.Reed@Sun.COM 		if (copyout(&d->bd_hdrcmplt, (void *)addr,
1176*10639SDarren.Reed@Sun.COM 		    sizeof (d->bd_hdrcmplt)) != 0)
1177*10639SDarren.Reed@Sun.COM 			error = EFAULT;
1178*10639SDarren.Reed@Sun.COM 		break;
1179*10639SDarren.Reed@Sun.COM 
1180*10639SDarren.Reed@Sun.COM 	case BIOCSHDRCMPLT:	/* set "header already complete" flag */
1181*10639SDarren.Reed@Sun.COM 		if (copyin((void *)addr, &d->bd_hdrcmplt,
1182*10639SDarren.Reed@Sun.COM 		    sizeof (d->bd_hdrcmplt)) != 0)
1183*10639SDarren.Reed@Sun.COM 			error = EFAULT;
1184*10639SDarren.Reed@Sun.COM 		break;
1185*10639SDarren.Reed@Sun.COM 
1186*10639SDarren.Reed@Sun.COM 	/*
1187*10639SDarren.Reed@Sun.COM 	 * Get "see sent packets" flag
1188*10639SDarren.Reed@Sun.COM 	 */
1189*10639SDarren.Reed@Sun.COM 	case BIOCGSEESENT:
1190*10639SDarren.Reed@Sun.COM 		if (copyout(&d->bd_seesent, (void *)addr,
1191*10639SDarren.Reed@Sun.COM 		    sizeof (d->bd_seesent)) != 0)
1192*10639SDarren.Reed@Sun.COM 			error = EFAULT;
1193*10639SDarren.Reed@Sun.COM 		break;
1194*10639SDarren.Reed@Sun.COM 
1195*10639SDarren.Reed@Sun.COM 	/*
1196*10639SDarren.Reed@Sun.COM 	 * Set "see sent" packets flag
1197*10639SDarren.Reed@Sun.COM 	 */
1198*10639SDarren.Reed@Sun.COM 	case BIOCSSEESENT:
1199*10639SDarren.Reed@Sun.COM 		if (copyin((void *)addr, &d->bd_seesent,
1200*10639SDarren.Reed@Sun.COM 		    sizeof (d->bd_seesent)) != 0)
1201*10639SDarren.Reed@Sun.COM 			error = EFAULT;
1202*10639SDarren.Reed@Sun.COM 		break;
1203*10639SDarren.Reed@Sun.COM 
1204*10639SDarren.Reed@Sun.COM 	case FIONBIO:		/* Non-blocking I/O */
1205*10639SDarren.Reed@Sun.COM 		if (copyin((void *)addr, &d->bd_nonblock,
1206*10639SDarren.Reed@Sun.COM 		    sizeof (d->bd_nonblock)) != 0)
1207*10639SDarren.Reed@Sun.COM 			error = EFAULT;
1208*10639SDarren.Reed@Sun.COM 		break;
1209*10639SDarren.Reed@Sun.COM 	}
1210*10639SDarren.Reed@Sun.COM 	return (error);
1211*10639SDarren.Reed@Sun.COM }
1212*10639SDarren.Reed@Sun.COM 
1213*10639SDarren.Reed@Sun.COM /*
1214*10639SDarren.Reed@Sun.COM  * Set d's packet filter program to fp.  If this file already has a filter,
1215*10639SDarren.Reed@Sun.COM  * free it and replace it. If the new filter is "empty" (has a 0 size), then
1216*10639SDarren.Reed@Sun.COM  * the result is to just remove and free the existing filter.
1217*10639SDarren.Reed@Sun.COM  * Returns EINVAL for bogus requests.
1218*10639SDarren.Reed@Sun.COM  */
1219*10639SDarren.Reed@Sun.COM int
1220*10639SDarren.Reed@Sun.COM bpf_setf(struct bpf_d *d, struct bpf_program *fp)
1221*10639SDarren.Reed@Sun.COM {
1222*10639SDarren.Reed@Sun.COM 	struct bpf_insn *fcode, *old;
1223*10639SDarren.Reed@Sun.COM 	uint_t flen, size;
1224*10639SDarren.Reed@Sun.COM 	size_t oldsize;
1225*10639SDarren.Reed@Sun.COM 
1226*10639SDarren.Reed@Sun.COM 	if (fp->bf_insns == 0) {
1227*10639SDarren.Reed@Sun.COM 		if (fp->bf_len != 0)
1228*10639SDarren.Reed@Sun.COM 			return (EINVAL);
1229*10639SDarren.Reed@Sun.COM 		mutex_enter(&d->bd_lock);
1230*10639SDarren.Reed@Sun.COM 		old = d->bd_filter;
1231*10639SDarren.Reed@Sun.COM 		oldsize = d->bd_filter_size;
1232*10639SDarren.Reed@Sun.COM 		d->bd_filter = 0;
1233*10639SDarren.Reed@Sun.COM 		d->bd_filter_size = 0;
1234*10639SDarren.Reed@Sun.COM 		reset_d(d);
1235*10639SDarren.Reed@Sun.COM 		mutex_exit(&d->bd_lock);
1236*10639SDarren.Reed@Sun.COM 		if (old != 0)
1237*10639SDarren.Reed@Sun.COM 			kmem_free(old, oldsize);
1238*10639SDarren.Reed@Sun.COM 		return (0);
1239*10639SDarren.Reed@Sun.COM 	}
1240*10639SDarren.Reed@Sun.COM 	flen = fp->bf_len;
1241*10639SDarren.Reed@Sun.COM 	if (flen > BPF_MAXINSNS)
1242*10639SDarren.Reed@Sun.COM 		return (EINVAL);
1243*10639SDarren.Reed@Sun.COM 
1244*10639SDarren.Reed@Sun.COM 	size = flen * sizeof (*fp->bf_insns);
1245*10639SDarren.Reed@Sun.COM 	fcode = kmem_alloc(size, KM_SLEEP);
1246*10639SDarren.Reed@Sun.COM 	if (copyin(fp->bf_insns, fcode, size) != 0)
1247*10639SDarren.Reed@Sun.COM 		return (EFAULT);
1248*10639SDarren.Reed@Sun.COM 
1249*10639SDarren.Reed@Sun.COM 	if (bpf_validate(fcode, (int)flen)) {
1250*10639SDarren.Reed@Sun.COM 		mutex_enter(&d->bd_lock);
1251*10639SDarren.Reed@Sun.COM 		old = d->bd_filter;
1252*10639SDarren.Reed@Sun.COM 		oldsize = d->bd_filter_size;
1253*10639SDarren.Reed@Sun.COM 		d->bd_filter = fcode;
1254*10639SDarren.Reed@Sun.COM 		d->bd_filter_size = size;
1255*10639SDarren.Reed@Sun.COM 		reset_d(d);
1256*10639SDarren.Reed@Sun.COM 		mutex_exit(&d->bd_lock);
1257*10639SDarren.Reed@Sun.COM 		if (old != 0)
1258*10639SDarren.Reed@Sun.COM 			kmem_free(old, oldsize);
1259*10639SDarren.Reed@Sun.COM 
1260*10639SDarren.Reed@Sun.COM 		return (0);
1261*10639SDarren.Reed@Sun.COM 	}
1262*10639SDarren.Reed@Sun.COM 	kmem_free(fcode, size);
1263*10639SDarren.Reed@Sun.COM 	return (EINVAL);
1264*10639SDarren.Reed@Sun.COM }
1265*10639SDarren.Reed@Sun.COM 
1266*10639SDarren.Reed@Sun.COM /*
1267*10639SDarren.Reed@Sun.COM  * Detach a file from its current interface (if attached at all) and attach
1268*10639SDarren.Reed@Sun.COM  * to the interface indicated by the name stored in ifr.
1269*10639SDarren.Reed@Sun.COM  * Return an errno or 0.
1270*10639SDarren.Reed@Sun.COM  */
1271*10639SDarren.Reed@Sun.COM static int
1272*10639SDarren.Reed@Sun.COM bpf_setif(struct bpf_d *d, char *ifname, int namesize)
1273*10639SDarren.Reed@Sun.COM {
1274*10639SDarren.Reed@Sun.COM 	struct bpf_if *bp;
1275*10639SDarren.Reed@Sun.COM 	int unit_seen;
1276*10639SDarren.Reed@Sun.COM 	char *cp;
1277*10639SDarren.Reed@Sun.COM 	int i;
1278*10639SDarren.Reed@Sun.COM 
1279*10639SDarren.Reed@Sun.COM 	/*
1280*10639SDarren.Reed@Sun.COM 	 * Make sure the provided name has a unit number, and default
1281*10639SDarren.Reed@Sun.COM 	 * it to '0' if not specified.
1282*10639SDarren.Reed@Sun.COM 	 * XXX This is ugly ... do this differently?
1283*10639SDarren.Reed@Sun.COM 	 */
1284*10639SDarren.Reed@Sun.COM 	unit_seen = 0;
1285*10639SDarren.Reed@Sun.COM 	cp = ifname;
1286*10639SDarren.Reed@Sun.COM 	cp[namesize - 1] = '\0';	/* sanity */
1287*10639SDarren.Reed@Sun.COM 	while (*cp++)
1288*10639SDarren.Reed@Sun.COM 		if (*cp >= '0' && *cp <= '9')
1289*10639SDarren.Reed@Sun.COM 			unit_seen = 1;
1290*10639SDarren.Reed@Sun.COM 	if (!unit_seen) {
1291*10639SDarren.Reed@Sun.COM 		/* Make sure to leave room for the '\0'. */
1292*10639SDarren.Reed@Sun.COM 		for (i = 0; i < (namesize - 1); ++i) {
1293*10639SDarren.Reed@Sun.COM 			if ((ifname[i] >= 'a' && ifname[i] <= 'z') ||
1294*10639SDarren.Reed@Sun.COM 			    (ifname[i] >= 'A' && ifname[i] <= 'Z'))
1295*10639SDarren.Reed@Sun.COM 				continue;
1296*10639SDarren.Reed@Sun.COM 			ifname[i] = '0';
1297*10639SDarren.Reed@Sun.COM 		}
1298*10639SDarren.Reed@Sun.COM 	}
1299*10639SDarren.Reed@Sun.COM 
1300*10639SDarren.Reed@Sun.COM 	/*
1301*10639SDarren.Reed@Sun.COM 	 * Make sure that only one call to this function happens at a time
1302*10639SDarren.Reed@Sun.COM 	 * and that we're not interleaving a read/write
1303*10639SDarren.Reed@Sun.COM 	 */
1304*10639SDarren.Reed@Sun.COM 	mutex_enter(&d->bd_lock);
1305*10639SDarren.Reed@Sun.COM 	while (d->bd_inuse != 0) {
1306*10639SDarren.Reed@Sun.COM 		d->bd_waiting++;
1307*10639SDarren.Reed@Sun.COM 		if (cv_wait_sig(&d->bd_wait, &d->bd_lock) <= 0) {
1308*10639SDarren.Reed@Sun.COM 			d->bd_waiting--;
1309*10639SDarren.Reed@Sun.COM 			mutex_exit(&d->bd_lock);
1310*10639SDarren.Reed@Sun.COM 			return (EINTR);
1311*10639SDarren.Reed@Sun.COM 		}
1312*10639SDarren.Reed@Sun.COM 		d->bd_waiting--;
1313*10639SDarren.Reed@Sun.COM 	}
1314*10639SDarren.Reed@Sun.COM 	d->bd_inuse = -1;
1315*10639SDarren.Reed@Sun.COM 	mutex_exit(&d->bd_lock);
1316*10639SDarren.Reed@Sun.COM 
1317*10639SDarren.Reed@Sun.COM 	/*
1318*10639SDarren.Reed@Sun.COM 	 * Look through attached interfaces for the named one.
1319*10639SDarren.Reed@Sun.COM 	 *
1320*10639SDarren.Reed@Sun.COM 	 * The search is done twice - once
1321*10639SDarren.Reed@Sun.COM 	 */
1322*10639SDarren.Reed@Sun.COM 	mutex_enter(&bpf_mtx);
1323*10639SDarren.Reed@Sun.COM 
1324*10639SDarren.Reed@Sun.COM 	bp = bpf_findif(d, ifname, -1);
1325*10639SDarren.Reed@Sun.COM 
1326*10639SDarren.Reed@Sun.COM 	if (bp != NULL) {
1327*10639SDarren.Reed@Sun.COM 		int error = 0;
1328*10639SDarren.Reed@Sun.COM 
1329*10639SDarren.Reed@Sun.COM 		if (d->bd_sbuf == 0)
1330*10639SDarren.Reed@Sun.COM 			error = bpf_allocbufs(d);
1331*10639SDarren.Reed@Sun.COM 
1332*10639SDarren.Reed@Sun.COM 		/*
1333*10639SDarren.Reed@Sun.COM 		 * We found the requested interface.
1334*10639SDarren.Reed@Sun.COM 		 * If we're already attached to requested interface,
1335*10639SDarren.Reed@Sun.COM 		 * just flush the buffer.
1336*10639SDarren.Reed@Sun.COM 		 */
1337*10639SDarren.Reed@Sun.COM 		mutex_enter(&d->bd_lock);
1338*10639SDarren.Reed@Sun.COM 		if (error == 0 && bp != d->bd_bif) {
1339*10639SDarren.Reed@Sun.COM 			if (d->bd_bif)
1340*10639SDarren.Reed@Sun.COM 				/*
1341*10639SDarren.Reed@Sun.COM 				 * Detach if attached to something else.
1342*10639SDarren.Reed@Sun.COM 				 */
1343*10639SDarren.Reed@Sun.COM 				bpf_detachd(d);
1344*10639SDarren.Reed@Sun.COM 
1345*10639SDarren.Reed@Sun.COM 			bpf_attachd(d, bp);
1346*10639SDarren.Reed@Sun.COM 		}
1347*10639SDarren.Reed@Sun.COM 		reset_d(d);
1348*10639SDarren.Reed@Sun.COM 		d->bd_inuse = 0;
1349*10639SDarren.Reed@Sun.COM 		if (d->bd_waiting != 0)
1350*10639SDarren.Reed@Sun.COM 			cv_signal(&d->bd_wait);
1351*10639SDarren.Reed@Sun.COM 		mutex_exit(&d->bd_lock);
1352*10639SDarren.Reed@Sun.COM 		mutex_exit(&bpf_mtx);
1353*10639SDarren.Reed@Sun.COM 		return (error);
1354*10639SDarren.Reed@Sun.COM 	}
1355*10639SDarren.Reed@Sun.COM 
1356*10639SDarren.Reed@Sun.COM 	mutex_enter(&d->bd_lock);
1357*10639SDarren.Reed@Sun.COM 	d->bd_inuse = 0;
1358*10639SDarren.Reed@Sun.COM 	if (d->bd_waiting != 0)
1359*10639SDarren.Reed@Sun.COM 		cv_signal(&d->bd_wait);
1360*10639SDarren.Reed@Sun.COM 	mutex_exit(&d->bd_lock);
1361*10639SDarren.Reed@Sun.COM 	mutex_exit(&bpf_mtx);
1362*10639SDarren.Reed@Sun.COM 
1363*10639SDarren.Reed@Sun.COM 	/*
1364*10639SDarren.Reed@Sun.COM 	 * Try tickle the mac layer into attaching the device...
1365*10639SDarren.Reed@Sun.COM 	 */
1366*10639SDarren.Reed@Sun.COM 	return (bpf_provider_tickle(ifname, d->bd_zone));
1367*10639SDarren.Reed@Sun.COM }
1368*10639SDarren.Reed@Sun.COM 
1369*10639SDarren.Reed@Sun.COM /*
1370*10639SDarren.Reed@Sun.COM  * Copy the interface name to the ifreq.
1371*10639SDarren.Reed@Sun.COM  */
1372*10639SDarren.Reed@Sun.COM static int
1373*10639SDarren.Reed@Sun.COM bpf_ifname(struct bpf_d *d, char *buffer, int bufsize)
1374*10639SDarren.Reed@Sun.COM {
1375*10639SDarren.Reed@Sun.COM 	struct bpf_if *bp;
1376*10639SDarren.Reed@Sun.COM 
1377*10639SDarren.Reed@Sun.COM 	mutex_enter(&d->bd_lock);
1378*10639SDarren.Reed@Sun.COM 	bp = d->bd_bif;
1379*10639SDarren.Reed@Sun.COM 	if (bp == NULL) {
1380*10639SDarren.Reed@Sun.COM 		mutex_exit(&d->bd_lock);
1381*10639SDarren.Reed@Sun.COM 		return (EINVAL);
1382*10639SDarren.Reed@Sun.COM 	}
1383*10639SDarren.Reed@Sun.COM 
1384*10639SDarren.Reed@Sun.COM 	(void) strlcpy(buffer, bp->bif_ifname, bufsize);
1385*10639SDarren.Reed@Sun.COM 	mutex_exit(&d->bd_lock);
1386*10639SDarren.Reed@Sun.COM 
1387*10639SDarren.Reed@Sun.COM 	return (0);
1388*10639SDarren.Reed@Sun.COM }
1389*10639SDarren.Reed@Sun.COM 
1390*10639SDarren.Reed@Sun.COM /*
1391*10639SDarren.Reed@Sun.COM  * Support for poll() system call
1392*10639SDarren.Reed@Sun.COM  *
1393*10639SDarren.Reed@Sun.COM  * Return true iff the specific operation will not block indefinitely - with
1394*10639SDarren.Reed@Sun.COM  * the assumption that it is safe to positively acknowledge a request for the
1395*10639SDarren.Reed@Sun.COM  * ability to write to the BPF device.
1396*10639SDarren.Reed@Sun.COM  * Otherwise, return false but make a note that a selnotify() must be done.
1397*10639SDarren.Reed@Sun.COM  */
1398*10639SDarren.Reed@Sun.COM int
1399*10639SDarren.Reed@Sun.COM bpfchpoll(dev_t dev, short events, int anyyet, short *reventsp,
1400*10639SDarren.Reed@Sun.COM     struct pollhead **phpp)
1401*10639SDarren.Reed@Sun.COM {
1402*10639SDarren.Reed@Sun.COM 	struct bpf_d *d = bpf_dev_get(getminor(dev));
1403*10639SDarren.Reed@Sun.COM 
1404*10639SDarren.Reed@Sun.COM 	if (events & (POLLIN | POLLRDNORM)) {
1405*10639SDarren.Reed@Sun.COM 		/*
1406*10639SDarren.Reed@Sun.COM 		 * An imitation of the FIONREAD ioctl code.
1407*10639SDarren.Reed@Sun.COM 		 */
1408*10639SDarren.Reed@Sun.COM 		mutex_enter(&d->bd_lock);
1409*10639SDarren.Reed@Sun.COM 		if (d->bd_hlen != 0 ||
1410*10639SDarren.Reed@Sun.COM 		    ((d->bd_immediate || d->bd_state == BPF_TIMED_OUT) &&
1411*10639SDarren.Reed@Sun.COM 		    d->bd_slen != 0)) {
1412*10639SDarren.Reed@Sun.COM 			*reventsp |= events & (POLLIN | POLLRDNORM);
1413*10639SDarren.Reed@Sun.COM 		} else {
1414*10639SDarren.Reed@Sun.COM 			*reventsp = 0;
1415*10639SDarren.Reed@Sun.COM 			if (!anyyet)
1416*10639SDarren.Reed@Sun.COM 				*phpp = &d->bd_poll;
1417*10639SDarren.Reed@Sun.COM 			/* Start the read timeout if necessary */
1418*10639SDarren.Reed@Sun.COM 			if (d->bd_rtout > 0 && d->bd_state == BPF_IDLE) {
1419*10639SDarren.Reed@Sun.COM 				bpf_clear_timeout(d);
1420*10639SDarren.Reed@Sun.COM 				/*
1421*10639SDarren.Reed@Sun.COM 				 * Only allow the timeout to be set once.
1422*10639SDarren.Reed@Sun.COM 				 */
1423*10639SDarren.Reed@Sun.COM 				if (d->bd_callout == 0)
1424*10639SDarren.Reed@Sun.COM 					d->bd_callout = timeout(bpf_timed_out,
1425*10639SDarren.Reed@Sun.COM 					    d, d->bd_rtout);
1426*10639SDarren.Reed@Sun.COM 				d->bd_state = BPF_WAITING;
1427*10639SDarren.Reed@Sun.COM 			}
1428*10639SDarren.Reed@Sun.COM 		}
1429*10639SDarren.Reed@Sun.COM 		mutex_exit(&d->bd_lock);
1430*10639SDarren.Reed@Sun.COM 	}
1431*10639SDarren.Reed@Sun.COM 
1432*10639SDarren.Reed@Sun.COM 	return (0);
1433*10639SDarren.Reed@Sun.COM }
1434*10639SDarren.Reed@Sun.COM 
1435*10639SDarren.Reed@Sun.COM /*
1436*10639SDarren.Reed@Sun.COM  * Copy data from an mblk_t chain into a buffer. This works for ipnet
1437*10639SDarren.Reed@Sun.COM  * because the dl_ipnetinfo_t is placed in an mblk_t that leads the
1438*10639SDarren.Reed@Sun.COM  * packet itself.
1439*10639SDarren.Reed@Sun.COM  */
1440*10639SDarren.Reed@Sun.COM static void *
1441*10639SDarren.Reed@Sun.COM bpf_mcpy(void *dst_arg, const void *src_arg, size_t len)
1442*10639SDarren.Reed@Sun.COM {
1443*10639SDarren.Reed@Sun.COM 	const mblk_t *m;
1444*10639SDarren.Reed@Sun.COM 	uint_t count;
1445*10639SDarren.Reed@Sun.COM 	uchar_t *dst;
1446*10639SDarren.Reed@Sun.COM 
1447*10639SDarren.Reed@Sun.COM 	m = src_arg;
1448*10639SDarren.Reed@Sun.COM 	dst = dst_arg;
1449*10639SDarren.Reed@Sun.COM 	while (len > 0) {
1450*10639SDarren.Reed@Sun.COM 		if (m == NULL)
1451*10639SDarren.Reed@Sun.COM 			panic("bpf_mcpy");
1452*10639SDarren.Reed@Sun.COM 		count = (uint_t)min(M_LEN(m), len);
1453*10639SDarren.Reed@Sun.COM 		(void) memcpy(dst, mtod(m, const void *), count);
1454*10639SDarren.Reed@Sun.COM 		m = m->b_cont;
1455*10639SDarren.Reed@Sun.COM 		dst += count;
1456*10639SDarren.Reed@Sun.COM 		len -= count;
1457*10639SDarren.Reed@Sun.COM 	}
1458*10639SDarren.Reed@Sun.COM 	return (dst_arg);
1459*10639SDarren.Reed@Sun.COM }
1460*10639SDarren.Reed@Sun.COM 
1461*10639SDarren.Reed@Sun.COM /*
1462*10639SDarren.Reed@Sun.COM  * Dispatch a packet to all the listeners on interface bp.
1463*10639SDarren.Reed@Sun.COM  *
1464*10639SDarren.Reed@Sun.COM  * marg    pointer to the packet, either a data buffer or an mbuf chain
1465*10639SDarren.Reed@Sun.COM  * buflen  buffer length, if marg is a data buffer
1466*10639SDarren.Reed@Sun.COM  * cpfn    a function that can copy marg into the listener's buffer
1467*10639SDarren.Reed@Sun.COM  * pktlen  length of the packet
1468*10639SDarren.Reed@Sun.COM  * issent  boolean indicating whether the packet was sent or receive
1469*10639SDarren.Reed@Sun.COM  */
1470*10639SDarren.Reed@Sun.COM static inline void
1471*10639SDarren.Reed@Sun.COM bpf_deliver(struct bpf_d *d, cp_fn_t cpfn, void *marg, uint_t pktlen,
1472*10639SDarren.Reed@Sun.COM     uint_t buflen, boolean_t issent)
1473*10639SDarren.Reed@Sun.COM {
1474*10639SDarren.Reed@Sun.COM 	struct timeval tv;
1475*10639SDarren.Reed@Sun.COM 	uint_t slen;
1476*10639SDarren.Reed@Sun.COM 
1477*10639SDarren.Reed@Sun.COM 	if (!d->bd_seesent && issent)
1478*10639SDarren.Reed@Sun.COM 		return;
1479*10639SDarren.Reed@Sun.COM 
1480*10639SDarren.Reed@Sun.COM 	/*
1481*10639SDarren.Reed@Sun.COM 	 * Accuracy of the packet counters in BPF is vital so it
1482*10639SDarren.Reed@Sun.COM 	 * is important to protect even the outer ones.
1483*10639SDarren.Reed@Sun.COM 	 */
1484*10639SDarren.Reed@Sun.COM 	mutex_enter(&d->bd_lock);
1485*10639SDarren.Reed@Sun.COM 	slen = bpf_filter(d->bd_filter, marg, pktlen, buflen);
1486*10639SDarren.Reed@Sun.COM 	DTRACE_PROBE5(bpf__packet, struct bpf_if *, d->bd_bif,
1487*10639SDarren.Reed@Sun.COM 	    struct bpf_d *, d, void *, marg, uint_t, pktlen, uint_t, slen);
1488*10639SDarren.Reed@Sun.COM 	d->bd_rcount++;
1489*10639SDarren.Reed@Sun.COM 	ks_stats.kp_receive.value.ui64++;
1490*10639SDarren.Reed@Sun.COM 	if (slen != 0) {
1491*10639SDarren.Reed@Sun.COM 		uniqtime(&tv);
1492*10639SDarren.Reed@Sun.COM 		catchpacket(d, marg, pktlen, slen, cpfn, &tv);
1493*10639SDarren.Reed@Sun.COM 	}
1494*10639SDarren.Reed@Sun.COM 	mutex_exit(&d->bd_lock);
1495*10639SDarren.Reed@Sun.COM }
1496*10639SDarren.Reed@Sun.COM 
1497*10639SDarren.Reed@Sun.COM /*
1498*10639SDarren.Reed@Sun.COM  * Incoming linkage from device drivers.
1499*10639SDarren.Reed@Sun.COM  */
1500*10639SDarren.Reed@Sun.COM /* ARGSUSED */
1501*10639SDarren.Reed@Sun.COM void
1502*10639SDarren.Reed@Sun.COM bpf_mtap(void *arg, mac_resource_handle_t mrh, mblk_t *m, boolean_t issent)
1503*10639SDarren.Reed@Sun.COM {
1504*10639SDarren.Reed@Sun.COM 	cp_fn_t cpfn;
1505*10639SDarren.Reed@Sun.COM 	struct bpf_d *d = arg;
1506*10639SDarren.Reed@Sun.COM 	uint_t pktlen, buflen;
1507*10639SDarren.Reed@Sun.COM 	void *marg;
1508*10639SDarren.Reed@Sun.COM 
1509*10639SDarren.Reed@Sun.COM 	pktlen = msgdsize(m);
1510*10639SDarren.Reed@Sun.COM 
1511*10639SDarren.Reed@Sun.COM 	if (pktlen == M_LEN(m)) {
1512*10639SDarren.Reed@Sun.COM 		cpfn = (cp_fn_t)memcpy;
1513*10639SDarren.Reed@Sun.COM 		marg = mtod(m, void *);
1514*10639SDarren.Reed@Sun.COM 		buflen = pktlen;
1515*10639SDarren.Reed@Sun.COM 	} else {
1516*10639SDarren.Reed@Sun.COM 		cpfn = bpf_mcpy;
1517*10639SDarren.Reed@Sun.COM 		marg = m;
1518*10639SDarren.Reed@Sun.COM 		buflen = 0;
1519*10639SDarren.Reed@Sun.COM 	}
1520*10639SDarren.Reed@Sun.COM 
1521*10639SDarren.Reed@Sun.COM 	bpf_deliver(d, cpfn, marg, pktlen, buflen, issent);
1522*10639SDarren.Reed@Sun.COM }
1523*10639SDarren.Reed@Sun.COM 
1524*10639SDarren.Reed@Sun.COM /*
1525*10639SDarren.Reed@Sun.COM  * Incoming linkage from ipnet.
1526*10639SDarren.Reed@Sun.COM  * In ipnet, there is only one event, NH_OBSERVE, that delivers packets
1527*10639SDarren.Reed@Sun.COM  * from all network interfaces. Thus the tap function needs to apply a
1528*10639SDarren.Reed@Sun.COM  * filter using the interface index/id to immitate snoop'ing on just the
1529*10639SDarren.Reed@Sun.COM  * specified interface.
1530*10639SDarren.Reed@Sun.COM  */
1531*10639SDarren.Reed@Sun.COM /* ARGSUSED */
1532*10639SDarren.Reed@Sun.COM void
1533*10639SDarren.Reed@Sun.COM bpf_itap(void *arg, mblk_t *m, boolean_t issent, uint_t length)
1534*10639SDarren.Reed@Sun.COM {
1535*10639SDarren.Reed@Sun.COM 	hook_pkt_observe_t *hdr;
1536*10639SDarren.Reed@Sun.COM 	struct bpf_d *d = arg;
1537*10639SDarren.Reed@Sun.COM 
1538*10639SDarren.Reed@Sun.COM 	hdr = (hook_pkt_observe_t *)m->b_rptr;
1539*10639SDarren.Reed@Sun.COM 	if (ntohl(hdr->hpo_ifindex) != d->bd_bif->bif_linkid)
1540*10639SDarren.Reed@Sun.COM 		return;
1541*10639SDarren.Reed@Sun.COM 	bpf_deliver(d, bpf_mcpy, m, length, 0, issent);
1542*10639SDarren.Reed@Sun.COM 
1543*10639SDarren.Reed@Sun.COM }
1544*10639SDarren.Reed@Sun.COM 
1545*10639SDarren.Reed@Sun.COM /*
1546*10639SDarren.Reed@Sun.COM  * Move the packet data from interface memory (pkt) into the
1547*10639SDarren.Reed@Sun.COM  * store buffer.  Return 1 if it's time to wakeup a listener (buffer full),
1548*10639SDarren.Reed@Sun.COM  * otherwise 0.  "copy" is the routine called to do the actual data
1549*10639SDarren.Reed@Sun.COM  * transfer.  memcpy is passed in to copy contiguous chunks, while
1550*10639SDarren.Reed@Sun.COM  * bpf_mcpy is passed in to copy mbuf chains.  In the latter case,
1551*10639SDarren.Reed@Sun.COM  * pkt is really an mbuf.
1552*10639SDarren.Reed@Sun.COM  */
1553*10639SDarren.Reed@Sun.COM static void
1554*10639SDarren.Reed@Sun.COM catchpacket(struct bpf_d *d, uchar_t *pkt, uint_t pktlen, uint_t snaplen,
1555*10639SDarren.Reed@Sun.COM     cp_fn_t cpfn, struct timeval *tv)
1556*10639SDarren.Reed@Sun.COM {
1557*10639SDarren.Reed@Sun.COM 	struct bpf_hdr *hp;
1558*10639SDarren.Reed@Sun.COM 	int totlen, curlen;
1559*10639SDarren.Reed@Sun.COM 	int hdrlen = d->bd_bif->bif_hdrlen;
1560*10639SDarren.Reed@Sun.COM 	int do_wakeup = 0;
1561*10639SDarren.Reed@Sun.COM 
1562*10639SDarren.Reed@Sun.COM 	++d->bd_ccount;
1563*10639SDarren.Reed@Sun.COM 	ks_stats.kp_capture.value.ui64++;
1564*10639SDarren.Reed@Sun.COM 	/*
1565*10639SDarren.Reed@Sun.COM 	 * Figure out how many bytes to move.  If the packet is
1566*10639SDarren.Reed@Sun.COM 	 * greater or equal to the snapshot length, transfer that
1567*10639SDarren.Reed@Sun.COM 	 * much.  Otherwise, transfer the whole packet (unless
1568*10639SDarren.Reed@Sun.COM 	 * we hit the buffer size limit).
1569*10639SDarren.Reed@Sun.COM 	 */
1570*10639SDarren.Reed@Sun.COM 	totlen = hdrlen + min(snaplen, pktlen);
1571*10639SDarren.Reed@Sun.COM 	if (totlen > d->bd_bufsize)
1572*10639SDarren.Reed@Sun.COM 		totlen = d->bd_bufsize;
1573*10639SDarren.Reed@Sun.COM 
1574*10639SDarren.Reed@Sun.COM 	/*
1575*10639SDarren.Reed@Sun.COM 	 * Round up the end of the previous packet to the next longword.
1576*10639SDarren.Reed@Sun.COM 	 */
1577*10639SDarren.Reed@Sun.COM 	curlen = BPF_WORDALIGN(d->bd_slen);
1578*10639SDarren.Reed@Sun.COM 	if (curlen + totlen > d->bd_bufsize) {
1579*10639SDarren.Reed@Sun.COM 		/*
1580*10639SDarren.Reed@Sun.COM 		 * This packet will overflow the storage buffer.
1581*10639SDarren.Reed@Sun.COM 		 * Rotate the buffers if we can, then wakeup any
1582*10639SDarren.Reed@Sun.COM 		 * pending reads.
1583*10639SDarren.Reed@Sun.COM 		 */
1584*10639SDarren.Reed@Sun.COM 		if (d->bd_fbuf == 0) {
1585*10639SDarren.Reed@Sun.COM 			/*
1586*10639SDarren.Reed@Sun.COM 			 * We haven't completed the previous read yet,
1587*10639SDarren.Reed@Sun.COM 			 * so drop the packet.
1588*10639SDarren.Reed@Sun.COM 			 */
1589*10639SDarren.Reed@Sun.COM 			++d->bd_dcount;
1590*10639SDarren.Reed@Sun.COM 			ks_stats.kp_dropped.value.ui64++;
1591*10639SDarren.Reed@Sun.COM 			return;
1592*10639SDarren.Reed@Sun.COM 		}
1593*10639SDarren.Reed@Sun.COM 		ROTATE_BUFFERS(d);
1594*10639SDarren.Reed@Sun.COM 		do_wakeup = 1;
1595*10639SDarren.Reed@Sun.COM 		curlen = 0;
1596*10639SDarren.Reed@Sun.COM 	} else if (d->bd_immediate || d->bd_state == BPF_TIMED_OUT) {
1597*10639SDarren.Reed@Sun.COM 		/*
1598*10639SDarren.Reed@Sun.COM 		 * Immediate mode is set, or the read timeout has
1599*10639SDarren.Reed@Sun.COM 		 * already expired during a select call.  A packet
1600*10639SDarren.Reed@Sun.COM 		 * arrived, so the reader should be woken up.
1601*10639SDarren.Reed@Sun.COM 		 */
1602*10639SDarren.Reed@Sun.COM 		do_wakeup = 1;
1603*10639SDarren.Reed@Sun.COM 	}
1604*10639SDarren.Reed@Sun.COM 
1605*10639SDarren.Reed@Sun.COM 	/*
1606*10639SDarren.Reed@Sun.COM 	 * Append the bpf header to the existing buffer before we add
1607*10639SDarren.Reed@Sun.COM 	 * on the actual packet data.
1608*10639SDarren.Reed@Sun.COM 	 */
1609*10639SDarren.Reed@Sun.COM 	hp = (struct bpf_hdr *)((char *)d->bd_sbuf + curlen);
1610*10639SDarren.Reed@Sun.COM 	hp->bh_tstamp.tv_sec = tv->tv_sec;
1611*10639SDarren.Reed@Sun.COM 	hp->bh_tstamp.tv_usec = tv->tv_usec;
1612*10639SDarren.Reed@Sun.COM 	hp->bh_datalen = pktlen;
1613*10639SDarren.Reed@Sun.COM 	hp->bh_hdrlen = (uint16_t)hdrlen;
1614*10639SDarren.Reed@Sun.COM 	/*
1615*10639SDarren.Reed@Sun.COM 	 * Copy the packet data into the store buffer and update its length.
1616*10639SDarren.Reed@Sun.COM 	 */
1617*10639SDarren.Reed@Sun.COM 	(*cpfn)((uchar_t *)hp + hdrlen, pkt,
1618*10639SDarren.Reed@Sun.COM 	    (hp->bh_caplen = totlen - hdrlen));
1619*10639SDarren.Reed@Sun.COM 	d->bd_slen = curlen + totlen;
1620*10639SDarren.Reed@Sun.COM 
1621*10639SDarren.Reed@Sun.COM 	/*
1622*10639SDarren.Reed@Sun.COM 	 * Call bpf_wakeup after bd_slen has been updated.
1623*10639SDarren.Reed@Sun.COM 	 */
1624*10639SDarren.Reed@Sun.COM 	if (do_wakeup)
1625*10639SDarren.Reed@Sun.COM 		bpf_wakeup(d);
1626*10639SDarren.Reed@Sun.COM }
1627*10639SDarren.Reed@Sun.COM 
1628*10639SDarren.Reed@Sun.COM /*
1629*10639SDarren.Reed@Sun.COM  * Initialize all nonzero fields of a descriptor.
1630*10639SDarren.Reed@Sun.COM  */
1631*10639SDarren.Reed@Sun.COM static int
1632*10639SDarren.Reed@Sun.COM bpf_allocbufs(struct bpf_d *d)
1633*10639SDarren.Reed@Sun.COM {
1634*10639SDarren.Reed@Sun.COM 
1635*10639SDarren.Reed@Sun.COM 	d->bd_fbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP);
1636*10639SDarren.Reed@Sun.COM 	if (!d->bd_fbuf)
1637*10639SDarren.Reed@Sun.COM 		return (ENOBUFS);
1638*10639SDarren.Reed@Sun.COM 	d->bd_sbuf = kmem_zalloc(d->bd_bufsize, KM_NOSLEEP);
1639*10639SDarren.Reed@Sun.COM 	if (!d->bd_sbuf) {
1640*10639SDarren.Reed@Sun.COM 		kmem_free(d->bd_fbuf, d->bd_bufsize);
1641*10639SDarren.Reed@Sun.COM 		return (ENOBUFS);
1642*10639SDarren.Reed@Sun.COM 	}
1643*10639SDarren.Reed@Sun.COM 	d->bd_slen = 0;
1644*10639SDarren.Reed@Sun.COM 	d->bd_hlen = 0;
1645*10639SDarren.Reed@Sun.COM 	return (0);
1646*10639SDarren.Reed@Sun.COM }
1647*10639SDarren.Reed@Sun.COM 
1648*10639SDarren.Reed@Sun.COM /*
1649*10639SDarren.Reed@Sun.COM  * Free buffers currently in use by a descriptor.
1650*10639SDarren.Reed@Sun.COM  * Called on close.
1651*10639SDarren.Reed@Sun.COM  */
1652*10639SDarren.Reed@Sun.COM static void
1653*10639SDarren.Reed@Sun.COM bpf_freed(struct bpf_d *d)
1654*10639SDarren.Reed@Sun.COM {
1655*10639SDarren.Reed@Sun.COM 	/*
1656*10639SDarren.Reed@Sun.COM 	 * At this point the descriptor has been detached from its
1657*10639SDarren.Reed@Sun.COM 	 * interface and it yet hasn't been marked free.
1658*10639SDarren.Reed@Sun.COM 	 */
1659*10639SDarren.Reed@Sun.COM 	if (d->bd_sbuf != 0) {
1660*10639SDarren.Reed@Sun.COM 		kmem_free(d->bd_sbuf, d->bd_bufsize);
1661*10639SDarren.Reed@Sun.COM 		if (d->bd_hbuf != 0)
1662*10639SDarren.Reed@Sun.COM 			kmem_free(d->bd_hbuf, d->bd_bufsize);
1663*10639SDarren.Reed@Sun.COM 		if (d->bd_fbuf != 0)
1664*10639SDarren.Reed@Sun.COM 			kmem_free(d->bd_fbuf, d->bd_bufsize);
1665*10639SDarren.Reed@Sun.COM 	}
1666*10639SDarren.Reed@Sun.COM 	if (d->bd_filter)
1667*10639SDarren.Reed@Sun.COM 		kmem_free(d->bd_filter, d->bd_filter_size);
1668*10639SDarren.Reed@Sun.COM }
1669*10639SDarren.Reed@Sun.COM 
1670*10639SDarren.Reed@Sun.COM /*
1671*10639SDarren.Reed@Sun.COM  * Attach additional dlt for a interface to bpf.
1672*10639SDarren.Reed@Sun.COM  * dlt is the link layer type.
1673*10639SDarren.Reed@Sun.COM  *
1674*10639SDarren.Reed@Sun.COM  * The zoneid is passed in explicitly to prevent the need to
1675*10639SDarren.Reed@Sun.COM  * do a lookup in dls using the linkid. Such a lookup would need
1676*10639SDarren.Reed@Sun.COM  * to use the same hash table that gets used for walking when
1677*10639SDarren.Reed@Sun.COM  * dls_set_bpfattach() is called.
1678*10639SDarren.Reed@Sun.COM  */
1679*10639SDarren.Reed@Sun.COM void
1680*10639SDarren.Reed@Sun.COM bpfattach(uintptr_t ifp, int dlt, zoneid_t zoneid, int provider)
1681*10639SDarren.Reed@Sun.COM {
1682*10639SDarren.Reed@Sun.COM 	bpf_provider_t *bpr;
1683*10639SDarren.Reed@Sun.COM 	struct bpf_if *bp;
1684*10639SDarren.Reed@Sun.COM 	uintptr_t client;
1685*10639SDarren.Reed@Sun.COM 	int hdrlen;
1686*10639SDarren.Reed@Sun.COM 
1687*10639SDarren.Reed@Sun.COM 	bpr = bpf_find_provider_by_id(provider);
1688*10639SDarren.Reed@Sun.COM 	if (bpr == NULL) {
1689*10639SDarren.Reed@Sun.COM 		if (bpf_debug)
1690*10639SDarren.Reed@Sun.COM 			cmn_err(CE_WARN, "bpfattach: unknown provider %d",
1691*10639SDarren.Reed@Sun.COM 			    provider);
1692*10639SDarren.Reed@Sun.COM 		return;
1693*10639SDarren.Reed@Sun.COM 	}
1694*10639SDarren.Reed@Sun.COM 
1695*10639SDarren.Reed@Sun.COM 	bp = kmem_zalloc(sizeof (*bp), KM_NOSLEEP);
1696*10639SDarren.Reed@Sun.COM 	if (bp == NULL) {
1697*10639SDarren.Reed@Sun.COM 		if (bpf_debug)
1698*10639SDarren.Reed@Sun.COM 			cmn_err(CE_WARN, "bpfattach: no memory for bpf_if");
1699*10639SDarren.Reed@Sun.COM 		return;
1700*10639SDarren.Reed@Sun.COM 	}
1701*10639SDarren.Reed@Sun.COM 	bp->bif_mac = *bpr;
1702*10639SDarren.Reed@Sun.COM 
1703*10639SDarren.Reed@Sun.COM 	/*
1704*10639SDarren.Reed@Sun.COM 	 * To get the user-visible name, it is necessary to get the mac
1705*10639SDarren.Reed@Sun.COM 	 * client name of an interface and for this, we need to do the
1706*10639SDarren.Reed@Sun.COM 	 * mac_client_open. Leaving it open is undesirable because it
1707*10639SDarren.Reed@Sun.COM 	 * creates an open reference that is hard to see from outside
1708*10639SDarren.Reed@Sun.COM 	 * of bpf, potentially leading to data structures not being
1709*10639SDarren.Reed@Sun.COM 	 * cleaned up when they should.
1710*10639SDarren.Reed@Sun.COM 	 */
1711*10639SDarren.Reed@Sun.COM 	if (MBPF_CLIENT_OPEN(&bp->bif_mac, ifp, &client) != 0) {
1712*10639SDarren.Reed@Sun.COM 		if (bpf_debug)
1713*10639SDarren.Reed@Sun.COM 			cmn_err(CE_WARN,
1714*10639SDarren.Reed@Sun.COM 			    "bpfattach: mac_client_open fail for %s",
1715*10639SDarren.Reed@Sun.COM 			    MBPF_NAME(&bp->bif_mac, ifp));
1716*10639SDarren.Reed@Sun.COM 		kmem_free(bp, sizeof (*bp));
1717*10639SDarren.Reed@Sun.COM 		return;
1718*10639SDarren.Reed@Sun.COM 	}
1719*10639SDarren.Reed@Sun.COM 	(void) strlcpy(bp->bif_ifname, MBPF_CLIENT_NAME(&bp->bif_mac, client),
1720*10639SDarren.Reed@Sun.COM 	    sizeof (bp->bif_ifname));
1721*10639SDarren.Reed@Sun.COM 	MBPF_CLIENT_CLOSE(&bp->bif_mac, client);
1722*10639SDarren.Reed@Sun.COM 
1723*10639SDarren.Reed@Sun.COM 	bp->bif_ifp = ifp;
1724*10639SDarren.Reed@Sun.COM 	bp->bif_dlt = bpf_dl_to_dlt(dlt);
1725*10639SDarren.Reed@Sun.COM 	bp->bif_zoneid = zoneid;
1726*10639SDarren.Reed@Sun.COM 	LIST_INIT(&bp->bif_dlist);
1727*10639SDarren.Reed@Sun.COM 
1728*10639SDarren.Reed@Sun.COM 	/*
1729*10639SDarren.Reed@Sun.COM 	 * Compute the length of the bpf header.  This is not necessarily
1730*10639SDarren.Reed@Sun.COM 	 * equal to SIZEOF_BPF_HDR because we want to insert spacing such
1731*10639SDarren.Reed@Sun.COM 	 * that the network layer header begins on a longword boundary (for
1732*10639SDarren.Reed@Sun.COM 	 * performance reasons and to alleviate alignment restrictions).
1733*10639SDarren.Reed@Sun.COM 	 */
1734*10639SDarren.Reed@Sun.COM 	hdrlen = bpf_dl_hdrsize(dlt);
1735*10639SDarren.Reed@Sun.COM 	bp->bif_hdrlen = BPF_WORDALIGN(hdrlen + SIZEOF_BPF_HDR) - hdrlen;
1736*10639SDarren.Reed@Sun.COM 
1737*10639SDarren.Reed@Sun.COM 	if (MBPF_GET_LINKID(&bp->bif_mac, MBPF_NAME(&bp->bif_mac, ifp),
1738*10639SDarren.Reed@Sun.COM 	    &bp->bif_linkid, zoneid) != 0) {
1739*10639SDarren.Reed@Sun.COM 		if (bpf_debug) {
1740*10639SDarren.Reed@Sun.COM 			cmn_err(CE_WARN,
1741*10639SDarren.Reed@Sun.COM 			    "bpfattach: linkid resolution fail for %s/%s",
1742*10639SDarren.Reed@Sun.COM 			    MBPF_NAME(&bp->bif_mac, ifp), bp->bif_ifname);
1743*10639SDarren.Reed@Sun.COM 		}
1744*10639SDarren.Reed@Sun.COM 		kmem_free(bp, sizeof (*bp));
1745*10639SDarren.Reed@Sun.COM 		return;
1746*10639SDarren.Reed@Sun.COM 	}
1747*10639SDarren.Reed@Sun.COM 	mutex_init(&bp->bif_lock, NULL, MUTEX_DRIVER, NULL);
1748*10639SDarren.Reed@Sun.COM 
1749*10639SDarren.Reed@Sun.COM 	bpf_debug_nic_action("attached to", bp);
1750*10639SDarren.Reed@Sun.COM 
1751*10639SDarren.Reed@Sun.COM 	mutex_enter(&bpf_mtx);
1752*10639SDarren.Reed@Sun.COM 	TAILQ_INSERT_TAIL(&bpf_iflist, bp, bif_next);
1753*10639SDarren.Reed@Sun.COM 	mutex_exit(&bpf_mtx);
1754*10639SDarren.Reed@Sun.COM }
1755*10639SDarren.Reed@Sun.COM 
1756*10639SDarren.Reed@Sun.COM /*
1757*10639SDarren.Reed@Sun.COM  * Remove an interface from bpf.
1758*10639SDarren.Reed@Sun.COM  */
1759*10639SDarren.Reed@Sun.COM void
1760*10639SDarren.Reed@Sun.COM bpfdetach(uintptr_t ifp)
1761*10639SDarren.Reed@Sun.COM {
1762*10639SDarren.Reed@Sun.COM 	struct bpf_if *bp;
1763*10639SDarren.Reed@Sun.COM 	struct bpf_d *d;
1764*10639SDarren.Reed@Sun.COM 	int removed = 0;
1765*10639SDarren.Reed@Sun.COM 
1766*10639SDarren.Reed@Sun.COM 	mutex_enter(&bpf_mtx);
1767*10639SDarren.Reed@Sun.COM 	/*
1768*10639SDarren.Reed@Sun.COM 	 * Loop through all of the known descriptors to find any that are
1769*10639SDarren.Reed@Sun.COM 	 * using the interface that wants to be detached.
1770*10639SDarren.Reed@Sun.COM 	 */
1771*10639SDarren.Reed@Sun.COM 	LIST_FOREACH(d, &bpf_list, bd_list) {
1772*10639SDarren.Reed@Sun.COM 		mutex_enter(&d->bd_lock);
1773*10639SDarren.Reed@Sun.COM 		bp = d->bd_bif;
1774*10639SDarren.Reed@Sun.COM 		if (bp != NULL && bp->bif_ifp == ifp) {
1775*10639SDarren.Reed@Sun.COM 			/*
1776*10639SDarren.Reed@Sun.COM 			 * Detach the descriptor from an interface now.
1777*10639SDarren.Reed@Sun.COM 			 * It will be free'ed later by close routine.
1778*10639SDarren.Reed@Sun.COM 			 */
1779*10639SDarren.Reed@Sun.COM 			bpf_detachd(d);
1780*10639SDarren.Reed@Sun.COM 		}
1781*10639SDarren.Reed@Sun.COM 		mutex_exit(&d->bd_lock);
1782*10639SDarren.Reed@Sun.COM 	}
1783*10639SDarren.Reed@Sun.COM 
1784*10639SDarren.Reed@Sun.COM again:
1785*10639SDarren.Reed@Sun.COM 	TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
1786*10639SDarren.Reed@Sun.COM 		if (bp->bif_ifp == ifp) {
1787*10639SDarren.Reed@Sun.COM 			TAILQ_REMOVE(&bpf_iflist, bp, bif_next);
1788*10639SDarren.Reed@Sun.COM 			bpf_debug_nic_action("detached from", bp);
1789*10639SDarren.Reed@Sun.COM 			while (bp->bif_inuse != 0)
1790*10639SDarren.Reed@Sun.COM 				cv_wait(&bpf_dlt_waiter, &bpf_mtx);
1791*10639SDarren.Reed@Sun.COM 			kmem_free(bp, sizeof (*bp));
1792*10639SDarren.Reed@Sun.COM 			removed++;
1793*10639SDarren.Reed@Sun.COM 			goto again;
1794*10639SDarren.Reed@Sun.COM 		}
1795*10639SDarren.Reed@Sun.COM 	}
1796*10639SDarren.Reed@Sun.COM 	mutex_exit(&bpf_mtx);
1797*10639SDarren.Reed@Sun.COM 
1798*10639SDarren.Reed@Sun.COM 	ASSERT(removed > 0);
1799*10639SDarren.Reed@Sun.COM }
1800*10639SDarren.Reed@Sun.COM 
1801*10639SDarren.Reed@Sun.COM /*
1802*10639SDarren.Reed@Sun.COM  * Get a list of available data link type of the interface.
1803*10639SDarren.Reed@Sun.COM  */
1804*10639SDarren.Reed@Sun.COM static int
1805*10639SDarren.Reed@Sun.COM bpf_getdltlist(struct bpf_d *d, struct bpf_dltlist *listp)
1806*10639SDarren.Reed@Sun.COM {
1807*10639SDarren.Reed@Sun.COM 	char ifname[LIFNAMSIZ+1];
1808*10639SDarren.Reed@Sun.COM 	struct bpf_if *bp;
1809*10639SDarren.Reed@Sun.COM 	uintptr_t ifp;
1810*10639SDarren.Reed@Sun.COM 	int n, error;
1811*10639SDarren.Reed@Sun.COM 
1812*10639SDarren.Reed@Sun.COM 	mutex_enter(&bpf_mtx);
1813*10639SDarren.Reed@Sun.COM 	ifp = d->bd_bif->bif_ifp;
1814*10639SDarren.Reed@Sun.COM 	(void) strlcpy(ifname, MBPF_NAME(&d->bd_bif->bif_mac, ifp),
1815*10639SDarren.Reed@Sun.COM 	    sizeof (ifname));
1816*10639SDarren.Reed@Sun.COM 	n = 0;
1817*10639SDarren.Reed@Sun.COM 	error = 0;
1818*10639SDarren.Reed@Sun.COM 	TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
1819*10639SDarren.Reed@Sun.COM 		if (strcmp(bp->bif_ifname, ifname) != 0)
1820*10639SDarren.Reed@Sun.COM 			continue;
1821*10639SDarren.Reed@Sun.COM 		if (d->bd_zone != GLOBAL_ZONEID &&
1822*10639SDarren.Reed@Sun.COM 		    d->bd_zone != bp->bif_zoneid)
1823*10639SDarren.Reed@Sun.COM 			continue;
1824*10639SDarren.Reed@Sun.COM 		if (listp->bfl_list != NULL) {
1825*10639SDarren.Reed@Sun.COM 			if (n >= listp->bfl_len)
1826*10639SDarren.Reed@Sun.COM 				return (ENOMEM);
1827*10639SDarren.Reed@Sun.COM 			/*
1828*10639SDarren.Reed@Sun.COM 			 * Bumping of bif_inuse ensures the structure does not
1829*10639SDarren.Reed@Sun.COM 			 * disappear while the copyout runs and allows the for
1830*10639SDarren.Reed@Sun.COM 			 * loop to be continued.
1831*10639SDarren.Reed@Sun.COM 			 */
1832*10639SDarren.Reed@Sun.COM 			bp->bif_inuse++;
1833*10639SDarren.Reed@Sun.COM 			mutex_exit(&bpf_mtx);
1834*10639SDarren.Reed@Sun.COM 			if (copyout(&bp->bif_dlt,
1835*10639SDarren.Reed@Sun.COM 			    listp->bfl_list + n, sizeof (uint_t)) != 0)
1836*10639SDarren.Reed@Sun.COM 				error = EFAULT;
1837*10639SDarren.Reed@Sun.COM 			mutex_enter(&bpf_mtx);
1838*10639SDarren.Reed@Sun.COM 			bp->bif_inuse--;
1839*10639SDarren.Reed@Sun.COM 		}
1840*10639SDarren.Reed@Sun.COM 		n++;
1841*10639SDarren.Reed@Sun.COM 	}
1842*10639SDarren.Reed@Sun.COM 	cv_signal(&bpf_dlt_waiter);
1843*10639SDarren.Reed@Sun.COM 	mutex_exit(&bpf_mtx);
1844*10639SDarren.Reed@Sun.COM 	listp->bfl_len = n;
1845*10639SDarren.Reed@Sun.COM 	return (error);
1846*10639SDarren.Reed@Sun.COM }
1847*10639SDarren.Reed@Sun.COM 
1848*10639SDarren.Reed@Sun.COM /*
1849*10639SDarren.Reed@Sun.COM  * Set the data link type of a BPF instance.
1850*10639SDarren.Reed@Sun.COM  */
1851*10639SDarren.Reed@Sun.COM static int
1852*10639SDarren.Reed@Sun.COM bpf_setdlt(struct bpf_d *d, void *addr)
1853*10639SDarren.Reed@Sun.COM {
1854*10639SDarren.Reed@Sun.COM 	char ifname[LIFNAMSIZ+1];
1855*10639SDarren.Reed@Sun.COM 	struct bpf_if *bp;
1856*10639SDarren.Reed@Sun.COM 	int error;
1857*10639SDarren.Reed@Sun.COM 	int dlt;
1858*10639SDarren.Reed@Sun.COM 
1859*10639SDarren.Reed@Sun.COM 	if (copyin(addr, &dlt, sizeof (dlt)) != 0)
1860*10639SDarren.Reed@Sun.COM 		return (EFAULT);
1861*10639SDarren.Reed@Sun.COM 	/*
1862*10639SDarren.Reed@Sun.COM 	 * The established order is get bpf_mtx before bd_lock, even
1863*10639SDarren.Reed@Sun.COM 	 * though bpf_mtx is not needed until the loop...
1864*10639SDarren.Reed@Sun.COM 	 */
1865*10639SDarren.Reed@Sun.COM 	mutex_enter(&bpf_mtx);
1866*10639SDarren.Reed@Sun.COM 	mutex_enter(&d->bd_lock);
1867*10639SDarren.Reed@Sun.COM 
1868*10639SDarren.Reed@Sun.COM 	if (d->bd_bif == 0) {			/* Interface not set */
1869*10639SDarren.Reed@Sun.COM 		mutex_exit(&d->bd_lock);
1870*10639SDarren.Reed@Sun.COM 		mutex_exit(&bpf_mtx);
1871*10639SDarren.Reed@Sun.COM 		return (EINVAL);
1872*10639SDarren.Reed@Sun.COM 	}
1873*10639SDarren.Reed@Sun.COM 	if (d->bd_bif->bif_dlt == dlt) {	/* NULL-op */
1874*10639SDarren.Reed@Sun.COM 		mutex_exit(&d->bd_lock);
1875*10639SDarren.Reed@Sun.COM 		mutex_exit(&bpf_mtx);
1876*10639SDarren.Reed@Sun.COM 		return (0);
1877*10639SDarren.Reed@Sun.COM 	}
1878*10639SDarren.Reed@Sun.COM 
1879*10639SDarren.Reed@Sun.COM 	/*
1880*10639SDarren.Reed@Sun.COM 	 * See the matrix at the top of the file for the permissions table
1881*10639SDarren.Reed@Sun.COM 	 * enforced by this driver.
1882*10639SDarren.Reed@Sun.COM 	 */
1883*10639SDarren.Reed@Sun.COM 	if ((d->bd_zone != GLOBAL_ZONEID) && (dlt != DLT_IPNET) &&
1884*10639SDarren.Reed@Sun.COM 	    (d->bd_bif->bif_zoneid != d->bd_zone)) {
1885*10639SDarren.Reed@Sun.COM 		mutex_exit(&d->bd_lock);
1886*10639SDarren.Reed@Sun.COM 		mutex_exit(&bpf_mtx);
1887*10639SDarren.Reed@Sun.COM 		return (EINVAL);
1888*10639SDarren.Reed@Sun.COM 	}
1889*10639SDarren.Reed@Sun.COM 
1890*10639SDarren.Reed@Sun.COM 	(void) strlcpy(ifname,
1891*10639SDarren.Reed@Sun.COM 	    MBPF_NAME(&d->bd_bif->bif_mac, d->bd_bif->bif_ifp),
1892*10639SDarren.Reed@Sun.COM 	    sizeof (ifname));
1893*10639SDarren.Reed@Sun.COM 
1894*10639SDarren.Reed@Sun.COM 	bp = bpf_findif(d, ifname, dlt);
1895*10639SDarren.Reed@Sun.COM 
1896*10639SDarren.Reed@Sun.COM 	mutex_exit(&bpf_mtx);
1897*10639SDarren.Reed@Sun.COM 	/*
1898*10639SDarren.Reed@Sun.COM 	 * Now only bd_lock is held.
1899*10639SDarren.Reed@Sun.COM 	 *
1900*10639SDarren.Reed@Sun.COM 	 * If there was no matching interface that supports the requested
1901*10639SDarren.Reed@Sun.COM 	 * DLT, return an error and leave the current binding alone.
1902*10639SDarren.Reed@Sun.COM 	 */
1903*10639SDarren.Reed@Sun.COM 	if (bp == NULL) {
1904*10639SDarren.Reed@Sun.COM 		mutex_exit(&d->bd_lock);
1905*10639SDarren.Reed@Sun.COM 		return (EINVAL);
1906*10639SDarren.Reed@Sun.COM 	}
1907*10639SDarren.Reed@Sun.COM 
1908*10639SDarren.Reed@Sun.COM 	error = 0;
1909*10639SDarren.Reed@Sun.COM 	bpf_detachd(d);
1910*10639SDarren.Reed@Sun.COM 	bpf_attachd(d, bp);
1911*10639SDarren.Reed@Sun.COM 	reset_d(d);
1912*10639SDarren.Reed@Sun.COM 
1913*10639SDarren.Reed@Sun.COM 	mutex_exit(&d->bd_lock);
1914*10639SDarren.Reed@Sun.COM 	return (error);
1915*10639SDarren.Reed@Sun.COM }
1916*10639SDarren.Reed@Sun.COM 
1917*10639SDarren.Reed@Sun.COM /*
1918*10639SDarren.Reed@Sun.COM  * bpf_clear_timeout is called with the bd_lock mutex held, providing it
1919*10639SDarren.Reed@Sun.COM  * with the necessary protection to retrieve and modify bd_callout but it
1920*10639SDarren.Reed@Sun.COM  * does not hold the lock for its entire duration... see below...
1921*10639SDarren.Reed@Sun.COM  */
1922*10639SDarren.Reed@Sun.COM static void
1923*10639SDarren.Reed@Sun.COM bpf_clear_timeout(struct bpf_d *d)
1924*10639SDarren.Reed@Sun.COM {
1925*10639SDarren.Reed@Sun.COM 	timeout_id_t tid = d->bd_callout;
1926*10639SDarren.Reed@Sun.COM 	d->bd_callout = 0;
1927*10639SDarren.Reed@Sun.COM 	d->bd_inuse++;
1928*10639SDarren.Reed@Sun.COM 
1929*10639SDarren.Reed@Sun.COM 	/*
1930*10639SDarren.Reed@Sun.COM 	 * If the timeout has fired and is waiting on bd_lock, we could
1931*10639SDarren.Reed@Sun.COM 	 * deadlock here because untimeout if bd_lock is held and would
1932*10639SDarren.Reed@Sun.COM 	 * wait for bpf_timed_out to finish and it never would.
1933*10639SDarren.Reed@Sun.COM 	 */
1934*10639SDarren.Reed@Sun.COM 	if (tid != 0) {
1935*10639SDarren.Reed@Sun.COM 		mutex_exit(&d->bd_lock);
1936*10639SDarren.Reed@Sun.COM 		(void) untimeout(tid);
1937*10639SDarren.Reed@Sun.COM 		mutex_enter(&d->bd_lock);
1938*10639SDarren.Reed@Sun.COM 	}
1939*10639SDarren.Reed@Sun.COM 
1940*10639SDarren.Reed@Sun.COM 	d->bd_inuse--;
1941*10639SDarren.Reed@Sun.COM }
1942*10639SDarren.Reed@Sun.COM 
1943*10639SDarren.Reed@Sun.COM /*
1944*10639SDarren.Reed@Sun.COM  * As a cloning device driver, BPF needs to keep track of which device
1945*10639SDarren.Reed@Sun.COM  * numbers are in use and which ones are not. A hash table, indexed by
1946*10639SDarren.Reed@Sun.COM  * the minor device number, is used to store the pointers to the
1947*10639SDarren.Reed@Sun.COM  * individual descriptors that are allocated in bpfopen().
1948*10639SDarren.Reed@Sun.COM  * The functions below present the interface for that hash table to
1949*10639SDarren.Reed@Sun.COM  * the rest of the driver.
1950*10639SDarren.Reed@Sun.COM  */
1951*10639SDarren.Reed@Sun.COM static struct bpf_d *
1952*10639SDarren.Reed@Sun.COM bpf_dev_find(minor_t minor)
1953*10639SDarren.Reed@Sun.COM {
1954*10639SDarren.Reed@Sun.COM 	struct bpf_d *d = NULL;
1955*10639SDarren.Reed@Sun.COM 
1956*10639SDarren.Reed@Sun.COM 	(void) mod_hash_find(bpf_hash, (mod_hash_key_t)(uintptr_t)minor,
1957*10639SDarren.Reed@Sun.COM 	    (mod_hash_val_t *)&d);
1958*10639SDarren.Reed@Sun.COM 
1959*10639SDarren.Reed@Sun.COM 	return (d);
1960*10639SDarren.Reed@Sun.COM }
1961*10639SDarren.Reed@Sun.COM 
1962*10639SDarren.Reed@Sun.COM static void
1963*10639SDarren.Reed@Sun.COM bpf_dev_add(struct bpf_d *d)
1964*10639SDarren.Reed@Sun.COM {
1965*10639SDarren.Reed@Sun.COM 	(void) mod_hash_insert(bpf_hash, (mod_hash_key_t)(uintptr_t)d->bd_dev,
1966*10639SDarren.Reed@Sun.COM 	    (mod_hash_val_t)d);
1967*10639SDarren.Reed@Sun.COM }
1968*10639SDarren.Reed@Sun.COM 
1969*10639SDarren.Reed@Sun.COM static void
1970*10639SDarren.Reed@Sun.COM bpf_dev_remove(struct bpf_d *d)
1971*10639SDarren.Reed@Sun.COM {
1972*10639SDarren.Reed@Sun.COM 	struct bpf_d *stor;
1973*10639SDarren.Reed@Sun.COM 
1974*10639SDarren.Reed@Sun.COM 	(void) mod_hash_remove(bpf_hash, (mod_hash_key_t)(uintptr_t)d->bd_dev,
1975*10639SDarren.Reed@Sun.COM 	    (mod_hash_val_t *)&stor);
1976*10639SDarren.Reed@Sun.COM 	ASSERT(stor == d);
1977*10639SDarren.Reed@Sun.COM }
1978*10639SDarren.Reed@Sun.COM 
1979*10639SDarren.Reed@Sun.COM /*
1980*10639SDarren.Reed@Sun.COM  * bpf_def_get should only ever be called for a minor number that exists,
1981*10639SDarren.Reed@Sun.COM  * thus there should always be a pointer in the hash table that corresponds
1982*10639SDarren.Reed@Sun.COM  * to it.
1983*10639SDarren.Reed@Sun.COM  */
1984*10639SDarren.Reed@Sun.COM static struct bpf_d *
1985*10639SDarren.Reed@Sun.COM bpf_dev_get(minor_t minor)
1986*10639SDarren.Reed@Sun.COM {
1987*10639SDarren.Reed@Sun.COM 	struct bpf_d *d = NULL;
1988*10639SDarren.Reed@Sun.COM 
1989*10639SDarren.Reed@Sun.COM 	(void) mod_hash_find(bpf_hash, (mod_hash_key_t)(uintptr_t)minor,
1990*10639SDarren.Reed@Sun.COM 	    (mod_hash_val_t *)&d);
1991*10639SDarren.Reed@Sun.COM 	ASSERT(d != NULL);
1992*10639SDarren.Reed@Sun.COM 
1993*10639SDarren.Reed@Sun.COM 	return (d);
1994*10639SDarren.Reed@Sun.COM }
1995*10639SDarren.Reed@Sun.COM 
1996*10639SDarren.Reed@Sun.COM static void
1997*10639SDarren.Reed@Sun.COM bpf_debug_nic_action(char *txt, struct bpf_if *bp)
1998*10639SDarren.Reed@Sun.COM {
1999*10639SDarren.Reed@Sun.COM 	if (bpf_debug) {
2000*10639SDarren.Reed@Sun.COM 		cmn_err(CE_CONT, "%s %s %s/%d/%d/%d\n", bp->bif_ifname, txt,
2001*10639SDarren.Reed@Sun.COM 		    MBPF_NAME(&bp->bif_mac, bp->bif_ifp), bp->bif_linkid,
2002*10639SDarren.Reed@Sun.COM 		    bp->bif_zoneid, bp->bif_dlt);
2003*10639SDarren.Reed@Sun.COM 	}
2004*10639SDarren.Reed@Sun.COM }
2005*10639SDarren.Reed@Sun.COM 
2006*10639SDarren.Reed@Sun.COM /*
2007*10639SDarren.Reed@Sun.COM  * Finding a BPF network interface is a two pass job.
2008*10639SDarren.Reed@Sun.COM  * In the first pass, the best possible match is made on zone, DLT and
2009*10639SDarren.Reed@Sun.COM  * interface name.
2010*10639SDarren.Reed@Sun.COM  * In the second pass, we allow global zone snoopers to attach to interfaces
2011*10639SDarren.Reed@Sun.COM  * that are reserved for other zones.
2012*10639SDarren.Reed@Sun.COM  * This ensures that the global zone will always see its own interfaces first
2013*10639SDarren.Reed@Sun.COM  * before attaching to those that belong to a shared IP instance zone.
2014*10639SDarren.Reed@Sun.COM  */
2015*10639SDarren.Reed@Sun.COM static struct bpf_if *
2016*10639SDarren.Reed@Sun.COM bpf_findif(struct bpf_d *d, char *ifname, int dlt)
2017*10639SDarren.Reed@Sun.COM {
2018*10639SDarren.Reed@Sun.COM 	struct bpf_if *bp;
2019*10639SDarren.Reed@Sun.COM 
2020*10639SDarren.Reed@Sun.COM 	TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
2021*10639SDarren.Reed@Sun.COM 		if ((bp->bif_ifp == 0) ||
2022*10639SDarren.Reed@Sun.COM 		    (strcmp(ifname, bp->bif_ifname) != 0))
2023*10639SDarren.Reed@Sun.COM 			continue;
2024*10639SDarren.Reed@Sun.COM 
2025*10639SDarren.Reed@Sun.COM 		if (bp->bif_zoneid != d->bd_zone)
2026*10639SDarren.Reed@Sun.COM 			continue;
2027*10639SDarren.Reed@Sun.COM 
2028*10639SDarren.Reed@Sun.COM 		if ((dlt != -1) && (dlt != bp->bif_dlt))
2029*10639SDarren.Reed@Sun.COM 			continue;
2030*10639SDarren.Reed@Sun.COM 
2031*10639SDarren.Reed@Sun.COM 		return (bp);
2032*10639SDarren.Reed@Sun.COM 	}
2033*10639SDarren.Reed@Sun.COM 
2034*10639SDarren.Reed@Sun.COM 	if (d->bd_zone == GLOBAL_ZONEID) {
2035*10639SDarren.Reed@Sun.COM 		TAILQ_FOREACH(bp, &bpf_iflist, bif_next) {
2036*10639SDarren.Reed@Sun.COM 			if ((bp->bif_ifp == 0) ||
2037*10639SDarren.Reed@Sun.COM 			    (strcmp(ifname, bp->bif_ifname) != 0))
2038*10639SDarren.Reed@Sun.COM 				continue;
2039*10639SDarren.Reed@Sun.COM 
2040*10639SDarren.Reed@Sun.COM 			if ((dlt != -1) && (dlt != bp->bif_dlt))
2041*10639SDarren.Reed@Sun.COM 				continue;
2042*10639SDarren.Reed@Sun.COM 			return (bp);
2043*10639SDarren.Reed@Sun.COM 		}
2044*10639SDarren.Reed@Sun.COM 	}
2045*10639SDarren.Reed@Sun.COM 
2046*10639SDarren.Reed@Sun.COM 	return (NULL);
2047*10639SDarren.Reed@Sun.COM }
2048