1 /*	$NetBSD: if_bridge.c,v 1.105 2015/11/19 16:23:54 christos Exp $	*/
2 
3 /*
4  * Copyright 2001 Wasabi Systems, Inc.
5  * All rights reserved.
6  *
7  * Written by Jason R. Thorpe for Wasabi Systems, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed for the NetBSD Project by
20  *	Wasabi Systems, Inc.
21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22  *    or promote products derived from this software without specific prior
23  *    written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 /*
39  * Copyright (c) 1999, 2000 Jason L. Wright (jason@thought.net)
40  * All rights reserved.
41  *
42  * Redistribution and use in source and binary forms, with or without
43  * modification, are permitted provided that the following conditions
44  * are met:
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  * 2. Redistributions in binary form must reproduce the above copyright
48  *    notice, this list of conditions and the following disclaimer in the
49  *    documentation and/or other materials provided with the distribution.
50  * 3. All advertising materials mentioning features or use of this software
51  *    must display the following acknowledgement:
52  *	This product includes software developed by Jason L. Wright
53  * 4. The name of the author may not be used to endorse or promote products
54  *    derived from this software without specific prior written permission.
55  *
56  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
57  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
58  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
59  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
60  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
61  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
62  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
64  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
65  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66  * POSSIBILITY OF SUCH DAMAGE.
67  *
68  * OpenBSD: if_bridge.c,v 1.60 2001/06/15 03:38:33 itojun Exp
69  */
70 
71 /*
72  * Network interface bridge support.
73  *
74  * TODO:
75  *
76  *	- Currently only supports Ethernet-like interfaces (Ethernet,
77  *	  802.11, VLANs on Ethernet, etc.)  Figure out a nice way
78  *	  to bridge other types of interfaces (FDDI-FDDI, and maybe
79  *	  consider heterogeneous bridges).
80  */
81 
82 #include <sys/cdefs.h>
83 __KERNEL_RCSID(0, "$NetBSD: if_bridge.c,v 1.105 2015/11/19 16:23:54 christos Exp $");
84 
85 #ifdef _KERNEL_OPT
86 #include "opt_bridge_ipf.h"
87 #include "opt_inet.h"
88 #endif /* _KERNEL_OPT */
89 
90 #include <sys/param.h>
91 #include <sys/kernel.h>
92 #include <sys/mbuf.h>
93 #include <sys/queue.h>
94 #include <sys/socket.h>
95 #include <sys/socketvar.h> /* for softnet_lock */
96 #include <sys/sockio.h>
97 #include <sys/systm.h>
98 #include <sys/proc.h>
99 #include <sys/pool.h>
100 #include <sys/kauth.h>
101 #include <sys/cpu.h>
102 #include <sys/cprng.h>
103 #include <sys/mutex.h>
104 #include <sys/kmem.h>
105 
106 #include <net/bpf.h>
107 #include <net/if.h>
108 #include <net/if_dl.h>
109 #include <net/if_types.h>
110 #include <net/if_llc.h>
111 #include <net/pktqueue.h>
112 
113 #include <net/if_ether.h>
114 #include <net/if_bridgevar.h>
115 
116 #if defined(BRIDGE_IPF)
117 /* Used for bridge_ip[6]_checkbasic */
118 #include <netinet/in.h>
119 #include <netinet/in_systm.h>
120 #include <netinet/ip.h>
121 #include <netinet/ip_var.h>
122 #include <netinet/ip_private.h>		/* XXX */
123 
124 #include <netinet/ip6.h>
125 #include <netinet6/in6_var.h>
126 #include <netinet6/ip6_var.h>
127 #include <netinet6/ip6_private.h>	/* XXX */
128 #endif /* BRIDGE_IPF */
129 
130 /*
131  * Size of the route hash table.  Must be a power of two.
132  */
133 #ifndef BRIDGE_RTHASH_SIZE
134 #define	BRIDGE_RTHASH_SIZE		1024
135 #endif
136 
137 #define	BRIDGE_RTHASH_MASK		(BRIDGE_RTHASH_SIZE - 1)
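/*
 * Because the table size is a power of two, a bucket index can be derived
 * from a hash of the Ethernet address with a cheap mask instead of a
 * modulo operation.  Illustrative sketch only, not code taken verbatim
 * from the driver:
 *
 *	uint32_t hash = ...;
 *	unsigned bucket = hash & BRIDGE_RTHASH_MASK;	0 .. BRIDGE_RTHASH_SIZE - 1
 */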
138 
139 #include "carp.h"
140 #if NCARP > 0
141 #include <netinet/in.h>
142 #include <netinet/in_var.h>
143 #include <netinet/ip_carp.h>
144 #endif
145 
146 #include "ioconf.h"
147 
148 __CTASSERT(sizeof(struct ifbifconf) == sizeof(struct ifbaconf));
149 __CTASSERT(offsetof(struct ifbifconf, ifbic_len) == offsetof(struct ifbaconf, ifbac_len));
150 __CTASSERT(offsetof(struct ifbifconf, ifbic_buf) == offsetof(struct ifbaconf, ifbac_buf));
151 
152 /*
153  * Maximum number of addresses to cache.
154  */
155 #ifndef BRIDGE_RTABLE_MAX
156 #define	BRIDGE_RTABLE_MAX		100
157 #endif
158 
159 /*
160  * Spanning tree defaults.
161  */
162 #define	BSTP_DEFAULT_MAX_AGE		(20 * 256)
163 #define	BSTP_DEFAULT_HELLO_TIME		(2 * 256)
164 #define	BSTP_DEFAULT_FORWARD_DELAY	(15 * 256)
165 #define	BSTP_DEFAULT_HOLD_TIME		(1 * 256)
166 #define	BSTP_DEFAULT_BRIDGE_PRIORITY	0x8000
167 #define	BSTP_DEFAULT_PORT_PRIORITY	0x80
168 #define	BSTP_DEFAULT_PATH_COST		55
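/*
 * The timer values above (max age, hello time, forward delay, hold time)
 * are kept in units of 1/256 of a second, hence the "* 256"; the
 * BRDG{G,S}HT, BRDG{G,S}FD and BRDG{G,S}MA ioctl handlers below convert
 * to and from whole seconds with ">> 8" and "<< 8".  Bridge/port priority
 * and path cost are plain numbers.
 */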
169 
170 /*
171  * Timeout (in seconds) for entries learned dynamically.
172  */
173 #ifndef BRIDGE_RTABLE_TIMEOUT
174 #define	BRIDGE_RTABLE_TIMEOUT		(20 * 60)	/* same as ARP */
175 #endif
176 
177 /*
178  * Number of seconds between walks of the route list.
179  */
180 #ifndef BRIDGE_RTABLE_PRUNE_PERIOD
181 #define	BRIDGE_RTABLE_PRUNE_PERIOD	(5 * 60)
182 #endif
183 
184 #define BRIDGE_RT_INTR_LOCK(_sc)	mutex_enter((_sc)->sc_rtlist_intr_lock)
185 #define BRIDGE_RT_INTR_UNLOCK(_sc)	mutex_exit((_sc)->sc_rtlist_intr_lock)
186 #define BRIDGE_RT_INTR_LOCKED(_sc)	mutex_owned((_sc)->sc_rtlist_intr_lock)
187 
188 #define BRIDGE_RT_LOCK(_sc)	if ((_sc)->sc_rtlist_lock) \
189 					mutex_enter((_sc)->sc_rtlist_lock)
190 #define BRIDGE_RT_UNLOCK(_sc)	if ((_sc)->sc_rtlist_lock) \
191 					mutex_exit((_sc)->sc_rtlist_lock)
192 #define BRIDGE_RT_LOCKED(_sc)	(!(_sc)->sc_rtlist_lock || \
193 				 mutex_owned((_sc)->sc_rtlist_lock))
194 
195 #define BRIDGE_RT_PSZ_PERFORM(_sc) \
196 				if ((_sc)->sc_rtlist_psz != NULL) \
197 					pserialize_perform((_sc)->sc_rtlist_psz);
198 
199 #ifdef BRIDGE_MPSAFE
200 #define BRIDGE_RT_RENTER(__s)	do { \
201 					if (!cpu_intr_p()) \
202 						__s = pserialize_read_enter(); \
203 					else \
204 						__s = splhigh(); \
205 				} while (0)
206 #define BRIDGE_RT_REXIT(__s)	do { \
207 					if (!cpu_intr_p()) \
208 						pserialize_read_exit(__s); \
209 					else \
210 						splx(__s); \
211 				} while (0)
212 #else /* BRIDGE_MPSAFE */
213 #define BRIDGE_RT_RENTER(__s)	do { __s = 0; } while (0)
214 #define BRIDGE_RT_REXIT(__s)	do { (void)__s; } while (0)
215 #endif /* BRIDGE_MPSAFE */
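/*
 * Intended use of the route-list locking macros above, as a sketch inferred
 * from the macros themselves (not code lifted from the driver): readers on
 * the forwarding path run inside a pserialize read section, while updaters
 * take the mutex and wait out readers before freeing anything.
 *
 *	int s;
 *
 *	BRIDGE_RT_RENTER(s);			reader (forwarding path)
 *		... look up a bridge_rtnode ...
 *	BRIDGE_RT_REXIT(s);
 *
 *	BRIDGE_RT_LOCK(sc);			updater (ioctl/ageing path)
 *		... unlink the node from the list ...
 *	BRIDGE_RT_PSZ_PERFORM(sc);		wait for readers to drain
 *	BRIDGE_RT_UNLOCK(sc);
 *		... the node may now be freed safely ...
 */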
216 
217 int	bridge_rtable_prune_period = BRIDGE_RTABLE_PRUNE_PERIOD;
218 
219 static struct pool bridge_rtnode_pool;
220 static struct work bridge_rtage_wk;
221 
222 static int	bridge_clone_create(struct if_clone *, int);
223 static int	bridge_clone_destroy(struct ifnet *);
224 
225 static int	bridge_ioctl(struct ifnet *, u_long, void *);
226 static int	bridge_init(struct ifnet *);
227 static void	bridge_stop(struct ifnet *, int);
228 static void	bridge_start(struct ifnet *);
229 
230 static void	bridge_input(struct ifnet *, struct mbuf *);
231 static void	bridge_forward(void *);
232 
233 static void	bridge_timer(void *);
234 
235 static void	bridge_broadcast(struct bridge_softc *, struct ifnet *,
236 				 struct mbuf *);
237 
238 static int	bridge_rtupdate(struct bridge_softc *, const uint8_t *,
239 				struct ifnet *, int, uint8_t);
240 static struct ifnet *bridge_rtlookup(struct bridge_softc *, const uint8_t *);
241 static void	bridge_rttrim(struct bridge_softc *);
242 static void	bridge_rtage(struct bridge_softc *);
243 static void	bridge_rtage_work(struct work *, void *);
244 static void	bridge_rtflush(struct bridge_softc *, int);
245 static int	bridge_rtdaddr(struct bridge_softc *, const uint8_t *);
246 static void	bridge_rtdelete(struct bridge_softc *, struct ifnet *ifp);
247 
248 static void	bridge_rtable_init(struct bridge_softc *);
249 static void	bridge_rtable_fini(struct bridge_softc *);
250 
251 static struct bridge_rtnode *bridge_rtnode_lookup(struct bridge_softc *,
252 						  const uint8_t *);
253 static int	bridge_rtnode_insert(struct bridge_softc *,
254 				     struct bridge_rtnode *);
255 static void	bridge_rtnode_remove(struct bridge_softc *,
256 				     struct bridge_rtnode *);
257 static void	bridge_rtnode_destroy(struct bridge_rtnode *);
258 
259 static struct bridge_iflist *bridge_lookup_member(struct bridge_softc *,
260 						  const char *name);
261 static struct bridge_iflist *bridge_lookup_member_if(struct bridge_softc *,
262 						     struct ifnet *ifp);
263 static void	bridge_release_member(struct bridge_softc *, struct bridge_iflist *);
264 static void	bridge_delete_member(struct bridge_softc *,
265 				     struct bridge_iflist *);
266 static struct bridge_iflist *bridge_try_hold_bif(struct bridge_iflist *);
267 
268 static int	bridge_ioctl_add(struct bridge_softc *, void *);
269 static int	bridge_ioctl_del(struct bridge_softc *, void *);
270 static int	bridge_ioctl_gifflags(struct bridge_softc *, void *);
271 static int	bridge_ioctl_sifflags(struct bridge_softc *, void *);
272 static int	bridge_ioctl_scache(struct bridge_softc *, void *);
273 static int	bridge_ioctl_gcache(struct bridge_softc *, void *);
274 static int	bridge_ioctl_gifs(struct bridge_softc *, void *);
275 static int	bridge_ioctl_rts(struct bridge_softc *, void *);
276 static int	bridge_ioctl_saddr(struct bridge_softc *, void *);
277 static int	bridge_ioctl_sto(struct bridge_softc *, void *);
278 static int	bridge_ioctl_gto(struct bridge_softc *, void *);
279 static int	bridge_ioctl_daddr(struct bridge_softc *, void *);
280 static int	bridge_ioctl_flush(struct bridge_softc *, void *);
281 static int	bridge_ioctl_gpri(struct bridge_softc *, void *);
282 static int	bridge_ioctl_spri(struct bridge_softc *, void *);
283 static int	bridge_ioctl_ght(struct bridge_softc *, void *);
284 static int	bridge_ioctl_sht(struct bridge_softc *, void *);
285 static int	bridge_ioctl_gfd(struct bridge_softc *, void *);
286 static int	bridge_ioctl_sfd(struct bridge_softc *, void *);
287 static int	bridge_ioctl_gma(struct bridge_softc *, void *);
288 static int	bridge_ioctl_sma(struct bridge_softc *, void *);
289 static int	bridge_ioctl_sifprio(struct bridge_softc *, void *);
290 static int	bridge_ioctl_sifcost(struct bridge_softc *, void *);
291 #if defined(BRIDGE_IPF)
292 static int	bridge_ioctl_gfilt(struct bridge_softc *, void *);
293 static int	bridge_ioctl_sfilt(struct bridge_softc *, void *);
294 static int	bridge_ipf(void *, struct mbuf **, struct ifnet *, int);
295 static int	bridge_ip_checkbasic(struct mbuf **mp);
296 # ifdef INET6
297 static int	bridge_ip6_checkbasic(struct mbuf **mp);
298 # endif /* INET6 */
299 #endif /* BRIDGE_IPF */
300 
301 static void bridge_sysctl_fwdq_setup(struct sysctllog **clog,
302     struct bridge_softc *sc);
303 
304 struct bridge_control {
305 	int	(*bc_func)(struct bridge_softc *, void *);
306 	int	bc_argsize;
307 	int	bc_flags;
308 };
309 
310 #define	BC_F_COPYIN		0x01	/* copy arguments in */
311 #define	BC_F_COPYOUT		0x02	/* copy arguments out */
312 #define	BC_F_SUSER		0x04	/* do super-user check */
313 #define BC_F_XLATEIN		0x08	/* xlate arguments in */
314 #define BC_F_XLATEOUT		0x10	/* xlate arguments out */
315 
316 static const struct bridge_control bridge_control_table[] = {
317 [BRDGADD] = {bridge_ioctl_add, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
318 [BRDGDEL] = {bridge_ioctl_del, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
319 
320 [BRDGGIFFLGS] = {bridge_ioctl_gifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_COPYOUT},
321 [BRDGSIFFLGS] = {bridge_ioctl_sifflags, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
322 
323 [BRDGSCACHE] = {bridge_ioctl_scache, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
324 [BRDGGCACHE] = {bridge_ioctl_gcache, sizeof(struct ifbrparam), BC_F_COPYOUT},
325 
326 [OBRDGGIFS] = {bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_COPYIN|BC_F_COPYOUT},
327 [OBRDGRTS] = {bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_COPYIN|BC_F_COPYOUT},
328 
329 [BRDGSADDR] = {bridge_ioctl_saddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},
330 
331 [BRDGSTO] = {bridge_ioctl_sto, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
332 [BRDGGTO] = {bridge_ioctl_gto, sizeof(struct ifbrparam), BC_F_COPYOUT},
333 
334 [BRDGDADDR] = {bridge_ioctl_daddr, sizeof(struct ifbareq), BC_F_COPYIN|BC_F_SUSER},
335 
336 [BRDGFLUSH] = {bridge_ioctl_flush, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
337 
338 [BRDGGPRI] = {bridge_ioctl_gpri, sizeof(struct ifbrparam), BC_F_COPYOUT},
339 [BRDGSPRI] = {bridge_ioctl_spri, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
340 
341 [BRDGGHT] = {bridge_ioctl_ght, sizeof(struct ifbrparam), BC_F_COPYOUT},
342 [BRDGSHT] = {bridge_ioctl_sht, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
343 
344 [BRDGGFD] = {bridge_ioctl_gfd, sizeof(struct ifbrparam), BC_F_COPYOUT},
345 [BRDGSFD] = {bridge_ioctl_sfd, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
346 
347 [BRDGGMA] = {bridge_ioctl_gma, sizeof(struct ifbrparam), BC_F_COPYOUT},
348 [BRDGSMA] = {bridge_ioctl_sma, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
349 
350 [BRDGSIFPRIO] = {bridge_ioctl_sifprio, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
351 
352 [BRDGSIFCOST] = {bridge_ioctl_sifcost, sizeof(struct ifbreq), BC_F_COPYIN|BC_F_SUSER},
353 #if defined(BRIDGE_IPF)
354 [BRDGGFILT] = {bridge_ioctl_gfilt, sizeof(struct ifbrparam), BC_F_COPYOUT},
355 [BRDGSFILT] = {bridge_ioctl_sfilt, sizeof(struct ifbrparam), BC_F_COPYIN|BC_F_SUSER},
356 #endif /* BRIDGE_IPF */
357 [BRDGGIFS] = {bridge_ioctl_gifs, sizeof(struct ifbifconf), BC_F_XLATEIN|BC_F_XLATEOUT},
358 [BRDGRTS] = {bridge_ioctl_rts, sizeof(struct ifbaconf), BC_F_XLATEIN|BC_F_XLATEOUT},
359 };
360 
361 static const int bridge_control_table_size = __arraycount(bridge_control_table);
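/*
 * Example of how a userland consumer (e.g. brconfig(8)) might reach one of
 * the handlers above.  A sketch only, assuming an already-open socket
 * descriptor "sock"; the ifd_cmd value indexes bridge_control_table and
 * ifd_len must match the entry's bc_argsize:
 *
 *	struct ifbrparam param;
 *	struct ifdrv ifd;
 *
 *	memset(&ifd, 0, sizeof(ifd));
 *	strlcpy(ifd.ifd_name, "bridge0", sizeof(ifd.ifd_name));
 *	ifd.ifd_cmd = BRDGGCACHE;
 *	ifd.ifd_len = sizeof(param);
 *	ifd.ifd_data = &param;
 *	if (ioctl(sock, SIOCGDRVSPEC, &ifd) == 0)
 *		printf("address cache size: %u\n", param.ifbrp_csize);
 */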
362 
363 static LIST_HEAD(, bridge_softc) bridge_list;
364 static kmutex_t bridge_list_lock;
365 
366 static struct if_clone bridge_cloner =
367     IF_CLONE_INITIALIZER("bridge", bridge_clone_create, bridge_clone_destroy);
368 
369 /*
370  * bridgeattach:
371  *
372  *	Pseudo-device attach routine.
373  */
374 void
375 bridgeattach(int n)
376 {
377 
378 	pool_init(&bridge_rtnode_pool, sizeof(struct bridge_rtnode),
379 	    0, 0, 0, "brtpl", NULL, IPL_NET);
380 
381 	LIST_INIT(&bridge_list);
382 	mutex_init(&bridge_list_lock, MUTEX_DEFAULT, IPL_NET);
383 	if_clone_attach(&bridge_cloner);
384 }
385 
386 /*
387  * bridge_clone_create:
388  *
389  *	Create a new bridge instance.
390  */
391 static int
392 bridge_clone_create(struct if_clone *ifc, int unit)
393 {
394 	struct bridge_softc *sc;
395 	struct ifnet *ifp;
396 	int error, flags;
397 
398 	sc = kmem_zalloc(sizeof(*sc),  KM_SLEEP);
399 	ifp = &sc->sc_if;
400 
401 	sc->sc_brtmax = BRIDGE_RTABLE_MAX;
402 	sc->sc_brttimeout = BRIDGE_RTABLE_TIMEOUT;
403 	sc->sc_bridge_max_age = BSTP_DEFAULT_MAX_AGE;
404 	sc->sc_bridge_hello_time = BSTP_DEFAULT_HELLO_TIME;
405 	sc->sc_bridge_forward_delay = BSTP_DEFAULT_FORWARD_DELAY;
406 	sc->sc_bridge_priority = BSTP_DEFAULT_BRIDGE_PRIORITY;
407 	sc->sc_hold_time = BSTP_DEFAULT_HOLD_TIME;
408 	sc->sc_filter_flags = 0;
409 
410 	/* Initialize our routing table. */
411 	bridge_rtable_init(sc);
412 
413 #ifdef BRIDGE_MPSAFE
414 	flags = WQ_MPSAFE;
415 #else
416 	flags = 0;
417 #endif
418 	error = workqueue_create(&sc->sc_rtage_wq, "bridge_rtage",
419 	    bridge_rtage_work, sc, PRI_SOFTNET, IPL_SOFTNET, flags);
420 	if (error)
421 		panic("%s: workqueue_create %d\n", __func__, error);
422 
423 	callout_init(&sc->sc_brcallout, 0);
424 	callout_init(&sc->sc_bstpcallout, 0);
425 
426 	LIST_INIT(&sc->sc_iflist);
427 #ifdef BRIDGE_MPSAFE
428 	sc->sc_iflist_intr_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
429 	sc->sc_iflist_psz = pserialize_create();
430 	sc->sc_iflist_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET);
431 #else
432 	sc->sc_iflist_intr_lock = NULL;
433 	sc->sc_iflist_psz = NULL;
434 	sc->sc_iflist_lock = NULL;
435 #endif
436 	cv_init(&sc->sc_iflist_cv, "if_bridge_cv");
437 
438 	if_initname(ifp, ifc->ifc_name, unit);
439 	ifp->if_softc = sc;
440 	ifp->if_mtu = ETHERMTU;
441 	ifp->if_ioctl = bridge_ioctl;
442 	ifp->if_output = bridge_output;
443 	ifp->if_start = bridge_start;
444 	ifp->if_stop = bridge_stop;
445 	ifp->if_init = bridge_init;
446 	ifp->if_type = IFT_BRIDGE;
447 	ifp->if_addrlen = 0;
448 	ifp->if_dlt = DLT_EN10MB;
449 	ifp->if_hdrlen = ETHER_HDR_LEN;
450 
451 	sc->sc_fwd_pktq = pktq_create(IFQ_MAXLEN, bridge_forward, sc);
452 	KASSERT(sc->sc_fwd_pktq != NULL);
453 
454 	bridge_sysctl_fwdq_setup(&ifp->if_sysctl_log, sc);
455 
456 	if_attach(ifp);
457 
458 	if_alloc_sadl(ifp);
459 
460 	mutex_enter(&bridge_list_lock);
461 	LIST_INSERT_HEAD(&bridge_list, sc, sc_list);
462 	mutex_exit(&bridge_list_lock);
463 
464 	return (0);
465 }
466 
467 /*
468  * bridge_clone_destroy:
469  *
470  *	Destroy a bridge instance.
471  */
472 static int
473 bridge_clone_destroy(struct ifnet *ifp)
474 {
475 	struct bridge_softc *sc = ifp->if_softc;
476 	struct bridge_iflist *bif;
477 	int s;
478 
479 	/* Must be called while IFF_RUNNING is still set, i.e., before bridge_stop */
480 	pktq_barrier(sc->sc_fwd_pktq);
481 
482 	s = splnet();
483 
484 	bridge_stop(ifp, 1);
485 
486 	BRIDGE_LOCK(sc);
487 	while ((bif = LIST_FIRST(&sc->sc_iflist)) != NULL)
488 		bridge_delete_member(sc, bif);
489 	BRIDGE_UNLOCK(sc);
490 
491 	mutex_enter(&bridge_list_lock);
492 	LIST_REMOVE(sc, sc_list);
493 	mutex_exit(&bridge_list_lock);
494 
495 	splx(s);
496 
497 	if_detach(ifp);
498 
499 	/* Should be called after if_detach to be safe */
500 	pktq_flush(sc->sc_fwd_pktq);
501 	pktq_destroy(sc->sc_fwd_pktq);
502 
503 	/* Tear down the routing table. */
504 	bridge_rtable_fini(sc);
505 
506 	cv_destroy(&sc->sc_iflist_cv);
507 	if (sc->sc_iflist_intr_lock)
508 		mutex_obj_free(sc->sc_iflist_intr_lock);
509 
510 	if (sc->sc_iflist_psz)
511 		pserialize_destroy(sc->sc_iflist_psz);
512 	if (sc->sc_iflist_lock)
513 		mutex_obj_free(sc->sc_iflist_lock);
514 
515 	workqueue_destroy(sc->sc_rtage_wq);
516 
517 	kmem_free(sc, sizeof(*sc));
518 
519 	return (0);
520 }
521 
522 static int
523 bridge_sysctl_fwdq_maxlen(SYSCTLFN_ARGS)
524 {
525 	struct sysctlnode node = *rnode;
526 	const struct bridge_softc *sc =	node.sysctl_data;
527 	return sysctl_pktq_maxlen(SYSCTLFN_CALL(rnode), sc->sc_fwd_pktq);
528 }
529 
530 #define	SYSCTL_BRIDGE_PKTQ(cn, c)					\
531 	static int							\
532 	bridge_sysctl_fwdq_##cn(SYSCTLFN_ARGS)				\
533 	{								\
534 		struct sysctlnode node = *rnode;			\
535 		const struct bridge_softc *sc =	node.sysctl_data;	\
536 		return sysctl_pktq_count(SYSCTLFN_CALL(rnode),		\
537 					 sc->sc_fwd_pktq, c);		\
538 	}
539 
540 SYSCTL_BRIDGE_PKTQ(items, PKTQ_NITEMS)
541 SYSCTL_BRIDGE_PKTQ(drops, PKTQ_DROPS)
542 
543 static void
544 bridge_sysctl_fwdq_setup(struct sysctllog **clog, struct bridge_softc *sc)
545 {
546 	const struct sysctlnode *cnode, *rnode;
547 	sysctlfn len_func = NULL, maxlen_func = NULL, drops_func = NULL;
548 	const char *ifname = sc->sc_if.if_xname;
549 
550 	len_func = bridge_sysctl_fwdq_items;
551 	maxlen_func = bridge_sysctl_fwdq_maxlen;
552 	drops_func = bridge_sysctl_fwdq_drops;
553 
554 	if (sysctl_createv(clog, 0, NULL, &rnode,
555 			   CTLFLAG_PERMANENT,
556 			   CTLTYPE_NODE, "interfaces",
557 			   SYSCTL_DESCR("Per-interface controls"),
558 			   NULL, 0, NULL, 0,
559 			   CTL_NET, CTL_CREATE, CTL_EOL) != 0)
560 		goto bad;
561 
562 	if (sysctl_createv(clog, 0, &rnode, &rnode,
563 			   CTLFLAG_PERMANENT,
564 			   CTLTYPE_NODE, ifname,
565 			   SYSCTL_DESCR("Interface controls"),
566 			   NULL, 0, NULL, 0,
567 			   CTL_CREATE, CTL_EOL) != 0)
568 		goto bad;
569 
570 	if (sysctl_createv(clog, 0, &rnode, &rnode,
571 			   CTLFLAG_PERMANENT,
572 			   CTLTYPE_NODE, "fwdq",
573 			   SYSCTL_DESCR("Protocol input queue controls"),
574 			   NULL, 0, NULL, 0,
575 			   CTL_CREATE, CTL_EOL) != 0)
576 		goto bad;
577 
578 	if (sysctl_createv(clog, 0, &rnode, &cnode,
579 			   CTLFLAG_PERMANENT,
580 			   CTLTYPE_INT, "len",
581 			   SYSCTL_DESCR("Current forwarding queue length"),
582 			   len_func, 0, (void *)sc, 0,
583 			   CTL_CREATE, IFQCTL_LEN, CTL_EOL) != 0)
584 		goto bad;
585 
586 	if (sysctl_createv(clog, 0, &rnode, &cnode,
587 			   CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
588 			   CTLTYPE_INT, "maxlen",
589 			   SYSCTL_DESCR("Maximum allowed forwarding queue length"),
590 			   maxlen_func, 0, (void *)sc, 0,
591 			   CTL_CREATE, IFQCTL_MAXLEN, CTL_EOL) != 0)
592 		goto bad;
593 
594 	if (sysctl_createv(clog, 0, &rnode, &cnode,
595 			   CTLFLAG_PERMANENT,
596 			   CTLTYPE_INT, "drops",
597 			   SYSCTL_DESCR("Packets dropped due to full forwarding queue"),
598 			   drops_func, 0, (void *)sc, 0,
599 			   CTL_CREATE, IFQCTL_DROPS, CTL_EOL) != 0)
600 		goto bad;
601 
602 	return;
603 bad:
604 	aprint_error("%s: could not attach sysctl nodes\n", ifname);
605 	return;
606 }
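/*
 * The nodes created above should appear as
 * net.interfaces.<bridge>.fwdq.{len,maxlen,drops}, e.g.
 * "sysctl net.interfaces.bridge0.fwdq.maxlen"; only "maxlen" is writable.
 */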
607 
608 /*
609  * bridge_ioctl:
610  *
611  *	Handle a control request from the operator.
612  */
613 static int
614 bridge_ioctl(struct ifnet *ifp, u_long cmd, void *data)
615 {
616 	struct bridge_softc *sc = ifp->if_softc;
617 	struct lwp *l = curlwp;	/* XXX */
618 	union {
619 		struct ifbreq ifbreq;
620 		struct ifbifconf ifbifconf;
621 		struct ifbareq ifbareq;
622 		struct ifbaconf ifbaconf;
623 		struct ifbrparam ifbrparam;
624 	} args;
625 	struct ifdrv *ifd = (struct ifdrv *) data;
626 	const struct bridge_control *bc = NULL; /* XXXGCC */
627 	int s, error = 0;
628 
629 	/* Authorize command before calling splnet(). */
630 	switch (cmd) {
631 	case SIOCGDRVSPEC:
632 	case SIOCSDRVSPEC:
633 		if (ifd->ifd_cmd >= bridge_control_table_size
634 		    || (bc = &bridge_control_table[ifd->ifd_cmd]) == NULL) {
635 			error = EINVAL;
636 			return error;
637 		}
638 
639 		/* We only care about BC_F_SUSER at this point. */
640 		if ((bc->bc_flags & BC_F_SUSER) == 0)
641 			break;
642 
643 		error = kauth_authorize_network(l->l_cred,
644 		    KAUTH_NETWORK_INTERFACE_BRIDGE,
645 		    cmd == SIOCGDRVSPEC ?
646 		     KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_GETPRIV :
647 		     KAUTH_REQ_NETWORK_INTERFACE_BRIDGE_SETPRIV,
648 		     ifd, NULL, NULL);
649 		if (error)
650 			return (error);
651 
652 		break;
653 	}
654 
655 	s = splnet();
656 
657 	switch (cmd) {
658 	case SIOCGDRVSPEC:
659 	case SIOCSDRVSPEC:
660 		KASSERT(bc != NULL);
661 		if (cmd == SIOCGDRVSPEC &&
662 		    (bc->bc_flags & (BC_F_COPYOUT|BC_F_XLATEOUT)) == 0) {
663 			error = EINVAL;
664 			break;
665 		}
666 		else if (cmd == SIOCSDRVSPEC &&
667 		    (bc->bc_flags & (BC_F_COPYOUT|BC_F_XLATEOUT)) != 0) {
668 			error = EINVAL;
669 			break;
670 		}
671 
672 		/* BC_F_SUSER is checked above, before splnet(). */
673 
674 		if ((bc->bc_flags & (BC_F_XLATEIN|BC_F_XLATEOUT)) == 0
675 		    && (ifd->ifd_len != bc->bc_argsize
676 			|| ifd->ifd_len > sizeof(args))) {
677 			error = EINVAL;
678 			break;
679 		}
680 
681 		memset(&args, 0, sizeof(args));
682 		if (bc->bc_flags & BC_F_COPYIN) {
683 			error = copyin(ifd->ifd_data, &args, ifd->ifd_len);
684 			if (error)
685 				break;
686 		} else if (bc->bc_flags & BC_F_XLATEIN) {
687 			args.ifbifconf.ifbic_len = ifd->ifd_len;
688 			args.ifbifconf.ifbic_buf = ifd->ifd_data;
689 		}
690 
691 		error = (*bc->bc_func)(sc, &args);
692 		if (error)
693 			break;
694 
695 		if (bc->bc_flags & BC_F_COPYOUT) {
696 			error = copyout(&args, ifd->ifd_data, ifd->ifd_len);
697 		} else if (bc->bc_flags & BC_F_XLATEOUT) {
698 			ifd->ifd_len = args.ifbifconf.ifbic_len;
699 			ifd->ifd_data = args.ifbifconf.ifbic_buf;
700 		}
701 		break;
702 
703 	case SIOCSIFFLAGS:
704 		if ((error = ifioctl_common(ifp, cmd, data)) != 0)
705 			break;
706 		switch (ifp->if_flags & (IFF_UP|IFF_RUNNING)) {
707 		case IFF_RUNNING:
708 			/*
709 			 * If the interface is marked down and it is running,
710 			 * then stop and disable it.
711 			 */
712 			(*ifp->if_stop)(ifp, 1);
713 			break;
714 		case IFF_UP:
715 			/*
716 			 * If the interface is marked up and it is stopped, then
717 			 * start it.
718 			 */
719 			error = (*ifp->if_init)(ifp);
720 			break;
721 		default:
722 			break;
723 		}
724 		break;
725 
726 	case SIOCSIFMTU:
727 		if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
728 			error = 0;
729 		break;
730 
731 	default:
732 		error = ifioctl_common(ifp, cmd, data);
733 		break;
734 	}
735 
736 	splx(s);
737 
738 	return (error);
739 }
740 
741 /*
742  * bridge_lookup_member:
743  *
744  *	Look up a bridge member interface.
745  */
746 static struct bridge_iflist *
747 bridge_lookup_member(struct bridge_softc *sc, const char *name)
748 {
749 	struct bridge_iflist *bif;
750 	struct ifnet *ifp;
751 	int s;
752 
753 	BRIDGE_PSZ_RENTER(s);
754 
755 	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
756 		ifp = bif->bif_ifp;
757 		if (strcmp(ifp->if_xname, name) == 0)
758 			break;
759 	}
760 	bif = bridge_try_hold_bif(bif);
761 
762 	BRIDGE_PSZ_REXIT(s);
763 
764 	return bif;
765 }
766 
767 /*
768  * bridge_lookup_member_if:
769  *
770  *	Look up a bridge member interface by ifnet*.
771  */
772 static struct bridge_iflist *
773 bridge_lookup_member_if(struct bridge_softc *sc, struct ifnet *member_ifp)
774 {
775 	struct bridge_iflist *bif;
776 	int s;
777 
778 	BRIDGE_PSZ_RENTER(s);
779 
780 	bif = member_ifp->if_bridgeif;
781 	bif = bridge_try_hold_bif(bif);
782 
783 	BRIDGE_PSZ_REXIT(s);
784 
785 	return bif;
786 }
787 
788 static struct bridge_iflist *
789 bridge_try_hold_bif(struct bridge_iflist *bif)
790 {
791 #ifdef BRIDGE_MPSAFE
792 	if (bif != NULL) {
793 		if (bif->bif_waiting)
794 			bif = NULL;
795 		else
796 			atomic_inc_32(&bif->bif_refs);
797 	}
798 #endif
799 	return bif;
800 }
801 
802 /*
803  * bridge_release_member:
804  *
805  *	Release the specified member interface.
806  */
807 static void
808 bridge_release_member(struct bridge_softc *sc, struct bridge_iflist *bif)
809 {
810 #ifdef BRIDGE_MPSAFE
811 	uint32_t refs;
812 
813 	refs = atomic_dec_uint_nv(&bif->bif_refs);
814 	if (__predict_false(refs == 0 && bif->bif_waiting)) {
815 		BRIDGE_INTR_LOCK(sc);
816 		cv_broadcast(&sc->sc_iflist_cv);
817 		BRIDGE_INTR_UNLOCK(sc);
818 	}
819 #else
820 	(void)sc;
821 	(void)bif;
822 #endif
823 }
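/*
 * Typical pattern for the hold/release pair above (a sketch; the real users
 * are the ioctl handlers and the input/forwarding paths below).  The
 * reference is only taken when built with BRIDGE_MPSAFE:
 *
 *	struct bridge_iflist *bif;
 *
 *	bif = bridge_lookup_member_if(sc, ifp);
 *	if (bif == NULL)
 *		return;				not (or no longer) a member
 *	... use bif->bif_ifp, bif->bif_flags ...
 *	bridge_release_member(sc, bif);
 */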
824 
825 /*
826  * bridge_delete_member:
827  *
828  *	Delete the specified member interface.
829  */
830 static void
831 bridge_delete_member(struct bridge_softc *sc, struct bridge_iflist *bif)
832 {
833 	struct ifnet *ifs = bif->bif_ifp;
834 
835 	KASSERT(BRIDGE_LOCKED(sc));
836 
837 	ifs->if_input = ether_input;
838 	ifs->if_bridge = NULL;
839 	ifs->if_bridgeif = NULL;
840 
841 	LIST_REMOVE(bif, bif_next);
842 
843 	BRIDGE_PSZ_PERFORM(sc);
844 
845 	BRIDGE_UNLOCK(sc);
846 
847 #ifdef BRIDGE_MPSAFE
848 	BRIDGE_INTR_LOCK(sc);
849 	bif->bif_waiting = true;
850 	membar_sync();
851 	while (bif->bif_refs > 0) {
852 		aprint_debug("%s: cv_wait on iflist\n", __func__);
853 		cv_wait(&sc->sc_iflist_cv, sc->sc_iflist_intr_lock);
854 	}
855 	bif->bif_waiting = false;
856 	BRIDGE_INTR_UNLOCK(sc);
857 #endif
858 
859 	kmem_free(bif, sizeof(*bif));
860 
861 	BRIDGE_LOCK(sc);
862 }
863 
864 static int
865 bridge_ioctl_add(struct bridge_softc *sc, void *arg)
866 {
867 	struct ifbreq *req = arg;
868 	struct bridge_iflist *bif = NULL;
869 	struct ifnet *ifs;
870 	int error = 0;
871 
872 	ifs = ifunit(req->ifbr_ifsname);
873 	if (ifs == NULL)
874 		return (ENOENT);
875 
876 	if (sc->sc_if.if_mtu != ifs->if_mtu)
877 		return (EINVAL);
878 
879 	if (ifs->if_bridge == sc)
880 		return (EEXIST);
881 
882 	if (ifs->if_bridge != NULL)
883 		return (EBUSY);
884 
885 	if (ifs->if_input != ether_input)
886 		return EINVAL;
887 
888 	/* FIXME: doesn't work with non-IFF_SIMPLEX interfaces */
889 	if ((ifs->if_flags & IFF_SIMPLEX) == 0)
890 		return EINVAL;
891 
892 	bif = kmem_alloc(sizeof(*bif), KM_SLEEP);
893 
894 	switch (ifs->if_type) {
895 	case IFT_ETHER:
896 		if ((error = ether_enable_vlan_mtu(ifs)) > 0)
897 			goto out;
898 		/*
899 		 * Place the interface into promiscuous mode.
900 		 */
901 		error = ifpromisc(ifs, 1);
902 		if (error)
903 			goto out;
904 		break;
905 	default:
906 		error = EINVAL;
907 		goto out;
908 	}
909 
910 	bif->bif_ifp = ifs;
911 	bif->bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
912 	bif->bif_priority = BSTP_DEFAULT_PORT_PRIORITY;
913 	bif->bif_path_cost = BSTP_DEFAULT_PATH_COST;
914 	bif->bif_refs = 0;
915 	bif->bif_waiting = false;
916 
917 	BRIDGE_LOCK(sc);
918 
919 	ifs->if_bridge = sc;
920 	ifs->if_bridgeif = bif;
921 	LIST_INSERT_HEAD(&sc->sc_iflist, bif, bif_next);
922 	ifs->if_input = bridge_input;
923 
924 	BRIDGE_UNLOCK(sc);
925 
926 	if (sc->sc_if.if_flags & IFF_RUNNING)
927 		bstp_initialization(sc);
928 	else
929 		bstp_stop(sc);
930 
931  out:
932 	if (error) {
933 		if (bif != NULL)
934 			kmem_free(bif, sizeof(*bif));
935 	}
936 	return (error);
937 }
938 
939 static int
940 bridge_ioctl_del(struct bridge_softc *sc, void *arg)
941 {
942 	struct ifbreq *req = arg;
943 	const char *name = req->ifbr_ifsname;
944 	struct bridge_iflist *bif;
945 	struct ifnet *ifs;
946 
947 	BRIDGE_LOCK(sc);
948 
949 	/*
950 	 * Don't use bridge_lookup_member: it would take a reference on the
951 	 * member, and bridge_delete_member() below waits for references to drain.
952 	 */
953 	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
954 		ifs = bif->bif_ifp;
955 		if (strcmp(ifs->if_xname, name) == 0)
956 			break;
957 	}
958 
959 	if (bif == NULL) {
960 		BRIDGE_UNLOCK(sc);
961 		return ENOENT;
962 	}
963 
964 	bridge_delete_member(sc, bif);
965 
966 	BRIDGE_UNLOCK(sc);
967 
968 	switch (ifs->if_type) {
969 	case IFT_ETHER:
970 		/*
971 		 * Take the interface out of promiscuous mode.
972 		 * Don't call it while holding a spin lock.
973 		 */
974 		(void) ifpromisc(ifs, 0);
975 		(void) ether_disable_vlan_mtu(ifs);
976 		break;
977 	default:
978 #ifdef DIAGNOSTIC
979 		panic("bridge_delete_member: impossible");
980 #endif
981 		break;
982 	}
983 
984 	bridge_rtdelete(sc, ifs);
985 
986 	if (sc->sc_if.if_flags & IFF_RUNNING)
987 		bstp_initialization(sc);
988 
989 	return 0;
990 }
991 
992 static int
993 bridge_ioctl_gifflags(struct bridge_softc *sc, void *arg)
994 {
995 	struct ifbreq *req = arg;
996 	struct bridge_iflist *bif;
997 
998 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
999 	if (bif == NULL)
1000 		return (ENOENT);
1001 
1002 	req->ifbr_ifsflags = bif->bif_flags;
1003 	req->ifbr_state = bif->bif_state;
1004 	req->ifbr_priority = bif->bif_priority;
1005 	req->ifbr_path_cost = bif->bif_path_cost;
1006 	req->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1007 
1008 	bridge_release_member(sc, bif);
1009 
1010 	return (0);
1011 }
1012 
1013 static int
1014 bridge_ioctl_sifflags(struct bridge_softc *sc, void *arg)
1015 {
1016 	struct ifbreq *req = arg;
1017 	struct bridge_iflist *bif;
1018 
1019 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1020 	if (bif == NULL)
1021 		return (ENOENT);
1022 
1023 	if (req->ifbr_ifsflags & IFBIF_STP) {
1024 		switch (bif->bif_ifp->if_type) {
1025 		case IFT_ETHER:
1026 			/* These can do spanning tree. */
1027 			break;
1028 
1029 		default:
1030 			/* Nothing else can. */
1031 			bridge_release_member(sc, bif);
1032 			return (EINVAL);
1033 		}
1034 	}
1035 
1036 	bif->bif_flags = req->ifbr_ifsflags;
1037 
1038 	bridge_release_member(sc, bif);
1039 
1040 	if (sc->sc_if.if_flags & IFF_RUNNING)
1041 		bstp_initialization(sc);
1042 
1043 	return (0);
1044 }
1045 
1046 static int
1047 bridge_ioctl_scache(struct bridge_softc *sc, void *arg)
1048 {
1049 	struct ifbrparam *param = arg;
1050 
1051 	sc->sc_brtmax = param->ifbrp_csize;
1052 	bridge_rttrim(sc);
1053 
1054 	return (0);
1055 }
1056 
1057 static int
1058 bridge_ioctl_gcache(struct bridge_softc *sc, void *arg)
1059 {
1060 	struct ifbrparam *param = arg;
1061 
1062 	param->ifbrp_csize = sc->sc_brtmax;
1063 
1064 	return (0);
1065 }
1066 
1067 static int
1068 bridge_ioctl_gifs(struct bridge_softc *sc, void *arg)
1069 {
1070 	struct ifbifconf *bifc = arg;
1071 	struct bridge_iflist *bif;
1072 	struct ifbreq *breqs;
1073 	int i, count, error = 0;
1074 
1075 retry:
1076 	BRIDGE_LOCK(sc);
1077 	count = 0;
1078 	LIST_FOREACH(bif, &sc->sc_iflist, bif_next)
1079 		count++;
1080 	BRIDGE_UNLOCK(sc);
1081 
1082 	if (count == 0) {
1083 		bifc->ifbic_len = 0;
1084 		return 0;
1085 	}
1086 
1087 	if (bifc->ifbic_len == 0 || bifc->ifbic_len < (sizeof(*breqs) * count)) {
1088 		/* Tell the caller that a larger buffer is needed */
1089 		bifc->ifbic_len = sizeof(*breqs) * count;
1090 		return 0;
1091 	}
1092 
1093 	breqs = kmem_alloc(sizeof(*breqs) * count, KM_SLEEP);
1094 
1095 	BRIDGE_LOCK(sc);
1096 
1097 	i = 0;
1098 	LIST_FOREACH(bif, &sc->sc_iflist, bif_next)
1099 		i++;
1100 	if (i > count) {
1101 		/*
1102 		 * The number of members has increased;
1103 		 * we need a larger buffer.
1104 		 */
1105 		BRIDGE_UNLOCK(sc);
1106 		kmem_free(breqs, sizeof(*breqs) * count);
1107 		goto retry;
1108 	}
1109 
1110 	i = 0;
1111 	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1112 		struct ifbreq *breq = &breqs[i++];
1113 		memset(breq, 0, sizeof(*breq));
1114 
1115 		strlcpy(breq->ifbr_ifsname, bif->bif_ifp->if_xname,
1116 		    sizeof(breq->ifbr_ifsname));
1117 		breq->ifbr_ifsflags = bif->bif_flags;
1118 		breq->ifbr_state = bif->bif_state;
1119 		breq->ifbr_priority = bif->bif_priority;
1120 		breq->ifbr_path_cost = bif->bif_path_cost;
1121 		breq->ifbr_portno = bif->bif_ifp->if_index & 0xff;
1122 	}
1123 
1124 	/* Don't call copyout while holding the mutex */
1125 	BRIDGE_UNLOCK(sc);
1126 
1127 	for (i = 0; i < count; i++) {
1128 		error = copyout(&breqs[i], bifc->ifbic_req + i, sizeof(*breqs));
1129 		if (error)
1130 			break;
1131 	}
1132 	bifc->ifbic_len = sizeof(*breqs) * i;
1133 
1134 	kmem_free(breqs, sizeof(*breqs) * count);
1135 
1136 	return error;
1137 }
1138 
1139 static int
1140 bridge_ioctl_rts(struct bridge_softc *sc, void *arg)
1141 {
1142 	struct ifbaconf *bac = arg;
1143 	struct bridge_rtnode *brt;
1144 	struct ifbareq bareq;
1145 	int count = 0, error = 0, len;
1146 
1147 	if (bac->ifbac_len == 0)
1148 		return (0);
1149 
1150 	BRIDGE_RT_INTR_LOCK(sc);
1151 
1152 	len = bac->ifbac_len;
1153 	LIST_FOREACH(brt, &sc->sc_rtlist, brt_list) {
1154 		if (len < sizeof(bareq))
1155 			goto out;
1156 		memset(&bareq, 0, sizeof(bareq));
1157 		strlcpy(bareq.ifba_ifsname, brt->brt_ifp->if_xname,
1158 		    sizeof(bareq.ifba_ifsname));
1159 		memcpy(bareq.ifba_dst, brt->brt_addr, sizeof(brt->brt_addr));
1160 		if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
1161 			bareq.ifba_expire = brt->brt_expire - time_uptime;
1162 		} else
1163 			bareq.ifba_expire = 0;
1164 		bareq.ifba_flags = brt->brt_flags;
1165 
1166 		error = copyout(&bareq, bac->ifbac_req + count, sizeof(bareq));
1167 		if (error)
1168 			goto out;
1169 		count++;
1170 		len -= sizeof(bareq);
1171 	}
1172  out:
1173 	BRIDGE_RT_INTR_UNLOCK(sc);
1174 
1175 	bac->ifbac_len = sizeof(bareq) * count;
1176 	return (error);
1177 }
1178 
1179 static int
1180 bridge_ioctl_saddr(struct bridge_softc *sc, void *arg)
1181 {
1182 	struct ifbareq *req = arg;
1183 	struct bridge_iflist *bif;
1184 	int error;
1185 
1186 	bif = bridge_lookup_member(sc, req->ifba_ifsname);
1187 	if (bif == NULL)
1188 		return (ENOENT);
1189 
1190 	error = bridge_rtupdate(sc, req->ifba_dst, bif->bif_ifp, 1,
1191 	    req->ifba_flags);
1192 
1193 	bridge_release_member(sc, bif);
1194 
1195 	return (error);
1196 }
1197 
1198 static int
1199 bridge_ioctl_sto(struct bridge_softc *sc, void *arg)
1200 {
1201 	struct ifbrparam *param = arg;
1202 
1203 	sc->sc_brttimeout = param->ifbrp_ctime;
1204 
1205 	return (0);
1206 }
1207 
1208 static int
1209 bridge_ioctl_gto(struct bridge_softc *sc, void *arg)
1210 {
1211 	struct ifbrparam *param = arg;
1212 
1213 	param->ifbrp_ctime = sc->sc_brttimeout;
1214 
1215 	return (0);
1216 }
1217 
1218 static int
1219 bridge_ioctl_daddr(struct bridge_softc *sc, void *arg)
1220 {
1221 	struct ifbareq *req = arg;
1222 
1223 	return (bridge_rtdaddr(sc, req->ifba_dst));
1224 }
1225 
1226 static int
1227 bridge_ioctl_flush(struct bridge_softc *sc, void *arg)
1228 {
1229 	struct ifbreq *req = arg;
1230 
1231 	bridge_rtflush(sc, req->ifbr_ifsflags);
1232 
1233 	return (0);
1234 }
1235 
1236 static int
1237 bridge_ioctl_gpri(struct bridge_softc *sc, void *arg)
1238 {
1239 	struct ifbrparam *param = arg;
1240 
1241 	param->ifbrp_prio = sc->sc_bridge_priority;
1242 
1243 	return (0);
1244 }
1245 
1246 static int
1247 bridge_ioctl_spri(struct bridge_softc *sc, void *arg)
1248 {
1249 	struct ifbrparam *param = arg;
1250 
1251 	sc->sc_bridge_priority = param->ifbrp_prio;
1252 
1253 	if (sc->sc_if.if_flags & IFF_RUNNING)
1254 		bstp_initialization(sc);
1255 
1256 	return (0);
1257 }
1258 
1259 static int
1260 bridge_ioctl_ght(struct bridge_softc *sc, void *arg)
1261 {
1262 	struct ifbrparam *param = arg;
1263 
1264 	param->ifbrp_hellotime = sc->sc_bridge_hello_time >> 8;
1265 
1266 	return (0);
1267 }
1268 
1269 static int
1270 bridge_ioctl_sht(struct bridge_softc *sc, void *arg)
1271 {
1272 	struct ifbrparam *param = arg;
1273 
1274 	if (param->ifbrp_hellotime == 0)
1275 		return (EINVAL);
1276 	sc->sc_bridge_hello_time = param->ifbrp_hellotime << 8;
1277 
1278 	if (sc->sc_if.if_flags & IFF_RUNNING)
1279 		bstp_initialization(sc);
1280 
1281 	return (0);
1282 }
1283 
1284 static int
1285 bridge_ioctl_gfd(struct bridge_softc *sc, void *arg)
1286 {
1287 	struct ifbrparam *param = arg;
1288 
1289 	param->ifbrp_fwddelay = sc->sc_bridge_forward_delay >> 8;
1290 
1291 	return (0);
1292 }
1293 
1294 static int
1295 bridge_ioctl_sfd(struct bridge_softc *sc, void *arg)
1296 {
1297 	struct ifbrparam *param = arg;
1298 
1299 	if (param->ifbrp_fwddelay == 0)
1300 		return (EINVAL);
1301 	sc->sc_bridge_forward_delay = param->ifbrp_fwddelay << 8;
1302 
1303 	if (sc->sc_if.if_flags & IFF_RUNNING)
1304 		bstp_initialization(sc);
1305 
1306 	return (0);
1307 }
1308 
1309 static int
1310 bridge_ioctl_gma(struct bridge_softc *sc, void *arg)
1311 {
1312 	struct ifbrparam *param = arg;
1313 
1314 	param->ifbrp_maxage = sc->sc_bridge_max_age >> 8;
1315 
1316 	return (0);
1317 }
1318 
1319 static int
1320 bridge_ioctl_sma(struct bridge_softc *sc, void *arg)
1321 {
1322 	struct ifbrparam *param = arg;
1323 
1324 	if (param->ifbrp_maxage == 0)
1325 		return (EINVAL);
1326 	sc->sc_bridge_max_age = param->ifbrp_maxage << 8;
1327 
1328 	if (sc->sc_if.if_flags & IFF_RUNNING)
1329 		bstp_initialization(sc);
1330 
1331 	return (0);
1332 }
1333 
1334 static int
1335 bridge_ioctl_sifprio(struct bridge_softc *sc, void *arg)
1336 {
1337 	struct ifbreq *req = arg;
1338 	struct bridge_iflist *bif;
1339 
1340 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1341 	if (bif == NULL)
1342 		return (ENOENT);
1343 
1344 	bif->bif_priority = req->ifbr_priority;
1345 
1346 	if (sc->sc_if.if_flags & IFF_RUNNING)
1347 		bstp_initialization(sc);
1348 
1349 	bridge_release_member(sc, bif);
1350 
1351 	return (0);
1352 }
1353 
1354 #if defined(BRIDGE_IPF)
1355 static int
1356 bridge_ioctl_gfilt(struct bridge_softc *sc, void *arg)
1357 {
1358 	struct ifbrparam *param = arg;
1359 
1360 	param->ifbrp_filter = sc->sc_filter_flags;
1361 
1362 	return (0);
1363 }
1364 
1365 static int
1366 bridge_ioctl_sfilt(struct bridge_softc *sc, void *arg)
1367 {
1368 	struct ifbrparam *param = arg;
1369 	uint32_t nflags, oflags;
1370 
1371 	if (param->ifbrp_filter & ~IFBF_FILT_MASK)
1372 		return (EINVAL);
1373 
1374 	nflags = param->ifbrp_filter;
1375 	oflags = sc->sc_filter_flags;
1376 
1377 	if ((nflags & IFBF_FILT_USEIPF) && !(oflags & IFBF_FILT_USEIPF)) {
1378 		pfil_add_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1379 			sc->sc_if.if_pfil);
1380 	}
1381 	if (!(nflags & IFBF_FILT_USEIPF) && (oflags & IFBF_FILT_USEIPF)) {
1382 		pfil_remove_hook((void *)bridge_ipf, NULL, PFIL_IN|PFIL_OUT,
1383 			sc->sc_if.if_pfil);
1384 	}
1385 
1386 	sc->sc_filter_flags = nflags;
1387 
1388 	return (0);
1389 }
1390 #endif /* BRIDGE_IPF */
1391 
1392 static int
1393 bridge_ioctl_sifcost(struct bridge_softc *sc, void *arg)
1394 {
1395 	struct ifbreq *req = arg;
1396 	struct bridge_iflist *bif;
1397 
1398 	bif = bridge_lookup_member(sc, req->ifbr_ifsname);
1399 	if (bif == NULL)
1400 		return (ENOENT);
1401 
1402 	bif->bif_path_cost = req->ifbr_path_cost;
1403 
1404 	if (sc->sc_if.if_flags & IFF_RUNNING)
1405 		bstp_initialization(sc);
1406 
1407 	bridge_release_member(sc, bif);
1408 
1409 	return (0);
1410 }
1411 
1412 /*
1413  * bridge_ifdetach:
1414  *
1415  *	Detach an interface from a bridge.  Called when a member
1416  *	interface is detaching.
1417  */
1418 void
1419 bridge_ifdetach(struct ifnet *ifp)
1420 {
1421 	struct bridge_softc *sc = ifp->if_bridge;
1422 	struct ifbreq breq;
1423 
1424 	/* ioctl_lock should prevent this from happening */
1425 	KASSERT(sc != NULL);
1426 
1427 	memset(&breq, 0, sizeof(breq));
1428 	strlcpy(breq.ifbr_ifsname, ifp->if_xname, sizeof(breq.ifbr_ifsname));
1429 
1430 	(void) bridge_ioctl_del(sc, &breq);
1431 }
1432 
1433 /*
1434  * bridge_init:
1435  *
1436  *	Initialize a bridge interface.
1437  */
1438 static int
1439 bridge_init(struct ifnet *ifp)
1440 {
1441 	struct bridge_softc *sc = ifp->if_softc;
1442 
1443 	if (ifp->if_flags & IFF_RUNNING)
1444 		return (0);
1445 
1446 	callout_reset(&sc->sc_brcallout, bridge_rtable_prune_period * hz,
1447 	    bridge_timer, sc);
1448 
1449 	ifp->if_flags |= IFF_RUNNING;
1450 	bstp_initialization(sc);
1451 	return (0);
1452 }
1453 
1454 /*
1455  * bridge_stop:
1456  *
1457  *	Stop the bridge interface.
1458  */
1459 static void
1460 bridge_stop(struct ifnet *ifp, int disable)
1461 {
1462 	struct bridge_softc *sc = ifp->if_softc;
1463 
1464 	if ((ifp->if_flags & IFF_RUNNING) == 0)
1465 		return;
1466 
1467 	callout_stop(&sc->sc_brcallout);
1468 	bstp_stop(sc);
1469 
1470 	bridge_rtflush(sc, IFBF_FLUSHDYN);
1471 
1472 	ifp->if_flags &= ~IFF_RUNNING;
1473 }
1474 
1475 /*
1476  * bridge_enqueue:
1477  *
1478  *	Enqueue a packet on a bridge member interface.
1479  */
1480 void
1481 bridge_enqueue(struct bridge_softc *sc, struct ifnet *dst_ifp, struct mbuf *m,
1482     int runfilt)
1483 {
1484 	ALTQ_DECL(struct altq_pktattr pktattr;)
1485 	int len, error;
1486 	short mflags;
1487 
1488 	/*
1489 	 * Clear any in-bound checksum flags for this packet.
1490 	 */
1491 	m->m_pkthdr.csum_flags = 0;
1492 
1493 	if (runfilt) {
1494 		if (pfil_run_hooks(sc->sc_if.if_pfil, &m,
1495 		    dst_ifp, PFIL_OUT) != 0) {
1496 			if (m != NULL)
1497 				m_freem(m);
1498 			return;
1499 		}
1500 		if (m == NULL)
1501 			return;
1502 	}
1503 
1504 #ifdef ALTQ
1505 	/*
1506 	 * If ALTQ is enabled on the member interface, do
1507 	 * classification; the queueing discipline might
1508 	 * not require classification, but might require
1509 	 * the address family/header pointer in the pktattr.
1510 	 */
1511 	if (ALTQ_IS_ENABLED(&dst_ifp->if_snd)) {
1512 		/* XXX IFT_ETHER */
1513 		altq_etherclassify(&dst_ifp->if_snd, m, &pktattr);
1514 	}
1515 #endif /* ALTQ */
1516 
1517 	len = m->m_pkthdr.len;
1518 	mflags = m->m_flags;
1519 
1520 	IFQ_ENQUEUE(&dst_ifp->if_snd, m, &pktattr, error);
1521 
1522 	if (error) {
1523 		/* mbuf is already freed */
1524 		sc->sc_if.if_oerrors++;
1525 		return;
1526 	}
1527 
1528 	sc->sc_if.if_opackets++;
1529 	sc->sc_if.if_obytes += len;
1530 
1531 	dst_ifp->if_obytes += len;
1532 
1533 	if (mflags & M_MCAST) {
1534 		sc->sc_if.if_omcasts++;
1535 		dst_ifp->if_omcasts++;
1536 	}
1537 
1538 	if ((dst_ifp->if_flags & IFF_OACTIVE) == 0)
1539 		(*dst_ifp->if_start)(dst_ifp);
1540 }
1541 
1542 /*
1543  * bridge_output:
1544  *
1545  *	Send output from a bridge member interface.  This
1546  *	performs the bridging function for locally originated
1547  *	packets.
1548  *
1549  *	The mbuf has the Ethernet header already attached.  We must
1550  *	enqueue or free the mbuf before returning.
1551  */
1552 int
1553 bridge_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa,
1554     struct rtentry *rt)
1555 {
1556 	struct ether_header *eh;
1557 	struct ifnet *dst_if;
1558 	struct bridge_softc *sc;
1559 #ifndef BRIDGE_MPSAFE
1560 	int s;
1561 #endif
1562 
1563 	if (m->m_len < ETHER_HDR_LEN) {
1564 		m = m_pullup(m, ETHER_HDR_LEN);
1565 		if (m == NULL)
1566 			return (0);
1567 	}
1568 
1569 	eh = mtod(m, struct ether_header *);
1570 	sc = ifp->if_bridge;
1571 
1572 #ifndef BRIDGE_MPSAFE
1573 	s = splnet();
1574 #endif
1575 
1576 	/*
1577 	 * If bridge is down, but the original output interface is up,
1578 	 * go ahead and send out that interface.  Otherwise, the packet
1579 	 * is dropped below.
1580 	 */
1581 	if (__predict_false(sc == NULL) ||
1582 	    (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
1583 		dst_if = ifp;
1584 		goto sendunicast;
1585 	}
1586 
1587 	/*
1588 	 * If the packet is a multicast, or we don't know a better way to
1589 	 * get there, send to all interfaces.
1590 	 */
1591 	if (ETHER_IS_MULTICAST(eh->ether_dhost))
1592 		dst_if = NULL;
1593 	else
1594 		dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1595 	if (dst_if == NULL) {
1596 		struct bridge_iflist *bif;
1597 		struct mbuf *mc;
1598 		int used = 0;
1599 		int ss;
1600 
1601 		BRIDGE_PSZ_RENTER(ss);
1602 		LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
1603 			bif = bridge_try_hold_bif(bif);
1604 			if (bif == NULL)
1605 				continue;
1606 			BRIDGE_PSZ_REXIT(ss);
1607 
1608 			dst_if = bif->bif_ifp;
1609 			if ((dst_if->if_flags & IFF_RUNNING) == 0)
1610 				goto next;
1611 
1612 			/*
1613 			 * If this is not the original output interface,
1614 			 * and the interface is participating in spanning
1615 			 * tree, make sure the port is in a state that
1616 			 * allows forwarding.
1617 			 */
1618 			if (dst_if != ifp &&
1619 			    (bif->bif_flags & IFBIF_STP) != 0) {
1620 				switch (bif->bif_state) {
1621 				case BSTP_IFSTATE_BLOCKING:
1622 				case BSTP_IFSTATE_LISTENING:
1623 				case BSTP_IFSTATE_DISABLED:
1624 					goto next;
1625 				}
1626 			}
1627 
1628 			if (LIST_NEXT(bif, bif_next) == NULL) {
1629 				used = 1;
1630 				mc = m;
1631 			} else {
1632 				mc = m_copym(m, 0, M_COPYALL, M_NOWAIT);
1633 				if (mc == NULL) {
1634 					sc->sc_if.if_oerrors++;
1635 					goto next;
1636 				}
1637 			}
1638 
1639 			bridge_enqueue(sc, dst_if, mc, 0);
1640 next:
1641 			bridge_release_member(sc, bif);
1642 			BRIDGE_PSZ_RENTER(ss);
1643 		}
1644 		BRIDGE_PSZ_REXIT(ss);
1645 
1646 		if (used == 0)
1647 			m_freem(m);
1648 #ifndef BRIDGE_MPSAFE
1649 		splx(s);
1650 #endif
1651 		return (0);
1652 	}
1653 
1654  sendunicast:
1655 	/*
1656 	 * XXX Spanning tree consideration here?
1657 	 */
1658 
1659 	if ((dst_if->if_flags & IFF_RUNNING) == 0) {
1660 		m_freem(m);
1661 #ifndef BRIDGE_MPSAFE
1662 		splx(s);
1663 #endif
1664 		return (0);
1665 	}
1666 
1667 	bridge_enqueue(sc, dst_if, m, 0);
1668 
1669 #ifndef BRIDGE_MPSAFE
1670 	splx(s);
1671 #endif
1672 	return (0);
1673 }
1674 
1675 /*
1676  * bridge_start:
1677  *
1678  *	Start output on a bridge.
1679  *
1680  *	NOTE: This routine should never be called in this implementation.
1681  */
1682 static void
1683 bridge_start(struct ifnet *ifp)
1684 {
1685 
1686 	printf("%s: bridge_start() called\n", ifp->if_xname);
1687 }
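/*
 * Receive path summary: when an interface joins the bridge, its if_input is
 * pointed at bridge_input() (see bridge_ioctl_add()).  bridge_input() hands
 * frames destined for the local system to ether_input() and queues the rest
 * on sc_fwd_pktq.  bridge_forward() below runs as the pktqueue softint
 * handler established by pktq_create() in bridge_clone_create(); it applies
 * the STP port state and the pfil(9) hooks, then transmits the frame with
 * bridge_enqueue() or floods it with bridge_broadcast().
 */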
1688 
1689 /*
1690  * bridge_forward:
1691  *
1692  *	The forwarding function of the bridge.
1693  */
1694 static void
1695 bridge_forward(void *v)
1696 {
1697 	struct bridge_softc *sc = v;
1698 	struct mbuf *m;
1699 	struct bridge_iflist *bif;
1700 	struct ifnet *src_if, *dst_if;
1701 	struct ether_header *eh;
1702 #ifndef BRIDGE_MPSAFE
1703 	int s;
1704 
1705 	KERNEL_LOCK(1, NULL);
1706 	mutex_enter(softnet_lock);
1707 #endif
1708 
1709 	if ((sc->sc_if.if_flags & IFF_RUNNING) == 0) {
1710 #ifndef BRIDGE_MPSAFE
1711 		mutex_exit(softnet_lock);
1712 		KERNEL_UNLOCK_ONE(NULL);
1713 #endif
1714 		return;
1715 	}
1716 
1717 #ifndef BRIDGE_MPSAFE
1718 	s = splnet();
1719 #endif
1720 	while ((m = pktq_dequeue(sc->sc_fwd_pktq)) != NULL) {
1721 		src_if = m->m_pkthdr.rcvif;
1722 
1723 		sc->sc_if.if_ipackets++;
1724 		sc->sc_if.if_ibytes += m->m_pkthdr.len;
1725 
1726 		/*
1727 		 * Look up the bridge_iflist.
1728 		 */
1729 		bif = bridge_lookup_member_if(sc, src_if);
1730 		if (bif == NULL) {
1731 			/* Interface is not a bridge member (anymore?) */
1732 			m_freem(m);
1733 			continue;
1734 		}
1735 
1736 		if (bif->bif_flags & IFBIF_STP) {
1737 			switch (bif->bif_state) {
1738 			case BSTP_IFSTATE_BLOCKING:
1739 			case BSTP_IFSTATE_LISTENING:
1740 			case BSTP_IFSTATE_DISABLED:
1741 				m_freem(m);
1742 				bridge_release_member(sc, bif);
1743 				continue;
1744 			}
1745 		}
1746 
1747 		eh = mtod(m, struct ether_header *);
1748 
1749 		/*
1750 		 * If the interface is learning, and the source
1751 		 * address is valid and not multicast, record
1752 		 * the address.
1753 		 */
1754 		if ((bif->bif_flags & IFBIF_LEARNING) != 0 &&
1755 		    ETHER_IS_MULTICAST(eh->ether_shost) == 0 &&
1756 		    (eh->ether_shost[0] == 0 &&
1757 		     eh->ether_shost[1] == 0 &&
1758 		     eh->ether_shost[2] == 0 &&
1759 		     eh->ether_shost[3] == 0 &&
1760 		     eh->ether_shost[4] == 0 &&
1761 		     eh->ether_shost[5] == 0) == 0) {
1762 			(void) bridge_rtupdate(sc, eh->ether_shost,
1763 			    src_if, 0, IFBAF_DYNAMIC);
1764 		}
1765 
1766 		if ((bif->bif_flags & IFBIF_STP) != 0 &&
1767 		    bif->bif_state == BSTP_IFSTATE_LEARNING) {
1768 			m_freem(m);
1769 			bridge_release_member(sc, bif);
1770 			continue;
1771 		}
1772 
1773 		bridge_release_member(sc, bif);
1774 
1775 		/*
1776 		 * At this point, the port either doesn't participate
1777 		 * in spanning tree or it is in the forwarding state.
1778 		 */
1779 
1780 		/*
1781 		 * If the packet is unicast, destined for someone on
1782 		 * "this" side of the bridge, drop it.
1783 		 */
1784 		if ((m->m_flags & (M_BCAST|M_MCAST)) == 0) {
1785 			dst_if = bridge_rtlookup(sc, eh->ether_dhost);
1786 			if (src_if == dst_if) {
1787 				m_freem(m);
1788 				continue;
1789 			}
1790 		} else {
1791 			/* ...forward it to all interfaces. */
1792 			sc->sc_if.if_imcasts++;
1793 			dst_if = NULL;
1794 		}
1795 
1796 		if (pfil_run_hooks(sc->sc_if.if_pfil, &m,
1797 		    m->m_pkthdr.rcvif, PFIL_IN) != 0) {
1798 			if (m != NULL)
1799 				m_freem(m);
1800 			continue;
1801 		}
1802 		if (m == NULL)
1803 			continue;
1804 
1805 		if (dst_if == NULL) {
1806 			bridge_broadcast(sc, src_if, m);
1807 			continue;
1808 		}
1809 
1810 		/*
1811 		 * At this point, we're dealing with a unicast frame
1812 		 * going to a different interface.
1813 		 */
1814 		if ((dst_if->if_flags & IFF_RUNNING) == 0) {
1815 			m_freem(m);
1816 			continue;
1817 		}
1818 
1819 		bif = bridge_lookup_member_if(sc, dst_if);
1820 		if (bif == NULL) {
1821 			/* Not a member of the bridge (anymore?) */
1822 			m_freem(m);
1823 			continue;
1824 		}
1825 
1826 		if (bif->bif_flags & IFBIF_STP) {
1827 			switch (bif->bif_state) {
1828 			case BSTP_IFSTATE_DISABLED:
1829 			case BSTP_IFSTATE_BLOCKING:
1830 				m_freem(m);
1831 				bridge_release_member(sc, bif);
1832 				continue;
1833 			}
1834 		}
1835 
1836 		bridge_release_member(sc, bif);
1837 
1838 		bridge_enqueue(sc, dst_if, m, 1);
1839 	}
1840 #ifndef BRIDGE_MPSAFE
1841 	splx(s);
1842 	mutex_exit(softnet_lock);
1843 	KERNEL_UNLOCK_ONE(NULL);
1844 #endif
1845 }
1846 
1847 static bool
1848 bstp_state_before_learning(struct bridge_iflist *bif)
1849 {
1850 	if (bif->bif_flags & IFBIF_STP) {
1851 		switch (bif->bif_state) {
1852 		case BSTP_IFSTATE_BLOCKING:
1853 		case BSTP_IFSTATE_LISTENING:
1854 		case BSTP_IFSTATE_DISABLED:
1855 			return true;
1856 		}
1857 	}
1858 	return false;
1859 }
1860 
1861 static bool
1862 bridge_ourether(struct bridge_iflist *bif, struct ether_header *eh, int src)
1863 {
1864 	uint8_t *ether = src ? eh->ether_shost : eh->ether_dhost;
1865 
1866 	if (memcmp(CLLADDR(bif->bif_ifp->if_sadl), ether, ETHER_ADDR_LEN) == 0
1867 #if NCARP > 0
1868 	    || (bif->bif_ifp->if_carp &&
1869 	        carp_ourether(bif->bif_ifp->if_carp, eh, IFT_ETHER, src) != NULL)
1870 #endif /* NCARP > 0 */
1871 	    )
1872 		return true;
1873 
1874 	return false;
1875 }
1876 
1877 /*
1878  * bridge_input:
1879  *
1880  *	Receive input from a member interface.  Queue the packet for
1881  *	bridging if it is not for us.
1882  */
1883 static void
1884 bridge_input(struct ifnet *ifp, struct mbuf *m)
1885 {
1886 	struct bridge_softc *sc = ifp->if_bridge;
1887 	struct bridge_iflist *bif;
1888 	struct ether_header *eh;
1889 
1890 	if (__predict_false(sc == NULL) ||
1891 	    (sc->sc_if.if_flags & IFF_RUNNING) == 0) {
1892 		ether_input(ifp, m);
1893 		return;
1894 	}
1895 
1896 	bif = bridge_lookup_member_if(sc, ifp);
1897 	if (bif == NULL) {
1898 		ether_input(ifp, m);
1899 		return;
1900 	}
1901 
1902 	eh = mtod(m, struct ether_header *);
1903 
1904 	if (ETHER_IS_MULTICAST(eh->ether_dhost)) {
1905 		if (memcmp(etherbroadcastaddr,
1906 		    eh->ether_dhost, ETHER_ADDR_LEN) == 0)
1907 			m->m_flags |= M_BCAST;
1908 		else
1909 			m->m_flags |= M_MCAST;
1910 	}
1911 
1912 	/*
1913 	 * A 'fast' path for packets addressed to interfaces that are
1914 	 * part of this bridge.
1915 	 */
1916 	if (!(m->m_flags & (M_BCAST|M_MCAST)) &&
1917 	    !bstp_state_before_learning(bif)) {
1918 		struct bridge_iflist *_bif;
1919 		struct ifnet *_ifp = NULL;
1920 		int s;
1921 
1922 		BRIDGE_PSZ_RENTER(s);
1923 		LIST_FOREACH(_bif, &sc->sc_iflist, bif_next) {
1924 			/* It is destined for us. */
1925 			if (bridge_ourether(_bif, eh, 0)) {
1926 				_bif = bridge_try_hold_bif(_bif);
1927 				BRIDGE_PSZ_REXIT(s);
1928 				if (_bif == NULL)
1929 					goto out;
1930 				if (_bif->bif_flags & IFBIF_LEARNING)
1931 					(void) bridge_rtupdate(sc,
1932 					    eh->ether_shost, ifp, 0, IFBAF_DYNAMIC);
1933 				_ifp = m->m_pkthdr.rcvif = _bif->bif_ifp;
1934 				bridge_release_member(sc, _bif);
1935 				goto out;
1936 			}
1937 
1938 			/* We just received a packet that we sent out. */
1939 			if (bridge_ourether(_bif, eh, 1))
1940 				break;
1941 		}
1942 		BRIDGE_PSZ_REXIT(s);
1943 out:
1944 
1945 		if (_bif != NULL) {
1946 			bridge_release_member(sc, bif);
1947 			if (_ifp != NULL) {
1948 				m->m_flags &= ~M_PROMISC;
1949 				ether_input(_ifp, m);
1950 			} else
1951 				m_freem(m);
1952 			return;
1953 		}
1954 	}
1955 
1956 	/* Tap off 802.1D packets; they do not get forwarded. */
1957 	if (bif->bif_flags & IFBIF_STP &&
1958 	    memcmp(eh->ether_dhost, bstp_etheraddr, ETHER_ADDR_LEN) == 0) {
1959 		bstp_input(sc, bif, m);
1960 		bridge_release_member(sc, bif);
1961 		return;
1962 	}
1963 
1964 	/*
1965 	 * A normal switch would discard the packet here, but that's not what
1966 	 * we've done historically. This also prevents some obnoxious behaviour.
1967 	 */
1968 	if (bstp_state_before_learning(bif)) {
1969 		bridge_release_member(sc, bif);
1970 		ether_input(ifp, m);
1971 		return;
1972 	}
1973 
1974 	bridge_release_member(sc, bif);
1975 
1976 	/* Queue the packet for bridge forwarding. */
1977 	{
1978 		/*
1979 		 * Force enqueueing to the current CPU's pktq (RX can run on a
1980 		 * CPU other than CPU#0). XXX needs a fundamental solution.
1981 		 */
1982 		const unsigned hash = curcpu()->ci_index;
1983 
1984 		if (__predict_false(!pktq_enqueue(sc->sc_fwd_pktq, m, hash)))
1985 			m_freem(m);
1986 	}
1987 }
1988 
1989 /*
1990  * bridge_broadcast:
1991  *
1992  *	Send a frame to all interfaces that are members of
1993  *	the bridge, except for the one on which the packet
1994  *	arrived.
1995  */
1996 static void
1997 bridge_broadcast(struct bridge_softc *sc, struct ifnet *src_if,
1998     struct mbuf *m)
1999 {
2000 	struct bridge_iflist *bif;
2001 	struct mbuf *mc;
2002 	struct ifnet *dst_if;
2003 	bool bmcast;
2004 	int s;
2005 
2006 	bmcast = m->m_flags & (M_BCAST|M_MCAST);
2007 
2008 	BRIDGE_PSZ_RENTER(s);
2009 	LIST_FOREACH(bif, &sc->sc_iflist, bif_next) {
2010 		bif = bridge_try_hold_bif(bif);
2011 		if (bif == NULL)
2012 			continue;
2013 		BRIDGE_PSZ_REXIT(s);
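		/*
		 * The member is reference-held, so it stays valid outside
		 * the read section.
		 */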
2014 
2015 		dst_if = bif->bif_ifp;
2016 
2017 		if (bif->bif_flags & IFBIF_STP) {
2018 			switch (bif->bif_state) {
2019 			case BSTP_IFSTATE_BLOCKING:
2020 			case BSTP_IFSTATE_DISABLED:
2021 				goto next;
2022 			}
2023 		}
2024 
2025 		if ((bif->bif_flags & IFBIF_DISCOVER) == 0 && !bmcast)
2026 			goto next;
2027 
2028 		if ((dst_if->if_flags & IFF_RUNNING) == 0)
2029 			goto next;
2030 
2031 		if (dst_if != src_if) {
2032 			mc = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
2033 			if (mc == NULL) {
2034 				sc->sc_if.if_oerrors++;
2035 				goto next;
2036 			}
2037 			bridge_enqueue(sc, dst_if, mc, 1);
2038 		}
2039 
2040 		if (bmcast) {
2041 			mc = m_copym(m, 0, M_COPYALL, M_DONTWAIT);
2042 			if (mc == NULL) {
2043 				sc->sc_if.if_oerrors++;
2044 				goto next;
2045 			}
2046 
2047 			mc->m_pkthdr.rcvif = dst_if;
2048 			mc->m_flags &= ~M_PROMISC;
2049 			ether_input(dst_if, mc);
2050 		}
2051 next:
2052 		bridge_release_member(sc, bif);
2053 		BRIDGE_PSZ_RENTER(s);
2054 	}
2055 	BRIDGE_PSZ_REXIT(s);
2056 
2057 	m_freem(m);
2058 }
2059 
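/*
 * bridge_rtalloc:
 *
 *	Allocate a new bridge routing entry for the given destination
 *	address and insert it into the route table.
 */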
2060 static int
2061 bridge_rtalloc(struct bridge_softc *sc, const uint8_t *dst,
2062     struct bridge_rtnode **brtp)
2063 {
2064 	struct bridge_rtnode *brt;
2065 	int error;
2066 
2067 	if (sc->sc_brtcnt >= sc->sc_brtmax)
2068 		return ENOSPC;
2069 
2070 	/*
2071 	 * Allocate a new bridge forwarding node, and
2072 	 * initialize the expiration time and Ethernet
2073 	 * address.
2074 	 */
2075 	brt = pool_get(&bridge_rtnode_pool, PR_NOWAIT);
2076 	if (brt == NULL)
2077 		return ENOMEM;
2078 
2079 	memset(brt, 0, sizeof(*brt));
2080 	brt->brt_expire = time_uptime + sc->sc_brttimeout;
2081 	brt->brt_flags = IFBAF_DYNAMIC;
2082 	memcpy(brt->brt_addr, dst, ETHER_ADDR_LEN);
2083 
2084 	BRIDGE_RT_INTR_LOCK(sc);
2085 	error = bridge_rtnode_insert(sc, brt);
2086 	BRIDGE_RT_INTR_UNLOCK(sc);
2087 
2088 	if (error != 0) {
2089 		pool_put(&bridge_rtnode_pool, brt);
2090 		return error;
2091 	}
2092 
2093 	*brtp = brt;
2094 	return 0;
2095 }
2096 
2097 /*
2098  * bridge_rtupdate:
2099  *
2100  *	Add a bridge routing entry.
2101  */
2102 static int
2103 bridge_rtupdate(struct bridge_softc *sc, const uint8_t *dst,
2104     struct ifnet *dst_if, int setflags, uint8_t flags)
2105 {
2106 	struct bridge_rtnode *brt;
2107 	int s;
2108 
2109 again:
2110 	/*
2111 	 * A route for this destination might already exist.  If so,
2112 	 * update it; otherwise, create a new one.
2113 	 */
2114 	BRIDGE_RT_RENTER(s);
2115 	brt = bridge_rtnode_lookup(sc, dst);
2116 
2117 	if (brt != NULL) {
2118 		brt->brt_ifp = dst_if;
2119 		if (setflags) {
2120 			brt->brt_flags = flags;
2121 			if (flags & IFBAF_STATIC)
2122 				brt->brt_expire = 0;
2123 			else
2124 				brt->brt_expire = time_uptime + sc->sc_brttimeout;
2125 		} else {
2126 			if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
2127 				brt->brt_expire = time_uptime + sc->sc_brttimeout;
2128 		}
2129 	}
2130 	BRIDGE_RT_REXIT(s);
2131 
2132 	if (brt == NULL) {
2133 		int r;
2134 
2135 		r = bridge_rtalloc(sc, dst, &brt);
2136 		if (r != 0)
2137 			return r;
2138 		goto again;
2139 	}
2140 
2141 	return 0;
2142 }
2143 
2144 /*
2145  * bridge_rtlookup:
2146  *
2147  *	Look up the destination interface for an address.
2148  */
2149 static struct ifnet *
2150 bridge_rtlookup(struct bridge_softc *sc, const uint8_t *addr)
2151 {
2152 	struct bridge_rtnode *brt;
2153 	struct ifnet *ifs = NULL;
2154 	int s;
2155 
2156 	BRIDGE_RT_RENTER(s);
2157 	brt = bridge_rtnode_lookup(sc, addr);
2158 	if (brt != NULL)
2159 		ifs = brt->brt_ifp;
2160 	BRIDGE_RT_REXIT(s);
2161 
2162 	return ifs;
2163 }
2164 
2165 typedef bool (*bridge_iterate_cb_t)
2166     (struct bridge_softc *, struct bridge_rtnode *, bool *, void *);
2167 
2168 /*
2169  * bridge_rtlist_iterate_remove:
2170  *
2171  *	Iterate over sc->sc_rtlist and remove the rtnodes that the func
2172  *	callback selects for removal.  Removals are carried out in a
2173  *	pserialize-safe manner; to this end, all kmem_* operations are
2174  *	kept outside the mutexes.
2175  */
2176 static void
2177 bridge_rtlist_iterate_remove(struct bridge_softc *sc, bridge_iterate_cb_t func, void *arg)
2178 {
2179 	struct bridge_rtnode *brt, *nbrt;
2180 	struct bridge_rtnode **brt_list;
2181 	int i, count;
2182 
2183 retry:
2184 	count = sc->sc_brtcnt;
2185 	if (count == 0)
2186 		return;
2187 	brt_list = kmem_alloc(sizeof(*brt_list) * count, KM_SLEEP);
2188 
2189 	BRIDGE_RT_LOCK(sc);
2190 	BRIDGE_RT_INTR_LOCK(sc);
2191 	if (__predict_false(sc->sc_brtcnt > count)) {
2192 		/* The number of rtnodes increased; we need more memory */
2193 		BRIDGE_RT_INTR_UNLOCK(sc);
2194 		BRIDGE_RT_UNLOCK(sc);
2195 		kmem_free(brt_list, sizeof(*brt_list) * count);
2196 		goto retry;
2197 	}
2198 
2199 	i = 0;
2200 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
2201 		bool need_break = false;
2202 		if (func(sc, brt, &need_break, arg)) {
2203 			bridge_rtnode_remove(sc, brt);
2204 			brt_list[i++] = brt;
2205 		}
2206 		if (need_break)
2207 			break;
2208 	}
2209 	BRIDGE_RT_INTR_UNLOCK(sc);
2210 
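	/*
	 * Wait for readers that may still be referencing the removed
	 * rtnodes before destroying them below.
	 */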
2211 	if (i > 0)
2212 		BRIDGE_RT_PSZ_PERFORM(sc);
2213 	BRIDGE_RT_UNLOCK(sc);
2214 
2215 	while (--i >= 0)
2216 		bridge_rtnode_destroy(brt_list[i]);
2217 
2218 	kmem_free(brt_list, sizeof(*brt_list) * count);
2219 }
2220 
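/*
 * bridge_rttrim0_cb:
 *
 *	Callback for bridge_rttrim0: select dynamic entries for removal,
 *	stopping once the table fits within sc_brtmax again.
 */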
2221 static bool
2222 bridge_rttrim0_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2223     bool *need_break, void *arg)
2224 {
2225 	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC) {
2226 		/* Take the subsequent removal into account */
2227 		if ((sc->sc_brtcnt - 1) <= sc->sc_brtmax)
2228 			*need_break = true;
2229 		return true;
2230 	} else
2231 		return false;
2232 }
2233 
2234 static void
2235 bridge_rttrim0(struct bridge_softc *sc)
2236 {
2237 	bridge_rtlist_iterate_remove(sc, bridge_rttrim0_cb, NULL);
2238 }
2239 
2240 /*
2241  * bridge_rttrim:
2242  *
2243  *	Trim the routing table so that we have a number
2244  *	of routing entries less than or equal to the
2245  *	maximum number.
2246  */
2247 static void
2248 bridge_rttrim(struct bridge_softc *sc)
2249 {
2250 
2251 	/* Make sure we actually need to do this. */
2252 	if (sc->sc_brtcnt <= sc->sc_brtmax)
2253 		return;
2254 
2255 	/* Force an aging cycle; this might trim enough addresses. */
2256 	bridge_rtage(sc);
2257 	if (sc->sc_brtcnt <= sc->sc_brtmax)
2258 		return;
2259 
2260 	bridge_rttrim0(sc);
2261 
2262 	return;
2263 }
2264 
2265 /*
2266  * bridge_timer:
2267  *
2268  *	Aging timer for the bridge.
2269  */
2270 static void
2271 bridge_timer(void *arg)
2272 {
2273 	struct bridge_softc *sc = arg;
2274 
2275 	workqueue_enqueue(sc->sc_rtage_wq, &bridge_rtage_wk, NULL);
2276 }
2277 
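/*
 * bridge_rtage_work:
 *
 *	Workqueue handler for the aging timer: perform an aging cycle
 *	and re-arm the callout while the bridge is running.
 */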
2278 static void
2279 bridge_rtage_work(struct work *wk, void *arg)
2280 {
2281 	struct bridge_softc *sc = arg;
2282 
2283 	KASSERT(wk == &bridge_rtage_wk);
2284 
2285 	bridge_rtage(sc);
2286 
2287 	if (sc->sc_if.if_flags & IFF_RUNNING)
2288 		callout_reset(&sc->sc_brcallout,
2289 		    bridge_rtable_prune_period * hz, bridge_timer, sc);
2290 }
2291 
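/*
 * bridge_rtage_cb:
 *
 *	Callback for bridge_rtage: select dynamic entries whose
 *	expiration time has passed.
 */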
2292 static bool
2293 bridge_rtage_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2294     bool *need_break, void *arg)
2295 {
2296 	if ((brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC &&
2297 	    time_uptime >= brt->brt_expire)
2298 		return true;
2299 	else
2300 		return false;
2301 }
2302 
2303 /*
2304  * bridge_rtage:
2305  *
2306  *	Perform an aging cycle.
2307  */
2308 static void
2309 bridge_rtage(struct bridge_softc *sc)
2310 {
2311 	bridge_rtlist_iterate_remove(sc, bridge_rtage_cb, NULL);
2312 }
2313 
2314 
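/*
 * bridge_rtflush_cb:
 *
 *	Callback for bridge_rtflush: select every entry on a full flush,
 *	otherwise only the dynamic ones.
 */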
2315 static bool
2316 bridge_rtflush_cb(struct bridge_softc *sc, struct bridge_rtnode *brt,
2317     bool *need_break, void *arg)
2318 {
2319 	int full = *(int*)arg;
2320 
2321 	if (full || (brt->brt_flags & IFBAF_TYPEMASK) == IFBAF_DYNAMIC)
2322 		return true;
2323 	else
2324 		return false;
2325 }
2326 
2327 /*
2328  * bridge_rtflush:
2329  *
2330  *	Remove all dynamic addresses from the bridge.
2331  */
2332 static void
2333 bridge_rtflush(struct bridge_softc *sc, int full)
2334 {
2335 	bridge_rtlist_iterate_remove(sc, bridge_rtflush_cb, &full);
2336 }
2337 
2338 /*
2339  * bridge_rtdaddr:
2340  *
2341  *	Remove an address from the table.
2342  */
2343 static int
2344 bridge_rtdaddr(struct bridge_softc *sc, const uint8_t *addr)
2345 {
2346 	struct bridge_rtnode *brt;
2347 
2348 	BRIDGE_RT_LOCK(sc);
2349 	BRIDGE_RT_INTR_LOCK(sc);
2350 	if ((brt = bridge_rtnode_lookup(sc, addr)) == NULL) {
2351 		BRIDGE_RT_INTR_UNLOCK(sc);
2352 		BRIDGE_RT_UNLOCK(sc);
2353 		return ENOENT;
2354 	}
2355 	bridge_rtnode_remove(sc, brt);
2356 	BRIDGE_RT_INTR_UNLOCK(sc);
2357 	BRIDGE_RT_PSZ_PERFORM(sc);
2358 	BRIDGE_RT_UNLOCK(sc);
2359 
2360 	bridge_rtnode_destroy(brt);
2361 
2362 	return 0;
2363 }
2364 
2365 /*
2366  * bridge_rtdelete:
2367  *
2368  *	Delete routes to a specific member interface.
2369  */
2370 static void
2371 bridge_rtdelete(struct bridge_softc *sc, struct ifnet *ifp)
2372 {
2373 	struct bridge_rtnode *brt, *nbrt;
2374 
2375 	BRIDGE_RT_LOCK(sc);
2376 	BRIDGE_RT_INTR_LOCK(sc);
2377 	LIST_FOREACH_SAFE(brt, &sc->sc_rtlist, brt_list, nbrt) {
2378 		if (brt->brt_ifp == ifp)
2379 			break;
2380 	}
2381 	if (brt == NULL) {
2382 		BRIDGE_RT_INTR_UNLOCK(sc);
2383 		BRIDGE_RT_UNLOCK(sc);
2384 		return;
2385 	}
2386 	bridge_rtnode_remove(sc, brt);
2387 	BRIDGE_RT_INTR_UNLOCK(sc);
2388 	BRIDGE_RT_PSZ_PERFORM(sc);
2389 	BRIDGE_RT_UNLOCK(sc);
2390 
2391 	bridge_rtnode_destroy(brt);
2392 }
2393 
2394 /*
2395  * bridge_rtable_init:
2396  *
2397  *	Initialize the route table for this bridge.
2398  */
2399 static void
2400 bridge_rtable_init(struct bridge_softc *sc)
2401 {
2402 	int i;
2403 
2404 	sc->sc_rthash = kmem_alloc(sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE,
2405 	    KM_SLEEP);
2406 
2407 	for (i = 0; i < BRIDGE_RTHASH_SIZE; i++)
2408 		LIST_INIT(&sc->sc_rthash[i]);
2409 
2410 	sc->sc_rthash_key = cprng_fast32();
2411 
2412 	LIST_INIT(&sc->sc_rtlist);
2413 
2414 	sc->sc_rtlist_intr_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NET);
2415 #ifdef BRIDGE_MPSAFE
2416 	sc->sc_rtlist_psz = pserialize_create();
2417 	sc->sc_rtlist_lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_SOFTNET);
2418 #else
2419 	sc->sc_rtlist_psz = NULL;
2420 	sc->sc_rtlist_lock = NULL;
2421 #endif
2422 }
2423 
2424 /*
2425  * bridge_rtable_fini:
2426  *
2427  *	Deconstruct the route table for this bridge.
2428  */
2429 static void
2430 bridge_rtable_fini(struct bridge_softc *sc)
2431 {
2432 
2433 	kmem_free(sc->sc_rthash, sizeof(*sc->sc_rthash) * BRIDGE_RTHASH_SIZE);
2434 	if (sc->sc_rtlist_intr_lock)
2435 		mutex_obj_free(sc->sc_rtlist_intr_lock);
2436 	if (sc->sc_rtlist_lock)
2437 		mutex_obj_free(sc->sc_rtlist_lock);
2438 	if (sc->sc_rtlist_psz)
2439 		pserialize_destroy(sc->sc_rtlist_psz);
2440 }
2441 
2442 /*
2443  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
2444  * ("Algorithm Alley", Dr. Dobb's Journal, September 1997).
2445  */
2446 #define	mix(a, b, c)							\
2447 do {									\
2448 	a -= b; a -= c; a ^= (c >> 13);					\
2449 	b -= c; b -= a; b ^= (a << 8);					\
2450 	c -= a; c -= b; c ^= (b >> 13);					\
2451 	a -= b; a -= c; a ^= (c >> 12);					\
2452 	b -= c; b -= a; b ^= (a << 16);					\
2453 	c -= a; c -= b; c ^= (b >> 5);					\
2454 	a -= b; a -= c; a ^= (c >> 3);					\
2455 	b -= c; b -= a; b ^= (a << 10);					\
2456 	c -= a; c -= b; c ^= (b >> 15);					\
2457 } while (/*CONSTCOND*/0)
2458 
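/*
 * bridge_rthash:
 *
 *	Hash an Ethernet address into the route table, mixing in the
 *	per-bridge random key sc_rthash_key.
 */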
2459 static inline uint32_t
2460 bridge_rthash(struct bridge_softc *sc, const uint8_t *addr)
2461 {
2462 	uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = sc->sc_rthash_key;
2463 
2464 	b += addr[5] << 8;
2465 	b += addr[4];
2466 	a += addr[3] << 24;
2467 	a += addr[2] << 16;
2468 	a += addr[1] << 8;
2469 	a += addr[0];
2470 
2471 	mix(a, b, c);
2472 
2473 	return (c & BRIDGE_RTHASH_MASK);
2474 }
2475 
2476 #undef mix
2477 
2478 /*
2479  * bridge_rtnode_lookup:
2480  *
2481  *	Look up a bridge route node for the specified destination.
2482  */
2483 static struct bridge_rtnode *
2484 bridge_rtnode_lookup(struct bridge_softc *sc, const uint8_t *addr)
2485 {
2486 	struct bridge_rtnode *brt;
2487 	uint32_t hash;
2488 	int dir;
2489 
2490 	hash = bridge_rthash(sc, addr);
2491 	LIST_FOREACH(brt, &sc->sc_rthash[hash], brt_hash) {
2492 		dir = memcmp(addr, brt->brt_addr, ETHER_ADDR_LEN);
2493 		if (dir == 0)
2494 			return (brt);
2495 		if (dir > 0)
2496 			return (NULL);
2497 	}
2498 
2499 	return (NULL);
2500 }
2501 
2502 /*
2503  * bridge_rtnode_insert:
2504  *
2505  *	Insert the specified bridge node into the route table.  We
2506  *	assume the entry is not already in the table.
2507  */
2508 static int
2509 bridge_rtnode_insert(struct bridge_softc *sc, struct bridge_rtnode *brt)
2510 {
2511 	struct bridge_rtnode *lbrt;
2512 	uint32_t hash;
2513 	int dir;
2514 
2515 	KASSERT(BRIDGE_RT_INTR_LOCKED(sc));
2516 
2517 	hash = bridge_rthash(sc, brt->brt_addr);
2518 
2519 	lbrt = LIST_FIRST(&sc->sc_rthash[hash]);
2520 	if (lbrt == NULL) {
2521 		LIST_INSERT_HEAD(&sc->sc_rthash[hash], brt, brt_hash);
2522 		goto out;
2523 	}
2524 
2525 	do {
2526 		dir = memcmp(brt->brt_addr, lbrt->brt_addr, ETHER_ADDR_LEN);
2527 		if (dir == 0)
2528 			return (EEXIST);
2529 		if (dir > 0) {
2530 			LIST_INSERT_BEFORE(lbrt, brt, brt_hash);
2531 			goto out;
2532 		}
2533 		if (LIST_NEXT(lbrt, brt_hash) == NULL) {
2534 			LIST_INSERT_AFTER(lbrt, brt, brt_hash);
2535 			goto out;
2536 		}
2537 		lbrt = LIST_NEXT(lbrt, brt_hash);
2538 	} while (lbrt != NULL);
2539 
2540 #ifdef DIAGNOSTIC
2541 	panic("bridge_rtnode_insert: impossible");
2542 #endif
2543 
2544  out:
2545 	LIST_INSERT_HEAD(&sc->sc_rtlist, brt, brt_list);
2546 	sc->sc_brtcnt++;
2547 
2548 	return (0);
2549 }
2550 
2551 /*
2552  * bridge_rtnode_remove:
2553  *
2554  *	Remove a bridge rtnode from the rthash and the rtlist of a bridge.
2555  */
2556 static void
2557 bridge_rtnode_remove(struct bridge_softc *sc, struct bridge_rtnode *brt)
2558 {
2559 
2560 	KASSERT(BRIDGE_RT_INTR_LOCKED(sc));
2561 
2562 	LIST_REMOVE(brt, brt_hash);
2563 	LIST_REMOVE(brt, brt_list);
2564 	sc->sc_brtcnt--;
2565 }
2566 
2567 /*
2568  * bridge_rtnode_destroy:
2569  *
2570  *	Destroy a bridge rtnode.
2571  */
2572 static void
2573 bridge_rtnode_destroy(struct bridge_rtnode *brt)
2574 {
2575 
2576 	pool_put(&bridge_rtnode_pool, brt);
2577 }
2578 
2579 #if defined(BRIDGE_IPF)
2580 extern pfil_head_t *inet_pfil_hook;                 /* XXX */
2581 extern pfil_head_t *inet6_pfil_hook;                /* XXX */
2582 
2583 /*
2584  * Send bridge packets through IPF if they are one of the types IPF can deal
2585  * with, or if they are ARP or REVARP.  (IPF will pass ARP and REVARP without
2586  * question.)
2587  */
2588 static int
2589 bridge_ipf(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir)
2590 {
2591 	int snap, error;
2592 	struct ether_header *eh1, eh2;
2593 	struct llc llc1;
2594 	uint16_t ether_type;
2595 
2596 	snap = 0;
2597 	error = -1;	/* Default to an error unless set to 0 below */
2598 	eh1 = mtod(*mp, struct ether_header *);
2599 	ether_type = ntohs(eh1->ether_type);
2600 
2601 	/*
2602 	 * Check for SNAP/LLC.
2603 	 */
2604 	if (ether_type < ETHERMTU) {
2605 		struct llc *llc2 = (struct llc *)(eh1 + 1);
2606 
2607 		if ((*mp)->m_len >= ETHER_HDR_LEN + 8 &&
2608 		    llc2->llc_dsap == LLC_SNAP_LSAP &&
2609 		    llc2->llc_ssap == LLC_SNAP_LSAP &&
2610 		    llc2->llc_control == LLC_UI) {
2611 			ether_type = htons(llc2->llc_un.type_snap.ether_type);
2612 			snap = 1;
2613 		}
2614 	}
2615 
2616 	/*
2617 	 * If we're trying to filter bridge traffic, don't look at anything
2618 	 * other than IP and ARP traffic.  If the filter doesn't understand
2619 	 * IPv6, don't allow IPv6 through the bridge either.  This is lame
2620 	 * since if we really wanted, say, an AppleTalk filter, we are hosed,
2621 	 * but of course we don't have an AppleTalk filter to begin with.
2622 	 * (Note that since IPF doesn't understand ARP it will pass *ALL*
2623 	 * ARP traffic.)
2624 	 */
2625 	switch (ether_type) {
2626 		case ETHERTYPE_ARP:
2627 		case ETHERTYPE_REVARP:
2628 			return 0; /* Automatically pass */
2629 		case ETHERTYPE_IP:
2630 # ifdef INET6
2631 		case ETHERTYPE_IPV6:
2632 # endif /* INET6 */
2633 			break;
2634 		default:
2635 			goto bad;
2636 	}
2637 
2638 	/* Strip off the Ethernet header and keep a copy. */
2639 	m_copydata(*mp, 0, ETHER_HDR_LEN, (void *) &eh2);
2640 	m_adj(*mp, ETHER_HDR_LEN);
2641 
2642 	/* Strip off snap header, if present */
2643 	if (snap) {
2644 		m_copydata(*mp, 0, sizeof(struct llc), (void *) &llc1);
2645 		m_adj(*mp, sizeof(struct llc));
2646 	}
2647 
2648 	/*
2649 	 * Check basic packet sanity and run IPF through pfil.
2650 	 */
2651 	KASSERT(!cpu_intr_p());
2652 	switch (ether_type)
2653 	{
2654 	case ETHERTYPE_IP :
2655 		error = (dir == PFIL_IN) ? bridge_ip_checkbasic(mp) : 0;
2656 		if (error == 0)
2657 			error = pfil_run_hooks(inet_pfil_hook, mp, ifp, dir);
2658 		break;
2659 # ifdef INET6
2660 	case ETHERTYPE_IPV6 :
2661 		error = (dir == PFIL_IN) ? bridge_ip6_checkbasic(mp) : 0;
2662 		if (error == 0)
2663 			error = pfil_run_hooks(inet6_pfil_hook, mp, ifp, dir);
2664 		break;
2665 # endif
2666 	default :
2667 		error = 0;
2668 		break;
2669 	}
2670 
2671 	if (*mp == NULL)
2672 		return error;
2673 	if (error != 0)
2674 		goto bad;
2675 
2676 	error = -1;
2677 
2678 	/*
2679 	 * Finally, put everything back the way it was and return
2680 	 */
2681 	if (snap) {
2682 		M_PREPEND(*mp, sizeof(struct llc), M_DONTWAIT);
2683 		if (*mp == NULL)
2684 			return error;
2685 		bcopy(&llc1, mtod(*mp, void *), sizeof(struct llc));
2686 	}
2687 
2688 	M_PREPEND(*mp, ETHER_HDR_LEN, M_DONTWAIT);
2689 	if (*mp == NULL)
2690 		return error;
2691 	bcopy(&eh2, mtod(*mp, void *), ETHER_HDR_LEN);
2692 
2693 	return 0;
2694 
2695     bad:
2696 	m_freem(*mp);
2697 	*mp = NULL;
2698 	return error;
2699 }
2700 
2701 /*
2702  * Perform basic checks on header size, since IPF
2703  * assumes that ip_input has already processed the
2704  * header for it.  Cut-and-pasted from ip_input.c.
2705  * Given how simple the IPv6 version is,
2706  * does the IPv4 version really need to be
2707  * this complicated?
2708  *
2709  * XXX Should we update ipstat here, or not?
2710  * XXX Right now we update ipstat but not
2711  * XXX csum_counter.
2712  */
2713 static int
2714 bridge_ip_checkbasic(struct mbuf **mp)
2715 {
2716 	struct mbuf *m = *mp;
2717 	struct ip *ip;
2718 	int len, hlen;
2719 
2720 	if (*mp == NULL)
2721 		return -1;
2722 
2723 	if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
2724 		if ((m = m_copyup(m, sizeof(struct ip),
2725 			(max_linkhdr + 3) & ~3)) == NULL) {
2726 			/* XXXJRT new stat, please */
2727 			ip_statinc(IP_STAT_TOOSMALL);
2728 			goto bad;
2729 		}
2730 	} else if (__predict_false(m->m_len < sizeof (struct ip))) {
2731 		if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
2732 			ip_statinc(IP_STAT_TOOSMALL);
2733 			goto bad;
2734 		}
2735 	}
2736 	ip = mtod(m, struct ip *);
2737 	if (ip == NULL) goto bad;
2738 
2739 	if (ip->ip_v != IPVERSION) {
2740 		ip_statinc(IP_STAT_BADVERS);
2741 		goto bad;
2742 	}
2743 	hlen = ip->ip_hl << 2;
2744 	if (hlen < sizeof(struct ip)) { /* minimum header length */
2745 		ip_statinc(IP_STAT_BADHLEN);
2746 		goto bad;
2747 	}
2748 	if (hlen > m->m_len) {
2749 		if ((m = m_pullup(m, hlen)) == NULL) {
2750 			ip_statinc(IP_STAT_BADHLEN);
2751 			goto bad;
2752 		}
2753 		ip = mtod(m, struct ip *);
2754 		if (ip == NULL) goto bad;
2755 	}
2756 
2757 	switch (m->m_pkthdr.csum_flags &
2758 	    ((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_IPv4) |
2759 	     M_CSUM_IPv4_BAD)) {
2760 	case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
2761 		/* INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad); */
2762 		goto bad;
2763 
2764 	case M_CSUM_IPv4:
2765 		/* Checksum was okay. */
2766 		/* INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok); */
2767 		break;
2768 
2769 	default:
2770 		/* Must compute it ourselves. */
2771 		/* INET_CSUM_COUNTER_INCR(&ip_swcsum); */
2772 		if (in_cksum(m, hlen) != 0)
2773 			goto bad;
2774 		break;
2775 	}
2776 
2777 	/* Retrieve the packet length. */
2778 	len = ntohs(ip->ip_len);
2779 
2780 	/*
2781 	 * Check for additional length bogosity
2782 	 */
2783 	if (len < hlen) {
2784 		ip_statinc(IP_STAT_BADLEN);
2785 		goto bad;
2786 	}
2787 
2788 	/*
2789 	 * Check that the amount of data in the buffers
2790 	 * is at least as much as the IP header would have us expect.
2791 	 * Drop packet if shorter than we expect.
2792 	 */
2793 	if (m->m_pkthdr.len < len) {
2794 		ip_statinc(IP_STAT_TOOSHORT);
2795 		goto bad;
2796 	}
2797 
2798 	/* Checks out, proceed */
2799 	*mp = m;
2800 	return 0;
2801 
2802     bad:
2803 	*mp = m;
2804 	return -1;
2805 }
2806 
2807 # ifdef INET6
2808 /*
2809  * Same as above, but for IPv6.
2810  * Cut-and-pasted from ip6_input.c.
2811  * XXX Should we update ip6stat, or not?
2812  */
2813 static int
2814 bridge_ip6_checkbasic(struct mbuf **mp)
2815 {
2816 	struct mbuf *m = *mp;
2817 	struct ip6_hdr *ip6;
2818 
2819 	/*
2820 	 * If the IPv6 header is not aligned, slurp it up into a new
2821 	 * mbuf with space for link headers, in the event we forward
2822 	 * it.  Otherwise, if it is aligned, make sure the entire base
2823 	 * IPv6 header is in the first mbuf of the chain.
2824 	 */
2825 	if (IP6_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
2826 		struct ifnet *inifp = m->m_pkthdr.rcvif;
2827 		if ((m = m_copyup(m, sizeof(struct ip6_hdr),
2828 		    (max_linkhdr + 3) & ~3)) == NULL) {
2829 			/* XXXJRT new stat, please */
2830 			ip6_statinc(IP6_STAT_TOOSMALL);
2831 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
2832 			goto bad;
2833 		}
2834 	} else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
2835 		struct ifnet *inifp = m->m_pkthdr.rcvif;
2836 		if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
2837 			ip6_statinc(IP6_STAT_TOOSMALL);
2838 			in6_ifstat_inc(inifp, ifs6_in_hdrerr);
2839 			goto bad;
2840 		}
2841 	}
2842 
2843 	ip6 = mtod(m, struct ip6_hdr *);
2844 
2845 	if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
2846 		ip6_statinc(IP6_STAT_BADVERS);
2847 		in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
2848 		goto bad;
2849 	}
2850 
2851 	/* Checks out, proceed */
2852 	*mp = m;
2853 	return 0;
2854 
2855     bad:
2856 	*mp = m;
2857 	return -1;
2858 }
2859 # endif /* INET6 */
2860 #endif /* BRIDGE_IPF */
2861