/*	$OpenBSD: if_bpe.c,v 1.2 2019/01/16 00:26:45 jsg Exp $ */
/*
 * Copyright (c) 2018 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"
#include "pf.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/timeout.h>
#include <sys/pool.h>
#include <sys/tree.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/rtable.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

/* for bridge stuff */
#include <net/if_bridge.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#include <net/if_bpe.h>

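/*
 * bpe(4) implements the Backbone Edge Bridge encapsulation from
 * IEEE 802.1ah Provider Backbone Bridging ("mac-in-mac"): customer
 * Ethernet frames are wrapped in an outer backbone Ethernet header
 * (ETHERTYPE_PBB) plus a 32-bit I-TAG carrying a 24-bit service
 * instance identifier (I-SID).  Each bpe interface is bound to a
 * parent Ethernet interface and a single I-SID.
 *
 * Illustrative configuration sketch (interface names and numbers are
 * examples only, not taken from this file):
 *
 *	# ifconfig bpe0 create
 *	# ifconfig bpe0 parent em0 vnetid 1024
 *	# ifconfig bpe0 inet 192.0.2.1/24 up
 */
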
#define PBB_ITAG_ISID		0x00ffffff
#define PBB_ITAG_ISID_MIN	0x00000000
#define PBB_ITAG_ISID_MAX	0x00ffffff
#define PBB_ITAG_RES2		0x03000000	/* must be zero on input */
#define PBB_ITAG_RES1		0x04000000	/* ignore on input */
#define PBB_ITAG_UCA		0x08000000
#define PBB_ITAG_DEI		0x10000000
#define PBB_ITAG_PCP_SHIFT	29
#define PBB_ITAG_PCP_MASK	(0x7U << PBB_ITAG_PCP_SHIFT)
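/*
 * The values above cover the 32-bit I-TAG control word that sits
 * between the backbone Ethernet header and the encapsulated customer
 * frame: 3 bits of priority (PCP), a drop eligibility bit (DEI), the
 * use-customer-address bit (UCA), three reserved bits, and the 24-bit
 * I-SID in the low bits.
 */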

#define BPE_BRIDGE_AGE_TMO	100 /* seconds */

struct bpe_key {
	int			k_if;
	uint32_t		k_isid;

	RBT_ENTRY(bpe_key)	k_entry;
};

RBT_HEAD(bpe_tree, bpe_key);

static inline int bpe_cmp(const struct bpe_key *, const struct bpe_key *);

RBT_PROTOTYPE(bpe_tree, bpe_key, k_entry, bpe_cmp);
RBT_GENERATE(bpe_tree, bpe_key, k_entry, bpe_cmp);

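/*
 * A bpe_entry records which backbone MAC (be_b_da) a given customer
 * MAC (be_c_da) was last seen behind.  The per-interface bpe_map is
 * effectively a learned bridge forwarding table keyed by the customer
 * destination address; dynamic entries age out, static ones do not.
 */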
struct bpe_entry {
	struct ether_addr	be_c_da; /* customer address - must be first */
	struct ether_addr	be_b_da; /* bridge address */
	unsigned int		be_type;
#define BPE_ENTRY_DYNAMIC		0
#define BPE_ENTRY_STATIC		1
	struct refcnt		be_refs;
	time_t			be_age;

	RBT_ENTRY(bpe_entry)	be_entry;
};

RBT_HEAD(bpe_map, bpe_entry);

static inline int bpe_entry_cmp(const struct bpe_entry *,
    const struct bpe_entry *);

RBT_PROTOTYPE(bpe_map, bpe_entry, be_entry, bpe_entry_cmp);
RBT_GENERATE(bpe_map, bpe_entry, be_entry, bpe_entry_cmp);

struct bpe_softc {
	struct bpe_key		sc_key; /* must be first */
	struct arpcom		sc_ac;
	struct ifmedia		sc_media;
	int			sc_txhprio;
	uint8_t			sc_group[ETHER_ADDR_LEN];

	void *			sc_lh_cookie;
	void *			sc_dh_cookie;

	struct bpe_map		sc_bridge_map;
	struct rwlock		sc_bridge_lock;
	unsigned int		sc_bridge_num;
	unsigned int		sc_bridge_max;
	int			sc_bridge_tmo; /* seconds */
	struct timeout		sc_bridge_age;
};

void		bpeattach(int);

static int	bpe_clone_create(struct if_clone *, int);
static int	bpe_clone_destroy(struct ifnet *);

static void	bpe_start(struct ifnet *);
static int	bpe_ioctl(struct ifnet *, u_long, caddr_t);
static int	bpe_media_get(struct bpe_softc *, struct ifreq *);
static int	bpe_up(struct bpe_softc *);
static int	bpe_down(struct bpe_softc *);
static int	bpe_multi(struct bpe_softc *, struct ifnet *, u_long);
static int	bpe_set_vnetid(struct bpe_softc *, const struct ifreq *);
static void	bpe_set_group(struct bpe_softc *, uint32_t);
static int	bpe_set_parent(struct bpe_softc *, const struct if_parent *);
static int	bpe_get_parent(struct bpe_softc *, struct if_parent *);
static int	bpe_del_parent(struct bpe_softc *);
static void	bpe_link_hook(void *);
static void	bpe_link_state(struct bpe_softc *, u_char, uint64_t);
static void	bpe_detach_hook(void *);

static void	bpe_input_map(struct bpe_softc *,
		    const uint8_t *, const uint8_t *);
static void	bpe_bridge_age(void *);

static struct if_clone bpe_cloner =
    IF_CLONE_INITIALIZER("bpe", bpe_clone_create, bpe_clone_destroy);

static struct bpe_tree bpe_interfaces = RBT_INITIALIZER();
static struct rwlock bpe_lock = RWLOCK_INITIALIZER("bpeifs");
static struct pool bpe_entry_pool;

#define ether_cmp(_a, _b)	memcmp((_a), (_b), ETHER_ADDR_LEN)
#define ether_is_eq(_a, _b)	(ether_cmp((_a), (_b)) == 0)
#define ether_is_bcast(_a)	ether_is_eq((_a), etherbroadcastaddr)

void
bpeattach(int count)
{
	if_clone_attach(&bpe_cloner);
}

static int
bpe_clone_create(struct if_clone *ifc, int unit)
{
	struct bpe_softc *sc;
	struct ifnet *ifp;

	if (bpe_entry_pool.pr_size == 0) {
		pool_init(&bpe_entry_pool, sizeof(struct bpe_entry), 0,
		    IPL_NONE, 0, "bpepl", NULL);
	}

	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
	ifp = &sc->sc_ac.ac_if;

	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
	    ifc->ifc_name, unit);

	sc->sc_key.k_if = 0;
	sc->sc_key.k_isid = 0;
	bpe_set_group(sc, 0);

	sc->sc_txhprio = IF_HDRPRIO_PACKET;

	rw_init(&sc->sc_bridge_lock, "bpebr");
	RBT_INIT(bpe_map, &sc->sc_bridge_map);
	sc->sc_bridge_num = 0;
	sc->sc_bridge_max = 100; /* XXX */
	sc->sc_bridge_tmo = 240;
	timeout_set_proc(&sc->sc_bridge_age, bpe_bridge_age, sc);

	ifp->if_softc = sc;
	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
	ifp->if_ioctl = bpe_ioctl;
	ifp->if_start = bpe_start;
	ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
	ifp->if_xflags = IFXF_CLONED;
	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
	ether_fakeaddr(ifp);

	if_attach(ifp);
	ether_ifattach(ifp);

	return (0);
}

static int
bpe_clone_destroy(struct ifnet *ifp)
{
	struct bpe_softc *sc = ifp->if_softc;

	NET_LOCK();
	if (ISSET(ifp->if_flags, IFF_RUNNING))
		bpe_down(sc);
	NET_UNLOCK();

	ifmedia_delete_instance(&sc->sc_media, IFM_INST_ANY);
	ether_ifdetach(ifp);
	if_detach(ifp);

	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}

static inline int
bpe_entry_valid(struct bpe_softc *sc, const struct bpe_entry *be)
{
	time_t diff;

	if (be == NULL)
		return (0);

	if (be->be_type == BPE_ENTRY_STATIC)
		return (1);

	diff = time_uptime - be->be_age;
	if (diff < sc->sc_bridge_tmo)
		return (1);

	return (0);
}

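/*
 * Output path: each customer frame from the send queue gets a new
 * backbone Ethernet header and I-TAG prepended before being handed to
 * the parent interface.  The backbone destination is taken from the
 * learned map when a valid entry exists, otherwise the frame is
 * "flooded" to the per-I-SID group address.  The resulting frame looks
 * roughly like:
 *
 *	B-DA | B-SA | 0x88e7 | I-TAG (PCP/DEI/UCA/I-SID) | customer frame
 */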
static void
bpe_start(struct ifnet *ifp)
{
	struct bpe_softc *sc = ifp->if_softc;
	struct ifnet *ifp0;
	struct mbuf *m0, *m;
	struct ether_header *ceh;
	struct ether_header *beh;
	uint32_t itag, *itagp;
	int hlen = sizeof(*beh) + sizeof(*itagp);
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif
	int txprio;
	uint8_t prio;

	ifp0 = if_get(sc->sc_key.k_if);
	if (ifp0 == NULL || !ISSET(ifp0->if_flags, IFF_RUNNING)) {
		ifq_purge(&ifp->if_snd);
		goto done;
	}

	txprio = sc->sc_txhprio;

	while ((m0 = ifq_dequeue(&ifp->if_snd)) != NULL) {
#if NBPFILTER > 0
		if_bpf = ifp->if_bpf;
		if (if_bpf)
			bpf_mtap_ether(if_bpf, m0, BPF_DIRECTION_OUT);
#endif

		ceh = mtod(m0, struct ether_header *);

		/* force prepend of a whole mbuf because of alignment */
		m = m_get(M_DONTWAIT, m0->m_type);
		if (m == NULL) {
			m_freem(m0);
			continue;
		}

		M_MOVE_PKTHDR(m, m0);
		m->m_next = m0;

		m_align(m, 0);
		m->m_len = 0;

		m = m_prepend(m, hlen, M_DONTWAIT);
		if (m == NULL)
			continue;

		beh = mtod(m, struct ether_header *);

		if (ether_is_bcast(ceh->ether_dhost)) {
			memcpy(beh->ether_dhost, sc->sc_group,
			    sizeof(beh->ether_dhost));
		} else {
			struct bpe_entry *be;

			rw_enter_read(&sc->sc_bridge_lock);
			be = RBT_FIND(bpe_map, &sc->sc_bridge_map,
			    (struct bpe_entry *)ceh->ether_dhost);
			if (bpe_entry_valid(sc, be)) {
				memcpy(beh->ether_dhost, &be->be_b_da,
				    sizeof(beh->ether_dhost));
			} else {
				/* "flood" to unknown hosts */
				memcpy(beh->ether_dhost, sc->sc_group,
				    sizeof(beh->ether_dhost));
			}
			rw_exit_read(&sc->sc_bridge_lock);
		}

		memcpy(beh->ether_shost, ((struct arpcom *)ifp0)->ac_enaddr,
		    sizeof(beh->ether_shost));
		beh->ether_type = htons(ETHERTYPE_PBB);

		prio = (txprio == IF_HDRPRIO_PACKET) ?
		    m->m_pkthdr.pf.prio : txprio;

		itag = sc->sc_key.k_isid;
		itag |= prio << PBB_ITAG_PCP_SHIFT;
		itagp = (uint32_t *)(beh + 1);

		htobem32(itagp, itag);

		if_enqueue(ifp0, m);
	}

done:
	if_put(ifp0);
}

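/*
 * Periodic timeout: walk the learned map and drop dynamic entries
 * that have not been refreshed within sc_bridge_tmo seconds.  The
 * timeout reschedules itself every BPE_BRIDGE_AGE_TMO seconds while
 * the interface is running.
 */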
static void
bpe_bridge_age(void *arg)
{
	struct bpe_softc *sc = arg;
	struct bpe_entry *be, *nbe;
	time_t diff;

	timeout_add_sec(&sc->sc_bridge_age, BPE_BRIDGE_AGE_TMO);

	rw_enter_write(&sc->sc_bridge_lock);
	RBT_FOREACH_SAFE(be, bpe_map, &sc->sc_bridge_map, nbe) {
		if (be->be_type != BPE_ENTRY_DYNAMIC)
			continue;

		diff = time_uptime - be->be_age;
		if (diff < sc->sc_bridge_tmo)
			continue;

		sc->sc_bridge_num--;
		RBT_REMOVE(bpe_map, &sc->sc_bridge_map, be);
		if (refcnt_rele(&be->be_refs))
			pool_put(&bpe_entry_pool, be);
	}
	rw_exit_write(&sc->sc_bridge_lock);
}

static int
bpe_rtfind(struct bpe_softc *sc, struct ifbaconf *baconf)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct bpe_entry *be;
	struct ifbareq bareq;
	caddr_t uaddr, end;
	int error;
	time_t age;
	struct sockaddr_dl *sdl;

	if (baconf->ifbac_len == 0) {
		/* single read is atomic */
		baconf->ifbac_len = sc->sc_bridge_num * sizeof(bareq);
		return (0);
	}

	uaddr = baconf->ifbac_buf;
	end = uaddr + baconf->ifbac_len;

	rw_enter_read(&sc->sc_bridge_lock);
	RBT_FOREACH(be, bpe_map, &sc->sc_bridge_map) {
		if (uaddr >= end)
			break;

		memcpy(bareq.ifba_name, ifp->if_xname,
		    sizeof(bareq.ifba_name));
		memcpy(bareq.ifba_ifsname, ifp->if_xname,
		    sizeof(bareq.ifba_ifsname));
		memcpy(&bareq.ifba_dst, &be->be_c_da,
		    sizeof(bareq.ifba_dst));

		memset(&bareq.ifba_dstsa, 0, sizeof(bareq.ifba_dstsa));
		sdl = (struct sockaddr_dl *)&bareq.ifba_dstsa;
		sdl->sdl_len = sizeof(*sdl);
		sdl->sdl_family = AF_LINK;
		sdl->sdl_index = 0;
		sdl->sdl_type = IFT_ETHER;
		sdl->sdl_nlen = 0;
		sdl->sdl_alen = sizeof(be->be_b_da);
		CTASSERT(sizeof(sdl->sdl_data) >= sizeof(be->be_b_da));
		memcpy(sdl->sdl_data, &be->be_b_da, sizeof(be->be_b_da));

		switch (be->be_type) {
		case BPE_ENTRY_DYNAMIC:
			age = time_uptime - be->be_age;
			bareq.ifba_age = MIN(age, 0xff);
			bareq.ifba_flags = IFBAF_DYNAMIC;
			break;
		case BPE_ENTRY_STATIC:
			bareq.ifba_age = 0;
			bareq.ifba_flags = IFBAF_STATIC;
			break;
		}

		error = copyout(&bareq, uaddr, sizeof(bareq));
		if (error != 0) {
			rw_exit_read(&sc->sc_bridge_lock);
			return (error);
		}

		uaddr += sizeof(bareq);
	}
	baconf->ifbac_len = sc->sc_bridge_num * sizeof(bareq);
	rw_exit_read(&sc->sc_bridge_lock);

	return (0);
}

static void
bpe_flush_map(struct bpe_softc *sc, uint32_t flags)
{
	struct bpe_entry *be, *nbe;

	rw_enter_write(&sc->sc_bridge_lock);
	RBT_FOREACH_SAFE(be, bpe_map, &sc->sc_bridge_map, nbe) {
		if (flags == IFBF_FLUSHDYN &&
		    be->be_type != BPE_ENTRY_DYNAMIC)
			continue;

		RBT_REMOVE(bpe_map, &sc->sc_bridge_map, be);
		if (refcnt_rele(&be->be_refs))
			pool_put(&bpe_entry_pool, be);
	}
	rw_exit_write(&sc->sc_bridge_lock);
}

static int
bpe_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct bpe_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ifbrparam *bparam = (struct ifbrparam *)data;
	int error = 0;

	switch (cmd) {
	case SIOCSIFFLAGS:
		if (ISSET(ifp->if_flags, IFF_UP)) {
			if (!ISSET(ifp->if_flags, IFF_RUNNING))
				error = bpe_up(sc);
			else
				error = 0;
		} else {
			if (ISSET(ifp->if_flags, IFF_RUNNING))
				error = bpe_down(sc);
		}
		break;

	case SIOCSVNETID:
		error = bpe_set_vnetid(sc, ifr);
		break;
	case SIOCGVNETID:
		ifr->ifr_vnetid = sc->sc_key.k_isid;
		break;

	case SIOCSIFPARENT:
		error = bpe_set_parent(sc, (struct if_parent *)data);
		break;
	case SIOCGIFPARENT:
		error = bpe_get_parent(sc, (struct if_parent *)data);
		break;
	case SIOCDIFPARENT:
		error = bpe_del_parent(sc);
		break;

	case SIOCSTXHPRIO:
		if (ifr->ifr_hdrprio == IF_HDRPRIO_PACKET) /* use mbuf prio */
			;
		else if (ifr->ifr_hdrprio < IF_HDRPRIO_MIN ||
		    ifr->ifr_hdrprio > IF_HDRPRIO_MAX) {
			error = EINVAL;
			break;
		}

		sc->sc_txhprio = ifr->ifr_hdrprio;
		break;
	case SIOCGTXHPRIO:
		ifr->ifr_hdrprio = sc->sc_txhprio;
		break;

	case SIOCGIFMEDIA:
		error = bpe_media_get(sc, ifr);
		break;

	case SIOCBRDGSCACHE:
		error = suser(curproc);
		if (error != 0)
			break;

		if (bparam->ifbrp_csize < 1) {
			error = EINVAL;
			break;
		}

		/* commit */
		sc->sc_bridge_max = bparam->ifbrp_csize;
		break;
	case SIOCBRDGGCACHE:
		bparam->ifbrp_csize = sc->sc_bridge_max;
		break;

	case SIOCBRDGSTO:
		error = suser(curproc);
		if (error != 0)
			break;

		if (bparam->ifbrp_ctime < 8 ||
		    bparam->ifbrp_ctime > 3600) {
			error = EINVAL;
			break;
		}
		sc->sc_bridge_tmo = bparam->ifbrp_ctime;
		break;
	case SIOCBRDGGTO:
		bparam->ifbrp_ctime = sc->sc_bridge_tmo;
		break;

	case SIOCBRDGRTS:
		error = bpe_rtfind(sc, (struct ifbaconf *)data);
		break;
	case SIOCBRDGFLUSH:
		error = suser(curproc);
		if (error != 0)
			break;

		bpe_flush_map(sc,
		    ((struct ifbreq *)data)->ifbr_ifsflags);
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		break;
	}

	return (error);
}

static int
bpe_media_get(struct bpe_softc *sc, struct ifreq *ifr)
{
	struct ifnet *ifp0;
	int error;

	ifp0 = if_get(sc->sc_key.k_if);
	if (ifp0 != NULL)
		error = (*ifp0->if_ioctl)(ifp0, SIOCGIFMEDIA, (caddr_t)ifr);
	else
		error = ENOTTY;
	if_put(ifp0);

	return (error);
}

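/*
 * Bringing the interface up: the parent must be Ethernet and large
 * enough to carry the encapsulation.  The usable MTU is the parent's
 * hard MTU minus the 18 bytes of overhead added on output (14-byte
 * backbone Ethernet header plus the 4-byte I-TAG).
 */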
static int
bpe_up(struct bpe_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct ifnet *ifp0;
	struct bpe_softc *osc;
	int error = 0;
	u_int hardmtu;
	u_int hlen = sizeof(struct ether_header) + sizeof(uint32_t);

	KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING));
	NET_ASSERT_LOCKED();

	ifp0 = if_get(sc->sc_key.k_if);
	if (ifp0 == NULL)
		return (ENXIO);

	/* check again if bpe will work on top of the parent */
	if (ifp0->if_type != IFT_ETHER) {
		error = EPROTONOSUPPORT;
		goto put;
	}

	hardmtu = ifp0->if_hardmtu;
	if (hardmtu < hlen) {
		error = ENOBUFS;
		goto put;
	}
	hardmtu -= hlen;
	if (ifp->if_mtu > hardmtu) {
		error = ENOBUFS;
		goto put;
	}

	/* parent is fine, let's prepare the bpe to handle packets */
	ifp->if_hardmtu = hardmtu;
	SET(ifp->if_flags, ifp0->if_flags & IFF_SIMPLEX);

	/* commit the interface */
	error = rw_enter(&bpe_lock, RW_WRITE | RW_INTR);
	if (error != 0)
		goto scrub;

	osc = (struct bpe_softc *)RBT_INSERT(bpe_tree, &bpe_interfaces,
	    (struct bpe_key *)sc);
	rw_exit(&bpe_lock);

	if (osc != NULL) {
		error = EADDRINUSE;
		goto scrub;
	}

	if (bpe_multi(sc, ifp0, SIOCADDMULTI) != 0) {
		error = ENOTCONN;
		goto remove;
	}

	/* Register callback for physical link state changes */
	sc->sc_lh_cookie = hook_establish(ifp0->if_linkstatehooks, 1,
	    bpe_link_hook, sc);

	/* Register callback if parent wants to unregister */
	sc->sc_dh_cookie = hook_establish(ifp0->if_detachhooks, 0,
	    bpe_detach_hook, sc);

	/* we're running now */
	SET(ifp->if_flags, IFF_RUNNING);
	bpe_link_state(sc, ifp0->if_link_state, ifp0->if_baudrate);

	if_put(ifp0);

	timeout_add_sec(&sc->sc_bridge_age, BPE_BRIDGE_AGE_TMO);

	return (0);

remove:
	rw_enter(&bpe_lock, RW_WRITE);
	RBT_REMOVE(bpe_tree, &bpe_interfaces, (struct bpe_key *)sc);
	rw_exit(&bpe_lock);
scrub:
	CLR(ifp->if_flags, IFF_SIMPLEX);
	ifp->if_hardmtu = 0xffff;
put:
	if_put(ifp0);

	return (error);
}

static int
bpe_down(struct bpe_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct ifnet *ifp0;

	NET_ASSERT_LOCKED();

	CLR(ifp->if_flags, IFF_RUNNING);

	ifp0 = if_get(sc->sc_key.k_if);
	if (ifp0 != NULL) {
		hook_disestablish(ifp0->if_detachhooks, sc->sc_dh_cookie);
		hook_disestablish(ifp0->if_linkstatehooks, sc->sc_lh_cookie);
		bpe_multi(sc, ifp0, SIOCDELMULTI);
	}
	if_put(ifp0);

	rw_enter(&bpe_lock, RW_WRITE);
	RBT_REMOVE(bpe_tree, &bpe_interfaces, (struct bpe_key *)sc);
	rw_exit(&bpe_lock);

	CLR(ifp->if_flags, IFF_SIMPLEX);
	ifp->if_hardmtu = 0xffff;

	return (0);
}

static int
bpe_multi(struct bpe_softc *sc, struct ifnet *ifp0, u_long cmd)
{
	struct ifreq ifr;
	struct sockaddr *sa;

	/* make it convincing */
	CTASSERT(sizeof(ifr.ifr_name) == sizeof(ifp0->if_xname));
	memcpy(ifr.ifr_name, ifp0->if_xname, sizeof(ifr.ifr_name));

	sa = &ifr.ifr_addr;
	CTASSERT(sizeof(sa->sa_data) >= sizeof(sc->sc_group));

	sa->sa_family = AF_UNSPEC;
	memcpy(sa->sa_data, sc->sc_group, sizeof(sc->sc_group));

	return ((*ifp0->if_ioctl)(ifp0, cmd, (caddr_t)&ifr));
}

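/*
 * Derive the backbone group address used for flooding: the 802.1ah
 * Backbone Service Instance Group address, i.e. the multicast prefix
 * 01:1e:83 followed by the 24-bit I-SID.  Broadcast and unknown
 * unicast customer frames are sent here so all backbone edges serving
 * the same I-SID receive them.
 */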
static void
bpe_set_group(struct bpe_softc *sc, uint32_t isid)
{
	uint8_t *group = sc->sc_group;

	group[0] = 0x01;
	group[1] = 0x1e;
	group[2] = 0x83;
	group[3] = isid >> 16;
	group[4] = isid >> 8;
	group[5] = isid >> 0;
}

static int
bpe_set_vnetid(struct bpe_softc *sc, const struct ifreq *ifr)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	uint32_t isid;

	if (ifr->ifr_vnetid < PBB_ITAG_ISID_MIN ||
	    ifr->ifr_vnetid > PBB_ITAG_ISID_MAX)
		return (EINVAL);

	isid = ifr->ifr_vnetid;
	if (isid == sc->sc_key.k_isid)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_key.k_isid = isid;
	bpe_set_group(sc, isid);
	bpe_flush_map(sc, IFBF_FLUSHALL);

	return (0);
}

static int
bpe_set_parent(struct bpe_softc *sc, const struct if_parent *p)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct ifnet *ifp0;

	ifp0 = ifunit(p->ifp_parent); /* doesn't need an if_put */
	if (ifp0 == NULL)
		return (ENXIO);

	if (ifp0->if_type != IFT_ETHER)
		return (ENXIO);

	if (ifp0->if_index == sc->sc_key.k_if)
		return (0);

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_key.k_if = ifp0->if_index;
	bpe_flush_map(sc, IFBF_FLUSHALL);

	return (0);
}

static int
bpe_get_parent(struct bpe_softc *sc, struct if_parent *p)
{
	struct ifnet *ifp0;
	int error = 0;

	ifp0 = if_get(sc->sc_key.k_if);
	if (ifp0 == NULL)
		error = EADDRNOTAVAIL;
	else
		memcpy(p->ifp_parent, ifp0->if_xname, sizeof(p->ifp_parent));
	if_put(ifp0);

	return (error);
}

static int
bpe_del_parent(struct bpe_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ISSET(ifp->if_flags, IFF_RUNNING))
		return (EBUSY);

	/* commit */
	sc->sc_key.k_if = 0;
	bpe_flush_map(sc, IFBF_FLUSHALL);

	return (0);
}

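/*
 * Find the bpe interface configured for a given parent interface and
 * I-SID.  Tunnels are kept in a global red-black tree keyed by
 * (parent ifindex, I-SID), so bpe_input can map an incoming frame to
 * at most one bpe interface.
 */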
static inline struct bpe_softc *
bpe_find(struct ifnet *ifp0, uint32_t isid)
{
	struct bpe_key k = { .k_if = ifp0->if_index, .k_isid = isid };
	struct bpe_softc *sc;

	rw_enter_read(&bpe_lock);
	sc = (struct bpe_softc *)RBT_FIND(bpe_tree, &bpe_interfaces, &k);
	rw_exit_read(&bpe_lock);

	return (sc);
}

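/*
 * Learning on receive: remember that customer source address "ca" was
 * seen behind backbone source address "ba", mirroring what a bridge
 * does with its forwarding table.  Existing dynamic entries are
 * refreshed (and re-pointed if the backbone address moved); new ones
 * are inserted as long as the map stays under sc_bridge_max entries.
 */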
static void
bpe_input_map(struct bpe_softc *sc, const uint8_t *ba, const uint8_t *ca)
{
	struct bpe_entry *be;
	int new = 0;

	if (ETHER_IS_MULTICAST(ca))
		return;

	/* remember where it came from */
	rw_enter_read(&sc->sc_bridge_lock);
	be = RBT_FIND(bpe_map, &sc->sc_bridge_map, (struct bpe_entry *)ca);
	if (be == NULL)
		new = 1;
	else {
		be->be_age = time_uptime; /* only a little bit racy */

		if (be->be_type != BPE_ENTRY_DYNAMIC ||
		    ether_is_eq(ba, &be->be_b_da))
			be = NULL;
		else
			refcnt_take(&be->be_refs);
	}
	rw_exit_read(&sc->sc_bridge_lock);

	if (new) {
		struct bpe_entry *obe;
		unsigned int num;

		be = pool_get(&bpe_entry_pool, PR_NOWAIT);
		if (be == NULL) {
			/* oh well */
			return;
		}

		memcpy(&be->be_c_da, ca, sizeof(be->be_c_da));
		memcpy(&be->be_b_da, ba, sizeof(be->be_b_da));
		be->be_type = BPE_ENTRY_DYNAMIC;
		refcnt_init(&be->be_refs);
		be->be_age = time_uptime;

		rw_enter_write(&sc->sc_bridge_lock);
		num = sc->sc_bridge_num;
		if (++num > sc->sc_bridge_max)
			obe = be;
		else {
			/* try and give the ref to the map */
			obe = RBT_INSERT(bpe_map, &sc->sc_bridge_map, be);
			if (obe == NULL) {
				/* count the insert */
				sc->sc_bridge_num = num;
			}
		}
		rw_exit_write(&sc->sc_bridge_lock);

		if (obe != NULL)
			pool_put(&bpe_entry_pool, obe);
	} else if (be != NULL) {
		rw_enter_write(&sc->sc_bridge_lock);
		memcpy(&be->be_b_da, ba, sizeof(be->be_b_da));
		rw_exit_write(&sc->sc_bridge_lock);

		if (refcnt_rele(&be->be_refs)) {
			/* ioctl may have deleted the entry */
			pool_put(&bpe_entry_pool, be);
		}
	}
}

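/*
 * Input path for ETHERTYPE_PBB frames received on the parent
 * interface: validate the I-TAG, look up the bpe interface by parent
 * and I-SID, learn the backbone/customer address pair, strip the
 * outer header and I-TAG, and feed the inner customer frame to the
 * stack as if it had arrived on the bpe interface itself.
 */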
void
bpe_input(struct ifnet *ifp0, struct mbuf *m)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct bpe_softc *sc;
	struct ifnet *ifp;
	struct ether_header *beh, *ceh;
	uint32_t *itagp, itag;
	unsigned int hlen = sizeof(*beh) + sizeof(*itagp) + sizeof(*ceh);
	struct mbuf *n;
	int off;

	if (m->m_len < hlen) {
		m = m_pullup(m, hlen);
		if (m == NULL) {
			/* pbb short ++ */
			return;
		}
	}

	beh = mtod(m, struct ether_header *);
	itagp = (uint32_t *)(beh + 1);
	itag = bemtoh32(itagp);

	if (itag & PBB_ITAG_RES2) {
		/* dropped by res2 ++ */
		goto drop;
	}

	sc = bpe_find(ifp0, itag & PBB_ITAG_ISID);
	if (sc == NULL) {
		/* no interface found */
		goto drop;
	}

	ceh = (struct ether_header *)(itagp + 1);

	bpe_input_map(sc, beh->ether_shost, ceh->ether_shost);

	m_adj(m, sizeof(*beh) + sizeof(*itagp));

	n = m_getptr(m, sizeof(*ceh), &off);
	if (n == NULL) {
		/* no data ++ */
		goto drop;
	}

	if (!ALIGNED_POINTER(mtod(n, caddr_t) + off, uint32_t)) {
		/* unaligned ++ */
		n = m_dup_pkt(m, ETHER_ALIGN, M_NOWAIT);
		m_freem(m);
		if (n == NULL)
			return;

		m = n;
	}

	ifp = &sc->sc_ac.ac_if;

	m->m_flags &= ~(M_BCAST|M_MCAST);
	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

#if NPF > 0
	pf_pkt_addr_changed(m);
#endif

	ml_enqueue(&ml, m);
	if_input(ifp, &ml);
	return;

drop:
	m_freem(m);
}

static void
bpe_detach_hook(void *arg)
{
	struct bpe_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ISSET(ifp->if_flags, IFF_RUNNING)) {
		bpe_down(sc);
		CLR(ifp->if_flags, IFF_UP);
	}

	sc->sc_key.k_if = 0;
}

static void
bpe_link_hook(void *arg)
{
	struct bpe_softc *sc = arg;
	struct ifnet *ifp0;
	u_char link = LINK_STATE_DOWN;
	uint64_t baud = 0;

	ifp0 = if_get(sc->sc_key.k_if);
	if (ifp0 != NULL) {
		link = ifp0->if_link_state;
		baud = ifp0->if_baudrate;
	}
	if_put(ifp0);

	bpe_link_state(sc, link, baud);
}

static void
bpe_link_state(struct bpe_softc *sc, u_char link, uint64_t baud)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	if (ifp->if_link_state == link)
		return;

	ifp->if_link_state = link;
	ifp->if_baudrate = baud;

	if_link_state_change(ifp);
}

static inline int
bpe_cmp(const struct bpe_key *a, const struct bpe_key *b)
{
	if (a->k_if > b->k_if)
		return (1);
	if (a->k_if < b->k_if)
		return (-1);
	if (a->k_isid > b->k_isid)
		return (1);
	if (a->k_isid < b->k_isid)
		return (-1);

	return (0);
}

static inline int
bpe_entry_cmp(const struct bpe_entry *a, const struct bpe_entry *b)
{
	return memcmp(&a->be_c_da, &b->be_c_da, sizeof(a->be_c_da));
}
1023