1 /*
2  * Copyright (C) 2013 Universita` di Pisa. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *   1. Redistributions of source code must retain the above copyright
8  *      notice, this list of conditions and the following disclaimer.
9  *   2. Redistributions in binary form must reproduce the above copyright
10  *      notice, this list of conditions and the following disclaimer in the
11  *      documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  */
25 
26 
27 /*
28  * This module implements the VALE switch for netmap
29 
30 --- VALE SWITCH ---
31 
32 NMG_LOCK() serializes all modifications to switches and ports.
33 A switch cannot be deleted until all ports are gone.
34 
35 For each switch, an SX lock (an RW lock on Linux) protects
36 deletion of ports. When configuring a new port or deleting an
37 existing one, the lock is acquired in exclusive mode (after
38 holding NMG_LOCK). When forwarding, the lock is acquired in
39 shared mode (without NMG_LOCK). The lock is held throughout the
40 entire forwarding cycle, during which the thread may incur a
41 page fault. Hence it is important that sleepable shared locks are used.
42 
43 On the rx ring, the per-port lock is grabbed initially to reserve
44 a number of slots in the ring, then the lock is released,
45 packets are copied from source to destination, and then
46 the lock is acquired again and the receive ring is updated.
47 (A similar thing is done on the tx ring for NIC and host stack
48 ports attached to the switch)
49 
50  */
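/*
 * Illustrative sketch (not compiled in): the reserve/copy/publish
 * sequence described above, using the real kring fields and helpers
 * (nkr_hwlease, nm_kr_space(), nm_kr_lease()); error handling and the
 * leftover-slot recovery are omitted. See nm_bdg_flush() for the
 * complete version.
 */
#if 0
	lockmgr(&kring->q_lock, LK_EXCLUSIVE);	/* 1. reserve slots */
	j = kring->nkr_hwlease;			/* first slot we own */
	howmany = nm_kr_space(kring, 1);	/* how many we may claim */
	lease_idx = nm_kr_lease(kring, howmany, 1);
	lockmgr(&kring->q_lock, LK_RELEASE);

	/* 2. copy packets into the reserved slots, lock not held */

	lockmgr(&kring->q_lock, LK_EXCLUSIVE);	/* 3. publish */
	kring->nkr_leases[lease_idx] = j;	/* report completion */
	lockmgr(&kring->q_lock, LK_RELEASE);
#endif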
51 
52 /*
53  * OS-specific code that is used only within this file.
54  * Other OS-specific code that must be accessed by drivers
55  * is present in netmap_kern.h
56  */
57 
58 /* __FBSDID("$FreeBSD: head/sys/dev/netmap/netmap.c 257176 2013-10-26 17:58:36Z glebius $"); */
59 
60 #include <sys/types.h>
61 #include <sys/errno.h>
62 #include <sys/param.h>	/* defines used in kernel.h */
63 #include <sys/kernel.h>	/* types used in module initialization */
64 #include <sys/conf.h>	/* cdevsw struct, UID, GID */
65 #include <sys/sockio.h>
66 #include <sys/socketvar.h>	/* struct socket */
67 #include <sys/malloc.h>
68 #include <sys/poll.h>
69 #include <sys/rwlock.h>
70 #include <sys/socket.h> /* sockaddrs */
71 #include <sys/selinfo.h>
72 #include <sys/sysctl.h>
73 #include <net/if.h>
74 #include <net/if_var.h>
75 #include <net/bpf.h>		/* BIOCIMMEDIATE */
76 #include <machine/bus.h>	/* bus_dmamap_* */
77 #include <sys/endian.h>
78 #include <sys/refcount.h>
79 
80 // #define prefetch(x)	__builtin_prefetch(x)
81 
82 
83 #define BDG_RWLOCK_T		struct rwlock
84 
85 #define	BDG_RWINIT(b)		\
86 	rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
87 #define BDG_WLOCK(b)		rw_wlock(&(b)->bdg_lock)
88 #define BDG_WUNLOCK(b)		rw_wunlock(&(b)->bdg_lock)
89 #define BDG_RLOCK(b)		rw_rlock(&(b)->bdg_lock)
90 #define BDG_RTRYLOCK(b)		rw_try_rlock(&(b)->bdg_lock)
91 #define BDG_RUNLOCK(b)		rw_runlock(&(b)->bdg_lock)
92 #define BDG_RWDESTROY(b)	rw_destroy(&(b)->bdg_lock)
93 
94 /*
95  * common headers
96  */
97 
98 #include <net/netmap.h>
99 
100 #include "netmap_kern.h"
101 #include "netmap_mem2.h"
102 
103 #ifdef WITH_VALE
104 
105 /*
106  * system parameters (most of them in netmap_kern.h)
107  * NM_NAME	prefix for switch port names, default "vale"
108  * NM_BDG_MAXPORTS	number of ports
109  * NM_BRIDGES	max number of switches in the system.
110  *	XXX should become a sysctl or tunable
111  *
112  * Switch ports are named valeX:Y where X is the switch name and Y
113  * is the port. If Y matches a physical interface name, the port is
114  * connected to a physical device.
115  *
116  * Unlike physical interfaces, switch ports use their own memory region
117  * for rings and buffers.
118  * The virtual interfaces use per-queue locks instead of the core lock.
119  * In the tx loop, we aggregate traffic in batches to make all operations
120  * faster. The batch size is bridge_batch.
121  */
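/*
 * Userspace usage sketch (illustrative, not part of this file): a
 * process attaches to port 1 of switch "vale0" with a plain NIOCREGIF;
 * the virtual port is created on demand. Error checking omitted.
 */
#if 0
	struct nmreq req;
	int fd = open("/dev/netmap", O_RDWR);

	bzero(&req, sizeof(req));
	req.nr_version = NETMAP_API;
	strncpy(req.nr_name, "vale0:1", sizeof(req.nr_name));
	ioctl(fd, NIOCREGIF, &req);	/* creates vale0 and port 1 if needed */
#endif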
122 #define NM_BDG_MAXRINGS		16	/* XXX unclear how many. */
123 #define NM_BDG_MAXSLOTS		4096	/* XXX same as above */
124 #define NM_BRIDGE_RINGSIZE	1024	/* in the device */
125 #define NM_BDG_HASH		1024	/* forwarding table entries */
126 #define NM_BDG_BATCH		1024	/* entries in the forwarding buffer */
127 #define NM_MULTISEG		64	/* max size of a chain of bufs */
128 /* actual size of the tables */
129 #define NM_BDG_BATCH_MAX	(NM_BDG_BATCH + NM_MULTISEG)
130 /* NM_FT_NULL terminates a list of slots in the ft */
131 #define NM_FT_NULL		NM_BDG_BATCH_MAX
132 #define	NM_BRIDGES		8	/* number of bridges */
133 
134 
135 /*
136  * bridge_batch is set via sysctl to the max batch size to be
137  * used in the bridge. The actual value may be larger as the
138  * last packet in the block may overflow the size.
139  */
140 int bridge_batch = NM_BDG_BATCH; /* bridge batch size */
141 SYSCTL_DECL(_dev_netmap);
142 SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0, "");
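/*
 * Runtime tuning example (illustrative): the batch size can be changed
 * with "sysctl dev.netmap.bridge_batch=512"; values larger than
 * NM_BDG_BATCH are clamped back in netmap_vp_txsync().
 */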
143 
144 
145 static int bdg_netmap_attach(struct netmap_adapter *);
146 static int bdg_netmap_reg(struct netmap_adapter *na, int onoff);
147 static int netmap_bwrap_attach(struct ifnet *, struct ifnet *);
148 static int netmap_bwrap_register(struct netmap_adapter *, int onoff);
149 int kern_netmap_regif(struct nmreq *nmr);
150 
151 /*
152  * Each transmit queue accumulates a batch of packets into
153  * a structure before forwarding. Packets to the same
154  * destination are put in a list using ft_next as a link field.
155  * ft_frags and ft_next are valid only on the first fragment.
156  */
157 struct nm_bdg_fwd {	/* forwarding entry for a bridge */
158 	void *ft_buf;		/* netmap or indirect buffer */
159 	uint8_t ft_frags;	/* how many fragments (only on 1st frag) */
160 	uint8_t _ft_port;	/* dst port (unused) */
161 	uint16_t ft_flags;	/* flags, e.g. indirect */
162 	uint16_t ft_len;	/* src fragment len */
163 	uint16_t ft_next;	/* next packet to same destination */
164 };
165 
166 /*
167  * For each output interface, nm_bdg_q is used to construct a list.
168  * bq_len is the number of output buffers (we can have coalescing
169  * during the copy).
170  */
171 struct nm_bdg_q {
172 	uint16_t bq_head;
173 	uint16_t bq_tail;
174 	uint32_t bq_len;	/* number of buffers */
175 };
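/*
 * Illustrative sketch (not compiled in): draining one destination
 * queue built by nm_bdg_flush(). bq_head indexes into the ft[] array;
 * ft_next (valid on first fragments only) links packets bound for the
 * same destination, and NM_FT_NULL terminates the list.
 */
#if 0
	for (i = d->bq_head; i != NM_FT_NULL; i = ft[i].ft_next) {
		/* ft[i] is the first fragment of one packet;
		 * ft[i].ft_frags tells how many fragments it spans.
		 */
	}
#endif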
176 
177 /* XXX revise this */
178 struct nm_hash_ent {
179 	uint64_t	mac;	/* the top 2 bytes are the epoch */
180 	uint64_t	ports;
181 };
182 
183 /*
184  * nm_bridge is a descriptor for a VALE switch.
185  * Interfaces for a bridge are all in bdg_ports[].
186  * The array has fixed size, an empty entry does not terminate
187  * the search, but lookups only occur on attach/detach so we
188  * don't mind if they are slow.
189  *
190  * The bridge is non blocking on the transmit ports: excess
191  * packets are dropped if there is no room on the output port.
192  *
193  * bdg_lock protects accesses to the bdg_ports array.
194  * This is a rw lock (or equivalent).
195  */
196 struct nm_bridge {
197 	/* XXX what is the proper alignment/layout ? */
198 	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
199 	int		bdg_namelen;
200 	uint32_t	bdg_active_ports; /* 0 means free */
201 	char		bdg_basename[IFNAMSIZ];
202 
203 	/* Indexes of active ports (up to active_ports)
204 	 * and all other remaining ports.
205 	 */
206 	uint8_t		bdg_port_index[NM_BDG_MAXPORTS];
207 
208 	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
209 
210 
211 	/*
212 	 * The function to decide the destination port.
213 	 * It returns either of an index of the destination port,
214 	 * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
215 	 * forward this packet.  ring_nr is the source ring index, and the
216 	 * function may overwrite this value to forward this packet to a
217 	 * different ring index.
218 	 * This function must be set by netmap_bdgctl().
219 	 */
220 	bdg_lookup_fn_t nm_bdg_lookup;
221 
222 	/* the forwarding table, MAC+ports.
223 	 * XXX should be changed to an argument to be passed to
224 	 * the lookup function, and allocated on attach
225 	 */
226 	struct nm_hash_ent ht[NM_BDG_HASH];
227 };
228 
229 
230 /*
231  * XXX in principle nm_bridges could be created dynamically
232  * Right now we have a static array and deletions are protected
233  * by an exclusive lock.
234  */
235 struct nm_bridge nm_bridges[NM_BRIDGES];
236 
237 
238 /*
239  * A few functions to tell which kind of port we are using.
240  * XXX should we hold a lock ?
241  *
242  * nma_is_vp()		virtual port
243  * nma_is_host()	port connected to the host stack
244  * nma_is_hw()		port connected to a NIC
245  * nma_is_generic()	generic netmap adapter XXX stop this madness
246  */
247 static __inline int
248 nma_is_vp(struct netmap_adapter *na)
249 {
250 	return na->nm_register == bdg_netmap_reg;
251 }
252 
253 
254 static __inline int
255 nma_is_host(struct netmap_adapter *na)
256 {
257 	return na->nm_register == NULL;
258 }
259 
260 
261 static __inline int
262 nma_is_hw(struct netmap_adapter *na)
263 {
264 	/* In case of sw adapter, nm_register is NULL */
265 	return !nma_is_vp(na) && !nma_is_host(na) && !nma_is_generic(na);
266 }
267 
268 static __inline int
269 nma_is_bwrap(struct netmap_adapter *na)
270 {
271 	return na->nm_register == netmap_bwrap_register;
272 }
273 
274 
275 
276 /*
277  * This is a slightly optimized copy routine which rounds
278  * to multiples of 64 bytes and is often faster than dealing
279  * with other odd sizes. We assume there is enough room
280  * in the source and destination buffers.
281  *
282  * XXX only for multiples of 64 bytes, non overlapped.
283  */
284 static inline void
285 pkt_copy(void *_src, void *_dst, int l)
286 {
287         uint64_t *src = _src;
288         uint64_t *dst = _dst;
289         if (unlikely(l >= 1024)) {
290                 memcpy(dst, src, l);
291                 return;
292         }
293         for (; likely(l > 0); l-=64) {
294                 *dst++ = *src++;
295                 *dst++ = *src++;
296                 *dst++ = *src++;
297                 *dst++ = *src++;
298                 *dst++ = *src++;
299                 *dst++ = *src++;
300                 *dst++ = *src++;
301                 *dst++ = *src++;
302         }
303 }
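/*
 * Usage sketch (illustrative): callers round the length up to a
 * multiple of 64 bytes and must guarantee that both buffers are at
 * least that large, as nm_bdg_flush() does below.
 */
#if 0
	size_t copy_len = (len + 63) & ~63;	/* round up to 64 */
	pkt_copy(src, dst, (int)copy_len);
#endif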
304 
305 
306 
307 /*
308  * locate a bridge among the existing ones.
309  * MUST BE CALLED WITH NMG_LOCK()
310  *
311  * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
312  * We assume that this is called with a name of at least NM_NAME chars.
313  */
314 static struct nm_bridge *
315 nm_find_bridge(const char *name, int create)
316 {
317 	int i, l, namelen;
318 	struct nm_bridge *b = NULL;
319 
320 	NMG_LOCK_ASSERT();
321 
322 	namelen = strlen(NM_NAME);	/* base length */
323 	l = name ? strlen(name) : 0;		/* actual length */
324 	if (l < namelen) {
325 		D("invalid bridge name %s", name ? name : "(null)");
326 		return NULL;
327 	}
328 	for (i = namelen + 1; i < l; i++) {
329 		if (name[i] == ':') {
330 			namelen = i;
331 			break;
332 		}
333 	}
334 	if (namelen >= IFNAMSIZ)
335 		namelen = IFNAMSIZ;
336 	ND("--- prefix is '%.*s' ---", namelen, name);
337 
338 	/* lookup the name, remember empty slot if there is one */
339 	for (i = 0; i < NM_BRIDGES; i++) {
340 		struct nm_bridge *x = nm_bridges + i;
341 
342 		if (x->bdg_active_ports == 0) {
343 			if (create && b == NULL)
344 				b = x;	/* record empty slot */
345 		} else if (x->bdg_namelen != namelen) {
346 			continue;
347 		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
348 			ND("found '%.*s' at %d", namelen, name, i);
349 			b = x;
350 			break;
351 		}
352 	}
353 	if (i == NM_BRIDGES && b) { /* name not found, can create entry */
354 		/* initialize the bridge */
355 		strncpy(b->bdg_basename, name, namelen);
356 		ND("create new bridge %s with ports %d", b->bdg_basename,
357 			b->bdg_active_ports);
358 		b->bdg_namelen = namelen;
359 		b->bdg_active_ports = 0;
360 		for (i = 0; i < NM_BDG_MAXPORTS; i++)
361 			b->bdg_port_index[i] = i;
362 		/* set the default function */
363 		b->nm_bdg_lookup = netmap_bdg_learning;
364 		/* reset the MAC address table */
365 		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
366 	}
367 	return b;
368 }
369 
370 
371 /*
372  * Free the forwarding tables for rings attached to switch ports.
373  */
374 static void
375 nm_free_bdgfwd(struct netmap_adapter *na)
376 {
377 	int nrings, i;
378 	struct netmap_kring *kring;
379 
380 	NMG_LOCK_ASSERT();
381 	nrings = nma_is_vp(na) ? na->num_tx_rings : na->num_rx_rings;
382 	kring = nma_is_vp(na) ? na->tx_rings : na->rx_rings;
383 	for (i = 0; i < nrings; i++) {
384 		if (kring[i].nkr_ft) {
385 			kfree(kring[i].nkr_ft, M_DEVBUF);
386 			kring[i].nkr_ft = NULL; /* protect from freeing twice */
387 		}
388 	}
389 }
390 
391 
392 /*
393  * Allocate the forwarding tables for the rings attached to the bridge ports.
394  */
395 static int
396 nm_alloc_bdgfwd(struct netmap_adapter *na)
397 {
398 	int nrings, l, i, num_dstq;
399 	struct netmap_kring *kring;
400 
401 	NMG_LOCK_ASSERT();
402 	/* all port:rings + broadcast */
403 	num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
404 	l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
405 	l += sizeof(struct nm_bdg_q) * num_dstq;
406 	l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
407 
408 	nrings = na->num_tx_rings + 1;
409 	kring = na->tx_rings;
410 	for (i = 0; i < nrings; i++) {
411 		struct nm_bdg_fwd *ft;
412 		struct nm_bdg_q *dstq;
413 		int j;
414 
415 		ft = kmalloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
416 		if (!ft) {
417 			nm_free_bdgfwd(na);
418 			return ENOMEM;
419 		}
420 		dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
421 		for (j = 0; j < num_dstq; j++) {
422 			dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
423 			dstq[j].bq_len = 0;
424 		}
425 		kring[i].nkr_ft = ft;
426 	}
427 	return 0;
428 }
429 
430 
431 static void
432 netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
433 {
434 	int s_hw = hw, s_sw = sw;
435 	int i, lim = b->bdg_active_ports;
436 	uint8_t tmp[NM_BDG_MAXPORTS];
437 
438 	/*
439 	New algorithm:
440 	make a copy of bdg_port_index;
441 	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
442 	in the array of bdg_port_index, replacing them with
443 	entries from the bottom of the array;
444 	decrement bdg_active_ports;
445 	acquire BDG_WLOCK() and copy back the array.
446 	 */
447 
448 	D("detach %d and %d (lim %d)", hw, sw, lim);
449 	/* make a copy of the list of active ports, update it,
450 	 * and then copy back within BDG_WLOCK().
451 	 */
452 	memcpy(tmp, b->bdg_port_index, sizeof(tmp));
453 	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
454 		if (hw >= 0 && tmp[i] == hw) {
455 			ND("detach hw %d at %d", hw, i);
456 			lim--; /* point to last active port */
457 			tmp[i] = tmp[lim]; /* swap with i */
458 			tmp[lim] = hw;	/* now this is inactive */
459 			hw = -1;
460 		} else if (sw >= 0 && tmp[i] == sw) {
461 			ND("detach sw %d at %d", sw, i);
462 			lim--;
463 			tmp[i] = tmp[lim];
464 			tmp[lim] = sw;
465 			sw = -1;
466 		} else {
467 			i++;
468 		}
469 	}
470 	if (hw >= 0 || sw >= 0) {
471 		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
472 	}
473 
474 	BDG_WLOCK(b);
475 	b->bdg_ports[s_hw] = NULL;
476 	if (s_sw >= 0) {
477 		b->bdg_ports[s_sw] = NULL;
478 	}
479 	memcpy(b->bdg_port_index, tmp, sizeof(tmp));
480 	b->bdg_active_ports = lim;
481 	BDG_WUNLOCK(b);
482 
483 	ND("now %d active ports", lim);
484 	if (lim == 0) {
485 		ND("marking bridge %s as free", b->bdg_basename);
486 		b->nm_bdg_lookup = NULL;
487 	}
488 }
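/*
 * Worked example (illustrative) of the swap-with-last removal above:
 * detaching port 2 from an index array with 4 active entries moves the
 * last active entry into its slot and shrinks the active count.
 *
 *	before: tmp = [ 2 0 3 1 ]   lim = 4
 *	after:  tmp = [ 1 0 3 2 ]   lim = 3   (2 is past the active region)
 */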
489 
490 static void
491 netmap_adapter_vp_dtor(struct netmap_adapter *na)
492 {
493 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
494 	struct nm_bridge *b = vpna->na_bdg;
495 	struct ifnet *ifp = na->ifp;
496 
497 	ND("%s has %d references", NM_IFPNAME(ifp), na->na_refcount);
498 
499 	if (b) {
500 		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
501 	}
502 
503 	bzero(ifp, sizeof(*ifp));
504 	kfree(ifp, M_DEVBUF);
505 	na->ifp = NULL;
506 }
507 
508 int
509 netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
510 {
511 	const char *name = nmr->nr_name;
512 	struct ifnet *ifp;
513 	int error = 0;
514 	struct netmap_adapter *ret;
515 	struct netmap_vp_adapter *vpna;
516 	struct nm_bridge *b;
517 	int i, j, cand = -1, cand2 = -1;
518 	int needed;
519 
520 	*na = NULL;     /* default return value */
521 
522 	/* first try to see if this is a bridge port. */
523 	NMG_LOCK_ASSERT();
524 	if (strncmp(name, NM_NAME, sizeof(NM_NAME) - 1)) {
525 		return 0;  /* no error, but no VALE prefix */
526 	}
527 
528 	b = nm_find_bridge(name, create);
529 	if (b == NULL) {
530 		D("no bridges available for '%s'", name);
531 		return (ENXIO);
532 	}
533 
534 	/* Now we are sure that name starts with the bridge's name,
535 	 * lookup the port in the bridge. We need to scan the entire
536 	 * list. It is not important to hold a WLOCK on the bridge
537 	 * during the search because NMG_LOCK already guarantees
538 	 * that there are no other possible writers.
539 	 */
540 
541 	/* lookup in the local list of ports */
542 	for (j = 0; j < b->bdg_active_ports; j++) {
543 		i = b->bdg_port_index[j];
544 		vpna = b->bdg_ports[i];
545 		// KASSERT(na != NULL);
546 		ifp = vpna->up.ifp;
547 		/* XXX make sure the name only contains one : */
548 		if (!strcmp(NM_IFPNAME(ifp), name)) {
549 			netmap_adapter_get(&vpna->up);
550 			ND("found existing if %s refs %d", name,
551 				vpna->na_bdg_refcount);
552 			*na = (struct netmap_adapter *)vpna;
553 			return 0;
554 		}
555 	}
556 	/* not found, should we create it? */
557 	if (!create)
558 		return ENXIO;
559 	/* yes we should, see if we have space to attach entries */
560 	needed = 2; /* in some cases we only need 1 */
561 	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
562 		D("bridge full %d, cannot create new port", b->bdg_active_ports);
563 		return EINVAL;
564 	}
565 	/* record the next two ports available, but do not allocate yet */
566 	cand = b->bdg_port_index[b->bdg_active_ports];
567 	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
568 	ND("+++ bridge %s port %s used %d avail %d %d",
569 		b->bdg_basename, name, b->bdg_active_ports, cand, cand2);
570 
571 	/*
572 	 * try to see if there is a matching NIC with this name
573 	 * (after the bridge's name)
574 	 */
575 	ifp = ifunit_ref(name + b->bdg_namelen + 1);
576 	if (!ifp) { /* this is a virtual port */
577 		/* Create a temporary NA with arguments, then
578 		 * bdg_netmap_attach() will allocate the real one
579 		 * and attach it to the ifp
580 		 */
581 		struct netmap_adapter tmp_na;
582 
583 		if (nmr->nr_cmd) {
584 			/* nr_cmd must be 0 for a virtual port */
585 			return EINVAL;
586 		}
587 		bzero(&tmp_na, sizeof(tmp_na));
588 		/* bound checking */
589 		tmp_na.num_tx_rings = nmr->nr_tx_rings;
590 		nm_bound_var(&tmp_na.num_tx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
591 		nmr->nr_tx_rings = tmp_na.num_tx_rings; // write back
592 		tmp_na.num_rx_rings = nmr->nr_rx_rings;
593 		nm_bound_var(&tmp_na.num_rx_rings, 1, 1, NM_BDG_MAXRINGS, NULL);
594 		nmr->nr_rx_rings = tmp_na.num_rx_rings; // write back
595 		nm_bound_var(&nmr->nr_tx_slots, NM_BRIDGE_RINGSIZE,
596 				1, NM_BDG_MAXSLOTS, NULL);
597 		tmp_na.num_tx_desc = nmr->nr_tx_slots;
598 		nm_bound_var(&nmr->nr_rx_slots, NM_BRIDGE_RINGSIZE,
599 				1, NM_BDG_MAXSLOTS, NULL);
600 		tmp_na.num_rx_desc = nmr->nr_rx_slots;
601 
602 		/* create a struct ifnet for the new port.
603 		 * need M_NOWAIT as we are under nma_lock
604 		 */
605 		ifp = kmalloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
606 		if (!ifp)
607 			return ENOMEM;
608 
609 		strcpy(ifp->if_xname, name);
610 		tmp_na.ifp = ifp;
611 		/* bdg_netmap_attach creates a struct netmap_adapter */
612 		error = bdg_netmap_attach(&tmp_na);
613 		if (error) {
614 			D("error %d", error);
615 			kfree(ifp, M_DEVBUF);
616 			return error;
617 		}
618 		ret = NA(ifp);
619 		cand2 = -1;	/* only need one port */
620 	} else {  /* this is a NIC */
621 		struct ifnet *fake_ifp;
622 
623 		error = netmap_get_hw_na(ifp, &ret);
624 		if (error || ret == NULL)
625 			goto out;
626 
627 		/* make sure the NIC is not already in use */
628 		if (NETMAP_OWNED_BY_ANY(ret)) {
629 			D("NIC %s busy, cannot attach to bridge",
630 				NM_IFPNAME(ifp));
631 			error = EINVAL;
632 			goto out;
633 		}
634 		/* create a fake interface */
635 		fake_ifp = kmalloc(sizeof(*ifp), M_DEVBUF, M_NOWAIT | M_ZERO);
636 		if (!fake_ifp) {
637 			error = ENOMEM;
638 			goto out;
639 		}
640 		strcpy(fake_ifp->if_xname, name);
641 		error = netmap_bwrap_attach(fake_ifp, ifp);
642 		if (error) {
643 			kfree(fake_ifp, M_DEVBUF);
644 			goto out;
645 		}
646 		ret = NA(fake_ifp);
647 		if (nmr->nr_arg1 != NETMAP_BDG_HOST)
648 			cand2 = -1; /* only need one port */
649 		if_rele(ifp);
650 	}
651 	vpna = (struct netmap_vp_adapter *)ret;
652 
653 	BDG_WLOCK(b);
654 	vpna->bdg_port = cand;
655 	ND("NIC  %p to bridge port %d", vpna, cand);
656 	/* bind the port to the bridge (virtual ports are not active) */
657 	b->bdg_ports[cand] = vpna;
658 	vpna->na_bdg = b;
659 	b->bdg_active_ports++;
660 	if (cand2 >= 0) {
661 		struct netmap_vp_adapter *hostna = vpna + 1;
662 		/* also bind the host stack to the bridge */
663 		b->bdg_ports[cand2] = hostna;
664 		hostna->bdg_port = cand2;
665 		hostna->na_bdg = b;
666 		b->bdg_active_ports++;
667 		ND("host %p to bridge port %d", hostna, cand2);
668 	}
669 	ND("if %s refs %d", name, vpna->up.na_refcount);
670 	BDG_WUNLOCK(b);
671 	*na = ret;
672 	netmap_adapter_get(ret);
673 	return 0;
674 
675 out:
676 	if_rele(ifp);
677 
678 	return error;
679 }
680 
681 
682 /* Process NETMAP_BDG_ATTACH and NETMAP_BDG_DETACH */
683 static int
684 nm_bdg_attach(struct nmreq *nmr)
685 {
686 	struct netmap_adapter *na;
687 	struct netmap_if *nifp;
688 	struct netmap_priv_d *npriv;
689 	struct netmap_bwrap_adapter *bna;
690 	int error;
691 
692 	npriv = kmalloc(sizeof(*npriv), M_DEVBUF, M_NOWAIT|M_ZERO);
693 	if (npriv == NULL)
694 		return ENOMEM;
695 	NMG_LOCK();
696 	/* XXX probably netmap_get_bdg_na() */
697 	error = netmap_get_na(nmr, &na, 1 /* create if not exists */);
698 	if (error) /* no device, or another bridge or user owns the device */
699 		goto unlock_exit;
700 	/* netmap_get_na() sets na_bdg if this is a physical interface
701 	 * that we can attach to a switch.
702 	 */
703 	if (!nma_is_bwrap(na)) {
704 		/* got reference to a virtual port or direct access to a NIC.
705 		 * perhaps specified no bridge prefix or wrong NIC name
706 		 */
707 		error = EINVAL;
708 		goto unref_exit;
709 	}
710 
711 	if (na->active_fds > 0) { /* already registered */
712 		error = EBUSY;
713 		goto unref_exit;
714 	}
715 
716 	nifp = netmap_do_regif(npriv, na, nmr->nr_ringid, &error);
717 	if (!nifp) {
718 		goto unref_exit;
719 	}
720 
721 	bna = (struct netmap_bwrap_adapter*)na;
722 	bna->na_kpriv = npriv;
723 	NMG_UNLOCK();
724 	ND("registered %s to netmap-mode", NM_IFPNAME(na->ifp));
725 	return 0;
726 
727 unref_exit:
728 	netmap_adapter_put(na);
729 unlock_exit:
730 	NMG_UNLOCK();
731 	bzero(npriv, sizeof(*npriv));
732 	kfree(npriv, M_DEVBUF);
733 	return error;
734 }
735 
736 static int
737 nm_bdg_detach(struct nmreq *nmr)
738 {
739 	struct netmap_adapter *na;
740 	int error;
741 	struct netmap_bwrap_adapter *bna;
742 	int last_instance;
743 
744 	NMG_LOCK();
745 	error = netmap_get_na(nmr, &na, 0 /* don't create */);
746 	if (error) { /* no device, or another bridge or user owns the device */
747 		goto unlock_exit;
748 	}
749 	if (!nma_is_bwrap(na)) {
750 		/* got reference to a virtual port or direct access to a NIC.
751 		 * perhaps specified no bridge prefix or a wrong NIC name
752 		 */
753 		error = EINVAL;
754 		goto unref_exit;
755 	}
756 	bna = (struct netmap_bwrap_adapter *)na;
757 
758 	if (na->active_fds == 0) { /* not registered */
759 		error = EINVAL;
760 		goto unref_exit;
761 	}
762 
763 	last_instance = netmap_dtor_locked(bna->na_kpriv); /* unregister */
764 	if (!last_instance) {
765 		D("--- error, trying to detach an entry with active mmaps");
766 		error = EINVAL;
767 	} else {
768 		struct netmap_priv_d *npriv = bna->na_kpriv;
769 
770 		bna->na_kpriv = NULL;
771 		D("deleting priv");
772 
773 		bzero(npriv, sizeof(*npriv));
774 		kfree(npriv, M_DEVBUF);
775 	}
776 
777 unref_exit:
778 	netmap_adapter_put(na);
779 unlock_exit:
780 	NMG_UNLOCK();
781 	return error;
782 
783 }
784 
785 
786 /* exported to kernel callers, e.g. OVS ?
787  * Entry point.
788  * Called without NMG_LOCK.
789  */
790 int
791 netmap_bdg_ctl(struct nmreq *nmr, bdg_lookup_fn_t func)
792 {
793 	struct nm_bridge *b;
794 	struct netmap_vp_adapter *na;
795 	struct ifnet *iter;
796 	char *name = nmr->nr_name;
797 	int cmd = nmr->nr_cmd, namelen = strlen(name);
798 	int error = 0, i, j;
799 
800 	switch (cmd) {
801 	case NETMAP_BDG_ATTACH:
802 		error = nm_bdg_attach(nmr);
803 		break;
804 
805 	case NETMAP_BDG_DETACH:
806 		error = nm_bdg_detach(nmr);
807 		break;
808 
809 	case NETMAP_BDG_LIST:
810 		/* this is used to enumerate bridges and ports */
811 		if (namelen) { /* look up indexes of bridge and port */
812 			if (strncmp(name, NM_NAME, strlen(NM_NAME))) {
813 				error = EINVAL;
814 				break;
815 			}
816 			NMG_LOCK();
817 			b = nm_find_bridge(name, 0 /* don't create */);
818 			if (!b) {
819 				error = ENOENT;
820 				NMG_UNLOCK();
821 				break;
822 			}
823 
824 			error = ENOENT;
825 			for (j = 0; j < b->bdg_active_ports; j++) {
826 				i = b->bdg_port_index[j];
827 				na = b->bdg_ports[i];
828 				if (na == NULL) {
829 					D("---AAAAAAAAARGH-------");
830 					continue;
831 				}
832 				iter = na->up.ifp;
833 				/* the former and the latter identify a
834 				 * virtual port and a NIC, respectively
835 				 */
836 				if (!strcmp(iter->if_xname, name)) {
837 					/* bridge index */
838 					nmr->nr_arg1 = b - nm_bridges;
839 					nmr->nr_arg2 = i; /* port index */
840 					error = 0;
841 					break;
842 				}
843 			}
844 			NMG_UNLOCK();
845 		} else {
846 			/* return the first non-empty entry starting from
847 			 * bridge nr_arg1 and port nr_arg2.
848 			 *
849 			 * Users can detect the end of the same bridge by
850 			 * seeing the new and old value of nr_arg1, and can
851 			 * detect the end of all the bridges by error != 0
852 			 */
853 			i = nmr->nr_arg1;
854 			j = nmr->nr_arg2;
855 
856 			NMG_LOCK();
857 			for (error = ENOENT; i < NM_BRIDGES; i++) {
858 				b = nm_bridges + i;
859 				if (j >= b->bdg_active_ports) {
860 					j = 0; /* following bridges scan from 0 */
861 					continue;
862 				}
863 				nmr->nr_arg1 = i;
864 				nmr->nr_arg2 = j;
865 				j = b->bdg_port_index[j];
866 				na = b->bdg_ports[j];
867 				iter = na->up.ifp;
868 				strncpy(name, iter->if_xname, (size_t)IFNAMSIZ);
869 				error = 0;
870 				break;
871 			}
872 			NMG_UNLOCK();
873 		}
874 		break;
875 
876 	case NETMAP_BDG_LOOKUP_REG:
877 		/* register a lookup function to the given bridge.
878 		 * nmr->nr_name may be just the bridge's name (including
879 		 * the ':' if it is not just NM_NAME).
880 		 */
881 		if (!func) {
882 			error = EINVAL;
883 			break;
884 		}
885 		NMG_LOCK();
886 		b = nm_find_bridge(name, 0 /* don't create */);
887 		if (!b) {
888 			error = EINVAL;
889 		} else {
890 			b->nm_bdg_lookup = func;
891 		}
892 		NMG_UNLOCK();
893 		break;
894 
895 	default:
896 		D("invalid cmd (nmr->nr_cmd) (0x%x)", cmd);
897 		error = EINVAL;
898 		break;
899 	}
900 	return error;
901 }
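/*
 * Userspace usage sketch (illustrative, not part of this file):
 * enumerate all bridges and ports with NETMAP_BDG_LIST via NIOCGINFO.
 * nr_name must stay empty to remain in enumeration mode; the loop ends
 * when the ioctl returns an error. Error checking is minimal.
 */
#if 0
	struct nmreq req;

	bzero(&req, sizeof(req));
	req.nr_version = NETMAP_API;
	req.nr_cmd = NETMAP_BDG_LIST;
	req.nr_arg1 = req.nr_arg2 = 0;	/* start from the beginning */
	while (ioctl(fd, NIOCGINFO, &req) == 0) {
		printf("bridge %u port %u: %s\n",
		    req.nr_arg1, req.nr_arg2, req.nr_name);
		req.nr_arg2++;		/* next port */
		req.nr_name[0] = '\0';	/* stay in enumeration mode */
	}
#endif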
902 
903 
904 static int
905 netmap_vp_krings_create(struct netmap_adapter *na)
906 {
907 	u_int ntx, nrx, tailroom;
908 	int error, i;
909 	uint32_t *leases;
910 
911 	/* XXX vps do not need host rings,
912 	 * but we crash if we don't have one
913 	 */
914 	ntx = na->num_tx_rings + 1;
915 	nrx = na->num_rx_rings + 1;
916 
917 	/*
918 	 * Leases are attached to RX rings on vale ports
919 	 */
920 	tailroom = sizeof(uint32_t) * na->num_rx_desc * nrx;
921 
922 	error = netmap_krings_create(na, ntx, nrx, tailroom);
923 	if (error)
924 		return error;
925 
926 	leases = na->tailroom;
927 
928 	for (i = 0; i < nrx; i++) { /* Receive rings */
929 		na->rx_rings[i].nkr_leases = leases;
930 		leases += na->num_rx_desc;
931 	}
932 
933 	error = nm_alloc_bdgfwd(na);
934 	if (error) {
935 		netmap_krings_delete(na);
936 		return error;
937 	}
938 
939 	return 0;
940 }
941 
942 static void
943 netmap_vp_krings_delete(struct netmap_adapter *na)
944 {
945 	nm_free_bdgfwd(na);
946 	netmap_krings_delete(na);
947 }
948 
949 
950 static int
951 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
952 	struct netmap_vp_adapter *na, u_int ring_nr);
953 
954 
955 /*
956  * Grab packets from a kring, move them into the ft structure
957  * associated with the tx (input) port. Max one instance per port,
958  * filtered on input (ioctl, poll or XXX).
959  * Returns the next position in the ring.
960  */
961 static int
962 nm_bdg_preflush(struct netmap_vp_adapter *na, u_int ring_nr,
963 	struct netmap_kring *kring, u_int end)
964 {
965 	struct netmap_ring *ring = kring->ring;
966 	struct nm_bdg_fwd *ft;
967 	u_int j = kring->nr_hwcur, lim = kring->nkr_num_slots - 1;
968 	u_int ft_i = 0;	/* start from 0 */
969 	u_int frags = 1; /* how many frags ? */
970 	struct nm_bridge *b = na->na_bdg;
971 
972 	/* To protect against modifications to the bridge we acquire a
973 	 * shared lock, waiting if we can sleep (if the source port is
974 	 * attached to a user process) or with a trylock otherwise (NICs).
975 	 */
976 	ND("wait rlock for %d packets", ((j > end ? lim+1 : 0) + end) - j);
977 	if (na->up.na_flags & NAF_BDG_MAYSLEEP)
978 		BDG_RLOCK(b);
979 	else if (!BDG_RTRYLOCK(b))
980 		return 0;
981 	ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
982 	ft = kring->nkr_ft;
983 
984 	for (; likely(j != end); j = nm_next(j, lim)) {
985 		struct netmap_slot *slot = &ring->slot[j];
986 		char *buf;
987 
988 		ft[ft_i].ft_len = slot->len;
989 		ft[ft_i].ft_flags = slot->flags;
990 
991 		ND("flags is 0x%x", slot->flags);
992 		/* this slot goes into a list so initialize the link field */
993 		ft[ft_i].ft_next = NM_FT_NULL;
994 		buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
995 			(void *)(uintptr_t)slot->ptr : BDG_NMB(&na->up, slot);
996 		prefetch(buf);
997 		++ft_i;
998 		if (slot->flags & NS_MOREFRAG) {
999 			frags++;
1000 			continue;
1001 		}
1002 		if (unlikely(netmap_verbose && frags > 1))
1003 			RD(5, "%d frags at %d", frags, ft_i - frags);
1004 		ft[ft_i - frags].ft_frags = frags;
1005 		frags = 1;
1006 		if (unlikely((int)ft_i >= bridge_batch))
1007 			ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1008 	}
1009 	if (frags > 1) {
1010 		D("truncate incomplete fragment at %d (%d frags)", ft_i, frags);
1011 		// ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG
1012 		ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
1013 		ft[ft_i - frags].ft_frags = frags - 1;
1014 	}
1015 	if (ft_i)
1016 		ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
1017 	BDG_RUNLOCK(b);
1018 	return j;
1019 }
1020 
1021 
1022 /*
1023  *---- support for virtual bridge -----
1024  */
1025 
1026 /* ----- FreeBSD if_bridge hash function ------- */
1027 
1028 /*
1029  * The following hash function is adapted from "Hash Functions" by Bob Jenkins
1030  * ("Algorithm Alley", Dr. Dobbs Journal, September 1997).
1031  *
1032  * http://www.burtleburtle.net/bob/hash/spooky.html
1033  */
1034 #define mix(a, b, c)                                                    \
1035 do {                                                                    \
1036         a -= b; a -= c; a ^= (c >> 13);                                 \
1037         b -= c; b -= a; b ^= (a << 8);                                  \
1038         c -= a; c -= b; c ^= (b >> 13);                                 \
1039         a -= b; a -= c; a ^= (c >> 12);                                 \
1040         b -= c; b -= a; b ^= (a << 16);                                 \
1041         c -= a; c -= b; c ^= (b >> 5);                                  \
1042         a -= b; a -= c; a ^= (c >> 3);                                  \
1043         b -= c; b -= a; b ^= (a << 10);                                 \
1044         c -= a; c -= b; c ^= (b >> 15);                                 \
1045 } while (/*CONSTCOND*/0)
1046 
1047 static __inline uint32_t
1048 nm_bridge_rthash(const uint8_t *addr)
1049 {
1050         uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hash key
1051 
1052         b += addr[5] << 8;
1053         b += addr[4];
1054         a += addr[3] << 24;
1055         a += addr[2] << 16;
1056         a += addr[1] << 8;
1057         a += addr[0];
1058 
1059         mix(a, b, c);
1060 #define BRIDGE_RTHASH_MASK	(NM_BDG_HASH-1)
1061         return (c & BRIDGE_RTHASH_MASK);
1062 }
1063 
1064 #undef mix
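/*
 * Illustrative sketch (not compiled in): mapping a MAC address to a
 * forwarding-table bucket, as netmap_bdg_learning() does below.
 */
#if 0
	uint8_t mac[6] = { 0x00, 0x16, 0x3e, 0x00, 0x00, 0x01 };
	uint32_t bucket = nm_bridge_rthash(mac);	/* 0 .. NM_BDG_HASH-1 */
	struct nm_hash_ent *he = &b->ht[bucket];
#endif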
1065 
1066 
1067 static int
1068 bdg_netmap_reg(struct netmap_adapter *na, int onoff)
1069 {
1070 	struct netmap_vp_adapter *vpna =
1071 		(struct netmap_vp_adapter*)na;
1072 	struct ifnet *ifp = na->ifp;
1073 
1074 	/* the interface is already attached to the bridge,
1075 	 * so we only need to toggle IFCAP_NETMAP.
1076 	 */
1077 	BDG_WLOCK(vpna->na_bdg);
1078 	if (onoff) {
1079 		ifp->if_capenable |= IFCAP_NETMAP;
1080 	} else {
1081 		ifp->if_capenable &= ~IFCAP_NETMAP;
1082 	}
1083 	BDG_WUNLOCK(vpna->na_bdg);
1084 	return 0;
1085 }
1086 
1087 
1088 /*
1089  * Lookup function for a learning bridge.
1090  * Update the hash table with the source address,
1091  * and then return the destination port index, and the
1092  * ring in *dst_ring (at the moment, always use ring 0)
1093  */
1094 u_int
1095 netmap_bdg_learning(char *buf, u_int buf_len, uint8_t *dst_ring,
1096 		struct netmap_vp_adapter *na)
1097 {
1098 	struct nm_hash_ent *ht = na->na_bdg->ht;
1099 	uint32_t sh, dh;
1100 	u_int dst, mysrc = na->bdg_port;
1101 	uint64_t smac, dmac;
1102 
1103 	if (buf_len < 14) {
1104 		D("invalid buf length %d", buf_len);
1105 		return NM_BDG_NOPORT;
1106 	}
1107 	dmac = le64toh(*(uint64_t *)(buf)) & 0xffffffffffff;
1108 	smac = le64toh(*(uint64_t *)(buf + 4));
1109 	smac >>= 16;
1110 
1111 	/*
1112 	 * The hash is somewhat expensive, there might be some
1113 	 * worthwhile optimizations here.
1114 	 */
1115 	if ((buf[6] & 1) == 0) { /* valid src */
1116 		uint8_t *s = buf+6;
1117 		sh = nm_bridge_rthash(s); // XXX hash of source
1118 		/* update source port forwarding entry */
1119 		ht[sh].mac = smac;	/* XXX expire ? */
1120 		ht[sh].ports = mysrc;
1121 		if (netmap_verbose)
1122 		    D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
1123 			s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
1124 	}
1125 	dst = NM_BDG_BROADCAST;
1126 	if ((buf[0] & 1) == 0) { /* unicast */
1127 		dh = nm_bridge_rthash(buf); // XXX hash of dst
1128 		if (ht[dh].mac == dmac) {	/* found dst */
1129 			dst = ht[dh].ports;
1130 		}
1131 		/* XXX otherwise return NM_BDG_UNKNOWN ? */
1132 	}
1133 	*dst_ring = 0;
1134 	return dst;
1135 }
1136 
1137 
1138 /*
1139  * This flush routine supports only unicast and broadcast but a large
1140  * number of ports, and lets us replace the learn and dispatch functions.
1141  */
1142 int
1143 nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
1144 		u_int ring_nr)
1145 {
1146 	struct nm_bdg_q *dst_ents, *brddst;
1147 	uint16_t num_dsts = 0, *dsts;
1148 	struct nm_bridge *b = na->na_bdg;
1149 	u_int i, j, me = na->bdg_port;
1150 
1151 	/*
1152 	 * The work area (pointed by ft) is followed by an array of
1153  * pointers to queues, dst_ents; there are NM_BDG_MAXRINGS
1154 	 * queues per port plus one for the broadcast traffic.
1155 	 * Then we have an array of destination indexes.
1156 	 */
1157 	dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
1158 	dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
1159 
1160 	/* first pass: find a destination for each packet in the batch */
1161 	for (i = 0; likely(i < n); i += ft[i].ft_frags) {
1162 		uint8_t dst_ring = ring_nr; /* default, same ring as origin */
1163 		uint16_t dst_port, d_i;
1164 		struct nm_bdg_q *d;
1165 
1166 		ND("slot %d frags %d", i, ft[i].ft_frags);
1167 		dst_port = b->nm_bdg_lookup(ft[i].ft_buf, ft[i].ft_len,
1168 			&dst_ring, na);
1169 		if (netmap_verbose > 255)
1170 			RD(5, "slot %d port %d -> %d", i, me, dst_port);
1171 		if (dst_port == NM_BDG_NOPORT)
1172 			continue; /* this packet is identified to be dropped */
1173 		else if (unlikely(dst_port > NM_BDG_MAXPORTS))
1174 			continue;
1175 		else if (dst_port == NM_BDG_BROADCAST)
1176 			dst_ring = 0; /* broadcasts always go to ring 0 */
1177 		else if (unlikely(dst_port == me ||
1178 		    !b->bdg_ports[dst_port]))
1179 			continue;
1180 
1181 		/* get a position in the scratch pad */
1182 		d_i = dst_port * NM_BDG_MAXRINGS + dst_ring;
1183 		d = dst_ents + d_i;
1184 
1185 		/* append the first fragment to the list */
1186 		if (d->bq_head == NM_FT_NULL) { /* new destination */
1187 			d->bq_head = d->bq_tail = i;
1188 			/* remember this position to be scanned later */
1189 			if (dst_port != NM_BDG_BROADCAST)
1190 				dsts[num_dsts++] = d_i;
1191 		} else {
1192 			ft[d->bq_tail].ft_next = i;
1193 			d->bq_tail = i;
1194 		}
1195 		d->bq_len += ft[i].ft_frags;
1196 	}
1197 
1198 	/*
1199 	 * Broadcast traffic goes to ring 0 on all destinations.
1200 	 * So we need to add these rings to the list of ports to scan.
1201 	 * XXX at the moment we scan all NM_BDG_MAXPORTS ports, which is
1202 	 * expensive. We should keep a compact list of active destinations
1203 	 * so we could shorten this loop.
1204 	 */
1205 	brddst = dst_ents + NM_BDG_BROADCAST * NM_BDG_MAXRINGS;
1206 	if (brddst->bq_head != NM_FT_NULL) {
1207 		for (j = 0; likely(j < b->bdg_active_ports); j++) {
1208 			uint16_t d_i;
1209 			i = b->bdg_port_index[j];
1210 			if (unlikely(i == me))
1211 				continue;
1212 			d_i = i * NM_BDG_MAXRINGS;
1213 			if (dst_ents[d_i].bq_head == NM_FT_NULL)
1214 				dsts[num_dsts++] = d_i;
1215 		}
1216 	}
1217 
1218 	ND(5, "pass 1 done %d pkts %d dsts", n, num_dsts);
1219 	/* second pass: scan destinations (XXX will be modular somehow) */
1220 	for (i = 0; i < num_dsts; i++) {
1221 		struct ifnet *dst_ifp;
1222 		struct netmap_vp_adapter *dst_na;
1223 		struct netmap_kring *kring;
1224 		struct netmap_ring *ring;
1225 		u_int dst_nr, lim, j, sent = 0, d_i, next, brd_next;
1226 		u_int needed, howmany;
1227 		int retry = netmap_txsync_retry;
1228 		struct nm_bdg_q *d;
1229 		uint32_t my_start = 0, lease_idx = 0;
1230 		int nrings;
1231 
1232 		d_i = dsts[i];
1233 		ND("second pass %d port %d", i, d_i);
1234 		d = dst_ents + d_i;
1235 		// XXX fix the division
1236 		dst_na = b->bdg_ports[d_i/NM_BDG_MAXRINGS];
1237 		/* protect from the lookup function returning an inactive
1238 		 * destination port
1239 		 */
1240 		if (unlikely(dst_na == NULL))
1241 			goto cleanup;
1242 		if (dst_na->up.na_flags & NAF_SW_ONLY)
1243 			goto cleanup;
1244 		dst_ifp = dst_na->up.ifp;
1245 		/*
1246 		 * The interface may be in !netmap mode in two cases:
1247 		 * - when na is attached but not activated yet;
1248 		 * - when na is being deactivated but is still attached.
1249 		 */
1250 		if (unlikely(!(dst_ifp->if_capenable & IFCAP_NETMAP))) {
1251 			ND("not in netmap mode!");
1252 			goto cleanup;
1253 		}
1254 
1255 		/* there is at least one either unicast or broadcast packet */
1256 		brd_next = brddst->bq_head;
1257 		next = d->bq_head;
1258 		/* we need to reserve this many slots. If fewer are
1259 		 * available, some packets will be dropped.
1260 		 * Packets may have multiple fragments, so there is a
1261 		 * chance that we may not use all of the slots we have
1262 		 * claimed, and we will need to handle the leftover ones
1263 		 * when we regain the lock.
1264 		 */
1265 		needed = d->bq_len + brddst->bq_len;
1266 
1267 		ND(5, "pass 2 dst %d is %x %s", i, d_i,
1268 			nma_is_vp(&dst_na->up) ? "virtual" : "nic/host");
1269 		dst_nr = d_i & (NM_BDG_MAXRINGS-1);
1270 		nrings = dst_na->up.num_rx_rings;
1271 		if (dst_nr >= nrings)
1272 			dst_nr = dst_nr % nrings;
1273 		kring = &dst_na->up.rx_rings[dst_nr];
1274 		ring = kring->ring;
1275 		lim = kring->nkr_num_slots - 1;
1276 
1277 retry:
1278 
1279 		/* reserve the buffers in the queue and an entry
1280 		 * to report completion, and drop lock.
1281 		 * XXX this might become a helper function.
1282 		 */
1283 		lockmgr(&kring->q_lock, LK_EXCLUSIVE);
1284 		if (kring->nkr_stopped) {
1285 			lockmgr(&kring->q_lock, LK_RELEASE);
1286 			goto cleanup;
1287 		}
1288 		if (dst_na->retry) {
1289 			dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1290 		}
1291 		my_start = j = kring->nkr_hwlease;
1292 		howmany = nm_kr_space(kring, 1);
1293 		if (needed < howmany)
1294 			howmany = needed;
1295 		lease_idx = nm_kr_lease(kring, howmany, 1);
1296 		lockmgr(&kring->q_lock, LK_RELEASE);
1297 
1298 		/* only retry if we need more than available slots */
1299 		if (retry && needed <= howmany)
1300 			retry = 0;
1301 
1302 		/* copy to the destination queue */
1303 		while (howmany > 0) {
1304 			struct netmap_slot *slot;
1305 			struct nm_bdg_fwd *ft_p, *ft_end;
1306 			u_int cnt;
1307 
1308 			/* find the queue from which we pick next packet.
1309 			 * NM_FT_NULL is always higher than valid indexes
1310 			 * so we never dereference it if the other list
1311 			 * has packets (and if both are empty we never
1312 			 * get here).
1313 			 */
1314 			if (next < brd_next) {
1315 				ft_p = ft + next;
1316 				next = ft_p->ft_next;
1317 			} else { /* insert broadcast */
1318 				ft_p = ft + brd_next;
1319 				brd_next = ft_p->ft_next;
1320 			}
1321 			cnt = ft_p->ft_frags; // cnt > 0
1322 			if (unlikely(cnt > howmany))
1323 			    break; /* no more space */
1324 			howmany -= cnt;
1325 			if (netmap_verbose && cnt > 1)
1326 				RD(5, "rx %d frags to %d", cnt, j);
1327 			ft_end = ft_p + cnt;
1328 			do {
1329 			    void *dst, *src = ft_p->ft_buf;
1330 			    size_t len = (ft_p->ft_len + 63) & ~63; /* round to a multiple of 64 */
1331 
1332 			    slot = &ring->slot[j];
1333 			    dst = BDG_NMB(&dst_na->up, slot);
1335 
1336 			    ND("send %d %d bytes at %s:%d",
1337 				i, ft_p->ft_len, NM_IFPNAME(dst_ifp), j);
1338 			    if (ft_p->ft_flags & NS_INDIRECT) {
1339 				if (copyin(src, dst, len)) {
1340 					// invalid user pointer, pretend len is 0
1341 					ft_p->ft_len = 0;
1342 				}
1343 			    } else {
1344 				//memcpy(dst, src, len);
1345 				pkt_copy(src, dst, (int)len);
1346 			    }
1347 			    slot->len = ft_p->ft_len;
1348 			    slot->flags = (cnt << 8) | NS_MOREFRAG;
1349 			    j = nm_next(j, lim);
1350 			    ft_p++;
1351 			    sent++;
1352 			} while (ft_p != ft_end);
1353 			slot->flags = (cnt << 8); /* clear flag on last entry */
1354 			/* are we done ? */
1355 			if (next == NM_FT_NULL && brd_next == NM_FT_NULL)
1356 				break;
1357 		}
1358 		{
1359 		    /* current position */
1360 		    uint32_t *p = kring->nkr_leases; /* shorthand */
1361 		    uint32_t update_pos;
1362 		    int still_locked = 1;
1363 
1364 		    lockmgr(&kring->q_lock, LK_EXCLUSIVE);
1365 		    if (unlikely(howmany > 0)) {
1366 			/* we did not use all the buffers. If I am the last
1367 			 * one, I can recover the slots; otherwise I must
1368 			 * fill them with 0 to mark empty packets.
1369 			 */
1370 			ND("leftover %d bufs", howmany);
1371 			if (nm_next(lease_idx, lim) == kring->nkr_lease_idx) {
1372 			    /* yes i am the last one */
1373 			    ND("roll back nkr_hwlease to %d", j);
1374 			    kring->nkr_hwlease = j;
1375 			} else {
1376 			    while (howmany-- > 0) {
1377 				ring->slot[j].len = 0;
1378 				ring->slot[j].flags = 0;
1379 				j = nm_next(j, lim);
1380 			    }
1381 			}
1382 		    }
1383 		    p[lease_idx] = j; /* report I am done */
1384 
1385 		    update_pos = nm_kr_rxpos(kring);
1386 
1387 		    if (my_start == update_pos) {
1388 			/* all slots before my_start have been reported,
1389 			 * so scan subsequent leases to see if other ranges
1390 			 * have been completed, and do a selwakeup or txsync.
1391 		         */
1392 			while (lease_idx != kring->nkr_lease_idx &&
1393 				p[lease_idx] != NR_NOSLOT) {
1394 			    j = p[lease_idx];
1395 			    p[lease_idx] = NR_NOSLOT;
1396 			    lease_idx = nm_next(lease_idx, lim);
1397 			}
1398 			/* j is the new 'write' position. j != my_start
1399 			 * means there are new buffers to report
1400 			 */
1401 			if (likely(j != my_start)) {
1402 				uint32_t old_avail = kring->nr_hwavail;
1403 
1404 				kring->nr_hwavail = (j >= kring->nr_hwcur) ?
1405 					j - kring->nr_hwcur :
1406 					j + lim + 1 - kring->nr_hwcur;
1407 				if (kring->nr_hwavail < old_avail) {
1408 					D("avail shrink %d -> %d",
1409 						old_avail, kring->nr_hwavail);
1410 				}
1411 				dst_na->up.nm_notify(&dst_na->up, dst_nr, NR_RX, 0);
1412 				still_locked = 0;
1413 				lockmgr(&kring->q_lock, LK_RELEASE);
1414 				if (dst_na->retry && retry--)
1415 					goto retry;
1416 			}
1417 		    }
1418 		    if (still_locked)
1419 			lockmgr(&kring->q_lock, LK_RELEASE);
1420 		}
1421 cleanup:
1422 		d->bq_head = d->bq_tail = NM_FT_NULL; /* cleanup */
1423 		d->bq_len = 0;
1424 	}
1425 	brddst->bq_head = brddst->bq_tail = NM_FT_NULL; /* cleanup */
1426 	brddst->bq_len = 0;
1427 	return 0;
1428 }
1429 
1430 static int
1431 netmap_vp_txsync(struct netmap_vp_adapter *na, u_int ring_nr, int flags)
1432 {
1433 	struct netmap_kring *kring = &na->up.tx_rings[ring_nr];
1434 	struct netmap_ring *ring = kring->ring;
1435 	u_int j, k, lim = kring->nkr_num_slots - 1;
1436 
1437 	k = ring->cur;
1438 	if (k > lim)
1439 		return netmap_ring_reinit(kring);
1440 
1441 	if (bridge_batch <= 0) { /* testing only */
1442 		j = k; // used all
1443 		goto done;
1444 	}
1445 	if (bridge_batch > NM_BDG_BATCH)
1446 		bridge_batch = NM_BDG_BATCH;
1447 
1448 	j = nm_bdg_preflush(na, ring_nr, kring, k);
1449 	if (j != k)
1450 		D("early break at %d/ %d, avail %d", j, k, kring->nr_hwavail);
1451 	/* k-j modulo ring size is the number of slots processed */
1452 	if (k < j)
1453 		k += kring->nkr_num_slots;
1454 	kring->nr_hwavail = lim - (k - j);
1455 
1456 done:
1457 	kring->nr_hwcur = j;
1458 	ring->avail = kring->nr_hwavail;
1459 	if (netmap_verbose)
1460 		D("%s ring %d flags %d", NM_IFPNAME(na->up.ifp), ring_nr, flags);
1461 	return 0;
1462 }
1463 
1464 
1465 /*
1466  * main dispatch routine for the bridge.
1467  * We already know that only one thread is running this.
1468  * we must run nm_bdg_preflush without lock.
1469  */
1470 static int
1471 bdg_netmap_txsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1472 {
1473 	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
1474 	return netmap_vp_txsync(vpna, ring_nr, flags);
1475 }
1476 
1477 
1478 /*
1479  * user process reading from a VALE switch.
1480  * Already protected against concurrent calls from userspace,
1481  * but we must acquire the queue's lock to protect against
1482  * writers on the same queue.
1483  */
1484 static int
1485 bdg_netmap_rxsync(struct netmap_adapter *na, u_int ring_nr, int flags)
1486 {
1487 	struct netmap_kring *kring = &na->rx_rings[ring_nr];
1488 	struct netmap_ring *ring = kring->ring;
1489 	u_int j, lim = kring->nkr_num_slots - 1;
1490 	u_int k = ring->cur, resvd = ring->reserved;
1491 	int n;
1492 
1493 	lockmgr(&kring->q_lock, LK_EXCLUSIVE);
1494 	if (k > lim) {
1495 		D("ouch dangerous reset!!!");
1496 		n = netmap_ring_reinit(kring);
1497 		goto done;
1498 	}
1499 
1500 	/* skip past packets that userspace has released */
1501 	j = kring->nr_hwcur;    /* netmap ring index */
1502 	if (resvd > 0) {
1503 		if (resvd + ring->avail >= lim + 1) {
1504 			D("XXX invalid reserve/avail %d %d", resvd, ring->avail);
1505 			ring->reserved = resvd = 0; // XXX panic...
1506 		}
1507 		k = (k >= resvd) ? k - resvd : k + lim + 1 - resvd;
1508 	}
1509 
1510 	if (j != k) { /* userspace has released some packets. */
1511 		n = k - j;
1512 		if (n < 0)
1513 			n += kring->nkr_num_slots;
1514 		ND("userspace releases %d packets", n);
1515 		for (n = 0; likely(j != k); n++) {
1516 			struct netmap_slot *slot = &ring->slot[j];
1517 			void *addr = BDG_NMB(na, slot);
1518 
1519 			if (addr == netmap_buffer_base) { /* bad buf */
1520 				D("bad buffer index %d, ignore ?",
1521 					slot->buf_idx);
1522 			}
1523 			slot->flags &= ~NS_BUF_CHANGED;
1524 			j = nm_next(j, lim);
1525 		}
1526 		kring->nr_hwavail -= n;
1527 		kring->nr_hwcur = k;
1528 	}
1529 	/* tell userspace that there are new packets */
1530 	ring->avail = kring->nr_hwavail - resvd;
1531 	n = 0;
1532 done:
1533 	lockmgr(&kring->q_lock, LK_RELEASE);
1534 	return n;
1535 }
1536 
1537 static int
1538 bdg_netmap_attach(struct netmap_adapter *arg)
1539 {
1540 	struct netmap_vp_adapter *vpna;
1541 	struct netmap_adapter *na;
1542 	int error;
1543 
1544 	vpna = kmalloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
1545 	if (vpna == NULL)
1546 		return ENOMEM;
1547  	na = &vpna->up;
1548 	*na = *arg;
1549 	na->na_flags |= NAF_BDG_MAYSLEEP | NAF_MEM_OWNER;
1550 	na->nm_txsync = bdg_netmap_txsync;
1551 	na->nm_rxsync = bdg_netmap_rxsync;
1552 	na->nm_register = bdg_netmap_reg;
1553 	na->nm_dtor = netmap_adapter_vp_dtor;
1554 	na->nm_krings_create = netmap_vp_krings_create;
1555 	na->nm_krings_delete = netmap_vp_krings_delete;
1556 	na->nm_mem = netmap_mem_private_new(NM_IFPNAME(arg->ifp),
1557 			na->num_tx_rings, na->num_tx_desc,
1558 			na->num_rx_rings, na->num_rx_desc);
1559 	/* other nmd fields are set in the common routine */
1560 	error = netmap_attach_common(na);
1561 	if (error) {
1562 		kfree(vpna, M_DEVBUF);
1563 		return error;
1564 	}
1565 	return 0;
1566 }
1567 
1568 static void
1569 netmap_bwrap_dtor(struct netmap_adapter *na)
1570 {
1571 	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
1572 	struct netmap_adapter *hwna = bna->hwna;
1573 	struct nm_bridge *b = bna->up.na_bdg,
1574 		*bh = bna->host.na_bdg;
1575 	struct ifnet *ifp = na->ifp;
1576 
1577 	ND("na %p", na);
1578 
1579 	if (b) {
1580 		netmap_bdg_detach_common(b, bna->up.bdg_port,
1581 			(bh ? bna->host.bdg_port : -1));
1582 	}
1583 
1584 	hwna->na_private = NULL;
1585 	netmap_adapter_put(hwna);
1586 
1587 	bzero(ifp, sizeof(*ifp));
1588 	kfree(ifp, M_DEVBUF);
1589 	na->ifp = NULL;
1590 
1591 }
1592 
1593 /*
1594  * Pass packets from nic to the bridge.
1595  * XXX TODO check locking: this is called from the interrupt
1596  * handler so we should make sure that the interface is not
1597  * disconnected while passing down an interrupt.
1598  *
1599  * Note, no user process can access this NIC so we can ignore
1600  * the info in the 'ring'.
1601  */
1602 /* callback that overwrites the hwna notify callback.
1603  * Packets come from the outside or from the host stack and are put on an hwna rx ring.
1604  * The bridge wrapper then sends the packets through the bridge.
1605  */
1606 static int
1607 netmap_bwrap_intr_notify(struct netmap_adapter *na, u_int ring_nr, enum txrx tx, int flags)
1608 {
1609 	struct ifnet *ifp = na->ifp;
1610 	struct netmap_bwrap_adapter *bna = na->na_private;
1611 	struct netmap_vp_adapter *hostna = &bna->host;
1612 	struct netmap_kring *kring, *bkring;
1613 	struct netmap_ring *ring;
1614 	int is_host_ring = ring_nr == na->num_rx_rings;
1615 	struct netmap_vp_adapter *vpna = &bna->up;
1616 	int error = 0;
1617 
1618 	ND("%s[%d] %s %x", NM_IFPNAME(ifp), ring_nr, (tx == NR_TX ? "TX" : "RX"), flags);
1619 
1620 	if (flags & NAF_DISABLE_NOTIFY) {
1621 		kring = tx == NR_TX ? na->tx_rings : na->rx_rings;
1622 		bkring = tx == NR_TX ? vpna->up.rx_rings : vpna->up.tx_rings;
1623 		if (kring->nkr_stopped)
1624 			netmap_disable_ring(bkring);
1625 		else
1626 			bkring->nkr_stopped = 0;
1627 		return 0;
1628 	}
1629 
1630 	if (ifp == NULL || !(ifp->if_capenable & IFCAP_NETMAP))
1631 		return 0;
1632 
1633 	if (tx == NR_TX)
1634 		return 0;
1635 
1636 	kring = &na->rx_rings[ring_nr];
1637 	ring = kring->ring;
1638 
1639 	/* make sure the ring is not disabled */
1640 	if (nm_kr_tryget(kring))
1641 		return 0;
1642 
1643 	if (is_host_ring && hostna->na_bdg == NULL) {
1644 		error = bna->save_notify(na, ring_nr, tx, flags);
1645 		goto put_out;
1646 	}
1647 
1648 	if (is_host_ring) {
1649 		vpna = hostna;
1650 		ring_nr = 0;
1651 	} else {
1652 		/* fetch packets that have arrived.
1653 		 * XXX maybe do this in a loop ?
1654 		 */
1655 		error = na->nm_rxsync(na, ring_nr, 0);
1656 		if (error)
1657 			goto put_out;
1658 	}
1659 	if (kring->nr_hwavail == 0 && netmap_verbose) {
1660 		D("how strange, interrupt with no packets on %s",
1661 			NM_IFPNAME(ifp));
1662 		goto put_out;
1663 	}
1664 	/* XXX avail ? */
1665 	ring->cur = nm_kr_rxpos(kring);
1666 	netmap_vp_txsync(vpna, ring_nr, flags);
1667 
1668 	if (!is_host_ring)
1669 		error = na->nm_rxsync(na, ring_nr, 0);
1670 
1671 put_out:
1672 	nm_kr_put(kring);
1673 	return error;
1674 }
1675 
1676 static int
1677 netmap_bwrap_register(struct netmap_adapter *na, int onoff)
1678 {
1679 	struct netmap_bwrap_adapter *bna =
1680 		(struct netmap_bwrap_adapter *)na;
1681 	struct netmap_adapter *hwna = bna->hwna;
1682 	struct netmap_vp_adapter *hostna = &bna->host;
1683 	int error;
1684 
1685 	ND("%s %d", NM_IFPNAME(ifp), onoff);
1686 
1687 	if (onoff) {
1688 		int i;
1689 
1690 		hwna->na_lut = na->na_lut;
1691 		hwna->na_lut_objtotal = na->na_lut_objtotal;
1692 
1693 		if (hostna->na_bdg) {
1694 			hostna->up.na_lut = na->na_lut;
1695 			hostna->up.na_lut_objtotal = na->na_lut_objtotal;
1696 		}
1697 
1698 		/* cross-link the netmap rings */
1699 		for (i = 0; i <= na->num_tx_rings; i++) {
1700 			hwna->tx_rings[i].nkr_num_slots = na->rx_rings[i].nkr_num_slots;
1701 			hwna->tx_rings[i].ring = na->rx_rings[i].ring;
1702 		}
1703 		for (i = 0; i <= na->num_rx_rings; i++) {
1704 			hwna->rx_rings[i].nkr_num_slots = na->tx_rings[i].nkr_num_slots;
1705 			hwna->rx_rings[i].ring = na->tx_rings[i].ring;
1706 		}
1707 	}
1708 
1709 	if (hwna->ifp) {
1710 		error = hwna->nm_register(hwna, onoff);
1711 		if (error)
1712 			return error;
1713 	}
1714 
1715 	bdg_netmap_reg(na, onoff);
1716 
1717 	if (onoff) {
1718 		bna->save_notify = hwna->nm_notify;
1719 		hwna->nm_notify = netmap_bwrap_intr_notify;
1720 	} else {
1721 		hwna->nm_notify = bna->save_notify;
1722 		hwna->na_lut = NULL;
1723 		hwna->na_lut_objtotal = 0;
1724 	}
1725 
1726 	return 0;
1727 }
1728 
1729 static int
1730 netmap_bwrap_config(struct netmap_adapter *na, u_int *txr, u_int *txd,
1731 				    u_int *rxr, u_int *rxd)
1732 {
1733 	struct netmap_bwrap_adapter *bna =
1734 		(struct netmap_bwrap_adapter *)na;
1735 	struct netmap_adapter *hwna = bna->hwna;
1736 
1737 	/* forward the request */
1738 	netmap_update_config(hwna);
1739 	/* swap the results */
1740 	*txr = hwna->num_rx_rings;
1741 	*txd = hwna->num_rx_desc;
1742 	*rxr = hwna->num_tx_rings;
1743 	*rxd = hwna->num_tx_desc;
1744 
1745 	return 0;
1746 }
1747 
1748 static int
1749 netmap_bwrap_krings_create(struct netmap_adapter *na)
1750 {
1751 	struct netmap_bwrap_adapter *bna =
1752 		(struct netmap_bwrap_adapter *)na;
1753 	struct netmap_adapter *hwna = bna->hwna;
1754 	struct netmap_adapter *hostna = &bna->host.up;
1755 	int error;
1756 
1757 	ND("%s", NM_IFPNAME(na->ifp));
1758 
1759 	error = netmap_vp_krings_create(na);
1760 	if (error)
1761 		return error;
1762 
1763 	error = hwna->nm_krings_create(hwna);
1764 	if (error) {
1765 		netmap_vp_krings_delete(na);
1766 		return error;
1767 	}
1768 
1769 	hostna->tx_rings = na->tx_rings + na->num_tx_rings;
1770 	hostna->rx_rings = na->rx_rings + na->num_rx_rings;
1771 
1772 	return 0;
1773 }
1774 
1775 static void
1776 netmap_bwrap_krings_delete(struct netmap_adapter *na)
1777 {
1778 	struct netmap_bwrap_adapter *bna =
1779 		(struct netmap_bwrap_adapter *)na;
1780 	struct netmap_adapter *hwna = bna->hwna;
1781 
1782 	ND("%s", NM_IFPNAME(na->ifp));
1783 
1784 	hwna->nm_krings_delete(hwna);
1785 	netmap_vp_krings_delete(na);
1786 }
1787 
1788 /* notify method for the bridge-->hwna direction */
1789 static int
1790 netmap_bwrap_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1791 {
1792 	struct netmap_bwrap_adapter *bna =
1793 		(struct netmap_bwrap_adapter *)na;
1794 	struct netmap_adapter *hwna = bna->hwna;
1795 	struct netmap_kring *kring, *hw_kring;
1796 	struct netmap_ring *ring;
1797 	u_int lim, k;
1798 	int error = 0;
1799 
1800 	if (tx == NR_TX)
1801 	        return ENXIO;
1802 
1803 	kring = &na->rx_rings[ring_n];
1804 	hw_kring = &hwna->tx_rings[ring_n];
1805 	ring = kring->ring;
1806 
1807 	lim = kring->nkr_num_slots - 1;
1808 	k = nm_kr_rxpos(kring);
1809 
1810 	if (hwna->ifp == NULL || !(hwna->ifp->if_capenable & IFCAP_NETMAP))
1811 		return 0;
1812 	ring->cur = k;
1813 	ND("%s[%d] PRE rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
1814 		NM_IFPNAME(na->ifp), ring_n,
1815 		kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
1816 		ring->cur, ring->avail, ring->reserved,
1817 		hw_kring->nr_hwcur, hw_kring->nr_hwavail);
1818 	if (ring_n == na->num_rx_rings) {
1819 		netmap_txsync_to_host(hwna);
1820 	} else {
1821 		error = hwna->nm_txsync(hwna, ring_n, flags);
1822 	}
1823 	kring->nr_hwcur = ring->cur;
1824 	kring->nr_hwavail = 0;
1825 	kring->nr_hwreserved = lim - ring->avail;
1826 	ND("%s[%d] PST rx(%d, %d, %d, %d) ring(%d, %d, %d) tx(%d, %d)",
1827 		NM_IFPNAME(na->ifp), ring_n,
1828 		kring->nr_hwcur, kring->nr_hwavail, kring->nkr_hwlease, kring->nr_hwreserved,
1829 		ring->cur, ring->avail, ring->reserved,
1830 		hw_kring->nr_hwcur, hw_kring->nr_hwavail);
1831 
1832 	return error;
1833 }
1834 
1835 static int
1836 netmap_bwrap_host_notify(struct netmap_adapter *na, u_int ring_n, enum txrx tx, int flags)
1837 {
1838 	struct netmap_bwrap_adapter *bna = na->na_private;
1839 	struct netmap_adapter *port_na = &bna->up.up;
1840 	if (tx == NR_TX || ring_n != 0)
1841 		return ENXIO;
1842 	return netmap_bwrap_notify(port_na, port_na->num_rx_rings, NR_RX, flags);
1843 }
1844 
1845 /* attach a bridge wrapper to the 'real' device */
1846 static int
1847 netmap_bwrap_attach(struct ifnet *fake, struct ifnet *real)
1848 {
1849 	struct netmap_bwrap_adapter *bna;
1850 	struct netmap_adapter *na;
1851 	struct netmap_adapter *hwna = NA(real);
1852 	struct netmap_adapter *hostna;
1853 	int error;
1854 
1855 
1856 	bna = kmalloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
1857 	if (bna == NULL)
1858 		return ENOMEM;
1859 
1860 	na = &bna->up.up;
1861 	na->ifp = fake;
1862 	/* fill the ring data for the bwrap adapter with rx/tx meanings
1863 	 * swapped. The real cross-linking will be done during register,
1864 	 * when all the krings will have been created.
1865 	 */
1866 	na->num_rx_rings = hwna->num_tx_rings;
1867 	na->num_tx_rings = hwna->num_rx_rings;
1868 	na->num_tx_desc = hwna->num_rx_desc;
1869 	na->num_rx_desc = hwna->num_tx_desc;
1870 	na->nm_dtor = netmap_bwrap_dtor;
1871 	na->nm_register = netmap_bwrap_register;
1872 	// na->nm_txsync = netmap_bwrap_txsync;
1873 	// na->nm_rxsync = netmap_bwrap_rxsync;
1874 	na->nm_config = netmap_bwrap_config;
1875 	na->nm_krings_create = netmap_bwrap_krings_create;
1876 	na->nm_krings_delete = netmap_bwrap_krings_delete;
1877 	na->nm_notify = netmap_bwrap_notify;
1878 	na->nm_mem = hwna->nm_mem;
1879 	na->na_private = na; /* prevent NIOCREGIF */
1880 	bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
1881 
1882 	bna->hwna = hwna;
1883 	netmap_adapter_get(hwna);
1884 	hwna->na_private = bna; /* weak reference */
1885 
1886 	hostna = &bna->host.up;
1887 	hostna->ifp = hwna->ifp;
1888 	hostna->num_tx_rings = 1;
1889 	hostna->num_tx_desc = hwna->num_rx_desc;
1890 	hostna->num_rx_rings = 1;
1891 	hostna->num_rx_desc = hwna->num_tx_desc;
1892 	// hostna->nm_txsync = netmap_bwrap_host_txsync;
1893 	// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
1894 	hostna->nm_notify = netmap_bwrap_host_notify;
1895 	hostna->nm_mem = na->nm_mem;
1896 	hostna->na_private = bna;
1897 
1898 	D("%s<->%s txr %d txd %d rxr %d rxd %d", fake->if_xname, real->if_xname,
1899 		na->num_tx_rings, na->num_tx_desc,
1900 		na->num_rx_rings, na->num_rx_desc);
1901 
1902 	error = netmap_attach_common(na);
1903 	if (error) {
1904 		netmap_adapter_put(hwna);
1905 		kfree(bna, M_DEVBUF);
1906 		return error;
1907 	}
1908 	return 0;
1909 }
1910 
1911 void
1912 netmap_init_bridges(void)
1913 {
1914 	int i;
1915 	bzero(nm_bridges, sizeof(struct nm_bridge) * NM_BRIDGES); /* safety */
1916 	for (i = 0; i < NM_BRIDGES; i++)
1917 		BDG_RWINIT(&nm_bridges[i]);
1918 }
1919 #endif /* WITH_VALE */
1920