/*	$NetBSD: vnet.c,v 1.6 2022/02/11 23:49:28 riastradh Exp $	*/
/*	$OpenBSD: vnet.c,v 1.62 2020/07/10 13:26:36 patrick Exp $	*/
/*
 * Copyright (c) 2009, 2015 Mark Kettenis
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/callout.h>
#include <sys/device.h>
#include <sys/kmem.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>

#include <machine/autoconf.h>
#include <machine/hypervisor.h>
#include <machine/openfirm.h>

#include <net/if.h>
#include <net/if_media.h>

#include <netinet/in.h>
#include <net/if_ether.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#include <uvm/uvm_extern.h>

#include <sparc64/dev/cbusvar.h>
#include <sparc64/dev/ldcvar.h>
#include <sparc64/dev/viovar.h>

#ifdef VNET_DEBUG
#define DPRINTF(x)	printf x
#else
#define DPRINTF(x)
#endif

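/*
 * Sizes (in LDC packets) of the transmit and receive queues we hand
 * to the hypervisor.
 */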
#define VNET_TX_ENTRIES		32
#define VNET_RX_ENTRIES		32

struct vnet_attr_info {
	struct vio_msg_tag	tag;
	uint8_t			xfer_mode;
	uint8_t			addr_type;
	uint16_t		ack_freq;
	uint32_t		_reserved1;
	uint64_t		addr;
	uint64_t		mtu;
	uint64_t		_reserved2[3];
};

/* Address types. */
#define VNET_ADDR_ETHERMAC	0x01

/* Sub-Type envelopes. */
#define VNET_MCAST_INFO		0x0101

#define VNET_NUM_MCAST		7

struct vnet_mcast_info {
	struct vio_msg_tag	tag;
	uint8_t			set;
	uint8_t			count;
	uint8_t			mcast_addr[VNET_NUM_MCAST][ETHER_ADDR_LEN];
	uint32_t		_reserved;
};

struct vnet_desc {
	struct vio_dring_hdr	hdr;
	uint32_t		nbytes;
	uint32_t		ncookies;
	struct ldc_cookie	cookie[2];
};

struct vnet_desc_msg {
	struct vio_msg_tag	tag;
	uint64_t		seq_no;
	uint64_t		desc_handle;
	uint32_t		nbytes;
	uint32_t		ncookies;
	struct ldc_cookie	cookie[1];
};

struct vnet_dring {
	bus_dmamap_t		vd_map;
	bus_dma_segment_t	vd_seg;
	struct vnet_desc	*vd_desc;
	int			vd_nentries;
};

struct vnet_dring *vnet_dring_alloc(bus_dma_tag_t, int);
void	vnet_dring_free(bus_dma_tag_t, struct vnet_dring *);

/*
 * For now, we only support vNet 1.0.
 */
#define VNET_MAJOR	1
#define VNET_MINOR	0

/*
 * The vNet protocol wants the IP header to be 64-bit aligned, so
 * define our own variant of ETHER_ALIGN.
 */
#define VNET_ETHER_ALIGN	6

struct vnet_soft_desc {
	int		vsd_map_idx;
	unsigned char	*vsd_buf;
};

struct vnet_softc {
	device_t	sc_dv;
	bus_space_tag_t	sc_bustag;
	bus_dma_tag_t	sc_dmatag;

	uint64_t	sc_tx_ino;
	uint64_t	sc_rx_ino;
	void		*sc_tx_ih;
	void		*sc_rx_ih;

	struct ldc_conn	sc_lc;

	uint16_t	sc_vio_state;
#define VIO_SND_VER_INFO	0x0001
#define VIO_ACK_VER_INFO	0x0002
#define VIO_RCV_VER_INFO	0x0004
#define VIO_SND_ATTR_INFO	0x0008
#define VIO_ACK_ATTR_INFO	0x0010
#define VIO_RCV_ATTR_INFO	0x0020
#define VIO_SND_DRING_REG	0x0040
#define VIO_ACK_DRING_REG	0x0080
#define VIO_RCV_DRING_REG	0x0100
#define VIO_SND_RDX		0x0200
#define VIO_ACK_RDX		0x0400
#define VIO_RCV_RDX		0x0800

	struct callout	sc_handshake_co;

	uint8_t		sc_xfer_mode;

	uint32_t	sc_local_sid;
	uint64_t	sc_dring_ident;
	uint64_t	sc_seq_no;

	u_int		sc_tx_prod;
	u_int		sc_tx_cons;

	u_int		sc_peer_state;

	struct ldc_map	*sc_lm;
	struct vnet_dring *sc_vd;
	struct vnet_soft_desc *sc_vsd;
#define VNET_NUM_SOFT_DESC	128

	size_t		sc_peer_desc_size;
	struct ldc_cookie sc_peer_dring_cookie;
	int		sc_peer_dring_nentries;

	struct pool	sc_pool;

	struct ethercom	sc_ethercom;
	struct ifmedia	sc_media;
	u_int8_t	sc_macaddr[ETHER_ADDR_LEN];
};

int	vnet_match(device_t, cfdata_t, void *);
void	vnet_attach(device_t, device_t, void *);

CFATTACH_DECL_NEW(vnet, sizeof(struct vnet_softc),
    vnet_match, vnet_attach, NULL, NULL);

int	vnet_tx_intr(void *);
int	vnet_rx_intr(void *);
void	vnet_handshake(void *);

void	vio_rx_data(struct ldc_conn *, struct ldc_pkt *);
void	vnet_rx_vio_ctrl(struct vnet_softc *, struct vio_msg *);
void	vnet_rx_vio_ver_info(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_attr_info(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_dring_reg(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_rdx(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_mcast_info(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_data(struct vnet_softc *, struct vio_msg *);
void	vnet_rx_vio_desc_data(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_dring_data(struct vnet_softc *, struct vio_msg_tag *);

void	vnet_ldc_reset(struct ldc_conn *);
void	vnet_ldc_start(struct ldc_conn *);

void	vnet_sendmsg(struct vnet_softc *, void *, size_t);
void	vnet_send_ver_info(struct vnet_softc *, uint16_t, uint16_t);
void	vnet_send_attr_info(struct vnet_softc *);
void	vnet_send_dring_reg(struct vnet_softc *);
void	vio_send_rdx(struct vnet_softc *);
void	vnet_send_dring_data(struct vnet_softc *, uint32_t);

void	vnet_start(struct ifnet *);
void	vnet_start_desc(struct ifnet *);
int	vnet_ioctl(struct ifnet *, u_long, void *);
void	vnet_watchdog(struct ifnet *);

int	vnet_media_change(struct ifnet *);
void	vnet_media_status(struct ifnet *, struct ifmediareq *);

void	vnet_link_state(struct vnet_softc *);

void	vnet_setmulti(struct vnet_softc *, int);

int	vnet_init(struct ifnet *);
void	vnet_stop(struct ifnet *, int);

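/* autoconf(9) match: accept the "network" nodes on the channel bus. */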
int
vnet_match(device_t parent, cfdata_t match, void *aux)
{
	struct cbus_attach_args *ca = aux;

	if (strcmp(ca->ca_name, "network") == 0)
		return (1);

	return (0);
}

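/*
 * Attach: set up the LDC channel and its interrupts, fetch the MAC
 * address from the firmware, and attach the Ethernet interface.
 */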
void
vnet_attach(device_t parent, device_t self, void *aux)
{
	struct vnet_softc *sc = device_private(self);
	struct cbus_attach_args *ca = aux;
	struct ldc_conn *lc;
	struct ifnet *ifp;

	sc->sc_dv = self;
	sc->sc_bustag = ca->ca_bustag;
	sc->sc_dmatag = ca->ca_dmatag;
	sc->sc_tx_ino = ca->ca_tx_ino;
	sc->sc_rx_ino = ca->ca_rx_ino;

	printf(": ivec 0x%" PRIx64 ", 0x%" PRIx64, sc->sc_tx_ino, sc->sc_rx_ino);

	/*
	 * Un-configure queues before registering interrupt handlers,
	 * such that we don't get any stale LDC packets or events.
	 */
	hv_ldc_tx_qconf(ca->ca_id, 0, 0);
	hv_ldc_rx_qconf(ca->ca_id, 0, 0);

	sc->sc_tx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_tx_ino,
	    IPL_NET, vnet_tx_intr, sc);
	sc->sc_rx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_rx_ino,
	    IPL_NET, vnet_rx_intr, sc);
	if (sc->sc_tx_ih == NULL || sc->sc_rx_ih == NULL) {
		printf(", can't establish interrupts\n");
		return;
	}

	lc = &sc->sc_lc;
	lc->lc_id = ca->ca_id;
	lc->lc_sc = sc;
	lc->lc_reset = vnet_ldc_reset;
	lc->lc_start = vnet_ldc_start;
	lc->lc_rx_data = vio_rx_data;

	callout_init(&sc->sc_handshake_co, 0);

	sc->sc_peer_state = VIO_DP_STOPPED;

	lc->lc_txq = ldc_queue_alloc(VNET_TX_ENTRIES);
	if (lc->lc_txq == NULL) {
		printf(", can't allocate tx queue\n");
		return;
	}

	lc->lc_rxq = ldc_queue_alloc(VNET_RX_ENTRIES);
	if (lc->lc_rxq == NULL) {
		printf(", can't allocate rx queue\n");
		goto free_txqueue;
	}

	if (OF_getprop(ca->ca_node, "local-mac-address",
	    sc->sc_macaddr, ETHER_ADDR_LEN) > 0) {
		printf(", address %s", ether_sprintf(sc->sc_macaddr));
	} else {
		printf(", cannot retrieve local mac address\n");
		goto free_rxqueue;
	}

	/*
	 * Each interface gets its own pool.
	 */
	pool_init(&sc->sc_pool, /*size*/2048, /*align*/0, /*align_offset*/0,
	    /*flags*/0, /*wchan*/device_xname(sc->sc_dv), /*palloc*/NULL,
	    IPL_NET);

	ifp = &sc->sc_ethercom.ec_if;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = vnet_init;
	ifp->if_ioctl = vnet_ioctl;
	ifp->if_start = vnet_start;
	ifp->if_stop = vnet_stop;
	ifp->if_watchdog = vnet_watchdog;
	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
	IFQ_SET_MAXLEN(&ifp->if_snd, 31); /* XXX */

	ifmedia_init(&sc->sc_media, 0, vnet_media_change, vnet_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_attach(ifp);
	ether_ifattach(ifp, sc->sc_macaddr);

	printf("\n");
	return;
free_rxqueue:
	ldc_queue_free(lc->lc_rxq);
free_txqueue:
	ldc_queue_free(lc->lc_txq);
}

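/* Tx interrupt: record LDC channel state changes on the transmit side. */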
int
vnet_tx_intr(void *arg)
{
	struct vnet_softc *sc = arg;
	struct ldc_conn *lc = &sc->sc_lc;
	uint64_t tx_head, tx_tail, tx_state;

	hv_ldc_tx_get_state(lc->lc_id, &tx_head, &tx_tail, &tx_state);
	if (tx_state != lc->lc_tx_state) {
		switch (tx_state) {
		case LDC_CHANNEL_DOWN:
			DPRINTF(("%s: Tx link down\n", __func__));
			break;
		case LDC_CHANNEL_UP:
			DPRINTF(("%s: Tx link up\n", __func__));
			break;
		case LDC_CHANNEL_RESET:
			DPRINTF(("%s: Tx link reset\n", __func__));
			break;
		}
		lc->lc_tx_state = tx_state;
	}

	return (1);
}

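/*
 * Rx interrupt: handle channel state transitions, then take one LDC
 * packet off the receive queue and dispatch it.
 */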
int
vnet_rx_intr(void *arg)
{
	struct vnet_softc *sc = arg;
	struct ldc_conn *lc = &sc->sc_lc;
	uint64_t rx_head, rx_tail, rx_state;
	struct ldc_pkt *lp;
	int err;

	err = hv_ldc_rx_get_state(lc->lc_id, &rx_head, &rx_tail, &rx_state);
	if (err == H_EINVAL) {
		printf("hv_ldc_rx_get_state failed\n");
		return (0);
	}
	if (err != H_EOK) {
		printf("hv_ldc_rx_get_state %d\n", err);
		return (0);
	}

	if (rx_state != lc->lc_rx_state) {
		switch (rx_state) {
		case LDC_CHANNEL_DOWN:
			lc->lc_tx_seqid = 0;
			lc->lc_state = 0;
			lc->lc_reset(lc);
			if (rx_head == rx_tail)
				break;
			/* Discard and ack pending I/O. */
			DPRINTF(("setting rx qhead to %" PRIu64 "\n", rx_tail));
			err = hv_ldc_rx_set_qhead(lc->lc_id, rx_tail);
			if (err == H_EOK)
				break;
			printf("%s: hv_ldc_rx_set_qhead %d\n", __func__, err);
			break;
		case LDC_CHANNEL_UP:
			callout_reset(&sc->sc_handshake_co, hz / 2,
			    vnet_handshake, sc);
			break;
		case LDC_CHANNEL_RESET:
			DPRINTF(("%s: Rx link reset\n", __func__));
			lc->lc_tx_seqid = 0;
			lc->lc_state = 0;
			lc->lc_reset(lc);
			callout_reset(&sc->sc_handshake_co, hz / 2,
			    vnet_handshake, sc);
			if (rx_head == rx_tail)
				break;
			/* Discard and ack pending I/O. */
			DPRINTF(("setting rx qhead to %" PRIu64 "\n", rx_tail));
			err = hv_ldc_rx_set_qhead(lc->lc_id, rx_tail);
			if (err == H_EOK)
				break;
			printf("%s: hv_ldc_rx_set_qhead %d\n", __func__, err);
			break;
		default:
			DPRINTF(("%s: unhandled rx_state %" PRIx64 "\n",
			    __func__, rx_state));
			break;
		}
		lc->lc_rx_state = rx_state;
		return (1);
	}

	if (rx_head == rx_tail) {
		DPRINTF(("%s: head eq tail\n", __func__));
		return (0);
	}
	lp = (struct ldc_pkt *)(uintptr_t)(lc->lc_rxq->lq_va + rx_head);
	switch (lp->type) {
	case LDC_CTRL:
		DPRINTF(("%s: LDC_CTRL\n", __func__));
		ldc_rx_ctrl(lc, lp);
		break;

	case LDC_DATA:
		DPRINTF(("%s: LDC_DATA\n", __func__));
		ldc_rx_data(lc, lp);
		break;

	default:
		DPRINTF(("%s: unhandled type 0x%02x/0x%02x/0x%02x\n",
		    __func__, lp->type, lp->stype, lp->ctrl));
		Debugger();
		ldc_reset(lc);
		break;
	}

	if (lc->lc_state == 0)
		return (1);

	rx_head += sizeof(*lp);
	rx_head &= ((lc->lc_rxq->lq_nentries * sizeof(*lp)) - 1);
	err = hv_ldc_rx_set_qhead(lc->lc_id, rx_head);
	if (err != H_EOK)
		printf("%s: hv_ldc_rx_set_qhead %d\n", __func__, err);
	return (1);
}

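/* Callout, scheduled from the Rx interrupt: (re)start the LDC handshake. */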
void
vnet_handshake(void *arg)
{
	struct vnet_softc *sc = arg;

	ldc_send_vers(&sc->sc_lc);
}

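/* Demultiplex an incoming VIO message into the control and data paths. */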
void
vio_rx_data(struct ldc_conn *lc, struct ldc_pkt *lp)
{
	struct vio_msg *vm = (struct vio_msg *)lp;

	switch (vm->type) {
	case VIO_TYPE_CTRL:
		if ((lp->env & LDC_FRAG_START) == 0 &&
		    (lp->env & LDC_FRAG_STOP) == 0) {
			DPRINTF(("%s: FRAG_START==0 and FRAG_STOP==0\n",
			    __func__));
			return;
		}
		vnet_rx_vio_ctrl(lc->lc_sc, vm);
		break;

	case VIO_TYPE_DATA:
		if ((lp->env & LDC_FRAG_START) == 0) {
			DPRINTF(("%s: FRAG_START==0\n", __func__));
			return;
		}
		vnet_rx_vio_data(lc->lc_sc, vm);
		break;

	default:
		DPRINTF(("Unhandled packet type 0x%02x\n", vm->type));
		ldc_reset(lc);
		break;
	}
}

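/* Dispatch a VIO control message on its sub-type envelope. */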
void
vnet_rx_vio_ctrl(struct vnet_softc *sc, struct vio_msg *vm)
{
	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;

	switch (tag->stype_env) {
	case VIO_VER_INFO:
		vnet_rx_vio_ver_info(sc, tag);
		break;
	case VIO_ATTR_INFO:
		vnet_rx_vio_attr_info(sc, tag);
		break;
	case VIO_DRING_REG:
		vnet_rx_vio_dring_reg(sc, tag);
		break;
	case VIO_RDX:
		vnet_rx_vio_rdx(sc, tag);
		break;
	case VNET_MCAST_INFO:
		vnet_rx_vio_mcast_info(sc, tag);
		break;
	default:
		printf("%s: CTRL/0x%02x/0x%04x FIXME\n",
		    __func__, tag->stype, tag->stype_env);
		break;
	}
}

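/*
 * VER_INFO: version negotiation.  We ACK a matching major version and
 * NACK anything else, suggesting the version we do support.
 */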
void
vnet_rx_vio_ver_info(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vio_ver_info *vi = (struct vio_ver_info *)tag;

	switch (vi->tag.stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/VER_INFO\n"));

		/* Make sure we're talking to a virtual network device. */
		if (vi->dev_class != VDEV_NETWORK &&
		    vi->dev_class != VDEV_NETWORK_SWITCH) {
			printf("Not a network device\n");
			vi->tag.stype = VIO_SUBTYPE_NACK;
			vnet_sendmsg(sc, vi, sizeof(*vi));
			return;
		}

		if (vi->major != VNET_MAJOR) {
			DPRINTF(("Major mismatch %u vs %u\n",
			    vi->major, VNET_MAJOR));
			vi->tag.stype = VIO_SUBTYPE_NACK;
			vi->major = VNET_MAJOR;
			vi->minor = VNET_MINOR;
			vnet_sendmsg(sc, vi, sizeof(*vi));
			return;
		}

		vi->tag.stype = VIO_SUBTYPE_ACK;
		vi->tag.sid = sc->sc_local_sid;
		vi->minor = VNET_MINOR;
		vnet_sendmsg(sc, vi, sizeof(*vi));
		sc->sc_vio_state |= VIO_RCV_VER_INFO;
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/VER_INFO\n"));
		if (!ISSET(sc->sc_vio_state, VIO_SND_VER_INFO)) {
			ldc_reset(&sc->sc_lc);
			break;
		}
		sc->sc_vio_state |= VIO_ACK_VER_INFO;
		break;

	default:
		DPRINTF(("CTRL/0x%02x/VER_INFO\n", vi->tag.stype));
		break;
	}

	if (ISSET(sc->sc_vio_state, VIO_RCV_VER_INFO) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_VER_INFO))
		vnet_send_attr_info(sc);
}

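/*
 * ATTR_INFO: attribute exchange.  The peer's transfer mode decides
 * whether we end up in dring or descriptor mode.
 */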
void
vnet_rx_vio_attr_info(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vnet_attr_info *ai = (struct vnet_attr_info *)tag;

	switch (ai->tag.stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/ATTR_INFO\n"));
		sc->sc_xfer_mode = ai->xfer_mode;
		ai->tag.stype = VIO_SUBTYPE_ACK;
		ai->tag.sid = sc->sc_local_sid;
		vnet_sendmsg(sc, ai, sizeof(*ai));
		sc->sc_vio_state |= VIO_RCV_ATTR_INFO;
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/ATTR_INFO\n"));
		if (!ISSET(sc->sc_vio_state, VIO_SND_ATTR_INFO)) {
			ldc_reset(&sc->sc_lc);
			break;
		}
		sc->sc_vio_state |= VIO_ACK_ATTR_INFO;
		break;

	default:
		DPRINTF(("CTRL/0x%02x/ATTR_INFO\n", ai->tag.stype));
		break;
	}

	if (ISSET(sc->sc_vio_state, VIO_RCV_ATTR_INFO) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_ATTR_INFO)) {
		if (sc->sc_xfer_mode == VIO_DRING_MODE)
			vnet_send_dring_reg(sc);
		else
			vio_send_rdx(sc);
	}
}

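/*
 * DRING_REG: descriptor ring registration, both the peer's ring with
 * us (INFO) and our ring with the peer (ACK).
 */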
void
vnet_rx_vio_dring_reg(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vio_dring_reg *dr = (struct vio_dring_reg *)tag;

	switch (dr->tag.stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/DRING_REG\n"));
		sc->sc_peer_dring_nentries = dr->num_descriptors;
		sc->sc_peer_desc_size = dr->descriptor_size;
		sc->sc_peer_dring_cookie = dr->cookie[0];

		dr->tag.stype = VIO_SUBTYPE_ACK;
		dr->tag.sid = sc->sc_local_sid;
		vnet_sendmsg(sc, dr, sizeof(*dr));
		sc->sc_vio_state |= VIO_RCV_DRING_REG;
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/DRING_REG\n"));
		if (!ISSET(sc->sc_vio_state, VIO_SND_DRING_REG)) {
			ldc_reset(&sc->sc_lc);
			break;
		}

		sc->sc_dring_ident = dr->dring_ident;
		sc->sc_seq_no = 1;

		sc->sc_vio_state |= VIO_ACK_DRING_REG;
		break;

	default:
		DPRINTF(("CTRL/0x%02x/DRING_REG\n", dr->tag.stype));
		break;
	}

	if (ISSET(sc->sc_vio_state, VIO_RCV_DRING_REG) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_DRING_REG))
		vio_send_rdx(sc);
}

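/*
 * RDX: both sides are ready for data exchange.  Mark the link up and
 * kick the transmit queue.
 */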
void
vnet_rx_vio_rdx(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;

	switch (tag->stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/RDX\n"));
		tag->stype = VIO_SUBTYPE_ACK;
		tag->sid = sc->sc_local_sid;
		vnet_sendmsg(sc, tag, sizeof(*tag));
		sc->sc_vio_state |= VIO_RCV_RDX;
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/RDX\n"));
		if (!ISSET(sc->sc_vio_state, VIO_SND_RDX)) {
			ldc_reset(&sc->sc_lc);
			break;
		}
		sc->sc_vio_state |= VIO_ACK_RDX;
		break;

	default:
		DPRINTF(("CTRL/0x%02x/RDX (VIO)\n", tag->stype));
		break;
	}

	if (ISSET(sc->sc_vio_state, VIO_RCV_RDX) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_RDX)) {
		/* Link is up! */
		vnet_link_state(sc);

		/* Configure multicast now that we can. */
		vnet_setmulti(sc, 1);

		KERNEL_LOCK(1, curlwp);
		ifp->if_flags &= ~IFF_OACTIVE;
		vnet_start(ifp);
		KERNEL_UNLOCK_ONE(curlwp);
	}
}

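/* MCAST_INFO replies from the peer are only logged here. */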
void
vnet_rx_vio_mcast_info(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	switch (tag->stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/MCAST_INFO\n"));
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/MCAST_INFO\n"));
		break;

	case VIO_SUBTYPE_NACK:
		DPRINTF(("CTRL/NACK/MCAST_INFO\n"));
		break;

	default:
		printf("%s: CTRL/0x%02x/0x%04x\n",
		    __func__, tag->stype, tag->stype_env);
		break;
	}
}

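/* Dispatch a VIO data message; ignore data until the handshake is done. */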
void
vnet_rx_vio_data(struct vnet_softc *sc, struct vio_msg *vm)
{
	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;

	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX)) {
		DPRINTF(("Spurious DATA/0x%02x/0x%04x\n", tag->stype,
		    tag->stype_env));
		return;
	}

	switch (tag->stype_env) {
	case VIO_DESC_DATA:
		vnet_rx_vio_desc_data(sc, tag);
		break;

	case VIO_DRING_DATA:
		vnet_rx_vio_dring_data(sc, tag);
		break;

	default:
		DPRINTF(("DATA/0x%02x/0x%04x\n", tag->stype, tag->stype_env));
		break;
	}
}

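/*
 * DESC_DATA: in-band descriptor mode.  INFO carries a received packet;
 * ACK completes one of our in-flight transmissions.
 */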
void
vnet_rx_vio_desc_data(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vnet_desc_msg *dm = (struct vnet_desc_msg *)tag;
	struct ldc_conn *lc = &sc->sc_lc;
	struct ldc_map *map = sc->sc_lm;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct mbuf *m;
	unsigned char *buf;
	paddr_t pa;
	psize_t nbytes;
	u_int cons;
	int err;

	switch (tag->stype) {
	case VIO_SUBTYPE_INFO:
		/* Check the size before committing a buffer to it. */
		if (dm->nbytes > (ETHER_MAX_LEN - ETHER_CRC_LEN)) {
			if_statinc(ifp, if_ierrors);
			goto skip;
		}

		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
		if (buf == NULL) {
			if_statinc(ifp, if_ierrors);
			goto skip;
		}
		nbytes = roundup(dm->nbytes, 8);

		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
		err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
		    dm->cookie[0].addr, pa, nbytes, &nbytes);
		if (err != H_EOK) {
			pool_put(&sc->sc_pool, buf);
			if_statinc(ifp, if_ierrors);
			goto skip;
		}

		/* Stupid OBP doesn't align properly. */
		m = m_devget(buf, dm->nbytes, 0, ifp);
		pool_put(&sc->sc_pool, buf);
		if (m == NULL) {
			if_statinc(ifp, if_ierrors);
			goto skip;
		}

		/* Pass it on. */
		if_percpuq_enqueue(ifp->if_percpuq, m);
	skip:
		dm->tag.stype = VIO_SUBTYPE_ACK;
		dm->tag.sid = sc->sc_local_sid;
		vnet_sendmsg(sc, dm, sizeof(*dm));
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("DATA/ACK/DESC_DATA\n"));

		if (dm->desc_handle != sc->sc_tx_cons) {
			printf("out of order\n");
			return;
		}

		cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);

		map->lm_slot[sc->sc_vsd[cons].vsd_map_idx].entry = 0;
		atomic_dec_32(&map->lm_count);

		pool_put(&sc->sc_pool, sc->sc_vsd[cons].vsd_buf);
		sc->sc_vsd[cons].vsd_buf = NULL;

		sc->sc_tx_cons++;
		break;

	case VIO_SUBTYPE_NACK:
		DPRINTF(("DATA/NACK/DESC_DATA\n"));
		break;

	default:
		DPRINTF(("DATA/0x%02x/DESC_DATA\n", tag->stype));
		break;
	}
}

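/*
 * DRING_DATA: descriptor ring mode.  INFO walks the peer's ring to
 * receive packets; ACK reclaims our completed transmit descriptors.
 */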
void
vnet_rx_vio_dring_data(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vio_dring_msg *dm = (struct vio_dring_msg *)tag;
	struct ldc_conn *lc = &sc->sc_lc;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	struct mbuf *m = NULL;
	paddr_t pa;
	psize_t nbytes;
	int err;

	switch (tag->stype) {
	case VIO_SUBTYPE_INFO:
	{
		struct vnet_desc desc;
		uint64_t cookie;
		paddr_t desc_pa;
		int idx, ack_end_idx = -1;

		DPRINTF(("%s: VIO_SUBTYPE_INFO\n", __func__));

		idx = dm->start_idx;
		for (;;) {
			cookie = sc->sc_peer_dring_cookie.addr;
			cookie += idx * sc->sc_peer_desc_size;
			nbytes = sc->sc_peer_desc_size;
			pmap_extract(pmap_kernel(), (vaddr_t)&desc, &desc_pa);
			err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN, cookie,
			    desc_pa, nbytes, &nbytes);
			if (err != H_EOK) {
				printf("hv_ldc_copy_in %d\n", err);
				break;
			}

			if (desc.hdr.dstate != VIO_DESC_READY)
				break;

			if (desc.nbytes > (ETHER_MAX_LEN - ETHER_CRC_LEN)) {
				if_statinc(ifp, if_ierrors);
				goto skip;
			}

			MGETHDR(m, M_DONTWAIT, MT_DATA);
			if (m == NULL) {
				DPRINTF(("%s: MGETHDR failed\n", __func__));
				if_statinc(ifp, if_ierrors);
				goto skip;
			}
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_freem(m);
				break;
			}
			m->m_len = m->m_pkthdr.len = desc.nbytes;
			nbytes = roundup(desc.nbytes + VNET_ETHER_ALIGN, 8);

			pmap_extract(pmap_kernel(), (vaddr_t)m->m_data, &pa);
			err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
			    desc.cookie[0].addr, pa, nbytes, &nbytes);
			if (err != H_EOK) {
				m_freem(m);
				goto skip;
			}
			m->m_data += VNET_ETHER_ALIGN;
			m_set_rcvif(m, ifp);

			if_percpuq_enqueue(ifp->if_percpuq, m);

		skip:
			desc.hdr.dstate = VIO_DESC_DONE;
			nbytes = sc->sc_peer_desc_size;
			err = hv_ldc_copy(lc->lc_id, LDC_COPY_OUT, cookie,
			    desc_pa, nbytes, &nbytes);
			if (err != H_EOK)
				printf("hv_ldc_copy_out %d\n", err);

			ack_end_idx = idx;
			if (++idx == sc->sc_peer_dring_nentries)
				idx = 0;
		}

		if (ack_end_idx == -1) {
			dm->tag.stype = VIO_SUBTYPE_NACK;
		} else {
			dm->tag.stype = VIO_SUBTYPE_ACK;
			dm->end_idx = ack_end_idx;
		}
		dm->tag.sid = sc->sc_local_sid;
		dm->proc_state = VIO_DP_STOPPED;
		vnet_sendmsg(sc, dm, sizeof(*dm));
		break;
	}

	case VIO_SUBTYPE_ACK:
	{
		struct ldc_map *map = sc->sc_lm;
		u_int cons, count;

		DPRINTF(("%s: VIO_SUBTYPE_ACK\n", __func__));

		sc->sc_peer_state = dm->proc_state;

		cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);
		while (sc->sc_vd->vd_desc[cons].hdr.dstate == VIO_DESC_DONE) {
			map->lm_slot[sc->sc_vsd[cons].vsd_map_idx].entry = 0;
			atomic_dec_32(&map->lm_count);

			pool_put(&sc->sc_pool, sc->sc_vsd[cons].vsd_buf);
			sc->sc_vsd[cons].vsd_buf = NULL;

			sc->sc_vd->vd_desc[cons].hdr.dstate = VIO_DESC_FREE;
			sc->sc_tx_cons++;
			cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);
		}

		count = sc->sc_tx_prod - sc->sc_tx_cons;
		if (count > 0 && sc->sc_peer_state != VIO_DP_ACTIVE)
			vnet_send_dring_data(sc, cons);

		KERNEL_LOCK(1, curlwp);
		if (count < (sc->sc_vd->vd_nentries - 1))
			ifp->if_flags &= ~IFF_OACTIVE;
		if (count == 0)
			ifp->if_timer = 0;

		vnet_start(ifp);
		KERNEL_UNLOCK_ONE(curlwp);
		break;
	}

	case VIO_SUBTYPE_NACK:
		DPRINTF(("DATA/NACK/DRING_DATA\n"));
		sc->sc_peer_state = VIO_DP_STOPPED;
		break;

	default:
		DPRINTF(("DATA/0x%02x/DRING_DATA\n", tag->stype));
		break;
	}
}

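/*
 * Channel reset: clear the VIO handshake state and reclaim all
 * outstanding transmit buffers and map entries.
 */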
void
vnet_ldc_reset(struct ldc_conn *lc)
{
	struct vnet_softc *sc = lc->lc_sc;
	int i;

	callout_stop(&sc->sc_handshake_co);
	sc->sc_tx_prod = sc->sc_tx_cons = 0;
	sc->sc_peer_state = VIO_DP_STOPPED;
	sc->sc_vio_state = 0;
	vnet_link_state(sc);

	sc->sc_lm->lm_next = 1;
	sc->sc_lm->lm_count = 1;
	for (i = 1; i < sc->sc_lm->lm_nentries; i++)
		sc->sc_lm->lm_slot[i].entry = 0;

	for (i = 0; i < sc->sc_vd->vd_nentries; i++) {
		if (sc->sc_vsd[i].vsd_buf) {
			pool_put(&sc->sc_pool, sc->sc_vsd[i].vsd_buf);
			sc->sc_vsd[i].vsd_buf = NULL;
		}
		sc->sc_vd->vd_desc[i].hdr.dstate = VIO_DESC_FREE;
	}
}

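/* Channel came up: stop the handshake callout and send our version. */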
void
vnet_ldc_start(struct ldc_conn *lc)
{
	struct vnet_softc *sc = lc->lc_sc;

	callout_stop(&sc->sc_handshake_co);
	vnet_send_ver_info(sc, VNET_MAJOR, VNET_MINOR);
}

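/* Send a VIO message over the LDC channel in unreliable mode. */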
void
vnet_sendmsg(struct vnet_softc *sc, void *msg, size_t len)
{
	struct ldc_conn *lc = &sc->sc_lc;
	int err;

	err = ldc_send_unreliable(lc, msg, len);
	if (err)
		printf("%s: ldc_send_unreliable: %d\n", __func__, err);
}

void
vnet_send_ver_info(struct vnet_softc *sc, uint16_t major, uint16_t minor)
{
	struct vio_ver_info vi;

	bzero(&vi, sizeof(vi));
	vi.tag.type = VIO_TYPE_CTRL;
	vi.tag.stype = VIO_SUBTYPE_INFO;
	vi.tag.stype_env = VIO_VER_INFO;
	vi.tag.sid = sc->sc_local_sid;
	vi.major = major;
	vi.minor = minor;
	vi.dev_class = VDEV_NETWORK;
	vnet_sendmsg(sc, &vi, sizeof(vi));

	sc->sc_vio_state |= VIO_SND_VER_INFO;
}

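/*
 * Send our attributes: dring transfer mode, the MAC address packed
 * into a 64-bit word, and our MTU.
 */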
void
vnet_send_attr_info(struct vnet_softc *sc)
{
	struct vnet_attr_info ai;
	int i;

	bzero(&ai, sizeof(ai));
	ai.tag.type = VIO_TYPE_CTRL;
	ai.tag.stype = VIO_SUBTYPE_INFO;
	ai.tag.stype_env = VIO_ATTR_INFO;
	ai.tag.sid = sc->sc_local_sid;
	ai.xfer_mode = VIO_DRING_MODE;
	ai.addr_type = VNET_ADDR_ETHERMAC;
	ai.ack_freq = 0;
	ai.addr = 0;
	for (i = 0; i < ETHER_ADDR_LEN; i++) {
		ai.addr <<= 8;
		ai.addr |= sc->sc_macaddr[i];
	}
	ai.mtu = ETHER_MAX_LEN - ETHER_CRC_LEN;
	vnet_sendmsg(sc, &ai, sizeof(ai));

	sc->sc_vio_state |= VIO_SND_ATTR_INFO;
}

void
vnet_send_dring_reg(struct vnet_softc *sc)
{
	struct vio_dring_reg dr;

	bzero(&dr, sizeof(dr));
	dr.tag.type = VIO_TYPE_CTRL;
	dr.tag.stype = VIO_SUBTYPE_INFO;
	dr.tag.stype_env = VIO_DRING_REG;
	dr.tag.sid = sc->sc_local_sid;
	dr.dring_ident = 0;
	dr.num_descriptors = sc->sc_vd->vd_nentries;
	dr.descriptor_size = sizeof(struct vnet_desc);
	dr.options = VIO_TX_RING;
	dr.ncookies = 1;
	dr.cookie[0].addr = 0;
	dr.cookie[0].size = PAGE_SIZE;
	vnet_sendmsg(sc, &dr, sizeof(dr));

	sc->sc_vio_state |= VIO_SND_DRING_REG;
}

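/* Send RDX (ready for data exchange) to finish the handshake. */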
void
vio_send_rdx(struct vnet_softc *sc)
{
	struct vio_msg_tag tag;

	tag.type = VIO_TYPE_CTRL;
	tag.stype = VIO_SUBTYPE_INFO;
	tag.stype_env = VIO_RDX;
	tag.sid = sc->sc_local_sid;
	vnet_sendmsg(sc, &tag, sizeof(tag));

	sc->sc_vio_state |= VIO_SND_RDX;
}

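/*
 * Tell the peer that descriptors are ready, starting at start_idx.
 * Skipped when the peer is already actively processing the ring.
 */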
void
vnet_send_dring_data(struct vnet_softc *sc, uint32_t start_idx)
{
	struct vio_dring_msg dm;
	u_int peer_state;

	peer_state = atomic_swap_uint(&sc->sc_peer_state, VIO_DP_ACTIVE);
	if (peer_state == VIO_DP_ACTIVE) {
		DPRINTF(("%s: peer_state == VIO_DP_ACTIVE\n", __func__));
		return;
	}

	bzero(&dm, sizeof(dm));
	dm.tag.type = VIO_TYPE_DATA;
	dm.tag.stype = VIO_SUBTYPE_INFO;
	dm.tag.stype_env = VIO_DRING_DATA;
	dm.tag.sid = sc->sc_local_sid;
	dm.seq_no = sc->sc_seq_no++;
	dm.dring_ident = sc->sc_dring_ident;
	dm.start_idx = start_idx;
	dm.end_idx = -1;
	vnet_sendmsg(sc, &dm, sizeof(dm));
}

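/*
 * Transmit in dring mode: copy each packet into a pool buffer, enter
 * it into the LDC map table and mark its descriptor ready.
 */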
void
vnet_start(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ldc_conn *lc = &sc->sc_lc;
	struct ldc_map *map = sc->sc_lm;
	struct mbuf *m;
	paddr_t pa;
	unsigned char *buf;
	uint64_t tx_head, tx_tail, tx_state;
	u_int start, prod, count;
	int err;

	if (!(ifp->if_flags & IFF_RUNNING)) {
		DPRINTF(("%s: not in RUNNING state\n", __func__));
		return;
	}
	if (ifp->if_flags & IFF_OACTIVE) {
		DPRINTF(("%s: already active\n", __func__));
		return;
	}

	if (IFQ_IS_EMPTY(&ifp->if_snd)) {
		DPRINTF(("%s: queue is empty\n", __func__));
		return;
	}
	DPRINTF(("%s: queue size %d\n", __func__, ifp->if_snd.ifq_len));

	/*
	 * We cannot transmit packets until a VIO connection has been
	 * established.
	 */
	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX)) {
		DPRINTF(("%s: vio connection not established yet\n",
		    __func__));
		return;
	}

	/*
	 * Make sure there is room in the LDC transmit queue to send a
	 * DRING_DATA message.
	 */
	err = hv_ldc_tx_get_state(lc->lc_id, &tx_head, &tx_tail, &tx_state);
	if (err != H_EOK) {
		DPRINTF(("%s: no room in ldc transmit queue\n", __func__));
		return;
	}
	tx_tail += sizeof(struct ldc_pkt);
	tx_tail &= ((lc->lc_txq->lq_nentries * sizeof(struct ldc_pkt)) - 1);
	if (tx_tail == tx_head) {
		DPRINTF(("%s: tail equals head\n", __func__));
		ifp->if_flags |= IFF_OACTIVE;
		return;
	}

	if (sc->sc_xfer_mode == VIO_DESC_MODE) {
		DPRINTF(("%s: vio_desc_mode\n", __func__));
		vnet_start_desc(ifp);
		return;
	}

	start = prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);
	while (sc->sc_vd->vd_desc[prod].hdr.dstate == VIO_DESC_FREE) {
		count = sc->sc_tx_prod - sc->sc_tx_cons;
		if (count >= (sc->sc_vd->vd_nentries - 1) ||
		    map->lm_count >= map->lm_nentries) {
			DPRINTF(("%s: count issue\n", __func__));
			ifp->if_flags |= IFF_OACTIVE;
			break;
		}

		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
		if (buf == NULL) {
			DPRINTF(("%s: buf is NULL\n", __func__));
			ifp->if_flags |= IFF_OACTIVE;
			break;
		}
		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			pool_put(&sc->sc_pool, buf);
			break;
		}

		m_copydata(m, 0, m->m_pkthdr.len, buf + VNET_ETHER_ALIGN);

#if NBPFILTER > 0
		/*
		 * If BPF is listening on this interface, let it see the
		 * packet before we commit it to the wire.
		 */
		if (ifp->if_bpf)
			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif

		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
		KASSERT((pa & ~PAGE_MASK) == (pa & LDC_MTE_RA_MASK));
		while (map->lm_slot[map->lm_next].entry != 0) {
			map->lm_next++;
			map->lm_next &= (map->lm_nentries - 1);
		}
		map->lm_slot[map->lm_next].entry = (pa & LDC_MTE_RA_MASK);
		map->lm_slot[map->lm_next].entry |= LDC_MTE_CPR;
		atomic_inc_32(&map->lm_count);
		sc->sc_vd->vd_desc[prod].nbytes = MAX(m->m_pkthdr.len, 60);
		sc->sc_vd->vd_desc[prod].ncookies = 1;
		sc->sc_vd->vd_desc[prod].cookie[0].addr =
		    map->lm_next << PAGE_SHIFT | (pa & PAGE_MASK);
		sc->sc_vd->vd_desc[prod].cookie[0].size = 2048;
		membar_producer();
		sc->sc_vd->vd_desc[prod].hdr.dstate = VIO_DESC_READY;

		sc->sc_vsd[prod].vsd_map_idx = map->lm_next;
		sc->sc_vsd[prod].vsd_buf = buf;

		sc->sc_tx_prod++;
		prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);

		m_freem(m);
	}

	membar_producer();

	if (start != prod && sc->sc_peer_state != VIO_DP_ACTIVE) {
		vnet_send_dring_data(sc, start);
		ifp->if_timer = 5;
	}
}

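/*
 * Transmit in descriptor mode: one DESC_DATA message per packet
 * instead of a shared descriptor ring.
 */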
void
vnet_start_desc(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ldc_map *map = sc->sc_lm;
	struct vnet_desc_msg dm;
	struct mbuf *m;
	paddr_t pa;
	unsigned char *buf;
	u_int prod, count;

	for (;;) {
		count = sc->sc_tx_prod - sc->sc_tx_cons;
		if (count >= (sc->sc_vd->vd_nentries - 1) ||
		    map->lm_count >= map->lm_nentries) {
			ifp->if_flags |= IFF_OACTIVE;
			return;
		}

		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
		if (buf == NULL) {
			ifp->if_flags |= IFF_OACTIVE;
			return;
		}

		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL) {
			pool_put(&sc->sc_pool, buf);
			return;
		}

		m_copydata(m, 0, m->m_pkthdr.len, buf);

#if NBPFILTER > 0
		/*
		 * If BPF is listening on this interface, let it see the
		 * packet before we commit it to the wire.
		 */
		if (ifp->if_bpf)
			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif

		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
		KASSERT((pa & ~PAGE_MASK) == (pa & LDC_MTE_RA_MASK));
		while (map->lm_slot[map->lm_next].entry != 0) {
			map->lm_next++;
			map->lm_next &= (map->lm_nentries - 1);
		}
		map->lm_slot[map->lm_next].entry = (pa & LDC_MTE_RA_MASK);
		map->lm_slot[map->lm_next].entry |= LDC_MTE_CPR;
		atomic_inc_32(&map->lm_count);

		prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);
		sc->sc_vsd[prod].vsd_map_idx = map->lm_next;
		sc->sc_vsd[prod].vsd_buf = buf;

		bzero(&dm, sizeof(dm));
		dm.tag.type = VIO_TYPE_DATA;
		dm.tag.stype = VIO_SUBTYPE_INFO;
		dm.tag.stype_env = VIO_DESC_DATA;
		dm.tag.sid = sc->sc_local_sid;
		dm.seq_no = sc->sc_seq_no++;
		dm.desc_handle = sc->sc_tx_prod;
		dm.nbytes = MAX(m->m_pkthdr.len, 60);
		dm.ncookies = 1;
		dm.cookie[0].addr =
		    map->lm_next << PAGE_SHIFT | (pa & PAGE_MASK);
		dm.cookie[0].size = 2048;
		vnet_sendmsg(sc, &dm, sizeof(dm));

		sc->sc_tx_prod++;
		sc->sc_tx_prod &= (sc->sc_vd->vd_nentries - 1);

		m_freem(m);
	}
}

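/* Standard ifioctl handling; media and multicast need special care. */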
int
vnet_ioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int s, error = 0;

	s = splnet();

	switch (cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
		/* FALLTHROUGH */
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_flags & IFF_RUNNING) == 0)
				vnet_init(ifp);
		} else {
			if (ifp->if_flags & IFF_RUNNING)
				vnet_stop(ifp, 0);
		}
		break;

	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/*
		 * XXX Removing all multicast addresses and adding
		 * most of them back is somewhat wasteful.
		 */
		vnet_setmulti(sc, 0);
		error = ether_ioctl(ifp, cmd, data);
		vnet_setmulti(sc, 1);
		if (error == ENETRESET)
			error = 0;
		break;

	default:
		error = ether_ioctl(ifp, cmd, data);
	}

	splx(s);

	return (error);
}

void
vnet_watchdog(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;

	printf("%s: watchdog timeout\n", device_xname(sc->sc_dv));
}

int
vnet_media_change(struct ifnet *ifp)
{
	return (0);
}

void
vnet_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	imr->ifm_active = IFM_ETHER | IFM_AUTO;
	imr->ifm_status = IFM_AVALID;
	if (ifp->if_link_state == LINK_STATE_UP &&
	    ifp->if_flags & IFF_UP)
		imr->ifm_status |= IFM_ACTIVE;
}

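/* The link is up once the RDX handshake completed in both directions. */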
void
vnet_link_state(struct vnet_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	int link_state = LINK_STATE_DOWN;

	KERNEL_LOCK(1, curlwp);
	if (ISSET(sc->sc_vio_state, VIO_RCV_RDX) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_RDX))
		link_state = LINK_STATE_UP;
	if (ifp->if_link_state != link_state)
		if_link_state_change(ifp, link_state);
	KERNEL_UNLOCK_ONE(curlwp);
}

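/*
 * Program the peer's multicast filter, batching addresses in groups
 * of VNET_NUM_MCAST per MCAST_INFO message.
 */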
void
vnet_setmulti(struct vnet_softc *sc, int set)
{
	struct ethercom *ec = &sc->sc_ethercom;
	struct ether_multi *enm;
	struct ether_multistep step;
	struct vnet_mcast_info mi;
	int count = 0;

	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX))
		return;

	bzero(&mi, sizeof(mi));
	mi.tag.type = VIO_TYPE_CTRL;
	mi.tag.stype = VIO_SUBTYPE_INFO;
	mi.tag.stype_env = VNET_MCAST_INFO;
	mi.tag.sid = sc->sc_local_sid;
	mi.set = set ? 1 : 0;
	KERNEL_LOCK(1, curlwp);
	ETHER_FIRST_MULTI(step, ec, enm);
	while (enm != NULL) {
		/* XXX What about multicast ranges? */
		bcopy(enm->enm_addrlo, mi.mcast_addr[count], ETHER_ADDR_LEN);
		ETHER_NEXT_MULTI(step, enm);

		count++;
		if (count < VNET_NUM_MCAST)
			continue;

		mi.count = VNET_NUM_MCAST;
		vnet_sendmsg(sc, &mi, sizeof(mi));
		count = 0;
	}

	if (count > 0) {
		mi.count = count;
		vnet_sendmsg(sc, &mi, sizeof(mi));
	}
	KERNEL_UNLOCK_ONE(curlwp);
}

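/*
 * Bring the interface up: allocate the map table, descriptor ring and
 * LDC queues, then start the handshake.
 */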
int
vnet_init(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ldc_conn *lc = &sc->sc_lc;
	int err;
	vaddr_t va;
	paddr_t pa;

	sc->sc_lm = ldc_map_alloc(2048);
	if (sc->sc_lm == NULL)
		return ENOMEM;

	va = (vaddr_t)sc->sc_lm->lm_slot;
	pa = 0;
	if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
		panic("pmap_extract failed %lx\n", va);
	err = hv_ldc_set_map_table(lc->lc_id, pa, 2048);
	if (err != H_EOK) {
		printf("hv_ldc_set_map_table %d\n", err);
		return EINVAL;
	}

	sc->sc_vd = vnet_dring_alloc(sc->sc_dmatag, VNET_NUM_SOFT_DESC);
	if (sc->sc_vd == NULL)
		return ENOMEM;
	sc->sc_vsd = malloc(VNET_NUM_SOFT_DESC * sizeof(*sc->sc_vsd), M_DEVBUF,
	    M_NOWAIT|M_ZERO);
	if (sc->sc_vsd == NULL)
		return ENOMEM;

	va = (vaddr_t)sc->sc_vd->vd_desc;
	pa = 0;
	if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
		panic("pmap_extract failed %lx\n", va);
	sc->sc_lm->lm_slot[0].entry = pa;
	sc->sc_lm->lm_slot[0].entry &= LDC_MTE_RA_MASK;
	sc->sc_lm->lm_slot[0].entry |= LDC_MTE_CPR | LDC_MTE_CPW;
	sc->sc_lm->lm_next = 1;
	sc->sc_lm->lm_count = 1;

	va = lc->lc_txq->lq_va;
	pa = 0;
	if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
		panic("pmap_extract failed %lx\n", va);
	err = hv_ldc_tx_qconf(lc->lc_id, pa, lc->lc_txq->lq_nentries);
	if (err != H_EOK)
		printf("hv_ldc_tx_qconf %d\n", err);

	va = (vaddr_t)lc->lc_rxq->lq_va;
	pa = 0;
	if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
		panic("pmap_extract failed %lx\n", va);
	err = hv_ldc_rx_qconf(lc->lc_id, pa, lc->lc_rxq->lq_nentries);
	if (err != H_EOK)
		printf("hv_ldc_rx_qconf %d\n", err);

	cbus_intr_setenabled(sc->sc_bustag, sc->sc_tx_ino, INTR_ENABLED);
	cbus_intr_setenabled(sc->sc_bustag, sc->sc_rx_ino, INTR_ENABLED);

	ldc_send_vers(lc);

	ifp->if_flags |= IFF_RUNNING;

	return 0;
}

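/*
 * Take the interface down: disable interrupts, tear down the LDC
 * queues and release all transmit resources.
 */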
void
vnet_stop(struct ifnet *ifp, int disable)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ldc_conn *lc = &sc->sc_lc;

	ifp->if_flags &= ~IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;
	ifp->if_timer = 0;

	cbus_intr_setenabled(sc->sc_bustag, sc->sc_tx_ino, INTR_DISABLED);
	cbus_intr_setenabled(sc->sc_bustag, sc->sc_rx_ino, INTR_DISABLED);

#if 0
	/* XXX OpenBSD uses intr_barrier() here; not available on NetBSD. */
	intr_barrier(sc->sc_tx_ih);
	intr_barrier(sc->sc_rx_ih);
#else
	printf("vnet_stop() intr_barrier() not available\n");
#endif

	hv_ldc_tx_qconf(lc->lc_id, 0, 0);
	hv_ldc_rx_qconf(lc->lc_id, 0, 0);
	lc->lc_tx_seqid = 0;
	lc->lc_state = 0;
	lc->lc_tx_state = lc->lc_rx_state = LDC_CHANNEL_DOWN;
	vnet_ldc_reset(lc);

	free(sc->sc_vsd, M_DEVBUF);

	vnet_dring_free(sc->sc_dmatag, sc->sc_vd);

	hv_ldc_set_map_table(lc->lc_id, 0, 0);
	ldc_map_free(sc->sc_lm);
}

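/* Allocate a descriptor ring and mark all descriptors free. */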
struct vnet_dring *
vnet_dring_alloc(bus_dma_tag_t t, int nentries)
{
	struct vnet_dring *vd;
	bus_size_t size;
	vaddr_t va;
	int i;

	vd = kmem_zalloc(sizeof(struct vnet_dring), KM_SLEEP);
	if (vd == NULL)
		return NULL;

	size = roundup(nentries * sizeof(struct vnet_desc), PAGE_SIZE);

	va = (vaddr_t)kmem_zalloc(size, KM_SLEEP);
	vd->vd_desc = (struct vnet_desc *)va;
	vd->vd_nentries = nentries;
	for (i = 0; i < vd->vd_nentries; i++)
		vd->vd_desc[i].hdr.dstate = VIO_DESC_FREE;
	return (vd);
}

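/* Free a descriptor ring allocated by vnet_dring_alloc(). */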
void
vnet_dring_free(bus_dma_tag_t t, struct vnet_dring *vd)
{
	bus_size_t size;

	size = vd->vd_nentries * sizeof(struct vnet_desc);
	size = roundup(size, PAGE_SIZE);

	kmem_free(vd->vd_desc, size);
	kmem_free(vd, sizeof(struct vnet_dring));
}