xref: /netbsd-src/sys/arch/sparc64/dev/vnet.c (revision 627f7eb200a4419d89b531d55fccd2ee3ffdcde0)
1 /*	$NetBSD: vnet.c,v 1.5 2021/03/15 18:44:04 palle Exp $	*/
2 /*	$OpenBSD: vnet.c,v 1.62 2020/07/10 13:26:36 patrick Exp $	*/
3 /*
4  * Copyright (c) 2009, 2015 Mark Kettenis
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/kmem.h>
20 #include <sys/param.h>
21 #include <sys/atomic.h>
22 #include <sys/callout.h>
23 #include <sys/device.h>
24 #include <sys/malloc.h>
25 #include <sys/pool.h>
26 #include <sys/mbuf.h>
27 #include <sys/socket.h>
28 #include <sys/sockio.h>
29 #include <sys/systm.h>
30 
31 #include <machine/autoconf.h>
32 #include <machine/hypervisor.h>
33 #include <machine/openfirm.h>
34 
35 #include <net/if.h>
36 #include <net/if_media.h>
37 
38 #include <netinet/in.h>
39 #include <net/if_ether.h>
40 
41 #if NBPFILTER > 0
42 #include <net/bpf.h>
43 #endif
44 
45 #include <uvm/uvm_extern.h>
46 
47 #include <sparc64/dev/cbusvar.h>
48 #include <sparc64/dev/ldcvar.h>
49 #include <sparc64/dev/viovar.h>
50 
51 #ifdef VNET_DEBUG
52 #define DPRINTF(x)	printf x
53 #else
54 #define DPRINTF(x)
55 #endif
56 
57 #define VNET_TX_ENTRIES		32
58 #define VNET_RX_ENTRIES		32
59 
60 struct vnet_attr_info {
61 	struct vio_msg_tag	tag;
62 	uint8_t			xfer_mode;
63 	uint8_t			addr_type;
64 	uint16_t		ack_freq;
65 	uint32_t		_reserved1;
66 	uint64_t		addr;
67 	uint64_t		mtu;
68 	uint64_t		_reserved2[3];
69 };
70 
71 /* Address types. */
72 #define VNET_ADDR_ETHERMAC	0x01
73 
74 /* Sub-Type envelopes. */
75 #define VNET_MCAST_INFO		0x0101
76 
77 #define VNET_NUM_MCAST		7
78 
79 struct vnet_mcast_info {
80 	struct vio_msg_tag	tag;
81 	uint8_t			set;
82 	uint8_t			count;
83 	uint8_t			mcast_addr[VNET_NUM_MCAST][ETHER_ADDR_LEN];
84 	uint32_t		_reserved;
85 };
86 
87 struct vnet_desc {
88 	struct vio_dring_hdr	hdr;
89 	uint32_t		nbytes;
90 	uint32_t		ncookies;
91 	struct ldc_cookie	cookie[2];
92 };
93 
94 struct vnet_desc_msg {
95 	struct vio_msg_tag	tag;
96 	uint64_t		seq_no;
97 	uint64_t		desc_handle;
98 	uint32_t		nbytes;
99 	uint32_t		ncookies;
100 	struct ldc_cookie	cookie[1];
101 };
102 
103 struct vnet_dring {
104 	bus_dmamap_t		vd_map;
105 	bus_dma_segment_t	vd_seg;
106 	struct vnet_desc	*vd_desc;
107 	int			vd_nentries;
108 };
109 
110 struct vnet_dring *vnet_dring_alloc(bus_dma_tag_t, int);
111 void	vnet_dring_free(bus_dma_tag_t, struct vnet_dring *);
112 
113 /*
114  * For now, we only support vNet 1.0.
115  */
116 #define VNET_MAJOR	1
117 #define VNET_MINOR	0
118 
119 /*
120  * The vNet protocol wants the IP header to be 64-bit aligned, so
121  * define our own variant of ETHER_ALIGN.
122  */
123 #define VNET_ETHER_ALIGN	6
124 
/*
 * Software state kept alongside each transmit descriptor.
 */
struct vnet_soft_desc {
	int		vsd_map_idx;	/* index into the LDC map's lm_slot[] */
	unsigned char	*vsd_buf;	/* buffer taken from sc_pool, or NULL
					 * when the slot is idle */
};
129 
130 struct vnet_softc {
131 	struct device	sc_dv;
132 	bus_space_tag_t	sc_bustag;
133 	bus_dma_tag_t	sc_dmatag;
134 
135 	uint64_t	sc_tx_ino;
136 	uint64_t	sc_rx_ino;
137 	void		*sc_tx_ih;
138 	void		*sc_rx_ih;
139 
140 	struct ldc_conn	sc_lc;
141 
142 	uint16_t	sc_vio_state;
143 #define VIO_SND_VER_INFO	0x0001
144 #define VIO_ACK_VER_INFO	0x0002
145 #define VIO_RCV_VER_INFO	0x0004
146 #define VIO_SND_ATTR_INFO	0x0008
147 #define VIO_ACK_ATTR_INFO	0x0010
148 #define VIO_RCV_ATTR_INFO	0x0020
149 #define VIO_SND_DRING_REG	0x0040
150 #define VIO_ACK_DRING_REG	0x0080
151 #define VIO_RCV_DRING_REG	0x0100
152 #define VIO_SND_RDX		0x0200
153 #define VIO_ACK_RDX		0x0400
154 #define VIO_RCV_RDX		0x0800
155 
156 	struct callout	sc_handshake_co;
157 
158 	uint8_t		sc_xfer_mode;
159 
160 	uint32_t	sc_local_sid;
161 	uint64_t	sc_dring_ident;
162 	uint64_t	sc_seq_no;
163 
164 	u_int		sc_tx_prod;
165 	u_int		sc_tx_cons;
166 
167 	u_int		sc_peer_state;
168 
169 	struct ldc_map	*sc_lm;
170 	struct vnet_dring *sc_vd;
171 	struct vnet_soft_desc *sc_vsd;
172 #define VNET_NUM_SOFT_DESC	128
173 
174 	size_t		sc_peer_desc_size;
175 	struct ldc_cookie sc_peer_dring_cookie;
176 	int		sc_peer_dring_nentries;
177 
178 	struct pool	sc_pool;
179 
180 	struct ethercom	sc_ethercom;
181 	struct ifmedia	sc_media;
182 	u_int8_t sc_macaddr[ETHER_ADDR_LEN];
183 };
184 
185 int vnet_match (device_t, cfdata_t, void *);
186 void vnet_attach (device_t, device_t, void *);
187 
188 CFATTACH_DECL_NEW(vnet, sizeof(struct vnet_softc),
189     vnet_match, vnet_attach, NULL, NULL);
190 
191 int	vnet_tx_intr(void *);
192 int	vnet_rx_intr(void *);
193 void	vnet_handshake(void *);
194 
195 void	vio_rx_data(struct ldc_conn *, struct ldc_pkt *);
196 void	vnet_rx_vio_ctrl(struct vnet_softc *, struct vio_msg *);
197 void	vnet_rx_vio_ver_info(struct vnet_softc *, struct vio_msg_tag *);
198 void	vnet_rx_vio_attr_info(struct vnet_softc *, struct vio_msg_tag *);
199 void	vnet_rx_vio_dring_reg(struct vnet_softc *, struct vio_msg_tag *);
200 void	vnet_rx_vio_rdx(struct vnet_softc *sc, struct vio_msg_tag *);
201 void	vnet_rx_vio_mcast_info(struct vnet_softc *sc, struct vio_msg_tag *);
202 void	vnet_rx_vio_data(struct vnet_softc *sc, struct vio_msg *);
203 void	vnet_rx_vio_desc_data(struct vnet_softc *sc, struct vio_msg_tag *);
204 void	vnet_rx_vio_dring_data(struct vnet_softc *sc, struct vio_msg_tag *);
205 
206 void	vnet_ldc_reset(struct ldc_conn *);
207 void	vnet_ldc_start(struct ldc_conn *);
208 
209 void	vnet_sendmsg(struct vnet_softc *, void *, size_t);
210 void	vnet_send_ver_info(struct vnet_softc *, uint16_t, uint16_t);
211 void	vnet_send_attr_info(struct vnet_softc *);
212 void	vnet_send_dring_reg(struct vnet_softc *);
213 void	vio_send_rdx(struct vnet_softc *);
214 void	vnet_send_dring_data(struct vnet_softc *, uint32_t);
215 
216 void	vnet_start(struct ifnet *);
217 void	vnet_start_desc(struct ifnet *);
218 int		vnet_ioctl(struct ifnet *, u_long, void *);
219 void	vnet_watchdog(struct ifnet *);
220 
221 int		vnet_media_change(struct ifnet *);
222 void	vnet_media_status(struct ifnet *, struct ifmediareq *);
223 
224 void	vnet_link_state(struct vnet_softc *sc);
225 
226 void	vnet_setmulti(struct vnet_softc *, int);
227 
228 int		vnet_init(struct ifnet *);
229 void	vnet_stop(struct ifnet *, int);
230 
231 int vnet_match(device_t parent, cfdata_t match, void *aux)
232 {
233 
234 	struct cbus_attach_args *ca = aux;
235 
236 	if (strcmp(ca->ca_name, "network") == 0)
237 		return (1);
238 
239 	return (0);
240 }
241 
242 void
243 vnet_attach(struct device *parent, struct device *self, void *aux)
244 {
245 	struct vnet_softc *sc = device_private(self);
246 	struct cbus_attach_args *ca = aux;
247 	struct ldc_conn *lc;
248 	struct ifnet *ifp;
249 
250 	sc->sc_bustag = ca->ca_bustag;
251 	sc->sc_dmatag = ca->ca_dmatag;
252 	sc->sc_tx_ino = ca->ca_tx_ino;
253 	sc->sc_rx_ino = ca->ca_rx_ino;
254 
255 	printf(": ivec 0x%" PRIx64 ", 0x%" PRIx64, sc->sc_tx_ino, sc->sc_rx_ino);
256 
257 	/*
258 	 * Un-configure queues before registering interrupt handlers,
259 	 * such that we dont get any stale LDC packets or events.
260 	 */
261 	hv_ldc_tx_qconf(ca->ca_id, 0, 0);
262 	hv_ldc_rx_qconf(ca->ca_id, 0, 0);
263 
264 	sc->sc_tx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_tx_ino,
265 	    IPL_NET, vnet_tx_intr, sc);
266 	sc->sc_rx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_rx_ino,
267 	    IPL_NET, vnet_rx_intr, sc);
268 	if (sc->sc_tx_ih == NULL || sc->sc_rx_ih == NULL) {
269 		printf(", can't establish interrupts\n");
270 		return;
271 	}
272 
273 	lc = &sc->sc_lc;
274 	lc->lc_id = ca->ca_id;
275 	lc->lc_sc = sc;
276 	lc->lc_reset = vnet_ldc_reset;
277 	lc->lc_start = vnet_ldc_start;
278 	lc->lc_rx_data = vio_rx_data;
279 
280 	callout_init(&sc->sc_handshake_co, 0);
281 
282 	sc->sc_peer_state = VIO_DP_STOPPED;
283 
284 	lc->lc_txq = ldc_queue_alloc(VNET_TX_ENTRIES);
285 	if (lc->lc_txq == NULL) {
286 		printf(", can't allocate tx queue\n");
287 		return;
288 	}
289 
290 	lc->lc_rxq = ldc_queue_alloc(VNET_RX_ENTRIES);
291 	if (lc->lc_rxq == NULL) {
292 		printf(", can't allocate rx queue\n");
293 		goto free_txqueue;
294 	}
295 
296 	if (OF_getprop(ca->ca_node, "local-mac-address",
297 				   sc->sc_macaddr, ETHER_ADDR_LEN) > 0) {
298 		printf(", address %s", ether_sprintf(sc->sc_macaddr));
299 	} else {
300 		printf(", cannot retrieve local mac address\n");
301 		return;
302 	}
303 
304 	/*
305 	 * Each interface gets its own pool.
306 	 */
307 	pool_init(&sc->sc_pool, 2048, 0, 0, 0, sc->sc_dv.dv_xname, NULL, IPL_NET);
308 
309 	ifp = &sc->sc_ethercom.ec_if;
310 	ifp->if_softc = sc;
311 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
312 	ifp->if_init = vnet_init;
313 	ifp->if_ioctl = vnet_ioctl;
314 	ifp->if_start = vnet_start;
315 	ifp->if_stop = vnet_stop;
316 	ifp->if_watchdog = vnet_watchdog;
317 	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
318 	IFQ_SET_MAXLEN(&ifp->if_snd, 31); /* XXX */
319 
320 	ifmedia_init(&sc->sc_media, 0, vnet_media_change, vnet_media_status);
321 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
322 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
323 
324 	if_attach(ifp);
325 	ether_ifattach(ifp, sc->sc_macaddr);
326 
327 	printf("\n");
328 	return;
329 free_txqueue:
330 	ldc_queue_free(lc->lc_txq);
331 }
332 
333 int
334 vnet_tx_intr(void *arg)
335 {
336 	struct vnet_softc *sc = arg;
337 	struct ldc_conn *lc = &sc->sc_lc;
338 	uint64_t tx_head, tx_tail, tx_state;
339 
340 	hv_ldc_tx_get_state(lc->lc_id, &tx_head, &tx_tail, &tx_state);
341 	if (tx_state != lc->lc_tx_state) {
342 		switch (tx_state) {
343 		case LDC_CHANNEL_DOWN:
344 			DPRINTF(("%s: Tx link down\n", __func__));
345 			break;
346 		case LDC_CHANNEL_UP:
347 			DPRINTF(("%s: Tx link up\n", __func__));
348 			break;
349 		case LDC_CHANNEL_RESET:
350 			DPRINTF(("%s: Tx link reset\n", __func__));
351 			break;
352 		}
353 		lc->lc_tx_state = tx_state;
354 	}
355 
356 	return (1);
357 }
358 
359 int
360 vnet_rx_intr(void *arg)
361 {
362 	struct vnet_softc *sc = arg;
363 	struct ldc_conn *lc = &sc->sc_lc;
364 	uint64_t rx_head, rx_tail, rx_state;
365 	struct ldc_pkt *lp;
366 	int err;
367 
368 	err = hv_ldc_rx_get_state(lc->lc_id, &rx_head, &rx_tail, &rx_state);
369 	if (err == H_EINVAL) {
370 		printf("hv_ldc_rx_get_state failed\n");
371 		return (0);
372 	}
373 	if (err != H_EOK) {
374 		printf("hv_ldc_rx_get_state %d\n", err);
375 		return (0);
376 	}
377 
378 	if (rx_state != lc->lc_rx_state) {
379 		switch (rx_state) {
380 		case LDC_CHANNEL_DOWN:
381 			lc->lc_tx_seqid = 0;
382 			lc->lc_state = 0;
383 			lc->lc_reset(lc);
384 			if (rx_head == rx_tail)
385 				break;
386 			/* Discard and ack pending I/O. */
387 			DPRINTF(("setting rx qhead to %" PRId64 "\n", rx_tail));
388 			err = hv_ldc_rx_set_qhead(lc->lc_id, rx_tail);
389 			if (err == H_EOK)
390 				break;
391 			printf("%s: hv_ldc_rx_set_qhead %d\n", __func__, err);
392 			break;
393 		case LDC_CHANNEL_UP:
394 			callout_reset(&sc->sc_handshake_co, hz / 2, vnet_handshake, sc);
395 			break;
396 		case LDC_CHANNEL_RESET:
397 			DPRINTF(("%s: Rx link reset\n", __func__));
398 			lc->lc_tx_seqid = 0;
399 			lc->lc_state = 0;
400 			lc->lc_reset(lc);
401 			callout_reset(&sc->sc_handshake_co, hz / 2, vnet_handshake, sc);
402 			if (rx_head == rx_tail) {
403 				break;
404 			}
405 			/* Discard and ack pending I/O. */
406 			DPRINTF(("setting rx qhead to %" PRId64 "\n", rx_tail));
407 			err = hv_ldc_rx_set_qhead(lc->lc_id, rx_tail);
408 			if (err == H_EOK)
409 				break;
410 			printf("%s: hv_ldc_rx_set_qhead %d\n", __func__, err);
411 			break;
412 		default:
413 			DPRINTF(("%s: unhandled rx_state %" PRIx64 "\n", __func__, rx_state));
414 			break;
415 		}
416 		lc->lc_rx_state = rx_state;
417 		return (1);
418 	} else {
419 	}
420 
421 	if (rx_head == rx_tail)
422 	{
423 		DPRINTF(("%s: head eq tail\n", __func__));
424 		return (0);
425 	}
426 	lp = (struct ldc_pkt *)(uintptr_t)(lc->lc_rxq->lq_va + rx_head);
427 	switch (lp->type) {
428 	case LDC_CTRL:
429 		DPRINTF(("%s: LDC_CTRL\n", __func__));
430 		ldc_rx_ctrl(lc, lp);
431 		break;
432 
433 	case LDC_DATA:
434 		DPRINTF(("%s: LDC_DATA\n", __func__));
435 		ldc_rx_data(lc, lp);
436 		break;
437 
438 	default:
439 		DPRINTF(("%s: unhandled type %0x02/%0x02/%0x02\n",
440 				 __func__, lp->type, lp->stype, lp->ctrl));
441 		Debugger();
442 		ldc_reset(lc);
443 		break;
444 	}
445 
446 	if (lc->lc_state == 0)
447 		return (1);
448 
449 	rx_head += sizeof(*lp);
450 	rx_head &= ((lc->lc_rxq->lq_nentries * sizeof(*lp)) - 1);
451 	err = hv_ldc_rx_set_qhead(lc->lc_id, rx_head);
452 	if (err != H_EOK)
453 		printf("%s: hv_ldc_rx_set_qhead %d\n", __func__, err);
454 	return (1);
455 }
456 
457 void
458 vnet_handshake(void *arg)
459 {
460 	struct vnet_softc *sc = arg;
461 
462 	ldc_send_vers(&sc->sc_lc);
463 }
464 
465 void
466 vio_rx_data(struct ldc_conn *lc, struct ldc_pkt *lp)
467 {
468 	struct vio_msg *vm = (struct vio_msg *)lp;
469 
470 	switch (vm->type) {
471 	case VIO_TYPE_CTRL:
472 		if ((lp->env & LDC_FRAG_START) == 0 &&
473 		    (lp->env & LDC_FRAG_STOP) == 0) {
474 			DPRINTF(("%s: FRAG_START==0 and FRAG_STOP==0\n", __func__));
475 			return;
476 		}
477 		vnet_rx_vio_ctrl(lc->lc_sc, vm);
478 		break;
479 
480 	case VIO_TYPE_DATA:
481 		if((lp->env & LDC_FRAG_START) == 0) {
482 			DPRINTF(("%s: FRAG_START==0\n", __func__));
483 			return;
484 		}
485 		vnet_rx_vio_data(lc->lc_sc, vm);
486 		break;
487 
488 	default:
489 		DPRINTF(("Unhandled packet type 0x%02x\n", vm->type));
490 		ldc_reset(lc);
491 		break;
492 	}
493 }
494 
495 void
496 vnet_rx_vio_ctrl(struct vnet_softc *sc, struct vio_msg *vm)
497 {
498 	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;
499 
500 	switch (tag->stype_env) {
501 	case VIO_VER_INFO:
502 		vnet_rx_vio_ver_info(sc, tag);
503 		break;
504 	case VIO_ATTR_INFO:
505 		vnet_rx_vio_attr_info(sc, tag);
506 		break;
507 	case VIO_DRING_REG:
508 		vnet_rx_vio_dring_reg(sc, tag);
509 		break;
510 	case VIO_RDX:
511 		vnet_rx_vio_rdx(sc, tag);
512 		break;
513 	case VNET_MCAST_INFO:
514 		vnet_rx_vio_mcast_info(sc, tag);
515 		break;
516 	default:
517 		printf("%s: CTRL/0x%02x/0x%04x FIXME\n",
518 				 __func__, tag->stype, tag->stype_env);
519 		break;
520 	}
521 }
522 
523 void
524 vnet_rx_vio_ver_info(struct vnet_softc *sc, struct vio_msg_tag *tag)
525 {
526 	struct vio_ver_info *vi = (struct vio_ver_info *)tag;
527 
528 	switch (vi->tag.stype) {
529 	case VIO_SUBTYPE_INFO:
530 		DPRINTF(("CTRL/INFO/VER_INFO\n"));
531 
532 		/* Make sure we're talking to a virtual network device. */
533 		if (vi->dev_class != VDEV_NETWORK &&
534 		    vi->dev_class != VDEV_NETWORK_SWITCH) {
535 			DPRINTF(("Class is not network or network switch\n"));
536 			/* Huh, we're not talking to a network device? */
537 			printf("Not a network device\n");
538 			vi->tag.stype = VIO_SUBTYPE_NACK;
539 			vnet_sendmsg(sc, vi, sizeof(*vi));
540 			return;
541 		}
542 
543 		if (vi->major != VNET_MAJOR) {
544 			DPRINTF(("Major mismatch %" PRId8 " vs %" PRId8 "\n",
545 					 vi->major, VNET_MAJOR));
546 			vi->tag.stype = VIO_SUBTYPE_NACK;
547 			vi->major = VNET_MAJOR;
548 			vi->minor = VNET_MINOR;
549 			vnet_sendmsg(sc, vi, sizeof(*vi));
550 			return;
551 		}
552 
553 		vi->tag.stype = VIO_SUBTYPE_ACK;
554 		vi->tag.sid = sc->sc_local_sid;
555 		vi->minor = VNET_MINOR;
556 		vnet_sendmsg(sc, vi, sizeof(*vi));
557 		sc->sc_vio_state |= VIO_RCV_VER_INFO;
558 		break;
559 
560 	case VIO_SUBTYPE_ACK:
561 		DPRINTF(("CTRL/ACK/VER_INFO\n"));
562 		if (!ISSET(sc->sc_vio_state, VIO_SND_VER_INFO)) {
563 			ldc_reset(&sc->sc_lc);
564 			break;
565 		}
566 		sc->sc_vio_state |= VIO_ACK_VER_INFO;
567 		break;
568 
569 	default:
570 		DPRINTF(("CTRL/0x%02x/VER_INFO\n", vi->tag.stype));
571 		break;
572 	}
573 
574 	if (ISSET(sc->sc_vio_state, VIO_RCV_VER_INFO) &&
575 	    ISSET(sc->sc_vio_state, VIO_ACK_VER_INFO))
576 		vnet_send_attr_info(sc);
577 }
578 
579 void
580 vnet_rx_vio_attr_info(struct vnet_softc *sc, struct vio_msg_tag *tag)
581 {
582 	struct vnet_attr_info *ai = (struct vnet_attr_info *)tag;
583 
584 	switch (ai->tag.stype) {
585 	case VIO_SUBTYPE_INFO:
586 		DPRINTF(("CTRL/INFO/ATTR_INFO\n"));
587 		sc->sc_xfer_mode = ai->xfer_mode;
588 		ai->tag.stype = VIO_SUBTYPE_ACK;
589 		ai->tag.sid = sc->sc_local_sid;
590 		vnet_sendmsg(sc, ai, sizeof(*ai));
591 		sc->sc_vio_state |= VIO_RCV_ATTR_INFO;
592 		break;
593 
594 	case VIO_SUBTYPE_ACK:
595 		DPRINTF(("CTRL/ACK/ATTR_INFO\n"));
596 		if (!ISSET(sc->sc_vio_state, VIO_SND_ATTR_INFO)) {
597 			ldc_reset(&sc->sc_lc);
598 			break;
599 		}
600 		sc->sc_vio_state |= VIO_ACK_ATTR_INFO;
601 		break;
602 
603 	default:
604 		DPRINTF(("CTRL/0x%02x/ATTR_INFO\n", ai->tag.stype));
605 		break;
606 	}
607 
608 	if (ISSET(sc->sc_vio_state, VIO_RCV_ATTR_INFO) &&
609 	    ISSET(sc->sc_vio_state, VIO_ACK_ATTR_INFO)) {
610 		if (sc->sc_xfer_mode == VIO_DRING_MODE)
611 			vnet_send_dring_reg(sc);
612 		else
613 			vio_send_rdx(sc);
614 	}
615 }
616 
617 void
618 vnet_rx_vio_dring_reg(struct vnet_softc *sc, struct vio_msg_tag *tag)
619 {
620 	struct vio_dring_reg *dr = (struct vio_dring_reg *)tag;
621 
622 	switch (dr->tag.stype) {
623 	case VIO_SUBTYPE_INFO:
624 		DPRINTF(("CTRL/INFO/DRING_REG\n"));
625 		sc->sc_peer_dring_nentries = dr->num_descriptors;
626 		sc->sc_peer_desc_size = dr->descriptor_size;
627 		sc->sc_peer_dring_cookie = dr->cookie[0];
628 
629 		dr->tag.stype = VIO_SUBTYPE_ACK;
630 		dr->tag.sid = sc->sc_local_sid;
631 		vnet_sendmsg(sc, dr, sizeof(*dr));
632 		sc->sc_vio_state |= VIO_RCV_DRING_REG;
633 		break;
634 
635 	case VIO_SUBTYPE_ACK:
636 		DPRINTF(("CTRL/ACK/DRING_REG\n"));
637 		if (!ISSET(sc->sc_vio_state, VIO_SND_DRING_REG)) {
638 			ldc_reset(&sc->sc_lc);
639 			break;
640 		}
641 
642 		sc->sc_dring_ident = dr->dring_ident;
643 		sc->sc_seq_no = 1;
644 
645 		sc->sc_vio_state |= VIO_ACK_DRING_REG;
646 		break;
647 
648 	default:
649 		DPRINTF(("CTRL/0x%02x/DRING_REG\n", dr->tag.stype));
650 		break;
651 	}
652 
653 	if (ISSET(sc->sc_vio_state, VIO_RCV_DRING_REG) &&
654 	    ISSET(sc->sc_vio_state, VIO_ACK_DRING_REG))
655 		vio_send_rdx(sc);
656 }
657 
658 void
659 vnet_rx_vio_rdx(struct vnet_softc *sc, struct vio_msg_tag *tag)
660 {
661 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
662 
663 	switch(tag->stype) {
664 	case VIO_SUBTYPE_INFO:
665 		DPRINTF(("CTRL/INFO/RDX\n"));
666 		tag->stype = VIO_SUBTYPE_ACK;
667 		tag->sid = sc->sc_local_sid;
668 		vnet_sendmsg(sc, tag, sizeof(*tag));
669 		sc->sc_vio_state |= VIO_RCV_RDX;
670 		break;
671 
672 	case VIO_SUBTYPE_ACK:
673 		DPRINTF(("CTRL/ACK/RDX\n"));
674 		if (!ISSET(sc->sc_vio_state, VIO_SND_RDX)) {
675 			ldc_reset(&sc->sc_lc);
676 			break;
677 		}
678 		sc->sc_vio_state |= VIO_ACK_RDX;
679 		break;
680 
681 	default:
682 		DPRINTF(("CTRL/0x%02x/RDX (VIO)\n", tag->stype));
683 		break;
684 	}
685 
686 	if (ISSET(sc->sc_vio_state, VIO_RCV_RDX) &&
687 	    ISSET(sc->sc_vio_state, VIO_ACK_RDX)) {
688 		/* Link is up! */
689 		vnet_link_state(sc);
690 
691 		/* Configure multicast now that we can. */
692 		vnet_setmulti(sc, 1);
693 
694 		KERNEL_LOCK(1, curlwp);
695 		ifp->if_flags &= ~IFF_OACTIVE;
696 		vnet_start(ifp);
697 		KERNEL_UNLOCK_ONE(curlwp);
698 	}
699 }
700 
701 void
702 vnet_rx_vio_mcast_info(struct vnet_softc *sc, struct vio_msg_tag *tag)
703 {
704 	switch(tag->stype) {
705 
706 		case VIO_SUBTYPE_INFO:
707 			DPRINTF(("CTRL/INFO/MCAST_INFO\n"));
708 			break;
709 
710 		case VIO_SUBTYPE_ACK:
711 			DPRINTF(("CTRL/ACK/MCAST_INFO\n"));
712 			break;
713 
714 		case VIO_SUBTYPE_NACK:
715 			DPRINTF(("CTRL/NACK/MCAST_INFO\n"));
716 			break;
717 
718 		default:
719 			printf("%s: CTRL/0x%02x/0x%04x\n",
720 				   __func__, tag->stype, tag->stype_env);
721 			break;
722 	}
723 }
724 
725 void
726 vnet_rx_vio_data(struct vnet_softc *sc, struct vio_msg *vm)
727 {
728 	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;
729 
730 	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
731 	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX)) {
732 		DPRINTF(("Spurious DATA/0x%02x/0x%04x\n", tag->stype,
733 		    tag->stype_env));
734 		return;
735 	}
736 
737 	switch(tag->stype_env) {
738 	case VIO_DESC_DATA:
739 		vnet_rx_vio_desc_data(sc, tag);
740 		break;
741 
742 	case VIO_DRING_DATA:
743 		vnet_rx_vio_dring_data(sc, tag);
744 		break;
745 
746 	default:
747 		DPRINTF(("DATA/0x%02x/0x%04x\n", tag->stype, tag->stype_env));
748 		break;
749 	}
750 }
751 
752 void
753 vnet_rx_vio_desc_data(struct vnet_softc *sc, struct vio_msg_tag *tag)
754 {
755 
756 	struct vnet_desc_msg *dm = (struct vnet_desc_msg *)tag;
757 	struct ldc_conn *lc = &sc->sc_lc;
758 	struct ldc_map *map = sc->sc_lm;
759 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
760 	struct mbuf *m;
761 	unsigned char *buf;
762 	paddr_t pa;
763 	psize_t nbytes;
764 	u_int cons;
765 	int err;
766 
767 	switch(tag->stype) {
768 	case VIO_SUBTYPE_INFO:
769 		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
770 		if (buf == NULL) {
771 			if_statinc(ifp, if_ierrors);
772 			goto skip;
773 		}
774 		nbytes = roundup(dm->nbytes, 8);
775 
776 		if (dm->nbytes > (ETHER_MAX_LEN - ETHER_CRC_LEN)) {
777 			if_statinc(ifp, if_ierrors);
778 			goto skip;
779 		}
780 
781 		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
782 		err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
783 		    dm->cookie[0].addr, pa, nbytes, &nbytes);
784 		if (err != H_EOK) {
785 			pool_put(&sc->sc_pool, buf);
786 			if_statinc(ifp, if_ierrors);
787 			goto skip;
788 		}
789 
790 		/* Stupid OBP doesn't align properly. */
791 		m = m_devget(buf, dm->nbytes, 0, ifp);
792 		pool_put(&sc->sc_pool, buf);
793 		if (m == NULL) {
794 			if_statinc(ifp, if_ierrors);
795 			goto skip;
796 		}
797 
798 		/* Pass it on. */
799 		if_percpuq_enqueue(ifp->if_percpuq, m);
800 	skip:
801 		dm->tag.stype = VIO_SUBTYPE_ACK;
802 		dm->tag.sid = sc->sc_local_sid;
803 		vnet_sendmsg(sc, dm, sizeof(*dm));
804 		break;
805 
806 	case VIO_SUBTYPE_ACK:
807 		DPRINTF(("DATA/ACK/DESC_DATA\n"));
808 
809 		if (dm->desc_handle != sc->sc_tx_cons) {
810 			printf("out of order\n");
811 			return;
812 		}
813 
814 		cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);
815 
816 		map->lm_slot[sc->sc_vsd[cons].vsd_map_idx].entry = 0;
817 		atomic_dec_32(&map->lm_count);
818 
819 		pool_put(&sc->sc_pool, sc->sc_vsd[cons].vsd_buf);
820 		sc->sc_vsd[cons].vsd_buf = NULL;
821 
822 		sc->sc_tx_cons++;
823 		break;
824 
825 	case VIO_SUBTYPE_NACK:
826 		DPRINTF(("DATA/NACK/DESC_DATA\n"));
827 		break;
828 
829 	default:
830 		DPRINTF(("DATA/0x%02x/DESC_DATA\n", tag->stype));
831 		break;
832 	}
833 }
834 
835 void
836 vnet_rx_vio_dring_data(struct vnet_softc *sc, struct vio_msg_tag *tag)
837 {
838 	struct vio_dring_msg *dm = (struct vio_dring_msg *)tag;
839 	struct ldc_conn *lc = &sc->sc_lc;
840 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
841 	struct mbuf *m = NULL;
842 	paddr_t pa;
843 	psize_t nbytes;
844 	int err;
845 
846 	switch(tag->stype) {
847 	case VIO_SUBTYPE_INFO:
848 	{
849 		DPRINTF(("%s: VIO_SUBTYPE_INFO\n", __func__));
850 		struct vnet_desc desc;
851 		uint64_t cookie;
852 		paddr_t desc_pa;
853 		int idx, ack_end_idx = -1;
854 
855 		idx = dm->start_idx;
856 		for (;;) {
857 			cookie = sc->sc_peer_dring_cookie.addr;
858 			cookie += idx * sc->sc_peer_desc_size;
859 			nbytes = sc->sc_peer_desc_size;
860 			pmap_extract(pmap_kernel(), (vaddr_t)&desc, &desc_pa);
861 			err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN, cookie,
862 			    desc_pa, nbytes, &nbytes);
863 			if (err != H_EOK) {
864 				printf("hv_ldc_copy_in %d\n", err);
865 				break;
866 			}
867 
868 			if (desc.hdr.dstate != VIO_DESC_READY)
869 				break;
870 
871 			if (desc.nbytes > (ETHER_MAX_LEN - ETHER_CRC_LEN)) {
872 				if_statinc(ifp, if_ierrors);
873 				goto skip;
874 			}
875 
876 			MGETHDR(m, M_DONTWAIT, MT_DATA);
877 			if (m == NULL) {
878 				DPRINTF(("%s: MGETHDR failed\n", __func__));
879 				if_statinc(ifp, if_ierrors);
880 				goto skip;
881 			}
882 			MCLGET(m, M_DONTWAIT);
883 			if ((m->m_flags & M_EXT) == 0)
884 				break;
885 			m->m_len = m->m_pkthdr.len = desc.nbytes;
886 			nbytes = roundup(desc.nbytes + VNET_ETHER_ALIGN, 8);
887 
888 			pmap_extract(pmap_kernel(), (vaddr_t)m->m_data, &pa);
889 			err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
890 			    desc.cookie[0].addr, pa, nbytes, &nbytes);
891 			if (err != H_EOK) {
892 				m_freem(m);
893 				goto skip;
894 			}
895 			m->m_data += VNET_ETHER_ALIGN;
896 			m_set_rcvif(m, ifp);
897 
898 			if_percpuq_enqueue(ifp->if_percpuq, m);
899 
900 		skip:
901 			desc.hdr.dstate = VIO_DESC_DONE;
902 			nbytes = sc->sc_peer_desc_size;
903 			err = hv_ldc_copy(lc->lc_id, LDC_COPY_OUT, cookie,
904 			    desc_pa, nbytes, &nbytes);
905 			if (err != H_EOK)
906 				printf("hv_ldc_copy_out %d\n", err);
907 
908 			ack_end_idx = idx;
909 			if (++idx == sc->sc_peer_dring_nentries)
910 				idx = 0;
911 		}
912 
913 		if (ack_end_idx == -1) {
914 			dm->tag.stype = VIO_SUBTYPE_NACK;
915 		} else {
916 			dm->tag.stype = VIO_SUBTYPE_ACK;
917 			dm->end_idx = ack_end_idx;
918 		}
919 		dm->tag.sid = sc->sc_local_sid;
920 		dm->proc_state = VIO_DP_STOPPED;
921 		vnet_sendmsg(sc, dm, sizeof(*dm));
922 		break;
923 	}
924 
925 	case VIO_SUBTYPE_ACK:
926 	{
927 		DPRINTF(("%s: VIO_SUBTYPE_ACK\n", __func__));
928 		struct ldc_map *map = sc->sc_lm;
929 		u_int cons, count;
930 
931 		sc->sc_peer_state = dm->proc_state;
932 
933 		cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);
934 		while (sc->sc_vd->vd_desc[cons].hdr.dstate == VIO_DESC_DONE) {
935 			map->lm_slot[sc->sc_vsd[cons].vsd_map_idx].entry = 0;
936 			atomic_dec_32(&map->lm_count);
937 
938 			pool_put(&sc->sc_pool, sc->sc_vsd[cons].vsd_buf);
939 			sc->sc_vsd[cons].vsd_buf = NULL;
940 
941 			sc->sc_vd->vd_desc[cons].hdr.dstate = VIO_DESC_FREE;
942 			sc->sc_tx_cons++;
943 			cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);
944 		}
945 
946 		count = sc->sc_tx_prod - sc->sc_tx_cons;
947 		if (count > 0 && sc->sc_peer_state != VIO_DP_ACTIVE)
948 			vnet_send_dring_data(sc, cons);
949 
950 		KERNEL_LOCK(1, curlwp);
951 		if (count < (sc->sc_vd->vd_nentries - 1))
952 			ifp->if_flags &= ~IFF_OACTIVE;
953 		if (count == 0)
954 			ifp->if_timer = 0;
955 
956 		vnet_start(ifp);
957 		KERNEL_UNLOCK_ONE(curlwp);
958 		break;
959 	}
960 
961 	case VIO_SUBTYPE_NACK:
962 		DPRINTF(("DATA/NACK/DRING_DATA\n"));
963 		sc->sc_peer_state = VIO_DP_STOPPED;
964 		break;
965 
966 	default:
967 		DPRINTF(("DATA/0x%02x/DRING_DATA\n", tag->stype));
968 		break;
969 	}
970 }
971 
972 void
973 vnet_ldc_reset(struct ldc_conn *lc)
974 {
975 
976 	struct vnet_softc *sc = lc->lc_sc;
977 	int i;
978 
979 	callout_stop(&sc->sc_handshake_co);
980 	sc->sc_tx_prod = sc->sc_tx_cons = 0;
981 	sc->sc_peer_state = VIO_DP_STOPPED;
982 	sc->sc_vio_state = 0;
983 	vnet_link_state(sc);
984 
985 	sc->sc_lm->lm_next = 1;
986 	sc->sc_lm->lm_count = 1;
987 	for (i = 1; i < sc->sc_lm->lm_nentries; i++)
988 		sc->sc_lm->lm_slot[i].entry = 0;
989 
990 	for (i = 0; i < sc->sc_vd->vd_nentries; i++) {
991 		if (sc->sc_vsd[i].vsd_buf) {
992 			pool_put(&sc->sc_pool, sc->sc_vsd[i].vsd_buf);
993 			sc->sc_vsd[i].vsd_buf = NULL;
994 		}
995 		sc->sc_vd->vd_desc[i].hdr.dstate = VIO_DESC_FREE;
996 	}
997 }
998 
999 void
1000 vnet_ldc_start(struct ldc_conn *lc)
1001 {
1002 	struct vnet_softc *sc = lc->lc_sc;
1003 	callout_stop(&sc->sc_handshake_co);
1004 	vnet_send_ver_info(sc, VNET_MAJOR, VNET_MINOR);
1005 }
1006 
1007 void
1008 vnet_sendmsg(struct vnet_softc *sc, void *msg, size_t len)
1009 {
1010 	struct ldc_conn *lc = &sc->sc_lc;
1011 	int err;
1012 
1013 	err = ldc_send_unreliable(lc, msg, len);
1014 	if (err)
1015 		printf("%s: ldc_send_unreliable: %d\n", __func__, err);
1016 }
1017 
1018 void
1019 vnet_send_ver_info(struct vnet_softc *sc, uint16_t major, uint16_t minor)
1020 {
1021 	struct vio_ver_info vi;
1022 
1023 	bzero(&vi, sizeof(vi));
1024 	vi.tag.type = VIO_TYPE_CTRL;
1025 	vi.tag.stype = VIO_SUBTYPE_INFO;
1026 	vi.tag.stype_env = VIO_VER_INFO;
1027 	vi.tag.sid = sc->sc_local_sid;
1028 	vi.major = major;
1029 	vi.minor = minor;
1030 	vi.dev_class = VDEV_NETWORK;
1031 	vnet_sendmsg(sc, &vi, sizeof(vi));
1032 
1033 	sc->sc_vio_state |= VIO_SND_VER_INFO;
1034 }
1035 
1036 void
1037 vnet_send_attr_info(struct vnet_softc *sc)
1038 {
1039 	struct vnet_attr_info ai;
1040 	int i;
1041 
1042 	bzero(&ai, sizeof(ai));
1043 	ai.tag.type = VIO_TYPE_CTRL;
1044 	ai.tag.stype = VIO_SUBTYPE_INFO;
1045 	ai.tag.stype_env = VIO_ATTR_INFO;
1046 	ai.tag.sid = sc->sc_local_sid;
1047 	ai.xfer_mode = VIO_DRING_MODE;
1048 	ai.addr_type = VNET_ADDR_ETHERMAC;
1049 	ai.ack_freq = 0;
1050 	ai.addr = 0;
1051 	for (i = 0; i < ETHER_ADDR_LEN; i++) {
1052 		ai.addr <<= 8;
1053 		ai.addr |= sc->sc_macaddr[i];
1054 	}
1055 	ai.mtu = ETHER_MAX_LEN - ETHER_CRC_LEN;
1056 	vnet_sendmsg(sc, &ai, sizeof(ai));
1057 
1058 	sc->sc_vio_state |= VIO_SND_ATTR_INFO;
1059 }
1060 
1061 void
1062 vnet_send_dring_reg(struct vnet_softc *sc)
1063 {
1064 	struct vio_dring_reg dr;
1065 
1066 	bzero(&dr, sizeof(dr));
1067 	dr.tag.type = VIO_TYPE_CTRL;
1068 	dr.tag.stype = VIO_SUBTYPE_INFO;
1069 	dr.tag.stype_env = VIO_DRING_REG;
1070 	dr.tag.sid = sc->sc_local_sid;
1071 	dr.dring_ident = 0;
1072 	dr.num_descriptors = sc->sc_vd->vd_nentries;
1073 	dr.descriptor_size = sizeof(struct vnet_desc);
1074 	dr.options = VIO_TX_RING;
1075 	dr.ncookies = 1;
1076 	dr.cookie[0].addr = 0;
1077 	dr.cookie[0].size = PAGE_SIZE;
1078 	vnet_sendmsg(sc, &dr, sizeof(dr));
1079 
1080 	sc->sc_vio_state |= VIO_SND_DRING_REG;
1081 };
1082 
1083 void
1084 vio_send_rdx(struct vnet_softc *sc)
1085 {
1086 	struct vio_msg_tag tag;
1087 
1088 	tag.type = VIO_TYPE_CTRL;
1089 	tag.stype = VIO_SUBTYPE_INFO;
1090 	tag.stype_env = VIO_RDX;
1091 	tag.sid = sc->sc_local_sid;
1092 	vnet_sendmsg(sc, &tag, sizeof(tag));
1093 
1094 	sc->sc_vio_state |= VIO_SND_RDX;
1095 }
1096 
1097 void
1098 vnet_send_dring_data(struct vnet_softc *sc, uint32_t start_idx)
1099 {
1100 	struct vio_dring_msg dm;
1101 	u_int peer_state;
1102 
1103 	peer_state = atomic_swap_uint(&sc->sc_peer_state, VIO_DP_ACTIVE);
1104 	if (peer_state == VIO_DP_ACTIVE) {
1105 		DPRINTF(("%s: peer_state == VIO_DP_ACTIVE\n", __func__));
1106 		return;
1107 	}
1108 
1109 	bzero(&dm, sizeof(dm));
1110 	dm.tag.type = VIO_TYPE_DATA;
1111 	dm.tag.stype = VIO_SUBTYPE_INFO;
1112 	dm.tag.stype_env = VIO_DRING_DATA;
1113 	dm.tag.sid = sc->sc_local_sid;
1114 	dm.seq_no = sc->sc_seq_no++;
1115 	dm.dring_ident = sc->sc_dring_ident;
1116 	dm.start_idx = start_idx;
1117 	dm.end_idx = -1;
1118 	vnet_sendmsg(sc, &dm, sizeof(dm));
1119 }
1120 
1121 void
1122 vnet_start(struct ifnet *ifp)
1123 {
1124 	struct vnet_softc *sc = ifp->if_softc;
1125 	struct ldc_conn *lc = &sc->sc_lc;
1126 	struct ldc_map *map = sc->sc_lm;
1127 	struct mbuf *m;
1128 	paddr_t pa;
1129 	unsigned char *buf;
1130 	uint64_t tx_head, tx_tail, tx_state;
1131 	u_int start, prod, count;
1132 	int err;
1133 	if (!(ifp->if_flags & IFF_RUNNING))
1134 	{
1135 		DPRINTF(("%s: not in RUNNING state\n", __func__));
1136 		return;
1137 	}
1138 	if (ifp->if_flags & IFF_OACTIVE)
1139 	{
1140 		DPRINTF(("%s: already active\n", __func__));
1141 		return;
1142 	}
1143 
1144 	if (IFQ_IS_EMPTY(&ifp->if_snd))
1145 	{
1146 		DPRINTF(("%s: queue is empty\n", __func__));
1147 		return;
1148 	} else {
1149 		DPRINTF(("%s: queue size %d\n", __func__, ifp->if_snd.ifq_len));
1150 	}
1151 
1152 	/*
1153 	 * We cannot transmit packets until a VIO connection has been
1154 	 * established.
1155 	 */
1156 	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
1157 	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX))
1158 	{
1159 		DPRINTF(("%s: vio connection not established yet\n", __func__));
1160 		return;
1161 	}
1162 
1163 	/*
1164 	 * Make sure there is room in the LDC transmit queue to send a
1165 	 * DRING_DATA message.
1166 	 */
1167 	err = hv_ldc_tx_get_state(lc->lc_id, &tx_head, &tx_tail, &tx_state);
1168 	if (err != H_EOK) {
1169 		DPRINTF(("%s: no room in ldc transmit queue\n", __func__));
1170 		return;
1171 	}
1172 	tx_tail += sizeof(struct ldc_pkt);
1173 	tx_tail &= ((lc->lc_txq->lq_nentries * sizeof(struct ldc_pkt)) - 1);
1174 	if (tx_tail == tx_head) {
1175 		ifp->if_flags |= IFF_OACTIVE;
1176 		{
1177 			DPRINTF(("%s: tail equals head\n", __func__));
1178 			return;
1179 		}
1180 	}
1181 
1182 	if (sc->sc_xfer_mode == VIO_DESC_MODE) {
1183 		DPRINTF(("%s: vio_desc_mode\n", __func__));
1184 		vnet_start_desc(ifp);
1185 		return;
1186 	}
1187 
1188 	start = prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);
1189 	while (sc->sc_vd->vd_desc[prod].hdr.dstate == VIO_DESC_FREE) {
1190 		count = sc->sc_tx_prod - sc->sc_tx_cons;
1191 		if (count >= (sc->sc_vd->vd_nentries - 1) ||
1192 		    map->lm_count >= map->lm_nentries) {
1193 			DPRINTF(("%s: count issue\n", __func__));
1194 			ifp->if_flags |= IFF_OACTIVE;
1195 			break;
1196 		}
1197 
1198 		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
1199 		if (buf == NULL) {
1200 			DPRINTF(("%s: buff is NULL\n", __func__));
1201 			ifp->if_flags |= IFF_OACTIVE;
1202 			break;
1203 		}
1204 		IFQ_DEQUEUE(&ifp->if_snd, m);
1205 		if (m == NULL) {
1206 			pool_put(&sc->sc_pool, buf);
1207 			break;
1208 		}
1209 
1210 		m_copydata(m, 0, m->m_pkthdr.len, buf + VNET_ETHER_ALIGN);
1211 
1212 #if NBPFILTER > 0
1213 		/*
1214 		 * If BPF is listening on this interface, let it see the
1215 		 * packet before we commit it to the wire.
1216 		 */
1217 		if (ifp->if_bpf)
1218 		{
1219 			DPRINTF(("%s: before bpf\n", __func__));
1220 			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
1221 			DPRINTF(("%s: after bpf\n", __func__));
1222 		}
1223 #endif
1224 
1225 		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
1226 		KASSERT((pa & ~PAGE_MASK) == (pa & LDC_MTE_RA_MASK));
1227 		while (map->lm_slot[map->lm_next].entry != 0) {
1228 			map->lm_next++;
1229 			map->lm_next &= (map->lm_nentries - 1);
1230 		}
1231 		map->lm_slot[map->lm_next].entry = (pa & LDC_MTE_RA_MASK);
1232 		map->lm_slot[map->lm_next].entry |= LDC_MTE_CPR;
1233 		atomic_inc_32(&map->lm_count);
1234 		sc->sc_vd->vd_desc[prod].nbytes = MAX(m->m_pkthdr.len, 60);
1235 		sc->sc_vd->vd_desc[prod].ncookies = 1;
1236 		sc->sc_vd->vd_desc[prod].cookie[0].addr =
1237 		    map->lm_next << PAGE_SHIFT | (pa & PAGE_MASK);
1238 		sc->sc_vd->vd_desc[prod].cookie[0].size = 2048;
1239 		membar_producer();
1240 		sc->sc_vd->vd_desc[prod].hdr.dstate = VIO_DESC_READY;
1241 
1242 		sc->sc_vsd[prod].vsd_map_idx = map->lm_next;
1243 		sc->sc_vsd[prod].vsd_buf = buf;
1244 
1245 		sc->sc_tx_prod++;
1246 		prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);
1247 
1248 		m_freem(m);
1249 	}
1250 
1251 	membar_producer();
1252 
1253 	if (start != prod && sc->sc_peer_state != VIO_DP_ACTIVE) {
1254 		vnet_send_dring_data(sc, start);
1255 		ifp->if_timer = 5;
1256 	}
1257 
1258 }
1259 
1260 void
1261 vnet_start_desc(struct ifnet *ifp)
1262 {
1263 	struct vnet_softc *sc = ifp->if_softc;
1264 	struct ldc_map *map = sc->sc_lm;
1265 	struct vnet_desc_msg dm;
1266 	struct mbuf *m;
1267 	paddr_t pa;
1268 	unsigned char *buf;
1269 	u_int prod, count;
1270 
1271 	for (;;) {
1272 		count = sc->sc_tx_prod - sc->sc_tx_cons;
1273 		if (count >= (sc->sc_vd->vd_nentries - 1) ||
1274 		    map->lm_count >= map->lm_nentries) {
1275 			ifp->if_flags |= IFF_OACTIVE;
1276 			return;
1277 		}
1278 
1279 		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
1280 		if (buf == NULL) {
1281 			ifp->if_flags |= IFF_OACTIVE;
1282 			return;
1283 		}
1284 
1285 		IFQ_DEQUEUE(&ifp->if_snd, m);
1286 
1287 		if (m == NULL) {
1288 			pool_put(&sc->sc_pool, buf);
1289 			return;
1290 		}
1291 
1292 		m_copydata(m, 0, m->m_pkthdr.len, buf);
1293 
1294 #if NBPFILTER > 0
1295 		/*
1296 		 * If BPF is listening on this interface, let it see the
1297 		 * packet before we commit it to the wire.
1298 		 */
1299 		if (ifp->if_bpf)
1300 			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
1301 #endif
1302 
1303 		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
1304 		KASSERT((pa & ~PAGE_MASK) == (pa & LDC_MTE_RA_MASK));
1305 		while (map->lm_slot[map->lm_next].entry != 0) {
1306 			map->lm_next++;
1307 			map->lm_next &= (map->lm_nentries - 1);
1308 		}
1309 		map->lm_slot[map->lm_next].entry = (pa & LDC_MTE_RA_MASK);
1310 		map->lm_slot[map->lm_next].entry |= LDC_MTE_CPR;
1311 		atomic_inc_32(&map->lm_count);
1312 
1313 		prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);
1314 		sc->sc_vsd[prod].vsd_map_idx = map->lm_next;
1315 		sc->sc_vsd[prod].vsd_buf = buf;
1316 
1317 		bzero(&dm, sizeof(dm));
1318 		dm.tag.type = VIO_TYPE_DATA;
1319 		dm.tag.stype = VIO_SUBTYPE_INFO;
1320 		dm.tag.stype_env = VIO_DESC_DATA;
1321 		dm.tag.sid = sc->sc_local_sid;
1322 		dm.seq_no = sc->sc_seq_no++;
1323 		dm.desc_handle = sc->sc_tx_prod;
1324 		dm.nbytes = MAX(m->m_pkthdr.len, 60);
1325 		dm.ncookies = 1;
1326 		dm.cookie[0].addr =
1327 			map->lm_next << PAGE_SHIFT | (pa & PAGE_MASK);
1328 		dm.cookie[0].size = 2048;
1329 		vnet_sendmsg(sc, &dm, sizeof(dm));
1330 
1331 		sc->sc_tx_prod++;
1332 		sc->sc_tx_prod &= (sc->sc_vd->vd_nentries - 1);
1333 
1334 		m_freem(m);
1335 	}
1336 }
1337 
1338 int
1339 vnet_ioctl(struct ifnet *ifp, u_long cmd, void* data)
1340 {
1341 	struct vnet_softc *sc = ifp->if_softc;
1342 	struct ifreq *ifr = (struct ifreq *)data;
1343 	int s, error = 0;
1344 
1345 	s = splnet();
1346 
1347 	switch (cmd) {
1348 
1349 		case SIOCSIFADDR:
1350 			ifp->if_flags |= IFF_UP;
1351 			/* FALLTHROUGH */
1352 		case SIOCSIFFLAGS:
1353 			if (ifp->if_flags & IFF_UP) {
1354 				if ((ifp->if_flags & IFF_RUNNING) == 0)
1355 					vnet_init(ifp);
1356 			} else {
1357 				if (ifp->if_flags & IFF_RUNNING)
1358 					vnet_stop(ifp, 0);
1359 			}
1360 		break;
1361 
1362 		case SIOCGIFMEDIA:
1363 		case SIOCSIFMEDIA:
1364 			error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
1365 			break;
1366 
1367 		case SIOCADDMULTI:
1368 		case SIOCDELMULTI:
1369 			/*
1370 			 * XXX Removing all multicast addresses and adding
1371 			 * most of them back, is somewhat retarded.
1372 			 */
1373 			vnet_setmulti(sc, 0);
1374 			error = ether_ioctl(ifp, cmd, data);
1375 			vnet_setmulti(sc, 1);
1376 			if (error == ENETRESET)
1377 				error = 0;
1378 			break;
1379 
1380 		default:
1381 			error = ether_ioctl(ifp, cmd, data);
1382 	}
1383 
1384 	splx(s);
1385 
1386 	return (error);
1387 }
1388 
1389 void
1390 vnet_watchdog(struct ifnet *ifp)
1391 {
1392 
1393 	struct vnet_softc *sc = ifp->if_softc;
1394 
1395 	printf("%s: watchdog timeout\n", sc->sc_dv.dv_xname);
1396 }
1397 
/*
 * Media change callback.  Nothing is reconfigured; always returns 0.
 */
int
vnet_media_change(struct ifnet *ifp)
{
	/* The argument is intentionally unused. */
	return 0;
}
1403 
1404 void
1405 vnet_media_status(struct ifnet *ifp, struct ifmediareq *imr)
1406 {
1407 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
1408 	imr->ifm_status = IFM_AVALID;
1409 	if (ifp->if_link_state == LINK_STATE_UP &&
1410 	    ifp->if_flags & IFF_UP)
1411 		imr->ifm_status |= IFM_ACTIVE;
1412 }
1413 
1414 void
1415 vnet_link_state(struct vnet_softc *sc)
1416 {
1417 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1418 	int link_state = LINK_STATE_DOWN;
1419 
1420 	KERNEL_LOCK(1, curlwp);
1421 	if (ISSET(sc->sc_vio_state, VIO_RCV_RDX) &&
1422 	    ISSET(sc->sc_vio_state, VIO_ACK_RDX))
1423 		link_state = LINK_STATE_UP;
1424 	if (ifp->if_link_state != link_state) {
1425 		if_link_state_change(ifp, link_state);
1426 	}
1427 	KERNEL_UNLOCK_ONE(curlwp);
1428 }
1429 
1430 void
1431 vnet_setmulti(struct vnet_softc *sc, int set)
1432 {
1433 	struct ethercom *ec = &sc->sc_ethercom;
1434 	struct ether_multi *enm;
1435 	struct ether_multistep step;
1436 	struct vnet_mcast_info mi;
1437 	int count = 0;
1438 
1439 	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
1440 	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX))
1441 		return;
1442 
1443 	bzero(&mi, sizeof(mi));
1444 	mi.tag.type = VIO_TYPE_CTRL;
1445 	mi.tag.stype = VIO_SUBTYPE_INFO;
1446 	mi.tag.stype_env = VNET_MCAST_INFO;
1447 	mi.tag.sid = sc->sc_local_sid;
1448 	mi.set = set ? 1 : 0;
1449 	KERNEL_LOCK(1, curlwp);
1450 	ETHER_FIRST_MULTI(step, ec, enm);
1451 	while (enm != NULL) {
1452 		/* XXX What about multicast ranges? */
1453 		bcopy(enm->enm_addrlo, mi.mcast_addr[count], ETHER_ADDR_LEN);
1454 		ETHER_NEXT_MULTI(step, enm);
1455 
1456 		count++;
1457 		if (count < VNET_NUM_MCAST)
1458 			continue;
1459 
1460 		mi.count = VNET_NUM_MCAST;
1461 		vnet_sendmsg(sc, &mi, sizeof(mi));
1462 		count = 0;
1463 	}
1464 
1465 	if (count > 0) {
1466 		mi.count = count;
1467 		vnet_sendmsg(sc, &mi, sizeof(mi));
1468 	}
1469 	KERNEL_UNLOCK_ONE(curlwp);
1470 }
1471 
1472 
1473 int
1474 vnet_init(struct ifnet *ifp)
1475 {
1476 	struct vnet_softc *sc = ifp->if_softc;
1477 	struct ldc_conn *lc = &sc->sc_lc;
1478 	int err;
1479 	vaddr_t va;
1480 	paddr_t pa;
1481 	sc->sc_lm = ldc_map_alloc(2048);
1482 	if (sc->sc_lm == NULL)
1483 		return ENOMEM;
1484 
1485 	va = (vaddr_t)sc->sc_lm->lm_slot;
1486 	pa = 0;
1487 	if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
1488 		panic("pmap_extract failed %lx\n", va);
1489 	err = hv_ldc_set_map_table(lc->lc_id, pa, 2048);
1490 	if (err != H_EOK) {
1491 		printf("hv_ldc_set_map_table %d\n", err);
1492 		return EINVAL;
1493 	}
1494 
1495 	sc->sc_vd = vnet_dring_alloc(sc->sc_dmatag, VNET_NUM_SOFT_DESC);
1496 	if (sc->sc_vd == NULL)
1497 		return ENOMEM;
1498 	sc->sc_vsd = malloc(VNET_NUM_SOFT_DESC * sizeof(*sc->sc_vsd), M_DEVBUF,
1499 	    M_NOWAIT|M_ZERO);
1500 	if (sc->sc_vsd == NULL)
1501 		return ENOMEM;
1502 
1503 	va = (vaddr_t)sc->sc_vd->vd_desc;
1504 	pa = 0;
1505 	if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
1506 		panic("pmap_extract failed %lx\n", va);
1507 	sc->sc_lm->lm_slot[0].entry = pa;
1508 	sc->sc_lm->lm_slot[0].entry &= LDC_MTE_RA_MASK;
1509 	sc->sc_lm->lm_slot[0].entry |= LDC_MTE_CPR | LDC_MTE_CPW;
1510 	sc->sc_lm->lm_next = 1;
1511 	sc->sc_lm->lm_count = 1;
1512 
1513 	va = lc->lc_txq->lq_va;
1514 	pa = 0;
1515 	if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
1516 		panic("pmap_extract failed %lx\n", va);
1517 	err = hv_ldc_tx_qconf(lc->lc_id, pa, lc->lc_txq->lq_nentries);
1518 	if (err != H_EOK)
1519 		printf("hv_ldc_tx_qconf %d\n", err);
1520 
1521 	va = (vaddr_t)lc->lc_rxq->lq_va;
1522 	pa = 0;
1523 	if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
1524 	  panic("pmap_extract failed %lx\n", va);
1525 
1526 	err = hv_ldc_rx_qconf(lc->lc_id, pa, lc->lc_rxq->lq_nentries);
1527 	if (err != H_EOK)
1528 		printf("hv_ldc_rx_qconf %d\n", err);
1529 
1530 	cbus_intr_setenabled(sc->sc_bustag, sc->sc_tx_ino, INTR_ENABLED);
1531 	cbus_intr_setenabled(sc->sc_bustag, sc->sc_rx_ino, INTR_ENABLED);
1532 
1533 	ldc_send_vers(lc);
1534 
1535 	ifp->if_flags |= IFF_RUNNING;
1536 
1537 	return 0;
1538 }
1539 
1540 void
1541 vnet_stop(struct ifnet *ifp, int disable)
1542 
1543 {
1544 	struct vnet_softc *sc = ifp->if_softc;
1545 	struct ldc_conn *lc = &sc->sc_lc;
1546 
1547 	ifp->if_flags &= ~IFF_RUNNING;
1548 	ifp->if_flags &= ~IFF_OACTIVE;
1549 	ifp->if_timer = 0;
1550 
1551 	cbus_intr_setenabled(sc->sc_bustag, sc->sc_tx_ino, INTR_DISABLED);
1552 	cbus_intr_setenabled(sc->sc_bustag, sc->sc_rx_ino, INTR_DISABLED);
1553 
1554 #if 0
1555 openbsd XXX
1556 	intr_barrier(sc->sc_tx_ih);
1557 	intr_barrier(sc->sc_rx_ih);
1558 #else
1559 	printf("vnet_stop() intr_barrier() not available\n");
1560 #endif
1561 
1562 	hv_ldc_tx_qconf(lc->lc_id, 0, 0);
1563 	hv_ldc_rx_qconf(lc->lc_id, 0, 0);
1564 	lc->lc_tx_seqid = 0;
1565 	lc->lc_state = 0;
1566 	lc->lc_tx_state = lc->lc_rx_state = LDC_CHANNEL_DOWN;
1567 	vnet_ldc_reset(lc);
1568 
1569 	free(sc->sc_vsd, M_DEVBUF);
1570 
1571 	vnet_dring_free(sc->sc_dmatag, sc->sc_vd);
1572 
1573 	hv_ldc_set_map_table(lc->lc_id, 0, 0);
1574 	ldc_map_free(sc->sc_lm);
1575 }
1576 
1577 struct vnet_dring *
1578 vnet_dring_alloc(bus_dma_tag_t t, int nentries)
1579 {
1580 	struct vnet_dring *vd;
1581 	bus_size_t size;
1582 	vaddr_t va;
1583 	int i;
1584 
1585 	vd = kmem_zalloc(sizeof(struct vnet_dring), KM_SLEEP);
1586 	if (vd == NULL)
1587 		return NULL;
1588 
1589 	size = roundup(nentries * sizeof(struct vnet_desc), PAGE_SIZE);
1590 
1591 	va = (vaddr_t)kmem_zalloc(size, KM_SLEEP);
1592 	vd->vd_desc = (struct vnet_desc *)va;
1593 	vd->vd_nentries = nentries;
1594 	bzero(vd->vd_desc, nentries * sizeof(struct vnet_desc));
1595 	for (i = 0; i < vd->vd_nentries; i++)
1596 		vd->vd_desc[i].hdr.dstate = VIO_DESC_FREE;
1597 	return (vd);
1598 
1599 	return (NULL);
1600 }
1601 
1602 void
1603 vnet_dring_free(bus_dma_tag_t t, struct vnet_dring *vd)
1604 {
1605 
1606 	bus_size_t size;
1607 
1608 	size = vd->vd_nentries * sizeof(struct vnet_desc);
1609 	size = roundup(size, PAGE_SIZE);
1610 
1611 	kmem_free(vd->vd_desc, size);
1612 	kmem_free(vd, size);
1613 }
1614 
1615