xref: /openbsd-src/sys/arch/sparc64/dev/vnet.c (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1 /*	$OpenBSD: vnet.c,v 1.57 2016/09/15 02:00:17 dlg Exp $	*/
2 /*
3  * Copyright (c) 2009, 2015 Mark Kettenis
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include "bpfilter.h"
19 
20 #include <sys/param.h>
21 #include <sys/atomic.h>
22 #include <sys/device.h>
23 #include <sys/malloc.h>
24 #include <sys/pool.h>
25 #include <sys/mbuf.h>
26 #include <sys/socket.h>
27 #include <sys/sockio.h>
28 #include <sys/systm.h>
29 #include <sys/timeout.h>
30 
31 #include <machine/autoconf.h>
32 #include <machine/hypervisor.h>
33 #include <machine/openfirm.h>
34 
35 #include <net/if.h>
36 #include <net/if_media.h>
37 
38 #include <netinet/in.h>
39 #include <netinet/if_ether.h>
40 
41 #if NBPFILTER > 0
42 #include <net/bpf.h>
43 #endif
44 
45 #include <uvm/uvm_extern.h>
46 
47 #include <sparc64/dev/cbusvar.h>
48 #include <sparc64/dev/ldcvar.h>
49 #include <sparc64/dev/viovar.h>
50 
51 #ifdef VNET_DEBUG
52 #define DPRINTF(x)	printf x
53 #else
54 #define DPRINTF(x)
55 #endif
56 
57 #define VNET_TX_ENTRIES		32
58 #define VNET_RX_ENTRIES		32
59 
/*
 * VIO attribute-info control message as used by vNet.  Built by
 * vnet_send_attr_info() and parsed by vnet_rx_vio_attr_info().
 */
struct vnet_attr_info {
	struct vio_msg_tag	tag;
	uint8_t			xfer_mode;	/* we send VIO_DRING_MODE */
	uint8_t			addr_type;	/* VNET_ADDR_ETHERMAC */
	uint16_t		ack_freq;	/* we send 0 */
	uint32_t		_reserved1;
	uint64_t		addr;		/* MAC; first octet in the highest of the low 48 bits */
	uint64_t		mtu;		/* we send ETHER_MAX_LEN - ETHER_CRC_LEN */
	uint64_t		_reserved2[3];
};
70 
71 /* Address types. */
72 #define VNET_ADDR_ETHERMAC	0x01
73 
74 /* Sub-Type envelopes. */
75 #define VNET_MCAST_INFO		0x0101
76 
77 #define VNET_NUM_MCAST		7
78 
/*
 * Multicast information message; carries up to VNET_NUM_MCAST
 * Ethernet addresses.
 * NOTE(review): presumably `set' selects add vs. remove and `count'
 * is the number of valid entries in mcast_addr -- confirm against
 * vnet_setmulti().
 */
struct vnet_mcast_info {
	struct vio_msg_tag	tag;
	uint8_t			set;
	uint8_t			count;
	uint8_t			mcast_addr[VNET_NUM_MCAST][ETHER_ADDR_LEN];
	uint32_t		_reserved;
};
86 
/*
 * One entry of a descriptor ring.  hdr.dstate moves between
 * VIO_DESC_FREE, VIO_DESC_READY and VIO_DESC_DONE; the transmit path
 * fills in nbytes, ncookies (always 1) and cookie[0].
 */
struct vnet_desc {
	struct vio_dring_hdr	hdr;
	uint32_t		nbytes;		/* frame length in bytes */
	uint32_t		ncookies;	/* number of valid cookies */
	struct ldc_cookie	cookie[2];
};
93 
/*
 * In-band descriptor message (VIO_DESC_DATA), handled by
 * vnet_rx_vio_desc_data().
 */
struct vnet_desc_msg {
	struct vio_msg_tag	tag;
	uint64_t		seq_no;
	uint64_t		desc_handle;	/* compared with sc_tx_cons on ACK */
	uint32_t		nbytes;		/* frame length in bytes */
	uint32_t		ncookies;
	struct ldc_cookie	cookie[1];	/* cookie[0].addr is the copy source */
};
102 
/*
 * A descriptor ring together with the DMA resources backing it.
 * Ring indices are computed by masking with (vd_nentries - 1), so
 * vd_nentries is presumably a power of two -- confirm in
 * vnet_dring_alloc().
 */
struct vnet_dring {
	bus_dmamap_t		vd_map;
	bus_dma_segment_t	vd_seg;
	struct vnet_desc	*vd_desc;	/* array of vd_nentries descriptors */
	int			vd_nentries;
};
109 
110 struct vnet_dring *vnet_dring_alloc(bus_dma_tag_t, int);
111 void	vnet_dring_free(bus_dma_tag_t, struct vnet_dring *);
112 
113 /*
114  * For now, we only support vNet 1.0.
115  */
116 #define VNET_MAJOR	1
117 #define VNET_MINOR	0
118 
119 /*
120  * The vNet protocol wants the IP header to be 64-bit aligned, so
 * define our own variant of ETHER_ALIGN.
122  */
123 #define VNET_ETHER_ALIGN	6
124 
/*
 * Driver-private state kept per transmit ring slot, so that the
 * resources can be released once the peer is done with the slot.
 */
struct vnet_soft_desc {
	int		vsd_map_idx;	/* index into the LDC map's lm_slot[] */
	caddr_t		vsd_buf;	/* buffer from sc_pool, NULL when unused */
};
129 
/*
 * Per-device software state.
 */
struct vnet_softc {
	struct device	sc_dv;
	bus_space_tag_t	sc_bustag;
	bus_dma_tag_t	sc_dmatag;

	/* Interrupt numbers from the cbus attach args and their handles. */
	uint64_t	sc_tx_ino;
	uint64_t	sc_rx_ino;
	void		*sc_tx_ih;
	void		*sc_rx_ih;

	struct ldc_conn	sc_lc;

	/* Bitmask tracking how far the VIO handshake has progressed. */
	uint16_t	sc_vio_state;
#define VIO_SND_VER_INFO	0x0001
#define VIO_ACK_VER_INFO	0x0002
#define VIO_RCV_VER_INFO	0x0004
#define VIO_SND_ATTR_INFO	0x0008
#define VIO_ACK_ATTR_INFO	0x0010
#define VIO_RCV_ATTR_INFO	0x0020
#define VIO_SND_DRING_REG	0x0040
#define VIO_ACK_DRING_REG	0x0080
#define VIO_RCV_DRING_REG	0x0100
#define VIO_SND_RDX		0x0200
#define VIO_ACK_RDX		0x0400
#define VIO_RCV_RDX		0x0800

	/* Runs vnet_handshake() after the channel comes up or resets. */
	struct timeout	sc_handshake_to;

	/* Transfer mode announced by the peer in its ATTR_INFO message. */
	uint8_t		sc_xfer_mode;

	uint32_t	sc_local_sid;	/* session id placed in outgoing tags */
	uint64_t	sc_dring_ident;	/* taken from the peer's DRING_REG ACK */
	uint64_t	sc_seq_no;	/* next DRING_DATA sequence number */

	/* Free-running counters; mask with (vd_nentries - 1) to index. */
	u_int		sc_tx_prod;
	u_int		sc_tx_cons;

	/* VIO_DP_STOPPED or VIO_DP_ACTIVE. */
	u_int		sc_peer_state;

	struct ldc_map	*sc_lm;
	struct vnet_dring *sc_vd;
	struct vnet_soft_desc *sc_vsd;
#define VNET_NUM_SOFT_DESC	128

	/* Geometry of the peer's ring, from its DRING_REG INFO message. */
	size_t		sc_peer_desc_size;
	struct ldc_cookie sc_peer_dring_cookie;
	int		sc_peer_dring_nentries;

	/* Pool of 2048-byte packet buffers, one pool per interface. */
	struct pool	sc_pool;

	struct arpcom	sc_ac;
	struct ifmedia	sc_media;
};
183 
184 int	vnet_match(struct device *, void *, void *);
185 void	vnet_attach(struct device *, struct device *, void *);
186 
/* Autoconf attachment glue: softc size plus match and attach routines. */
struct cfattach vnet_ca = {
	sizeof(struct vnet_softc), vnet_match, vnet_attach
};
190 
/* Autoconf driver description: devices are named "vnet", class DV_IFNET. */
struct cfdriver vnet_cd = {
	NULL, "vnet", DV_IFNET
};
194 
195 int	vnet_tx_intr(void *);
196 int	vnet_rx_intr(void *);
197 void	vnet_handshake(void *);
198 
199 void	vio_rx_data(struct ldc_conn *, struct ldc_pkt *);
200 void	vnet_rx_vio_ctrl(struct vnet_softc *, struct vio_msg *);
201 void	vnet_rx_vio_ver_info(struct vnet_softc *, struct vio_msg_tag *);
202 void	vnet_rx_vio_attr_info(struct vnet_softc *, struct vio_msg_tag *);
203 void	vnet_rx_vio_dring_reg(struct vnet_softc *, struct vio_msg_tag *);
204 void	vnet_rx_vio_rdx(struct vnet_softc *sc, struct vio_msg_tag *);
205 void	vnet_rx_vio_data(struct vnet_softc *sc, struct vio_msg *);
206 void	vnet_rx_vio_desc_data(struct vnet_softc *sc, struct vio_msg_tag *);
207 void	vnet_rx_vio_dring_data(struct vnet_softc *sc, struct vio_msg_tag *);
208 
209 void	vnet_ldc_reset(struct ldc_conn *);
210 void	vnet_ldc_start(struct ldc_conn *);
211 
212 void	vnet_sendmsg(struct vnet_softc *, void *, size_t);
213 void	vnet_send_ver_info(struct vnet_softc *, uint16_t, uint16_t);
214 void	vnet_send_attr_info(struct vnet_softc *);
215 void	vnet_send_dring_reg(struct vnet_softc *);
216 void	vio_send_rdx(struct vnet_softc *);
217 void	vnet_send_dring_data(struct vnet_softc *, uint32_t);
218 
219 void	vnet_start(struct ifnet *);
220 void	vnet_start_desc(struct ifnet *);
221 int	vnet_ioctl(struct ifnet *, u_long, caddr_t);
222 void	vnet_watchdog(struct ifnet *);
223 
224 int	vnet_media_change(struct ifnet *);
225 void	vnet_media_status(struct ifnet *, struct ifmediareq *);
226 
227 void	vnet_link_state(struct vnet_softc *sc);
228 
229 void	vnet_setmulti(struct vnet_softc *, int);
230 
231 void	vnet_init(struct ifnet *);
232 void	vnet_stop(struct ifnet *);
233 
234 int
235 vnet_match(struct device *parent, void *match, void *aux)
236 {
237 	struct cbus_attach_args *ca = aux;
238 
239 	if (strcmp(ca->ca_name, "network") == 0)
240 		return (1);
241 
242 	return (0);
243 }
244 
245 void
246 vnet_attach(struct device *parent, struct device *self, void *aux)
247 {
248 	struct vnet_softc *sc = (struct vnet_softc *)self;
249 	struct cbus_attach_args *ca = aux;
250 	struct ldc_conn *lc;
251 	struct ifnet *ifp;
252 
253 	sc->sc_bustag = ca->ca_bustag;
254 	sc->sc_dmatag = ca->ca_dmatag;
255 	sc->sc_tx_ino = ca->ca_tx_ino;
256 	sc->sc_rx_ino = ca->ca_rx_ino;
257 
258 	printf(": ivec 0x%llx, 0x%llx", sc->sc_tx_ino, sc->sc_rx_ino);
259 
260 	/*
261 	 * Un-configure queues before registering interrupt handlers,
262 	 * such that we dont get any stale LDC packets or events.
263 	 */
264 	hv_ldc_tx_qconf(ca->ca_id, 0, 0);
265 	hv_ldc_rx_qconf(ca->ca_id, 0, 0);
266 
267 	sc->sc_tx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_tx_ino,
268 	    IPL_NET, BUS_INTR_ESTABLISH_MPSAFE, vnet_tx_intr,
269 	    sc, sc->sc_dv.dv_xname);
270 	sc->sc_rx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_rx_ino,
271 	    IPL_NET, BUS_INTR_ESTABLISH_MPSAFE, vnet_rx_intr,
272 	    sc, sc->sc_dv.dv_xname);
273 	if (sc->sc_tx_ih == NULL || sc->sc_rx_ih == NULL) {
274 		printf(", can't establish interrupt\n");
275 		return;
276 	}
277 
278 	lc = &sc->sc_lc;
279 	lc->lc_id = ca->ca_id;
280 	lc->lc_sc = sc;
281 	lc->lc_reset = vnet_ldc_reset;
282 	lc->lc_start = vnet_ldc_start;
283 	lc->lc_rx_data = vio_rx_data;
284 
285 	timeout_set(&sc->sc_handshake_to, vnet_handshake, sc);
286 	sc->sc_peer_state = VIO_DP_STOPPED;
287 
288 	lc->lc_txq = ldc_queue_alloc(sc->sc_dmatag, VNET_TX_ENTRIES);
289 	if (lc->lc_txq == NULL) {
290 		printf(", can't allocate tx queue\n");
291 		return;
292 	}
293 
294 	lc->lc_rxq = ldc_queue_alloc(sc->sc_dmatag, VNET_RX_ENTRIES);
295 	if (lc->lc_rxq == NULL) {
296 		printf(", can't allocate rx queue\n");
297 		goto free_txqueue;
298 	}
299 
300 	if (OF_getprop(ca->ca_node, "local-mac-address",
301 	    sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN) > 0)
302 		printf(", address %s", ether_sprintf(sc->sc_ac.ac_enaddr));
303 
304 	/*
305 	 * Each interface gets its own pool.
306 	 */
307 	pool_init(&sc->sc_pool, 2048, 0, IPL_NET, 0, sc->sc_dv.dv_xname, NULL);
308 
309 	ifp = &sc->sc_ac.ac_if;
310 	ifp->if_softc = sc;
311 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
312 	ifp->if_link_state = LINK_STATE_DOWN;
313 	ifp->if_ioctl = vnet_ioctl;
314 	ifp->if_start = vnet_start;
315 	ifp->if_watchdog = vnet_watchdog;
316 	strlcpy(ifp->if_xname, sc->sc_dv.dv_xname, IFNAMSIZ);
317 	IFQ_SET_MAXLEN(&ifp->if_snd, 31); /* XXX */
318 
319 	ifmedia_init(&sc->sc_media, 0, vnet_media_change, vnet_media_status);
320 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
321 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
322 
323 	if_attach(ifp);
324 	ether_ifattach(ifp);
325 
326 	printf("\n");
327 	return;
328 
329 free_txqueue:
330 	ldc_queue_free(sc->sc_dmatag, lc->lc_txq);
331 }
332 
333 int
334 vnet_tx_intr(void *arg)
335 {
336 	struct vnet_softc *sc = arg;
337 	struct ldc_conn *lc = &sc->sc_lc;
338 	uint64_t tx_head, tx_tail, tx_state;
339 
340 	hv_ldc_tx_get_state(lc->lc_id, &tx_head, &tx_tail, &tx_state);
341 	if (tx_state != lc->lc_tx_state) {
342 		switch (tx_state) {
343 		case LDC_CHANNEL_DOWN:
344 			DPRINTF(("Tx link down\n"));
345 			break;
346 		case LDC_CHANNEL_UP:
347 			DPRINTF(("Tx link up\n"));
348 			break;
349 		case LDC_CHANNEL_RESET:
350 			DPRINTF(("Tx link reset\n"));
351 			break;
352 		}
353 		lc->lc_tx_state = tx_state;
354 	}
355 
356 	return (1);
357 }
358 
359 int
360 vnet_rx_intr(void *arg)
361 {
362 	struct vnet_softc *sc = arg;
363 	struct ldc_conn *lc = &sc->sc_lc;
364 	uint64_t rx_head, rx_tail, rx_state;
365 	struct ldc_pkt *lp;
366 	int err;
367 
368 	err = hv_ldc_rx_get_state(lc->lc_id, &rx_head, &rx_tail, &rx_state);
369 	if (err == H_EINVAL)
370 		return (0);
371 	if (err != H_EOK) {
372 		printf("hv_ldc_rx_get_state %d\n", err);
373 		return (0);
374 	}
375 
376 	if (rx_state != lc->lc_rx_state) {
377 		switch (rx_state) {
378 		case LDC_CHANNEL_DOWN:
379 			DPRINTF(("Rx link down\n"));
380 			lc->lc_tx_seqid = 0;
381 			lc->lc_state = 0;
382 			lc->lc_reset(lc);
383 			break;
384 		case LDC_CHANNEL_UP:
385 			DPRINTF(("Rx link up\n"));
386 			timeout_add_msec(&sc->sc_handshake_to, 500);
387 			break;
388 		case LDC_CHANNEL_RESET:
389 			DPRINTF(("Rx link reset\n"));
390 			lc->lc_tx_seqid = 0;
391 			lc->lc_state = 0;
392 			lc->lc_reset(lc);
393 			timeout_add_msec(&sc->sc_handshake_to, 500);
394 			break;
395 		}
396 		lc->lc_rx_state = rx_state;
397 		return (1);
398 	}
399 
400 	if (rx_head == rx_tail)
401 		return (0);
402 
403 	lp = (struct ldc_pkt *)(lc->lc_rxq->lq_va + rx_head);
404 	switch (lp->type) {
405 	case LDC_CTRL:
406 		ldc_rx_ctrl(lc, lp);
407 		break;
408 
409 	case LDC_DATA:
410 		ldc_rx_data(lc, lp);
411 		break;
412 
413 	default:
414 		DPRINTF(("%0x02/%0x02/%0x02\n", lp->type, lp->stype,
415 		    lp->ctrl));
416 		ldc_reset(lc);
417 		break;
418 	}
419 
420 	if (lc->lc_state == 0)
421 		return (1);
422 
423 	rx_head += sizeof(*lp);
424 	rx_head &= ((lc->lc_rxq->lq_nentries * sizeof(*lp)) - 1);
425 	err = hv_ldc_rx_set_qhead(lc->lc_id, rx_head);
426 	if (err != H_EOK)
427 		printf("%s: hv_ldc_rx_set_qhead %d\n", __func__, err);
428 
429 	return (1);
430 }
431 
432 void
433 vnet_handshake(void *arg)
434 {
435 	struct vnet_softc *sc = arg;
436 
437 	ldc_send_vers(&sc->sc_lc);
438 }
439 
440 void
441 vio_rx_data(struct ldc_conn *lc, struct ldc_pkt *lp)
442 {
443 	struct vio_msg *vm = (struct vio_msg *)lp;
444 
445 	switch (vm->type) {
446 	case VIO_TYPE_CTRL:
447 		if ((lp->env & LDC_FRAG_START) == 0 &&
448 		    (lp->env & LDC_FRAG_STOP) == 0)
449 			return;
450 		vnet_rx_vio_ctrl(lc->lc_sc, vm);
451 		break;
452 
453 	case VIO_TYPE_DATA:
454 		if((lp->env & LDC_FRAG_START) == 0)
455 			return;
456 		vnet_rx_vio_data(lc->lc_sc, vm);
457 		break;
458 
459 	default:
460 		DPRINTF(("Unhandled packet type 0x%02x\n", vm->type));
461 		ldc_reset(lc);
462 		break;
463 	}
464 }
465 
466 void
467 vnet_rx_vio_ctrl(struct vnet_softc *sc, struct vio_msg *vm)
468 {
469 	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;
470 
471 	switch (tag->stype_env) {
472 	case VIO_VER_INFO:
473 		vnet_rx_vio_ver_info(sc, tag);
474 		break;
475 	case VIO_ATTR_INFO:
476 		vnet_rx_vio_attr_info(sc, tag);
477 		break;
478 	case VIO_DRING_REG:
479 		vnet_rx_vio_dring_reg(sc, tag);
480 		break;
481 	case VIO_RDX:
482 		vnet_rx_vio_rdx(sc, tag);
483 		break;
484 	default:
485 		DPRINTF(("CTRL/0x%02x/0x%04x\n", tag->stype, tag->stype_env));
486 		break;
487 	}
488 }
489 
490 void
491 vnet_rx_vio_ver_info(struct vnet_softc *sc, struct vio_msg_tag *tag)
492 {
493 	struct vio_ver_info *vi = (struct vio_ver_info *)tag;
494 
495 	switch (vi->tag.stype) {
496 	case VIO_SUBTYPE_INFO:
497 		DPRINTF(("CTRL/INFO/VER_INFO\n"));
498 
499 		/* Make sure we're talking to a virtual network device. */
500 		if (vi->dev_class != VDEV_NETWORK &&
501 		    vi->dev_class != VDEV_NETWORK_SWITCH) {
502 			/* Huh, we're not talking to a network device? */
503 			printf("Not a network device\n");
504 			vi->tag.stype = VIO_SUBTYPE_NACK;
505 			vnet_sendmsg(sc, vi, sizeof(*vi));
506 			return;
507 		}
508 
509 		if (vi->major != VNET_MAJOR) {
510 			vi->tag.stype = VIO_SUBTYPE_NACK;
511 			vi->major = VNET_MAJOR;
512 			vi->minor = VNET_MINOR;
513 			vnet_sendmsg(sc, vi, sizeof(*vi));
514 			return;
515 		}
516 
517 		vi->tag.stype = VIO_SUBTYPE_ACK;
518 		vi->tag.sid = sc->sc_local_sid;
519 		vi->minor = VNET_MINOR;
520 		vnet_sendmsg(sc, vi, sizeof(*vi));
521 		sc->sc_vio_state |= VIO_RCV_VER_INFO;
522 		break;
523 
524 	case VIO_SUBTYPE_ACK:
525 		DPRINTF(("CTRL/ACK/VER_INFO\n"));
526 		if (!ISSET(sc->sc_vio_state, VIO_SND_VER_INFO)) {
527 			ldc_reset(&sc->sc_lc);
528 			break;
529 		}
530 		sc->sc_vio_state |= VIO_ACK_VER_INFO;
531 		break;
532 
533 	default:
534 		DPRINTF(("CTRL/0x%02x/VER_INFO\n", vi->tag.stype));
535 		break;
536 	}
537 
538 	if (ISSET(sc->sc_vio_state, VIO_RCV_VER_INFO) &&
539 	    ISSET(sc->sc_vio_state, VIO_ACK_VER_INFO))
540 		vnet_send_attr_info(sc);
541 }
542 
543 void
544 vnet_rx_vio_attr_info(struct vnet_softc *sc, struct vio_msg_tag *tag)
545 {
546 	struct vnet_attr_info *ai = (struct vnet_attr_info *)tag;
547 
548 	switch (ai->tag.stype) {
549 	case VIO_SUBTYPE_INFO:
550 		DPRINTF(("CTRL/INFO/ATTR_INFO\n"));
551 		sc->sc_xfer_mode = ai->xfer_mode;
552 
553 		ai->tag.stype = VIO_SUBTYPE_ACK;
554 		ai->tag.sid = sc->sc_local_sid;
555 		vnet_sendmsg(sc, ai, sizeof(*ai));
556 		sc->sc_vio_state |= VIO_RCV_ATTR_INFO;
557 		break;
558 
559 	case VIO_SUBTYPE_ACK:
560 		DPRINTF(("CTRL/ACK/ATTR_INFO\n"));
561 		if (!ISSET(sc->sc_vio_state, VIO_SND_ATTR_INFO)) {
562 			ldc_reset(&sc->sc_lc);
563 			break;
564 		}
565 		sc->sc_vio_state |= VIO_ACK_ATTR_INFO;
566 		break;
567 
568 	default:
569 		DPRINTF(("CTRL/0x%02x/ATTR_INFO\n", ai->tag.stype));
570 		break;
571 	}
572 
573 	if (ISSET(sc->sc_vio_state, VIO_RCV_ATTR_INFO) &&
574 	    ISSET(sc->sc_vio_state, VIO_ACK_ATTR_INFO)) {
575 		if (sc->sc_xfer_mode == VIO_DRING_MODE)
576 			vnet_send_dring_reg(sc);
577 		else
578 			vio_send_rdx(sc);
579 	}
580 }
581 
582 void
583 vnet_rx_vio_dring_reg(struct vnet_softc *sc, struct vio_msg_tag *tag)
584 {
585 	struct vio_dring_reg *dr = (struct vio_dring_reg *)tag;
586 
587 	switch (dr->tag.stype) {
588 	case VIO_SUBTYPE_INFO:
589 		DPRINTF(("CTRL/INFO/DRING_REG\n"));
590 
591 		sc->sc_peer_dring_nentries = dr->num_descriptors;
592 		sc->sc_peer_desc_size = dr->descriptor_size;
593 		sc->sc_peer_dring_cookie = dr->cookie[0];
594 
595 		dr->tag.stype = VIO_SUBTYPE_ACK;
596 		dr->tag.sid = sc->sc_local_sid;
597 		vnet_sendmsg(sc, dr, sizeof(*dr));
598 		sc->sc_vio_state |= VIO_RCV_DRING_REG;
599 		break;
600 
601 	case VIO_SUBTYPE_ACK:
602 		DPRINTF(("CTRL/ACK/DRING_REG\n"));
603 		if (!ISSET(sc->sc_vio_state, VIO_SND_DRING_REG)) {
604 			ldc_reset(&sc->sc_lc);
605 			break;
606 		}
607 
608 		sc->sc_dring_ident = dr->dring_ident;
609 		sc->sc_seq_no = 1;
610 
611 		sc->sc_vio_state |= VIO_ACK_DRING_REG;
612 		break;
613 
614 	default:
615 		DPRINTF(("CTRL/0x%02x/DRING_REG\n", dr->tag.stype));
616 		break;
617 	}
618 
619 	if (ISSET(sc->sc_vio_state, VIO_RCV_DRING_REG) &&
620 	    ISSET(sc->sc_vio_state, VIO_ACK_DRING_REG))
621 		vio_send_rdx(sc);
622 }
623 
624 void
625 vnet_rx_vio_rdx(struct vnet_softc *sc, struct vio_msg_tag *tag)
626 {
627 	struct ifnet *ifp = &sc->sc_ac.ac_if;
628 
629 	switch(tag->stype) {
630 	case VIO_SUBTYPE_INFO:
631 		DPRINTF(("CTRL/INFO/RDX\n"));
632 
633 		tag->stype = VIO_SUBTYPE_ACK;
634 		tag->sid = sc->sc_local_sid;
635 		vnet_sendmsg(sc, tag, sizeof(*tag));
636 		sc->sc_vio_state |= VIO_RCV_RDX;
637 		break;
638 
639 	case VIO_SUBTYPE_ACK:
640 		DPRINTF(("CTRL/ACK/RDX\n"));
641 		if (!ISSET(sc->sc_vio_state, VIO_SND_RDX)) {
642 			ldc_reset(&sc->sc_lc);
643 			break;
644 		}
645 		sc->sc_vio_state |= VIO_ACK_RDX;
646 		break;
647 
648 	default:
649 		DPRINTF(("CTRL/0x%02x/RDX (VIO)\n", tag->stype));
650 		break;
651 	}
652 
653 	if (ISSET(sc->sc_vio_state, VIO_RCV_RDX) &&
654 	    ISSET(sc->sc_vio_state, VIO_ACK_RDX)) {
655 		/* Link is up! */
656 		vnet_link_state(sc);
657 
658 		/* Configure multicast now that we can. */
659 		vnet_setmulti(sc, 1);
660 
661 		KERNEL_LOCK();
662 		ifq_clr_oactive(&ifp->if_snd);
663 		vnet_start(ifp);
664 		KERNEL_UNLOCK();
665 	}
666 }
667 
668 void
669 vnet_rx_vio_data(struct vnet_softc *sc, struct vio_msg *vm)
670 {
671 	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;
672 
673 	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
674 	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX)) {
675 		DPRINTF(("Spurious DATA/0x%02x/0x%04x\n", tag->stype,
676 		    tag->stype_env));
677 		return;
678 	}
679 
680 	switch(tag->stype_env) {
681 	case VIO_DESC_DATA:
682 		vnet_rx_vio_desc_data(sc, tag);
683 		break;
684 
685 	case VIO_DRING_DATA:
686 		vnet_rx_vio_dring_data(sc, tag);
687 		break;
688 
689 	default:
690 		DPRINTF(("DATA/0x%02x/0x%04x\n", tag->stype, tag->stype_env));
691 		break;
692 	}
693 }
694 
695 void
696 vnet_rx_vio_desc_data(struct vnet_softc *sc, struct vio_msg_tag *tag)
697 {
698 	struct vnet_desc_msg *dm = (struct vnet_desc_msg *)tag;
699 	struct ldc_conn *lc = &sc->sc_lc;
700 	struct ldc_map *map = sc->sc_lm;
701 	struct ifnet *ifp = &sc->sc_ac.ac_if;
702 	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
703 	struct mbuf *m;
704 	caddr_t buf;
705 	paddr_t pa;
706 	psize_t nbytes;
707 	u_int cons;
708 	int err;
709 
710 	switch(tag->stype) {
711 	case VIO_SUBTYPE_INFO:
712 		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
713 		if (buf == NULL) {
714 			ifp->if_ierrors++;
715 			goto skip;
716 		}
717 		nbytes = roundup(dm->nbytes, 8);
718 
719 		if (dm->nbytes > (ETHER_MAX_LEN - ETHER_CRC_LEN)) {
720 			ifp->if_ierrors++;
721 			goto skip;
722 		}
723 
724 		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
725 		err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
726 		    dm->cookie[0].addr, pa, nbytes, &nbytes);
727 		if (err != H_EOK) {
728 			pool_put(&sc->sc_pool, buf);
729 			ifp->if_ierrors++;
730 			goto skip;
731 		}
732 
733 		/* Stupid OBP doesn't align properly. */
734                 m = m_devget(buf, dm->nbytes, ETHER_ALIGN);
735 		pool_put(&sc->sc_pool, buf);
736 		if (m == NULL) {
737 			ifp->if_ierrors++;
738 			goto skip;
739 		}
740 
741 		/* Pass it on. */
742 		ml_enqueue(&ml, m);
743 		if_input(ifp, &ml);
744 
745 	skip:
746 		dm->tag.stype = VIO_SUBTYPE_ACK;
747 		dm->tag.sid = sc->sc_local_sid;
748 		vnet_sendmsg(sc, dm, sizeof(*dm));
749 		break;
750 
751 	case VIO_SUBTYPE_ACK:
752 		DPRINTF(("DATA/ACK/DESC_DATA\n"));
753 
754 		if (dm->desc_handle != sc->sc_tx_cons) {
755 			printf("out of order\n");
756 			return;
757 		}
758 
759 		cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);
760 
761 		map->lm_slot[sc->sc_vsd[cons].vsd_map_idx].entry = 0;
762 		atomic_dec_int(&map->lm_count);
763 
764 		pool_put(&sc->sc_pool, sc->sc_vsd[cons].vsd_buf);
765 		sc->sc_vsd[cons].vsd_buf = NULL;
766 		ifp->if_opackets++;
767 
768 		sc->sc_tx_cons++;
769 		break;
770 
771 	case VIO_SUBTYPE_NACK:
772 		DPRINTF(("DATA/NACK/DESC_DATA\n"));
773 		break;
774 
775 	default:
776 		DPRINTF(("DATA/0x%02x/DESC_DATA\n", tag->stype));
777 		break;
778 	}
779 }
780 
781 void
782 vnet_rx_vio_dring_data(struct vnet_softc *sc, struct vio_msg_tag *tag)
783 {
784 	struct vio_dring_msg *dm = (struct vio_dring_msg *)tag;
785 	struct ldc_conn *lc = &sc->sc_lc;
786 	struct ifnet *ifp = &sc->sc_ac.ac_if;
787 	struct mbuf *m;
788 	paddr_t pa;
789 	psize_t nbytes;
790 	int err;
791 
792 	switch(tag->stype) {
793 	case VIO_SUBTYPE_INFO:
794 	{
795 		struct vnet_desc desc;
796 		uint64_t cookie;
797 		paddr_t desc_pa;
798 		int idx, ack_end_idx = -1;
799 		struct mbuf_list ml = MBUF_LIST_INITIALIZER();
800 
801 		idx = dm->start_idx;
802 		for (;;) {
803 			cookie = sc->sc_peer_dring_cookie.addr;
804 			cookie += idx * sc->sc_peer_desc_size;
805 			nbytes = sc->sc_peer_desc_size;
806 			pmap_extract(pmap_kernel(), (vaddr_t)&desc, &desc_pa);
807 			err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN, cookie,
808 			    desc_pa, nbytes, &nbytes);
809 			if (err != H_EOK) {
810 				printf("hv_ldc_copy_in %d\n", err);
811 				break;
812 			}
813 
814 			if (desc.hdr.dstate != VIO_DESC_READY)
815 				break;
816 
817 			if (desc.nbytes > (ETHER_MAX_LEN - ETHER_CRC_LEN)) {
818 				ifp->if_ierrors++;
819 				goto skip;
820 			}
821 
822 			m = MCLGETI(NULL, M_DONTWAIT, NULL, desc.nbytes);
823 			if (!m)
824 				break;
825 			m->m_len = m->m_pkthdr.len = desc.nbytes;
826 			nbytes = roundup(desc.nbytes + VNET_ETHER_ALIGN, 8);
827 
828 			pmap_extract(pmap_kernel(), (vaddr_t)m->m_data, &pa);
829 			err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
830 			    desc.cookie[0].addr, pa, nbytes, &nbytes);
831 			if (err != H_EOK) {
832 				m_freem(m);
833 				goto skip;
834 			}
835 			m->m_data += VNET_ETHER_ALIGN;
836 
837 			ml_enqueue(&ml, m);
838 
839 		skip:
840 			desc.hdr.dstate = VIO_DESC_DONE;
841 			nbytes = sc->sc_peer_desc_size;
842 			err = hv_ldc_copy(lc->lc_id, LDC_COPY_OUT, cookie,
843 			    desc_pa, nbytes, &nbytes);
844 			if (err != H_EOK)
845 				printf("hv_ldc_copy_out %d\n", err);
846 
847 			ack_end_idx = idx;
848 			if (++idx == sc->sc_peer_dring_nentries)
849 				idx = 0;
850 		}
851 
852 		if_input(ifp, &ml);
853 
854 		if (ack_end_idx == -1) {
855 			dm->tag.stype = VIO_SUBTYPE_NACK;
856 		} else {
857 			dm->tag.stype = VIO_SUBTYPE_ACK;
858 			dm->end_idx = ack_end_idx;
859 		}
860 		dm->tag.sid = sc->sc_local_sid;
861 		dm->proc_state = VIO_DP_STOPPED;
862 		vnet_sendmsg(sc, dm, sizeof(*dm));
863 		break;
864 	}
865 
866 	case VIO_SUBTYPE_ACK:
867 	{
868 		struct ldc_map *map = sc->sc_lm;
869 		u_int cons, count;
870 
871 		sc->sc_peer_state = dm->proc_state;
872 
873 		cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);
874 		while (sc->sc_vd->vd_desc[cons].hdr.dstate == VIO_DESC_DONE) {
875 			map->lm_slot[sc->sc_vsd[cons].vsd_map_idx].entry = 0;
876 			atomic_dec_int(&map->lm_count);
877 
878 			pool_put(&sc->sc_pool, sc->sc_vsd[cons].vsd_buf);
879 			sc->sc_vsd[cons].vsd_buf = NULL;
880 			ifp->if_opackets++;
881 
882 			sc->sc_vd->vd_desc[cons].hdr.dstate = VIO_DESC_FREE;
883 			sc->sc_tx_cons++;
884 			cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);
885 		}
886 
887 		count = sc->sc_tx_prod - sc->sc_tx_cons;
888 		if (count > 0 && sc->sc_peer_state != VIO_DP_ACTIVE)
889 			vnet_send_dring_data(sc, cons);
890 
891 		KERNEL_LOCK();
892 		if (count < (sc->sc_vd->vd_nentries - 1))
893 			ifq_clr_oactive(&ifp->if_snd);
894 		if (count == 0)
895 			ifp->if_timer = 0;
896 
897 		vnet_start(ifp);
898 		KERNEL_UNLOCK();
899 		break;
900 	}
901 
902 	case VIO_SUBTYPE_NACK:
903 		DPRINTF(("DATA/NACK/DRING_DATA\n"));
904 		sc->sc_peer_state = VIO_DP_STOPPED;
905 		break;
906 
907 	default:
908 		DPRINTF(("DATA/0x%02x/DRING_DATA\n", tag->stype));
909 		break;
910 	}
911 }
912 
913 void
914 vnet_ldc_reset(struct ldc_conn *lc)
915 {
916 	struct vnet_softc *sc = lc->lc_sc;
917 	int i;
918 
919 	timeout_del(&sc->sc_handshake_to);
920 	sc->sc_tx_prod = sc->sc_tx_cons = 0;
921 	sc->sc_peer_state = VIO_DP_STOPPED;
922 	sc->sc_vio_state = 0;
923 	vnet_link_state(sc);
924 
925 	sc->sc_lm->lm_next = 1;
926 	sc->sc_lm->lm_count = 1;
927 	for (i = 1; i < sc->sc_lm->lm_nentries; i++)
928 		sc->sc_lm->lm_slot[i].entry = 0;
929 
930 	for (i = 0; i < sc->sc_vd->vd_nentries; i++) {
931 		if (sc->sc_vsd[i].vsd_buf) {
932 			pool_put(&sc->sc_pool, sc->sc_vsd[i].vsd_buf);
933 			sc->sc_vsd[i].vsd_buf = NULL;
934 		}
935 		sc->sc_vd->vd_desc[i].hdr.dstate = VIO_DESC_FREE;
936 	}
937 }
938 
939 void
940 vnet_ldc_start(struct ldc_conn *lc)
941 {
942 	struct vnet_softc *sc = lc->lc_sc;
943 
944 	timeout_del(&sc->sc_handshake_to);
945 	vnet_send_ver_info(sc, VNET_MAJOR, VNET_MINOR);
946 }
947 
948 void
949 vnet_sendmsg(struct vnet_softc *sc, void *msg, size_t len)
950 {
951 	struct ldc_conn *lc = &sc->sc_lc;
952 	int err;
953 
954 	err = ldc_send_unreliable(lc, msg, len);
955 	if (err)
956 		printf("%s: ldc_send_unreliable: %d\n", __func__, err);
957 }
958 
959 void
960 vnet_send_ver_info(struct vnet_softc *sc, uint16_t major, uint16_t minor)
961 {
962 	struct vio_ver_info vi;
963 
964 	bzero(&vi, sizeof(vi));
965 	vi.tag.type = VIO_TYPE_CTRL;
966 	vi.tag.stype = VIO_SUBTYPE_INFO;
967 	vi.tag.stype_env = VIO_VER_INFO;
968 	vi.tag.sid = sc->sc_local_sid;
969 	vi.major = major;
970 	vi.minor = minor;
971 	vi.dev_class = VDEV_NETWORK;
972 	vnet_sendmsg(sc, &vi, sizeof(vi));
973 
974 	sc->sc_vio_state |= VIO_SND_VER_INFO;
975 }
976 
977 void
978 vnet_send_attr_info(struct vnet_softc *sc)
979 {
980 	struct vnet_attr_info ai;
981 	int i;
982 
983 	bzero(&ai, sizeof(ai));
984 	ai.tag.type = VIO_TYPE_CTRL;
985 	ai.tag.stype = VIO_SUBTYPE_INFO;
986 	ai.tag.stype_env = VIO_ATTR_INFO;
987 	ai.tag.sid = sc->sc_local_sid;
988 	ai.xfer_mode = VIO_DRING_MODE;
989 	ai.addr_type = VNET_ADDR_ETHERMAC;
990 	ai.ack_freq = 0;
991 	ai.addr = 0;
992 	for (i = 0; i < ETHER_ADDR_LEN; i++) {
993 		ai.addr <<= 8;
994 		ai.addr |= sc->sc_ac.ac_enaddr[i];
995 	}
996 	ai.mtu = ETHER_MAX_LEN - ETHER_CRC_LEN;
997 	vnet_sendmsg(sc, &ai, sizeof(ai));
998 
999 	sc->sc_vio_state |= VIO_SND_ATTR_INFO;
1000 }
1001 
1002 void
1003 vnet_send_dring_reg(struct vnet_softc *sc)
1004 {
1005 	struct vio_dring_reg dr;
1006 
1007 	bzero(&dr, sizeof(dr));
1008 	dr.tag.type = VIO_TYPE_CTRL;
1009 	dr.tag.stype = VIO_SUBTYPE_INFO;
1010 	dr.tag.stype_env = VIO_DRING_REG;
1011 	dr.tag.sid = sc->sc_local_sid;
1012 	dr.dring_ident = 0;
1013 	dr.num_descriptors = sc->sc_vd->vd_nentries;
1014 	dr.descriptor_size = sizeof(struct vnet_desc);
1015 	dr.options = VIO_TX_RING;
1016 	dr.ncookies = 1;
1017 	dr.cookie[0].addr = 0;
1018 	dr.cookie[0].size = PAGE_SIZE;
1019 	vnet_sendmsg(sc, &dr, sizeof(dr));
1020 
1021 	sc->sc_vio_state |= VIO_SND_DRING_REG;
1022 };
1023 
1024 void
1025 vio_send_rdx(struct vnet_softc *sc)
1026 {
1027 	struct vio_msg_tag tag;
1028 
1029 	tag.type = VIO_TYPE_CTRL;
1030 	tag.stype = VIO_SUBTYPE_INFO;
1031 	tag.stype_env = VIO_RDX;
1032 	tag.sid = sc->sc_local_sid;
1033 	vnet_sendmsg(sc, &tag, sizeof(tag));
1034 
1035 	sc->sc_vio_state |= VIO_SND_RDX;
1036 }
1037 
1038 void
1039 vnet_send_dring_data(struct vnet_softc *sc, uint32_t start_idx)
1040 {
1041 	struct vio_dring_msg dm;
1042 	u_int peer_state;
1043 
1044 	peer_state = atomic_swap_uint(&sc->sc_peer_state, VIO_DP_ACTIVE);
1045 	if (peer_state == VIO_DP_ACTIVE)
1046 		return;
1047 
1048 	bzero(&dm, sizeof(dm));
1049 	dm.tag.type = VIO_TYPE_DATA;
1050 	dm.tag.stype = VIO_SUBTYPE_INFO;
1051 	dm.tag.stype_env = VIO_DRING_DATA;
1052 	dm.tag.sid = sc->sc_local_sid;
1053 	dm.seq_no = sc->sc_seq_no++;
1054 	dm.dring_ident = sc->sc_dring_ident;
1055 	dm.start_idx = start_idx;
1056 	dm.end_idx = -1;
1057 	vnet_sendmsg(sc, &dm, sizeof(dm));
1058 }
1059 
1060 void
1061 vnet_start(struct ifnet *ifp)
1062 {
1063 	struct vnet_softc *sc = ifp->if_softc;
1064 	struct ldc_conn *lc = &sc->sc_lc;
1065 	struct ldc_map *map = sc->sc_lm;
1066 	struct mbuf *m;
1067 	paddr_t pa;
1068 	caddr_t buf;
1069 	uint64_t tx_head, tx_tail, tx_state;
1070 	u_int start, prod, count;
1071 	int err;
1072 
1073 	if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
1074 		return;
1075 
1076 	if (IFQ_IS_EMPTY(&ifp->if_snd))
1077 		return;
1078 
1079 	/*
1080 	 * We cannot transmit packets until a VIO connection has been
1081 	 * established.
1082 	 */
1083 	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
1084 	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX))
1085 		return;
1086 
1087 	/*
1088 	 * Make sure there is room in the LDC transmit queue to send a
1089 	 * DRING_DATA message.
1090 	 */
1091 	err = hv_ldc_tx_get_state(lc->lc_id, &tx_head, &tx_tail, &tx_state);
1092 	if (err != H_EOK)
1093 		return;
1094 	tx_tail += sizeof(struct ldc_pkt);
1095 	tx_tail &= ((lc->lc_txq->lq_nentries * sizeof(struct ldc_pkt)) - 1);
1096 	if (tx_tail == tx_head) {
1097 		ifq_set_oactive(&ifp->if_snd);
1098 		return;
1099 	}
1100 
1101 	if (sc->sc_xfer_mode == VIO_DESC_MODE) {
1102 		vnet_start_desc(ifp);
1103 		return;
1104 	}
1105 
1106 	start = prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);
1107 	while (sc->sc_vd->vd_desc[prod].hdr.dstate == VIO_DESC_FREE) {
1108 		count = sc->sc_tx_prod - sc->sc_tx_cons;
1109 		if (count >= (sc->sc_vd->vd_nentries - 1) ||
1110 		    map->lm_count >= map->lm_nentries) {
1111 			ifq_set_oactive(&ifp->if_snd);
1112 			break;
1113 		}
1114 
1115 		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
1116 		if (buf == NULL) {
1117 			ifq_set_oactive(&ifp->if_snd);
1118 			break;
1119 		}
1120 
1121 		IFQ_DEQUEUE(&ifp->if_snd, m);
1122 		if (m == NULL) {
1123 			pool_put(&sc->sc_pool, buf);
1124 			break;
1125 		}
1126 
1127 		m_copydata(m, 0, m->m_pkthdr.len, buf + VNET_ETHER_ALIGN);
1128 
1129 #if NBPFILTER > 0
1130 		/*
1131 		 * If BPF is listening on this interface, let it see the
1132 		 * packet before we commit it to the wire.
1133 		 */
1134 		if (ifp->if_bpf)
1135 			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
1136 #endif
1137 
1138 		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
1139 		KASSERT((pa & ~PAGE_MASK) == (pa & LDC_MTE_RA_MASK));
1140 		while (map->lm_slot[map->lm_next].entry != 0) {
1141 			map->lm_next++;
1142 			map->lm_next &= (map->lm_nentries - 1);
1143 		}
1144 		map->lm_slot[map->lm_next].entry = (pa & LDC_MTE_RA_MASK);
1145 		map->lm_slot[map->lm_next].entry |= LDC_MTE_CPR;
1146 		atomic_inc_int(&map->lm_count);
1147 
1148 		sc->sc_vd->vd_desc[prod].nbytes = max(m->m_pkthdr.len, 60);
1149 		sc->sc_vd->vd_desc[prod].ncookies = 1;
1150 		sc->sc_vd->vd_desc[prod].cookie[0].addr =
1151 		    map->lm_next << PAGE_SHIFT | (pa & PAGE_MASK);
1152 		sc->sc_vd->vd_desc[prod].cookie[0].size = 2048;
1153 		membar_producer();
1154 		sc->sc_vd->vd_desc[prod].hdr.dstate = VIO_DESC_READY;
1155 
1156 		sc->sc_vsd[prod].vsd_map_idx = map->lm_next;
1157 		sc->sc_vsd[prod].vsd_buf = buf;
1158 
1159 		sc->sc_tx_prod++;
1160 		prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);
1161 
1162 		m_freem(m);
1163 	}
1164 
1165 	membar_producer();
1166 
1167 	if (start != prod && sc->sc_peer_state != VIO_DP_ACTIVE) {
1168 		vnet_send_dring_data(sc, start);
1169 		ifp->if_timer = 5;
1170 	}
1171 }
1172 
1173 void
1174 vnet_start_desc(struct ifnet *ifp)
1175 {
1176 	struct vnet_softc *sc = ifp->if_softc;
1177 	struct ldc_map *map = sc->sc_lm;
1178 	struct vnet_desc_msg dm;
1179 	struct mbuf *m;
1180 	paddr_t pa;
1181 	caddr_t buf;
1182 	u_int prod, count;
1183 
1184 	for (;;) {
1185 		count = sc->sc_tx_prod - sc->sc_tx_cons;
1186 		if (count >= (sc->sc_vd->vd_nentries - 1) ||
1187 		    map->lm_count >= map->lm_nentries) {
1188 			ifq_set_oactive(&ifp->if_snd);
1189 			return;
1190 		}
1191 
1192 		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
1193 		if (buf == NULL) {
1194 			ifq_set_oactive(&ifp->if_snd);
1195 			return;
1196 		}
1197 
1198 		IFQ_DEQUEUE(&ifp->if_snd, m);
1199 		if (m == NULL) {
1200 			pool_put(&sc->sc_pool, buf);
1201 			return;
1202 		}
1203 
1204 		m_copydata(m, 0, m->m_pkthdr.len, buf);
1205 
1206 #if NBPFILTER > 0
1207 		/*
1208 		 * If BPF is listening on this interface, let it see the
1209 		 * packet before we commit it to the wire.
1210 		 */
1211 		if (ifp->if_bpf)
1212 			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
1213 #endif
1214 
1215 		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
1216 		KASSERT((pa & ~PAGE_MASK) == (pa & LDC_MTE_RA_MASK));
1217 		while (map->lm_slot[map->lm_next].entry != 0) {
1218 			map->lm_next++;
1219 			map->lm_next &= (map->lm_nentries - 1);
1220 		}
1221 		map->lm_slot[map->lm_next].entry = (pa & LDC_MTE_RA_MASK);
1222 		map->lm_slot[map->lm_next].entry |= LDC_MTE_CPR;
1223 		atomic_inc_int(&map->lm_count);
1224 
1225 		prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);
1226 		sc->sc_vsd[prod].vsd_map_idx = map->lm_next;
1227 		sc->sc_vsd[prod].vsd_buf = buf;
1228 
1229 		bzero(&dm, sizeof(dm));
1230 		dm.tag.type = VIO_TYPE_DATA;
1231 		dm.tag.stype = VIO_SUBTYPE_INFO;
1232 		dm.tag.stype_env = VIO_DESC_DATA;
1233 		dm.tag.sid = sc->sc_local_sid;
1234 		dm.seq_no = sc->sc_seq_no++;
1235 		dm.desc_handle = sc->sc_tx_prod;
1236 		dm.nbytes = max(m->m_pkthdr.len, 60);
1237 		dm.ncookies = 1;
1238 		dm.cookie[0].addr =
1239 			map->lm_next << PAGE_SHIFT | (pa & PAGE_MASK);
1240 		dm.cookie[0].size = 2048;
1241 		vnet_sendmsg(sc, &dm, sizeof(dm));
1242 
1243 		sc->sc_tx_prod++;
1244 		sc->sc_tx_prod &= (sc->sc_vd->vd_nentries - 1);
1245 
1246 		m_freem(m);
1247 	}
1248 }
1249 
1250 int
1251 vnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1252 {
1253 	struct vnet_softc *sc = ifp->if_softc;
1254 	struct ifreq *ifr = (struct ifreq *)data;
1255 	int s, error = 0;
1256 
1257 	s = splnet();
1258 
1259 	switch (cmd) {
1260 	case SIOCSIFADDR:
1261 		ifp->if_flags |= IFF_UP;
1262 		/* FALLTHROUGH */
1263 	case SIOCSIFFLAGS:
1264 		if (ifp->if_flags & IFF_UP) {
1265 			if ((ifp->if_flags & IFF_RUNNING) == 0)
1266 				vnet_init(ifp);
1267 		} else {
1268 			if (ifp->if_flags & IFF_RUNNING)
1269 				vnet_stop(ifp);
1270 		}
1271 		break;
1272 
1273 	case SIOCGIFMEDIA:
1274 	case SIOCSIFMEDIA:
1275 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
1276 		break;
1277 
1278 	case SIOCADDMULTI:
1279 	case SIOCDELMULTI:
1280 		/*
1281 		 * XXX Removing all multicast addresses and adding
1282 		 * most of them back, is somewhat retarded.
1283 		 */
1284 		vnet_setmulti(sc, 0);
1285 		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
1286 		vnet_setmulti(sc, 1);
1287 		if (error == ENETRESET)
1288 			error = 0;
1289 		break;
1290 
1291 	default:
1292 		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
1293 	}
1294 
1295 	splx(s);
1296 	return (error);
1297 }
1298 
1299 void
1300 vnet_watchdog(struct ifnet *ifp)
1301 {
1302 	struct vnet_softc *sc = ifp->if_softc;
1303 
1304 	printf("%s: watchdog timeout\n", sc->sc_dv.dv_xname);
1305 }
1306 
/*
 * Media change callback.  There is nothing to configure, so every
 * request is accepted; always returns 0.
 */
int
vnet_media_change(struct ifnet *ifp)
{
	return 0;
}
1312 
1313 void
1314 vnet_media_status(struct ifnet *ifp, struct ifmediareq *imr)
1315 {
1316 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
1317 	imr->ifm_status = IFM_AVALID;
1318 
1319 	if (LINK_STATE_IS_UP(ifp->if_link_state) &&
1320 	    ifp->if_flags & IFF_UP)
1321 		imr->ifm_status |= IFM_ACTIVE;
1322 }
1323 
1324 void
1325 vnet_link_state(struct vnet_softc *sc)
1326 {
1327 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1328 	int link_state = LINK_STATE_DOWN;
1329 
1330 	KERNEL_LOCK();
1331 	if (ISSET(sc->sc_vio_state, VIO_RCV_RDX) &&
1332 	    ISSET(sc->sc_vio_state, VIO_ACK_RDX))
1333 		link_state = LINK_STATE_FULL_DUPLEX;
1334 	if (ifp->if_link_state != link_state) {
1335 		ifp->if_link_state = link_state;
1336 		if_link_state_change(ifp);
1337 	}
1338 	KERNEL_UNLOCK();
1339 }
1340 
1341 void
1342 vnet_setmulti(struct vnet_softc *sc, int set)
1343 {
1344 	struct arpcom *ac = &sc->sc_ac;
1345 	struct ether_multi *enm;
1346 	struct ether_multistep step;
1347 	struct vnet_mcast_info mi;
1348 	int count = 0;
1349 
1350 	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
1351 	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX))
1352 		return;
1353 
1354 	bzero(&mi, sizeof(mi));
1355 	mi.tag.type = VIO_TYPE_CTRL;
1356 	mi.tag.stype = VIO_SUBTYPE_INFO;
1357 	mi.tag.stype_env = VNET_MCAST_INFO;
1358 	mi.tag.sid = sc->sc_local_sid;
1359 	mi.set = set ? 1 : 0;
1360 	KERNEL_LOCK();
1361 	ETHER_FIRST_MULTI(step, ac, enm);
1362 	while (enm != NULL) {
1363 		/* XXX What about multicast ranges? */
1364 		bcopy(enm->enm_addrlo, mi.mcast_addr[count], ETHER_ADDR_LEN);
1365 		ETHER_NEXT_MULTI(step, enm);
1366 
1367 		count++;
1368 		if (count < VNET_NUM_MCAST)
1369 			continue;
1370 
1371 		mi.count = VNET_NUM_MCAST;
1372 		vnet_sendmsg(sc, &mi, sizeof(mi));
1373 		count = 0;
1374 	}
1375 
1376 	if (count > 0) {
1377 		mi.count = count;
1378 		vnet_sendmsg(sc, &mi, sizeof(mi));
1379 	}
1380 	KERNEL_UNLOCK();
1381 }
1382 
1383 void
1384 vnet_init(struct ifnet *ifp)
1385 {
1386 	struct vnet_softc *sc = ifp->if_softc;
1387 	struct ldc_conn *lc = &sc->sc_lc;
1388 	int err;
1389 
1390 	sc->sc_lm = ldc_map_alloc(sc->sc_dmatag, 2048);
1391 	if (sc->sc_lm == NULL)
1392 		return;
1393 
1394 	err = hv_ldc_set_map_table(lc->lc_id,
1395 	    sc->sc_lm->lm_map->dm_segs[0].ds_addr, sc->sc_lm->lm_nentries);
1396 	if (err != H_EOK) {
1397 		printf("hv_ldc_set_map_table %d\n", err);
1398 		return;
1399 	}
1400 
1401 	sc->sc_vd = vnet_dring_alloc(sc->sc_dmatag, VNET_NUM_SOFT_DESC);
1402 	if (sc->sc_vd == NULL)
1403 		return;
1404 	sc->sc_vsd = malloc(VNET_NUM_SOFT_DESC * sizeof(*sc->sc_vsd), M_DEVBUF,
1405 	    M_NOWAIT|M_ZERO);
1406 	if (sc->sc_vsd == NULL)
1407 		return;
1408 
1409 	sc->sc_lm->lm_slot[0].entry = sc->sc_vd->vd_map->dm_segs[0].ds_addr;
1410 	sc->sc_lm->lm_slot[0].entry &= LDC_MTE_RA_MASK;
1411 	sc->sc_lm->lm_slot[0].entry |= LDC_MTE_CPR | LDC_MTE_CPW;
1412 	sc->sc_lm->lm_next = 1;
1413 	sc->sc_lm->lm_count = 1;
1414 
1415 	err = hv_ldc_tx_qconf(lc->lc_id,
1416 	    lc->lc_txq->lq_map->dm_segs[0].ds_addr, lc->lc_txq->lq_nentries);
1417 	if (err != H_EOK)
1418 		printf("hv_ldc_tx_qconf %d\n", err);
1419 
1420 	err = hv_ldc_rx_qconf(lc->lc_id,
1421 	    lc->lc_rxq->lq_map->dm_segs[0].ds_addr, lc->lc_rxq->lq_nentries);
1422 	if (err != H_EOK)
1423 		printf("hv_ldc_rx_qconf %d\n", err);
1424 
1425 	cbus_intr_setenabled(sc->sc_bustag, sc->sc_tx_ino, INTR_ENABLED);
1426 	cbus_intr_setenabled(sc->sc_bustag, sc->sc_rx_ino, INTR_ENABLED);
1427 
1428 	ldc_send_vers(lc);
1429 
1430 	ifp->if_flags |= IFF_RUNNING;
1431 }
1432 
1433 void
1434 vnet_stop(struct ifnet *ifp)
1435 {
1436 	struct vnet_softc *sc = ifp->if_softc;
1437 	struct ldc_conn *lc = &sc->sc_lc;
1438 
1439 	ifp->if_flags &= ~IFF_RUNNING;
1440 	ifq_clr_oactive(&ifp->if_snd);
1441 	ifp->if_timer = 0;
1442 
1443 	cbus_intr_setenabled(sc->sc_bustag, sc->sc_tx_ino, INTR_DISABLED);
1444 	cbus_intr_setenabled(sc->sc_bustag, sc->sc_rx_ino, INTR_DISABLED);
1445 
1446 	intr_barrier(sc->sc_tx_ih);
1447 	intr_barrier(sc->sc_rx_ih);
1448 
1449 	hv_ldc_tx_qconf(lc->lc_id, 0, 0);
1450 	hv_ldc_rx_qconf(lc->lc_id, 0, 0);
1451 	lc->lc_tx_seqid = 0;
1452 	lc->lc_state = 0;
1453 	lc->lc_tx_state = lc->lc_rx_state = LDC_CHANNEL_DOWN;
1454 	vnet_ldc_reset(lc);
1455 
1456 	free(sc->sc_vsd, M_DEVBUF, VNET_NUM_SOFT_DESC * sizeof(*sc->sc_vsd));
1457 
1458 	vnet_dring_free(sc->sc_dmatag, sc->sc_vd);
1459 
1460 	hv_ldc_set_map_table(lc->lc_id, 0, 0);
1461 	ldc_map_free(sc->sc_dmatag, sc->sc_lm);
1462 }
1463 
1464 struct vnet_dring *
1465 vnet_dring_alloc(bus_dma_tag_t t, int nentries)
1466 {
1467 	struct vnet_dring *vd;
1468 	bus_size_t size;
1469 	caddr_t va;
1470 	int nsegs;
1471 	int i;
1472 
1473 	vd = malloc(sizeof(struct vnet_dring), M_DEVBUF, M_NOWAIT);
1474 	if (vd == NULL)
1475 		return NULL;
1476 
1477 	size = roundup(nentries * sizeof(struct vnet_desc), PAGE_SIZE);
1478 
1479 	if (bus_dmamap_create(t, size, 1, size, 0,
1480 	    BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, &vd->vd_map) != 0)
1481 		return (NULL);
1482 
1483 	if (bus_dmamem_alloc(t, size, PAGE_SIZE, 0, &vd->vd_seg, 1,
1484 	    &nsegs, BUS_DMA_NOWAIT) != 0)
1485 		goto destroy;
1486 
1487 	if (bus_dmamem_map(t, &vd->vd_seg, 1, size, &va,
1488 	    BUS_DMA_NOWAIT) != 0)
1489 		goto free;
1490 
1491 	if (bus_dmamap_load(t, vd->vd_map, va, size, NULL,
1492 	    BUS_DMA_NOWAIT) != 0)
1493 		goto unmap;
1494 
1495 	vd->vd_desc = (struct vnet_desc *)va;
1496 	vd->vd_nentries = nentries;
1497 	bzero(vd->vd_desc, nentries * sizeof(struct vnet_desc));
1498 	for (i = 0; i < vd->vd_nentries; i++)
1499 		vd->vd_desc[i].hdr.dstate = VIO_DESC_FREE;
1500 	return (vd);
1501 
1502 unmap:
1503 	bus_dmamem_unmap(t, va, size);
1504 free:
1505 	bus_dmamem_free(t, &vd->vd_seg, 1);
1506 destroy:
1507 	bus_dmamap_destroy(t, vd->vd_map);
1508 
1509 	return (NULL);
1510 }
1511 
1512 void
1513 vnet_dring_free(bus_dma_tag_t t, struct vnet_dring *vd)
1514 {
1515 	bus_size_t size;
1516 
1517 	size = vd->vd_nentries * sizeof(struct vnet_desc);
1518 	size = roundup(size, PAGE_SIZE);
1519 
1520 	bus_dmamap_unload(t, vd->vd_map);
1521 	bus_dmamem_unmap(t, (caddr_t)vd->vd_desc, size);
1522 	bus_dmamem_free(t, &vd->vd_seg, 1);
1523 	bus_dmamap_destroy(t, vd->vd_map);
1524 	free(vd, M_DEVBUF, 0);
1525 }
1526