1 /*	$OpenBSD: vnet.c,v 1.56 2016/04/13 11:34:00 mpi Exp $	*/
2 /*
3  * Copyright (c) 2009, 2015 Mark Kettenis
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
18 #include "bpfilter.h"
19 
20 #include <sys/param.h>
21 #include <sys/atomic.h>
22 #include <sys/device.h>
23 #include <sys/malloc.h>
24 #include <sys/pool.h>
25 #include <sys/mbuf.h>
26 #include <sys/socket.h>
27 #include <sys/sockio.h>
28 #include <sys/systm.h>
29 #include <sys/timeout.h>
30 
31 #include <machine/autoconf.h>
32 #include <machine/hypervisor.h>
33 #include <machine/openfirm.h>
34 
35 #include <net/if.h>
36 #include <net/if_media.h>
37 
38 #include <netinet/in.h>
39 #include <netinet/if_ether.h>
40 
41 #if NBPFILTER > 0
42 #include <net/bpf.h>
43 #endif
44 
45 #include <uvm/uvm_extern.h>
46 
47 #include <sparc64/dev/cbusvar.h>
48 #include <sparc64/dev/ldcvar.h>
49 #include <sparc64/dev/viovar.h>
50 
51 #ifdef VNET_DEBUG
52 #define DPRINTF(x)	printf x
53 #else
54 #define DPRINTF(x)
55 #endif
56 
57 #define VNET_TX_ENTRIES		32
58 #define VNET_RX_ENTRIES		32
59 
60 struct vnet_attr_info {
61 	struct vio_msg_tag	tag;
62 	uint8_t			xfer_mode;
63 	uint8_t			addr_type;
64 	uint16_t		ack_freq;
65 	uint32_t		_reserved1;
66 	uint64_t		addr;
67 	uint64_t		mtu;
68 	uint64_t		_reserved2[3];
69 };
70 
71 /* Address types. */
72 #define VNET_ADDR_ETHERMAC	0x01
73 
74 /* Sub-Type envelopes. */
75 #define VNET_MCAST_INFO		0x0101
76 
77 #define VNET_NUM_MCAST		7
78 
79 struct vnet_mcast_info {
80 	struct vio_msg_tag	tag;
81 	uint8_t			set;
82 	uint8_t			count;
83 	uint8_t			mcast_addr[VNET_NUM_MCAST][ETHER_ADDR_LEN];
84 	uint32_t		_reserved;
85 };
86 
87 struct vnet_desc {
88 	struct vio_dring_hdr	hdr;
89 	uint32_t		nbytes;
90 	uint32_t		ncookies;
91 	struct ldc_cookie	cookie[2];
92 };
93 
94 struct vnet_desc_msg {
95 	struct vio_msg_tag	tag;
96 	uint64_t		seq_no;
97 	uint64_t		desc_handle;
98 	uint32_t		nbytes;
99 	uint32_t		ncookies;
100 	struct ldc_cookie	cookie[1];
101 };
102 
103 struct vnet_dring {
104 	bus_dmamap_t		vd_map;
105 	bus_dma_segment_t	vd_seg;
106 	struct vnet_desc	*vd_desc;
107 	int			vd_nentries;
108 };
109 
110 struct vnet_dring *vnet_dring_alloc(bus_dma_tag_t, int);
111 void	vnet_dring_free(bus_dma_tag_t, struct vnet_dring *);
112 
113 /*
114  * For now, we only support vNet 1.0.
115  */
116 #define VNET_MAJOR	1
117 #define VNET_MINOR	0
118 
119 /*
120  * The vNet protocol wants the IP header to be 64-bit aligned, so
121  * define our own variant of ETHER_ALIGN.
122  */
123 #define VNET_ETHER_ALIGN	6
124 
125 struct vnet_soft_desc {
126 	int		vsd_map_idx;
127 	caddr_t		vsd_buf;
128 };
129 
130 struct vnet_softc {
131 	struct device	sc_dv;
132 	bus_space_tag_t	sc_bustag;
133 	bus_dma_tag_t	sc_dmatag;
134 
135 	uint64_t	sc_tx_ino;
136 	uint64_t	sc_rx_ino;
137 	void		*sc_tx_ih;
138 	void		*sc_rx_ih;
139 
140 	struct ldc_conn	sc_lc;
141 
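	/*
	 * Handshake progress flags.  The VIO session is considered
	 * established once both VIO_RCV_RDX and VIO_ACK_RDX are set.
	 */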
142 	uint16_t	sc_vio_state;
143 #define VIO_SND_VER_INFO	0x0001
144 #define VIO_ACK_VER_INFO	0x0002
145 #define VIO_RCV_VER_INFO	0x0004
146 #define VIO_SND_ATTR_INFO	0x0008
147 #define VIO_ACK_ATTR_INFO	0x0010
148 #define VIO_RCV_ATTR_INFO	0x0020
149 #define VIO_SND_DRING_REG	0x0040
150 #define VIO_ACK_DRING_REG	0x0080
151 #define VIO_RCV_DRING_REG	0x0100
152 #define VIO_SND_RDX		0x0200
153 #define VIO_ACK_RDX		0x0400
154 #define VIO_RCV_RDX		0x0800
155 
156 	struct timeout	sc_handshake_to;
157 
158 	uint8_t		sc_xfer_mode;
159 
160 	uint32_t	sc_local_sid;
161 	uint64_t	sc_dring_ident;
162 	uint64_t	sc_seq_no;
163 
164 	u_int		sc_tx_prod;
165 	u_int		sc_tx_cons;
166 
167 	u_int		sc_peer_state;
168 
169 	struct ldc_map	*sc_lm;
170 	struct vnet_dring *sc_vd;
171 	struct vnet_soft_desc *sc_vsd;
172 #define VNET_NUM_SOFT_DESC	128
173 
174 	size_t		sc_peer_desc_size;
175 	struct ldc_cookie sc_peer_dring_cookie;
176 	int		sc_peer_dring_nentries;
177 
178 	struct pool	sc_pool;
179 
180 	struct arpcom	sc_ac;
181 	struct ifmedia	sc_media;
182 };
183 
184 int	vnet_match(struct device *, void *, void *);
185 void	vnet_attach(struct device *, struct device *, void *);
186 
187 struct cfattach vnet_ca = {
188 	sizeof(struct vnet_softc), vnet_match, vnet_attach
189 };
190 
191 struct cfdriver vnet_cd = {
192 	NULL, "vnet", DV_IFNET
193 };
194 
195 int	vnet_tx_intr(void *);
196 int	vnet_rx_intr(void *);
197 void	vnet_handshake(void *);
198 
199 void	vio_rx_data(struct ldc_conn *, struct ldc_pkt *);
200 void	vnet_rx_vio_ctrl(struct vnet_softc *, struct vio_msg *);
201 void	vnet_rx_vio_ver_info(struct vnet_softc *, struct vio_msg_tag *);
202 void	vnet_rx_vio_attr_info(struct vnet_softc *, struct vio_msg_tag *);
203 void	vnet_rx_vio_dring_reg(struct vnet_softc *, struct vio_msg_tag *);
204 void	vnet_rx_vio_rdx(struct vnet_softc *sc, struct vio_msg_tag *);
205 void	vnet_rx_vio_data(struct vnet_softc *sc, struct vio_msg *);
206 void	vnet_rx_vio_desc_data(struct vnet_softc *sc, struct vio_msg_tag *);
207 void	vnet_rx_vio_dring_data(struct vnet_softc *sc, struct vio_msg_tag *);
208 
209 void	vnet_ldc_reset(struct ldc_conn *);
210 void	vnet_ldc_start(struct ldc_conn *);
211 
212 void	vnet_sendmsg(struct vnet_softc *, void *, size_t);
213 void	vnet_send_ver_info(struct vnet_softc *, uint16_t, uint16_t);
214 void	vnet_send_attr_info(struct vnet_softc *);
215 void	vnet_send_dring_reg(struct vnet_softc *);
216 void	vio_send_rdx(struct vnet_softc *);
217 void	vnet_send_dring_data(struct vnet_softc *, uint32_t);
218 
219 void	vnet_start(struct ifnet *);
220 void	vnet_start_desc(struct ifnet *);
221 int	vnet_ioctl(struct ifnet *, u_long, caddr_t);
222 void	vnet_watchdog(struct ifnet *);
223 
224 int	vnet_media_change(struct ifnet *);
225 void	vnet_media_status(struct ifnet *, struct ifmediareq *);
226 
227 void	vnet_link_state(struct vnet_softc *sc);
228 
229 void	vnet_setmulti(struct vnet_softc *, int);
230 
231 void	vnet_init(struct ifnet *);
232 void	vnet_stop(struct ifnet *);
233 
234 int
235 vnet_match(struct device *parent, void *match, void *aux)
236 {
237 	struct cbus_attach_args *ca = aux;
238 
239 	if (strcmp(ca->ca_name, "network") == 0)
240 		return (1);
241 
242 	return (0);
243 }
244 
245 void
246 vnet_attach(struct device *parent, struct device *self, void *aux)
247 {
248 	struct vnet_softc *sc = (struct vnet_softc *)self;
249 	struct cbus_attach_args *ca = aux;
250 	struct ldc_conn *lc;
251 	struct ifnet *ifp;
252 
253 	sc->sc_bustag = ca->ca_bustag;
254 	sc->sc_dmatag = ca->ca_dmatag;
255 	sc->sc_tx_ino = ca->ca_tx_ino;
256 	sc->sc_rx_ino = ca->ca_rx_ino;
257 
258 	printf(": ivec 0x%llx, 0x%llx", sc->sc_tx_ino, sc->sc_rx_ino);
259 
260 	/*
261 	 * Un-configure queues before registering interrupt handlers,
262 	 * so that we don't get any stale LDC packets or events.
263 	 */
264 	hv_ldc_tx_qconf(ca->ca_id, 0, 0);
265 	hv_ldc_rx_qconf(ca->ca_id, 0, 0);
266 
267 	sc->sc_tx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_tx_ino,
268 	    IPL_NET, BUS_INTR_ESTABLISH_MPSAFE, vnet_tx_intr,
269 	    sc, sc->sc_dv.dv_xname);
270 	sc->sc_rx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_rx_ino,
271 	    IPL_NET, BUS_INTR_ESTABLISH_MPSAFE, vnet_rx_intr,
272 	    sc, sc->sc_dv.dv_xname);
273 	if (sc->sc_tx_ih == NULL || sc->sc_rx_ih == NULL) {
274 		printf(", can't establish interrupt\n");
275 		return;
276 	}
277 
278 	lc = &sc->sc_lc;
279 	lc->lc_id = ca->ca_id;
280 	lc->lc_sc = sc;
281 	lc->lc_reset = vnet_ldc_reset;
282 	lc->lc_start = vnet_ldc_start;
283 	lc->lc_rx_data = vio_rx_data;
284 
285 	timeout_set(&sc->sc_handshake_to, vnet_handshake, sc);
286 	sc->sc_peer_state = VIO_DP_STOPPED;
287 
288 	lc->lc_txq = ldc_queue_alloc(sc->sc_dmatag, VNET_TX_ENTRIES);
289 	if (lc->lc_txq == NULL) {
290 		printf(", can't allocate tx queue\n");
291 		return;
292 	}
293 
294 	lc->lc_rxq = ldc_queue_alloc(sc->sc_dmatag, VNET_RX_ENTRIES);
295 	if (lc->lc_rxq == NULL) {
296 		printf(", can't allocate rx queue\n");
297 		goto free_txqueue;
298 	}
299 
300 	if (OF_getprop(ca->ca_node, "local-mac-address",
301 	    sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN) > 0)
302 		printf(", address %s", ether_sprintf(sc->sc_ac.ac_enaddr));
303 
304 	/*
305 	 * Each interface gets its own pool.
306 	 */
307 	pool_init(&sc->sc_pool, 2048, 0, 0, 0, sc->sc_dv.dv_xname, NULL);
308 	pool_setipl(&sc->sc_pool, IPL_NET);
309 
310 	ifp = &sc->sc_ac.ac_if;
311 	ifp->if_softc = sc;
312 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
313 	ifp->if_link_state = LINK_STATE_DOWN;
314 	ifp->if_ioctl = vnet_ioctl;
315 	ifp->if_start = vnet_start;
316 	ifp->if_watchdog = vnet_watchdog;
317 	strlcpy(ifp->if_xname, sc->sc_dv.dv_xname, IFNAMSIZ);
318 	IFQ_SET_MAXLEN(&ifp->if_snd, 31); /* XXX */
319 
320 	ifmedia_init(&sc->sc_media, 0, vnet_media_change, vnet_media_status);
321 	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
322 	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);
323 
324 	if_attach(ifp);
325 	ether_ifattach(ifp);
326 
327 	printf("\n");
328 	return;
329 
330 free_txqueue:
331 	ldc_queue_free(sc->sc_dmatag, lc->lc_txq);
332 }
333 
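/*
 * Transmit channel interrupt: only track LDC transmit channel state
 * changes; transmit completions are reclaimed when the peer ACKs our
 * dring or descriptor messages.
 */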
334 int
335 vnet_tx_intr(void *arg)
336 {
337 	struct vnet_softc *sc = arg;
338 	struct ldc_conn *lc = &sc->sc_lc;
339 	uint64_t tx_head, tx_tail, tx_state;
340 
341 	hv_ldc_tx_get_state(lc->lc_id, &tx_head, &tx_tail, &tx_state);
342 	if (tx_state != lc->lc_tx_state) {
343 		switch (tx_state) {
344 		case LDC_CHANNEL_DOWN:
345 			DPRINTF(("Tx link down\n"));
346 			break;
347 		case LDC_CHANNEL_UP:
348 			DPRINTF(("Tx link up\n"));
349 			break;
350 		case LDC_CHANNEL_RESET:
351 			DPRINTF(("Tx link reset\n"));
352 			break;
353 		}
354 		lc->lc_tx_state = tx_state;
355 	}
356 
357 	return (1);
358 }
359 
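/*
 * Receive channel interrupt: handle LDC channel state transitions and
 * process at most one packet from the receive queue per invocation.
 */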
360 int
361 vnet_rx_intr(void *arg)
362 {
363 	struct vnet_softc *sc = arg;
364 	struct ldc_conn *lc = &sc->sc_lc;
365 	uint64_t rx_head, rx_tail, rx_state;
366 	struct ldc_pkt *lp;
367 	int err;
368 
369 	err = hv_ldc_rx_get_state(lc->lc_id, &rx_head, &rx_tail, &rx_state);
370 	if (err == H_EINVAL)
371 		return (0);
372 	if (err != H_EOK) {
373 		printf("hv_ldc_rx_get_state %d\n", err);
374 		return (0);
375 	}
376 
377 	if (rx_state != lc->lc_rx_state) {
378 		switch (rx_state) {
379 		case LDC_CHANNEL_DOWN:
380 			DPRINTF(("Rx link down\n"));
381 			lc->lc_tx_seqid = 0;
382 			lc->lc_state = 0;
383 			lc->lc_reset(lc);
384 			break;
385 		case LDC_CHANNEL_UP:
386 			DPRINTF(("Rx link up\n"));
387 			timeout_add_msec(&sc->sc_handshake_to, 500);
388 			break;
389 		case LDC_CHANNEL_RESET:
390 			DPRINTF(("Rx link reset\n"));
391 			lc->lc_tx_seqid = 0;
392 			lc->lc_state = 0;
393 			lc->lc_reset(lc);
394 			timeout_add_msec(&sc->sc_handshake_to, 500);
395 			break;
396 		}
397 		lc->lc_rx_state = rx_state;
398 		return (1);
399 	}
400 
401 	if (rx_head == rx_tail)
402 		return (0);
403 
404 	lp = (struct ldc_pkt *)(lc->lc_rxq->lq_va + rx_head);
405 	switch (lp->type) {
406 	case LDC_CTRL:
407 		ldc_rx_ctrl(lc, lp);
408 		break;
409 
410 	case LDC_DATA:
411 		ldc_rx_data(lc, lp);
412 		break;
413 
414 	default:
415 		DPRINTF(("%0x02/%0x02/%0x02\n", lp->type, lp->stype,
416 		    lp->ctrl));
417 		ldc_reset(lc);
418 		break;
419 	}
420 
421 	if (lc->lc_state == 0)
422 		return (1);
423 
424 	rx_head += sizeof(*lp);
425 	rx_head &= ((lc->lc_rxq->lq_nentries * sizeof(*lp)) - 1);
426 	err = hv_ldc_rx_set_qhead(lc->lc_id, rx_head);
427 	if (err != H_EOK)
428 		printf("%s: hv_ldc_rx_set_qhead %d\n", __func__, err);
429 
430 	return (1);
431 }
432 
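/*
 * Handshake timeout: (re)start LDC version negotiation with the peer.
 */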
433 void
434 vnet_handshake(void *arg)
435 {
436 	struct vnet_softc *sc = arg;
437 
438 	ldc_send_vers(&sc->sc_lc);
439 }
440 
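/*
 * LDC data callback: dispatch a received VIO message to the control or
 * data handlers.
 */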
441 void
442 vio_rx_data(struct ldc_conn *lc, struct ldc_pkt *lp)
443 {
444 	struct vio_msg *vm = (struct vio_msg *)lp;
445 
446 	switch (vm->type) {
447 	case VIO_TYPE_CTRL:
448 		if ((lp->env & LDC_FRAG_START) == 0 &&
449 		    (lp->env & LDC_FRAG_STOP) == 0)
450 			return;
451 		vnet_rx_vio_ctrl(lc->lc_sc, vm);
452 		break;
453 
454 	case VIO_TYPE_DATA:
455 		if ((lp->env & LDC_FRAG_START) == 0)
456 			return;
457 		vnet_rx_vio_data(lc->lc_sc, vm);
458 		break;
459 
460 	default:
461 		DPRINTF(("Unhandled packet type 0x%02x\n", vm->type));
462 		ldc_reset(lc);
463 		break;
464 	}
465 }
466 
467 void
468 vnet_rx_vio_ctrl(struct vnet_softc *sc, struct vio_msg *vm)
469 {
470 	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;
471 
472 	switch (tag->stype_env) {
473 	case VIO_VER_INFO:
474 		vnet_rx_vio_ver_info(sc, tag);
475 		break;
476 	case VIO_ATTR_INFO:
477 		vnet_rx_vio_attr_info(sc, tag);
478 		break;
479 	case VIO_DRING_REG:
480 		vnet_rx_vio_dring_reg(sc, tag);
481 		break;
482 	case VIO_RDX:
483 		vnet_rx_vio_rdx(sc, tag);
484 		break;
485 	default:
486 		DPRINTF(("CTRL/0x%02x/0x%04x\n", tag->stype, tag->stype_env));
487 		break;
488 	}
489 }
490 
491 void
492 vnet_rx_vio_ver_info(struct vnet_softc *sc, struct vio_msg_tag *tag)
493 {
494 	struct vio_ver_info *vi = (struct vio_ver_info *)tag;
495 
496 	switch (vi->tag.stype) {
497 	case VIO_SUBTYPE_INFO:
498 		DPRINTF(("CTRL/INFO/VER_INFO\n"));
499 
500 		/* Make sure we're talking to a virtual network device. */
501 		if (vi->dev_class != VDEV_NETWORK &&
502 		    vi->dev_class != VDEV_NETWORK_SWITCH) {
503 			/* Huh, we're not talking to a network device? */
504 			printf("Not a network device\n");
505 			vi->tag.stype = VIO_SUBTYPE_NACK;
506 			vnet_sendmsg(sc, vi, sizeof(*vi));
507 			return;
508 		}
509 
510 		if (vi->major != VNET_MAJOR) {
511 			vi->tag.stype = VIO_SUBTYPE_NACK;
512 			vi->major = VNET_MAJOR;
513 			vi->minor = VNET_MINOR;
514 			vnet_sendmsg(sc, vi, sizeof(*vi));
515 			return;
516 		}
517 
518 		vi->tag.stype = VIO_SUBTYPE_ACK;
519 		vi->tag.sid = sc->sc_local_sid;
520 		vi->minor = VNET_MINOR;
521 		vnet_sendmsg(sc, vi, sizeof(*vi));
522 		sc->sc_vio_state |= VIO_RCV_VER_INFO;
523 		break;
524 
525 	case VIO_SUBTYPE_ACK:
526 		DPRINTF(("CTRL/ACK/VER_INFO\n"));
527 		if (!ISSET(sc->sc_vio_state, VIO_SND_VER_INFO)) {
528 			ldc_reset(&sc->sc_lc);
529 			break;
530 		}
531 		sc->sc_vio_state |= VIO_ACK_VER_INFO;
532 		break;
533 
534 	default:
535 		DPRINTF(("CTRL/0x%02x/VER_INFO\n", vi->tag.stype));
536 		break;
537 	}
538 
539 	if (ISSET(sc->sc_vio_state, VIO_RCV_VER_INFO) &&
540 	    ISSET(sc->sc_vio_state, VIO_ACK_VER_INFO))
541 		vnet_send_attr_info(sc);
542 }
543 
544 void
545 vnet_rx_vio_attr_info(struct vnet_softc *sc, struct vio_msg_tag *tag)
546 {
547 	struct vnet_attr_info *ai = (struct vnet_attr_info *)tag;
548 
549 	switch (ai->tag.stype) {
550 	case VIO_SUBTYPE_INFO:
551 		DPRINTF(("CTRL/INFO/ATTR_INFO\n"));
552 		sc->sc_xfer_mode = ai->xfer_mode;
553 
554 		ai->tag.stype = VIO_SUBTYPE_ACK;
555 		ai->tag.sid = sc->sc_local_sid;
556 		vnet_sendmsg(sc, ai, sizeof(*ai));
557 		sc->sc_vio_state |= VIO_RCV_ATTR_INFO;
558 		break;
559 
560 	case VIO_SUBTYPE_ACK:
561 		DPRINTF(("CTRL/ACK/ATTR_INFO\n"));
562 		if (!ISSET(sc->sc_vio_state, VIO_SND_ATTR_INFO)) {
563 			ldc_reset(&sc->sc_lc);
564 			break;
565 		}
566 		sc->sc_vio_state |= VIO_ACK_ATTR_INFO;
567 		break;
568 
569 	default:
570 		DPRINTF(("CTRL/0x%02x/ATTR_INFO\n", ai->tag.stype));
571 		break;
572 	}
573 
574 	if (ISSET(sc->sc_vio_state, VIO_RCV_ATTR_INFO) &&
575 	    ISSET(sc->sc_vio_state, VIO_ACK_ATTR_INFO)) {
576 		if (sc->sc_xfer_mode == VIO_DRING_MODE)
577 			vnet_send_dring_reg(sc);
578 		else
579 			vio_send_rdx(sc);
580 	}
581 }
582 
583 void
584 vnet_rx_vio_dring_reg(struct vnet_softc *sc, struct vio_msg_tag *tag)
585 {
586 	struct vio_dring_reg *dr = (struct vio_dring_reg *)tag;
587 
588 	switch (dr->tag.stype) {
589 	case VIO_SUBTYPE_INFO:
590 		DPRINTF(("CTRL/INFO/DRING_REG\n"));
591 
592 		sc->sc_peer_dring_nentries = dr->num_descriptors;
593 		sc->sc_peer_desc_size = dr->descriptor_size;
594 		sc->sc_peer_dring_cookie = dr->cookie[0];
595 
596 		dr->tag.stype = VIO_SUBTYPE_ACK;
597 		dr->tag.sid = sc->sc_local_sid;
598 		vnet_sendmsg(sc, dr, sizeof(*dr));
599 		sc->sc_vio_state |= VIO_RCV_DRING_REG;
600 		break;
601 
602 	case VIO_SUBTYPE_ACK:
603 		DPRINTF(("CTRL/ACK/DRING_REG\n"));
604 		if (!ISSET(sc->sc_vio_state, VIO_SND_DRING_REG)) {
605 			ldc_reset(&sc->sc_lc);
606 			break;
607 		}
608 
609 		sc->sc_dring_ident = dr->dring_ident;
610 		sc->sc_seq_no = 1;
611 
612 		sc->sc_vio_state |= VIO_ACK_DRING_REG;
613 		break;
614 
615 	default:
616 		DPRINTF(("CTRL/0x%02x/DRING_REG\n", dr->tag.stype));
617 		break;
618 	}
619 
620 	if (ISSET(sc->sc_vio_state, VIO_RCV_DRING_REG) &&
621 	    ISSET(sc->sc_vio_state, VIO_ACK_DRING_REG))
622 		vio_send_rdx(sc);
623 }
624 
625 void
626 vnet_rx_vio_rdx(struct vnet_softc *sc, struct vio_msg_tag *tag)
627 {
628 	struct ifnet *ifp = &sc->sc_ac.ac_if;
629 
630 	switch (tag->stype) {
631 	case VIO_SUBTYPE_INFO:
632 		DPRINTF(("CTRL/INFO/RDX\n"));
633 
634 		tag->stype = VIO_SUBTYPE_ACK;
635 		tag->sid = sc->sc_local_sid;
636 		vnet_sendmsg(sc, tag, sizeof(*tag));
637 		sc->sc_vio_state |= VIO_RCV_RDX;
638 		break;
639 
640 	case VIO_SUBTYPE_ACK:
641 		DPRINTF(("CTRL/ACK/RDX\n"));
642 		if (!ISSET(sc->sc_vio_state, VIO_SND_RDX)) {
643 			ldc_reset(&sc->sc_lc);
644 			break;
645 		}
646 		sc->sc_vio_state |= VIO_ACK_RDX;
647 		break;
648 
649 	default:
650 		DPRINTF(("CTRL/0x%02x/RDX (VIO)\n", tag->stype));
651 		break;
652 	}
653 
654 	if (ISSET(sc->sc_vio_state, VIO_RCV_RDX) &&
655 	    ISSET(sc->sc_vio_state, VIO_ACK_RDX)) {
656 		/* Link is up! */
657 		vnet_link_state(sc);
658 
659 		/* Configure multicast now that we can. */
660 		vnet_setmulti(sc, 1);
661 
662 		KERNEL_LOCK();
663 		ifq_clr_oactive(&ifp->if_snd);
664 		vnet_start(ifp);
665 		KERNEL_UNLOCK();
666 	}
667 }
668 
669 void
670 vnet_rx_vio_data(struct vnet_softc *sc, struct vio_msg *vm)
671 {
672 	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;
673 
674 	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
675 	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX)) {
676 		DPRINTF(("Spurious DATA/0x%02x/0x%04x\n", tag->stype,
677 		    tag->stype_env));
678 		return;
679 	}
680 
681 	switch (tag->stype_env) {
682 	case VIO_DESC_DATA:
683 		vnet_rx_vio_desc_data(sc, tag);
684 		break;
685 
686 	case VIO_DRING_DATA:
687 		vnet_rx_vio_dring_data(sc, tag);
688 		break;
689 
690 	default:
691 		DPRINTF(("DATA/0x%02x/0x%04x\n", tag->stype, tag->stype_env));
692 		break;
693 	}
694 }
695 
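/*
 * In-band (VIO_DESC_MODE) data messages: an INFO message describes a
 * frame in the peer's memory, which we copy in over LDC and pass to
 * the network stack before ACKing; an ACK lets us reclaim the buffer
 * of the acknowledged transmit descriptor.
 */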
696 void
697 vnet_rx_vio_desc_data(struct vnet_softc *sc, struct vio_msg_tag *tag)
698 {
699 	struct vnet_desc_msg *dm = (struct vnet_desc_msg *)tag;
700 	struct ldc_conn *lc = &sc->sc_lc;
701 	struct ldc_map *map = sc->sc_lm;
702 	struct ifnet *ifp = &sc->sc_ac.ac_if;
703 	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
704 	struct mbuf *m;
705 	caddr_t buf;
706 	paddr_t pa;
707 	psize_t nbytes;
708 	u_int cons;
709 	int err;
710 
711 	switch (tag->stype) {
712 	case VIO_SUBTYPE_INFO:
713 		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
714 		if (buf == NULL) {
715 			ifp->if_ierrors++;
716 			goto skip;
717 		}
718 		nbytes = roundup(dm->nbytes, 8);
719 
720 		if (dm->nbytes > (ETHER_MAX_LEN - ETHER_CRC_LEN)) {
721 			ifp->if_ierrors++;
722 			goto skip;
723 		}
724 
725 		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
726 		err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
727 		    dm->cookie[0].addr, pa, nbytes, &nbytes);
728 		if (err != H_EOK) {
729 			pool_put(&sc->sc_pool, buf);
730 			ifp->if_ierrors++;
731 			goto skip;
732 		}
733 
734 		/* Stupid OBP doesn't align properly. */
735 		m = m_devget(buf, dm->nbytes, ETHER_ALIGN);
736 		pool_put(&sc->sc_pool, buf);
737 		if (m == NULL) {
738 			ifp->if_ierrors++;
739 			goto skip;
740 		}
741 
742 		/* Pass it on. */
743 		ml_enqueue(&ml, m);
744 		if_input(ifp, &ml);
745 
746 	skip:
747 		dm->tag.stype = VIO_SUBTYPE_ACK;
748 		dm->tag.sid = sc->sc_local_sid;
749 		vnet_sendmsg(sc, dm, sizeof(*dm));
750 		break;
751 
752 	case VIO_SUBTYPE_ACK:
753 		DPRINTF(("DATA/ACK/DESC_DATA\n"));
754 
755 		if (dm->desc_handle != sc->sc_tx_cons) {
756 			printf("out of order\n");
757 			return;
758 		}
759 
760 		cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);
761 
762 		map->lm_slot[sc->sc_vsd[cons].vsd_map_idx].entry = 0;
763 		atomic_dec_int(&map->lm_count);
764 
765 		pool_put(&sc->sc_pool, sc->sc_vsd[cons].vsd_buf);
766 		sc->sc_vsd[cons].vsd_buf = NULL;
767 		ifp->if_opackets++;
768 
769 		sc->sc_tx_cons++;
770 		break;
771 
772 	case VIO_SUBTYPE_NACK:
773 		DPRINTF(("DATA/NACK/DESC_DATA\n"));
774 		break;
775 
776 	default:
777 		DPRINTF(("DATA/0x%02x/DESC_DATA\n", tag->stype));
778 		break;
779 	}
780 }
781 
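/*
 * Descriptor ring data messages: an INFO message announces frames in
 * the peer's descriptor ring, which we copy in, hand to the network
 * stack and then acknowledge; an ACK reports the peer's progress on
 * our transmit ring and lets us reclaim completed descriptors.
 */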
782 void
783 vnet_rx_vio_dring_data(struct vnet_softc *sc, struct vio_msg_tag *tag)
784 {
785 	struct vio_dring_msg *dm = (struct vio_dring_msg *)tag;
786 	struct ldc_conn *lc = &sc->sc_lc;
787 	struct ifnet *ifp = &sc->sc_ac.ac_if;
788 	struct mbuf *m;
789 	paddr_t pa;
790 	psize_t nbytes;
791 	int err;
792 
793 	switch (tag->stype) {
794 	case VIO_SUBTYPE_INFO:
795 	{
796 		struct vnet_desc desc;
797 		uint64_t cookie;
798 		paddr_t desc_pa;
799 		int idx, ack_end_idx = -1;
800 		struct mbuf_list ml = MBUF_LIST_INITIALIZER();
801 
802 		idx = dm->start_idx;
803 		for (;;) {
804 			cookie = sc->sc_peer_dring_cookie.addr;
805 			cookie += idx * sc->sc_peer_desc_size;
806 			nbytes = sc->sc_peer_desc_size;
807 			pmap_extract(pmap_kernel(), (vaddr_t)&desc, &desc_pa);
808 			err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN, cookie,
809 			    desc_pa, nbytes, &nbytes);
810 			if (err != H_EOK) {
811 				printf("hv_ldc_copy_in %d\n", err);
812 				break;
813 			}
814 
815 			if (desc.hdr.dstate != VIO_DESC_READY)
816 				break;
817 
818 			if (desc.nbytes > (ETHER_MAX_LEN - ETHER_CRC_LEN)) {
819 				ifp->if_ierrors++;
820 				goto skip;
821 			}
822 
823 			m = MCLGETI(NULL, M_DONTWAIT, NULL, desc.nbytes);
824 			if (!m)
825 				break;
826 			m->m_len = m->m_pkthdr.len = desc.nbytes;
827 			nbytes = roundup(desc.nbytes + VNET_ETHER_ALIGN, 8);
828 
829 			pmap_extract(pmap_kernel(), (vaddr_t)m->m_data, &pa);
830 			err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
831 			    desc.cookie[0].addr, pa, nbytes, &nbytes);
832 			if (err != H_EOK) {
833 				m_freem(m);
834 				goto skip;
835 			}
836 			m->m_data += VNET_ETHER_ALIGN;
837 
838 			ml_enqueue(&ml, m);
839 
840 		skip:
841 			desc.hdr.dstate = VIO_DESC_DONE;
842 			nbytes = sc->sc_peer_desc_size;
843 			err = hv_ldc_copy(lc->lc_id, LDC_COPY_OUT, cookie,
844 			    desc_pa, nbytes, &nbytes);
845 			if (err != H_EOK)
846 				printf("hv_ldc_copy_out %d\n", err);
847 
848 			ack_end_idx = idx;
849 			if (++idx == sc->sc_peer_dring_nentries)
850 				idx = 0;
851 		}
852 
853 		if_input(ifp, &ml);
854 
855 		if (ack_end_idx == -1) {
856 			dm->tag.stype = VIO_SUBTYPE_NACK;
857 		} else {
858 			dm->tag.stype = VIO_SUBTYPE_ACK;
859 			dm->end_idx = ack_end_idx;
860 		}
861 		dm->tag.sid = sc->sc_local_sid;
862 		dm->proc_state = VIO_DP_STOPPED;
863 		vnet_sendmsg(sc, dm, sizeof(*dm));
864 		break;
865 	}
866 
867 	case VIO_SUBTYPE_ACK:
868 	{
869 		struct ldc_map *map = sc->sc_lm;
870 		u_int cons, count;
871 
872 		sc->sc_peer_state = dm->proc_state;
873 
874 		cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);
875 		while (sc->sc_vd->vd_desc[cons].hdr.dstate == VIO_DESC_DONE) {
876 			map->lm_slot[sc->sc_vsd[cons].vsd_map_idx].entry = 0;
877 			atomic_dec_int(&map->lm_count);
878 
879 			pool_put(&sc->sc_pool, sc->sc_vsd[cons].vsd_buf);
880 			sc->sc_vsd[cons].vsd_buf = NULL;
881 			ifp->if_opackets++;
882 
883 			sc->sc_vd->vd_desc[cons].hdr.dstate = VIO_DESC_FREE;
884 			sc->sc_tx_cons++;
885 			cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);
886 		}
887 
888 		count = sc->sc_tx_prod - sc->sc_tx_cons;
889 		if (count > 0 && sc->sc_peer_state != VIO_DP_ACTIVE)
890 			vnet_send_dring_data(sc, cons);
891 
892 		KERNEL_LOCK();
893 		if (count < (sc->sc_vd->vd_nentries - 1))
894 			ifq_clr_oactive(&ifp->if_snd);
895 		if (count == 0)
896 			ifp->if_timer = 0;
897 
898 		vnet_start(ifp);
899 		KERNEL_UNLOCK();
900 		break;
901 	}
902 
903 	case VIO_SUBTYPE_NACK:
904 		DPRINTF(("DATA/NACK/DRING_DATA\n"));
905 		sc->sc_peer_state = VIO_DP_STOPPED;
906 		break;
907 
908 	default:
909 		DPRINTF(("DATA/0x%02x/DRING_DATA\n", tag->stype));
910 		break;
911 	}
912 }
913 
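/*
 * The LDC channel went down or was reset: forget the handshake state,
 * mark the link down and release the map entries and buffers still
 * tied up in the transmit ring.
 */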
914 void
915 vnet_ldc_reset(struct ldc_conn *lc)
916 {
917 	struct vnet_softc *sc = lc->lc_sc;
918 	int i;
919 
920 	timeout_del(&sc->sc_handshake_to);
921 	sc->sc_tx_prod = sc->sc_tx_cons = 0;
922 	sc->sc_peer_state = VIO_DP_STOPPED;
923 	sc->sc_vio_state = 0;
924 	vnet_link_state(sc);
925 
926 	sc->sc_lm->lm_next = 1;
927 	sc->sc_lm->lm_count = 1;
928 	for (i = 1; i < sc->sc_lm->lm_nentries; i++)
929 		sc->sc_lm->lm_slot[i].entry = 0;
930 
931 	for (i = 0; i < sc->sc_vd->vd_nentries; i++) {
932 		if (sc->sc_vsd[i].vsd_buf) {
933 			pool_put(&sc->sc_pool, sc->sc_vsd[i].vsd_buf);
934 			sc->sc_vsd[i].vsd_buf = NULL;
935 		}
936 		sc->sc_vd->vd_desc[i].hdr.dstate = VIO_DESC_FREE;
937 	}
938 }
939 
940 void
941 vnet_ldc_start(struct ldc_conn *lc)
942 {
943 	struct vnet_softc *sc = lc->lc_sc;
944 
945 	timeout_del(&sc->sc_handshake_to);
946 	vnet_send_ver_info(sc, VNET_MAJOR, VNET_MINOR);
947 }
948 
949 void
950 vnet_sendmsg(struct vnet_softc *sc, void *msg, size_t len)
951 {
952 	struct ldc_conn *lc = &sc->sc_lc;
953 	int err;
954 
955 	err = ldc_send_unreliable(lc, msg, len);
956 	if (err)
957 		printf("%s: ldc_send_unreliable: %d\n", __func__, err);
958 }
959 
960 void
961 vnet_send_ver_info(struct vnet_softc *sc, uint16_t major, uint16_t minor)
962 {
963 	struct vio_ver_info vi;
964 
965 	bzero(&vi, sizeof(vi));
966 	vi.tag.type = VIO_TYPE_CTRL;
967 	vi.tag.stype = VIO_SUBTYPE_INFO;
968 	vi.tag.stype_env = VIO_VER_INFO;
969 	vi.tag.sid = sc->sc_local_sid;
970 	vi.major = major;
971 	vi.minor = minor;
972 	vi.dev_class = VDEV_NETWORK;
973 	vnet_sendmsg(sc, &vi, sizeof(vi));
974 
975 	sc->sc_vio_state |= VIO_SND_VER_INFO;
976 }
977 
978 void
979 vnet_send_attr_info(struct vnet_softc *sc)
980 {
981 	struct vnet_attr_info ai;
982 	int i;
983 
984 	bzero(&ai, sizeof(ai));
985 	ai.tag.type = VIO_TYPE_CTRL;
986 	ai.tag.stype = VIO_SUBTYPE_INFO;
987 	ai.tag.stype_env = VIO_ATTR_INFO;
988 	ai.tag.sid = sc->sc_local_sid;
989 	ai.xfer_mode = VIO_DRING_MODE;
990 	ai.addr_type = VNET_ADDR_ETHERMAC;
991 	ai.ack_freq = 0;
992 	ai.addr = 0;
993 	for (i = 0; i < ETHER_ADDR_LEN; i++) {
994 		ai.addr <<= 8;
995 		ai.addr |= sc->sc_ac.ac_enaddr[i];
996 	}
997 	ai.mtu = ETHER_MAX_LEN - ETHER_CRC_LEN;
998 	vnet_sendmsg(sc, &ai, sizeof(ai));
999 
1000 	sc->sc_vio_state |= VIO_SND_ATTR_INFO;
1001 }
1002 
1003 void
1004 vnet_send_dring_reg(struct vnet_softc *sc)
1005 {
1006 	struct vio_dring_reg dr;
1007 
1008 	bzero(&dr, sizeof(dr));
1009 	dr.tag.type = VIO_TYPE_CTRL;
1010 	dr.tag.stype = VIO_SUBTYPE_INFO;
1011 	dr.tag.stype_env = VIO_DRING_REG;
1012 	dr.tag.sid = sc->sc_local_sid;
1013 	dr.dring_ident = 0;
1014 	dr.num_descriptors = sc->sc_vd->vd_nentries;
1015 	dr.descriptor_size = sizeof(struct vnet_desc);
1016 	dr.options = VIO_TX_RING;
1017 	dr.ncookies = 1;
1018 	dr.cookie[0].addr = 0;
1019 	dr.cookie[0].size = PAGE_SIZE;
1020 	vnet_sendmsg(sc, &dr, sizeof(dr));
1021 
1022 	sc->sc_vio_state |= VIO_SND_DRING_REG;
1023 }
1024 
1025 void
1026 vio_send_rdx(struct vnet_softc *sc)
1027 {
1028 	struct vio_msg_tag tag;
1029 
1030 	tag.type = VIO_TYPE_CTRL;
1031 	tag.stype = VIO_SUBTYPE_INFO;
1032 	tag.stype_env = VIO_RDX;
1033 	tag.sid = sc->sc_local_sid;
1034 	vnet_sendmsg(sc, &tag, sizeof(tag));
1035 
1036 	sc->sc_vio_state |= VIO_SND_RDX;
1037 }
1038 
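/*
 * Tell the peer that descriptors starting at start_idx are ready,
 * unless it is already actively processing our ring.
 */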
1039 void
1040 vnet_send_dring_data(struct vnet_softc *sc, uint32_t start_idx)
1041 {
1042 	struct vio_dring_msg dm;
1043 	u_int peer_state;
1044 
1045 	peer_state = atomic_swap_uint(&sc->sc_peer_state, VIO_DP_ACTIVE);
1046 	if (peer_state == VIO_DP_ACTIVE)
1047 		return;
1048 
1049 	bzero(&dm, sizeof(dm));
1050 	dm.tag.type = VIO_TYPE_DATA;
1051 	dm.tag.stype = VIO_SUBTYPE_INFO;
1052 	dm.tag.stype_env = VIO_DRING_DATA;
1053 	dm.tag.sid = sc->sc_local_sid;
1054 	dm.seq_no = sc->sc_seq_no++;
1055 	dm.dring_ident = sc->sc_dring_ident;
1056 	dm.start_idx = start_idx;
1057 	dm.end_idx = -1;
1058 	vnet_sendmsg(sc, &dm, sizeof(dm));
1059 }
1060 
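/*
 * Start transmission.  In dring mode each packet is copied into a
 * private pool buffer, exported through the LDC map table, described
 * by a transmit ring entry and announced with a DRING_DATA message.
 * In descriptor mode the work is handed off to vnet_start_desc().
 */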
1061 void
1062 vnet_start(struct ifnet *ifp)
1063 {
1064 	struct vnet_softc *sc = ifp->if_softc;
1065 	struct ldc_conn *lc = &sc->sc_lc;
1066 	struct ldc_map *map = sc->sc_lm;
1067 	struct mbuf *m;
1068 	paddr_t pa;
1069 	caddr_t buf;
1070 	uint64_t tx_head, tx_tail, tx_state;
1071 	u_int start, prod, count;
1072 	int err;
1073 
1074 	if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
1075 		return;
1076 
1077 	if (IFQ_IS_EMPTY(&ifp->if_snd))
1078 		return;
1079 
1080 	/*
1081 	 * We cannot transmit packets until a VIO connection has been
1082 	 * established.
1083 	 */
1084 	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
1085 	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX))
1086 		return;
1087 
1088 	/*
1089 	 * Make sure there is room in the LDC transmit queue to send a
1090 	 * DRING_DATA message.
1091 	 */
1092 	err = hv_ldc_tx_get_state(lc->lc_id, &tx_head, &tx_tail, &tx_state);
1093 	if (err != H_EOK)
1094 		return;
1095 	tx_tail += sizeof(struct ldc_pkt);
1096 	tx_tail &= ((lc->lc_txq->lq_nentries * sizeof(struct ldc_pkt)) - 1);
1097 	if (tx_tail == tx_head) {
1098 		ifq_set_oactive(&ifp->if_snd);
1099 		return;
1100 	}
1101 
1102 	if (sc->sc_xfer_mode == VIO_DESC_MODE) {
1103 		vnet_start_desc(ifp);
1104 		return;
1105 	}
1106 
1107 	start = prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);
1108 	while (sc->sc_vd->vd_desc[prod].hdr.dstate == VIO_DESC_FREE) {
1109 		count = sc->sc_tx_prod - sc->sc_tx_cons;
1110 		if (count >= (sc->sc_vd->vd_nentries - 1) ||
1111 		    map->lm_count >= map->lm_nentries) {
1112 			ifq_set_oactive(&ifp->if_snd);
1113 			break;
1114 		}
1115 
1116 		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
1117 		if (buf == NULL) {
1118 			ifq_set_oactive(&ifp->if_snd);
1119 			break;
1120 		}
1121 
1122 		IFQ_DEQUEUE(&ifp->if_snd, m);
1123 		if (m == NULL) {
1124 			pool_put(&sc->sc_pool, buf);
1125 			break;
1126 		}
1127 
1128 		m_copydata(m, 0, m->m_pkthdr.len, buf + VNET_ETHER_ALIGN);
1129 
1130 #if NBPFILTER > 0
1131 		/*
1132 		 * If BPF is listening on this interface, let it see the
1133 		 * packet before we commit it to the wire.
1134 		 */
1135 		if (ifp->if_bpf)
1136 			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
1137 #endif
1138 
1139 		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
1140 		KASSERT((pa & ~PAGE_MASK) == (pa & LDC_MTE_RA_MASK));
1141 		while (map->lm_slot[map->lm_next].entry != 0) {
1142 			map->lm_next++;
1143 			map->lm_next &= (map->lm_nentries - 1);
1144 		}
1145 		map->lm_slot[map->lm_next].entry = (pa & LDC_MTE_RA_MASK);
1146 		map->lm_slot[map->lm_next].entry |= LDC_MTE_CPR;
1147 		atomic_inc_int(&map->lm_count);
1148 
1149 		sc->sc_vd->vd_desc[prod].nbytes = max(m->m_pkthdr.len, 60);
1150 		sc->sc_vd->vd_desc[prod].ncookies = 1;
1151 		sc->sc_vd->vd_desc[prod].cookie[0].addr =
1152 		    map->lm_next << PAGE_SHIFT | (pa & PAGE_MASK);
1153 		sc->sc_vd->vd_desc[prod].cookie[0].size = 2048;
1154 		membar_producer();
1155 		sc->sc_vd->vd_desc[prod].hdr.dstate = VIO_DESC_READY;
1156 
1157 		sc->sc_vsd[prod].vsd_map_idx = map->lm_next;
1158 		sc->sc_vsd[prod].vsd_buf = buf;
1159 
1160 		sc->sc_tx_prod++;
1161 		prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);
1162 
1163 		m_freem(m);
1164 	}
1165 
1166 	membar_producer();
1167 
1168 	if (start != prod && sc->sc_peer_state != VIO_DP_ACTIVE) {
1169 		vnet_send_dring_data(sc, start);
1170 		ifp->if_timer = 5;
1171 	}
1172 }
1173 
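/*
 * Transmit path for in-band descriptor mode: each packet is exported
 * through the map table and announced to the peer with its own
 * DESC_DATA message.
 */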
1174 void
1175 vnet_start_desc(struct ifnet *ifp)
1176 {
1177 	struct vnet_softc *sc = ifp->if_softc;
1178 	struct ldc_map *map = sc->sc_lm;
1179 	struct vnet_desc_msg dm;
1180 	struct mbuf *m;
1181 	paddr_t pa;
1182 	caddr_t buf;
1183 	u_int prod, count;
1184 
1185 	for (;;) {
1186 		count = sc->sc_tx_prod - sc->sc_tx_cons;
1187 		if (count >= (sc->sc_vd->vd_nentries - 1) ||
1188 		    map->lm_count >= map->lm_nentries) {
1189 			ifq_set_oactive(&ifp->if_snd);
1190 			return;
1191 		}
1192 
1193 		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
1194 		if (buf == NULL) {
1195 			ifq_set_oactive(&ifp->if_snd);
1196 			return;
1197 		}
1198 
1199 		IFQ_DEQUEUE(&ifp->if_snd, m);
1200 		if (m == NULL) {
1201 			pool_put(&sc->sc_pool, buf);
1202 			return;
1203 		}
1204 
1205 		m_copydata(m, 0, m->m_pkthdr.len, buf);
1206 
1207 #if NBPFILTER > 0
1208 		/*
1209 		 * If BPF is listening on this interface, let it see the
1210 		 * packet before we commit it to the wire.
1211 		 */
1212 		if (ifp->if_bpf)
1213 			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
1214 #endif
1215 
1216 		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
1217 		KASSERT((pa & ~PAGE_MASK) == (pa & LDC_MTE_RA_MASK));
1218 		while (map->lm_slot[map->lm_next].entry != 0) {
1219 			map->lm_next++;
1220 			map->lm_next &= (map->lm_nentries - 1);
1221 		}
1222 		map->lm_slot[map->lm_next].entry = (pa & LDC_MTE_RA_MASK);
1223 		map->lm_slot[map->lm_next].entry |= LDC_MTE_CPR;
1224 		atomic_inc_int(&map->lm_count);
1225 
1226 		prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);
1227 		sc->sc_vsd[prod].vsd_map_idx = map->lm_next;
1228 		sc->sc_vsd[prod].vsd_buf = buf;
1229 
1230 		bzero(&dm, sizeof(dm));
1231 		dm.tag.type = VIO_TYPE_DATA;
1232 		dm.tag.stype = VIO_SUBTYPE_INFO;
1233 		dm.tag.stype_env = VIO_DESC_DATA;
1234 		dm.tag.sid = sc->sc_local_sid;
1235 		dm.seq_no = sc->sc_seq_no++;
1236 		dm.desc_handle = sc->sc_tx_prod;
1237 		dm.nbytes = max(m->m_pkthdr.len, 60);
1238 		dm.ncookies = 1;
1239 		dm.cookie[0].addr =
1240 			map->lm_next << PAGE_SHIFT | (pa & PAGE_MASK);
1241 		dm.cookie[0].size = 2048;
1242 		vnet_sendmsg(sc, &dm, sizeof(dm));
1243 
1244 		sc->sc_tx_prod++;
1245 		sc->sc_tx_prod &= (sc->sc_vd->vd_nentries - 1);
1246 
1247 		m_freem(m);
1248 	}
1249 }
1250 
1251 int
1252 vnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1253 {
1254 	struct vnet_softc *sc = ifp->if_softc;
1255 	struct ifreq *ifr = (struct ifreq *)data;
1256 	int s, error = 0;
1257 
1258 	s = splnet();
1259 
1260 	switch (cmd) {
1261 	case SIOCSIFADDR:
1262 		ifp->if_flags |= IFF_UP;
1263 		/* FALLTHROUGH */
1264 	case SIOCSIFFLAGS:
1265 		if (ifp->if_flags & IFF_UP) {
1266 			if ((ifp->if_flags & IFF_RUNNING) == 0)
1267 				vnet_init(ifp);
1268 		} else {
1269 			if (ifp->if_flags & IFF_RUNNING)
1270 				vnet_stop(ifp);
1271 		}
1272 		break;
1273 
1274 	case SIOCGIFMEDIA:
1275 	case SIOCSIFMEDIA:
1276 		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
1277 		break;
1278 
1279 	case SIOCADDMULTI:
1280 	case SIOCDELMULTI:
1281 		/*
1282 		 * XXX Removing all multicast addresses and then adding
1283 		 * most of them back is rather inefficient.
1284 		 */
1285 		vnet_setmulti(sc, 0);
1286 		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
1287 		vnet_setmulti(sc, 1);
1288 		if (error == ENETRESET)
1289 			error = 0;
1290 		break;
1291 
1292 	default:
1293 		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
1294 	}
1295 
1296 	splx(s);
1297 	return (error);
1298 }
1299 
1300 void
1301 vnet_watchdog(struct ifnet *ifp)
1302 {
1303 	struct vnet_softc *sc = ifp->if_softc;
1304 
1305 	printf("%s: watchdog timeout\n", sc->sc_dv.dv_xname);
1306 }
1307 
1308 int
1309 vnet_media_change(struct ifnet *ifp)
1310 {
1311 	return (0);
1312 }
1313 
1314 void
1315 vnet_media_status(struct ifnet *ifp, struct ifmediareq *imr)
1316 {
1317 	imr->ifm_active = IFM_ETHER | IFM_AUTO;
1318 	imr->ifm_status = IFM_AVALID;
1319 
1320 	if (LINK_STATE_IS_UP(ifp->if_link_state) &&
1321 	    ifp->if_flags & IFF_UP)
1322 		imr->ifm_status |= IFM_ACTIVE;
1323 }
1324 
1325 void
1326 vnet_link_state(struct vnet_softc *sc)
1327 {
1328 	struct ifnet *ifp = &sc->sc_ac.ac_if;
1329 	int link_state = LINK_STATE_DOWN;
1330 
1331 	KERNEL_LOCK();
1332 	if (ISSET(sc->sc_vio_state, VIO_RCV_RDX) &&
1333 	    ISSET(sc->sc_vio_state, VIO_ACK_RDX))
1334 		link_state = LINK_STATE_FULL_DUPLEX;
1335 	if (ifp->if_link_state != link_state) {
1336 		ifp->if_link_state = link_state;
1337 		if_link_state_change(ifp);
1338 	}
1339 	KERNEL_UNLOCK();
1340 }
1341 
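/*
 * Add (set) or remove our multicast addresses in the peer's filter,
 * sending at most VNET_NUM_MCAST addresses per MCAST_INFO message.
 */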
1342 void
1343 vnet_setmulti(struct vnet_softc *sc, int set)
1344 {
1345 	struct arpcom *ac = &sc->sc_ac;
1346 	struct ether_multi *enm;
1347 	struct ether_multistep step;
1348 	struct vnet_mcast_info mi;
1349 	int count = 0;
1350 
1351 	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
1352 	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX))
1353 		return;
1354 
1355 	bzero(&mi, sizeof(mi));
1356 	mi.tag.type = VIO_TYPE_CTRL;
1357 	mi.tag.stype = VIO_SUBTYPE_INFO;
1358 	mi.tag.stype_env = VNET_MCAST_INFO;
1359 	mi.tag.sid = sc->sc_local_sid;
1360 	mi.set = set ? 1 : 0;
1361 	KERNEL_LOCK();
1362 	ETHER_FIRST_MULTI(step, ac, enm);
1363 	while (enm != NULL) {
1364 		/* XXX What about multicast ranges? */
1365 		bcopy(enm->enm_addrlo, mi.mcast_addr[count], ETHER_ADDR_LEN);
1366 		ETHER_NEXT_MULTI(step, enm);
1367 
1368 		count++;
1369 		if (count < VNET_NUM_MCAST)
1370 			continue;
1371 
1372 		mi.count = VNET_NUM_MCAST;
1373 		vnet_sendmsg(sc, &mi, sizeof(mi));
1374 		count = 0;
1375 	}
1376 
1377 	if (count > 0) {
1378 		mi.count = count;
1379 		vnet_sendmsg(sc, &mi, sizeof(mi));
1380 	}
1381 	KERNEL_UNLOCK();
1382 }
1383 
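/*
 * Bring the interface up: allocate the map table, transmit ring and
 * soft descriptors, configure the LDC queues, enable the channel
 * interrupts and start the handshake.
 */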
1384 void
1385 vnet_init(struct ifnet *ifp)
1386 {
1387 	struct vnet_softc *sc = ifp->if_softc;
1388 	struct ldc_conn *lc = &sc->sc_lc;
1389 	int err;
1390 
1391 	sc->sc_lm = ldc_map_alloc(sc->sc_dmatag, 2048);
1392 	if (sc->sc_lm == NULL)
1393 		return;
1394 
1395 	err = hv_ldc_set_map_table(lc->lc_id,
1396 	    sc->sc_lm->lm_map->dm_segs[0].ds_addr, sc->sc_lm->lm_nentries);
1397 	if (err != H_EOK) {
1398 		printf("hv_ldc_set_map_table %d\n", err);
1399 		return;
1400 	}
1401 
1402 	sc->sc_vd = vnet_dring_alloc(sc->sc_dmatag, VNET_NUM_SOFT_DESC);
1403 	if (sc->sc_vd == NULL)
1404 		return;
1405 	sc->sc_vsd = malloc(VNET_NUM_SOFT_DESC * sizeof(*sc->sc_vsd), M_DEVBUF,
1406 	    M_NOWAIT|M_ZERO);
1407 	if (sc->sc_vsd == NULL)
1408 		return;
1409 
1410 	sc->sc_lm->lm_slot[0].entry = sc->sc_vd->vd_map->dm_segs[0].ds_addr;
1411 	sc->sc_lm->lm_slot[0].entry &= LDC_MTE_RA_MASK;
1412 	sc->sc_lm->lm_slot[0].entry |= LDC_MTE_CPR | LDC_MTE_CPW;
1413 	sc->sc_lm->lm_next = 1;
1414 	sc->sc_lm->lm_count = 1;
1415 
1416 	err = hv_ldc_tx_qconf(lc->lc_id,
1417 	    lc->lc_txq->lq_map->dm_segs[0].ds_addr, lc->lc_txq->lq_nentries);
1418 	if (err != H_EOK)
1419 		printf("hv_ldc_tx_qconf %d\n", err);
1420 
1421 	err = hv_ldc_rx_qconf(lc->lc_id,
1422 	    lc->lc_rxq->lq_map->dm_segs[0].ds_addr, lc->lc_rxq->lq_nentries);
1423 	if (err != H_EOK)
1424 		printf("hv_ldc_rx_qconf %d\n", err);
1425 
1426 	cbus_intr_setenabled(sc->sc_bustag, sc->sc_tx_ino, INTR_ENABLED);
1427 	cbus_intr_setenabled(sc->sc_bustag, sc->sc_rx_ino, INTR_ENABLED);
1428 
1429 	ldc_send_vers(lc);
1430 
1431 	ifp->if_flags |= IFF_RUNNING;
1432 }
1433 
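/*
 * Bring the interface down: disable and drain the channel interrupts,
 * unconfigure the LDC queues and release everything vnet_init()
 * allocated.
 */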
1434 void
1435 vnet_stop(struct ifnet *ifp)
1436 {
1437 	struct vnet_softc *sc = ifp->if_softc;
1438 	struct ldc_conn *lc = &sc->sc_lc;
1439 
1440 	ifp->if_flags &= ~IFF_RUNNING;
1441 	ifq_clr_oactive(&ifp->if_snd);
1442 	ifp->if_timer = 0;
1443 
1444 	cbus_intr_setenabled(sc->sc_bustag, sc->sc_tx_ino, INTR_DISABLED);
1445 	cbus_intr_setenabled(sc->sc_bustag, sc->sc_rx_ino, INTR_DISABLED);
1446 
1447 	intr_barrier(sc->sc_tx_ih);
1448 	intr_barrier(sc->sc_rx_ih);
1449 
1450 	hv_ldc_tx_qconf(lc->lc_id, 0, 0);
1451 	hv_ldc_rx_qconf(lc->lc_id, 0, 0);
1452 	lc->lc_tx_seqid = 0;
1453 	lc->lc_state = 0;
1454 	lc->lc_tx_state = lc->lc_rx_state = LDC_CHANNEL_DOWN;
1455 	vnet_ldc_reset(lc);
1456 
1457 	free(sc->sc_vsd, M_DEVBUF, VNET_NUM_SOFT_DESC * sizeof(*sc->sc_vsd));
1458 
1459 	vnet_dring_free(sc->sc_dmatag, sc->sc_vd);
1460 
1461 	hv_ldc_set_map_table(lc->lc_id, 0, 0);
1462 	ldc_map_free(sc->sc_dmatag, sc->sc_lm);
1463 }
1464 
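/*
 * Allocate DMA-able memory for a transmit descriptor ring and mark all
 * of its descriptors free.
 */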
1465 struct vnet_dring *
1466 vnet_dring_alloc(bus_dma_tag_t t, int nentries)
1467 {
1468 	struct vnet_dring *vd;
1469 	bus_size_t size;
1470 	caddr_t va;
1471 	int nsegs;
1472 	int i;
1473 
1474 	vd = malloc(sizeof(struct vnet_dring), M_DEVBUF, M_NOWAIT);
1475 	if (vd == NULL)
1476 		return NULL;
1477 
1478 	size = roundup(nentries * sizeof(struct vnet_desc), PAGE_SIZE);
1479 
1480 	if (bus_dmamap_create(t, size, 1, size, 0,
1481 	    BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, &vd->vd_map) != 0)
1482 		return (NULL);
1483 
1484 	if (bus_dmamem_alloc(t, size, PAGE_SIZE, 0, &vd->vd_seg, 1,
1485 	    &nsegs, BUS_DMA_NOWAIT) != 0)
1486 		goto destroy;
1487 
1488 	if (bus_dmamem_map(t, &vd->vd_seg, 1, size, &va,
1489 	    BUS_DMA_NOWAIT) != 0)
1490 		goto free;
1491 
1492 	if (bus_dmamap_load(t, vd->vd_map, va, size, NULL,
1493 	    BUS_DMA_NOWAIT) != 0)
1494 		goto unmap;
1495 
1496 	vd->vd_desc = (struct vnet_desc *)va;
1497 	vd->vd_nentries = nentries;
1498 	bzero(vd->vd_desc, nentries * sizeof(struct vnet_desc));
1499 	for (i = 0; i < vd->vd_nentries; i++)
1500 		vd->vd_desc[i].hdr.dstate = VIO_DESC_FREE;
1501 	return (vd);
1502 
1503 unmap:
1504 	bus_dmamem_unmap(t, va, size);
1505 free:
1506 	bus_dmamem_free(t, &vd->vd_seg, 1);
1507 destroy:
1508 	bus_dmamap_destroy(t, vd->vd_map);
1509 
1510 	return (NULL);
1511 }
1512 
1513 void
1514 vnet_dring_free(bus_dma_tag_t t, struct vnet_dring *vd)
1515 {
1516 	bus_size_t size;
1517 
1518 	size = vd->vd_nentries * sizeof(struct vnet_desc);
1519 	size = roundup(size, PAGE_SIZE);
1520 
1521 	bus_dmamap_unload(t, vd->vd_map);
1522 	bus_dmamem_unmap(t, (caddr_t)vd->vd_desc, size);
1523 	bus_dmamem_free(t, &vd->vd_seg, 1);
1524 	bus_dmamap_destroy(t, vd->vd_map);
1525 	free(vd, M_DEVBUF, 0);
1526 }
1527