xref: /netbsd-src/sys/arch/xen/xen/if_xennet_xenbus.c (revision c2f76ff004a2cb67efe5b12d97bd3ef7fe89e18d)
1 /*      $NetBSD: if_xennet_xenbus.c,v 1.46 2011/01/11 23:22:19 jym Exp $      */
2 
3 /*
4  * Copyright (c) 2006 Manuel Bouyer.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  */
27 
28 /*
29  * Copyright (c) 2004 Christian Limpach.
30  * All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  *
41  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
42  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
43  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
44  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
45  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
46  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
48  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
50  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51  */
52 
53 /*
54  * This file contains the xennet frontend code required for the network
55  * communication between two Xen domains.
56  * It resembles xbd, but is a little more complex as it must deal with two
57  * rings:
58  * - the TX ring, to transmit packets to backend (inside => outside)
59  * - the RX ring, to receive packets from backend (outside => inside)
60  *
61  * The principles are as follows.
62  *
63  * For TX:
64  * The purpose is to transmit packets to the outside. The entry point is
65  * xennet_start() (the default output routine of xennet), which schedules a
66  * softint, xennet_softstart(). xennet_softstart() generates the requests
67  * associated with the queued TX mbufs (see altq(9)).
68  * The backend's responses are processed by xennet_tx_complete(), called either
69  * from:
70  * - xennet_start()
71  * - xennet_handler(), during an asynchronous event notification from the
72  *   backend (similar to an IRQ).
73  *
74  * For RX:
75  * The purpose is to process the packets received from the outside. RX
76  * buffers are pre-allocated through xennet_alloc_rx_buffer() during xennet
77  * autoconf attach. During pre-allocation, the frontend pushes requests into
78  * the I/O ring, in preparation for incoming packets from the backend.
79  * When RX packets need to be processed, the backend takes the requests
80  * previously offered by the frontend and pushes the associated responses
81  * into the I/O ring. When done, it notifies the frontend through an event
82  * notification, which asynchronously calls xennet_handler() in the frontend.
83  * xennet_handler() processes the responses, generates the associated mbufs,
84  * and passes them to the MI layer for further processing.
85  */
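/*
 * Call-flow summary (a condensed restatement of the description above):
 *
 *  TX: xennet_start() --softint--> xennet_softstart() --TX ring/evtchn--> backend
 *      backend --evtchn--> xennet_handler() --> xennet_tx_complete()
 *
 *  RX: xennet_alloc_rx_buffer() --RX ring--> backend
 *      backend --evtchn--> xennet_handler() --> mbuf --> (*ifp->if_input)()
 */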
86 
87 #include <sys/cdefs.h>
88 __KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.46 2011/01/11 23:22:19 jym Exp $");
89 
90 #include "opt_xen.h"
91 #include "opt_nfs_boot.h"
92 #include "rnd.h"
93 
94 #include <sys/param.h>
95 #include <sys/device.h>
96 #include <sys/conf.h>
97 #include <sys/kernel.h>
98 #include <sys/proc.h>
99 #include <sys/systm.h>
100 #include <sys/intr.h>
101 #if NRND > 0
102 #include <sys/rnd.h>
103 #endif
104 
105 #include <net/if.h>
106 #include <net/if_dl.h>
107 #include <net/if_ether.h>
108 #include <net/bpf.h>
109 #include <net/bpfdesc.h>
110 
111 #if defined(NFS_BOOT_BOOTSTATIC)
112 #include <sys/fstypes.h>
113 #include <sys/mount.h>
114 #include <sys/statvfs.h>
115 #include <netinet/in.h>
116 #include <nfs/rpcv2.h>
117 #include <nfs/nfsproto.h>
118 #include <nfs/nfs.h>
119 #include <nfs/nfsmount.h>
120 #include <nfs/nfsdiskless.h>
121 #include <xen/if_xennetvar.h>
122 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
123 
124 #include <xen/xennet_checksum.h>
125 
126 #include <uvm/uvm.h>
127 
128 #include <xen/hypervisor.h>
129 #include <xen/evtchn.h>
130 #include <xen/granttables.h>
131 #include <xen/xen3-public/io/netif.h>
132 #include <xen/xenpmap.h>
133 
134 #include <xen/xenbus.h>
135 #include "locators.h"
136 
137 #undef XENNET_DEBUG_DUMP
138 #undef XENNET_DEBUG
139 #ifdef XENNET_DEBUG
140 #define XEDB_FOLLOW     0x01
141 #define XEDB_INIT       0x02
142 #define XEDB_EVENT      0x04
143 #define XEDB_MBUF       0x08
144 #define XEDB_MEM        0x10
145 int xennet_debug = 0xff;
146 #define DPRINTF(x) if (xennet_debug) printf x;
147 #define DPRINTFN(n,x) if (xennet_debug & (n)) printf x;
148 #else
149 #define DPRINTF(x)
150 #define DPRINTFN(n,x)
151 #endif
152 
153 #define GRANT_INVALID_REF -1 /* entry is free */
154 #define GRANT_STACK_REF   -2 /* entry owned by the network stack */
155 
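/*
 * Number of request/response slots that fit in one page-sized shared ring,
 * as computed by the generic Xen ring macros.
 */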
156 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
157 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
158 
159 struct xennet_txreq {
160 	SLIST_ENTRY(xennet_txreq) txreq_next;
161 	uint16_t txreq_id; /* ID passed to backend */
162 	grant_ref_t txreq_gntref; /* grant ref of this request */
163 	struct mbuf *txreq_m; /* mbuf being transmitted */
164 };
165 
166 struct xennet_rxreq {
167 	SLIST_ENTRY(xennet_rxreq) rxreq_next;
168 	uint16_t rxreq_id; /* ID passed to backend */
169 	grant_ref_t rxreq_gntref; /* grant ref of this request */
170 /* va/pa for this receive buf. ma will be provided by backend */
171 	paddr_t rxreq_pa;
172 	vaddr_t rxreq_va;
173 	struct xennet_xenbus_softc *rxreq_sc; /* pointer to our interface */
174 };
175 
176 struct xennet_xenbus_softc {
177 	device_t sc_dev;
178 	struct ethercom sc_ethercom;
179 	uint8_t sc_enaddr[6];
180 	struct xenbus_device *sc_xbusd;
181 
182 	netif_tx_front_ring_t sc_tx_ring;
183 	netif_rx_front_ring_t sc_rx_ring;
184 
185 	unsigned int sc_evtchn;
186 	void *sc_softintr;
187 
188 	grant_ref_t sc_tx_ring_gntref;
189 	grant_ref_t sc_rx_ring_gntref;
190 
191 	struct xennet_txreq sc_txreqs[NET_TX_RING_SIZE];
192 	struct xennet_rxreq sc_rxreqs[NET_RX_RING_SIZE];
193 	SLIST_HEAD(,xennet_txreq) sc_txreq_head; /* list of free TX requests */
194 	SLIST_HEAD(,xennet_rxreq) sc_rxreq_head; /* list of free RX requests */
195 	int sc_free_rxreql; /* number of free receive request structs */
196 
197 	int sc_backend_status; /* our status with backend */
198 #define BEST_CLOSED		0
199 #define BEST_DISCONNECTED	1
200 #define BEST_CONNECTED		2
201 #define BEST_SUSPENDED		3
202 	unsigned long sc_rx_feature;
203 #define FEATURE_RX_FLIP		0
204 #define FEATURE_RX_COPY		1
205 #if NRND > 0
206 	rndsource_element_t     sc_rnd_source;
207 #endif
208 };
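/*
 * Number of RX requests currently outstanding to the backend, i.e. requests
 * produced by the frontend minus responses received so far.
 */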
209 #define SC_NLIVEREQ(sc) ((sc)->sc_rx_ring.req_prod_pvt - \
210 			    (sc)->sc_rx_ring.sring->rsp_prod)
211 
212 /* too big to be on stack; only used by the RX flip code */
213 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
214 static u_long xennet_pages[NET_RX_RING_SIZE];
215 
216 static int  xennet_xenbus_match(device_t, cfdata_t, void *);
217 static void xennet_xenbus_attach(device_t, device_t, void *);
218 static int  xennet_xenbus_detach(device_t, int);
219 static void xennet_backend_changed(void *, XenbusState);
220 
221 static int  xennet_xenbus_resume(void *);
222 static void xennet_alloc_rx_buffer(struct xennet_xenbus_softc *);
223 static void xennet_free_rx_buffer(struct xennet_xenbus_softc *);
224 static void xennet_tx_complete(struct xennet_xenbus_softc *);
225 static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *);
226 static int  xennet_handler(void *);
227 #ifdef XENNET_DEBUG_DUMP
228 static void xennet_hex_dump(const unsigned char *, size_t, const char *, int);
229 #endif
230 
231 static int  xennet_init(struct ifnet *);
232 static void xennet_stop(struct ifnet *, int);
233 static void xennet_reset(struct xennet_xenbus_softc *);
234 static void xennet_softstart(void *);
235 static void xennet_start(struct ifnet *);
236 static int  xennet_ioctl(struct ifnet *, u_long, void *);
237 static void xennet_watchdog(struct ifnet *);
238 
239 CFATTACH_DECL_NEW(xennet, sizeof(struct xennet_xenbus_softc),
240    xennet_xenbus_match, xennet_xenbus_attach, xennet_xenbus_detach, NULL);
241 
242 static int
243 xennet_xenbus_match(device_t parent, cfdata_t match, void *aux)
244 {
245 	struct xenbusdev_attach_args *xa = aux;
246 
247 	if (strcmp(xa->xa_type, "vif") != 0)
248 		return 0;
249 
250 	if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
251 	    match->cf_loc[XENBUSCF_ID] != xa->xa_id)
252 		return 0;
253 
254 	return 1;
255 }
256 
257 static void
258 xennet_xenbus_attach(device_t parent, device_t self, void *aux)
259 {
260 	struct xennet_xenbus_softc *sc = device_private(self);
261 	struct xenbusdev_attach_args *xa = aux;
262 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
263 	int err;
264 	RING_IDX i;
265 	char *val, *e, *p;
266 	int s;
267 	extern int ifqmaxlen; /* XXX */
268 #ifdef XENNET_DEBUG
269 	char **dir;
270 	int dir_n = 0;
271 	char id_str[20];
272 #endif
273 
274 	aprint_normal(": Xen Virtual Network Interface\n");
275 	sc->sc_dev = self;
276 
277 #ifdef XENNET_DEBUG
278 	printf("path: %s\n", xa->xa_xbusd->xbusd_path);
279 	snprintf(id_str, sizeof(id_str), "%d", xa->xa_id);
280 	err = xenbus_directory(NULL, "device/vif", id_str, &dir_n, &dir);
281 	if (err) {
282 		aprint_error_dev(self, "xenbus_directory err %d\n", err);
283 	} else {
284 		printf("%s/\n", xa->xa_xbusd->xbusd_path);
285 		for (i = 0; i < dir_n; i++) {
286 			printf("\t/%s", dir[i]);
287 			err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path,
288 				          dir[i], NULL, &val);
289 			if (err) {
290 				aprint_error_dev(self, "xenbus_read err %d\n",
291 					         err);
292 			} else {
293 				printf(" = %s\n", val);
294 				free(val, M_DEVBUF);
295 			}
296 		}
297 	}
298 #endif /* XENNET_DEBUG */
299 	sc->sc_xbusd = xa->xa_xbusd;
300 	sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;
301 
302 	/* initialize free TX and RX request lists */
303 	SLIST_INIT(&sc->sc_txreq_head);
304 	for (i = 0; i < NET_TX_RING_SIZE; i++) {
305 		sc->sc_txreqs[i].txreq_id = i;
306 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, &sc->sc_txreqs[i],
307 		    txreq_next);
308 	}
309 	SLIST_INIT(&sc->sc_rxreq_head);
310 	s = splvm();
311 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
312 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
313 		rxreq->rxreq_id = i;
314 		rxreq->rxreq_sc = sc;
315 		rxreq->rxreq_va = uvm_km_alloc(kernel_map,
316 		    PAGE_SIZE, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO);
317 		if (rxreq->rxreq_va == 0)
318 			break;
319 		if (!pmap_extract(pmap_kernel(), rxreq->rxreq_va,
320 		    &rxreq->rxreq_pa))
321 			panic("%s: no pa for mapped va ?", device_xname(self));
322 		rxreq->rxreq_gntref = GRANT_INVALID_REF;
323 		SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next);
324 	}
325 	splx(s);
326 	sc->sc_free_rxreql = i;
327 	if (sc->sc_free_rxreql == 0) {
328 		aprint_error_dev(self, "failed to allocate rx memory\n");
329 		return;
330 	}
331 
332 	/* read mac address */
333 	err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path, "mac", NULL, &val);
334 	if (err) {
335 		aprint_error_dev(self, "can't read mac address, err %d\n", err);
336 		return;
337 	}
338 	for (i = 0, p = val; i < 6; i++) {
339 		sc->sc_enaddr[i] = strtoul(p, &e, 16);
340 		if ((e[0] == '\0' && i != 5) && e[0] != ':') {
341 			aprint_error_dev(self,
342 			    "%s is not a valid mac address\n", val);
343 			free(val, M_DEVBUF);
344 			return;
345 		}
346 		p = &e[1];
347 	}
348 	free(val, M_DEVBUF);
349 	aprint_normal_dev(self, "MAC address %s\n",
350 	    ether_sprintf(sc->sc_enaddr));
351 	/* Initialize ifnet structure and attach interface */
352 	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
353 	ifp->if_softc = sc;
354 	ifp->if_start = xennet_start;
355 	ifp->if_ioctl = xennet_ioctl;
356 	ifp->if_watchdog = xennet_watchdog;
357 	ifp->if_init = xennet_init;
358 	ifp->if_stop = xennet_stop;
359 	ifp->if_flags = IFF_BROADCAST|IFF_SIMPLEX|IFF_NOTRAILERS|IFF_MULTICAST;
360 	ifp->if_timer = 0;
361 	ifp->if_snd.ifq_maxlen = max(ifqmaxlen, NET_TX_RING_SIZE * 2);
362 	ifp->if_capabilities = IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx;
363 	IFQ_SET_READY(&ifp->if_snd);
364 	if_attach(ifp);
365 	ether_ifattach(ifp, sc->sc_enaddr);
366 	sc->sc_softintr = softint_establish(SOFTINT_NET, xennet_softstart, sc);
367 	if (sc->sc_softintr == NULL)
368 		panic("%s: can't establish soft interrupt",
369 			device_xname(self));
370 
371 #if NRND > 0
372 	rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev),
373 	    RND_TYPE_NET, 0);
374 #endif
375 }
376 
377 static int
378 xennet_xenbus_detach(device_t self, int flags)
379 {
380 	struct xennet_xenbus_softc *sc = device_private(self);
381 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
382 	int s0, s1;
383 	RING_IDX i;
384 
385 	DPRINTF(("%s: xennet_xenbus_detach\n", device_xname(self)));
386 	s0 = splnet();
387 	xennet_stop(ifp, 1);
388 	/* wait for pending TX to complete, and collect pending RX packets */
389 	xennet_handler(sc);
390 	while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
391 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_detach", hz/2);
392 		xennet_handler(sc);
393 	}
394 	xennet_free_rx_buffer(sc);
395 
396 	s1 = splvm();
397 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
398 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
399 		uvm_km_free(kernel_map, rxreq->rxreq_va, PAGE_SIZE,
400 		    UVM_KMF_WIRED);
401 	}
402 	splx(s1);
403 
404 	ether_ifdetach(ifp);
405 	if_detach(ifp);
406 
407 #if NRND > 0
408 	/* Unhook the entropy source. */
409 	rnd_detach_source(&sc->sc_rnd_source);
410 #endif
411 
412 	while (xengnt_status(sc->sc_tx_ring_gntref)) {
413 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_txref", hz/2);
414 	}
415 	xengnt_revoke_access(sc->sc_tx_ring_gntref);
416 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_tx_ring.sring, PAGE_SIZE,
417 	    UVM_KMF_WIRED);
418 	while (xengnt_status(sc->sc_rx_ring_gntref)) {
419 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_rxref", hz/2);
420 	}
421 	xengnt_revoke_access(sc->sc_rx_ring_gntref);
422 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_rx_ring.sring, PAGE_SIZE,
423 	    UVM_KMF_WIRED);
424 	softint_disestablish(sc->sc_softintr);
425 	event_remove_handler(sc->sc_evtchn, &xennet_handler, sc);
426 	splx(s0);
427 	DPRINTF(("%s: xennet_xenbus_detach done\n", device_xname(self)));
428 	return 0;
429 }
430 
431 static int
432 xennet_xenbus_resume(void *p)
433 {
434 	struct xennet_xenbus_softc *sc = p;
435 	struct xenbus_transaction *xbt;
436 	unsigned long rx_copy;
437 	int error;
438 	netif_tx_sring_t *tx_ring;
439 	netif_rx_sring_t *rx_ring;
440 	paddr_t ma;
441 	const char *errmsg;
442 
443 	sc->sc_tx_ring_gntref = GRANT_INVALID_REF;
444 	sc->sc_rx_ring_gntref = GRANT_INVALID_REF;
445 
446 	/* setup device: alloc event channel and shared rings */
447 	tx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
448 	     UVM_KMF_WIRED | UVM_KMF_ZERO);
449 	rx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
450 	    UVM_KMF_WIRED | UVM_KMF_ZERO);
451 	if (tx_ring == NULL || rx_ring == NULL)
452 		panic("xennet_xenbus_resume: can't alloc rings");
453 
454 	SHARED_RING_INIT(tx_ring);
455 	FRONT_RING_INIT(&sc->sc_tx_ring, tx_ring, PAGE_SIZE);
456 	SHARED_RING_INIT(rx_ring);
457 	FRONT_RING_INIT(&sc->sc_rx_ring, rx_ring, PAGE_SIZE);
458 
459 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)tx_ring, &ma);
460 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_tx_ring_gntref);
461 	if (error)
462 		return error;
463 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)rx_ring, &ma);
464 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_rx_ring_gntref);
465 	if (error)
466 		return error;
467 	error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
468 	if (error)
469 		return error;
470 	aprint_verbose_dev(sc->sc_dev, "using event channel %d\n",
471 	    sc->sc_evtchn);
472 	event_set_handler(sc->sc_evtchn, &xennet_handler, sc,
473 	    IPL_NET, device_xname(sc->sc_dev));
474 
475 	error = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
476 	    "feature-rx-copy", &rx_copy, 10);
477 	if (error)
478 		rx_copy = 0; /* default value if key is absent */
479 
480 	if (rx_copy == 1) {
481 		aprint_normal_dev(sc->sc_dev, "using RX copy mode\n");
482 		sc->sc_rx_feature = FEATURE_RX_COPY;
483 	} else {
484 		aprint_normal_dev(sc->sc_dev, "using RX flip mode\n");
485 		sc->sc_rx_feature = FEATURE_RX_FLIP;
486 	}
487 
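	/*
	 * Publish the ring references and the event channel to the backend
	 * through xenbus.  The whole transaction is restarted from scratch
	 * if it fails with EAGAIN.
	 */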
488 again:
489 	xbt = xenbus_transaction_start();
490 	if (xbt == NULL)
491 		return ENOMEM;
492 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
493 	    "vifname", device_xname(sc->sc_dev));
494 	if (error) {
495 		errmsg = "vifname";
496 		goto abort_transaction;
497 	}
498 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
499 	    "tx-ring-ref","%u", sc->sc_tx_ring_gntref);
500 	if (error) {
501 		errmsg = "writing tx ring-ref";
502 		goto abort_transaction;
503 	}
504 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
505 	    "rx-ring-ref","%u", sc->sc_rx_ring_gntref);
506 	if (error) {
507 		errmsg = "writing rx ring-ref";
508 		goto abort_transaction;
509 	}
510 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
511 	    "request-rx-copy", "%lu", rx_copy);
512 	if (error) {
513 		errmsg = "writing request-rx-copy";
514 		goto abort_transaction;
515 	}
516 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
517 	    "feature-rx-notify", "%u", 1);
518 	if (error) {
519 		errmsg = "writing feature-rx-notify";
520 		goto abort_transaction;
521 	}
522 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
523 	    "event-channel", "%u", sc->sc_evtchn);
524 	if (error) {
525 		errmsg = "writing event channel";
526 		goto abort_transaction;
527 	}
528 	error = xenbus_transaction_end(xbt, 0);
529 	if (error == EAGAIN)
530 		goto again;
531 	if (error) {
532 		xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
533 		return -1;
534 	}
535 	xennet_alloc_rx_buffer(sc);
536 	sc->sc_backend_status = BEST_CONNECTED;
537 	return 0;
538 
539 abort_transaction:
540 	xenbus_transaction_end(xbt, 1);
541 	xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg);
542 	return error;
543 }
544 
545 static void xennet_backend_changed(void *arg, XenbusState new_state)
546 {
547 	struct xennet_xenbus_softc *sc = device_private((device_t)arg);
548 	DPRINTF(("%s: new backend state %d\n",
549 	    device_xname(sc->sc_dev), new_state));
550 
551 	switch (new_state) {
552 	case XenbusStateInitialising:
553 	case XenbusStateInitialised:
554 	case XenbusStateConnected:
555 		break;
556 	case XenbusStateClosing:
557 		sc->sc_backend_status = BEST_CLOSED;
558 		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
559 		break;
560 	case XenbusStateInitWait:
561 		if (xennet_xenbus_resume(sc) == 0)
562 			xenbus_switch_state(sc->sc_xbusd, NULL,
563 			    XenbusStateConnected);
564 		break;
565 	case XenbusStateUnknown:
566 	default:
567 		panic("bad backend state %d", new_state);
568 	}
569 }
570 
571 /*
572  * Allocate RX buffers and put the associated request structures
573  * in the ring. This allows the backend to use them to communicate with
574  * the frontend when some data is destined to the frontend.
575  */
576 
577 static void
578 xennet_alloc_rx_buffer(struct xennet_xenbus_softc *sc)
579 {
580 	RING_IDX req_prod = sc->sc_rx_ring.req_prod_pvt;
581 	RING_IDX i;
582 	struct xennet_rxreq *req;
583 	struct xen_memory_reservation reservation;
584 	int s1, s2, otherend_id;
585 	paddr_t pfn;
586 
587 	otherend_id = sc->sc_xbusd->xbusd_otherend_id;
588 
589 	s1 = splnet();
590 	for (i = 0; sc->sc_free_rxreql != 0; i++) {
591 		req  = SLIST_FIRST(&sc->sc_rxreq_head);
592 		KASSERT(req != NULL);
593 		KASSERT(req == &sc->sc_rxreqs[req->rxreq_id]);
594 		RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->id =
595 		    req->rxreq_id;
596 
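		/*
		 * In copy mode, grant the backend access to our page so it
		 * can copy the received packet into it.  In flip mode,
		 * transfer ownership of the page to the backend, which will
		 * later hand us back a machine page holding the data.
		 */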
597 		switch (sc->sc_rx_feature) {
598 		case FEATURE_RX_COPY:
599 			if (xengnt_grant_access(otherend_id,
600 			    xpmap_ptom_masked(req->rxreq_pa),
601 			    0, &req->rxreq_gntref) != 0) {
602 				goto out_loop;
603 			}
604 			break;
605 		case FEATURE_RX_FLIP:
606 			if (xengnt_grant_transfer(otherend_id,
607 			    &req->rxreq_gntref) != 0) {
608 				goto out_loop;
609 			}
610 			break;
611 		default:
612 			panic("%s: unsupported RX feature mode: %ld\n",
613 			    __func__, sc->sc_rx_feature);
614 		}
615 
616 		RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->gref =
617 		    req->rxreq_gntref;
618 
619 		SLIST_REMOVE_HEAD(&sc->sc_rxreq_head, rxreq_next);
620 		sc->sc_free_rxreql--;
621 
622 		if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
623 			/* unmap the page */
624 			MULTI_update_va_mapping(&rx_mcl[i],
625 			    req->rxreq_va, 0, 0);
626 			/*
627 			 * Remove this page from pseudo phys map before
628 			 * passing back to Xen.
629 			 */
630 			pfn = (req->rxreq_pa - XPMAP_OFFSET) >> PAGE_SHIFT;
631 			xennet_pages[i] = xpmap_phys_to_machine_mapping[pfn];
632 			xpmap_phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
633 		}
634 	}
635 
636 out_loop:
637 	if (i == 0) {
638 		splx(s1);
639 		return;
640 	}
641 
642 	if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
643 		/* also make sure to flush all TLB entries */
644 		rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
645 		    UVMF_TLB_FLUSH | UVMF_ALL;
646 		/*
647 		 * We may have allocated buffers which have entries
648 		 * outstanding in the page update queue -- make sure we flush
649 		 * those first!
650 		 */
651 		s2 = splvm();
652 		xpq_flush_queue();
653 		splx(s2);
654 		/* now decrease reservation */
655 		xenguest_handle(reservation.extent_start) = xennet_pages;
656 		reservation.nr_extents = i;
657 		reservation.extent_order = 0;
658 		reservation.address_bits = 0;
659 		reservation.domid = DOMID_SELF;
660 		rx_mcl[i].op = __HYPERVISOR_memory_op;
661 		rx_mcl[i].args[0] = XENMEM_decrease_reservation;
662 		rx_mcl[i].args[1] = (unsigned long)&reservation;
663 		HYPERVISOR_multicall(rx_mcl, i+1);
664 		if (__predict_false(rx_mcl[i].result != i)) {
665 			panic("xennet_alloc_rx_buffer: "
666 			    "XENMEM_decrease_reservation");
667 		}
668 	}
669 
670 	sc->sc_rx_ring.req_prod_pvt = req_prod + i;
671 	RING_PUSH_REQUESTS(&sc->sc_rx_ring);
672 
673 	splx(s1);
674 	return;
675 }
676 
677 /*
678  * Reclaim all RX buffers used by the I/O ring between frontend and backend
679  */
680 static void
681 xennet_free_rx_buffer(struct xennet_xenbus_softc *sc)
682 {
683 	paddr_t ma, pa;
684 	vaddr_t va;
685 	RING_IDX i;
686 	mmu_update_t mmu[1];
687 	multicall_entry_t mcl[2];
688 
689 	int s = splbio();
690 
691 	DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev)));
692 	/* get back memory from RX ring */
693 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
694 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
695 
696 		/*
697 		 * if the buffer is in transit in the network stack, wait for
698 		 * the network stack to free it.
699 		 */
700 		while ((volatile grant_ref_t)rxreq->rxreq_gntref ==
701 		    GRANT_STACK_REF)
702 			tsleep(xennet_xenbus_detach, PRIBIO, "xnet_free", hz/2);
703 
704 		if (rxreq->rxreq_gntref != GRANT_INVALID_REF) {
705 			/*
706 			 * this req is still granted. Get back the page or
707 			 * allocate a new one, and remap it.
708 			 */
709 			SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq,
710 			    rxreq_next);
711 			sc->sc_free_rxreql++;
712 
713 			switch (sc->sc_rx_feature) {
714 			case FEATURE_RX_COPY:
715 				xengnt_revoke_access(rxreq->rxreq_gntref);
716 				rxreq->rxreq_gntref = GRANT_INVALID_REF;
717 				break;
718 			case FEATURE_RX_FLIP:
719 				ma = xengnt_revoke_transfer(
720 				    rxreq->rxreq_gntref);
721 				rxreq->rxreq_gntref = GRANT_INVALID_REF;
722 				if (ma == 0) {
723 					u_long pfn;
724 					struct xen_memory_reservation xenres;
725 					/*
726 					 * transfer not complete, we lost the page.
727 					 * Get one from hypervisor
728 					 */
729 					xenguest_handle(xenres.extent_start) = &pfn;
730 					xenres.nr_extents = 1;
731 					xenres.extent_order = 0;
732 					xenres.address_bits = 31;
733 					xenres.domid = DOMID_SELF;
734 					if (HYPERVISOR_memory_op(
735 					    XENMEM_increase_reservation, &xenres) < 0) {
736 						panic("xennet_free_rx_buffer: "
737 						    "can't get memory back");
738 					}
739 					ma = pfn;
740 					KASSERT(ma != 0);
741 				}
742 				pa = rxreq->rxreq_pa;
743 				va = rxreq->rxreq_va;
744 				/* remap the page */
745 				mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
746 				mmu[0].val = ((pa - XPMAP_OFFSET) >> PAGE_SHIFT);
747 				MULTI_update_va_mapping(&mcl[0], va,
748 				    (ma << PAGE_SHIFT) | PG_V | PG_KW,
749 				    UVMF_TLB_FLUSH|UVMF_ALL);
750 				xpmap_phys_to_machine_mapping[
751 				    (pa - XPMAP_OFFSET) >> PAGE_SHIFT] = ma;
752 				mcl[1].op = __HYPERVISOR_mmu_update;
753 				mcl[1].args[0] = (unsigned long)mmu;
754 				mcl[1].args[1] = 1;
755 				mcl[1].args[2] = 0;
756 				mcl[1].args[3] = DOMID_SELF;
757 				HYPERVISOR_multicall(mcl, 2);
758 				break;
759 			default:
760 				panic("%s: unsupported RX feature mode: %ld\n",
761 				    __func__, sc->sc_rx_feature);
762 			}
763 		}
764 
765 	}
766 	splx(s);
767 	DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev)));
768 }
769 
770 /*
771  * Clears a used RX request when its associated mbuf has been processed
772  */
773 static void
774 xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg)
775 {
776 	struct xennet_rxreq *req = arg;
777 	struct xennet_xenbus_softc *sc = req->rxreq_sc;
778 
779 	int s = splnet();
780 
781 	/* puts back the RX request in the list of free RX requests */
782 	SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next);
783 	sc->sc_free_rxreql++;
784 
785 	/*
786 	 * The ring needs more requests to be pushed in; allocate some
787 	 * RX buffers to catch up with the backend's consumption.
788 	 */
789 	req->rxreq_gntref = GRANT_INVALID_REF;
790 	if (sc->sc_free_rxreql >= SC_NLIVEREQ(sc) &&
791 	    __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
792 		xennet_alloc_rx_buffer(sc);
793 	}
794 
795 	if (m)
796 		pool_cache_put(mb_cache, m);
797 	splx(s);
798 }
799 
800 /*
801  * Process the responses associated with the TX mbufs previously sent through
802  * xennet_softstart().
803  * Called at splnet.
804  */
805 static void
806 xennet_tx_complete(struct xennet_xenbus_softc *sc)
807 {
808 	struct xennet_txreq *req;
809 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
810 	RING_IDX resp_prod, i;
811 
812 	DPRINTFN(XEDB_EVENT, ("xennet_tx_complete prod %d cons %d\n",
813 	    sc->sc_tx_ring.sring->rsp_prod, sc->sc_tx_ring.rsp_cons));
814 
815 again:
816 	resp_prod = sc->sc_tx_ring.sring->rsp_prod;
817 	xen_rmb();
818 	for (i = sc->sc_tx_ring.rsp_cons; i != resp_prod; i++) {
819 		req = &sc->sc_txreqs[RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id];
820 		KASSERT(req->txreq_id ==
821 		    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id);
822 		if (__predict_false(xengnt_status(req->txreq_gntref))) {
823 			aprint_verbose_dev(sc->sc_dev,
824 					   "grant still used by backend\n");
825 			sc->sc_tx_ring.rsp_cons = i;
826 			goto end;
827 		}
828 		if (__predict_false(
829 		    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->status !=
830 		    NETIF_RSP_OKAY))
831 			ifp->if_oerrors++;
832 		else
833 			ifp->if_opackets++;
834 		xengnt_revoke_access(req->txreq_gntref);
835 		m_freem(req->txreq_m);
836 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
837 	}
838 	sc->sc_tx_ring.rsp_cons = resp_prod;
839 	/* set new event and check for race with rsp_cons update */
840 	sc->sc_tx_ring.sring->rsp_event =
841 	    resp_prod + ((sc->sc_tx_ring.sring->req_prod - resp_prod) >> 1) + 1;
842 	ifp->if_timer = 0;
843 	xen_wmb();
844 	if (resp_prod != sc->sc_tx_ring.sring->rsp_prod)
845 		goto again;
846 end:
847 	if (ifp->if_flags & IFF_OACTIVE) {
848 		ifp->if_flags &= ~IFF_OACTIVE;
849 		xennet_softstart(sc);
850 	}
851 }
852 
853 /*
854  * Xennet event handler.
855  * Get outstanding responses for TX packets, then collect all responses for
856  * pending RX packets.
857  * Called at splnet.
858  */
859 static int
860 xennet_handler(void *arg)
861 {
862 	struct xennet_xenbus_softc *sc = arg;
863 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
864 	RING_IDX resp_prod, i;
865 	struct xennet_rxreq *req;
866 	paddr_t ma, pa;
867 	vaddr_t va;
868 	mmu_update_t mmu[1];
869 	multicall_entry_t mcl[2];
870 	struct mbuf *m;
871 	void *pktp;
872 	int more_to_do;
873 
874 	if (sc->sc_backend_status != BEST_CONNECTED)
875 		return 1;
876 
877 	xennet_tx_complete(sc);
878 
879 #if NRND > 0
880 	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
881 #endif
882 again:
883 	DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n",
884 	    sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons));
885 
886 	resp_prod = sc->sc_rx_ring.sring->rsp_prod;
887 	xen_rmb(); /* ensure we see replies up to resp_prod */
888 	for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
889 		netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
890 		req = &sc->sc_rxreqs[rx->id];
891 		KASSERT(req->rxreq_gntref != GRANT_INVALID_REF);
892 		KASSERT(req->rxreq_id == rx->id);
893 
894 		ma = 0;
895 		switch (sc->sc_rx_feature) {
896 		case FEATURE_RX_COPY:
897 			xengnt_revoke_access(req->rxreq_gntref);
898 			break;
899 		case FEATURE_RX_FLIP:
900 			ma = xengnt_revoke_transfer(req->rxreq_gntref);
901 			if (ma == 0) {
902 				DPRINTFN(XEDB_EVENT, ("xennet_handler ma == 0\n"));
903 				/*
904 				 * The remote couldn't send us a packet.
905 				 * We can't free this rxreq as no page will be mapped
906 				 * here. Instead give it back immediately to the backend.
907 				 */
908 				ifp->if_ierrors++;
909 				RING_GET_REQUEST(&sc->sc_rx_ring,
910 				    sc->sc_rx_ring.req_prod_pvt)->id = req->rxreq_id;
911 				RING_GET_REQUEST(&sc->sc_rx_ring,
912 				    sc->sc_rx_ring.req_prod_pvt)->gref =
913 					req->rxreq_gntref;
914 				sc->sc_rx_ring.req_prod_pvt++;
915 				RING_PUSH_REQUESTS(&sc->sc_rx_ring);
916 				continue;
917 			}
918 			break;
919 		default:
920 			panic("%s: unsupported RX feature mode: %ld\n",
921 			    __func__, sc->sc_rx_feature);
922 		}
923 
924 		req->rxreq_gntref = GRANT_INVALID_REF;
925 
926 		pa = req->rxreq_pa;
927 		va = req->rxreq_va;
928 
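		/*
		 * In flip mode the backend transferred us a new machine
		 * frame (ma): update the P2M/M2P translations and map it at
		 * the buffer's va before looking at the packet.
		 */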
929 		if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
930 			/* remap the page */
931 			mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
932 			mmu[0].val = ((pa - XPMAP_OFFSET) >> PAGE_SHIFT);
933 			MULTI_update_va_mapping(&mcl[0], va,
934 			    (ma << PAGE_SHIFT) | PG_V | PG_KW, UVMF_TLB_FLUSH|UVMF_ALL);
935 			xpmap_phys_to_machine_mapping[
936 			    (pa - XPMAP_OFFSET) >> PAGE_SHIFT] = ma;
937 			mcl[1].op = __HYPERVISOR_mmu_update;
938 			mcl[1].args[0] = (unsigned long)mmu;
939 			mcl[1].args[1] = 1;
940 			mcl[1].args[2] = 0;
941 			mcl[1].args[3] = DOMID_SELF;
942 			HYPERVISOR_multicall(mcl, 2);
943 		}
944 
945 		pktp = (void *)(va + rx->offset);
946 #ifdef XENNET_DEBUG_DUMP
947 		xennet_hex_dump(pktp, rx->status, "r", rx->id);
948 #endif
949 		if ((ifp->if_flags & IFF_PROMISC) == 0) {
950 			struct ether_header *eh = pktp;
951 			if (ETHER_IS_MULTICAST(eh->ether_dhost) == 0 &&
952 			    memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
953 			    ETHER_ADDR_LEN) != 0) {
954 				DPRINTFN(XEDB_EVENT,
955 				    ("xennet_handler bad dest\n"));
956 				/* packet not for us */
957 				xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE,
958 				    req);
959 				continue;
960 			}
961 		}
962 		MGETHDR(m, M_DONTWAIT, MT_DATA);
963 		if (__predict_false(m == NULL)) {
964 			printf("xennet: rx no mbuf\n");
965 			ifp->if_ierrors++;
966 			xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE, req);
967 			continue;
968 		}
969 		MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
970 
971 		m->m_pkthdr.rcvif = ifp;
972 		if (__predict_true(sc->sc_rx_ring.req_prod_pvt !=
973 		    sc->sc_rx_ring.sring->rsp_prod)) {
974 			m->m_len = m->m_pkthdr.len = rx->status;
975 			MEXTADD(m, pktp, rx->status,
976 			    M_DEVBUF, xennet_rx_mbuf_free, req);
977 			m->m_flags |= M_EXT_RW; /* we own the buffer */
978 			req->rxreq_gntref = GRANT_STACK_REF;
979 		} else {
980 			/*
981 			 * This was our last receive buffer, allocate
982 			 * memory, copy data and push the receive
983 			 * buffer back to the hypervisor.
984 			 */
985 			m->m_len = min(MHLEN, rx->status);
986 			m->m_pkthdr.len = 0;
987 			m_copyback(m, 0, rx->status, pktp);
988 			xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE, req);
989 			if (m->m_pkthdr.len < rx->status) {
990 				/* out of memory, just drop packets */
991 				ifp->if_ierrors++;
992 				m_freem(m);
993 				continue;
994 			}
995 		}
996 		if ((rx->flags & NETRXF_csum_blank) != 0) {
997 			xennet_checksum_fill(&m);
998 			if (m == NULL) {
999 				ifp->if_ierrors++;
1000 				continue;
1001 			}
1002 		}
1003 		/*
1004 		 * Pass packet to bpf if there is a listener.
1005 		 */
1006 		bpf_mtap(ifp, m);
1007 
1008 		ifp->if_ipackets++;
1009 
1010 		/* Pass the packet up. */
1011 		(*ifp->if_input)(ifp, m);
1012 	}
1013 	xen_rmb();
1014 	sc->sc_rx_ring.rsp_cons = i;
1015 	RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_rx_ring, more_to_do);
1016 	if (more_to_do)
1017 		goto again;
1018 	return 1;
1019 }
1020 
1021 /*
1022  * The output routine of a xennet interface.
1023  * Called at splnet.
1024  */
1025 void
1026 xennet_start(struct ifnet *ifp)
1027 {
1028 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1029 
1030 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start()\n", device_xname(sc->sc_dev)));
1031 
1032 #if NRND > 0
1033 	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
1034 #endif
1035 
1036 	xennet_tx_complete(sc);
1037 
1038 	if (__predict_false(
1039 	    (ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING))
1040 		return;
1041 
1042 	/*
1043 	 * The Xen communication channel is much more efficient if we can
1044 	 * schedule a batch of packets for domain0. To achieve this, we
1045 	 * schedule a soft interrupt, and just return. This way, the network
1046 	 * stack will enqueue all pending mbufs in the interface's send queue
1047 	 * before it is processed by xennet_softstart().
1048 	 */
1049 	softint_schedule(sc->sc_softintr);
1050 	return;
1051 }
1052 
1053 /*
1054  * Prepare mbufs for TX and notify the backend when finished.
1055  * Called at splsoftnet.
1056  */
1057 void
1058 xennet_softstart(void *arg)
1059 {
1060 	struct xennet_xenbus_softc *sc = arg;
1061 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1062 	struct mbuf *m, *new_m;
1063 	netif_tx_request_t *txreq;
1064 	RING_IDX req_prod;
1065 	paddr_t pa, pa2;
1066 	struct xennet_txreq *req;
1067 	int notify;
1068 	int do_notify = 0;
1069 	int s;
1070 
1071 	s = splnet();
1072 	if (__predict_false(
1073 	    (ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)) {
1074 		splx(s);
1075 		return;
1076 	}
1077 
1078 	req_prod = sc->sc_tx_ring.req_prod_pvt;
1079 	while (/*CONSTCOND*/1) {
1080 		uint16_t txflags;
1081 
1082 		req = SLIST_FIRST(&sc->sc_txreq_head);
1083 		if (__predict_false(req == NULL)) {
1084 			ifp->if_flags |= IFF_OACTIVE;
1085 			break;
1086 		}
1087 		IFQ_POLL(&ifp->if_snd, m);
1088 		if (m == NULL)
1089 			break;
1090 
1091 		switch (m->m_flags & (M_EXT|M_EXT_CLUSTER)) {
1092 		case M_EXT|M_EXT_CLUSTER:
1093 			KASSERT(m->m_ext.ext_paddr != M_PADDR_INVALID);
1094 			pa = m->m_ext.ext_paddr +
1095 				(m->m_data - m->m_ext.ext_buf);
1096 			break;
1097 		case 0:
1098 			KASSERT(m->m_paddr != M_PADDR_INVALID);
1099 			pa = m->m_paddr + M_BUFOFFSET(m) +
1100 				(m->m_data - M_BUFADDR(m));
1101 			break;
1102 		default:
1103 			if (__predict_false(
1104 			    !pmap_extract(pmap_kernel(), (vaddr_t)m->m_data,
1105 			    &pa))) {
1106 				panic("xennet_start: no pa");
1107 			}
1108 			break;
1109 		}
1110 
1111 		if ((m->m_pkthdr.csum_flags &
1112 		    (M_CSUM_TCPv4 | M_CSUM_UDPv4)) != 0) {
1113 			txflags = NETTXF_csum_blank;
1114 		} else {
1115 			txflags = 0;
1116 		}
1117 
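		/*
		 * A TX request carries a single grant reference and thus can
		 * only describe a contiguous buffer within one page.  If the
		 * mbuf data is chained or crosses a page boundary, copy it
		 * into a fresh mbuf (or cluster) first.
		 */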
1118 		if (m->m_pkthdr.len != m->m_len ||
1119 		    (pa ^ (pa + m->m_pkthdr.len - 1)) & PG_FRAME) {
1120 
1121 			MGETHDR(new_m, M_DONTWAIT, MT_DATA);
1122 			if (__predict_false(new_m == NULL)) {
1123 				printf("%s: cannot allocate new mbuf\n",
1124 				       device_xname(sc->sc_dev));
1125 				break;
1126 			}
1127 			if (m->m_pkthdr.len > MHLEN) {
1128 				MCLGET(new_m, M_DONTWAIT);
1129 				if (__predict_false(
1130 				    (new_m->m_flags & M_EXT) == 0)) {
1131 					DPRINTF(("%s: no mbuf cluster\n",
1132 					    device_xname(sc->sc_dev)));
1133 					m_freem(new_m);
1134 					break;
1135 				}
1136 			}
1137 
1138 			m_copydata(m, 0, m->m_pkthdr.len, mtod(new_m, void *));
1139 			new_m->m_len = new_m->m_pkthdr.len = m->m_pkthdr.len;
1140 
1141 			if ((new_m->m_flags & M_EXT) != 0) {
1142 				pa = new_m->m_ext.ext_paddr;
1143 				KASSERT(new_m->m_data == new_m->m_ext.ext_buf);
1144 				KASSERT(pa != M_PADDR_INVALID);
1145 			} else {
1146 				pa = new_m->m_paddr;
1147 				KASSERT(pa != M_PADDR_INVALID);
1148 				KASSERT(new_m->m_data == M_BUFADDR(new_m));
1149 				pa += M_BUFOFFSET(new_m);
1150 			}
1151 			if (__predict_false(xengnt_grant_access(
1152 			    sc->sc_xbusd->xbusd_otherend_id,
1153 			    xpmap_ptom_masked(pa),
1154 			    GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1155 				m_freem(new_m);
1156 				ifp->if_flags |= IFF_OACTIVE;
1157 				break;
1158 			}
1159 			/* we will be able to send new_m */
1160 			IFQ_DEQUEUE(&ifp->if_snd, m);
1161 			m_freem(m);
1162 			m = new_m;
1163 		} else {
1164 			if (__predict_false(xengnt_grant_access(
1165 			    sc->sc_xbusd->xbusd_otherend_id,
1166 			    xpmap_ptom_masked(pa),
1167 			    GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1168 				ifp->if_flags |= IFF_OACTIVE;
1169 				break;
1170 			}
1171 			/* we will be able to send m */
1172 			IFQ_DEQUEUE(&ifp->if_snd, m);
1173 		}
1174 		MCLAIM(m, &sc->sc_ethercom.ec_tx_mowner);
1175 
1176 		KASSERT(((pa ^ (pa + m->m_pkthdr.len -  1)) & PG_FRAME) == 0);
1177 
1178 		SLIST_REMOVE_HEAD(&sc->sc_txreq_head, txreq_next);
1179 		req->txreq_m = m;
1180 
1181 		DPRINTFN(XEDB_MBUF, ("xennet_start id %d, "
1182 		    "mbuf %p, buf %p/%p/%p, size %d\n",
1183 		    req->txreq_id, m, mtod(m, void *), (void *)pa,
1184 		    (void *)xpmap_ptom_masked(pa), m->m_pkthdr.len));
1185 		pmap_extract_ma(pmap_kernel(), mtod(m, vaddr_t), &pa2);
1186 		DPRINTFN(XEDB_MBUF, ("xennet_start pa %p ma %p/%p\n",
1187 		    (void *)pa, (void *)xpmap_ptom_masked(pa), (void *)pa2));
1188 #ifdef XENNET_DEBUG_DUMP
1189 		xennet_hex_dump(mtod(m, u_char *), m->m_pkthdr.len, "s",
1190 			       	req->txreq_id);
1191 #endif
1192 
1193 		txreq = RING_GET_REQUEST(&sc->sc_tx_ring, req_prod);
1194 		txreq->id = req->txreq_id;
1195 		txreq->gref = req->txreq_gntref;
1196 		txreq->offset = pa & ~PG_FRAME;
1197 		txreq->size = m->m_pkthdr.len;
1198 		txreq->flags = txflags;
1199 
1200 		req_prod++;
1201 		sc->sc_tx_ring.req_prod_pvt = req_prod;
1202 		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_tx_ring, notify);
1203 		if (notify)
1204 			do_notify = 1;
1205 
1206 #ifdef XENNET_DEBUG
1207 		DPRINTFN(XEDB_MEM, ("packet addr %p/%p, physical %p/%p, "
1208 		    "m_paddr %p, len %d/%d\n", M_BUFADDR(m), mtod(m, void *),
1209 		    (void *)*kvtopte(mtod(m, vaddr_t)),
1210 		    (void *)xpmap_mtop(*kvtopte(mtod(m, vaddr_t))),
1211 		    (void *)m->m_paddr, m->m_pkthdr.len, m->m_len));
1212 		DPRINTFN(XEDB_MEM, ("id %d gref %d offset %d size %d flags %d"
1213 		    " prod %d\n",
1214 		    txreq->id, txreq->gref, txreq->offset, txreq->size,
1215 		    txreq->flags, req_prod));
1216 #endif
1217 
1218 		/*
1219 		 * Pass packet to bpf if there is a listener.
1220 		 */
1221 		bpf_mtap(ifp, m);
1222 	}
1223 
1224 	if (do_notify) {
1225 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
1226 		ifp->if_timer = 5;
1227 	}
1228 	splx(s);
1229 
1230 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start() done\n",
1231 	    device_xname(sc->sc_dev)));
1232 }
1233 
1234 int
1235 xennet_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1236 {
1237 #ifdef XENNET_DEBUG
1238 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1239 #endif
1240 	int s, error = 0;
1241 
1242 	s = splnet();
1243 
1244 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl()\n",
1245 	    device_xname(sc->sc_dev)));
1246 	error = ether_ioctl(ifp, cmd, data);
1247 	if (error == ENETRESET)
1248 		error = 0;
1249 	splx(s);
1250 
1251 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() returning %d\n",
1252 	    device_xname(sc->sc_dev), error));
1253 
1254 	return error;
1255 }
1256 
1257 void
1258 xennet_watchdog(struct ifnet *ifp)
1259 {
1260 	aprint_verbose_ifnet(ifp, "xennet_watchdog\n");
1261 }
1262 
1263 int
1264 xennet_init(struct ifnet *ifp)
1265 {
1266 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1267 	int s = splnet();
1268 
1269 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_init()\n",
1270 	    device_xname(sc->sc_dev)));
1271 
1272 	if ((ifp->if_flags & IFF_RUNNING) == 0) {
1273 		sc->sc_rx_ring.sring->rsp_event =
1274 		    sc->sc_rx_ring.rsp_cons + 1;
1275 		hypervisor_enable_event(sc->sc_evtchn);
1276 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
1277 		xennet_reset(sc);
1278 	}
1279 	ifp->if_flags |= IFF_RUNNING;
1280 	ifp->if_flags &= ~IFF_OACTIVE;
1281 	ifp->if_timer = 0;
1282 	splx(s);
1283 	return 0;
1284 }
1285 
1286 void
1287 xennet_stop(struct ifnet *ifp, int disable)
1288 {
1289 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1290 	int s = splnet();
1291 
1292 	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1293 	hypervisor_mask_event(sc->sc_evtchn);
1294 	xennet_reset(sc);
1295 	splx(s);
1296 }
1297 
1298 void
1299 xennet_reset(struct xennet_xenbus_softc *sc)
1300 {
1301 
1302 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_reset()\n",
1303 	    device_xname(sc->sc_dev)));
1304 }
1305 
1306 #if defined(NFS_BOOT_BOOTSTATIC)
1307 int
1308 xennet_bootstatic_callback(struct nfs_diskless *nd)
1309 {
1310 #if 0
1311 	struct ifnet *ifp = nd->nd_ifp;
1312 	struct xennet_xenbus_softc *sc =
1313 	    (struct xennet_xenbus_softc *)ifp->if_softc;
1314 #endif
1315 	int flags = 0;
1316 	union xen_cmdline_parseinfo xcp;
1317 	struct sockaddr_in *sin;
1318 
1319 	memset(&xcp, 0, sizeof(xcp.xcp_netinfo));
1320 	xcp.xcp_netinfo.xi_ifno = /* XXX sc->sc_ifno */ 0;
1321 	xcp.xcp_netinfo.xi_root = nd->nd_root.ndm_host;
1322 	xen_parse_cmdline(XEN_PARSE_NETINFO, &xcp);
1323 
1324 	if (xcp.xcp_netinfo.xi_root[0] != '\0') {
1325 		flags |= NFS_BOOT_HAS_SERVER;
1326 		if (strchr(xcp.xcp_netinfo.xi_root, ':') != NULL)
1327 			flags |= NFS_BOOT_HAS_ROOTPATH;
1328 	}
1329 
1330 	nd->nd_myip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[0]);
1331 	nd->nd_gwip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[2]);
1332 	nd->nd_mask.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[3]);
1333 
1334 	sin = (struct sockaddr_in *) &nd->nd_root.ndm_saddr;
1335 	memset((void *)sin, 0, sizeof(*sin));
1336 	sin->sin_len = sizeof(*sin);
1337 	sin->sin_family = AF_INET;
1338 	sin->sin_addr.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[1]);
1339 
1340 	if (nd->nd_myip.s_addr)
1341 		flags |= NFS_BOOT_HAS_MYIP;
1342 	if (nd->nd_gwip.s_addr)
1343 		flags |= NFS_BOOT_HAS_GWIP;
1344 	if (nd->nd_mask.s_addr)
1345 		flags |= NFS_BOOT_HAS_MASK;
1346 	if (sin->sin_addr.s_addr)
1347 		flags |= NFS_BOOT_HAS_SERVADDR;
1348 
1349 	return flags;
1350 }
1351 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
1352 
1353 #ifdef XENNET_DEBUG_DUMP
1354 #define XCHR(x) hexdigits[(x) & 0xf]
1355 static void
1356 xennet_hex_dump(const unsigned char *pkt, size_t len, const char *type, int id)
1357 {
1358 	size_t i, j;
1359 
1360 	printf("pkt %p len %zd/%zx type %s id %d\n", pkt, len, len, type, id);
1361 	printf("00000000  ");
1362 	for(i=0; i<len; i++) {
1363 		printf("%c%c ", XCHR(pkt[i]>>4), XCHR(pkt[i]));
1364 		if ((i+1) % 16 == 8)
1365 			printf(" ");
1366 		if ((i+1) % 16 == 0) {
1367 			printf(" %c", '|');
1368 			for(j=0; j<16; j++)
1369 				printf("%c", pkt[i-15+j]>=32 &&
1370 				    pkt[i-15+j]<127?pkt[i-15+j]:'.');
1371 			printf("%c\n%c%c%c%c%c%c%c%c  ", '|',
1372 			    XCHR((i+1)>>28), XCHR((i+1)>>24),
1373 			    XCHR((i+1)>>20), XCHR((i+1)>>16),
1374 			    XCHR((i+1)>>12), XCHR((i+1)>>8),
1375 			    XCHR((i+1)>>4), XCHR(i+1));
1376 		}
1377 	}
1378 	printf("\n");
1379 }
1380 #undef XCHR
1381 #endif
1382