1 /*      $NetBSD: if_xennet_xenbus.c,v 1.37 2009/07/29 12:02:09 cegger Exp $      */
2 
3 /*
4  * Copyright (c) 2006 Manuel Bouyer.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. All advertising materials mentioning features or use of this software
15  *    must display the following acknowledgement:
16  *      This product includes software developed by Manuel Bouyer.
17  * 4. The name of the author may not be used to endorse or promote products
18  *    derived from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  */
32 
33 /*
34  * Copyright (c) 2004 Christian Limpach.
35  * All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *      This product includes software developed by Christian Limpach.
48  * 4. The name of the author may not be used to endorse or promote products
49  *    derived from this software without specific prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
52  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
53  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
54  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
55  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
56  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
57  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
58  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
59  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
60  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
61  */
62 
63 /*
64  * This file contains the xennet frontend code required for the network
65  * communication between two Xen domains.
66  * It resembles xbd, but is a little more complex as it must deal with two
67  * rings:
68  * - the TX ring, to transmit packets to backend (inside => outside)
69  * - the RX ring, to receive packets from backend (outside => inside)
70  *
71  * The principles are as follows.
72  *
73  * For TX:
74  * The purpose is to transmit packets to the outside. Transmission starts in
75  * xennet_start() (the default output routine of xennet), which schedules a
76  * softint, xennet_softstart(). xennet_softstart() generates the requests
77  * associated with the queued TX mbufs (see altq(9)).
78  * The backend's responses are processed by xennet_tx_complete(), called either
79  * from:
80  * - xennet_start()
81  * - xennet_handler(), during an asynchronous event notification from backend
82  *   (similar to an IRQ).
83  *
84  * For RX:
85  * The purpose is to process the packets received from the outside. RX buffers
86  * are pre-allocated through xennet_alloc_rx_buffer() during xennet autoconf
87  * attach. During pre-allocation, the frontend pushes requests into the I/O
88  * ring, in preparation for incoming packets from the backend.
89  * When RX packets need to be processed, the backend takes the requests
90  * previously offered by the frontend and pushes the associated responses into
91  * the I/O ring. When done, it notifies the frontend through an event
92  * notification, which asynchronously calls xennet_handler() in the frontend.
93  * xennet_handler() processes the responses, generates the associated mbufs,
94  * and passes them to the MI layer for further processing.
95  */
96 
97 #include <sys/cdefs.h>
98 __KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.37 2009/07/29 12:02:09 cegger Exp $");
99 
100 #include "opt_xen.h"
101 #include "opt_nfs_boot.h"
102 #include "rnd.h"
103 #include "bpfilter.h"
104 
105 #include <sys/param.h>
106 #include <sys/device.h>
107 #include <sys/conf.h>
108 #include <sys/kernel.h>
109 #include <sys/proc.h>
110 #include <sys/systm.h>
111 #include <sys/intr.h>
112 #if NRND > 0
113 #include <sys/rnd.h>
114 #endif
115 
116 #include <net/if.h>
117 #include <net/if_dl.h>
118 #include <net/if_ether.h>
119 #if NBPFILTER > 0
120 #include <net/bpf.h>
121 #include <net/bpfdesc.h>
122 #endif
123 
124 #if defined(NFS_BOOT_BOOTSTATIC)
125 #include <sys/fstypes.h>
126 #include <sys/mount.h>
127 #include <sys/statvfs.h>
128 #include <netinet/in.h>
129 #include <nfs/rpcv2.h>
130 #include <nfs/nfsproto.h>
131 #include <nfs/nfs.h>
132 #include <nfs/nfsmount.h>
133 #include <nfs/nfsdiskless.h>
134 #include <xen/if_xennetvar.h>
135 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
136 
137 #include <xen/xennet_checksum.h>
138 
139 #include <uvm/uvm.h>
140 
141 #include <xen/hypervisor.h>
142 #include <xen/evtchn.h>
143 #include <xen/granttables.h>
144 #include <xen/xen3-public/io/netif.h>
145 #include <xen/xenpmap.h>
146 
147 #include <xen/xenbus.h>
148 #include "locators.h"
149 
150 #undef XENNET_DEBUG_DUMP
151 #undef XENNET_DEBUG
152 #ifdef XENNET_DEBUG
153 #define XEDB_FOLLOW     0x01
154 #define XEDB_INIT       0x02
155 #define XEDB_EVENT      0x04
156 #define XEDB_MBUF       0x08
157 #define XEDB_MEM        0x10
158 int xennet_debug = 0xff;
159 #define DPRINTF(x) if (xennet_debug) printf x;
160 #define DPRINTFN(n,x) if (xennet_debug & (n)) printf x;
161 #else
162 #define DPRINTF(x)
163 #define DPRINTFN(n,x)
164 #endif
165 
166 #define GRANT_INVALID_REF -1 /* entry is free */
167 #define GRANT_STACK_REF   -2 /* entry owned by the network stack */
168 
169 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
170 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
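/*
 * __RING_SIZE() yields the number of request/response slots that fit in a
 * single PAGE_SIZE shared ring page (rounded down to a power of two), so
 * the free-running RING_IDX counters can simply be masked into the ring.
 */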
171 
172 struct xennet_txreq {
173 	SLIST_ENTRY(xennet_txreq) txreq_next;
174 	uint16_t txreq_id; /* ID passed to backend */
175 	grant_ref_t txreq_gntref; /* grant ref of this request */
176 	struct mbuf *txreq_m; /* mbuf being transmitted */
177 };
178 
179 struct xennet_rxreq {
180 	SLIST_ENTRY(xennet_rxreq) rxreq_next;
181 	uint16_t rxreq_id; /* ID passed to backend */
182 	grant_ref_t rxreq_gntref; /* grant ref of this request */
183 /* va/pa for this receive buf. ma will be provided by backend */
184 	paddr_t rxreq_pa;
185 	vaddr_t rxreq_va;
186 	struct xennet_xenbus_softc *rxreq_sc; /* pointer to our interface */
187 };
188 
189 struct xennet_xenbus_softc {
190 	device_t sc_dev;
191 	struct ethercom sc_ethercom;
192 	uint8_t sc_enaddr[6];
193 	struct xenbus_device *sc_xbusd;
194 
195 	netif_tx_front_ring_t sc_tx_ring;
196 	netif_rx_front_ring_t sc_rx_ring;
197 
198 	unsigned int sc_evtchn;
199 	void *sc_softintr;
200 
201 	grant_ref_t sc_tx_ring_gntref;
202 	grant_ref_t sc_rx_ring_gntref;
203 
204 	struct xennet_txreq sc_txreqs[NET_TX_RING_SIZE];
205 	struct xennet_rxreq sc_rxreqs[NET_RX_RING_SIZE];
206 	SLIST_HEAD(,xennet_txreq) sc_txreq_head; /* list of free TX requests */
207 	SLIST_HEAD(,xennet_rxreq) sc_rxreq_head; /* list of free RX requests */
208 	int sc_free_rxreql; /* number of free receive request structs */
209 
210 	int sc_backend_status; /* our status with backend */
211 #define BEST_CLOSED		0
212 #define BEST_DISCONNECTED	1
213 #define BEST_CONNECTED		2
214 #define BEST_SUSPENDED		3
215 #if NRND > 0
216 	rndsource_element_t     sc_rnd_source;
217 #endif
218 };
219 #define SC_NLIVEREQ(sc) ((sc)->sc_rx_ring.req_prod_pvt - \
220 			    (sc)->sc_rx_ring.sring->rsp_prod)
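/*
 * SC_NLIVEREQ() counts the RX requests still outstanding at the backend:
 * requests queued by the frontend (req_prod_pvt) minus responses already
 * produced by the backend (rsp_prod).
 */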
221 
222 /* too big to be on stack */
223 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
224 static u_long xennet_pages[NET_RX_RING_SIZE];
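/*
 * rx_mcl[] holds one MULTI_update_va_mapping entry per RX buffer handed to
 * the backend plus one trailing slot for the XENMEM_decrease_reservation
 * hypercall; xennet_pages[] collects the machine frames being given away.
 */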
225 
226 static int  xennet_xenbus_match(device_t, cfdata_t, void *);
227 static void xennet_xenbus_attach(device_t, device_t, void *);
228 static int  xennet_xenbus_detach(device_t, int);
229 static void xennet_backend_changed(void *, XenbusState);
230 
231 static int  xennet_xenbus_resume(void *);
232 static void xennet_alloc_rx_buffer(struct xennet_xenbus_softc *);
233 static void xennet_free_rx_buffer(struct xennet_xenbus_softc *);
234 static void xennet_tx_complete(struct xennet_xenbus_softc *);
235 static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *);
236 static int  xennet_handler(void *);
237 #ifdef XENNET_DEBUG_DUMP
238 static void xennet_hex_dump(const unsigned char *, size_t, const char *, int);
239 #endif
240 
241 static int  xennet_init(struct ifnet *);
242 static void xennet_stop(struct ifnet *, int);
243 static void xennet_reset(struct xennet_xenbus_softc *);
244 static void xennet_softstart(void *);
245 static void xennet_start(struct ifnet *);
246 static int  xennet_ioctl(struct ifnet *, u_long, void *);
247 static void xennet_watchdog(struct ifnet *);
248 
249 CFATTACH_DECL_NEW(xennet, sizeof(struct xennet_xenbus_softc),
250    xennet_xenbus_match, xennet_xenbus_attach, xennet_xenbus_detach, NULL);
251 
252 static int
253 xennet_xenbus_match(device_t parent, cfdata_t match, void *aux)
254 {
255 	struct xenbusdev_attach_args *xa = aux;
256 
257 	if (strcmp(xa->xa_type, "vif") != 0)
258 		return 0;
259 
260 	if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
261 	    match->cf_loc[XENBUSCF_ID] != xa->xa_id)
262 		return 0;
263 
264 	return 1;
265 }
266 
267 static void
268 xennet_xenbus_attach(device_t parent, device_t self, void *aux)
269 {
270 	struct xennet_xenbus_softc *sc = device_private(self);
271 	struct xenbusdev_attach_args *xa = aux;
272 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
273 	int err;
274 	RING_IDX i;
275 	char *val, *e, *p;
276 	int s;
277 	extern int ifqmaxlen; /* XXX */
278 #ifdef XENNET_DEBUG
279 	char **dir;
280 	int dir_n = 0;
281 	char id_str[20];
282 #endif
283 
284 	aprint_normal(": Xen Virtual Network Interface\n");
285 	sc->sc_dev = self;
286 
287 #ifdef XENNET_DEBUG
288 	printf("path: %s\n", xa->xa_xbusd->xbusd_path);
289 	snprintf(id_str, sizeof(id_str), "%d", xa->xa_id);
290 	err = xenbus_directory(NULL, "device/vif", id_str, &dir_n, &dir);
291 	if (err) {
292 		aprint_error_dev(self, "xenbus_directory err %d\n", err);
293 	} else {
294 		printf("%s/\n", xa->xa_xbusd->xbusd_path);
295 		for (i = 0; i < dir_n; i++) {
296 			printf("\t/%s", dir[i]);
297 			err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path,
298 				          dir[i], NULL, &val);
299 			if (err) {
300 				aprint_error_dev(self, "xenbus_read err %d\n",
301 					         err);
302 			} else {
303 				printf(" = %s\n", val);
304 				free(val, M_DEVBUF);
305 			}
306 		}
307 	}
308 #endif /* XENNET_DEBUG */
309 	sc->sc_xbusd = xa->xa_xbusd;
310 	sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;
311 
312 	/* initialize free TX and RX request lists */
313 	SLIST_INIT(&sc->sc_txreq_head);
314 	for (i = 0; i < NET_TX_RING_SIZE; i++) {
315 		sc->sc_txreqs[i].txreq_id = i;
316 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, &sc->sc_txreqs[i],
317 		    txreq_next);
318 	}
319 	SLIST_INIT(&sc->sc_rxreq_head);
320 	s = splvm();
321 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
322 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
323 		rxreq->rxreq_id = i;
324 		rxreq->rxreq_sc = sc;
325 		rxreq->rxreq_va = uvm_km_alloc(kernel_map,
326 		    PAGE_SIZE, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO);
327 		if (rxreq->rxreq_va == 0)
328 			break;
329 		if (!pmap_extract(pmap_kernel(), rxreq->rxreq_va,
330 		    &rxreq->rxreq_pa))
331 			panic("%s: no pa for mapped va ?", device_xname(self));
332 		rxreq->rxreq_gntref = GRANT_INVALID_REF;
333 		SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next);
334 	}
335 	splx(s);
336 	sc->sc_free_rxreql = i;
337 	if (sc->sc_free_rxreql == 0) {
338 		aprint_error_dev(self, "failed to allocate rx memory\n");
339 		return;
340 	}
341 
342 	/* read mac address */
343 	err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path, "mac", NULL, &val);
344 	if (err) {
345 		aprint_error_dev(self, "can't read mac address, err %d\n", err);
346 		return;
347 	}
348 	for (i = 0, p = val; i < 6; i++) {
349 		sc->sc_enaddr[i] = strtoul(p, &e, 16);
350 		if (i != 5 && e[0] != ':') {
351 			aprint_error_dev(self,
352 			    "%s is not a valid mac address\n", val);
353 			free(val, M_DEVBUF);
354 			return;
355 		}
356 		p = &e[1];
357 	}
358 	free(val, M_DEVBUF);
359 	aprint_normal_dev(self, "MAC address %s\n",
360 	    ether_sprintf(sc->sc_enaddr));
361 	/* Initialize ifnet structure and attach interface */
362 	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
363 	ifp->if_softc = sc;
364 	ifp->if_start = xennet_start;
365 	ifp->if_ioctl = xennet_ioctl;
366 	ifp->if_watchdog = xennet_watchdog;
367 	ifp->if_init = xennet_init;
368 	ifp->if_stop = xennet_stop;
369 	ifp->if_flags = IFF_BROADCAST|IFF_SIMPLEX|IFF_NOTRAILERS|IFF_MULTICAST;
370 	ifp->if_timer = 0;
371 	ifp->if_snd.ifq_maxlen = max(ifqmaxlen, NET_TX_RING_SIZE * 2);
372 	ifp->if_capabilities = IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx;
373 	IFQ_SET_READY(&ifp->if_snd);
374 	if_attach(ifp);
375 	ether_ifattach(ifp, sc->sc_enaddr);
376 	sc->sc_softintr = softint_establish(SOFTINT_NET, xennet_softstart, sc);
377 	if (sc->sc_softintr == NULL)
378 		panic("%s: can't establish soft interrupt",
379 			device_xname(self));
380 
381 #if NRND > 0
382 	rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev),
383 	    RND_TYPE_NET, 0);
384 #endif
385 
386 	/* initialise shared structures and tell backend that we are ready */
387 	xennet_xenbus_resume(sc);
388 }
389 
390 static int
391 xennet_xenbus_detach(device_t self, int flags)
392 {
393 	struct xennet_xenbus_softc *sc = device_private(self);
394 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
395 	int s0, s1;
396 	RING_IDX i;
397 
398 	DPRINTF(("%s: xennet_xenbus_detach\n", device_xname(self)));
399 	s0 = splnet();
400 	xennet_stop(ifp, 1);
401 	/* wait for pending TX to complete, and collect pending RX packets */
402 	xennet_handler(sc);
403 	while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
404 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_detach", hz/2);
405 		xennet_handler(sc);
406 	}
407 	xennet_free_rx_buffer(sc);
408 
409 	s1 = splvm();
410 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
411 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
412 		uvm_km_free(kernel_map, rxreq->rxreq_va, PAGE_SIZE,
413 		    UVM_KMF_WIRED);
414 	}
415 	splx(s1);
416 
417 	ether_ifdetach(ifp);
418 	if_detach(ifp);
419 
420 #if NRND > 0
421 	/* Unhook the entropy source. */
422 	rnd_detach_source(&sc->sc_rnd_source);
423 #endif
424 
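	/*
	 * Wait for the backend to release its references to the shared ring
	 * pages before revoking the grants and freeing the pages.
	 */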
425 	while (xengnt_status(sc->sc_tx_ring_gntref)) {
426 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_txref", hz/2);
427 	}
428 	xengnt_revoke_access(sc->sc_tx_ring_gntref);
429 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_tx_ring.sring, PAGE_SIZE,
430 	    UVM_KMF_WIRED);
431 	while (xengnt_status(sc->sc_rx_ring_gntref)) {
432 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_rxref", hz/2);
433 	}
434 	xengnt_revoke_access(sc->sc_rx_ring_gntref);
435 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_rx_ring.sring, PAGE_SIZE,
436 	    UVM_KMF_WIRED);
437 	softint_disestablish(sc->sc_softintr);
438 	event_remove_handler(sc->sc_evtchn, &xennet_handler, sc);
439 	splx(s0);
440 	DPRINTF(("%s: xennet_xenbus_detach done\n", device_xname(self)));
441 	return 0;
442 }
443 
444 static int
445 xennet_xenbus_resume(void *p)
446 {
447 	struct xennet_xenbus_softc *sc = p;
448 	struct xenbus_transaction *xbt;
449 	int error;
450 	netif_tx_sring_t *tx_ring;
451 	netif_rx_sring_t *rx_ring;
452 	paddr_t ma;
453 	const char *errmsg;
454 
455 	sc->sc_tx_ring_gntref = GRANT_INVALID_REF;
456 	sc->sc_rx_ring_gntref = GRANT_INVALID_REF;
457 
458 
459 	/* setup device: alloc event channel and shared rings */
460 	tx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
461 	     UVM_KMF_WIRED | UVM_KMF_ZERO);
462 	rx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
463 	    UVM_KMF_WIRED | UVM_KMF_ZERO);
464 	if (tx_ring == NULL || rx_ring == NULL)
465 		panic("xennet_xenbus_resume: can't alloc rings");
466 
467 	SHARED_RING_INIT(tx_ring);
468 	FRONT_RING_INIT(&sc->sc_tx_ring, tx_ring, PAGE_SIZE);
469 	SHARED_RING_INIT(rx_ring);
470 	FRONT_RING_INIT(&sc->sc_rx_ring, rx_ring, PAGE_SIZE);
471 
472 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)tx_ring, &ma);
473 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_tx_ring_gntref);
474 	if (error)
475 		return error;
476 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)rx_ring, &ma);
477 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_rx_ring_gntref);
478 	if (error)
479 		return error;
480 	error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
481 	if (error)
482 		return error;
483 	aprint_verbose_dev(sc->sc_dev, "using event channel %d\n",
484 	    sc->sc_evtchn);
485 	event_set_handler(sc->sc_evtchn, &xennet_handler, sc,
486 	    IPL_NET, device_xname(sc->sc_dev));
487 
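	/*
	 * Publish the ring references and event channel to the backend
	 * through a xenbus transaction. xenbus_transaction_end() returns
	 * EAGAIN if the transaction raced with another update, in which case
	 * the whole transaction is retried from "again".
	 */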
488 again:
489 	xbt = xenbus_transaction_start();
490 	if (xbt == NULL)
491 		return ENOMEM;
492 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
493 	    "tx-ring-ref","%u", sc->sc_tx_ring_gntref);
494 	if (error) {
495 		errmsg = "writing tx ring-ref";
496 		goto abort_transaction;
497 	}
498 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
499 	    "rx-ring-ref","%u", sc->sc_rx_ring_gntref);
500 	if (error) {
501 		errmsg = "writing rx ring-ref";
502 		goto abort_transaction;
503 	}
504 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
505 	    "feature-rx-notify", "%u", 1);
506 	if (error) {
507 		errmsg = "writing feature-rx-notify";
508 		goto abort_transaction;
509 	}
510 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
511 	    "event-channel", "%u", sc->sc_evtchn);
512 	if (error) {
513 		errmsg = "writing event channel";
514 		goto abort_transaction;
515 	}
516 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
517 	    "state", "%d", XenbusStateConnected);
518 	if (error) {
519 		errmsg = "writing frontend XenbusStateConnected";
520 		goto abort_transaction;
521 	}
522 	error = xenbus_transaction_end(xbt, 0);
523 	if (error == EAGAIN)
524 		goto again;
525 	if (error) {
526 		xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
527 		return -1;
528 	}
529 	xennet_alloc_rx_buffer(sc);
530 	sc->sc_backend_status = BEST_CONNECTED;
531 	return 0;
532 
533 abort_transaction:
534 	xenbus_transaction_end(xbt, 1);
535 	xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg);
536 	return error;
537 }
538 
539 static void xennet_backend_changed(void *arg, XenbusState new_state)
540 {
541 	struct xennet_xenbus_softc *sc = device_private((device_t)arg);
542 	DPRINTF(("%s: new backend state %d\n",
543 	    device_xname(sc->sc_dev), new_state));
544 
545 	switch (new_state) {
546 	case XenbusStateInitialising:
547 	case XenbusStateInitWait:
548 	case XenbusStateInitialised:
549 		break;
550 	case XenbusStateClosing:
551 		sc->sc_backend_status = BEST_CLOSED;
552 		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
553 		break;
554 	case XenbusStateConnected:
555 		break;
556 	case XenbusStateUnknown:
557 	default:
558 		panic("bad backend state %d", new_state);
559 	}
560 }
561 
562 /*
563  * Allocate RX buffers and put the associated request structures
564  * in the ring. This allows the backend to use them to communicate with
565  * the frontend when it has data destined for the frontend.
566  */
567 
568 static void
569 xennet_alloc_rx_buffer(struct xennet_xenbus_softc *sc)
570 {
571 	RING_IDX req_prod = sc->sc_rx_ring.req_prod_pvt;
572 	RING_IDX i;
573 	struct xennet_rxreq *req;
574 	struct xen_memory_reservation reservation;
575 	int s1, s2;
576 	paddr_t pfn;
577 
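	/*
	 * This frontend uses the page-transfer (page-flipping) RX model:
	 * each buffer page is granted for transfer to the backend, unmapped
	 * from the kernel map and returned to the hypervisor via
	 * XENMEM_decrease_reservation. The backend later hands back a machine
	 * page containing the received data (see xennet_handler()).
	 */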
578 	s1 = splnet();
579 	for (i = 0; sc->sc_free_rxreql != 0; i++) {
580 		req  = SLIST_FIRST(&sc->sc_rxreq_head);
581 		KASSERT(req != NULL);
582 		KASSERT(req == &sc->sc_rxreqs[req->rxreq_id]);
583 		RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->id =
584 		    req->rxreq_id;
585 		if (xengnt_grant_transfer(sc->sc_xbusd->xbusd_otherend_id,
586 		    &req->rxreq_gntref) != 0) {
587 			break;
588 		}
589 		RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->gref =
590 		    req->rxreq_gntref;
591 
592 		SLIST_REMOVE_HEAD(&sc->sc_rxreq_head, rxreq_next);
593 		sc->sc_free_rxreql--;
594 
595 		/* unmap the page */
596 		MULTI_update_va_mapping(&rx_mcl[i], req->rxreq_va, 0, 0);
597 		/*
598 		 * Remove this page from pseudo phys map before
599 		 * passing back to Xen.
600 		 */
601 		pfn = (req->rxreq_pa - XPMAP_OFFSET) >> PAGE_SHIFT;
602 		xennet_pages[i] = xpmap_phys_to_machine_mapping[pfn];
603 		xpmap_phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
604 	}
605 	if (i == 0) {
606 		splx(s1);
607 		return;
608 	}
609 	/* also make sure to flush all TLB entries */
610 	rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
611 	/*
612 	 * We may have allocated buffers which have entries
613 	 * outstanding in the page update queue -- make sure we flush
614 	 * those first!
615 	 */
616 	s2 = splvm();
617 	xpq_flush_queue();
618 	splx(s2);
619 	/* now decrease reservation */
620 	xenguest_handle(reservation.extent_start) = xennet_pages;
621 	reservation.nr_extents = i;
622 	reservation.extent_order = 0;
623 	reservation.address_bits = 0;
624 	reservation.domid = DOMID_SELF;
625 	rx_mcl[i].op = __HYPERVISOR_memory_op;
626 	rx_mcl[i].args[0] = XENMEM_decrease_reservation;
627 	rx_mcl[i].args[1] = (unsigned long)&reservation;
628 	HYPERVISOR_multicall(rx_mcl, i+1);
629 	if (__predict_false(rx_mcl[i].result != i)) {
630 		panic("xennet_alloc_rx_buffer: XENMEM_decrease_reservation");
631 	}
632 	sc->sc_rx_ring.req_prod_pvt = req_prod + i;
633 	RING_PUSH_REQUESTS(&sc->sc_rx_ring);
634 
635 	splx(s1);
636 	return;
637 }
638 
639 /*
640  * Reclaim all RX buffers used by the I/O ring between frontend and backend
641  */
642 static void
643 xennet_free_rx_buffer(struct xennet_xenbus_softc *sc)
644 {
645 	paddr_t ma, pa;
646 	vaddr_t va;
647 	RING_IDX i;
648 	mmu_update_t mmu[1];
649 	multicall_entry_t mcl[2];
650 
651 	int s = splbio();
652 
653 	DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev)));
654 	/* get back memory from RX ring */
655 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
656 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
657 
658 		/*
659 		 * if the buffer is in transit in the network stack, wait for
660 		 * the network stack to free it.
661 		 */
662 		while ((volatile grant_ref_t)rxreq->rxreq_gntref ==
663 		    GRANT_STACK_REF)
664 			tsleep(xennet_xenbus_detach, PRIBIO, "xnet_free", hz/2);
665 
666 		if (rxreq->rxreq_gntref != GRANT_INVALID_REF) {
667 			/*
668 			 * this req is still granted. Get back the page or
669 			 * allocate a new one, and remap it.
670 			 */
671 			SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq,
672 			    rxreq_next);
673 			sc->sc_free_rxreql++;
674 			ma = xengnt_revoke_transfer(rxreq->rxreq_gntref);
675 			rxreq->rxreq_gntref = GRANT_INVALID_REF;
676 			if (ma == 0) {
677 				u_long pfn;
678 				struct xen_memory_reservation xenres;
679 				/*
680 				 * transfer not complete, we lost the page.
681 				 * Get one from hypervisor
682 				 */
683 				xenguest_handle(xenres.extent_start) = &pfn;
684 				xenres.nr_extents = 1;
685 				xenres.extent_order = 0;
686 				xenres.address_bits = 31;
687 				xenres.domid = DOMID_SELF;
688 				if (HYPERVISOR_memory_op(
689 				    XENMEM_increase_reservation, &xenres) < 0) {
690 					panic("xennet_free_rx_buffer: "
691 					    "can't get memory back");
692 				}
693 				ma = pfn;
694 				KASSERT(ma != 0);
695 			}
696 			pa = rxreq->rxreq_pa;
697 			va = rxreq->rxreq_va;
698 			/* remap the page */
699 			mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
700 			mmu[0].val = ((pa - XPMAP_OFFSET) >> PAGE_SHIFT);
701 			MULTI_update_va_mapping(&mcl[0], va,
702 			    (ma << PAGE_SHIFT) | PG_V | PG_KW,
703 			    UVMF_TLB_FLUSH|UVMF_ALL);
704 			xpmap_phys_to_machine_mapping[
705 			    (pa - XPMAP_OFFSET) >> PAGE_SHIFT] = ma;
706 			mcl[1].op = __HYPERVISOR_mmu_update;
707 			mcl[1].args[0] = (unsigned long)mmu;
708 			mcl[1].args[1] = 1;
709 			mcl[1].args[2] = 0;
710 			mcl[1].args[3] = DOMID_SELF;
711 			HYPERVISOR_multicall(mcl, 2);
712 		}
713 
714 	}
715 	splx(s);
716 	DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev)));
717 }
718 
719 /*
720  * Clears a used RX request when its associated mbuf has been processed
721  */
722 static void
723 xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg)
724 {
725 	struct xennet_rxreq *req = arg;
726 	struct xennet_xenbus_softc *sc = req->rxreq_sc;
727 
728 	int s = splnet();
729 
730 	/* puts back the RX request in the list of free RX requests */
731 	SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next);
732 	sc->sc_free_rxreql++;
733 
734 	/*
735 	 * The ring needs more requests to be pushed in; allocate some
736 	 * RX buffers to catch up with the backend's consumption.
737 	 */
738 	req->rxreq_gntref = GRANT_INVALID_REF;
739 	if (sc->sc_free_rxreql >= SC_NLIVEREQ(sc) &&
740 	    __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
741 		xennet_alloc_rx_buffer(sc);
742 	}
743 
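	/*
	 * When invoked as the MEXTADD external-storage free callback, the
	 * mbuf itself must be returned to the mbuf pool here; when called
	 * directly to recycle an unused buffer, m is NULL.
	 */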
744 	if (m)
745 		pool_cache_put(mb_cache, m);
746 	splx(s);
747 }
748 
749 /*
750  * Process the responses associated with the TX mbufs previously sent through
751  * xennet_softstart().
752  * Called at splnet.
753  */
754 static void
755 xennet_tx_complete(struct xennet_xenbus_softc *sc)
756 {
757 	struct xennet_txreq *req;
758 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
759 	RING_IDX resp_prod, i;
760 
761 	DPRINTFN(XEDB_EVENT, ("xennet_tx_complete prod %d cons %d\n",
762 	    sc->sc_tx_ring.sring->rsp_prod, sc->sc_tx_ring.rsp_cons));
763 
764 again:
765 	resp_prod = sc->sc_tx_ring.sring->rsp_prod;
766 	xen_rmb();
767 	for (i = sc->sc_tx_ring.rsp_cons; i != resp_prod; i++) {
768 		req = &sc->sc_txreqs[RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id];
769 		KASSERT(req->txreq_id ==
770 		    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id);
771 		if (__predict_false(xengnt_status(req->txreq_gntref))) {
772 			aprint_verbose_dev(sc->sc_dev,
773 					   "grant still used by backend\n");
774 			sc->sc_tx_ring.rsp_cons = i;
775 			goto end;
776 		}
777 		if (__predict_false(
778 		    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->status !=
779 		    NETIF_RSP_OKAY))
780 			ifp->if_oerrors++;
781 		else
782 			ifp->if_opackets++;
783 		xengnt_revoke_access(req->txreq_gntref);
784 		m_freem(req->txreq_m);
785 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
786 	}
787 	sc->sc_tx_ring.rsp_cons = resp_prod;
788 	/* set new event and check for race with rsp_cons update */
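	/*
	 * The event threshold is set about halfway through the requests that
	 * are still outstanding, so the backend does not interrupt us for
	 * every single completion.
	 */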
789 	sc->sc_tx_ring.sring->rsp_event =
790 	    resp_prod + ((sc->sc_tx_ring.sring->req_prod - resp_prod) >> 1) + 1;
791 	ifp->if_timer = 0;
792 	xen_wmb();
793 	if (resp_prod != sc->sc_tx_ring.sring->rsp_prod)
794 		goto again;
795 end:
796 	if (ifp->if_flags & IFF_OACTIVE) {
797 		ifp->if_flags &= ~IFF_OACTIVE;
798 		xennet_softstart(sc);
799 	}
800 }
801 
802 /*
803  * Xennet event handler.
804  * Get outstanding responses for TX packets, then collect all responses for
805  * pending RX packets.
806  * Called at splnet.
807  */
808 static int
809 xennet_handler(void *arg)
810 {
811 	struct xennet_xenbus_softc *sc = arg;
812 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
813 	RING_IDX resp_prod, i;
814 	struct xennet_rxreq *req;
815 	paddr_t ma, pa;
816 	vaddr_t va;
817 	mmu_update_t mmu[1];
818 	multicall_entry_t mcl[2];
819 	struct mbuf *m;
820 	void *pktp;
821 	int more_to_do;
822 
823 	if (sc->sc_backend_status != BEST_CONNECTED)
824 		return 1;
825 
826 	xennet_tx_complete(sc);
827 
828 #if NRND > 0
829 	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
830 #endif
831 again:
832 	DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n",
833 	    sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons));
834 
835 	resp_prod = sc->sc_rx_ring.sring->rsp_prod;
836 	xen_rmb(); /* ensure we see replies up to resp_prod */
837 	for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
838 		netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
839 		req = &sc->sc_rxreqs[rx->id];
840 		KASSERT(req->rxreq_gntref != GRANT_INVALID_REF);
841 		KASSERT(req->rxreq_id == rx->id);
842 		ma = xengnt_revoke_transfer(req->rxreq_gntref);
843 		if (ma == 0) {
844 			DPRINTFN(XEDB_EVENT, ("xennet_handler ma == 0\n"));
845 			/*
846 			 * The remote couldn't send us a packet.
847 			 * We can't free this rxreq as no page will be mapped
848 			 * here. Instead give it back immediately to the backend.
849 			 */
850 			ifp->if_ierrors++;
851 			RING_GET_REQUEST(&sc->sc_rx_ring,
852 			    sc->sc_rx_ring.req_prod_pvt)->id = req->rxreq_id;
853 			RING_GET_REQUEST(&sc->sc_rx_ring,
854 			    sc->sc_rx_ring.req_prod_pvt)->gref =
855 				req->rxreq_gntref;
856 			sc->sc_rx_ring.req_prod_pvt++;
857 			RING_PUSH_REQUESTS(&sc->sc_rx_ring);
858 			continue;
859 		}
860 		req->rxreq_gntref = GRANT_INVALID_REF;
861 
862 		pa = req->rxreq_pa;
863 		va = req->rxreq_va;
864 		/* remap the page */
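		/*
		 * mcl[0] maps the machine page received from the backend at
		 * the rxreq's virtual address; mcl[1] queues an
		 * MMU_MACHPHYS_UPDATE so the hypervisor's M2P table matches
		 * the P2M entry updated just below.
		 */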
865 		mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
866 		mmu[0].val = ((pa - XPMAP_OFFSET) >> PAGE_SHIFT);
867 		MULTI_update_va_mapping(&mcl[0], va,
868 		    (ma << PAGE_SHIFT) | PG_V | PG_KW, UVMF_TLB_FLUSH|UVMF_ALL);
869 		xpmap_phys_to_machine_mapping[
870 		    (pa - XPMAP_OFFSET) >> PAGE_SHIFT] = ma;
871 		mcl[1].op = __HYPERVISOR_mmu_update;
872 		mcl[1].args[0] = (unsigned long)mmu;
873 		mcl[1].args[1] = 1;
874 		mcl[1].args[2] = 0;
875 		mcl[1].args[3] = DOMID_SELF;
876 		HYPERVISOR_multicall(mcl, 2);
877 		pktp = (void *)(va + rx->offset);
878 #ifdef XENNET_DEBUG_DUMP
879 		xennet_hex_dump(pktp, rx->status, "r", rx->id);
880 #endif
881 		if ((ifp->if_flags & IFF_PROMISC) == 0) {
882 			struct ether_header *eh = pktp;
883 			if (ETHER_IS_MULTICAST(eh->ether_dhost) == 0 &&
884 			    memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
885 			    ETHER_ADDR_LEN) != 0) {
886 				DPRINTFN(XEDB_EVENT,
887 				    ("xennet_handler bad dest\n"));
888 				/* packet not for us */
889 				xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE,
890 				    req);
891 				continue;
892 			}
893 		}
894 		MGETHDR(m, M_DONTWAIT, MT_DATA);
895 		if (__predict_false(m == NULL)) {
896 			printf("xennet: rx no mbuf\n");
897 			ifp->if_ierrors++;
898 			xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE, req);
899 			continue;
900 		}
901 		MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
902 
903 		m->m_pkthdr.rcvif = ifp;
904 		if (__predict_true(sc->sc_rx_ring.req_prod_pvt !=
905 		    sc->sc_rx_ring.sring->rsp_prod)) {
906 			m->m_len = m->m_pkthdr.len = rx->status;
907 			MEXTADD(m, pktp, rx->status,
908 			    M_DEVBUF, xennet_rx_mbuf_free, req);
909 			m->m_flags |= M_EXT_RW; /* we own the buffer */
910 			req->rxreq_gntref = GRANT_STACK_REF;
911 		} else {
912 			/*
913 			 * This was our last receive buffer, allocate
914 			 * memory, copy data and push the receive
915 			 * buffer back to the hypervisor.
916 			 */
917 			m->m_len = min(MHLEN, rx->status);
918 			m->m_pkthdr.len = 0;
919 			m_copyback(m, 0, rx->status, pktp);
920 			xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE, req);
921 			if (m->m_pkthdr.len < rx->status) {
922 				/* out of memory, just drop packets */
923 				ifp->if_ierrors++;
924 				m_freem(m);
925 				continue;
926 			}
927 		}
928 		if ((rx->flags & NETRXF_csum_blank) != 0) {
929 			xennet_checksum_fill(&m);
930 			if (m == NULL) {
931 				ifp->if_ierrors++;
932 				continue;
933 			}
934 		}
935 #if NBPFILTER > 0
936 		/*
937 		 * Pass packet to bpf if there is a listener.
938 		 */
939 		if (ifp->if_bpf)
940 			bpf_mtap(ifp->if_bpf, m);
941 #endif
942 
943 		ifp->if_ipackets++;
944 
945 		/* Pass the packet up. */
946 		(*ifp->if_input)(ifp, m);
947 	}
948 	xen_rmb();
949 	sc->sc_rx_ring.rsp_cons = i;
950 	RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_rx_ring, more_to_do);
951 	if (more_to_do)
952 		goto again;
953 	return 1;
954 }
955 
956 /*
957  * The output routine of a xennet interface
958  * Called at splnet.
959  */
960 void
961 xennet_start(struct ifnet *ifp)
962 {
963 	struct xennet_xenbus_softc *sc = ifp->if_softc;
964 
965 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start()\n", device_xname(sc->sc_dev)));
966 
967 #if NRND > 0
968 	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
969 #endif
970 
971 	xennet_tx_complete(sc);
972 
973 	if (__predict_false(
974 	    (ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING))
975 		return;
976 
977 	/*
978 	 * The Xen communication channel is much more efficient if we can
979 	 * schedule batches of packets for domain0. To achieve this, we
980 	 * schedule a soft interrupt, and just return. This way, the network
981 	 * stack will enqueue all pending mbufs in the interface's send queue
982 	 * before the queue is processed by xennet_softstart().
983 	 */
984 	softint_schedule(sc->sc_softintr);
985 	return;
986 }
987 
988 /*
989  * Prepare mbufs for TX, and notify the backend when finished.
990  * Called at splsoftnet.
991  */
992 void
993 xennet_softstart(void *arg)
994 {
995 	struct xennet_xenbus_softc *sc = arg;
996 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
997 	struct mbuf *m, *new_m;
998 	netif_tx_request_t *txreq;
999 	RING_IDX req_prod;
1000 	paddr_t pa, pa2;
1001 	struct xennet_txreq *req;
1002 	int notify;
1003 	int do_notify = 0;
1004 	int s;
1005 
1006 	s = splnet();
1007 	if (__predict_false(
1008 	    (ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)) {
1009 		splx(s);
1010 		return;
1011 	}
1012 
1013 	req_prod = sc->sc_tx_ring.req_prod_pvt;
1014 	while (/*CONSTCOND*/1) {
1015 		uint16_t txflags;
1016 
1017 		req = SLIST_FIRST(&sc->sc_txreq_head);
1018 		if (__predict_false(req == NULL)) {
1019 			ifp->if_flags |= IFF_OACTIVE;
1020 			break;
1021 		}
1022 		IFQ_POLL(&ifp->if_snd, m);
1023 		if (m == NULL)
1024 			break;
1025 
1026 		switch (m->m_flags & (M_EXT|M_EXT_CLUSTER)) {
1027 		case M_EXT|M_EXT_CLUSTER:
1028 			KASSERT(m->m_ext.ext_paddr != M_PADDR_INVALID);
1029 			pa = m->m_ext.ext_paddr +
1030 				(m->m_data - m->m_ext.ext_buf);
1031 			break;
1032 		case 0:
1033 			KASSERT(m->m_paddr != M_PADDR_INVALID);
1034 			pa = m->m_paddr + M_BUFOFFSET(m) +
1035 				(m->m_data - M_BUFADDR(m));
1036 			break;
1037 		default:
1038 			if (__predict_false(
1039 			    !pmap_extract(pmap_kernel(), (vaddr_t)m->m_data,
1040 			    &pa))) {
1041 				panic("xennet_softstart: no pa");
1042 			}
1043 			break;
1044 		}
1045 
1046 		if ((m->m_pkthdr.csum_flags &
1047 		    (M_CSUM_TCPv4 | M_CSUM_UDPv4)) != 0) {
1048 			txflags = NETTXF_csum_blank;
1049 		} else {
1050 			txflags = 0;
1051 		}
1052 
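		/*
		 * A TX request can only describe a single contiguous page.
		 * If the packet is spread over several mbufs, or its data
		 * crosses a page boundary (the first and last byte differ in
		 * their PG_FRAME bits), copy it into a freshly allocated
		 * mbuf or cluster that fits within one page.
		 */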
1053 		if (m->m_pkthdr.len != m->m_len ||
1054 		    (pa ^ (pa + m->m_pkthdr.len - 1)) & PG_FRAME) {
1055 
1056 			MGETHDR(new_m, M_DONTWAIT, MT_DATA);
1057 			if (__predict_false(new_m == NULL)) {
1058 				printf("%s: cannot allocate new mbuf\n",
1059 				       device_xname(sc->sc_dev));
1060 				break;
1061 			}
1062 			if (m->m_pkthdr.len > MHLEN) {
1063 				MCLGET(new_m, M_DONTWAIT);
1064 				if (__predict_false(
1065 				    (new_m->m_flags & M_EXT) == 0)) {
1066 					DPRINTF(("%s: no mbuf cluster\n",
1067 					    device_xname(sc->sc_dev)));
1068 					m_freem(new_m);
1069 					break;
1070 				}
1071 			}
1072 
1073 			m_copydata(m, 0, m->m_pkthdr.len, mtod(new_m, void *));
1074 			new_m->m_len = new_m->m_pkthdr.len = m->m_pkthdr.len;
1075 
1076 			if ((new_m->m_flags & M_EXT) != 0) {
1077 				pa = new_m->m_ext.ext_paddr;
1078 				KASSERT(new_m->m_data == new_m->m_ext.ext_buf);
1079 				KASSERT(pa != M_PADDR_INVALID);
1080 			} else {
1081 				pa = new_m->m_paddr;
1082 				KASSERT(pa != M_PADDR_INVALID);
1083 				KASSERT(new_m->m_data == M_BUFADDR(new_m));
1084 				pa += M_BUFOFFSET(new_m);
1085 			}
1086 			if (__predict_false(xengnt_grant_access(
1087 			    sc->sc_xbusd->xbusd_otherend_id,
1088 			    xpmap_ptom_masked(pa),
1089 			    GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1090 				m_freem(new_m);
1091 				ifp->if_flags |= IFF_OACTIVE;
1092 				break;
1093 			}
1094 			/* we will be able to send new_m */
1095 			IFQ_DEQUEUE(&ifp->if_snd, m);
1096 			m_freem(m);
1097 			m = new_m;
1098 		} else {
1099 			if (__predict_false(xengnt_grant_access(
1100 			    sc->sc_xbusd->xbusd_otherend_id,
1101 			    xpmap_ptom_masked(pa),
1102 			    GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1103 				ifp->if_flags |= IFF_OACTIVE;
1104 				break;
1105 			}
1106 			/* we will be able to send m */
1107 			IFQ_DEQUEUE(&ifp->if_snd, m);
1108 		}
1109 		MCLAIM(m, &sc->sc_ethercom.ec_tx_mowner);
1110 
1111 		KASSERT(((pa ^ (pa + m->m_pkthdr.len -  1)) & PG_FRAME) == 0);
1112 
1113 		SLIST_REMOVE_HEAD(&sc->sc_txreq_head, txreq_next);
1114 		req->txreq_m = m;
1115 
1116 		DPRINTFN(XEDB_MBUF, ("xennet_start id %d, "
1117 		    "mbuf %p, buf %p/%p/%p, size %d\n",
1118 		    req->txreq_id, m, mtod(m, void *), (void *)pa,
1119 		    (void *)xpmap_ptom_masked(pa), m->m_pkthdr.len));
1120 		pmap_extract_ma(pmap_kernel(), mtod(m, vaddr_t), &pa2);
1121 		DPRINTFN(XEDB_MBUF, ("xennet_start pa %p ma %p/%p\n",
1122 		    (void *)pa, (void *)xpmap_ptom_masked(pa), (void *)pa2));
1123 #ifdef XENNET_DEBUG_DUMP
1124 		xennet_hex_dump(mtod(m, u_char *), m->m_pkthdr.len, "s",
1125 			       	req->txreq_id);
1126 #endif
1127 
1128 		txreq = RING_GET_REQUEST(&sc->sc_tx_ring, req_prod);
1129 		txreq->id = req->txreq_id;
1130 		txreq->gref = req->txreq_gntref;
1131 		txreq->offset = pa & ~PG_FRAME;
1132 		txreq->size = m->m_pkthdr.len;
1133 		txreq->flags = txflags;
1134 
1135 		req_prod++;
1136 		sc->sc_tx_ring.req_prod_pvt = req_prod;
1137 		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_tx_ring, notify);
1138 		if (notify)
1139 			do_notify = 1;
1140 
1141 #ifdef XENNET_DEBUG
1142 		DPRINTFN(XEDB_MEM, ("packet addr %p/%p, physical %p/%p, "
1143 		    "m_paddr %p, len %d/%d\n", M_BUFADDR(m), mtod(m, void *),
1144 		    (void *)*kvtopte(mtod(m, vaddr_t)),
1145 		    (void *)xpmap_mtop(*kvtopte(mtod(m, vaddr_t))),
1146 		    (void *)m->m_paddr, m->m_pkthdr.len, m->m_len));
1147 		DPRINTFN(XEDB_MEM, ("id %d gref %d offset %d size %d flags %d"
1148 		    " prod %d\n",
1149 		    txreq->id, txreq->gref, txreq->offset, txreq->size,
1150 		    txreq->flags, req_prod));
1151 #endif
1152 
1153 #if NBPFILTER > 0
1154 		/*
1155 		 * Pass packet to bpf if there is a listener.
1156 		 */
1157 		if (ifp->if_bpf) {
1158 			bpf_mtap(ifp->if_bpf, m);
1159 		}
1160 #endif
1161 	}
1162 
1163 	if (do_notify) {
1164 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
1165 		ifp->if_timer = 5;
1166 	}
1167 	splx(s);
1168 
1169 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start() done\n",
1170 	    device_xname(sc->sc_dev)));
1171 }
1172 
1173 int
1174 xennet_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1175 {
1176 #ifdef XENNET_DEBUG
1177 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1178 #endif
1179 	int s, error = 0;
1180 
1181 	s = splnet();
1182 
1183 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl()\n",
1184 	    device_xname(sc->sc_dev)));
1185 	error = ether_ioctl(ifp, cmd, data);
1186 	if (error == ENETRESET)
1187 		error = 0;
1188 	splx(s);
1189 
1190 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() returning %d\n",
1191 	    device_xname(sc->sc_dev), error));
1192 
1193 	return error;
1194 }
1195 
1196 void
1197 xennet_watchdog(struct ifnet *ifp)
1198 {
1199 	aprint_verbose_ifnet(ifp, "xennet_watchdog\n");
1200 }
1201 
1202 int
1203 xennet_init(struct ifnet *ifp)
1204 {
1205 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1206 	int s = splnet();
1207 
1208 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_init()\n",
1209 	    device_xname(sc->sc_dev)));
1210 
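	/*
	 * On the first init, request an event for the next RX response,
	 * unmask and kick the event channel, and reset the interface state.
	 */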
1211 	if ((ifp->if_flags & IFF_RUNNING) == 0) {
1212 		sc->sc_rx_ring.sring->rsp_event =
1213 		    sc->sc_rx_ring.rsp_cons + 1;
1214 		hypervisor_enable_event(sc->sc_evtchn);
1215 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
1216 		xennet_reset(sc);
1217 	}
1218 	ifp->if_flags |= IFF_RUNNING;
1219 	ifp->if_flags &= ~IFF_OACTIVE;
1220 	ifp->if_timer = 0;
1221 	splx(s);
1222 	return 0;
1223 }
1224 
1225 void
1226 xennet_stop(struct ifnet *ifp, int disable)
1227 {
1228 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1229 	int s = splnet();
1230 
1231 	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1232 	hypervisor_mask_event(sc->sc_evtchn);
1233 	xennet_reset(sc);
1234 	splx(s);
1235 }
1236 
1237 void
1238 xennet_reset(struct xennet_xenbus_softc *sc)
1239 {
1240 
1241 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_reset()\n",
1242 	    device_xname(sc->sc_dev)));
1243 }
1244 
1245 #if defined(NFS_BOOT_BOOTSTATIC)
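/*
 * NFS diskless boot helper: fill in struct nfs_diskless from the network
 * parameters passed on the Xen kernel command line, as parsed by
 * xen_parse_cmdline().
 */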
1246 int
1247 xennet_bootstatic_callback(struct nfs_diskless *nd)
1248 {
1249 #if 0
1250 	struct ifnet *ifp = nd->nd_ifp;
1251 	struct xennet_xenbus_softc *sc =
1252 	    (struct xennet_xenbus_softc *)ifp->if_softc;
1253 #endif
1254 	int flags = 0;
1255 	union xen_cmdline_parseinfo xcp;
1256 	struct sockaddr_in *sin;
1257 
1258 	memset(&xcp, 0, sizeof(xcp.xcp_netinfo));
1259 	xcp.xcp_netinfo.xi_ifno = /* XXX sc->sc_ifno */ 0;
1260 	xcp.xcp_netinfo.xi_root = nd->nd_root.ndm_host;
1261 	xen_parse_cmdline(XEN_PARSE_NETINFO, &xcp);
1262 
1263 	if (xcp.xcp_netinfo.xi_root[0] != '\0') {
1264 		flags |= NFS_BOOT_HAS_SERVER;
1265 		if (strchr(xcp.xcp_netinfo.xi_root, ':') != NULL)
1266 			flags |= NFS_BOOT_HAS_ROOTPATH;
1267 	}
1268 
1269 	nd->nd_myip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[0]);
1270 	nd->nd_gwip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[2]);
1271 	nd->nd_mask.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[3]);
1272 
1273 	sin = (struct sockaddr_in *) &nd->nd_root.ndm_saddr;
1274 	memset((void *)sin, 0, sizeof(*sin));
1275 	sin->sin_len = sizeof(*sin);
1276 	sin->sin_family = AF_INET;
1277 	sin->sin_addr.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[1]);
1278 
1279 	if (nd->nd_myip.s_addr)
1280 		flags |= NFS_BOOT_HAS_MYIP;
1281 	if (nd->nd_gwip.s_addr)
1282 		flags |= NFS_BOOT_HAS_GWIP;
1283 	if (nd->nd_mask.s_addr)
1284 		flags |= NFS_BOOT_HAS_MASK;
1285 	if (sin->sin_addr.s_addr)
1286 		flags |= NFS_BOOT_HAS_SERVADDR;
1287 
1288 	return flags;
1289 }
1290 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
1291 
1292 #ifdef XENNET_DEBUG_DUMP
1293 #define XCHR(x) hexdigits[(x) & 0xf]
1294 static void
1295 xennet_hex_dump(const unsigned char *pkt, size_t len, const char *type, int id)
1296 {
1297 	size_t i, j;
1298 
1299 	printf("pkt %p len %zu/%zx type %s id %d\n", pkt, len, len, type, id);
1300 	printf("00000000  ");
1301 	for(i=0; i<len; i++) {
1302 		printf("%c%c ", XCHR(pkt[i]>>4), XCHR(pkt[i]));
1303 		if ((i+1) % 16 == 8)
1304 			printf(" ");
1305 		if ((i+1) % 16 == 0) {
1306 			printf(" %c", '|');
1307 			for(j=0; j<16; j++)
1308 				printf("%c", pkt[i-15+j]>=32 &&
1309 				    pkt[i-15+j]<127?pkt[i-15+j]:'.');
1310 			printf("%c\n%c%c%c%c%c%c%c%c  ", '|',
1311 			    XCHR((i+1)>>28), XCHR((i+1)>>24),
1312 			    XCHR((i+1)>>20), XCHR((i+1)>>16),
1313 			    XCHR((i+1)>>12), XCHR((i+1)>>8),
1314 			    XCHR((i+1)>>4), XCHR(i+1));
1315 		}
1316 	}
1317 	printf("\n");
1318 }
1319 #undef XCHR
1320 #endif
1321