xref: /netbsd-src/sys/arch/xen/xen/if_xennet_xenbus.c (revision 413d532bcc3f62d122e56d92e13ac64825a40baf)
1 /*      $NetBSD: if_xennet_xenbus.c,v 1.62 2012/06/30 23:36:20 jym Exp $      */
2 
3 /*
4  * Copyright (c) 2006 Manuel Bouyer.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  */
27 
28 /*
29  * Copyright (c) 2004 Christian Limpach.
30  * All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  *
41  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
42  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
43  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
44  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
45  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
46  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
48  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
50  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51  */
52 
53 /*
54  * This file contains the xennet frontend code required for the network
55  * communication between two Xen domains.
56  * It resembles xbd, but is a little more complex as it must deal with two
57  * rings:
58  * - the TX ring, to transmit packets to backend (inside => outside)
59  * - the RX ring, to receive packets from backend (outside => inside)
60  *
61  * The principles are as follows.
62  *
63  * For TX:
64  * The purpose is to transmit packets to the outside. The entry point is
65  * xennet_start() (the default output routine of xennet), which schedules a
66  * softint, xennet_softstart(). xennet_softstart() generates the requests
67  * associated with the queued TX mbufs (see altq(9)).
68  * The backend's responses are processed by xennet_tx_complete(), called either
69  * from:
70  * - xennet_start()
71  * - xennet_handler(), during an asynchronous event notification from the
72  *   backend (similar to an IRQ).
73  *
74  * For RX:
75  * The purpose is to process the packets received from the outside. RX buffers
76  * are pre-allocated through xennet_alloc_rx_buffer(), during xennet autoconf
77  * attach. During pre-allocation, the frontend pushes requests into the I/O
78  * ring, in preparation for incoming packets from the backend.
79  * When RX packets need to be processed, the backend takes the requests
80  * previously offered by the frontend and pushes the associated responses into
81  * the I/O ring. When done, it notifies the frontend through an event
82  * notification, which asynchronously calls xennet_handler() in the frontend.
83  * xennet_handler() processes the responses, generates the associated mbufs,
84  * and passes them to the MI layer for further processing.
85  */
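
/*
 * Call-flow sketch, derived from the code below:
 *
 * TX:
 *	xennet_start(ifp)
 *	  -> softint_schedule(sc->sc_softintr)
 *	       -> xennet_softstart(sc)
 *		    - dequeue mbufs from ifp->if_snd
 *		    - grant the mbuf pages to the backend
 *		    - fill netif_tx_request_t slots in sc_tx_ring
 *		    - RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() and, if needed,
 *		      hypervisor_notify_via_evtchn(sc->sc_evtchn)
 *	completions: xennet_start()/xennet_handler() -> xennet_tx_complete()
 *	  -> revoke the grants, m_freem() the mbufs, recycle the txreqs
 *
 * RX:
 *	xennet_alloc_rx_buffer()
 *	  -> grant (copy mode) or transfer (flip mode) pre-allocated pages
 *	     to the backend and push netif_rx_request_t slots in sc_rx_ring
 *	the backend fills in responses and signals the event channel
 *	xennet_handler()
 *	  -> for each netif_rx_response_t, wrap the data in an mbuf and
 *	     pass it to (*ifp->if_input)(); xennet_rx_free_req() recycles
 *	     the request and may call xennet_alloc_rx_buffer() again
 */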
86 
87 #include <sys/cdefs.h>
88 __KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.62 2012/06/30 23:36:20 jym Exp $");
89 
90 #include "opt_xen.h"
91 #include "opt_nfs_boot.h"
92 
93 #include <sys/param.h>
94 #include <sys/device.h>
95 #include <sys/conf.h>
96 #include <sys/kernel.h>
97 #include <sys/proc.h>
98 #include <sys/systm.h>
99 #include <sys/intr.h>
100 #include <sys/rnd.h>
101 
102 #include <net/if.h>
103 #include <net/if_dl.h>
104 #include <net/if_ether.h>
105 #include <net/bpf.h>
106 #include <net/bpfdesc.h>
107 
108 #if defined(NFS_BOOT_BOOTSTATIC)
109 #include <sys/fstypes.h>
110 #include <sys/mount.h>
111 #include <sys/statvfs.h>
112 #include <netinet/in.h>
113 #include <nfs/rpcv2.h>
114 #include <nfs/nfsproto.h>
115 #include <nfs/nfs.h>
116 #include <nfs/nfsmount.h>
117 #include <nfs/nfsdiskless.h>
118 #include <xen/if_xennetvar.h>
119 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
120 
121 #include <xen/xennet_checksum.h>
122 
123 #include <uvm/uvm.h>
124 
125 #include <xen/hypervisor.h>
126 #include <xen/evtchn.h>
127 #include <xen/granttables.h>
128 #include <xen/xen-public/io/netif.h>
129 #include <xen/xenpmap.h>
130 
131 #include <xen/xenbus.h>
132 #include "locators.h"
133 
134 #undef XENNET_DEBUG_DUMP
135 #undef XENNET_DEBUG
136 #ifdef XENNET_DEBUG
137 #define XEDB_FOLLOW     0x01
138 #define XEDB_INIT       0x02
139 #define XEDB_EVENT      0x04
140 #define XEDB_MBUF       0x08
141 #define XEDB_MEM        0x10
142 int xennet_debug = 0xff;
143 #define DPRINTF(x) if (xennet_debug) printf x;
144 #define DPRINTFN(n,x) if (xennet_debug & (n)) printf x;
145 #else
146 #define DPRINTF(x)
147 #define DPRINTFN(n,x)
148 #endif
149 
150 #define GRANT_INVALID_REF -1 /* entry is free */
151 
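/*
 * Number of request/response slots that fit in the one-page shared ring
 * (computed by the generic Xen I/O ring macros).
 */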
152 #define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
153 #define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
154 
155 struct xennet_txreq {
156 	SLIST_ENTRY(xennet_txreq) txreq_next;
157 	uint16_t txreq_id; /* ID passed to backend */
158 	grant_ref_t txreq_gntref; /* grant ref of this request */
159 	struct mbuf *txreq_m; /* mbuf being transmitted */
160 };
161 
162 struct xennet_rxreq {
163 	SLIST_ENTRY(xennet_rxreq) rxreq_next;
164 	uint16_t rxreq_id; /* ID passed to backend */
165 	grant_ref_t rxreq_gntref; /* grant ref of this request */
166 /* va/pa for this receive buf. ma will be provided by backend */
167 	paddr_t rxreq_pa;
168 	vaddr_t rxreq_va;
169 	struct xennet_xenbus_softc *rxreq_sc; /* pointer to our interface */
170 };
171 
172 struct xennet_xenbus_softc {
173 	device_t sc_dev;
174 	struct ethercom sc_ethercom;
175 	uint8_t sc_enaddr[6];
176 	struct xenbus_device *sc_xbusd;
177 
178 	netif_tx_front_ring_t sc_tx_ring;
179 	netif_rx_front_ring_t sc_rx_ring;
180 
181 	unsigned int sc_evtchn;
182 	void *sc_softintr;
183 
184 	grant_ref_t sc_tx_ring_gntref;
185 	grant_ref_t sc_rx_ring_gntref;
186 
187 	kmutex_t sc_tx_lock; /* protects free TX list, below */
188 	kmutex_t sc_rx_lock; /* protects free RX list, below */
189 	struct xennet_txreq sc_txreqs[NET_TX_RING_SIZE];
190 	struct xennet_rxreq sc_rxreqs[NET_RX_RING_SIZE];
191 	SLIST_HEAD(,xennet_txreq) sc_txreq_head; /* list of free TX requests */
192 	SLIST_HEAD(,xennet_rxreq) sc_rxreq_head; /* list of free RX requests */
193 	int sc_free_rxreql; /* number of free receive request structs */
194 
195 	int sc_backend_status; /* our status with backend */
196 #define BEST_CLOSED		0
197 #define BEST_DISCONNECTED	1
198 #define BEST_CONNECTED		2
199 #define BEST_SUSPENDED		3
200 	unsigned long sc_rx_feature;
201 #define FEATURE_RX_FLIP		0
202 #define FEATURE_RX_COPY		1
203 	krndsource_t     sc_rnd_source;
204 };
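/* RX requests queued for the backend that have no response yet. */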
205 #define SC_NLIVEREQ(sc) ((sc)->sc_rx_ring.req_prod_pvt - \
206 			    (sc)->sc_rx_ring.sring->rsp_prod)
207 
208 /* too big to be on stack */
209 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
210 static u_long xennet_pages[NET_RX_RING_SIZE];
211 
212 static pool_cache_t if_xennetrxbuf_cache;
213 static int if_xennetrxbuf_cache_inited = 0;
214 
215 static int  xennet_xenbus_match(device_t, cfdata_t, void *);
216 static void xennet_xenbus_attach(device_t, device_t, void *);
217 static int  xennet_xenbus_detach(device_t, int);
218 static void xennet_backend_changed(void *, XenbusState);
219 
220 static void xennet_alloc_rx_buffer(struct xennet_xenbus_softc *);
221 static void xennet_free_rx_buffer(struct xennet_xenbus_softc *);
222 static void xennet_tx_complete(struct xennet_xenbus_softc *);
223 static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *);
224 static void xennet_rx_free_req(struct xennet_rxreq *);
225 static int  xennet_handler(void *);
226 static bool xennet_talk_to_backend(struct xennet_xenbus_softc *);
227 #ifdef XENNET_DEBUG_DUMP
228 static void xennet_hex_dump(const unsigned char *, size_t, const char *, int);
229 #endif
230 
231 static int  xennet_init(struct ifnet *);
232 static void xennet_stop(struct ifnet *, int);
233 static void xennet_reset(struct xennet_xenbus_softc *);
234 static void xennet_softstart(void *);
235 static void xennet_start(struct ifnet *);
236 static int  xennet_ioctl(struct ifnet *, u_long, void *);
237 static void xennet_watchdog(struct ifnet *);
238 
239 static bool xennet_xenbus_suspend(device_t dev, const pmf_qual_t *);
240 static bool xennet_xenbus_resume (device_t dev, const pmf_qual_t *);
241 
242 CFATTACH_DECL_NEW(xennet, sizeof(struct xennet_xenbus_softc),
243    xennet_xenbus_match, xennet_xenbus_attach, xennet_xenbus_detach, NULL);
244 
245 static int
246 xennet_xenbus_match(device_t parent, cfdata_t match, void *aux)
247 {
248 	struct xenbusdev_attach_args *xa = aux;
249 
250 	if (strcmp(xa->xa_type, "vif") != 0)
251 		return 0;
252 
253 	if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
254 	    match->cf_loc[XENBUSCF_ID] != xa->xa_id)
255 		return 0;
256 
257 	return 1;
258 }
259 
260 static void
261 xennet_xenbus_attach(device_t parent, device_t self, void *aux)
262 {
263 	struct xennet_xenbus_softc *sc = device_private(self);
264 	struct xenbusdev_attach_args *xa = aux;
265 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
266 	int err;
267 	netif_tx_sring_t *tx_ring;
268 	netif_rx_sring_t *rx_ring;
269 	RING_IDX i;
270 	char *val, *e, *p;
271 	int s;
272 	extern int ifqmaxlen; /* XXX */
273 #ifdef XENNET_DEBUG
274 	char **dir;
275 	int dir_n = 0;
276 	char id_str[20];
277 #endif
278 
279 	aprint_normal(": Xen Virtual Network Interface\n");
280 	sc->sc_dev = self;
281 
282 #ifdef XENNET_DEBUG
283 	printf("path: %s\n", xa->xa_xbusd->xbusd_path);
284 	snprintf(id_str, sizeof(id_str), "%d", xa->xa_id);
285 	err = xenbus_directory(NULL, "device/vif", id_str, &dir_n, &dir);
286 	if (err) {
287 		aprint_error_dev(self, "xenbus_directory err %d\n", err);
288 	} else {
289 		printf("%s/\n", xa->xa_xbusd->xbusd_path);
290 		for (i = 0; i < dir_n; i++) {
291 			printf("\t/%s", dir[i]);
292 			err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path,
293 				          dir[i], NULL, &val);
294 			if (err) {
295 				aprint_error_dev(self, "xenbus_read err %d\n",
296 					         err);
297 			} else {
298 				printf(" = %s\n", val);
299 				free(val, M_DEVBUF);
300 			}
301 		}
302 	}
303 #endif /* XENNET_DEBUG */
304 	sc->sc_xbusd = xa->xa_xbusd;
305 	sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;
306 
307 	/* xenbus ensures two devices can't be probed at the same time */
308 	if (if_xennetrxbuf_cache_inited == 0) {
309 		if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
310 		    "xnfrx", NULL, IPL_VM, NULL, NULL, NULL);
311 		if_xennetrxbuf_cache_inited = 1;
312 	}
313 
314 
315 	/* initialize free TX and RX request lists */
316 	mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET);
317 	SLIST_INIT(&sc->sc_txreq_head);
318 	for (i = 0; i < NET_TX_RING_SIZE; i++) {
319 		sc->sc_txreqs[i].txreq_id = i;
320 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, &sc->sc_txreqs[i],
321 		    txreq_next);
322 	}
323 	mutex_init(&sc->sc_rx_lock, MUTEX_DEFAULT, IPL_NET);
324 	SLIST_INIT(&sc->sc_rxreq_head);
325 	s = splvm();
326 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
327 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
328 		rxreq->rxreq_id = i;
329 		rxreq->rxreq_sc = sc;
330 		rxreq->rxreq_va = (vaddr_t)pool_cache_get_paddr(
331 		    if_xennetrxbuf_cache, PR_WAITOK, &rxreq->rxreq_pa);
332 		if (rxreq->rxreq_va == 0)
333 			break;
334 		rxreq->rxreq_gntref = GRANT_INVALID_REF;
335 		SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next);
336 	}
337 	splx(s);
338 	sc->sc_free_rxreql = i;
339 	if (sc->sc_free_rxreql == 0) {
340 		aprint_error_dev(self, "failed to allocate rx memory\n");
341 		return;
342 	}
343 
344 	/* read mac address */
345 	err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path, "mac", NULL, &val);
346 	if (err) {
347 		aprint_error_dev(self, "can't read mac address, err %d\n", err);
348 		return;
349 	}
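	/*
	 * The "mac" node holds a string such as "00:16:3e:00:00:01";
	 * parse each colon-separated hex byte into sc_enaddr.
	 */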
350 	for (i = 0, p = val; i < 6; i++) {
351 		sc->sc_enaddr[i] = strtoul(p, &e, 16);
352 		if ((e[0] == '\0' && i != 5) && e[0] != ':') {
353 			aprint_error_dev(self,
354 			    "%s is not a valid mac address\n", val);
355 			free(val, M_DEVBUF);
356 			return;
357 		}
358 		p = &e[1];
359 	}
360 	free(val, M_DEVBUF);
361 	aprint_normal_dev(self, "MAC address %s\n",
362 	    ether_sprintf(sc->sc_enaddr));
363 	/* Initialize ifnet structure and attach interface */
364 	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
365 	ifp->if_softc = sc;
366 	ifp->if_start = xennet_start;
367 	ifp->if_ioctl = xennet_ioctl;
368 	ifp->if_watchdog = xennet_watchdog;
369 	ifp->if_init = xennet_init;
370 	ifp->if_stop = xennet_stop;
371 	ifp->if_flags = IFF_BROADCAST|IFF_SIMPLEX|IFF_NOTRAILERS|IFF_MULTICAST;
372 	ifp->if_timer = 0;
373 	ifp->if_snd.ifq_maxlen = max(ifqmaxlen, NET_TX_RING_SIZE * 2);
374 	ifp->if_capabilities = IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx;
375 	IFQ_SET_READY(&ifp->if_snd);
376 	if_attach(ifp);
377 	ether_ifattach(ifp, sc->sc_enaddr);
378 	sc->sc_softintr = softint_establish(SOFTINT_NET, xennet_softstart, sc);
379 	if (sc->sc_softintr == NULL)
380 		panic("%s: can't establish soft interrupt",
381 			device_xname(self));
382 
383 	/* alloc shared rings */
384 	tx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
385 	    UVM_KMF_WIRED);
386 	rx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
387 	    UVM_KMF_WIRED);
388 	if (tx_ring == NULL || rx_ring == NULL)
389 		panic("%s: can't alloc rings", device_xname(self));
390 
391 	sc->sc_tx_ring.sring = tx_ring;
392 	sc->sc_rx_ring.sring = rx_ring;
393 
394 	/* resume shared structures and tell backend that we are ready */
395 	if (xennet_xenbus_resume(self, PMF_Q_NONE) == false) {
396 		uvm_km_free(kernel_map, (vaddr_t)tx_ring, PAGE_SIZE,
397 		    UVM_KMF_WIRED);
398 		uvm_km_free(kernel_map, (vaddr_t)rx_ring, PAGE_SIZE,
399 		    UVM_KMF_WIRED);
400 		return;
401 	}
402 
403 	rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev),
404 	    RND_TYPE_NET, 0);
405 
406 	if (!pmf_device_register(self, xennet_xenbus_suspend,
407 	    xennet_xenbus_resume))
408 		aprint_error_dev(self, "couldn't establish power handler\n");
409 	else
410 		pmf_class_network_register(self, ifp);
411 }
412 
413 static int
414 xennet_xenbus_detach(device_t self, int flags)
415 {
416 	struct xennet_xenbus_softc *sc = device_private(self);
417 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
418 	int s0, s1;
419 	RING_IDX i;
420 
421 	DPRINTF(("%s: xennet_xenbus_detach\n", device_xname(self)));
422 	s0 = splnet();
423 	xennet_stop(ifp, 1);
424 	/* wait for pending TX to complete, and collect pending RX packets */
425 	xennet_handler(sc);
426 	while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
427 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_detach", hz/2);
428 		xennet_handler(sc);
429 	}
430 	xennet_free_rx_buffer(sc);
431 
432 	s1 = splvm();
433 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
434 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
435 		uvm_km_free(kernel_map, rxreq->rxreq_va, PAGE_SIZE,
436 		    UVM_KMF_WIRED);
437 	}
438 	splx(s1);
439 
440 	ether_ifdetach(ifp);
441 	if_detach(ifp);
442 
443 	/* Unhook the entropy source. */
444 	rnd_detach_source(&sc->sc_rnd_source);
445 
446 	while (xengnt_status(sc->sc_tx_ring_gntref)) {
447 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_txref", hz/2);
448 	}
449 	xengnt_revoke_access(sc->sc_tx_ring_gntref);
450 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_tx_ring.sring, PAGE_SIZE,
451 	    UVM_KMF_WIRED);
452 	while (xengnt_status(sc->sc_rx_ring_gntref)) {
453 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_rxref", hz/2);
454 	}
455 	xengnt_revoke_access(sc->sc_rx_ring_gntref);
456 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_rx_ring.sring, PAGE_SIZE,
457 	    UVM_KMF_WIRED);
458 	softint_disestablish(sc->sc_softintr);
459 	event_remove_handler(sc->sc_evtchn, &xennet_handler, sc);
460 	splx(s0);
461 
462 	pmf_device_deregister(self);
463 
464 	DPRINTF(("%s: xennet_xenbus_detach done\n", device_xname(self)));
465 	return 0;
466 }
467 
468 static bool
469 xennet_xenbus_resume(device_t dev, const pmf_qual_t *qual)
470 {
471 	struct xennet_xenbus_softc *sc = device_private(dev);
472 	int error;
473 	netif_tx_sring_t *tx_ring;
474 	netif_rx_sring_t *rx_ring;
475 	paddr_t ma;
476 
477 	/* invalidate the RX and TX rings */
478 	if (sc->sc_backend_status == BEST_SUSPENDED) {
479 		/*
480 		 * The device was suspended, so ensure that the grant accesses
481 		 * associated with the previous RX and TX rings are revoked.
482 		 */
483 		xengnt_revoke_access(sc->sc_tx_ring_gntref);
484 		xengnt_revoke_access(sc->sc_rx_ring_gntref);
485 	}
486 
487 	sc->sc_tx_ring_gntref = GRANT_INVALID_REF;
488 	sc->sc_rx_ring_gntref = GRANT_INVALID_REF;
489 
490 	tx_ring = sc->sc_tx_ring.sring;
491 	rx_ring = sc->sc_rx_ring.sring;
492 
493 	/* Initialize rings */
494 	memset(tx_ring, 0, PAGE_SIZE);
495 	SHARED_RING_INIT(tx_ring);
496 	FRONT_RING_INIT(&sc->sc_tx_ring, tx_ring, PAGE_SIZE);
497 
498 	memset(rx_ring, 0, PAGE_SIZE);
499 	SHARED_RING_INIT(rx_ring);
500 	FRONT_RING_INIT(&sc->sc_rx_ring, rx_ring, PAGE_SIZE);
501 
502 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)tx_ring, &ma);
503 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_tx_ring_gntref);
504 	if (error)
505 		goto abort_resume;
506 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)rx_ring, &ma);
507 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_rx_ring_gntref);
508 	if (error)
509 		goto abort_resume;
510 	error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
511 	if (error)
512 		goto abort_resume;
513 	aprint_verbose_dev(dev, "using event channel %d\n",
514 	    sc->sc_evtchn);
515 	event_set_handler(sc->sc_evtchn, &xennet_handler, sc,
516 	    IPL_NET, device_xname(dev));
517 	return true;
518 
519 abort_resume:
520 	xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device");
521 	return false;
522 }
523 
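/*
 * Publish the ring references, event channel and requested RX mode in the
 * xenstore so that the backend can connect, retrying the transaction on
 * EAGAIN.  After a successful transaction the frontend directory contains
 * nodes like the following (path and values are illustrative only):
 *
 *	device/vif/0/vifname           = "xennet0"
 *	device/vif/0/tx-ring-ref       = "8"
 *	device/vif/0/rx-ring-ref       = "9"
 *	device/vif/0/request-rx-copy   = "1"
 *	device/vif/0/feature-rx-notify = "1"
 *	device/vif/0/event-channel     = "4"
 */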
524 static bool
525 xennet_talk_to_backend(struct xennet_xenbus_softc *sc)
526 {
527 	int error;
528 	unsigned long rx_copy;
529 	struct xenbus_transaction *xbt;
530 	const char *errmsg;
531 
532 	error = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
533 	    "feature-rx-copy", &rx_copy, 10);
534 	if (error)
535 		rx_copy = 0; /* default value if key is absent */
536 
537 	if (rx_copy == 1) {
538 		aprint_normal_dev(sc->sc_dev, "using RX copy mode\n");
539 		sc->sc_rx_feature = FEATURE_RX_COPY;
540 	} else {
541 		aprint_normal_dev(sc->sc_dev, "using RX flip mode\n");
542 		sc->sc_rx_feature = FEATURE_RX_FLIP;
543 	}
544 
545 again:
546 	xbt = xenbus_transaction_start();
547 	if (xbt == NULL)
548 		return false;
549 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
550 	    "vifname", "%s", device_xname(sc->sc_dev));
551 	if (error) {
552 		errmsg = "vifname";
553 		goto abort_transaction;
554 	}
555 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
556 	    "tx-ring-ref","%u", sc->sc_tx_ring_gntref);
557 	if (error) {
558 		errmsg = "writing tx ring-ref";
559 		goto abort_transaction;
560 	}
561 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
562 	    "rx-ring-ref","%u", sc->sc_rx_ring_gntref);
563 	if (error) {
564 		errmsg = "writing rx ring-ref";
565 		goto abort_transaction;
566 	}
567 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
568 	    "request-rx-copy", "%lu", rx_copy);
569 	if (error) {
570 		errmsg = "writing request-rx-copy";
571 		goto abort_transaction;
572 	}
573 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
574 	    "feature-rx-notify", "%u", 1);
575 	if (error) {
576 		errmsg = "writing feature-rx-notify";
577 		goto abort_transaction;
578 	}
579 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
580 	    "event-channel", "%u", sc->sc_evtchn);
581 	if (error) {
582 		errmsg = "writing event channel";
583 		goto abort_transaction;
584 	}
585 	error = xenbus_transaction_end(xbt, 0);
586 	if (error == EAGAIN)
587 		goto again;
588 	if (error) {
589 		xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
590 		return false;
591 	}
592 	mutex_enter(&sc->sc_rx_lock);
593 	xennet_alloc_rx_buffer(sc);
594 	mutex_exit(&sc->sc_rx_lock);
595 
596 	if (sc->sc_backend_status == BEST_SUSPENDED) {
597 		xenbus_device_resume(sc->sc_xbusd);
598 	}
599 
600 	sc->sc_backend_status = BEST_CONNECTED;
601 
602 	return true;
603 
604 abort_transaction:
605 	xenbus_transaction_end(xbt, 1);
606 	xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg);
607 	return false;
608 }
609 
610 static bool
611 xennet_xenbus_suspend(device_t dev, const pmf_qual_t *qual)
612 {
613 	int s;
614 	struct xennet_xenbus_softc *sc = device_private(dev);
615 
616 	/*
617 	 * xennet_stop() is called by pmf(9) before xennet_xenbus_suspend(),
618 	 * so we do not mask the event channel here.
619 	 */
620 
621 	s = splnet();
622 	/* process any outstanding TX responses, then collect RX packets */
623 	xennet_handler(sc);
624 	while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
625 		tsleep(xennet_xenbus_suspend, PRIBIO, "xnet_suspend", hz/2);
626 		xennet_handler(sc);
627 	}
628 
629 	/*
630 	 * dom0 may still use references to the grants we gave away
631 	 * earlier during RX buffer allocation. So we do not free RX buffers
632 	 * here, as dom0 does not expect the guest domain to suddenly revoke
633 	 * access to these grants.
634 	 */
635 
636 	sc->sc_backend_status = BEST_SUSPENDED;
637 	event_remove_handler(sc->sc_evtchn, &xennet_handler, sc);
638 
639 	splx(s);
640 
641 	xenbus_device_suspend(sc->sc_xbusd);
642 	aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn);
643 
644 	return true;
645 }
646 
647 static void xennet_backend_changed(void *arg, XenbusState new_state)
648 {
649 	struct xennet_xenbus_softc *sc = device_private((device_t)arg);
650 	DPRINTF(("%s: new backend state %d\n",
651 	    device_xname(sc->sc_dev), new_state));
652 
653 	switch (new_state) {
654 	case XenbusStateInitialising:
655 	case XenbusStateInitialised:
656 	case XenbusStateConnected:
657 		break;
658 	case XenbusStateClosing:
659 		sc->sc_backend_status = BEST_CLOSED;
660 		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
661 		break;
662 	case XenbusStateInitWait:
663 		if (sc->sc_backend_status == BEST_CONNECTED)
664 			break;
665 		if (xennet_talk_to_backend(sc))
666 			xenbus_switch_state(sc->sc_xbusd, NULL,
667 			    XenbusStateConnected);
668 		break;
669 	case XenbusStateUnknown:
670 	default:
671 		panic("bad backend state %d", new_state);
672 	}
673 }
674 
675 /*
676  * Allocate RX buffers and put the associated request structures
677  * in the ring. This allows the backend to use them to communicate with
678  * the frontend when some data is destined to the frontend.
679  */
680 
681 static void
682 xennet_alloc_rx_buffer(struct xennet_xenbus_softc *sc)
683 {
684 	RING_IDX req_prod = sc->sc_rx_ring.req_prod_pvt;
685 	RING_IDX i;
686 	struct xennet_rxreq *req;
687 	struct xen_memory_reservation reservation;
688 	int s, otherend_id, notify;
689 
690 	otherend_id = sc->sc_xbusd->xbusd_otherend_id;
691 
692 	KASSERT(mutex_owned(&sc->sc_rx_lock));
693 	for (i = 0; sc->sc_free_rxreql != 0; i++) {
694 		req  = SLIST_FIRST(&sc->sc_rxreq_head);
695 		KASSERT(req != NULL);
696 		KASSERT(req == &sc->sc_rxreqs[req->rxreq_id]);
697 		RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->id =
698 		    req->rxreq_id;
699 
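		/*
		 * Make the buffer page available to the backend: grant it
		 * access to the page so the packet can be copied into it
		 * (copy mode), or set up a page transfer and give the local
		 * page back to the hypervisor below (flip mode).
		 */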
700 		switch (sc->sc_rx_feature) {
701 		case FEATURE_RX_COPY:
702 			if (xengnt_grant_access(otherend_id,
703 			    xpmap_ptom_masked(req->rxreq_pa),
704 			    0, &req->rxreq_gntref) != 0) {
705 				goto out_loop;
706 			}
707 			break;
708 		case FEATURE_RX_FLIP:
709 			if (xengnt_grant_transfer(otherend_id,
710 			    &req->rxreq_gntref) != 0) {
711 				goto out_loop;
712 			}
713 			break;
714 		default:
715 			panic("%s: unsupported RX feature mode: %ld\n",
716 			    __func__, sc->sc_rx_feature);
717 		}
718 
719 		RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->gref =
720 		    req->rxreq_gntref;
721 
722 		SLIST_REMOVE_HEAD(&sc->sc_rxreq_head, rxreq_next);
723 		sc->sc_free_rxreql--;
724 
725 		if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
726 			/* unmap the page */
727 			MULTI_update_va_mapping(&rx_mcl[i],
728 			    req->rxreq_va, 0, 0);
729 			/*
730 			 * Remove this page from pseudo phys map before
731 			 * passing back to Xen.
732 			 */
733 			xennet_pages[i] =
734 			    xpmap_ptom(req->rxreq_pa) >> PAGE_SHIFT;
735 			xpmap_ptom_unmap(req->rxreq_pa);
736 		}
737 	}
738 
739 out_loop:
740 	if (i == 0) {
741 		return;
742 	}
743 
744 	if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
745 		/* also make sure to flush all TLB entries */
746 		rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
747 		    UVMF_TLB_FLUSH | UVMF_ALL;
748 		/*
749 		 * We may have allocated buffers which have entries
750 		 * outstanding in the page update queue -- make sure we flush
751 		 * those first!
752 		 */
753 		s = splvm();
754 		xpq_flush_queue();
755 		splx(s);
756 		/* now decrease reservation */
757 		set_xen_guest_handle(reservation.extent_start, xennet_pages);
758 		reservation.nr_extents = i;
759 		reservation.extent_order = 0;
760 		reservation.address_bits = 0;
761 		reservation.domid = DOMID_SELF;
762 		rx_mcl[i].op = __HYPERVISOR_memory_op;
763 		rx_mcl[i].args[0] = XENMEM_decrease_reservation;
764 		rx_mcl[i].args[1] = (unsigned long)&reservation;
765 		HYPERVISOR_multicall(rx_mcl, i+1);
766 		if (__predict_false(rx_mcl[i].result != i)) {
767 			panic("xennet_alloc_rx_buffer: "
768 			    "XENMEM_decrease_reservation");
769 		}
770 	}
771 
772 	sc->sc_rx_ring.req_prod_pvt = req_prod + i;
773 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_rx_ring, notify);
774 	if (notify)
775 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
776 	return;
777 }
778 
779 /*
780  * Reclaim all RX buffers used by the I/O ring between frontend and backend
781  */
782 static void
783 xennet_free_rx_buffer(struct xennet_xenbus_softc *sc)
784 {
785 	paddr_t ma, pa;
786 	vaddr_t va;
787 	RING_IDX i;
788 	mmu_update_t mmu[1];
789 	multicall_entry_t mcl[2];
790 
791 	mutex_enter(&sc->sc_rx_lock);
792 
793 	DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev)));
794 	/* get back memory from RX ring */
795 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
796 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
797 
798 		if (rxreq->rxreq_gntref != GRANT_INVALID_REF) {
799 			/*
800 			 * this req is still granted. Get back the page or
801 			 * allocate a new one, and remap it.
802 			 */
803 			SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq,
804 			    rxreq_next);
805 			sc->sc_free_rxreql++;
806 
807 			switch (sc->sc_rx_feature) {
808 			case FEATURE_RX_COPY:
809 				xengnt_revoke_access(rxreq->rxreq_gntref);
810 				rxreq->rxreq_gntref = GRANT_INVALID_REF;
811 				break;
812 			case FEATURE_RX_FLIP:
813 				ma = xengnt_revoke_transfer(
814 				    rxreq->rxreq_gntref);
815 				rxreq->rxreq_gntref = GRANT_INVALID_REF;
816 				if (ma == 0) {
817 					u_long pfn;
818 					struct xen_memory_reservation xenres;
819 					/*
820 					 * transfer not complete, we lost the page.
821 					 * Get one from hypervisor
822 					 */
823 					set_xen_guest_handle(
824 					    xenres.extent_start, &pfn);
825 					xenres.nr_extents = 1;
826 					xenres.extent_order = 0;
827 					xenres.address_bits = 31;
828 					xenres.domid = DOMID_SELF;
829 					if (HYPERVISOR_memory_op(
830 					    XENMEM_increase_reservation, &xenres) < 0) {
831 						panic("xennet_free_rx_buffer: "
832 						    "can't get memory back");
833 					}
834 					ma = pfn;
835 					KASSERT(ma != 0);
836 				}
837 				pa = rxreq->rxreq_pa;
838 				va = rxreq->rxreq_va;
839 				/* remap the page */
840 				mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
841 				mmu[0].val = pa >> PAGE_SHIFT;
842 				MULTI_update_va_mapping(&mcl[0], va,
843 				    (ma << PAGE_SHIFT) | PG_V | PG_KW,
844 				    UVMF_TLB_FLUSH|UVMF_ALL);
845 				xpmap_ptom_map(pa, ptoa(ma));
846 				mcl[1].op = __HYPERVISOR_mmu_update;
847 				mcl[1].args[0] = (unsigned long)mmu;
848 				mcl[1].args[1] = 1;
849 				mcl[1].args[2] = 0;
850 				mcl[1].args[3] = DOMID_SELF;
851 				HYPERVISOR_multicall(mcl, 2);
852 				break;
853 			default:
854 				panic("%s: unsupported RX feature mode: %ld\n",
855 				    __func__, sc->sc_rx_feature);
856 			}
857 		}
858 
859 	}
860 	mutex_exit(&sc->sc_rx_lock);
861 	DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev)));
862 }
863 
864 /*
865  * Clears a used RX request when its associated mbuf has been processed
866  */
867 static void
868 xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg)
869 {
870 	int s = splnet();
871 	KASSERT(buf == m->m_ext.ext_buf);
872 	KASSERT(arg == NULL);
873 	KASSERT(m != NULL);
874 	vaddr_t va = (vaddr_t)(buf) & ~((vaddr_t)PAGE_MASK);
875 	pool_cache_put_paddr(if_xennetrxbuf_cache,
876 	    (void *)va, m->m_ext.ext_paddr);
877 	pool_cache_put(mb_cache, m);
878 	splx(s);
879 }
880 
881 static void
882 xennet_rx_free_req(struct xennet_rxreq *req)
883 {
884 	struct xennet_xenbus_softc *sc = req->rxreq_sc;
885 
886 	KASSERT(mutex_owned(&sc->sc_rx_lock));
887 
888 	/* puts back the RX request in the list of free RX requests */
889 	SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next);
890 	sc->sc_free_rxreql++;
891 
892 	/*
893 	 * The ring needs more requests to be pushed in; allocate some
894 	 * RX buffers to catch up with the backend's consumption.
895 	 */
896 	req->rxreq_gntref = GRANT_INVALID_REF;
897 
898 	if (sc->sc_free_rxreql >= (NET_RX_RING_SIZE * 4 / 5) &&
899 	    __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
900 		xennet_alloc_rx_buffer(sc);
901 	}
902 }
903 
904 /*
905  * Process the responses associated with the TX mbufs previously sent
906  * through xennet_softstart().
907  * Called at splnet.
908  */
909 static void
910 xennet_tx_complete(struct xennet_xenbus_softc *sc)
911 {
912 	struct xennet_txreq *req;
913 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
914 	RING_IDX resp_prod, i;
915 
916 	DPRINTFN(XEDB_EVENT, ("xennet_tx_complete prod %d cons %d\n",
917 	    sc->sc_tx_ring.sring->rsp_prod, sc->sc_tx_ring.rsp_cons));
918 
919 again:
920 	resp_prod = sc->sc_tx_ring.sring->rsp_prod;
921 	xen_rmb();
922 	mutex_enter(&sc->sc_tx_lock);
923 	for (i = sc->sc_tx_ring.rsp_cons; i != resp_prod; i++) {
924 		req = &sc->sc_txreqs[RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id];
925 		KASSERT(req->txreq_id ==
926 		    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id);
927 		if (__predict_false(xengnt_status(req->txreq_gntref))) {
928 			aprint_verbose_dev(sc->sc_dev,
929 			    "grant still used by backend\n");
930 			sc->sc_tx_ring.rsp_cons = i;
931 			goto end;
932 		}
933 		if (__predict_false(
934 		    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->status !=
935 		    NETIF_RSP_OKAY))
936 			ifp->if_oerrors++;
937 		else
938 			ifp->if_opackets++;
939 		xengnt_revoke_access(req->txreq_gntref);
940 		m_freem(req->txreq_m);
941 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
942 	}
943 	mutex_exit(&sc->sc_tx_lock);
944 
945 	sc->sc_tx_ring.rsp_cons = resp_prod;
946 	/* set new event and check for race with rsp_cons update */
947 	sc->sc_tx_ring.sring->rsp_event =
948 	    resp_prod + ((sc->sc_tx_ring.sring->req_prod - resp_prod) >> 1) + 1;
949 	ifp->if_timer = 0;
950 	xen_wmb();
951 	if (resp_prod != sc->sc_tx_ring.sring->rsp_prod)
952 		goto again;
953 end:
954 	if (ifp->if_flags & IFF_OACTIVE) {
955 		ifp->if_flags &= ~IFF_OACTIVE;
956 		xennet_softstart(sc);
957 	}
958 }
959 
960 /*
961  * Xennet event handler.
962  * Get the outstanding responses for TX packets, then collect all responses
963  * for pending RX packets.
964  * Called at splnet.
965  */
966 static int
967 xennet_handler(void *arg)
968 {
969 	struct xennet_xenbus_softc *sc = arg;
970 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
971 	RING_IDX resp_prod, i;
972 	struct xennet_rxreq *req;
973 	paddr_t ma, pa;
974 	vaddr_t va;
975 	mmu_update_t mmu[1];
976 	multicall_entry_t mcl[2];
977 	struct mbuf *m;
978 	void *pktp;
979 	int more_to_do;
980 
981 	if (sc->sc_backend_status != BEST_CONNECTED)
982 		return 1;
983 
984 	xennet_tx_complete(sc);
985 
986 	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
987 
988 again:
989 	DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n",
990 	    sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons));
991 
992 	mutex_enter(&sc->sc_rx_lock);
993 	resp_prod = sc->sc_rx_ring.sring->rsp_prod;
994 	xen_rmb(); /* ensure we see replies up to resp_prod */
995 
996 	for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
997 		netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
998 		req = &sc->sc_rxreqs[rx->id];
999 		KASSERT(req->rxreq_gntref != GRANT_INVALID_REF);
1000 		KASSERT(req->rxreq_id == rx->id);
1001 
1002 		ma = 0;
1003 		switch (sc->sc_rx_feature) {
1004 		case FEATURE_RX_COPY:
1005 			xengnt_revoke_access(req->rxreq_gntref);
1006 			break;
1007 		case FEATURE_RX_FLIP:
1008 			ma = xengnt_revoke_transfer(req->rxreq_gntref);
1009 			if (ma == 0) {
1010 				DPRINTFN(XEDB_EVENT, ("xennet_handler ma == 0\n"));
1011 				/*
1012 				 * The remote couldn't send us a packet.
1013 				 * We can't free this rxreq as no page will be mapped
1014 				 * here. Instead, give it back immediately to the backend.
1015 				 */
1016 				ifp->if_ierrors++;
1017 				RING_GET_REQUEST(&sc->sc_rx_ring,
1018 				    sc->sc_rx_ring.req_prod_pvt)->id = req->rxreq_id;
1019 				RING_GET_REQUEST(&sc->sc_rx_ring,
1020 				    sc->sc_rx_ring.req_prod_pvt)->gref =
1021 					req->rxreq_gntref;
1022 				sc->sc_rx_ring.req_prod_pvt++;
1023 				RING_PUSH_REQUESTS(&sc->sc_rx_ring);
1024 				continue;
1025 			}
1026 			break;
1027 		default:
1028 			panic("%s: unsupported RX feature mode: %ld\n",
1029 			    __func__, sc->sc_rx_feature);
1030 		}
1031 
1032 		pa = req->rxreq_pa;
1033 		va = req->rxreq_va;
1034 
1035 		if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
1036 			/* remap the page */
1037 			mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
1038 			mmu[0].val = pa >> PAGE_SHIFT;
1039 			MULTI_update_va_mapping(&mcl[0], va,
1040 			    (ma << PAGE_SHIFT) | PG_V | PG_KW, UVMF_TLB_FLUSH|UVMF_ALL);
1041 			xpmap_ptom_map(pa, ptoa(ma));
1042 			mcl[1].op = __HYPERVISOR_mmu_update;
1043 			mcl[1].args[0] = (unsigned long)mmu;
1044 			mcl[1].args[1] = 1;
1045 			mcl[1].args[2] = 0;
1046 			mcl[1].args[3] = DOMID_SELF;
1047 			HYPERVISOR_multicall(mcl, 2);
1048 		}
1049 
1050 		pktp = (void *)(va + rx->offset);
1051 #ifdef XENNET_DEBUG_DUMP
1052 		xennet_hex_dump(pktp, rx->status, "r", rx->id);
1053 #endif
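		/*
		 * In non-promiscuous mode, drop packets that are neither
		 * multicast nor addressed to our own MAC address.
		 */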
1054 		if ((ifp->if_flags & IFF_PROMISC) == 0) {
1055 			struct ether_header *eh = pktp;
1056 			if (ETHER_IS_MULTICAST(eh->ether_dhost) == 0 &&
1057 			    memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
1058 			    ETHER_ADDR_LEN) != 0) {
1059 				DPRINTFN(XEDB_EVENT,
1060 				    ("xennet_handler bad dest\n"));
1061 				/* packet not for us */
1062 				xennet_rx_free_req(req);
1063 				continue;
1064 			}
1065 		}
1066 		MGETHDR(m, M_DONTWAIT, MT_DATA);
1067 		if (__predict_false(m == NULL)) {
1068 			printf("%s: rx no mbuf\n", ifp->if_xname);
1069 			ifp->if_ierrors++;
1070 			xennet_rx_free_req(req);
1071 			continue;
1072 		}
1073 		MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
1074 
1075 		m->m_pkthdr.rcvif = ifp;
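		/*
		 * Hand the current receive buffer over to the mbuf (via
		 * MEXTADD below) and give the rxreq a fresh page from the
		 * pool; if no replacement page is available, keep the old
		 * buffer on the rxreq and drop the packet instead.
		 */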
1076 		req->rxreq_va = (vaddr_t)pool_cache_get_paddr(
1077 		    if_xennetrxbuf_cache, PR_NOWAIT, &req->rxreq_pa);
1078 		if (__predict_false(req->rxreq_va == 0)) {
1079 			printf("%s: rx no buf\n", ifp->if_xname);
1080 			ifp->if_ierrors++;
1081 			req->rxreq_va = va;
1082 			req->rxreq_pa = pa;
1083 			xennet_rx_free_req(req);
1084 			m_freem(m);
1085 			continue;
1086 		}
1087 		m->m_len = m->m_pkthdr.len = rx->status;
1088 		MEXTADD(m, pktp, rx->status,
1089 		    M_DEVBUF, xennet_rx_mbuf_free, NULL);
1090 		m->m_flags |= M_EXT_RW; /* we own the buffer */
1091 		m->m_ext.ext_paddr = pa;
1092 		if ((rx->flags & NETRXF_csum_blank) != 0) {
1093 			xennet_checksum_fill(&m);
1094 			if (m == NULL) {
1095 				ifp->if_ierrors++;
1096 				continue;
1097 			}
1098 		}
1099 		/* freeing the req may overwrite *rx, better to do it late */
1100 		xennet_rx_free_req(req);
1101 		/*
1102 		 * Pass packet to bpf if there is a listener.
1103 		 */
1104 		bpf_mtap(ifp, m);
1105 
1106 		ifp->if_ipackets++;
1107 
1108 		/* Pass the packet up. */
1109 		(*ifp->if_input)(ifp, m);
1110 	}
1111 	xen_rmb();
1112 	sc->sc_rx_ring.rsp_cons = i;
1113 	RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_rx_ring, more_to_do);
1114 	mutex_exit(&sc->sc_rx_lock);
1115 
1116 	if (more_to_do)
1117 		goto again;
1118 
1119 	return 1;
1120 }
1121 
1122 /*
1123  * The output routine of a xennet interface
1124  * Called at splnet.
1125  */
1126 void
1127 xennet_start(struct ifnet *ifp)
1128 {
1129 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1130 
1131 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start()\n", device_xname(sc->sc_dev)));
1132 
1133 	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
1134 
1135 	xennet_tx_complete(sc);
1136 
1137 	if (__predict_false(
1138 	    (ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING))
1139 		return;
1140 
1141 	/*
1142 	 * The Xen communication channel is much more efficient if we can
1143 	 * schedule a batch of packets for domain0. To achieve this, we
1144 	 * schedule a soft interrupt, and just return. This way, the network
1145 	 * stack will enqueue all pending mbufs in the interface's send queue
1146 	 * before it is processed by xennet_softstart().
1147 	 */
1148 	softint_schedule(sc->sc_softintr);
1149 	return;
1150 }
1151 
1152 /*
1153  * Prepare mbufs for TX, and notify the backend when finished.
1154  * Called at splsoftnet.
1155  */
1156 void
1157 xennet_softstart(void *arg)
1158 {
1159 	struct xennet_xenbus_softc *sc = arg;
1160 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1161 	struct mbuf *m, *new_m;
1162 	netif_tx_request_t *txreq;
1163 	RING_IDX req_prod;
1164 	paddr_t pa, pa2;
1165 	struct xennet_txreq *req;
1166 	int notify;
1167 	int do_notify = 0;
1168 
1169 	mutex_enter(&sc->sc_tx_lock);
1170 	if (__predict_false(
1171 	    (ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)) {
1172 		mutex_exit(&sc->sc_tx_lock);
1173 		return;
1174 	}
1175 
1176 	req_prod = sc->sc_tx_ring.req_prod_pvt;
1177 	while (/*CONSTCOND*/1) {
1178 		uint16_t txflags;
1179 
1180 		req = SLIST_FIRST(&sc->sc_txreq_head);
1181 		if (__predict_false(req == NULL)) {
1182 			ifp->if_flags |= IFF_OACTIVE;
1183 			break;
1184 		}
1185 		IFQ_POLL(&ifp->if_snd, m);
1186 		if (m == NULL)
1187 			break;
1188 
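		/*
		 * Find the physical address of the packet data: use the
		 * cached paddr for clusters and for data held in the mbuf
		 * itself, and fall back to pmap_extract() otherwise.
		 */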
1189 		switch (m->m_flags & (M_EXT|M_EXT_CLUSTER)) {
1190 		case M_EXT|M_EXT_CLUSTER:
1191 			KASSERT(m->m_ext.ext_paddr != M_PADDR_INVALID);
1192 			pa = m->m_ext.ext_paddr +
1193 				(m->m_data - m->m_ext.ext_buf);
1194 			break;
1195 		case 0:
1196 			KASSERT(m->m_paddr != M_PADDR_INVALID);
1197 			pa = m->m_paddr + M_BUFOFFSET(m) +
1198 				(m->m_data - M_BUFADDR(m));
1199 			break;
1200 		default:
1201 			if (__predict_false(
1202 			    !pmap_extract(pmap_kernel(), (vaddr_t)m->m_data,
1203 			    &pa))) {
1204 				panic("xennet_start: no pa");
1205 			}
1206 			break;
1207 		}
1208 
1209 		if ((m->m_pkthdr.csum_flags &
1210 		    (M_CSUM_TCPv4 | M_CSUM_UDPv4)) != 0) {
1211 			txflags = NETTXF_csum_blank;
1212 		} else {
1213 			txflags = 0;
1214 		}
1215 
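		/*
		 * A single TX request describes one contiguous buffer inside
		 * one page.  If the packet is stored in more than one mbuf or
		 * crosses a page boundary, copy it into a fresh mbuf first.
		 */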
1216 		if (m->m_pkthdr.len != m->m_len ||
1217 		    (pa ^ (pa + m->m_pkthdr.len - 1)) & PG_FRAME) {
1218 
1219 			MGETHDR(new_m, M_DONTWAIT, MT_DATA);
1220 			if (__predict_false(new_m == NULL)) {
1221 				printf("%s: cannot allocate new mbuf\n",
1222 				       device_xname(sc->sc_dev));
1223 				break;
1224 			}
1225 			if (m->m_pkthdr.len > MHLEN) {
1226 				MCLGET(new_m, M_DONTWAIT);
1227 				if (__predict_false(
1228 				    (new_m->m_flags & M_EXT) == 0)) {
1229 					DPRINTF(("%s: no mbuf cluster\n",
1230 					    device_xname(sc->sc_dev)));
1231 					m_freem(new_m);
1232 					break;
1233 				}
1234 			}
1235 
1236 			m_copydata(m, 0, m->m_pkthdr.len, mtod(new_m, void *));
1237 			new_m->m_len = new_m->m_pkthdr.len = m->m_pkthdr.len;
1238 
1239 			if ((new_m->m_flags & M_EXT) != 0) {
1240 				pa = new_m->m_ext.ext_paddr;
1241 				KASSERT(new_m->m_data == new_m->m_ext.ext_buf);
1242 				KASSERT(pa != M_PADDR_INVALID);
1243 			} else {
1244 				pa = new_m->m_paddr;
1245 				KASSERT(pa != M_PADDR_INVALID);
1246 				KASSERT(new_m->m_data == M_BUFADDR(new_m));
1247 				pa += M_BUFOFFSET(new_m);
1248 			}
1249 			if (__predict_false(xengnt_grant_access(
1250 			    sc->sc_xbusd->xbusd_otherend_id,
1251 			    xpmap_ptom_masked(pa),
1252 			    GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1253 				m_freem(new_m);
1254 				ifp->if_flags |= IFF_OACTIVE;
1255 				break;
1256 			}
1257 			/* we will be able to send new_m */
1258 			IFQ_DEQUEUE(&ifp->if_snd, m);
1259 			m_freem(m);
1260 			m = new_m;
1261 		} else {
1262 			if (__predict_false(xengnt_grant_access(
1263 			    sc->sc_xbusd->xbusd_otherend_id,
1264 			    xpmap_ptom_masked(pa),
1265 			    GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1266 				ifp->if_flags |= IFF_OACTIVE;
1267 				break;
1268 			}
1269 			/* we will be able to send m */
1270 			IFQ_DEQUEUE(&ifp->if_snd, m);
1271 		}
1272 		MCLAIM(m, &sc->sc_ethercom.ec_tx_mowner);
1273 
1274 		KASSERT(((pa ^ (pa + m->m_pkthdr.len -  1)) & PG_FRAME) == 0);
1275 
1276 		SLIST_REMOVE_HEAD(&sc->sc_txreq_head, txreq_next);
1277 		req->txreq_m = m;
1278 
1279 		DPRINTFN(XEDB_MBUF, ("xennet_start id %d, "
1280 		    "mbuf %p, buf %p/%p/%p, size %d\n",
1281 		    req->txreq_id, m, mtod(m, void *), (void *)pa,
1282 		    (void *)xpmap_ptom_masked(pa), m->m_pkthdr.len));
1283 		pmap_extract_ma(pmap_kernel(), mtod(m, vaddr_t), &pa2);
1284 		DPRINTFN(XEDB_MBUF, ("xennet_start pa %p ma %p/%p\n",
1285 		    (void *)pa, (void *)xpmap_ptom_masked(pa), (void *)pa2));
1286 #ifdef XENNET_DEBUG_DUMP
1287 		xennet_hex_dump(mtod(m, u_char *), m->m_pkthdr.len, "s",
1288 			       	req->txreq_id);
1289 #endif
1290 
1291 		txreq = RING_GET_REQUEST(&sc->sc_tx_ring, req_prod);
1292 		txreq->id = req->txreq_id;
1293 		txreq->gref = req->txreq_gntref;
1294 		txreq->offset = pa & ~PG_FRAME;
1295 		txreq->size = m->m_pkthdr.len;
1296 		txreq->flags = txflags;
1297 
1298 		req_prod++;
1299 		sc->sc_tx_ring.req_prod_pvt = req_prod;
1300 		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_tx_ring, notify);
1301 		if (notify)
1302 			do_notify = 1;
1303 
1304 #ifdef XENNET_DEBUG
1305 		DPRINTFN(XEDB_MEM, ("packet addr %p/%p, physical %p/%p, "
1306 		    "m_paddr %p, len %d/%d\n", M_BUFADDR(m), mtod(m, void *),
1307 		    (void *)*kvtopte(mtod(m, vaddr_t)),
1308 		    (void *)xpmap_mtop(*kvtopte(mtod(m, vaddr_t))),
1309 		    (void *)m->m_paddr, m->m_pkthdr.len, m->m_len));
1310 		DPRINTFN(XEDB_MEM, ("id %d gref %d offset %d size %d flags %d"
1311 		    " prod %d\n",
1312 		    txreq->id, txreq->gref, txreq->offset, txreq->size,
1313 		    txreq->flags, req_prod));
1314 #endif
1315 
1316 		/*
1317 		 * Pass packet to bpf if there is a listener.
1318 		 */
1319 		bpf_mtap(ifp, m);
1320 	}
1321 
1322 	if (do_notify) {
1323 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
1324 		ifp->if_timer = 5;
1325 	}
1326 
1327 	mutex_exit(&sc->sc_tx_lock);
1328 
1329 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start() done\n",
1330 	    device_xname(sc->sc_dev)));
1331 }
1332 
1333 int
1334 xennet_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1335 {
1336 #ifdef XENNET_DEBUG
1337 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1338 #endif
1339 	int s, error = 0;
1340 
1341 	s = splnet();
1342 
1343 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl()\n",
1344 	    device_xname(sc->sc_dev)));
1345 	error = ether_ioctl(ifp, cmd, data);
1346 	if (error == ENETRESET)
1347 		error = 0;
1348 	splx(s);
1349 
1350 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() returning %d\n",
1351 	    device_xname(sc->sc_dev), error));
1352 
1353 	return error;
1354 }
1355 
1356 void
1357 xennet_watchdog(struct ifnet *ifp)
1358 {
1359 	aprint_verbose_ifnet(ifp, "xennet_watchdog\n");
1360 }
1361 
1362 int
1363 xennet_init(struct ifnet *ifp)
1364 {
1365 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1366 	mutex_enter(&sc->sc_rx_lock);
1367 
1368 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_init()\n",
1369 	    device_xname(sc->sc_dev)));
1370 
1371 	if ((ifp->if_flags & IFF_RUNNING) == 0) {
1372 		sc->sc_rx_ring.sring->rsp_event =
1373 		    sc->sc_rx_ring.rsp_cons + 1;
1374 		hypervisor_enable_event(sc->sc_evtchn);
1375 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
1376 		xennet_reset(sc);
1377 	}
1378 	ifp->if_flags |= IFF_RUNNING;
1379 	ifp->if_flags &= ~IFF_OACTIVE;
1380 	ifp->if_timer = 0;
1381 	mutex_exit(&sc->sc_rx_lock);
1382 	return 0;
1383 }
1384 
1385 void
1386 xennet_stop(struct ifnet *ifp, int disable)
1387 {
1388 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1389 
1390 	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1391 	hypervisor_mask_event(sc->sc_evtchn);
1392 	xennet_reset(sc);
1393 }
1394 
1395 void
1396 xennet_reset(struct xennet_xenbus_softc *sc)
1397 {
1398 
1399 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_reset()\n",
1400 	    device_xname(sc->sc_dev)));
1401 }
1402 
1403 #if defined(NFS_BOOT_BOOTSTATIC)
1404 int
1405 xennet_bootstatic_callback(struct nfs_diskless *nd)
1406 {
1407 #if 0
1408 	struct ifnet *ifp = nd->nd_ifp;
1409 	struct xennet_xenbus_softc *sc =
1410 	    (struct xennet_xenbus_softc *)ifp->if_softc;
1411 #endif
1412 	int flags = 0;
1413 	union xen_cmdline_parseinfo xcp;
1414 	struct sockaddr_in *sin;
1415 
1416 	memset(&xcp, 0, sizeof(xcp.xcp_netinfo));
1417 	xcp.xcp_netinfo.xi_ifno = /* XXX sc->sc_ifno */ 0;
1418 	xcp.xcp_netinfo.xi_root = nd->nd_root.ndm_host;
1419 	xen_parse_cmdline(XEN_PARSE_NETINFO, &xcp);
1420 
1421 	if (xcp.xcp_netinfo.xi_root[0] != '\0') {
1422 		flags |= NFS_BOOT_HAS_SERVER;
1423 		if (strchr(xcp.xcp_netinfo.xi_root, ':') != NULL)
1424 			flags |= NFS_BOOT_HAS_ROOTPATH;
1425 	}
1426 
1427 	nd->nd_myip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[0]);
1428 	nd->nd_gwip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[2]);
1429 	nd->nd_mask.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[3]);
1430 
1431 	sin = (struct sockaddr_in *) &nd->nd_root.ndm_saddr;
1432 	memset((void *)sin, 0, sizeof(*sin));
1433 	sin->sin_len = sizeof(*sin);
1434 	sin->sin_family = AF_INET;
1435 	sin->sin_addr.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[1]);
1436 
1437 	if (nd->nd_myip.s_addr)
1438 		flags |= NFS_BOOT_HAS_MYIP;
1439 	if (nd->nd_gwip.s_addr)
1440 		flags |= NFS_BOOT_HAS_GWIP;
1441 	if (nd->nd_mask.s_addr)
1442 		flags |= NFS_BOOT_HAS_MASK;
1443 	if (sin->sin_addr.s_addr)
1444 		flags |= NFS_BOOT_HAS_SERVADDR;
1445 
1446 	return flags;
1447 }
1448 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
1449 
1450 #ifdef XENNET_DEBUG_DUMP
1451 #define XCHR(x) hexdigits[(x) & 0xf]
1452 static void
1453 xennet_hex_dump(const unsigned char *pkt, size_t len, const char *type, int id)
1454 {
1455 	size_t i, j;
1456 
1457 	printf("pkt %p len %zd/%zx type %s id %d\n", pkt, len, len, type, id);
1458 	printf("00000000  ");
1459 	for (i = 0; i < len; i++) {
1460 		printf("%c%c ", XCHR(pkt[i]>>4), XCHR(pkt[i]));
1461 		if ((i+1) % 16 == 8)
1462 			printf(" ");
1463 		if ((i+1) % 16 == 0) {
1464 			printf(" %c", '|');
1465 			for (j = 0; j < 16; j++)
1466 				printf("%c", pkt[i-15+j]>=32 &&
1467 				    pkt[i-15+j]<127?pkt[i-15+j]:'.');
1468 			printf("%c\n%c%c%c%c%c%c%c%c  ", '|',
1469 			    XCHR((i+1)>>28), XCHR((i+1)>>24),
1470 			    XCHR((i+1)>>20), XCHR((i+1)>>16),
1471 			    XCHR((i+1)>>12), XCHR((i+1)>>8),
1472 			    XCHR((i+1)>>4), XCHR(i+1));
1473 		}
1474 	}
1475 	printf("\n");
1476 }
1477 #undef XCHR
1478 #endif
1479