1 /*      $NetBSD: if_xennet_xenbus.c,v 1.53 2011/09/26 21:44:09 jym Exp $      */
2 
3 /*
4  * Copyright (c) 2006 Manuel Bouyer.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  */
27 
28 /*
29  * Copyright (c) 2004 Christian Limpach.
30  * All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  *
41  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
42  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
43  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
44  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
45  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
46  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
48  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
50  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51  */
52 
53 /*
54  * This file contains the xennet frontend code required for the network
55  * communication between two Xen domains.
56  * It resembles xbd, but is a little more complex as it must deal with two
57  * rings:
58  * - the TX ring, to transmit packets to backend (inside => outside)
59  * - the RX ring, to receive packets from backend (outside => inside)
60  *
61  * The principles are as follows.
62  *
63  * For TX:
64  * The purpose is to transmit packets to the outside. Transmission starts in
65  * xennet_start() (the default output routine of xennet), which schedules a
66  * softint, xennet_softstart(). xennet_softstart() generates the requests
67  * associated with the queued TX mbufs (see altq(9)).
68  * The backend's responses are processed by xennet_tx_complete(), called either
69  * from:
70  * - xennet_start()
71  * - xennet_handler(), during an asynchronous event notification from backend
72  *   (similar to an IRQ).
73  *
74  * For RX:
75  * The purpose is to process packets received from the outside. RX buffers
76  * are pre-allocated through xennet_alloc_rx_buffer() during xennet autoconf
77  * attach. During pre-allocation, the frontend pushes requests into the I/O
78  * ring, in preparation for incoming packets from the backend.
79  * When RX packets need to be processed, the backend takes the requests
80  * previously offered by the frontend and pushes the associated responses into
81  * the I/O ring. When done, it notifies the frontend through an event
82  * notification, which asynchronously calls xennet_handler() in the frontend.
83  * xennet_handler() processes the responses, generates the associated mbufs,
84  * and passes them to the MI layer for further processing.
85  */
86 
87 #include <sys/cdefs.h>
88 __KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.53 2011/09/26 21:44:09 jym Exp $");
89 
90 #include "opt_xen.h"
91 #include "opt_nfs_boot.h"
92 #include "rnd.h"
93 
94 #include <sys/param.h>
95 #include <sys/device.h>
96 #include <sys/conf.h>
97 #include <sys/kernel.h>
98 #include <sys/proc.h>
99 #include <sys/systm.h>
100 #include <sys/intr.h>
101 #if NRND > 0
102 #include <sys/rnd.h>
103 #endif
104 
105 #include <net/if.h>
106 #include <net/if_dl.h>
107 #include <net/if_ether.h>
108 #include <net/bpf.h>
109 #include <net/bpfdesc.h>
110 
111 #if defined(NFS_BOOT_BOOTSTATIC)
112 #include <sys/fstypes.h>
113 #include <sys/mount.h>
114 #include <sys/statvfs.h>
115 #include <netinet/in.h>
116 #include <nfs/rpcv2.h>
117 #include <nfs/nfsproto.h>
118 #include <nfs/nfs.h>
119 #include <nfs/nfsmount.h>
120 #include <nfs/nfsdiskless.h>
121 #include <xen/if_xennetvar.h>
122 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
123 
124 #include <xen/xennet_checksum.h>
125 
126 #include <uvm/uvm.h>
127 
128 #include <xen/hypervisor.h>
129 #include <xen/evtchn.h>
130 #include <xen/granttables.h>
131 #include <xen/xen3-public/io/netif.h>
132 #include <xen/xenpmap.h>
133 
134 #include <xen/xenbus.h>
135 #include "locators.h"
136 
137 #undef XENNET_DEBUG_DUMP
138 #undef XENNET_DEBUG
139 #ifdef XENNET_DEBUG
140 #define XEDB_FOLLOW     0x01
141 #define XEDB_INIT       0x02
142 #define XEDB_EVENT      0x04
143 #define XEDB_MBUF       0x08
144 #define XEDB_MEM        0x10
145 int xennet_debug = 0xff;
146 #define DPRINTF(x) do { if (xennet_debug) printf x; } while (/*CONSTCOND*/0)
147 #define DPRINTFN(n,x) do { if (xennet_debug & (n)) printf x; } while (/*CONSTCOND*/0)
148 #else
149 #define DPRINTF(x)
150 #define DPRINTFN(n,x)
151 #endif
152 
153 #define GRANT_INVALID_REF -1 /* entry is free */
154 #define GRANT_STACK_REF   -2 /* entry owned by the network stack */
155 
156 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
157 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
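/*
 * __RING_SIZE() yields the number of request/response slots that fit in a
 * single PAGE_SIZE shared ring page, so each ring occupies exactly one page.
 */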
158 
159 struct xennet_txreq {
160 	SLIST_ENTRY(xennet_txreq) txreq_next;
161 	uint16_t txreq_id; /* ID passed to backend */
162 	grant_ref_t txreq_gntref; /* grant ref of this request */
163 	struct mbuf *txreq_m; /* mbuf being transmitted */
164 };
165 
166 struct xennet_rxreq {
167 	SLIST_ENTRY(xennet_rxreq) rxreq_next;
168 	uint16_t rxreq_id; /* ID passed to backend */
169 	grant_ref_t rxreq_gntref; /* grant ref of this request */
170 /* va/pa for this receive buf. ma will be provided by backend */
171 	paddr_t rxreq_pa;
172 	vaddr_t rxreq_va;
173 	struct xennet_xenbus_softc *rxreq_sc; /* pointer to our interface */
174 };
175 
176 struct xennet_xenbus_softc {
177 	device_t sc_dev;
178 	struct ethercom sc_ethercom;
179 	uint8_t sc_enaddr[6];
180 	struct xenbus_device *sc_xbusd;
181 
182 	netif_tx_front_ring_t sc_tx_ring;
183 	netif_rx_front_ring_t sc_rx_ring;
184 
185 	unsigned int sc_evtchn;
186 	void *sc_softintr;
187 
188 	grant_ref_t sc_tx_ring_gntref;
189 	grant_ref_t sc_rx_ring_gntref;
190 
191 	struct xennet_txreq sc_txreqs[NET_TX_RING_SIZE];
192 	struct xennet_rxreq sc_rxreqs[NET_RX_RING_SIZE];
193 	SLIST_HEAD(,xennet_txreq) sc_txreq_head; /* list of free TX requests */
194 	SLIST_HEAD(,xennet_rxreq) sc_rxreq_head; /* list of free RX requests */
195 	int sc_free_rxreql; /* number of free receive request structs */
196 
197 	int sc_backend_status; /* our status with backend */
198 #define BEST_CLOSED		0
199 #define BEST_DISCONNECTED	1
200 #define BEST_CONNECTED		2
201 #define BEST_SUSPENDED		3
202 	unsigned long sc_rx_feature;
203 #define FEATURE_RX_FLIP		0
204 #define FEATURE_RX_COPY		1
205 #if NRND > 0
206 	rndsource_element_t     sc_rnd_source;
207 #endif
208 };
209 #define SC_NLIVEREQ(sc) ((sc)->sc_rx_ring.req_prod_pvt - \
210 			    (sc)->sc_rx_ring.sring->rsp_prod)
211 
212 /* too big to be on stack */
213 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
214 static u_long xennet_pages[NET_RX_RING_SIZE];
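/*
 * rx_mcl batches the per-buffer multicall entries issued in RX flip mode,
 * and xennet_pages holds the machine frame numbers handed back to the
 * hypervisor when xennet_alloc_rx_buffer() decreases the reservation.
 */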
215 
216 static int  xennet_xenbus_match(device_t, cfdata_t, void *);
217 static void xennet_xenbus_attach(device_t, device_t, void *);
218 static int  xennet_xenbus_detach(device_t, int);
219 static void xennet_backend_changed(void *, XenbusState);
220 
221 static void xennet_alloc_rx_buffer(struct xennet_xenbus_softc *);
222 static void xennet_free_rx_buffer(struct xennet_xenbus_softc *);
223 static void xennet_tx_complete(struct xennet_xenbus_softc *);
224 static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *);
225 static int  xennet_handler(void *);
226 static bool xennet_talk_to_backend(struct xennet_xenbus_softc *);
227 #ifdef XENNET_DEBUG_DUMP
228 static void xennet_hex_dump(const unsigned char *, size_t, const char *, int);
229 #endif
230 
231 static int  xennet_init(struct ifnet *);
232 static void xennet_stop(struct ifnet *, int);
233 static void xennet_reset(struct xennet_xenbus_softc *);
234 static void xennet_softstart(void *);
235 static void xennet_start(struct ifnet *);
236 static int  xennet_ioctl(struct ifnet *, u_long, void *);
237 static void xennet_watchdog(struct ifnet *);
238 
239 static bool xennet_xenbus_suspend(device_t dev, const pmf_qual_t *);
240 static bool xennet_xenbus_resume (device_t dev, const pmf_qual_t *);
241 
242 CFATTACH_DECL_NEW(xennet, sizeof(struct xennet_xenbus_softc),
243    xennet_xenbus_match, xennet_xenbus_attach, xennet_xenbus_detach, NULL);
244 
245 static int
246 xennet_xenbus_match(device_t parent, cfdata_t match, void *aux)
247 {
248 	struct xenbusdev_attach_args *xa = aux;
249 
250 	if (strcmp(xa->xa_type, "vif") != 0)
251 		return 0;
252 
253 	if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
254 	    match->cf_loc[XENBUSCF_ID] != xa->xa_id)
255 		return 0;
256 
257 	return 1;
258 }
259 
260 static void
261 xennet_xenbus_attach(device_t parent, device_t self, void *aux)
262 {
263 	struct xennet_xenbus_softc *sc = device_private(self);
264 	struct xenbusdev_attach_args *xa = aux;
265 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
266 	int err;
267 	netif_tx_sring_t *tx_ring;
268 	netif_rx_sring_t *rx_ring;
269 	RING_IDX i;
270 	char *val, *e, *p;
271 	int s;
272 	extern int ifqmaxlen; /* XXX */
273 #ifdef XENNET_DEBUG
274 	char **dir;
275 	int dir_n = 0;
276 	char id_str[20];
277 #endif
278 
279 	aprint_normal(": Xen Virtual Network Interface\n");
280 	sc->sc_dev = self;
281 
282 #ifdef XENNET_DEBUG
283 	printf("path: %s\n", xa->xa_xbusd->xbusd_path);
284 	snprintf(id_str, sizeof(id_str), "%d", xa->xa_id);
285 	err = xenbus_directory(NULL, "device/vif", id_str, &dir_n, &dir);
286 	if (err) {
287 		aprint_error_dev(self, "xenbus_directory err %d\n", err);
288 	} else {
289 		printf("%s/\n", xa->xa_xbusd->xbusd_path);
290 		for (i = 0; i < dir_n; i++) {
291 			printf("\t/%s", dir[i]);
292 			err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path,
293 				          dir[i], NULL, &val);
294 			if (err) {
295 				aprint_error_dev(self, "xenbus_read err %d\n",
296 					         err);
297 			} else {
298 				printf(" = %s\n", val);
299 				free(val, M_DEVBUF);
300 			}
301 		}
302 	}
303 #endif /* XENNET_DEBUG */
304 	sc->sc_xbusd = xa->xa_xbusd;
305 	sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;
306 
307 	/* initialize free TX and RX request lists */
308 	SLIST_INIT(&sc->sc_txreq_head);
309 	for (i = 0; i < NET_TX_RING_SIZE; i++) {
310 		sc->sc_txreqs[i].txreq_id = i;
311 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, &sc->sc_txreqs[i],
312 		    txreq_next);
313 	}
314 	SLIST_INIT(&sc->sc_rxreq_head);
315 	s = splvm();
316 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
317 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
318 		rxreq->rxreq_id = i;
319 		rxreq->rxreq_sc = sc;
320 		rxreq->rxreq_va = uvm_km_alloc(kernel_map,
321 		    PAGE_SIZE, PAGE_SIZE, UVM_KMF_WIRED | UVM_KMF_ZERO);
322 		if (rxreq->rxreq_va == 0)
323 			break;
324 		if (!pmap_extract(pmap_kernel(), rxreq->rxreq_va,
325 		    &rxreq->rxreq_pa))
326 			panic("%s: no pa for mapped va ?", device_xname(self));
327 		rxreq->rxreq_gntref = GRANT_INVALID_REF;
328 		SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next);
329 	}
330 	splx(s);
331 	sc->sc_free_rxreql = i;
332 	if (sc->sc_free_rxreql == 0) {
333 		aprint_error_dev(self, "failed to allocate rx memory\n");
334 		return;
335 	}
336 
337 	/* read mac address */
338 	err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path, "mac", NULL, &val);
339 	if (err) {
340 		aprint_error_dev(self, "can't read mac address, err %d\n", err);
341 		return;
342 	}
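	/*
	 * The xenstore "mac" node holds a colon-separated Ethernet address,
	 * e.g. "00:16:3e:12:34:56" (example value only).
	 */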
343 	for (i = 0, p = val; i < 6; i++) {
344 		sc->sc_enaddr[i] = strtoul(p, &e, 16);
345 		if ((i < 5 && e[0] != ':') || (i == 5 && e[0] != '\0')) {
346 			aprint_error_dev(self,
347 			    "%s is not a valid mac address\n", val);
348 			free(val, M_DEVBUF);
349 			return;
350 		}
351 		p = &e[1];
352 	}
353 	free(val, M_DEVBUF);
354 	aprint_normal_dev(self, "MAC address %s\n",
355 	    ether_sprintf(sc->sc_enaddr));
356 	/* Initialize ifnet structure and attach interface */
357 	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
358 	ifp->if_softc = sc;
359 	ifp->if_start = xennet_start;
360 	ifp->if_ioctl = xennet_ioctl;
361 	ifp->if_watchdog = xennet_watchdog;
362 	ifp->if_init = xennet_init;
363 	ifp->if_stop = xennet_stop;
364 	ifp->if_flags = IFF_BROADCAST|IFF_SIMPLEX|IFF_NOTRAILERS|IFF_MULTICAST;
365 	ifp->if_timer = 0;
366 	ifp->if_snd.ifq_maxlen = max(ifqmaxlen, NET_TX_RING_SIZE * 2);
367 	ifp->if_capabilities = IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx;
368 	IFQ_SET_READY(&ifp->if_snd);
369 	if_attach(ifp);
370 	ether_ifattach(ifp, sc->sc_enaddr);
371 	sc->sc_softintr = softint_establish(SOFTINT_NET, xennet_softstart, sc);
372 	if (sc->sc_softintr == NULL)
373 		panic("%s: can't establish soft interrupt",
374 			device_xname(self));
375 
376 	/* alloc shared rings */
377 	tx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
378 	    UVM_KMF_WIRED);
379 	rx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
380 	    UVM_KMF_WIRED);
381 	if (tx_ring == NULL || rx_ring == NULL)
382 		panic("%s: can't alloc rings", device_xname(self));
383 
384 	sc->sc_tx_ring.sring = tx_ring;
385 	sc->sc_rx_ring.sring = rx_ring;
386 
387 	/* resume shared structures and tell backend that we are ready */
388 	if (xennet_xenbus_resume(self, PMF_Q_NONE) == false) {
389 		uvm_km_free(kernel_map, (vaddr_t)tx_ring, PAGE_SIZE,
390 		    UVM_KMF_WIRED);
391 		uvm_km_free(kernel_map, (vaddr_t)rx_ring, PAGE_SIZE,
392 		    UVM_KMF_WIRED);
393 		return;
394 	}
395 
396 #if NRND > 0
397 	rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev),
398 	    RND_TYPE_NET, 0);
399 #endif
400 
401 	if (!pmf_device_register(self, xennet_xenbus_suspend,
402 	    xennet_xenbus_resume))
403 		aprint_error_dev(self, "couldn't establish power handler\n");
404 	else
405 		pmf_class_network_register(self, ifp);
406 }
407 
408 static int
409 xennet_xenbus_detach(device_t self, int flags)
410 {
411 	struct xennet_xenbus_softc *sc = device_private(self);
412 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
413 	int s0, s1;
414 	RING_IDX i;
415 
416 	DPRINTF(("%s: xennet_xenbus_detach\n", device_xname(self)));
417 	s0 = splnet();
418 	xennet_stop(ifp, 1);
419 	/* wait for pending TX to complete, and collect pending RX packets */
420 	xennet_handler(sc);
421 	while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
422 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_detach", hz/2);
423 		xennet_handler(sc);
424 	}
425 	xennet_free_rx_buffer(sc);
426 
427 	s1 = splvm();
428 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
429 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
430 		uvm_km_free(kernel_map, rxreq->rxreq_va, PAGE_SIZE,
431 		    UVM_KMF_WIRED);
432 	}
433 	splx(s1);
434 
435 	ether_ifdetach(ifp);
436 	if_detach(ifp);
437 
438 #if NRND > 0
439 	/* Unhook the entropy source. */
440 	rnd_detach_source(&sc->sc_rnd_source);
441 #endif
442 
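	/*
	 * Wait for the backend to release its references to the shared ring
	 * grants before revoking them and freeing the ring pages.
	 */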
443 	while (xengnt_status(sc->sc_tx_ring_gntref)) {
444 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_txref", hz/2);
445 	}
446 	xengnt_revoke_access(sc->sc_tx_ring_gntref);
447 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_tx_ring.sring, PAGE_SIZE,
448 	    UVM_KMF_WIRED);
449 	while (xengnt_status(sc->sc_rx_ring_gntref)) {
450 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_rxref", hz/2);
451 	}
452 	xengnt_revoke_access(sc->sc_rx_ring_gntref);
453 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_rx_ring.sring, PAGE_SIZE,
454 	    UVM_KMF_WIRED);
455 	softint_disestablish(sc->sc_softintr);
456 	event_remove_handler(sc->sc_evtchn, &xennet_handler, sc);
457 	splx(s0);
458 
459 	pmf_device_deregister(self);
460 
461 	DPRINTF(("%s: xennet_xenbus_detach done\n", device_xname(self)));
462 	return 0;
463 }
464 
465 static bool
466 xennet_xenbus_resume(device_t dev, const pmf_qual_t *qual)
467 {
468 	struct xennet_xenbus_softc *sc = device_private(dev);
469 	int error;
470 	netif_tx_sring_t *tx_ring;
471 	netif_rx_sring_t *rx_ring;
472 	paddr_t ma;
473 
474 	/* invalidate the RX and TX rings */
475 	if (sc->sc_backend_status == BEST_SUSPENDED) {
476 		/*
477 		 * Device was suspended, so ensure that the grant accesses
478 		 * associated with the previous RX and TX rings are revoked.
479 		 */
480 		xengnt_revoke_access(sc->sc_tx_ring_gntref);
481 		xengnt_revoke_access(sc->sc_rx_ring_gntref);
482 	}
483 
484 	sc->sc_tx_ring_gntref = GRANT_INVALID_REF;
485 	sc->sc_rx_ring_gntref = GRANT_INVALID_REF;
486 
487 	tx_ring = sc->sc_tx_ring.sring;
488 	rx_ring = sc->sc_rx_ring.sring;
489 
490 	/* Initialize rings */
491 	memset(tx_ring, 0, PAGE_SIZE);
492 	SHARED_RING_INIT(tx_ring);
493 	FRONT_RING_INIT(&sc->sc_tx_ring, tx_ring, PAGE_SIZE);
494 
495 	memset(rx_ring, 0, PAGE_SIZE);
496 	SHARED_RING_INIT(rx_ring);
497 	FRONT_RING_INIT(&sc->sc_rx_ring, rx_ring, PAGE_SIZE);
498 
499 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)tx_ring, &ma);
500 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_tx_ring_gntref);
501 	if (error)
502 		goto abort_resume;
503 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)rx_ring, &ma);
504 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_rx_ring_gntref);
505 	if (error)
506 		goto abort_resume;
507 	error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
508 	if (error)
509 		goto abort_resume;
510 	aprint_verbose_dev(dev, "using event channel %d\n",
511 	    sc->sc_evtchn);
512 	event_set_handler(sc->sc_evtchn, &xennet_handler, sc,
513 	    IPL_NET, device_xname(dev));
514 	return true;
515 
516 abort_resume:
517 	xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device");
518 	return false;
519 }
520 
521 static bool
522 xennet_talk_to_backend(struct xennet_xenbus_softc *sc)
523 {
524 	int error;
525 	unsigned long rx_copy;
526 	struct xenbus_transaction *xbt;
527 	const char *errmsg;
528 
529 	error = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
530 	    "feature-rx-copy", &rx_copy, 10);
531 	if (error)
532 		rx_copy = 0; /* default value if key is absent */
533 
534 	if (rx_copy == 1) {
535 		aprint_normal_dev(sc->sc_dev, "using RX copy mode\n");
536 		sc->sc_rx_feature = FEATURE_RX_COPY;
537 	} else {
538 		aprint_normal_dev(sc->sc_dev, "using RX flip mode\n");
539 		sc->sc_rx_feature = FEATURE_RX_FLIP;
540 	}
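	/*
	 * In RX copy mode the backend copies packet data into pages the
	 * frontend keeps and merely grants access to; in RX flip mode the
	 * frontend transfers page ownership to the backend and gets the
	 * filled machine pages back ("page flipping").
	 */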
541 
542 again:
543 	xbt = xenbus_transaction_start();
544 	if (xbt == NULL)
545 		return false;
546 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
547 	    "vifname", "%s", device_xname(sc->sc_dev));
548 	if (error) {
549 		errmsg = "vifname";
550 		goto abort_transaction;
551 	}
552 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
553 	    "tx-ring-ref","%u", sc->sc_tx_ring_gntref);
554 	if (error) {
555 		errmsg = "writing tx ring-ref";
556 		goto abort_transaction;
557 	}
558 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
559 	    "rx-ring-ref","%u", sc->sc_rx_ring_gntref);
560 	if (error) {
561 		errmsg = "writing rx ring-ref";
562 		goto abort_transaction;
563 	}
564 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
565 	    "request-rx-copy", "%lu", rx_copy);
566 	if (error) {
567 		errmsg = "writing request-rx-copy";
568 		goto abort_transaction;
569 	}
570 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
571 	    "feature-rx-notify", "%u", 1);
572 	if (error) {
573 		errmsg = "writing feature-rx-notify";
574 		goto abort_transaction;
575 	}
576 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
577 	    "event-channel", "%u", sc->sc_evtchn);
578 	if (error) {
579 		errmsg = "writing event channel";
580 		goto abort_transaction;
581 	}
582 	error = xenbus_transaction_end(xbt, 0);
583 	if (error == EAGAIN)
584 		goto again;
585 	if (error) {
586 		xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
587 		return false;
588 	}
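	/* The transaction committed; pre-fill the RX ring with buffers. */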
589 	xennet_alloc_rx_buffer(sc);
590 
591 	if (sc->sc_backend_status == BEST_SUSPENDED) {
592 		xenbus_device_resume(sc->sc_xbusd);
593 	}
594 
595 	sc->sc_backend_status = BEST_CONNECTED;
596 
597 	return true;
598 
599 abort_transaction:
600 	xenbus_transaction_end(xbt, 1);
601 	xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg);
602 	return false;
603 }
604 
605 static bool
606 xennet_xenbus_suspend(device_t dev, const pmf_qual_t *qual)
607 {
608 	int s;
609 	struct xennet_xenbus_softc *sc = device_private(dev);
610 
611 	/*
612 	 * xennet_stop() is called by pmf(9) before xennet_xenbus_suspend(),
613 	 * so we do not mask the event channel here.
614 	 */
615 
616 	s = splnet();
617 	/* process any outstanding TX responses, then collect RX packets */
618 	xennet_handler(sc);
619 	while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
620 		tsleep(xennet_xenbus_suspend, PRIBIO, "xnet_suspend", hz/2);
621 		xennet_handler(sc);
622 	}
623 
624 	/*
625 	 * dom0 may still use references to the grants we gave away
626 	 * earlier during RX buffer allocation. So we do not free RX buffers
627 	 * here, as dom0 does not expect the guest domain to suddenly revoke
628 	 * access to these grants.
629 	 */
630 
631 	sc->sc_backend_status = BEST_SUSPENDED;
632 	event_remove_handler(sc->sc_evtchn, &xennet_handler, sc);
633 
634 	splx(s);
635 
636 	xenbus_device_suspend(sc->sc_xbusd);
637 	aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn);
638 
639 	return true;
640 }
641 
642 static void xennet_backend_changed(void *arg, XenbusState new_state)
643 {
644 	struct xennet_xenbus_softc *sc = device_private((device_t)arg);
645 	DPRINTF(("%s: new backend state %d\n",
646 	    device_xname(sc->sc_dev), new_state));
647 
648 	switch (new_state) {
649 	case XenbusStateInitialising:
650 	case XenbusStateInitialised:
651 	case XenbusStateConnected:
652 		break;
653 	case XenbusStateClosing:
654 		sc->sc_backend_status = BEST_CLOSED;
655 		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
656 		break;
657 	case XenbusStateInitWait:
658 		if (sc->sc_backend_status == BEST_CONNECTED)
659 			break;
660 		if (xennet_talk_to_backend(sc))
661 			xenbus_switch_state(sc->sc_xbusd, NULL,
662 			    XenbusStateConnected);
663 		break;
664 	case XenbusStateUnknown:
665 	default:
666 		panic("bad backend state %d", new_state);
667 	}
668 }
669 
670 /*
671  * Allocate RX buffers and put the associated request structures
672  * in the ring. This allows the backend to use them to communicate with the
673  * frontend when some data is destined for the frontend.
674  */
675 
676 static void
677 xennet_alloc_rx_buffer(struct xennet_xenbus_softc *sc)
678 {
679 	RING_IDX req_prod = sc->sc_rx_ring.req_prod_pvt;
680 	RING_IDX i;
681 	struct xennet_rxreq *req;
682 	struct xen_memory_reservation reservation;
683 	int s1, s2, otherend_id;
684 	paddr_t pfn;
685 
686 	otherend_id = sc->sc_xbusd->xbusd_otherend_id;
687 
688 	s1 = splnet();
689 	for (i = 0; sc->sc_free_rxreql != 0; i++) {
690 		req  = SLIST_FIRST(&sc->sc_rxreq_head);
691 		KASSERT(req != NULL);
692 		KASSERT(req == &sc->sc_rxreqs[req->rxreq_id]);
693 		RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->id =
694 		    req->rxreq_id;
695 
696 		switch (sc->sc_rx_feature) {
697 		case FEATURE_RX_COPY:
698 			if (xengnt_grant_access(otherend_id,
699 			    xpmap_ptom_masked(req->rxreq_pa),
700 			    0, &req->rxreq_gntref) != 0) {
701 				goto out_loop;
702 			}
703 			break;
704 		case FEATURE_RX_FLIP:
705 			if (xengnt_grant_transfer(otherend_id,
706 			    &req->rxreq_gntref) != 0) {
707 				goto out_loop;
708 			}
709 			break;
710 		default:
711 			panic("%s: unsupported RX feature mode: %ld\n",
712 			    __func__, sc->sc_rx_feature);
713 		}
714 
715 		RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->gref =
716 		    req->rxreq_gntref;
717 
718 		SLIST_REMOVE_HEAD(&sc->sc_rxreq_head, rxreq_next);
719 		sc->sc_free_rxreql--;
720 
721 		if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
722 			/* unmap the page */
723 			MULTI_update_va_mapping(&rx_mcl[i],
724 			    req->rxreq_va, 0, 0);
725 			/*
726 			 * Remove this page from the pseudo-physical map before
727 			 * passing it back to Xen.
728 			 */
729 			pfn = (req->rxreq_pa - XPMAP_OFFSET) >> PAGE_SHIFT;
730 			xennet_pages[i] = xpmap_phys_to_machine_mapping[pfn];
731 			xpmap_phys_to_machine_mapping[pfn] = INVALID_P2M_ENTRY;
732 		}
733 	}
734 
735 out_loop:
736 	if (i == 0) {
737 		splx(s1);
738 		return;
739 	}
740 
741 	if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
742 		/* also make sure to flush all TLB entries */
743 		rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
744 		    UVMF_TLB_FLUSH | UVMF_ALL;
745 		/*
746 		 * We may have allocated buffers which have entries
747 		 * outstanding in the page update queue -- make sure we flush
748 		 * those first!
749 		 */
750 		s2 = splvm();
751 		xpq_flush_queue();
752 		splx(s2);
753 		/* now decrease reservation */
754 		xenguest_handle(reservation.extent_start) = xennet_pages;
755 		reservation.nr_extents = i;
756 		reservation.extent_order = 0;
757 		reservation.address_bits = 0;
758 		reservation.domid = DOMID_SELF;
759 		rx_mcl[i].op = __HYPERVISOR_memory_op;
760 		rx_mcl[i].args[0] = XENMEM_decrease_reservation;
761 		rx_mcl[i].args[1] = (unsigned long)&reservation;
762 		HYPERVISOR_multicall(rx_mcl, i+1);
763 		if (__predict_false(rx_mcl[i].result != i)) {
764 			panic("xennet_alloc_rx_buffer: "
765 			    "XENMEM_decrease_reservation");
766 		}
767 	}
768 
769 	sc->sc_rx_ring.req_prod_pvt = req_prod + i;
770 	RING_PUSH_REQUESTS(&sc->sc_rx_ring);
771 
772 	splx(s1);
773 	return;
774 }
775 
776 /*
777  * Reclaim all RX buffers used by the I/O ring between frontend and backend
778  */
779 static void
780 xennet_free_rx_buffer(struct xennet_xenbus_softc *sc)
781 {
782 	paddr_t ma, pa;
783 	vaddr_t va;
784 	RING_IDX i;
785 	mmu_update_t mmu[1];
786 	multicall_entry_t mcl[2];
787 
788 	int s = splbio();
789 
790 	DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev)));
791 	/* get back memory from RX ring */
792 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
793 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
794 
795 		/*
796 		 * if the buffer is in transit in the network stack, wait for
797 		 * the network stack to free it.
798 		 */
799 		while ((volatile grant_ref_t)rxreq->rxreq_gntref ==
800 		    GRANT_STACK_REF)
801 			tsleep(xennet_xenbus_detach, PRIBIO, "xnet_free", hz/2);
802 
803 		if (rxreq->rxreq_gntref != GRANT_INVALID_REF) {
804 			/*
805 			 * this req is still granted. Get back the page or
806 			 * allocate a new one, and remap it.
807 			 */
808 			SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq,
809 			    rxreq_next);
810 			sc->sc_free_rxreql++;
811 
812 			switch (sc->sc_rx_feature) {
813 			case FEATURE_RX_COPY:
814 				xengnt_revoke_access(rxreq->rxreq_gntref);
815 				rxreq->rxreq_gntref = GRANT_INVALID_REF;
816 				break;
817 			case FEATURE_RX_FLIP:
818 				ma = xengnt_revoke_transfer(
819 				    rxreq->rxreq_gntref);
820 				rxreq->rxreq_gntref = GRANT_INVALID_REF;
821 				if (ma == 0) {
822 					u_long pfn;
823 					struct xen_memory_reservation xenres;
824 					/*
825 					 * Transfer not complete; we lost the page.
826 					 * Get one from the hypervisor.
827 					 */
828 					xenguest_handle(xenres.extent_start) = &pfn;
829 					xenres.nr_extents = 1;
830 					xenres.extent_order = 0;
831 					xenres.address_bits = 31;
832 					xenres.domid = DOMID_SELF;
833 					if (HYPERVISOR_memory_op(
834 					    XENMEM_increase_reservation, &xenres) < 0) {
835 						panic("xennet_free_rx_buffer: "
836 						    "can't get memory back");
837 					}
838 					ma = pfn;
839 					KASSERT(ma != 0);
840 				}
841 				pa = rxreq->rxreq_pa;
842 				va = rxreq->rxreq_va;
843 				/* remap the page */
844 				mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
845 				mmu[0].val = ((pa - XPMAP_OFFSET) >> PAGE_SHIFT);
846 				MULTI_update_va_mapping(&mcl[0], va,
847 				    (ma << PAGE_SHIFT) | PG_V | PG_KW,
848 				    UVMF_TLB_FLUSH|UVMF_ALL);
849 				xpmap_phys_to_machine_mapping[
850 				    (pa - XPMAP_OFFSET) >> PAGE_SHIFT] = ma;
851 				mcl[1].op = __HYPERVISOR_mmu_update;
852 				mcl[1].args[0] = (unsigned long)mmu;
853 				mcl[1].args[1] = 1;
854 				mcl[1].args[2] = 0;
855 				mcl[1].args[3] = DOMID_SELF;
856 				HYPERVISOR_multicall(mcl, 2);
857 				break;
858 			default:
859 				panic("%s: unsupported RX feature mode: %ld\n",
860 				    __func__, sc->sc_rx_feature);
861 			}
862 		}
863 
864 	}
865 	splx(s);
866 	DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev)));
867 }
868 
869 /*
870  * Clears a used RX request when its associated mbuf has been processed
871  */
872 static void
873 xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg)
874 {
875 	struct xennet_rxreq *req = arg;
876 	struct xennet_xenbus_softc *sc = req->rxreq_sc;
877 
878 	int s = splnet();
879 
880 	/* puts back the RX request in the list of free RX requests */
881 	SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next);
882 	sc->sc_free_rxreql++;
883 
884 	/*
885 	 * The ring needs more requests pushed in; allocate some
886 	 * RX buffers to catch up with the backend's consumption.
887 	 */
888 	req->rxreq_gntref = GRANT_INVALID_REF;
889 	if (sc->sc_free_rxreql >= SC_NLIVEREQ(sc) &&
890 	    __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
891 		xennet_alloc_rx_buffer(sc);
892 	}
893 
894 	if (m)
895 		pool_cache_put(mb_cache, m);
896 	splx(s);
897 }
898 
899 /*
900  * Process the responses associated with the TX mbufs sent previously through
901  * xennet_softstart().
902  * Called at splnet.
903  */
904 static void
905 xennet_tx_complete(struct xennet_xenbus_softc *sc)
906 {
907 	struct xennet_txreq *req;
908 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
909 	RING_IDX resp_prod, i;
910 
911 	DPRINTFN(XEDB_EVENT, ("xennet_tx_complete prod %d cons %d\n",
912 	    sc->sc_tx_ring.sring->rsp_prod, sc->sc_tx_ring.rsp_cons));
913 
914 again:
915 	resp_prod = sc->sc_tx_ring.sring->rsp_prod;
916 	xen_rmb();
917 	for (i = sc->sc_tx_ring.rsp_cons; i != resp_prod; i++) {
918 		req = &sc->sc_txreqs[RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id];
919 		KASSERT(req->txreq_id ==
920 		    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id);
921 		if (__predict_false(xengnt_status(req->txreq_gntref))) {
922 			aprint_verbose_dev(sc->sc_dev,
923 			    "grant still used by backend\n");
924 			sc->sc_tx_ring.rsp_cons = i;
925 			goto end;
926 		}
927 		if (__predict_false(
928 		    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->status !=
929 		    NETIF_RSP_OKAY))
930 			ifp->if_oerrors++;
931 		else
932 			ifp->if_opackets++;
933 		xengnt_revoke_access(req->txreq_gntref);
934 
935 		m_freem(req->txreq_m);
936 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
937 	}
938 	sc->sc_tx_ring.rsp_cons = resp_prod;
939 	/* set new event and check for race with rsp_cons update */
940 	sc->sc_tx_ring.sring->rsp_event =
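	/*
	 * rsp_event is placed roughly halfway into the still-outstanding
	 * requests, so the backend does not raise an event for every single
	 * completion.
	 */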
941 	    resp_prod + ((sc->sc_tx_ring.sring->req_prod - resp_prod) >> 1) + 1;
942 	ifp->if_timer = 0;
943 	xen_wmb();
944 	if (resp_prod != sc->sc_tx_ring.sring->rsp_prod)
945 		goto again;
946 end:
947 	if (ifp->if_flags & IFF_OACTIVE) {
948 		ifp->if_flags &= ~IFF_OACTIVE;
949 		xennet_softstart(sc);
950 	}
951 }
952 
953 /*
954  * Xennet event handler.
955  * Get the outstanding responses of TX packets, then collect all responses of
956  * pending RX packets.
957  * Called at splnet.
958  */
959 static int
960 xennet_handler(void *arg)
961 {
962 	struct xennet_xenbus_softc *sc = arg;
963 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
964 	RING_IDX resp_prod, i;
965 	struct xennet_rxreq *req;
966 	paddr_t ma, pa;
967 	vaddr_t va;
968 	mmu_update_t mmu[1];
969 	multicall_entry_t mcl[2];
970 	struct mbuf *m;
971 	void *pktp;
972 	int more_to_do;
973 
974 	if (sc->sc_backend_status != BEST_CONNECTED)
975 		return 1;
976 
977 	xennet_tx_complete(sc);
978 
979 #if NRND > 0
980 	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
981 #endif
982 
983 again:
984 	DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n",
985 	    sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons));
986 
987 	resp_prod = sc->sc_rx_ring.sring->rsp_prod;
988 	xen_rmb(); /* ensure we see replies up to resp_prod */
989 	for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
990 		netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
991 		req = &sc->sc_rxreqs[rx->id];
992 		KASSERT(req->rxreq_gntref != GRANT_INVALID_REF);
993 		KASSERT(req->rxreq_id == rx->id);
994 
995 		ma = 0;
996 		switch (sc->sc_rx_feature) {
997 		case FEATURE_RX_COPY:
998 			xengnt_revoke_access(req->rxreq_gntref);
999 			break;
1000 		case FEATURE_RX_FLIP:
1001 			ma = xengnt_revoke_transfer(req->rxreq_gntref);
1002 			if (ma == 0) {
1003 				DPRINTFN(XEDB_EVENT, ("xennet_handler ma == 0\n"));
1004 				/*
1005 				 * The remote couldn't send us a packet.
1006 				 * We can't free this rxreq as no page will be mapped
1007 				 * here. Instead, give it back immediately to the backend.
1008 				 */
1009 				ifp->if_ierrors++;
1010 				RING_GET_REQUEST(&sc->sc_rx_ring,
1011 				    sc->sc_rx_ring.req_prod_pvt)->id = req->rxreq_id;
1012 				RING_GET_REQUEST(&sc->sc_rx_ring,
1013 				    sc->sc_rx_ring.req_prod_pvt)->gref =
1014 					req->rxreq_gntref;
1015 				sc->sc_rx_ring.req_prod_pvt++;
1016 				RING_PUSH_REQUESTS(&sc->sc_rx_ring);
1017 				continue;
1018 			}
1019 			break;
1020 		default:
1021 			panic("%s: unsupported RX feature mode: %ld\n",
1022 			    __func__, sc->sc_rx_feature);
1023 		}
1024 
1025 		req->rxreq_gntref = GRANT_INVALID_REF;
1026 
1027 		pa = req->rxreq_pa;
1028 		va = req->rxreq_va;
1029 
1030 		if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
1031 			/* remap the page */
1032 			mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
1033 			mmu[0].val = ((pa - XPMAP_OFFSET) >> PAGE_SHIFT);
1034 			MULTI_update_va_mapping(&mcl[0], va,
1035 			    (ma << PAGE_SHIFT) | PG_V | PG_KW, UVMF_TLB_FLUSH|UVMF_ALL);
1036 			xpmap_phys_to_machine_mapping[
1037 			    (pa - XPMAP_OFFSET) >> PAGE_SHIFT] = ma;
1038 			mcl[1].op = __HYPERVISOR_mmu_update;
1039 			mcl[1].args[0] = (unsigned long)mmu;
1040 			mcl[1].args[1] = 1;
1041 			mcl[1].args[2] = 0;
1042 			mcl[1].args[3] = DOMID_SELF;
1043 			HYPERVISOR_multicall(mcl, 2);
1044 		}
1045 
1046 		pktp = (void *)(va + rx->offset);
1047 #ifdef XENNET_DEBUG_DUMP
1048 		xennet_hex_dump(pktp, rx->status, "r", rx->id);
1049 #endif
1050 		if ((ifp->if_flags & IFF_PROMISC) == 0) {
1051 			struct ether_header *eh = pktp;
1052 			if (ETHER_IS_MULTICAST(eh->ether_dhost) == 0 &&
1053 			    memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
1054 			    ETHER_ADDR_LEN) != 0) {
1055 				DPRINTFN(XEDB_EVENT,
1056 				    ("xennet_handler bad dest\n"));
1057 				/* packet not for us */
1058 				xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE,
1059 				    req);
1060 				continue;
1061 			}
1062 		}
1063 		MGETHDR(m, M_DONTWAIT, MT_DATA);
1064 		if (__predict_false(m == NULL)) {
1065 			printf("%s: rx no mbuf\n", ifp->if_xname);
1066 			ifp->if_ierrors++;
1067 			xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE, req);
1068 			continue;
1069 		}
1070 		MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
1071 
1072 		m->m_pkthdr.rcvif = ifp;
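		/*
		 * If RX requests are still outstanding in the ring, hand the
		 * receive page to the stack as external mbuf storage
		 * (zero-copy); otherwise copy the data below so the buffer
		 * can be recycled into the ring right away.
		 */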
1073 		if (__predict_true(sc->sc_rx_ring.req_prod_pvt !=
1074 		    sc->sc_rx_ring.sring->rsp_prod)) {
1075 			m->m_len = m->m_pkthdr.len = rx->status;
1076 			MEXTADD(m, pktp, rx->status,
1077 			    M_DEVBUF, xennet_rx_mbuf_free, req);
1078 			m->m_flags |= M_EXT_RW; /* we own the buffer */
1079 			req->rxreq_gntref = GRANT_STACK_REF;
1080 		} else {
1081 			/*
1082 			 * This was our last receive buffer, allocate
1083 			 * memory, copy data and push the receive
1084 			 * buffer back to the hypervisor.
1085 			 */
1086 			m->m_len = min(MHLEN, rx->status);
1087 			m->m_pkthdr.len = 0;
1088 			m_copyback(m, 0, rx->status, pktp);
1089 			xennet_rx_mbuf_free(NULL, (void *)va, PAGE_SIZE, req);
1090 			if (m->m_pkthdr.len < rx->status) {
1091 				/* out of memory, just drop packets */
1092 				ifp->if_ierrors++;
1093 				m_freem(m);
1094 				continue;
1095 			}
1096 		}
1097 		if ((rx->flags & NETRXF_csum_blank) != 0) {
1098 			xennet_checksum_fill(&m);
1099 			if (m == NULL) {
1100 				ifp->if_ierrors++;
1101 				continue;
1102 			}
1103 		}
1104 		/*
1105 		 * Pass packet to bpf if there is a listener.
1106 		 */
1107 		bpf_mtap(ifp, m);
1108 
1109 		ifp->if_ipackets++;
1110 
1111 		/* Pass the packet up. */
1112 		(*ifp->if_input)(ifp, m);
1113 	}
1114 	xen_rmb();
1115 	sc->sc_rx_ring.rsp_cons = i;
1116 	RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_rx_ring, more_to_do);
1117 	if (more_to_do)
1118 		goto again;
1119 
1120 	return 1;
1121 }
1122 
1123 /*
1124  * The output routine of a xennet interface
1125  * Called at splnet.
1126  */
1127 void
1128 xennet_start(struct ifnet *ifp)
1129 {
1130 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1131 
1132 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start()\n", device_xname(sc->sc_dev)));
1133 
1134 #if NRND > 0
1135 	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
1136 #endif
1137 
1138 	xennet_tx_complete(sc);
1139 
1140 	if (__predict_false(
1141 	    (ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING))
1142 		return;
1143 
1144 	/*
1145 	 * The Xen communication channel is much more efficient if we can
1146 	 * schedule a batch of packets for domain0. To achieve this, we
1147 	 * schedule a soft interrupt, and just return. This way, the network
1148 	 * stack will enqueue all pending mbufs in the interface's send queue
1149 	 * before it is processed by xennet_softstart().
1150 	 */
1151 	softint_schedule(sc->sc_softintr);
1152 	return;
1153 }
1154 
1155 /*
1156  * Prepare mbufs for TX, and notify the backend when finished.
1157  * Called at splsoftnet.
1158  */
1159 void
1160 xennet_softstart(void *arg)
1161 {
1162 	struct xennet_xenbus_softc *sc = arg;
1163 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1164 	struct mbuf *m, *new_m;
1165 	netif_tx_request_t *txreq;
1166 	RING_IDX req_prod;
1167 	paddr_t pa, pa2;
1168 	struct xennet_txreq *req;
1169 	int notify;
1170 	int do_notify = 0;
1171 	int s;
1172 
1173 	s = splnet();
1174 
1175 	if (__predict_false(
1176 	    (ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)) {
1177 		splx(s);
1178 		return;
1179 	}
1180 
1181 	req_prod = sc->sc_tx_ring.req_prod_pvt;
1182 	while (/*CONSTCOND*/1) {
1183 		uint16_t txflags;
1184 
1185 		req = SLIST_FIRST(&sc->sc_txreq_head);
1186 		if (__predict_false(req == NULL)) {
1187 			ifp->if_flags |= IFF_OACTIVE;
1188 			break;
1189 		}
1190 		IFQ_POLL(&ifp->if_snd, m);
1191 		if (m == NULL)
1192 			break;
1193 
1194 		switch (m->m_flags & (M_EXT|M_EXT_CLUSTER)) {
1195 		case M_EXT|M_EXT_CLUSTER:
1196 			KASSERT(m->m_ext.ext_paddr != M_PADDR_INVALID);
1197 			pa = m->m_ext.ext_paddr +
1198 				(m->m_data - m->m_ext.ext_buf);
1199 			break;
1200 		case 0:
1201 			KASSERT(m->m_paddr != M_PADDR_INVALID);
1202 			pa = m->m_paddr + M_BUFOFFSET(m) +
1203 				(m->m_data - M_BUFADDR(m));
1204 			break;
1205 		default:
1206 			if (__predict_false(
1207 			    !pmap_extract(pmap_kernel(), (vaddr_t)m->m_data,
1208 			    &pa))) {
1209 				panic("xennet_softstart: no pa");
1210 			}
1211 			break;
1212 		}
1213 
1214 		if ((m->m_pkthdr.csum_flags &
1215 		    (M_CSUM_TCPv4 | M_CSUM_UDPv4)) != 0) {
1216 			txflags = NETTXF_csum_blank;
1217 		} else {
1218 			txflags = 0;
1219 		}
1220 
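		/*
		 * A single TX request can only describe a contiguous range
		 * within one page, so if the packet spans several mbufs or
		 * its data crosses a page boundary, copy it into a fresh
		 * single-segment mbuf first.
		 */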
1221 		if (m->m_pkthdr.len != m->m_len ||
1222 		    (pa ^ (pa + m->m_pkthdr.len - 1)) & PG_FRAME) {
1223 
1224 			MGETHDR(new_m, M_DONTWAIT, MT_DATA);
1225 			if (__predict_false(new_m == NULL)) {
1226 				printf("%s: cannot allocate new mbuf\n",
1227 				       device_xname(sc->sc_dev));
1228 				break;
1229 			}
1230 			if (m->m_pkthdr.len > MHLEN) {
1231 				MCLGET(new_m, M_DONTWAIT);
1232 				if (__predict_false(
1233 				    (new_m->m_flags & M_EXT) == 0)) {
1234 					DPRINTF(("%s: no mbuf cluster\n",
1235 					    device_xname(sc->sc_dev)));
1236 					m_freem(new_m);
1237 					break;
1238 				}
1239 			}
1240 
1241 			m_copydata(m, 0, m->m_pkthdr.len, mtod(new_m, void *));
1242 			new_m->m_len = new_m->m_pkthdr.len = m->m_pkthdr.len;
1243 
1244 			if ((new_m->m_flags & M_EXT) != 0) {
1245 				pa = new_m->m_ext.ext_paddr;
1246 				KASSERT(new_m->m_data == new_m->m_ext.ext_buf);
1247 				KASSERT(pa != M_PADDR_INVALID);
1248 			} else {
1249 				pa = new_m->m_paddr;
1250 				KASSERT(pa != M_PADDR_INVALID);
1251 				KASSERT(new_m->m_data == M_BUFADDR(new_m));
1252 				pa += M_BUFOFFSET(new_m);
1253 			}
1254 			if (__predict_false(xengnt_grant_access(
1255 			    sc->sc_xbusd->xbusd_otherend_id,
1256 			    xpmap_ptom_masked(pa),
1257 			    GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1258 				m_freem(new_m);
1259 				ifp->if_flags |= IFF_OACTIVE;
1260 				break;
1261 			}
1262 			/* we will be able to send new_m */
1263 			IFQ_DEQUEUE(&ifp->if_snd, m);
1264 			m_freem(m);
1265 			m = new_m;
1266 		} else {
1267 			if (__predict_false(xengnt_grant_access(
1268 			    sc->sc_xbusd->xbusd_otherend_id,
1269 			    xpmap_ptom_masked(pa),
1270 			    GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1271 				ifp->if_flags |= IFF_OACTIVE;
1272 				break;
1273 			}
1274 			/* we will be able to send m */
1275 			IFQ_DEQUEUE(&ifp->if_snd, m);
1276 		}
1277 		MCLAIM(m, &sc->sc_ethercom.ec_tx_mowner);
1278 
1279 		KASSERT(((pa ^ (pa + m->m_pkthdr.len -  1)) & PG_FRAME) == 0);
1280 
1281 		SLIST_REMOVE_HEAD(&sc->sc_txreq_head, txreq_next);
1282 		req->txreq_m = m;
1283 
1284 		DPRINTFN(XEDB_MBUF, ("xennet_start id %d, "
1285 		    "mbuf %p, buf %p/%p/%p, size %d\n",
1286 		    req->txreq_id, m, mtod(m, void *), (void *)pa,
1287 		    (void *)xpmap_ptom_masked(pa), m->m_pkthdr.len));
1288 		pmap_extract_ma(pmap_kernel(), mtod(m, vaddr_t), &pa2);
1289 		DPRINTFN(XEDB_MBUF, ("xennet_start pa %p ma %p/%p\n",
1290 		    (void *)pa, (void *)xpmap_ptom_masked(pa), (void *)pa2));
1291 #ifdef XENNET_DEBUG_DUMP
1292 		xennet_hex_dump(mtod(m, u_char *), m->m_pkthdr.len, "s",
1293 			       	req->txreq_id);
1294 #endif
1295 
1296 		txreq = RING_GET_REQUEST(&sc->sc_tx_ring, req_prod);
1297 		txreq->id = req->txreq_id;
1298 		txreq->gref = req->txreq_gntref;
1299 		txreq->offset = pa & ~PG_FRAME;
1300 		txreq->size = m->m_pkthdr.len;
1301 		txreq->flags = txflags;
1302 
1303 		req_prod++;
1304 		sc->sc_tx_ring.req_prod_pvt = req_prod;
1305 		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_tx_ring, notify);
1306 		if (notify)
1307 			do_notify = 1;
1308 
1309 #ifdef XENNET_DEBUG
1310 		DPRINTFN(XEDB_MEM, ("packet addr %p/%p, physical %p/%p, "
1311 		    "m_paddr %p, len %d/%d\n", M_BUFADDR(m), mtod(m, void *),
1312 		    (void *)*kvtopte(mtod(m, vaddr_t)),
1313 		    (void *)xpmap_mtop(*kvtopte(mtod(m, vaddr_t))),
1314 		    (void *)m->m_paddr, m->m_pkthdr.len, m->m_len));
1315 		DPRINTFN(XEDB_MEM, ("id %d gref %d offset %d size %d flags %d"
1316 		    " prod %d\n",
1317 		    txreq->id, txreq->gref, txreq->offset, txreq->size,
1318 		    txreq->flags, req_prod));
1319 #endif
1320 
1321 		/*
1322 		 * Pass packet to bpf if there is a listener.
1323 		 */
1324 		bpf_mtap(ifp, m);
1325 	}
1326 
1327 	if (do_notify) {
1328 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
1329 		ifp->if_timer = 5;
1330 	}
1331 
1332 	splx(s);
1333 
1334 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start() done\n",
1335 	    device_xname(sc->sc_dev)));
1336 }
1337 
1338 int
1339 xennet_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1340 {
1341 #ifdef XENNET_DEBUG
1342 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1343 #endif
1344 	int s, error = 0;
1345 
1346 	s = splnet();
1347 
1348 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl()\n",
1349 	    device_xname(sc->sc_dev)));
1350 	error = ether_ioctl(ifp, cmd, data);
1351 	if (error == ENETRESET)
1352 		error = 0;
1353 	splx(s);
1354 
1355 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() returning %d\n",
1356 	    device_xname(sc->sc_dev), error));
1357 
1358 	return error;
1359 }
1360 
1361 void
1362 xennet_watchdog(struct ifnet *ifp)
1363 {
1364 	aprint_verbose_ifnet(ifp, "xennet_watchdog\n");
1365 }
1366 
1367 int
1368 xennet_init(struct ifnet *ifp)
1369 {
1370 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1371 	int s = splnet();
1372 
1373 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_init()\n",
1374 	    device_xname(sc->sc_dev)));
1375 
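	/*
	 * On first init, arm rsp_event so the backend notifies us for the
	 * next RX response, then unmask and kick the event channel.
	 */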
1376 	if ((ifp->if_flags & IFF_RUNNING) == 0) {
1377 		sc->sc_rx_ring.sring->rsp_event =
1378 		    sc->sc_rx_ring.rsp_cons + 1;
1379 		hypervisor_enable_event(sc->sc_evtchn);
1380 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
1381 		xennet_reset(sc);
1382 	}
1383 	ifp->if_flags |= IFF_RUNNING;
1384 	ifp->if_flags &= ~IFF_OACTIVE;
1385 	ifp->if_timer = 0;
1386 	splx(s);
1387 	return 0;
1388 }
1389 
1390 void
1391 xennet_stop(struct ifnet *ifp, int disable)
1392 {
1393 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1394 	int s = splnet();
1395 
1396 	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1397 	hypervisor_mask_event(sc->sc_evtchn);
1398 	xennet_reset(sc);
1399 	splx(s);
1400 }
1401 
1402 void
1403 xennet_reset(struct xennet_xenbus_softc *sc)
1404 {
1405 
1406 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_reset()\n",
1407 	    device_xname(sc->sc_dev)));
1408 }
1409 
1410 #if defined(NFS_BOOT_BOOTSTATIC)
1411 int
1412 xennet_bootstatic_callback(struct nfs_diskless *nd)
1413 {
1414 #if 0
1415 	struct ifnet *ifp = nd->nd_ifp;
1416 	struct xennet_xenbus_softc *sc =
1417 	    (struct xennet_xenbus_softc *)ifp->if_softc;
1418 #endif
1419 	int flags = 0;
1420 	union xen_cmdline_parseinfo xcp;
1421 	struct sockaddr_in *sin;
1422 
1423 	memset(&xcp, 0, sizeof(xcp.xcp_netinfo));
1424 	xcp.xcp_netinfo.xi_ifno = /* XXX sc->sc_ifno */ 0;
1425 	xcp.xcp_netinfo.xi_root = nd->nd_root.ndm_host;
1426 	xen_parse_cmdline(XEN_PARSE_NETINFO, &xcp);
1427 
1428 	if (xcp.xcp_netinfo.xi_root[0] != '\0') {
1429 		flags |= NFS_BOOT_HAS_SERVER;
1430 		if (strchr(xcp.xcp_netinfo.xi_root, ':') != NULL)
1431 			flags |= NFS_BOOT_HAS_ROOTPATH;
1432 	}
1433 
1434 	nd->nd_myip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[0]);
1435 	nd->nd_gwip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[2]);
1436 	nd->nd_mask.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[3]);
1437 
1438 	sin = (struct sockaddr_in *) &nd->nd_root.ndm_saddr;
1439 	memset((void *)sin, 0, sizeof(*sin));
1440 	sin->sin_len = sizeof(*sin);
1441 	sin->sin_family = AF_INET;
1442 	sin->sin_addr.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[1]);
1443 
1444 	if (nd->nd_myip.s_addr)
1445 		flags |= NFS_BOOT_HAS_MYIP;
1446 	if (nd->nd_gwip.s_addr)
1447 		flags |= NFS_BOOT_HAS_GWIP;
1448 	if (nd->nd_mask.s_addr)
1449 		flags |= NFS_BOOT_HAS_MASK;
1450 	if (sin->sin_addr.s_addr)
1451 		flags |= NFS_BOOT_HAS_SERVADDR;
1452 
1453 	return flags;
1454 }
1455 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
1456 
1457 #ifdef XENNET_DEBUG_DUMP
1458 #define XCHR(x) hexdigits[(x) & 0xf]
1459 static void
1460 xennet_hex_dump(const unsigned char *pkt, size_t len, const char *type, int id)
1461 {
1462 	size_t i, j;
1463 
1464 	printf("pkt %p len %zd/%zx type %s id %d\n", pkt, len, len, type, id);
1465 	printf("00000000  ");
1466 	for(i=0; i<len; i++) {
1467 		printf("%c%c ", XCHR(pkt[i]>>4), XCHR(pkt[i]));
1468 		if ((i+1) % 16 == 8)
1469 			printf(" ");
1470 		if ((i+1) % 16 == 0) {
1471 			printf(" %c", '|');
1472 			for(j=0; j<16; j++)
1473 				printf("%c", pkt[i-15+j]>=32 &&
1474 				    pkt[i-15+j]<127?pkt[i-15+j]:'.');
1475 			printf("%c\n%c%c%c%c%c%c%c%c  ", '|',
1476 			    XCHR((i+1)>>28), XCHR((i+1)>>24),
1477 			    XCHR((i+1)>>20), XCHR((i+1)>>16),
1478 			    XCHR((i+1)>>12), XCHR((i+1)>>8),
1479 			    XCHR((i+1)>>4), XCHR(i+1));
1480 		}
1481 	}
1482 	printf("\n");
1483 }
1484 #undef XCHR
1485 #endif
1486