xref: /netbsd-src/sys/arch/xen/xen/if_xennet_xenbus.c (revision c38e7cc395b1472a774ff828e46123de44c628e9)
1 /*      $NetBSD: if_xennet_xenbus.c,v 1.74 2018/01/25 17:41:49 riastradh Exp $      */
2 
3 /*
4  * Copyright (c) 2006 Manuel Bouyer.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /*
28  * Copyright (c) 2004 Christian Limpach.
29  * All rights reserved.
30  *
31  * Redistribution and use in source and binary forms, with or without
32  * modification, are permitted provided that the following conditions
33  * are met:
34  * 1. Redistributions of source code must retain the above copyright
35  *    notice, this list of conditions and the following disclaimer.
36  * 2. Redistributions in binary form must reproduce the above copyright
37  *    notice, this list of conditions and the following disclaimer in the
38  *    documentation and/or other materials provided with the distribution.
39  *
40  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
41  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
42  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
43  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
44  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
46  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
47  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
48  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
49  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50  */
51 
52 /*
53  * This file contains the xennet frontend code required for the network
54  * communication between two Xen domains.
55  * It resembles xbd, but is a little more complex as it must deal with two
56  * rings:
57  * - the TX ring, to transmit packets to backend (inside => outside)
58  * - the RX ring, to receive packets from backend (outside => inside)
59  *
60  * The principles are as follows.
61  *
62  * For TX:
63  * The purpose is to transmit packets to the outside. The entry point is
64  * xennet_start() (the default output routine of xennet), which schedules a
65  * softint, xennet_softstart(). xennet_softstart() generates the requests
66  * associated with the queued TX mbufs (see altq(9)).
67  * The backend's responses are processed by xennet_tx_complete(), called either
68  * from:
69  * - xennet_start()
70  * - xennet_handler(), during an asynchronous event notification from backend
71  *   (similar to an IRQ).
72  *
73  * For RX:
74  * The purpose is to process the packets received from the outside. RX buffers
75  * are pre-allocated through xennet_alloc_rx_buffer() during xennet autoconf
76  * attach. During pre-allocation, the frontend pushes requests into the I/O
77  * ring in preparation for incoming packets from the backend.
78  * When RX packets need to be processed, the backend takes the requests
79  * previously offered by the frontend and pushes the associated responses into
80  * the I/O ring. When done, it notifies the frontend through an event
81  * notification, which asynchronously calls xennet_handler() in the frontend.
82  * xennet_handler() processes the responses, generates the associated mbufs,
83  * and passes them to the MI layer for further processing.
84  */
85 
86 #include <sys/cdefs.h>
87 __KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.74 2018/01/25 17:41:49 riastradh Exp $");
88 
89 #include "opt_xen.h"
90 #include "opt_nfs_boot.h"
91 
92 #include <sys/param.h>
93 #include <sys/device.h>
94 #include <sys/conf.h>
95 #include <sys/kernel.h>
96 #include <sys/proc.h>
97 #include <sys/systm.h>
98 #include <sys/intr.h>
99 #include <sys/rndsource.h>
100 
101 #include <net/if.h>
102 #include <net/if_dl.h>
103 #include <net/if_ether.h>
104 #include <net/bpf.h>
105 #include <net/bpfdesc.h>
106 
107 #if defined(NFS_BOOT_BOOTSTATIC)
108 #include <sys/fstypes.h>
109 #include <sys/mount.h>
110 #include <sys/statvfs.h>
111 #include <netinet/in.h>
112 #include <nfs/rpcv2.h>
113 #include <nfs/nfsproto.h>
114 #include <nfs/nfs.h>
115 #include <nfs/nfsmount.h>
116 #include <nfs/nfsdiskless.h>
117 #include <xen/if_xennetvar.h>
118 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
119 
120 #include <xen/xennet_checksum.h>
121 
122 #include <uvm/uvm.h>
123 
124 #include <xen/hypervisor.h>
125 #include <xen/evtchn.h>
126 #include <xen/granttables.h>
127 #include <xen/xen-public/io/netif.h>
128 #include <xen/xenpmap.h>
129 
130 #include <xen/xenbus.h>
131 #include "locators.h"
132 
133 #undef XENNET_DEBUG_DUMP
134 #undef XENNET_DEBUG
135 #ifdef XENNET_DEBUG
136 #define XEDB_FOLLOW     0x01
137 #define XEDB_INIT       0x02
138 #define XEDB_EVENT      0x04
139 #define XEDB_MBUF       0x08
140 #define XEDB_MEM        0x10
141 int xennet_debug = 0xff;
142 #define DPRINTF(x) if (xennet_debug) printf x;
143 #define DPRINTFN(n,x) if (xennet_debug & (n)) printf x;
144 #else
145 #define DPRINTF(x)
146 #define DPRINTFN(n,x)
147 #endif
148 
149 extern pt_entry_t xpmap_pg_nx;
150 
151 #define GRANT_INVALID_REF -1 /* entry is free */
152 
153 #define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
154 #define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
155 
156 struct xennet_txreq {
157 	SLIST_ENTRY(xennet_txreq) txreq_next;
158 	uint16_t txreq_id; /* ID passed to backend */
159 	grant_ref_t txreq_gntref; /* grant ref of this request */
160 	struct mbuf *txreq_m; /* mbuf being transmitted */
161 };
162 
163 struct xennet_rxreq {
164 	SLIST_ENTRY(xennet_rxreq) rxreq_next;
165 	uint16_t rxreq_id; /* ID passed to backend */
166 	grant_ref_t rxreq_gntref; /* grant ref of this request */
167 /* va/pa for this receive buf. ma will be provided by backend */
168 	paddr_t rxreq_pa;
169 	vaddr_t rxreq_va;
170 	struct xennet_xenbus_softc *rxreq_sc; /* pointer to our interface */
171 };
172 
173 struct xennet_xenbus_softc {
174 	device_t sc_dev;
175 	struct ethercom sc_ethercom;
176 	uint8_t sc_enaddr[6];
177 	struct xenbus_device *sc_xbusd;
178 
179 	netif_tx_front_ring_t sc_tx_ring;
180 	netif_rx_front_ring_t sc_rx_ring;
181 
182 	unsigned int sc_evtchn;
183 	void *sc_softintr;
184 	struct intrhand *sc_ih;
185 
186 	grant_ref_t sc_tx_ring_gntref;
187 	grant_ref_t sc_rx_ring_gntref;
188 
189 	kmutex_t sc_tx_lock; /* protects free TX list, below */
190 	kmutex_t sc_rx_lock; /* protects free RX list, below */
191 	struct xennet_txreq sc_txreqs[NET_TX_RING_SIZE];
192 	struct xennet_rxreq sc_rxreqs[NET_RX_RING_SIZE];
193 	SLIST_HEAD(,xennet_txreq) sc_txreq_head; /* list of free TX requests */
194 	SLIST_HEAD(,xennet_rxreq) sc_rxreq_head; /* list of free RX requests */
195 	int sc_free_rxreql; /* number of free receive request structs */
196 
197 	int sc_backend_status; /* our status with backend */
198 #define BEST_CLOSED		0
199 #define BEST_DISCONNECTED	1
200 #define BEST_CONNECTED		2
201 #define BEST_SUSPENDED		3
202 	unsigned long sc_rx_feature;
203 #define FEATURE_RX_FLIP		0
204 #define FEATURE_RX_COPY		1
205 	krndsource_t sc_rnd_source;
206 };
207 #define SC_NLIVEREQ(sc) ((sc)->sc_rx_ring.req_prod_pvt - \
208 			    (sc)->sc_rx_ring.sring->rsp_prod)
209 
210 /* too big to be on stack */
211 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
212 static u_long xennet_pages[NET_RX_RING_SIZE];
213 
214 static pool_cache_t if_xennetrxbuf_cache;
215 static int if_xennetrxbuf_cache_inited=0;
216 
217 static int  xennet_xenbus_match(device_t, cfdata_t, void *);
218 static void xennet_xenbus_attach(device_t, device_t, void *);
219 static int  xennet_xenbus_detach(device_t, int);
220 static void xennet_backend_changed(void *, XenbusState);
221 
222 static void xennet_alloc_rx_buffer(struct xennet_xenbus_softc *);
223 static void xennet_free_rx_buffer(struct xennet_xenbus_softc *);
224 static void xennet_tx_complete(struct xennet_xenbus_softc *);
225 static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *);
226 static void xennet_rx_free_req(struct xennet_rxreq *);
227 static int  xennet_handler(void *);
228 static bool xennet_talk_to_backend(struct xennet_xenbus_softc *);
229 #ifdef XENNET_DEBUG_DUMP
230 static void xennet_hex_dump(const unsigned char *, size_t, const char *, int);
231 #endif
232 
233 static int  xennet_init(struct ifnet *);
234 static void xennet_stop(struct ifnet *, int);
235 static void xennet_reset(struct xennet_xenbus_softc *);
236 static void xennet_softstart(void *);
237 static void xennet_start(struct ifnet *);
238 static int  xennet_ioctl(struct ifnet *, u_long, void *);
239 static void xennet_watchdog(struct ifnet *);
240 
241 static bool xennet_xenbus_suspend(device_t dev, const pmf_qual_t *);
242 static bool xennet_xenbus_resume (device_t dev, const pmf_qual_t *);
243 
244 CFATTACH_DECL_NEW(xennet, sizeof(struct xennet_xenbus_softc),
245    xennet_xenbus_match, xennet_xenbus_attach, xennet_xenbus_detach, NULL);
246 
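/*
 * Match a "vif" device advertised by xenbus, honouring the id locator
 * from the kernel configuration if one was specified.
 */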
247 static int
248 xennet_xenbus_match(device_t parent, cfdata_t match, void *aux)
249 {
250 	struct xenbusdev_attach_args *xa = aux;
251 
252 	if (strcmp(xa->xa_type, "vif") != 0)
253 		return 0;
254 
255 	if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
256 	    match->cf_loc[XENBUSCF_ID] != xa->xa_id)
257 		return 0;
258 
259 	return 1;
260 }
261 
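/*
 * Attach the interface: set up the free TX/RX request lists and the RX
 * buffers, read the MAC address from xenstore, attach the ifnet, and
 * allocate the shared rings used to talk to the backend.
 */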
262 static void
263 xennet_xenbus_attach(device_t parent, device_t self, void *aux)
264 {
265 	struct xennet_xenbus_softc *sc = device_private(self);
266 	struct xenbusdev_attach_args *xa = aux;
267 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
268 	int err;
269 	netif_tx_sring_t *tx_ring;
270 	netif_rx_sring_t *rx_ring;
271 	RING_IDX i;
272 	char *val, *e, *p;
273 	int s;
274 	extern int ifqmaxlen; /* XXX */
275 #ifdef XENNET_DEBUG
276 	char **dir;
277 	int dir_n = 0;
278 	char id_str[20];
279 #endif
280 
281 	aprint_normal(": Xen Virtual Network Interface\n");
282 	sc->sc_dev = self;
283 
284 #ifdef XENNET_DEBUG
285 	printf("path: %s\n", xa->xa_xbusd->xbusd_path);
286 	snprintf(id_str, sizeof(id_str), "%d", xa->xa_id);
287 	err = xenbus_directory(NULL, "device/vif", id_str, &dir_n, &dir);
288 	if (err) {
289 		aprint_error_dev(self, "xenbus_directory err %d\n", err);
290 	} else {
291 		printf("%s/\n", xa->xa_xbusd->xbusd_path);
292 		for (i = 0; i < dir_n; i++) {
293 			printf("\t/%s", dir[i]);
294 			err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path,
295 				          dir[i], NULL, &val);
296 			if (err) {
297 				aprint_error_dev(self, "xenbus_read err %d\n",
298 					         err);
299 			} else {
300 				printf(" = %s\n", val);
301 				free(val, M_DEVBUF);
302 			}
303 		}
304 	}
305 #endif /* XENNET_DEBUG */
306 	sc->sc_xbusd = xa->xa_xbusd;
307 	sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;
308 
309 	/* xenbus ensures two devices can't be probed at the same time */
310 	if (if_xennetrxbuf_cache_inited == 0) {
311 		if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
312 		    "xnfrx", NULL, IPL_VM, NULL, NULL, NULL);
313 		if_xennetrxbuf_cache_inited = 1;
314 	}
315 
316 	/* initialize free TX and RX request lists */
317 	mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET);
318 	SLIST_INIT(&sc->sc_txreq_head);
319 	for (i = 0; i < NET_TX_RING_SIZE; i++) {
320 		sc->sc_txreqs[i].txreq_id = i;
321 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, &sc->sc_txreqs[i],
322 		    txreq_next);
323 	}
324 	mutex_init(&sc->sc_rx_lock, MUTEX_DEFAULT, IPL_NET);
325 	SLIST_INIT(&sc->sc_rxreq_head);
326 	s = splvm();
327 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
328 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
329 		rxreq->rxreq_id = i;
330 		rxreq->rxreq_sc = sc;
331 		rxreq->rxreq_va = (vaddr_t)pool_cache_get_paddr(
332 		    if_xennetrxbuf_cache, PR_WAITOK, &rxreq->rxreq_pa);
333 		if (rxreq->rxreq_va == 0)
334 			break;
335 		rxreq->rxreq_gntref = GRANT_INVALID_REF;
336 		SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next);
337 	}
338 	splx(s);
339 	sc->sc_free_rxreql = i;
340 	if (sc->sc_free_rxreql == 0) {
341 		aprint_error_dev(self, "failed to allocate rx memory\n");
342 		return;
343 	}
344 
345 	/* read mac address */
346 	err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path, "mac", NULL, &val);
347 	if (err) {
348 		aprint_error_dev(self, "can't read mac address, err %d\n", err);
349 		return;
350 	}
351 	for (i = 0, p = val; i < 6; i++) {
352 		sc->sc_enaddr[i] = strtoul(p, &e, 16);
353 		if ((e[0] == '\0' && i != 5) && e[0] != ':') {
354 			aprint_error_dev(self,
355 			    "%s is not a valid mac address\n", val);
356 			free(val, M_DEVBUF);
357 			return;
358 		}
359 		p = &e[1];
360 	}
361 	free(val, M_DEVBUF);
362 	aprint_normal_dev(self, "MAC address %s\n",
363 	    ether_sprintf(sc->sc_enaddr));
364 	/* Initialize ifnet structure and attach interface */
365 	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
366 	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
367 	ifp->if_softc = sc;
368 	ifp->if_start = xennet_start;
369 	ifp->if_ioctl = xennet_ioctl;
370 	ifp->if_watchdog = xennet_watchdog;
371 	ifp->if_init = xennet_init;
372 	ifp->if_stop = xennet_stop;
373 	ifp->if_flags = IFF_BROADCAST|IFF_SIMPLEX|IFF_NOTRAILERS|IFF_MULTICAST;
374 	ifp->if_timer = 0;
375 	ifp->if_snd.ifq_maxlen = max(ifqmaxlen, NET_TX_RING_SIZE * 2);
376 	ifp->if_capabilities = IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx;
377 	IFQ_SET_READY(&ifp->if_snd);
378 	if_attach(ifp);
379 	ether_ifattach(ifp, sc->sc_enaddr);
380 	sc->sc_softintr = softint_establish(SOFTINT_NET, xennet_softstart, sc);
381 	if (sc->sc_softintr == NULL)
382 		panic("%s: can't establish soft interrupt",
383 			device_xname(self));
384 
385 	/* alloc shared rings */
386 	tx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
387 	    UVM_KMF_WIRED);
388 	rx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
389 	    UVM_KMF_WIRED);
390 	if (tx_ring == NULL || rx_ring == NULL)
391 		panic("%s: can't alloc rings", device_xname(self));
392 
393 	sc->sc_tx_ring.sring = tx_ring;
394 	sc->sc_rx_ring.sring = rx_ring;
395 
396 	/* resume shared structures and tell backend that we are ready */
397 	if (xennet_xenbus_resume(self, PMF_Q_NONE) == false) {
398 		uvm_km_free(kernel_map, (vaddr_t)tx_ring, PAGE_SIZE,
399 		    UVM_KMF_WIRED);
400 		uvm_km_free(kernel_map, (vaddr_t)rx_ring, PAGE_SIZE,
401 		    UVM_KMF_WIRED);
402 		return;
403 	}
404 
405 	rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev),
406 	    RND_TYPE_NET, RND_FLAG_DEFAULT);
407 
408 	if (!pmf_device_register(self, xennet_xenbus_suspend,
409 	    xennet_xenbus_resume))
410 		aprint_error_dev(self, "couldn't establish power handler\n");
411 	else
412 		pmf_class_network_register(self, ifp);
413 }
414 
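/*
 * Detach the interface: drain pending TX/RX traffic, release the RX
 * buffers and the shared rings, and detach from the network stack.
 */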
415 static int
416 xennet_xenbus_detach(device_t self, int flags)
417 {
418 	struct xennet_xenbus_softc *sc = device_private(self);
419 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
420 	int s0, s1;
421 	RING_IDX i;
422 
423 	DPRINTF(("%s: xennet_xenbus_detach\n", device_xname(self)));
424 	s0 = splnet();
425 	xennet_stop(ifp, 1);
426 	intr_disestablish(sc->sc_ih);
427 	/* wait for pending TX to complete, and collect pending RX packets */
428 	xennet_handler(sc);
429 	while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
430 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_detach", hz/2);
431 		xennet_handler(sc);
432 	}
433 	xennet_free_rx_buffer(sc);
434 
435 	s1 = splvm();
436 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
437 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
438 		uvm_km_free(kernel_map, rxreq->rxreq_va, PAGE_SIZE,
439 		    UVM_KMF_WIRED);
440 	}
441 	splx(s1);
442 
443 	ether_ifdetach(ifp);
444 	if_detach(ifp);
445 
446 	/* Unhook the entropy source. */
447 	rnd_detach_source(&sc->sc_rnd_source);
448 
449 	while (xengnt_status(sc->sc_tx_ring_gntref)) {
450 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_txref", hz/2);
451 	}
452 	xengnt_revoke_access(sc->sc_tx_ring_gntref);
453 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_tx_ring.sring, PAGE_SIZE,
454 	    UVM_KMF_WIRED);
455 	while (xengnt_status(sc->sc_rx_ring_gntref)) {
456 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_rxref", hz/2);
457 	}
458 	xengnt_revoke_access(sc->sc_rx_ring_gntref);
459 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_rx_ring.sring, PAGE_SIZE,
460 	    UVM_KMF_WIRED);
461 	softint_disestablish(sc->sc_softintr);
462 	splx(s0);
463 
464 	pmf_device_deregister(self);
465 
466 	DPRINTF(("%s: xennet_xenbus_detach done\n", device_xname(self)));
467 	return 0;
468 }
469 
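/*
 * (Re)initialize the shared rings, grant the backend access to them and
 * allocate the event channel.  Called at attach time and when resuming
 * after a suspend.
 */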
470 static bool
471 xennet_xenbus_resume(device_t dev, const pmf_qual_t *qual)
472 {
473 	struct xennet_xenbus_softc *sc = device_private(dev);
474 	int error;
475 	netif_tx_sring_t *tx_ring;
476 	netif_rx_sring_t *rx_ring;
477 	paddr_t ma;
478 
479 	/* invalidate the RX and TX rings */
480 	if (sc->sc_backend_status == BEST_SUSPENDED) {
481 		/*
482 		 * Device was suspended, so ensure that the accesses associated
483 		 * with the previous RX and TX rings are revoked.
484 		 */
485 		xengnt_revoke_access(sc->sc_tx_ring_gntref);
486 		xengnt_revoke_access(sc->sc_rx_ring_gntref);
487 	}
488 
489 	sc->sc_tx_ring_gntref = GRANT_INVALID_REF;
490 	sc->sc_rx_ring_gntref = GRANT_INVALID_REF;
491 
492 	tx_ring = sc->sc_tx_ring.sring;
493 	rx_ring = sc->sc_rx_ring.sring;
494 
495 	/* Initialize rings */
496 	memset(tx_ring, 0, PAGE_SIZE);
497 	SHARED_RING_INIT(tx_ring);
498 	FRONT_RING_INIT(&sc->sc_tx_ring, tx_ring, PAGE_SIZE);
499 
500 	memset(rx_ring, 0, PAGE_SIZE);
501 	SHARED_RING_INIT(rx_ring);
502 	FRONT_RING_INIT(&sc->sc_rx_ring, rx_ring, PAGE_SIZE);
503 
504 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)tx_ring, &ma);
505 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_tx_ring_gntref);
506 	if (error)
507 		goto abort_resume;
508 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)rx_ring, &ma);
509 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_rx_ring_gntref);
510 	if (error)
511 		goto abort_resume;
512 	error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
513 	if (error)
514 		goto abort_resume;
515 	aprint_verbose_dev(dev, "using event channel %d\n",
516 	    sc->sc_evtchn);
517 	sc->sc_ih = intr_establish_xname(0, &xen_pic, sc->sc_evtchn, IST_LEVEL,
518 	    IPL_NET, &xennet_handler, sc, false, device_xname(dev));
519 	KASSERT(sc->sc_ih != NULL);
520 	return true;
521 
522 abort_resume:
523 	xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device");
524 	return false;
525 }
526 
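/*
 * Negotiate the RX mode (copy or flip) with the backend and publish the
 * ring references, event channel and features in xenstore, within a
 * single transaction retried on EAGAIN.
 */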
527 static bool
528 xennet_talk_to_backend(struct xennet_xenbus_softc *sc)
529 {
530 	int error;
531 	unsigned long rx_copy;
532 	struct xenbus_transaction *xbt;
533 	const char *errmsg;
534 
535 	error = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
536 	    "feature-rx-copy", &rx_copy, 10);
537 	if (error)
538 		rx_copy = 0; /* default value if key is absent */
539 
540 	if (rx_copy == 1) {
541 		aprint_normal_dev(sc->sc_dev, "using RX copy mode\n");
542 		sc->sc_rx_feature = FEATURE_RX_COPY;
543 	} else {
544 		aprint_normal_dev(sc->sc_dev, "using RX flip mode\n");
545 		sc->sc_rx_feature = FEATURE_RX_FLIP;
546 	}
547 
548 again:
549 	xbt = xenbus_transaction_start();
550 	if (xbt == NULL)
551 		return false;
552 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
553 	    "vifname", "%s", device_xname(sc->sc_dev));
554 	if (error) {
555 		errmsg = "vifname";
556 		goto abort_transaction;
557 	}
558 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
559 	    "tx-ring-ref","%u", sc->sc_tx_ring_gntref);
560 	if (error) {
561 		errmsg = "writing tx ring-ref";
562 		goto abort_transaction;
563 	}
564 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
565 	    "rx-ring-ref","%u", sc->sc_rx_ring_gntref);
566 	if (error) {
567 		errmsg = "writing rx ring-ref";
568 		goto abort_transaction;
569 	}
570 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
571 	    "request-rx-copy", "%lu", rx_copy);
572 	if (error) {
573 		errmsg = "writing request-rx-copy";
574 		goto abort_transaction;
575 	}
576 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
577 	    "feature-rx-notify", "%u", 1);
578 	if (error) {
579 		errmsg = "writing feature-rx-notify";
580 		goto abort_transaction;
581 	}
582 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
583 	    "event-channel", "%u", sc->sc_evtchn);
584 	if (error) {
585 		errmsg = "writing event channel";
586 		goto abort_transaction;
587 	}
588 	error = xenbus_transaction_end(xbt, 0);
589 	if (error == EAGAIN)
590 		goto again;
591 	if (error) {
592 		xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
593 		return false;
594 	}
595 	mutex_enter(&sc->sc_rx_lock);
596 	xennet_alloc_rx_buffer(sc);
597 	mutex_exit(&sc->sc_rx_lock);
598 
599 	if (sc->sc_backend_status == BEST_SUSPENDED) {
600 		xenbus_device_resume(sc->sc_xbusd);
601 	}
602 
603 	sc->sc_backend_status = BEST_CONNECTED;
604 
605 	return true;
606 
607 abort_transaction:
608 	xenbus_transaction_end(xbt, 1);
609 	xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg);
610 	return false;
611 }
612 
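/*
 * Suspend handler: wait for the backend to consume the outstanding TX
 * requests, then tear down the event channel.  RX grants are deliberately
 * left in place (see the comment below).
 */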
613 static bool
614 xennet_xenbus_suspend(device_t dev, const pmf_qual_t *qual)
615 {
616 	int s;
617 	struct xennet_xenbus_softc *sc = device_private(dev);
618 
619 	/*
620 	 * xennet_stop() is called by pmf(9) before xennet_xenbus_suspend(),
621 	 * so we do not mask the event channel here.
622 	 */
623 
624 	s = splnet();
625 	/* process any outstanding TX responses, then collect RX packets */
626 	xennet_handler(sc);
627 	while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
628 		tsleep(xennet_xenbus_suspend, PRIBIO, "xnet_suspend", hz/2);
629 		xennet_handler(sc);
630 	}
631 
632 	/*
633 	 * dom0 may still use references to the grants we gave away
634 	 * earlier during RX buffer allocation. So we do not free RX buffers
635 	 * here, as dom0 does not expect the guest domain to suddenly revoke
636 	 * access to these grants.
637 	 */
638 
639 	sc->sc_backend_status = BEST_SUSPENDED;
640 	intr_disestablish(sc->sc_ih);
641 
642 	splx(s);
643 
644 	xenbus_device_suspend(sc->sc_xbusd);
645 	aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn);
646 
647 	return true;
648 }
649 
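/*
 * Callback invoked by xenbus when the backend changes state; drives the
 * frontend through the connection handshake.
 */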
650 static void xennet_backend_changed(void *arg, XenbusState new_state)
651 {
652 	struct xennet_xenbus_softc *sc = device_private((device_t)arg);
653 	DPRINTF(("%s: new backend state %d\n",
654 	    device_xname(sc->sc_dev), new_state));
655 
656 	switch (new_state) {
657 	case XenbusStateInitialising:
658 	case XenbusStateInitialised:
659 	case XenbusStateConnected:
660 		break;
661 	case XenbusStateClosing:
662 		sc->sc_backend_status = BEST_CLOSED;
663 		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
664 		break;
665 	case XenbusStateInitWait:
666 		if (sc->sc_backend_status == BEST_CONNECTED)
667 			break;
668 		if (xennet_talk_to_backend(sc))
669 			xenbus_switch_state(sc->sc_xbusd, NULL,
670 			    XenbusStateConnected);
671 		break;
672 	case XenbusStateUnknown:
673 	default:
674 		panic("bad backend state %d", new_state);
675 	}
676 }
677 
678 /*
679  * Allocate RX buffers and put the associated request structures
680  * in the ring. This allows the backend to use them to communicate with
681  * the frontend when some data is destined for the frontend.
682  */
683 
684 static void
685 xennet_alloc_rx_buffer(struct xennet_xenbus_softc *sc)
686 {
687 	RING_IDX req_prod = sc->sc_rx_ring.req_prod_pvt;
688 	RING_IDX i;
689 	struct xennet_rxreq *req;
690 	struct xen_memory_reservation reservation;
691 	int s, otherend_id, notify;
692 
693 	otherend_id = sc->sc_xbusd->xbusd_otherend_id;
694 
695 	KASSERT(mutex_owned(&sc->sc_rx_lock));
696 	for (i = 0; sc->sc_free_rxreql != 0; i++) {
697 		req  = SLIST_FIRST(&sc->sc_rxreq_head);
698 		KASSERT(req != NULL);
699 		KASSERT(req == &sc->sc_rxreqs[req->rxreq_id]);
700 		RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->id =
701 		    req->rxreq_id;
702 
703 		switch (sc->sc_rx_feature) {
704 		case FEATURE_RX_COPY:
705 			if (xengnt_grant_access(otherend_id,
706 			    xpmap_ptom_masked(req->rxreq_pa),
707 			    0, &req->rxreq_gntref) != 0) {
708 				goto out_loop;
709 			}
710 			break;
711 		case FEATURE_RX_FLIP:
712 			if (xengnt_grant_transfer(otherend_id,
713 			    &req->rxreq_gntref) != 0) {
714 				goto out_loop;
715 			}
716 			break;
717 		default:
718 			panic("%s: unsupported RX feature mode: %ld\n",
719 			    __func__, sc->sc_rx_feature);
720 		}
721 
722 		RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->gref =
723 		    req->rxreq_gntref;
724 
725 		SLIST_REMOVE_HEAD(&sc->sc_rxreq_head, rxreq_next);
726 		sc->sc_free_rxreql--;
727 
728 		if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
729 			/* unmap the page */
730 			MULTI_update_va_mapping(&rx_mcl[i],
731 			    req->rxreq_va, 0, 0);
732 			/*
733 			 * Remove this page from the pseudo-phys map before
734 			 * passing it back to Xen.
735 			 */
736 			xennet_pages[i] =
737 			    xpmap_ptom(req->rxreq_pa) >> PAGE_SHIFT;
738 			xpmap_ptom_unmap(req->rxreq_pa);
739 		}
740 	}
741 
742 out_loop:
743 	if (i == 0) {
744 		return;
745 	}
746 
747 	if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
748 		/* also make sure to flush all TLB entries */
749 		rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
750 		    UVMF_TLB_FLUSH | UVMF_ALL;
751 		/*
752 		 * We may have allocated buffers which have entries
753 		 * outstanding in the page update queue -- make sure we flush
754 		 * those first!
755 		 */
756 		s = splvm();
757 		xpq_flush_queue();
758 		splx(s);
759 		/* now decrease reservation */
760 		set_xen_guest_handle(reservation.extent_start, xennet_pages);
761 		reservation.nr_extents = i;
762 		reservation.extent_order = 0;
763 		reservation.address_bits = 0;
764 		reservation.domid = DOMID_SELF;
765 		rx_mcl[i].op = __HYPERVISOR_memory_op;
766 		rx_mcl[i].args[0] = XENMEM_decrease_reservation;
767 		rx_mcl[i].args[1] = (unsigned long)&reservation;
768 		HYPERVISOR_multicall(rx_mcl, i+1);
769 		if (__predict_false(rx_mcl[i].result != i)) {
770 			panic("xennet_alloc_rx_buffer: "
771 			    "XENMEM_decrease_reservation");
772 		}
773 	}
774 
775 	sc->sc_rx_ring.req_prod_pvt = req_prod + i;
776 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_rx_ring, notify);
777 	if (notify)
778 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
779 	return;
780 }
781 
782 /*
783  * Reclaim all RX buffers used by the I/O ring between frontend and backend
784  */
785 static void
786 xennet_free_rx_buffer(struct xennet_xenbus_softc *sc)
787 {
788 	paddr_t ma, pa;
789 	vaddr_t va;
790 	RING_IDX i;
791 	mmu_update_t mmu[1];
792 	multicall_entry_t mcl[2];
793 
794 	mutex_enter(&sc->sc_rx_lock);
795 
796 	DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev)));
797 	/* get back memory from RX ring */
798 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
799 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
800 
801 		if (rxreq->rxreq_gntref != GRANT_INVALID_REF) {
802 			/*
803 			 * this req is still granted. Get back the page or
804 			 * allocate a new one, and remap it.
805 			 */
806 			SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq,
807 			    rxreq_next);
808 			sc->sc_free_rxreql++;
809 
810 			switch (sc->sc_rx_feature) {
811 			case FEATURE_RX_COPY:
812 				xengnt_revoke_access(rxreq->rxreq_gntref);
813 				rxreq->rxreq_gntref = GRANT_INVALID_REF;
814 				break;
815 			case FEATURE_RX_FLIP:
816 				ma = xengnt_revoke_transfer(
817 				    rxreq->rxreq_gntref);
818 				rxreq->rxreq_gntref = GRANT_INVALID_REF;
819 				if (ma == 0) {
820 					u_long pfn;
821 					struct xen_memory_reservation xenres;
822 					/*
823 					 * Transfer not complete, we lost the page.
824 					 * Get one from the hypervisor.
825 					 */
826 					set_xen_guest_handle(
827 					    xenres.extent_start, &pfn);
828 					xenres.nr_extents = 1;
829 					xenres.extent_order = 0;
830 					xenres.address_bits = 31;
831 					xenres.domid = DOMID_SELF;
832 					if (HYPERVISOR_memory_op(
833 					    XENMEM_increase_reservation, &xenres) < 0) {
834 						panic("xennet_free_rx_buffer: "
835 						    "can't get memory back");
836 					}
837 					ma = pfn;
838 					KASSERT(ma != 0);
839 				}
840 				pa = rxreq->rxreq_pa;
841 				va = rxreq->rxreq_va;
842 				/* remap the page */
843 				mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
844 				mmu[0].val = pa >> PAGE_SHIFT;
845 				MULTI_update_va_mapping(&mcl[0], va,
846 				    (ma << PAGE_SHIFT) | PG_V | PG_KW | xpmap_pg_nx,
847 				    UVMF_TLB_FLUSH|UVMF_ALL);
848 				xpmap_ptom_map(pa, ptoa(ma));
849 				mcl[1].op = __HYPERVISOR_mmu_update;
850 				mcl[1].args[0] = (unsigned long)mmu;
851 				mcl[1].args[1] = 1;
852 				mcl[1].args[2] = 0;
853 				mcl[1].args[3] = DOMID_SELF;
854 				HYPERVISOR_multicall(mcl, 2);
855 				break;
856 			default:
857 				panic("%s: unsupported RX feature mode: %ld\n",
858 				    __func__, sc->sc_rx_feature);
859 			}
860 		}
861 
862 	}
863 	mutex_exit(&sc->sc_rx_lock);
864 	DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev)));
865 }
866 
867 /*
868  * Clears a used RX request when its associated mbuf has been processed
869  */
870 static void
871 xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg)
872 {
873 	int s = splnet();
874 	KASSERT(buf == m->m_ext.ext_buf);
875 	KASSERT(arg == NULL);
876 	KASSERT(m != NULL);
877 	vaddr_t va = (vaddr_t)(buf) & ~((vaddr_t)PAGE_MASK);
878 	pool_cache_put_paddr(if_xennetrxbuf_cache,
879 	    (void *)va, m->m_ext.ext_paddr);
880 	pool_cache_put(mb_cache, m);
881 	splx(s);
882 };
883 
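/*
 * Put a processed RX request back on the free list and, once enough free
 * requests have accumulated, replenish the ring with fresh RX buffers.
 */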
884 static void
885 xennet_rx_free_req(struct xennet_rxreq *req)
886 {
887 	struct xennet_xenbus_softc *sc = req->rxreq_sc;
888 
889 	KASSERT(mutex_owned(&sc->sc_rx_lock));
890 
891 	/* put the RX request back on the list of free RX requests */
892 	SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next);
893 	sc->sc_free_rxreql++;
894 
895 	/*
896 	 * If the ring needs more requests to be pushed in, allocate some
897 	 * RX buffers to catch up with the backend's consumption.
898 	 */
899 	req->rxreq_gntref = GRANT_INVALID_REF;
900 
901 	if (sc->sc_free_rxreql >= (NET_RX_RING_SIZE * 4 / 5) &&
902 	    __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
903 		xennet_alloc_rx_buffer(sc);
904 	}
905 }
906 
907 /*
908  * Process the responses associated with the TX mbufs previously sent
909  * through xennet_softstart().
910  * Called at splnet.
911  */
912 static void
913 xennet_tx_complete(struct xennet_xenbus_softc *sc)
914 {
915 	struct xennet_txreq *req;
916 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
917 	RING_IDX resp_prod, i;
918 
919 	DPRINTFN(XEDB_EVENT, ("xennet_tx_complete prod %d cons %d\n",
920 	    sc->sc_tx_ring.sring->rsp_prod, sc->sc_tx_ring.rsp_cons));
921 
922 again:
923 	resp_prod = sc->sc_tx_ring.sring->rsp_prod;
924 	xen_rmb();
925 	mutex_enter(&sc->sc_tx_lock);
926 	for (i = sc->sc_tx_ring.rsp_cons; i != resp_prod; i++) {
927 		req = &sc->sc_txreqs[RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id];
928 		KASSERT(req->txreq_id ==
929 		    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id);
930 		if (__predict_false(xengnt_status(req->txreq_gntref))) {
931 			aprint_verbose_dev(sc->sc_dev,
932 			    "grant still used by backend\n");
933 			sc->sc_tx_ring.rsp_cons = i;
934 			goto end;
935 		}
936 		if (__predict_false(
937 		    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->status !=
938 		    NETIF_RSP_OKAY))
939 			ifp->if_oerrors++;
940 		else
941 			ifp->if_opackets++;
942 		xengnt_revoke_access(req->txreq_gntref);
943 		m_freem(req->txreq_m);
944 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
945 	}
946 	mutex_exit(&sc->sc_tx_lock);
947 
948 	sc->sc_tx_ring.rsp_cons = resp_prod;
949 	/* set new event and check for race with rsp_cons update */
950 	sc->sc_tx_ring.sring->rsp_event =
951 	    resp_prod + ((sc->sc_tx_ring.sring->req_prod - resp_prod) >> 1) + 1;
952 	ifp->if_timer = 0;
953 	xen_wmb();
954 	if (resp_prod != sc->sc_tx_ring.sring->rsp_prod)
955 		goto again;
956 end:
957 	if (ifp->if_flags & IFF_OACTIVE) {
958 		ifp->if_flags &= ~IFF_OACTIVE;
959 		softint_schedule(sc->sc_softintr);
960 	}
961 }
962 
963 /*
964  * Xennet event handler.
965  * Get the outstanding responses for TX packets, then collect all responses
966  * for pending RX packets.
967  * Called at splnet.
968  */
969 static int
970 xennet_handler(void *arg)
971 {
972 	struct xennet_xenbus_softc *sc = arg;
973 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
974 	RING_IDX resp_prod, i;
975 	struct xennet_rxreq *req;
976 	paddr_t ma, pa;
977 	vaddr_t va;
978 	mmu_update_t mmu[1];
979 	multicall_entry_t mcl[2];
980 	struct mbuf *m;
981 	void *pktp;
982 	int more_to_do;
983 
984 	if (sc->sc_backend_status != BEST_CONNECTED)
985 		return 1;
986 
987 	xennet_tx_complete(sc);
988 
989 	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
990 
991 again:
992 	DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n",
993 	    sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons));
994 
995 	mutex_enter(&sc->sc_rx_lock);
996 	resp_prod = sc->sc_rx_ring.sring->rsp_prod;
997 	xen_rmb(); /* ensure we see replies up to resp_prod */
998 
999 	for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
1000 		netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
1001 		req = &sc->sc_rxreqs[rx->id];
1002 		KASSERT(req->rxreq_gntref != GRANT_INVALID_REF);
1003 		KASSERT(req->rxreq_id == rx->id);
1004 
1005 		ma = 0;
1006 		switch (sc->sc_rx_feature) {
1007 		case FEATURE_RX_COPY:
1008 			xengnt_revoke_access(req->rxreq_gntref);
1009 			break;
1010 		case FEATURE_RX_FLIP:
1011 			ma = xengnt_revoke_transfer(req->rxreq_gntref);
1012 			if (ma == 0) {
1013 				DPRINTFN(XEDB_EVENT, ("xennet_handler ma == 0\n"));
1014 				/*
1015 				 * The remote couldn't send us a packet.
1016 				 * We can't free this rxreq as no page will be mapped
1017 				 * here. Instead give it back immediately to the backend.
1018 				 */
1019 				ifp->if_ierrors++;
1020 				RING_GET_REQUEST(&sc->sc_rx_ring,
1021 				    sc->sc_rx_ring.req_prod_pvt)->id = req->rxreq_id;
1022 				RING_GET_REQUEST(&sc->sc_rx_ring,
1023 				    sc->sc_rx_ring.req_prod_pvt)->gref =
1024 					req->rxreq_gntref;
1025 				sc->sc_rx_ring.req_prod_pvt++;
1026 				RING_PUSH_REQUESTS(&sc->sc_rx_ring);
1027 				continue;
1028 			}
1029 			break;
1030 		default:
1031 			panic("%s: unsupported RX feature mode: %ld\n",
1032 			    __func__, sc->sc_rx_feature);
1033 		}
1034 
1035 		pa = req->rxreq_pa;
1036 		va = req->rxreq_va;
1037 
1038 		if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
1039 			/* remap the page */
1040 			mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
1041 			mmu[0].val = pa >> PAGE_SHIFT;
1042 			MULTI_update_va_mapping(&mcl[0], va,
1043 			    (ma << PAGE_SHIFT) | PG_V | PG_KW | xpmap_pg_nx,
1044 			    UVMF_TLB_FLUSH|UVMF_ALL);
1045 			xpmap_ptom_map(pa, ptoa(ma));
1046 			mcl[1].op = __HYPERVISOR_mmu_update;
1047 			mcl[1].args[0] = (unsigned long)mmu;
1048 			mcl[1].args[1] = 1;
1049 			mcl[1].args[2] = 0;
1050 			mcl[1].args[3] = DOMID_SELF;
1051 			HYPERVISOR_multicall(mcl, 2);
1052 		}
1053 
1054 		pktp = (void *)(va + rx->offset);
1055 #ifdef XENNET_DEBUG_DUMP
1056 		xennet_hex_dump(pktp, rx->status, "r", rx->id);
1057 #endif
1058 		if ((ifp->if_flags & IFF_PROMISC) == 0) {
1059 			struct ether_header *eh = pktp;
1060 			if (ETHER_IS_MULTICAST(eh->ether_dhost) == 0 &&
1061 			    memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
1062 			    ETHER_ADDR_LEN) != 0) {
1063 				DPRINTFN(XEDB_EVENT,
1064 				    ("xennet_handler bad dest\n"));
1065 				/* packet not for us */
1066 				xennet_rx_free_req(req);
1067 				continue;
1068 			}
1069 		}
1070 		MGETHDR(m, M_DONTWAIT, MT_DATA);
1071 		if (__predict_false(m == NULL)) {
1072 			printf("%s: rx no mbuf\n", ifp->if_xname);
1073 			ifp->if_ierrors++;
1074 			xennet_rx_free_req(req);
1075 			continue;
1076 		}
1077 		MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
1078 
1079 		m_set_rcvif(m, ifp);
1080 		if (rx->status <= MHLEN) {
1081 			/* small packet; copy to mbuf data area */
1082 			m_copyback(m, 0, rx->status, pktp);
1083 			KASSERT(m->m_pkthdr.len == rx->status);
1084 			KASSERT(m->m_len == rx->status);
1085 		} else {
1086 			/* large packet; attach buffer to mbuf */
1087 			req->rxreq_va = (vaddr_t)pool_cache_get_paddr(
1088 			    if_xennetrxbuf_cache, PR_NOWAIT, &req->rxreq_pa);
1089 			if (__predict_false(req->rxreq_va == 0)) {
1090 				printf("%s: rx no buf\n", ifp->if_xname);
1091 				ifp->if_ierrors++;
1092 				req->rxreq_va = va;
1093 				req->rxreq_pa = pa;
1094 				xennet_rx_free_req(req);
1095 				m_freem(m);
1096 				continue;
1097 			}
1098 			m->m_len = m->m_pkthdr.len = rx->status;
1099 			MEXTADD(m, pktp, rx->status,
1100 			    M_DEVBUF, xennet_rx_mbuf_free, NULL);
1101 			m->m_ext.ext_paddr = pa;
1102 			m->m_flags |= M_EXT_RW; /* we own the buffer */
1103 		}
1104 		if ((rx->flags & NETRXF_csum_blank) != 0) {
1105 			xennet_checksum_fill(&m);
1106 			if (m == NULL) {
1107 				ifp->if_ierrors++;
1108 				xennet_rx_free_req(req);
1109 				continue;
1110 			}
1111 		}
1112 		/* freeing the req may overwrite *rx, better to do it late */
1113 		xennet_rx_free_req(req);
1114 
1115 		/* Pass the packet up. */
1116 		if_percpuq_enqueue(ifp->if_percpuq, m);
1117 	}
1118 	xen_rmb();
1119 	sc->sc_rx_ring.rsp_cons = i;
1120 	RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_rx_ring, more_to_do);
1121 	mutex_exit(&sc->sc_rx_lock);
1122 
1123 	if (more_to_do)
1124 		goto again;
1125 
1126 	return 1;
1127 }
1128 
1129 /*
1130  * The output routine of a xennet interface.
1131  * Called at splnet.
1132  */
1133 void
1134 xennet_start(struct ifnet *ifp)
1135 {
1136 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1137 
1138 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start()\n", device_xname(sc->sc_dev)));
1139 
1140 	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
1141 
1142 	xennet_tx_complete(sc);
1143 
1144 	if (__predict_false(
1145 	    (ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING))
1146 		return;
1147 
1148 	/*
1149 	 * The Xen communication channel is much more efficient if we can
1150 	 * schedule a batch of packets for domain0. To achieve this, we
1151 	 * schedule a soft interrupt, and just return. This way, the network
1152 	 * stack will enqueue all pending mbufs in the interface's send queue
1153 	 * before it is processed by xennet_softstart().
1154 	 */
1155 	softint_schedule(sc->sc_softintr);
1156 	return;
1157 }
1158 
1159 /*
1160  * Prepare mbufs for TX and notify the backend when finished.
1161  * Called at splsoftnet.
1162  */
1163 void
1164 xennet_softstart(void *arg)
1165 {
1166 	struct xennet_xenbus_softc *sc = arg;
1167 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1168 	struct mbuf *m, *new_m;
1169 	netif_tx_request_t *txreq;
1170 	RING_IDX req_prod;
1171 	paddr_t pa, pa2;
1172 	struct xennet_txreq *req;
1173 	int notify;
1174 	int do_notify = 0;
1175 
1176 	mutex_enter(&sc->sc_tx_lock);
1177 	if (__predict_false(
1178 	    (ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)) {
1179 		mutex_exit(&sc->sc_tx_lock);
1180 		return;
1181 	}
1182 
1183 	req_prod = sc->sc_tx_ring.req_prod_pvt;
1184 	while (/*CONSTCOND*/1) {
1185 		uint16_t txflags;
1186 
1187 		req = SLIST_FIRST(&sc->sc_txreq_head);
1188 		if (__predict_false(req == NULL)) {
1189 			ifp->if_flags |= IFF_OACTIVE;
1190 			break;
1191 		}
1192 		IFQ_POLL(&ifp->if_snd, m);
1193 		if (m == NULL)
1194 			break;
1195 
1196 		switch (m->m_flags & (M_EXT|M_EXT_CLUSTER)) {
1197 		case M_EXT|M_EXT_CLUSTER:
1198 			KASSERT(m->m_ext.ext_paddr != M_PADDR_INVALID);
1199 			pa = m->m_ext.ext_paddr +
1200 				(m->m_data - m->m_ext.ext_buf);
1201 			break;
1202 		case 0:
1203 			KASSERT(m->m_paddr != M_PADDR_INVALID);
1204 			pa = m->m_paddr + M_BUFOFFSET(m) +
1205 				(m->m_data - M_BUFADDR(m));
1206 			break;
1207 		default:
1208 			if (__predict_false(
1209 			    !pmap_extract(pmap_kernel(), (vaddr_t)m->m_data,
1210 			    &pa))) {
1211 				panic("xennet_softstart: no pa");
1212 			}
1213 			break;
1214 		}
1215 
1216 		if ((m->m_pkthdr.csum_flags &
1217 		    (M_CSUM_TCPv4 | M_CSUM_UDPv4)) != 0) {
1218 			txflags = NETTXF_csum_blank;
1219 		} else {
1220 			txflags = 0;
1221 		}
1222 
1223 		if (m->m_pkthdr.len != m->m_len ||
1224 		    (pa ^ (pa + m->m_pkthdr.len - 1)) & PG_FRAME) {
1225 
1226 			MGETHDR(new_m, M_DONTWAIT, MT_DATA);
1227 			if (__predict_false(new_m == NULL)) {
1228 				printf("%s: cannot allocate new mbuf\n",
1229 				       device_xname(sc->sc_dev));
1230 				break;
1231 			}
1232 			if (m->m_pkthdr.len > MHLEN) {
1233 				MCLGET(new_m, M_DONTWAIT);
1234 				if (__predict_false(
1235 				    (new_m->m_flags & M_EXT) == 0)) {
1236 					DPRINTF(("%s: no mbuf cluster\n",
1237 					    device_xname(sc->sc_dev)));
1238 					m_freem(new_m);
1239 					break;
1240 				}
1241 			}
1242 
1243 			m_copydata(m, 0, m->m_pkthdr.len, mtod(new_m, void *));
1244 			new_m->m_len = new_m->m_pkthdr.len = m->m_pkthdr.len;
1245 
1246 			if ((new_m->m_flags & M_EXT) != 0) {
1247 				pa = new_m->m_ext.ext_paddr;
1248 				KASSERT(new_m->m_data == new_m->m_ext.ext_buf);
1249 				KASSERT(pa != M_PADDR_INVALID);
1250 			} else {
1251 				pa = new_m->m_paddr;
1252 				KASSERT(pa != M_PADDR_INVALID);
1253 				KASSERT(new_m->m_data == M_BUFADDR(new_m));
1254 				pa += M_BUFOFFSET(new_m);
1255 			}
1256 			if (__predict_false(xengnt_grant_access(
1257 			    sc->sc_xbusd->xbusd_otherend_id,
1258 			    xpmap_ptom_masked(pa),
1259 			    GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1260 				m_freem(new_m);
1261 				ifp->if_flags |= IFF_OACTIVE;
1262 				break;
1263 			}
1264 			/* we will be able to send new_m */
1265 			IFQ_DEQUEUE(&ifp->if_snd, m);
1266 			m_freem(m);
1267 			m = new_m;
1268 		} else {
1269 			if (__predict_false(xengnt_grant_access(
1270 			    sc->sc_xbusd->xbusd_otherend_id,
1271 			    xpmap_ptom_masked(pa),
1272 			    GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1273 				ifp->if_flags |= IFF_OACTIVE;
1274 				break;
1275 			}
1276 			/* we will be able to send m */
1277 			IFQ_DEQUEUE(&ifp->if_snd, m);
1278 		}
1279 		MCLAIM(m, &sc->sc_ethercom.ec_tx_mowner);
1280 
1281 		KASSERT(((pa ^ (pa + m->m_pkthdr.len -  1)) & PG_FRAME) == 0);
1282 
1283 		SLIST_REMOVE_HEAD(&sc->sc_txreq_head, txreq_next);
1284 		req->txreq_m = m;
1285 
1286 		DPRINTFN(XEDB_MBUF, ("xennet_start id %d, "
1287 		    "mbuf %p, buf %p/%p/%p, size %d\n",
1288 		    req->txreq_id, m, mtod(m, void *), (void *)pa,
1289 		    (void *)xpmap_ptom_masked(pa), m->m_pkthdr.len));
1290 		pmap_extract_ma(pmap_kernel(), mtod(m, vaddr_t), &pa2);
1291 		DPRINTFN(XEDB_MBUF, ("xennet_start pa %p ma %p/%p\n",
1292 		    (void *)pa, (void *)xpmap_ptom_masked(pa), (void *)pa2));
1293 #ifdef XENNET_DEBUG_DUMP
1294 		xennet_hex_dump(mtod(m, u_char *), m->m_pkthdr.len, "s",
1295 			       	req->txreq_id);
1296 #endif
1297 
1298 		txreq = RING_GET_REQUEST(&sc->sc_tx_ring, req_prod);
1299 		txreq->id = req->txreq_id;
1300 		txreq->gref = req->txreq_gntref;
1301 		txreq->offset = pa & ~PG_FRAME;
1302 		txreq->size = m->m_pkthdr.len;
1303 		txreq->flags = txflags;
1304 
1305 		req_prod++;
1306 		sc->sc_tx_ring.req_prod_pvt = req_prod;
1307 		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_tx_ring, notify);
1308 		if (notify)
1309 			do_notify = 1;
1310 
1311 #ifdef XENNET_DEBUG
1312 		DPRINTFN(XEDB_MEM, ("packet addr %p/%p, physical %p/%p, "
1313 		    "m_paddr %p, len %d/%d\n", M_BUFADDR(m), mtod(m, void *),
1314 		    (void *)*kvtopte(mtod(m, vaddr_t)),
1315 		    (void *)xpmap_mtop(*kvtopte(mtod(m, vaddr_t))),
1316 		    (void *)m->m_paddr, m->m_pkthdr.len, m->m_len));
1317 		DPRINTFN(XEDB_MEM, ("id %d gref %d offset %d size %d flags %d"
1318 		    " prod %d\n",
1319 		    txreq->id, txreq->gref, txreq->offset, txreq->size,
1320 		    txreq->flags, req_prod));
1321 #endif
1322 
1323 		/*
1324 		 * Pass packet to bpf if there is a listener.
1325 		 */
1326 		bpf_mtap(ifp, m);
1327 	}
1328 
1329 	if (do_notify) {
1330 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
1331 		ifp->if_timer = 5;
1332 	}
1333 
1334 	mutex_exit(&sc->sc_tx_lock);
1335 
1336 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start() done\n",
1337 	    device_xname(sc->sc_dev)));
1338 }
1339 
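/* Handle interface ioctls; everything is delegated to ether_ioctl(). */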
1340 int
1341 xennet_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1342 {
1343 #ifdef XENNET_DEBUG
1344 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1345 #endif
1346 	int s, error = 0;
1347 
1348 	s = splnet();
1349 
1350 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl()\n",
1351 	    device_xname(sc->sc_dev)));
1352 	error = ether_ioctl(ifp, cmd, data);
1353 	if (error == ENETRESET)
1354 		error = 0;
1355 	splx(s);
1356 
1357 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() returning %d\n",
1358 	    device_xname(sc->sc_dev), error));
1359 
1360 	return error;
1361 }
1362 
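/* Transmit watchdog; only reports the timeout. */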
1363 void
1364 xennet_watchdog(struct ifnet *ifp)
1365 {
1366 	aprint_verbose_ifnet(ifp, "xennet_watchdog\n");
1367 }
1368 
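/*
 * Bring the interface up: arm the RX ring event, enable the event channel
 * and notify the backend.
 */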
1369 int
1370 xennet_init(struct ifnet *ifp)
1371 {
1372 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1373 	mutex_enter(&sc->sc_rx_lock);
1374 
1375 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_init()\n",
1376 	    device_xname(sc->sc_dev)));
1377 
1378 	if ((ifp->if_flags & IFF_RUNNING) == 0) {
1379 		sc->sc_rx_ring.sring->rsp_event =
1380 		    sc->sc_rx_ring.rsp_cons + 1;
1381 		hypervisor_enable_event(sc->sc_evtchn);
1382 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
1383 		xennet_reset(sc);
1384 	}
1385 	ifp->if_flags |= IFF_RUNNING;
1386 	ifp->if_flags &= ~IFF_OACTIVE;
1387 	ifp->if_timer = 0;
1388 	mutex_exit(&sc->sc_rx_lock);
1389 	return 0;
1390 }
1391 
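/* Bring the interface down and mask the event channel. */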
1392 void
1393 xennet_stop(struct ifnet *ifp, int disable)
1394 {
1395 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1396 
1397 	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1398 	hypervisor_mask_event(sc->sc_evtchn);
1399 	xennet_reset(sc);
1400 }
1401 
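/* Currently a no-op apart from debug tracing. */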
1402 void
1403 xennet_reset(struct xennet_xenbus_softc *sc)
1404 {
1405 
1406 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_reset()\n",
1407 	    device_xname(sc->sc_dev)));
1408 }
1409 
1410 #if defined(NFS_BOOT_BOOTSTATIC)
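/*
 * Fill in the nfs_diskless structure from the network parameters found on
 * the Xen kernel command line, for static NFS root configurations.
 */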
1411 int
1412 xennet_bootstatic_callback(struct nfs_diskless *nd)
1413 {
1414 #if 0
1415 	struct ifnet *ifp = nd->nd_ifp;
1416 	struct xennet_xenbus_softc *sc =
1417 	    (struct xennet_xenbus_softc *)ifp->if_softc;
1418 #endif
1419 	int flags = 0;
1420 	union xen_cmdline_parseinfo xcp;
1421 	struct sockaddr_in *sin;
1422 
1423 	memset(&xcp, 0, sizeof(xcp.xcp_netinfo));
1424 	xcp.xcp_netinfo.xi_ifno = /* XXX sc->sc_ifno */ 0;
1425 	xcp.xcp_netinfo.xi_root = nd->nd_root.ndm_host;
1426 	xen_parse_cmdline(XEN_PARSE_NETINFO, &xcp);
1427 
1428 	if (xcp.xcp_netinfo.xi_root[0] != '\0') {
1429 		flags |= NFS_BOOT_HAS_SERVER;
1430 		if (strchr(xcp.xcp_netinfo.xi_root, ':') != NULL)
1431 			flags |= NFS_BOOT_HAS_ROOTPATH;
1432 	}
1433 
1434 	nd->nd_myip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[0]);
1435 	nd->nd_gwip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[2]);
1436 	nd->nd_mask.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[3]);
1437 
1438 	sin = (struct sockaddr_in *) &nd->nd_root.ndm_saddr;
1439 	memset((void *)sin, 0, sizeof(*sin));
1440 	sin->sin_len = sizeof(*sin);
1441 	sin->sin_family = AF_INET;
1442 	sin->sin_addr.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[1]);
1443 
1444 	if (nd->nd_myip.s_addr)
1445 		flags |= NFS_BOOT_HAS_MYIP;
1446 	if (nd->nd_gwip.s_addr)
1447 		flags |= NFS_BOOT_HAS_GWIP;
1448 	if (nd->nd_mask.s_addr)
1449 		flags |= NFS_BOOT_HAS_MASK;
1450 	if (sin->sin_addr.s_addr)
1451 		flags |= NFS_BOOT_HAS_SERVADDR;
1452 
1453 	return flags;
1454 }
1455 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
1456 
1457 #ifdef XENNET_DEBUG_DUMP
1458 #define XCHR(x) hexdigits[(x) & 0xf]
1459 static void
1460 xennet_hex_dump(const unsigned char *pkt, size_t len, const char *type, int id)
1461 {
1462 	size_t i, j;
1463 
1464 	printf("pkt %p len %zd/%zx type %s id %d\n", pkt, len, len, type, id);
1465 	printf("00000000  ");
1466 	for(i=0; i<len; i++) {
1467 		printf("%c%c ", XCHR(pkt[i]>>4), XCHR(pkt[i]));
1468 		if ((i+1) % 16 == 8)
1469 			printf(" ");
1470 		if ((i+1) % 16 == 0) {
1471 			printf(" %c", '|');
1472 			for(j=0; j<16; j++)
1473 				printf("%c", pkt[i-15+j]>=32 &&
1474 				    pkt[i-15+j]<127?pkt[i-15+j]:'.');
1475 			printf("%c\n%c%c%c%c%c%c%c%c  ", '|',
1476 			    XCHR((i+1)>>28), XCHR((i+1)>>24),
1477 			    XCHR((i+1)>>20), XCHR((i+1)>>16),
1478 			    XCHR((i+1)>>12), XCHR((i+1)>>8),
1479 			    XCHR((i+1)>>4), XCHR(i+1));
1480 		}
1481 	}
1482 	printf("\n");
1483 }
1484 #undef XCHR
1485 #endif
1486