xref: /netbsd-src/sys/arch/xen/xen/if_xennet_xenbus.c (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
1 /*      $NetBSD: if_xennet_xenbus.c,v 1.77 2018/06/26 06:48:00 msaitoh Exp $      */
2 
3 /*
4  * Copyright (c) 2006 Manuel Bouyer.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /*
28  * Copyright (c) 2004 Christian Limpach.
29  * All rights reserved.
30  *
31  * Redistribution and use in source and binary forms, with or without
32  * modification, are permitted provided that the following conditions
33  * are met:
34  * 1. Redistributions of source code must retain the above copyright
35  *    notice, this list of conditions and the following disclaimer.
36  * 2. Redistributions in binary form must reproduce the above copyright
37  *    notice, this list of conditions and the following disclaimer in the
38  *    documentation and/or other materials provided with the distribution.
39  *
40  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
41  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
42  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
43  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
44  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
46  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
47  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
48  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
49  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50  */
51 
52 /*
53  * This file contains the xennet frontend code required for the network
54  * communication between two Xen domains.
55  * It resembles xbd, but is a little more complex as it must deal with two
56  * rings:
57  * - the TX ring, to transmit packets to backend (inside => outside)
58  * - the RX ring, to receive packets from backend (outside => inside)
59  *
60  * The principles are as follows.
61  *
62  * For TX:
63  * Purpose is to transmit packets to the outside. The start of day is in
64  * xennet_start() (default output routine of xennet) that schedules a softint,
65  * xennet_softstart(). xennet_softstart() generates the requests associated
66  * with the queued TX mbufs (see altq(9)).
67  * The backend's responses are processed by xennet_tx_complete(), called either
68  * from:
69  * - xennet_start()
70  * - xennet_handler(), during an asynchronous event notification from backend
71  *   (similar to an IRQ).
72  *
73  * For RX:
74  * Purpose is to process the packets received from the outside. RX buffers
75  * are pre-allocated through xennet_alloc_rx_buffer(), during xennet autoconf
76  * attach. During pre-allocation, frontend pushes requests in the I/O ring, in
77  * preparation for incoming packets from backend.
78  * When RX packets need to be processed, backend takes the requests previously
79  * offered by frontend and pushes the associated responses inside the I/O ring.
80  * When done, it notifies frontend through an event notification, which will
81  * asynchronously call xennet_handler() in frontend.
82  * xennet_handler() processes the responses, generates the associated mbuf, and
83  * passes it to the MI layer for further processing.
84  */
85 
86 #include <sys/cdefs.h>
87 __KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.77 2018/06/26 06:48:00 msaitoh Exp $");
88 
89 #include "opt_xen.h"
90 #include "opt_nfs_boot.h"
91 
92 #include <sys/param.h>
93 #include <sys/device.h>
94 #include <sys/conf.h>
95 #include <sys/kernel.h>
96 #include <sys/proc.h>
97 #include <sys/systm.h>
98 #include <sys/intr.h>
99 #include <sys/rndsource.h>
100 
101 #include <net/if.h>
102 #include <net/if_dl.h>
103 #include <net/if_ether.h>
104 #include <net/bpf.h>
105 
106 #if defined(NFS_BOOT_BOOTSTATIC)
107 #include <sys/fstypes.h>
108 #include <sys/mount.h>
109 #include <sys/statvfs.h>
110 #include <netinet/in.h>
111 #include <nfs/rpcv2.h>
112 #include <nfs/nfsproto.h>
113 #include <nfs/nfs.h>
114 #include <nfs/nfsmount.h>
115 #include <nfs/nfsdiskless.h>
116 #include <xen/if_xennetvar.h>
117 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
118 
119 #include <xen/xennet_checksum.h>
120 
121 #include <uvm/uvm.h>
122 
123 #include <xen/hypervisor.h>
124 #include <xen/evtchn.h>
125 #include <xen/granttables.h>
126 #include <xen/xen-public/io/netif.h>
127 #include <xen/xenpmap.h>
128 
129 #include <xen/xenbus.h>
130 #include "locators.h"
131 
132 #undef XENNET_DEBUG_DUMP
133 #undef XENNET_DEBUG
134 #ifdef XENNET_DEBUG
135 #define XEDB_FOLLOW     0x01
136 #define XEDB_INIT       0x02
137 #define XEDB_EVENT      0x04
138 #define XEDB_MBUF       0x08
139 #define XEDB_MEM        0x10
140 int xennet_debug = 0xff;
141 #define DPRINTF(x) if (xennet_debug) printf x;
142 #define DPRINTFN(n,x) if (xennet_debug & (n)) printf x;
143 #else
144 #define DPRINTF(x)
145 #define DPRINTFN(n,x)
146 #endif
147 
148 extern pt_entry_t xpmap_pg_nx;
149 
150 #define GRANT_INVALID_REF -1 /* entry is free */
151 
152 #define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
153 #define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
154 
155 struct xennet_txreq {
156 	SLIST_ENTRY(xennet_txreq) txreq_next;
157 	uint16_t txreq_id; /* ID passed to backend */
158 	grant_ref_t txreq_gntref; /* grant ref of this request */
159 	struct mbuf *txreq_m; /* mbuf being transmitted */
160 };
161 
162 struct xennet_rxreq {
163 	SLIST_ENTRY(xennet_rxreq) rxreq_next;
164 	uint16_t rxreq_id; /* ID passed to backend */
165 	grant_ref_t rxreq_gntref; /* grant ref of this request */
166 /* va/pa for this receive buf. ma will be provided by backend */
167 	paddr_t rxreq_pa;
168 	vaddr_t rxreq_va;
169 	struct xennet_xenbus_softc *rxreq_sc; /* pointer to our interface */
170 };
171 
172 struct xennet_xenbus_softc {
173 	device_t sc_dev;
174 	struct ethercom sc_ethercom;
175 	uint8_t sc_enaddr[6];
176 	struct xenbus_device *sc_xbusd;
177 
178 	netif_tx_front_ring_t sc_tx_ring;
179 	netif_rx_front_ring_t sc_rx_ring;
180 
181 	unsigned int sc_evtchn;
182 	void *sc_softintr;
183 	struct intrhand *sc_ih;
184 
185 	grant_ref_t sc_tx_ring_gntref;
186 	grant_ref_t sc_rx_ring_gntref;
187 
188 	kmutex_t sc_tx_lock; /* protects free TX list, below */
189 	kmutex_t sc_rx_lock; /* protects free RX list, below */
190 	struct xennet_txreq sc_txreqs[NET_TX_RING_SIZE];
191 	struct xennet_rxreq sc_rxreqs[NET_RX_RING_SIZE];
192 	SLIST_HEAD(,xennet_txreq) sc_txreq_head; /* list of free TX requests */
193 	SLIST_HEAD(,xennet_rxreq) sc_rxreq_head; /* list of free RX requests */
194 	int sc_free_rxreql; /* number of free receive request structs */
195 
196 	int sc_backend_status; /* our status with backend */
197 #define BEST_CLOSED		0
198 #define BEST_DISCONNECTED	1
199 #define BEST_CONNECTED		2
200 #define BEST_SUSPENDED		3
201 	unsigned long sc_rx_feature;
202 #define FEATURE_RX_FLIP		0
203 #define FEATURE_RX_COPY		1
204 	krndsource_t sc_rnd_source;
205 };
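/* number of RX requests the frontend has queued but the backend has not yet answered */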
206 #define SC_NLIVEREQ(sc) ((sc)->sc_rx_ring.req_prod_pvt - \
207 			    (sc)->sc_rx_ring.sring->rsp_prod)
208 
209 /* too big to be on stack */
210 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
211 static u_long xennet_pages[NET_RX_RING_SIZE];
212 
213 static pool_cache_t if_xennetrxbuf_cache;
214 static int if_xennetrxbuf_cache_inited = 0;
215 
216 static int  xennet_xenbus_match(device_t, cfdata_t, void *);
217 static void xennet_xenbus_attach(device_t, device_t, void *);
218 static int  xennet_xenbus_detach(device_t, int);
219 static void xennet_backend_changed(void *, XenbusState);
220 
221 static void xennet_alloc_rx_buffer(struct xennet_xenbus_softc *);
222 static void xennet_free_rx_buffer(struct xennet_xenbus_softc *);
223 static void xennet_tx_complete(struct xennet_xenbus_softc *);
224 static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *);
225 static void xennet_rx_free_req(struct xennet_rxreq *);
226 static int  xennet_handler(void *);
227 static bool xennet_talk_to_backend(struct xennet_xenbus_softc *);
228 #ifdef XENNET_DEBUG_DUMP
229 static void xennet_hex_dump(const unsigned char *, size_t, const char *, int);
230 #endif
231 
232 static int  xennet_init(struct ifnet *);
233 static void xennet_stop(struct ifnet *, int);
234 static void xennet_reset(struct xennet_xenbus_softc *);
235 static void xennet_softstart(void *);
236 static void xennet_start(struct ifnet *);
237 static int  xennet_ioctl(struct ifnet *, u_long, void *);
238 static void xennet_watchdog(struct ifnet *);
239 
240 static bool xennet_xenbus_suspend(device_t dev, const pmf_qual_t *);
241 static bool xennet_xenbus_resume (device_t dev, const pmf_qual_t *);
242 
243 CFATTACH_DECL_NEW(xennet, sizeof(struct xennet_xenbus_softc),
244    xennet_xenbus_match, xennet_xenbus_attach, xennet_xenbus_detach, NULL);
245 
246 static int
247 xennet_xenbus_match(device_t parent, cfdata_t match, void *aux)
248 {
249 	struct xenbusdev_attach_args *xa = aux;
250 
251 	if (strcmp(xa->xa_type, "vif") != 0)
252 		return 0;
253 
254 	if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
255 	    match->cf_loc[XENBUSCF_ID] != xa->xa_id)
256 		return 0;
257 
258 	return 1;
259 }
260 
261 static void
262 xennet_xenbus_attach(device_t parent, device_t self, void *aux)
263 {
264 	struct xennet_xenbus_softc *sc = device_private(self);
265 	struct xenbusdev_attach_args *xa = aux;
266 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
267 	int err;
268 	netif_tx_sring_t *tx_ring;
269 	netif_rx_sring_t *rx_ring;
270 	RING_IDX i;
271 	char *val, *e, *p;
272 	int s;
273 	extern int ifqmaxlen; /* XXX */
274 #ifdef XENNET_DEBUG
275 	char **dir;
276 	int dir_n = 0;
277 	char id_str[20];
278 #endif
279 
280 	aprint_normal(": Xen Virtual Network Interface\n");
281 	sc->sc_dev = self;
282 
283 #ifdef XENNET_DEBUG
284 	printf("path: %s\n", xa->xa_xbusd->xbusd_path);
285 	snprintf(id_str, sizeof(id_str), "%d", xa->xa_id);
286 	err = xenbus_directory(NULL, "device/vif", id_str, &dir_n, &dir);
287 	if (err) {
288 		aprint_error_dev(self, "xenbus_directory err %d\n", err);
289 	} else {
290 		printf("%s/\n", xa->xa_xbusd->xbusd_path);
291 		for (i = 0; i < dir_n; i++) {
292 			printf("\t/%s", dir[i]);
293 			err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path,
294 				          dir[i], NULL, &val);
295 			if (err) {
296 				aprint_error_dev(self, "xenbus_read err %d\n",
297 					         err);
298 			} else {
299 				printf(" = %s\n", val);
300 				free(val, M_DEVBUF);
301 			}
302 		}
303 	}
304 #endif /* XENNET_DEBUG */
305 	sc->sc_xbusd = xa->xa_xbusd;
306 	sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;
307 
308 	/* xenbus ensures two devices can't be probed at the same time */
309 	if (if_xennetrxbuf_cache_inited == 0) {
310 		if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
311 		    "xnfrx", NULL, IPL_VM, NULL, NULL, NULL);
312 		if_xennetrxbuf_cache_inited = 1;
313 	}
314 
315 	/* initialize free TX and RX request lists */
316 	mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET);
317 	SLIST_INIT(&sc->sc_txreq_head);
318 	for (i = 0; i < NET_TX_RING_SIZE; i++) {
319 		sc->sc_txreqs[i].txreq_id = i;
320 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, &sc->sc_txreqs[i],
321 		    txreq_next);
322 	}
323 	mutex_init(&sc->sc_rx_lock, MUTEX_DEFAULT, IPL_NET);
324 	SLIST_INIT(&sc->sc_rxreq_head);
325 	s = splvm(); /* XXXSMP */
326 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
327 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
328 		rxreq->rxreq_id = i;
329 		rxreq->rxreq_sc = sc;
330 		rxreq->rxreq_va = (vaddr_t)pool_cache_get_paddr(
331 		    if_xennetrxbuf_cache, PR_WAITOK, &rxreq->rxreq_pa);
332 		if (rxreq->rxreq_va == 0)
333 			break;
334 		rxreq->rxreq_gntref = GRANT_INVALID_REF;
335 		SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next);
336 	}
337 	splx(s);
338 	sc->sc_free_rxreql = i;
339 	if (sc->sc_free_rxreql == 0) {
340 		aprint_error_dev(self, "failed to allocate rx memory\n");
341 		return;
342 	}
343 
344 	/* read mac address */
345 	err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path, "mac", NULL, &val);
346 	if (err) {
347 		aprint_error_dev(self, "can't read mac address, err %d\n", err);
348 		return;
349 	}
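	/*
	 * The "mac" property is a colon-separated hex string,
	 * e.g. "00:16:3e:xx:xx:xx"; parse one byte per iteration.
	 */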
350 	for (i = 0, p = val; i < 6; i++) {
351 		sc->sc_enaddr[i] = strtoul(p, &e, 16);
352 		if ((e[0] == '\0' && i != 5) && e[0] != ':') {
353 			aprint_error_dev(self,
354 			    "%s is not a valid mac address\n", val);
355 			free(val, M_DEVBUF);
356 			return;
357 		}
358 		p = &e[1];
359 	}
360 	free(val, M_DEVBUF);
361 	aprint_normal_dev(self, "MAC address %s\n",
362 	    ether_sprintf(sc->sc_enaddr));
363 	/* Initialize ifnet structure and attach interface */
364 	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
365 	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
366 	ifp->if_softc = sc;
367 	ifp->if_start = xennet_start;
368 	ifp->if_ioctl = xennet_ioctl;
369 	ifp->if_watchdog = xennet_watchdog;
370 	ifp->if_init = xennet_init;
371 	ifp->if_stop = xennet_stop;
372 	ifp->if_flags = IFF_BROADCAST|IFF_SIMPLEX|IFF_NOTRAILERS|IFF_MULTICAST;
373 	ifp->if_timer = 0;
374 	ifp->if_snd.ifq_maxlen = max(ifqmaxlen, NET_TX_RING_SIZE * 2);
375 	ifp->if_capabilities = IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx;
376 	IFQ_SET_READY(&ifp->if_snd);
377 	if_attach(ifp);
378 	ether_ifattach(ifp, sc->sc_enaddr);
379 	sc->sc_softintr = softint_establish(SOFTINT_NET, xennet_softstart, sc);
380 	if (sc->sc_softintr == NULL)
381 		panic("%s: can't establish soft interrupt",
382 			device_xname(self));
383 
384 	/* alloc shared rings */
385 	tx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
386 	    UVM_KMF_WIRED);
387 	rx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
388 	    UVM_KMF_WIRED);
389 	if (tx_ring == NULL || rx_ring == NULL)
390 		panic("%s: can't alloc rings", device_xname(self));
391 
392 	sc->sc_tx_ring.sring = tx_ring;
393 	sc->sc_rx_ring.sring = rx_ring;
394 
395 	/* resume shared structures and tell backend that we are ready */
396 	if (xennet_xenbus_resume(self, PMF_Q_NONE) == false) {
397 		uvm_km_free(kernel_map, (vaddr_t)tx_ring, PAGE_SIZE,
398 		    UVM_KMF_WIRED);
399 		uvm_km_free(kernel_map, (vaddr_t)rx_ring, PAGE_SIZE,
400 		    UVM_KMF_WIRED);
401 		return;
402 	}
403 
404 	rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev),
405 	    RND_TYPE_NET, RND_FLAG_DEFAULT);
406 
407 	if (!pmf_device_register(self, xennet_xenbus_suspend,
408 	    xennet_xenbus_resume))
409 		aprint_error_dev(self, "couldn't establish power handler\n");
410 	else
411 		pmf_class_network_register(self, ifp);
412 }
413 
414 static int
415 xennet_xenbus_detach(device_t self, int flags)
416 {
417 	struct xennet_xenbus_softc *sc = device_private(self);
418 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
419 	int s0, s1;
420 	RING_IDX i;
421 
422 	DPRINTF(("%s: xennet_xenbus_detach\n", device_xname(self)));
423 	s0 = splnet();
424 	xennet_stop(ifp, 1);
425 	intr_disestablish(sc->sc_ih);
426 	/* wait for pending TX to complete, and collect pending RX packets */
427 	xennet_handler(sc);
428 	while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
429 		/* XXXSMP */
430 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_detach", hz/2);
431 		xennet_handler(sc);
432 	}
433 	xennet_free_rx_buffer(sc);
434 
435 	s1 = splvm(); /* XXXSMP */
436 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
437 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
438 		uvm_km_free(kernel_map, rxreq->rxreq_va, PAGE_SIZE,
439 		    UVM_KMF_WIRED);
440 	}
441 	splx(s1);
442 
443 	ether_ifdetach(ifp);
444 	if_detach(ifp);
445 
446 	/* Unhook the entropy source. */
447 	rnd_detach_source(&sc->sc_rnd_source);
448 
449 	while (xengnt_status(sc->sc_tx_ring_gntref)) {
450 		/* XXXSMP */
451 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_txref", hz/2);
452 	}
453 	xengnt_revoke_access(sc->sc_tx_ring_gntref);
454 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_tx_ring.sring, PAGE_SIZE,
455 	    UVM_KMF_WIRED);
456 	while (xengnt_status(sc->sc_rx_ring_gntref)) {
457 		/* XXXSMP */
458 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_rxref", hz/2);
459 	}
460 	xengnt_revoke_access(sc->sc_rx_ring_gntref);
461 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_rx_ring.sring, PAGE_SIZE,
462 	    UVM_KMF_WIRED);
463 	softint_disestablish(sc->sc_softintr);
464 	splx(s0);
465 
466 	pmf_device_deregister(self);
467 
468 	DPRINTF(("%s: xennet_xenbus_detach done\n", device_xname(self)));
469 	return 0;
470 }
471 
472 static bool
473 xennet_xenbus_resume(device_t dev, const pmf_qual_t *qual)
474 {
475 	struct xennet_xenbus_softc *sc = device_private(dev);
476 	int error;
477 	netif_tx_sring_t *tx_ring;
478 	netif_rx_sring_t *rx_ring;
479 	paddr_t ma;
480 
481 	/* invalidate the RX and TX rings */
482 	if (sc->sc_backend_status == BEST_SUSPENDED) {
483 		/*
484 		 * Device was suspended, so ensure that the grants associated with
485 		 * the previous RX and TX rings are revoked.
486 		 */
487 		xengnt_revoke_access(sc->sc_tx_ring_gntref);
488 		xengnt_revoke_access(sc->sc_rx_ring_gntref);
489 	}
490 
491 	sc->sc_tx_ring_gntref = GRANT_INVALID_REF;
492 	sc->sc_rx_ring_gntref = GRANT_INVALID_REF;
493 
494 	tx_ring = sc->sc_tx_ring.sring;
495 	rx_ring = sc->sc_rx_ring.sring;
496 
497 	/* Initialize rings */
498 	memset(tx_ring, 0, PAGE_SIZE);
499 	SHARED_RING_INIT(tx_ring);
500 	FRONT_RING_INIT(&sc->sc_tx_ring, tx_ring, PAGE_SIZE);
501 
502 	memset(rx_ring, 0, PAGE_SIZE);
503 	SHARED_RING_INIT(rx_ring);
504 	FRONT_RING_INIT(&sc->sc_rx_ring, rx_ring, PAGE_SIZE);
505 
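	/*
	 * Look up the machine address of each shared ring page and grant
	 * the backend access to it, then allocate the event channel.
	 */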
506 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)tx_ring, &ma);
507 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_tx_ring_gntref);
508 	if (error)
509 		goto abort_resume;
510 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)rx_ring, &ma);
511 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_rx_ring_gntref);
512 	if (error)
513 		goto abort_resume;
514 	error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
515 	if (error)
516 		goto abort_resume;
517 	aprint_verbose_dev(dev, "using event channel %d\n",
518 	    sc->sc_evtchn);
519 	sc->sc_ih = intr_establish_xname(0, &xen_pic, sc->sc_evtchn, IST_LEVEL,
520 	    IPL_NET, &xennet_handler, sc, false, device_xname(dev));
521 	KASSERT(sc->sc_ih != NULL);
522 	return true;
523 
524 abort_resume:
525 	xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device");
526 	return false;
527 }
528 
529 static bool
530 xennet_talk_to_backend(struct xennet_xenbus_softc *sc)
531 {
532 	int error;
533 	unsigned long rx_copy;
534 	struct xenbus_transaction *xbt;
535 	const char *errmsg;
536 
537 	error = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
538 	    "feature-rx-copy", &rx_copy, 10);
539 	if (error)
540 		rx_copy = 0; /* default value if key is absent */
541 
542 	if (rx_copy == 1) {
543 		aprint_normal_dev(sc->sc_dev, "using RX copy mode\n");
544 		sc->sc_rx_feature = FEATURE_RX_COPY;
545 	} else {
546 		aprint_normal_dev(sc->sc_dev, "using RX flip mode\n");
547 		sc->sc_rx_feature = FEATURE_RX_FLIP;
548 	}
549 
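	/*
	 * Publish the frontend parameters (vifname, TX/RX ring refs,
	 * request-rx-copy, feature-rx-notify, event-channel) in a single
	 * xenbus transaction; restart the transaction on EAGAIN.
	 */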
550 again:
551 	xbt = xenbus_transaction_start();
552 	if (xbt == NULL)
553 		return false;
554 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
555 	    "vifname", "%s", device_xname(sc->sc_dev));
556 	if (error) {
557 		errmsg = "vifname";
558 		goto abort_transaction;
559 	}
560 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
561 	    "tx-ring-ref","%u", sc->sc_tx_ring_gntref);
562 	if (error) {
563 		errmsg = "writing tx ring-ref";
564 		goto abort_transaction;
565 	}
566 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
567 	    "rx-ring-ref","%u", sc->sc_rx_ring_gntref);
568 	if (error) {
569 		errmsg = "writing rx ring-ref";
570 		goto abort_transaction;
571 	}
572 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
573 	    "request-rx-copy", "%lu", rx_copy);
574 	if (error) {
575 		errmsg = "writing request-rx-copy";
576 		goto abort_transaction;
577 	}
578 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
579 	    "feature-rx-notify", "%u", 1);
580 	if (error) {
581 		errmsg = "writing feature-rx-notify";
582 		goto abort_transaction;
583 	}
584 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
585 	    "event-channel", "%u", sc->sc_evtchn);
586 	if (error) {
587 		errmsg = "writing event channel";
588 		goto abort_transaction;
589 	}
590 	error = xenbus_transaction_end(xbt, 0);
591 	if (error == EAGAIN)
592 		goto again;
593 	if (error) {
594 		xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
595 		return false;
596 	}
597 	mutex_enter(&sc->sc_rx_lock);
598 	xennet_alloc_rx_buffer(sc);
599 	mutex_exit(&sc->sc_rx_lock);
600 
601 	if (sc->sc_backend_status == BEST_SUSPENDED) {
602 		xenbus_device_resume(sc->sc_xbusd);
603 	}
604 
605 	sc->sc_backend_status = BEST_CONNECTED;
606 
607 	return true;
608 
609 abort_transaction:
610 	xenbus_transaction_end(xbt, 1);
611 	xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg);
612 	return false;
613 }
614 
615 static bool
616 xennet_xenbus_suspend(device_t dev, const pmf_qual_t *qual)
617 {
618 	int s;
619 	struct xennet_xenbus_softc *sc = device_private(dev);
620 
621 	/*
622 	 * xennet_stop() is called by pmf(9) before xennet_xenbus_suspend(),
623 	 * so we do not mask the event channel here
624 	 */
625 
626 	s = splnet();
627 	/* process any outstanding TX responses, then collect RX packets */
628 	xennet_handler(sc);
629 	while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
630 		/* XXXSMP */
631 		tsleep(xennet_xenbus_suspend, PRIBIO, "xnet_suspend", hz/2);
632 		xennet_handler(sc);
633 	}
634 
635 	/*
636 	 * dom0 may still use references to the grants we gave away
637 	 * earlier during RX buffer allocation. So we do not free RX buffers
638 	 * here, as dom0 does not expect the guest domain to suddenly revoke
639 	 * access to these grants.
640 	 */
641 
642 	sc->sc_backend_status = BEST_SUSPENDED;
643 	intr_disestablish(sc->sc_ih);
644 
645 	splx(s);
646 
647 	xenbus_device_suspend(sc->sc_xbusd);
648 	aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn);
649 
650 	return true;
651 }
652 
653 static void xennet_backend_changed(void *arg, XenbusState new_state)
654 {
655 	struct xennet_xenbus_softc *sc = device_private((device_t)arg);
656 	DPRINTF(("%s: new backend state %d\n",
657 	    device_xname(sc->sc_dev), new_state));
658 
659 	switch (new_state) {
660 	case XenbusStateInitialising:
661 	case XenbusStateInitialised:
662 	case XenbusStateConnected:
663 		break;
664 	case XenbusStateClosing:
665 		sc->sc_backend_status = BEST_CLOSED;
666 		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
667 		break;
668 	case XenbusStateInitWait:
669 		if (sc->sc_backend_status == BEST_CONNECTED)
670 			break;
671 		if (xennet_talk_to_backend(sc))
672 			xenbus_switch_state(sc->sc_xbusd, NULL,
673 			    XenbusStateConnected);
674 		break;
675 	case XenbusStateUnknown:
676 	default:
677 		panic("bad backend state %d", new_state);
678 	}
679 }
680 
681 /*
682  * Allocate RX buffers and put the associated request structures
683  * in the ring. This allows the backend to use them to communicate with the
684  * frontend when some data is destined for the frontend.
685  */
686 
687 static void
688 xennet_alloc_rx_buffer(struct xennet_xenbus_softc *sc)
689 {
690 	RING_IDX req_prod = sc->sc_rx_ring.req_prod_pvt;
691 	RING_IDX i;
692 	struct xennet_rxreq *req;
693 	struct xen_memory_reservation reservation;
694 	int s, otherend_id, notify;
695 
696 	otherend_id = sc->sc_xbusd->xbusd_otherend_id;
697 
698 	KASSERT(mutex_owned(&sc->sc_rx_lock));
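	/*
	 * Hand each free RX request to the backend: in copy mode grant it
	 * access to the buffer page, in flip mode transfer the page outright.
	 */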
699 	for (i = 0; sc->sc_free_rxreql != 0; i++) {
700 		req  = SLIST_FIRST(&sc->sc_rxreq_head);
701 		KASSERT(req != NULL);
702 		KASSERT(req == &sc->sc_rxreqs[req->rxreq_id]);
703 		RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->id =
704 		    req->rxreq_id;
705 
706 		switch (sc->sc_rx_feature) {
707 		case FEATURE_RX_COPY:
708 			if (xengnt_grant_access(otherend_id,
709 			    xpmap_ptom_masked(req->rxreq_pa),
710 			    0, &req->rxreq_gntref) != 0) {
711 				goto out_loop;
712 			}
713 			break;
714 		case FEATURE_RX_FLIP:
715 			if (xengnt_grant_transfer(otherend_id,
716 			    &req->rxreq_gntref) != 0) {
717 				goto out_loop;
718 			}
719 			break;
720 		default:
721 			panic("%s: unsupported RX feature mode: %ld\n",
722 			    __func__, sc->sc_rx_feature);
723 		}
724 
725 		RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->gref =
726 		    req->rxreq_gntref;
727 
728 		SLIST_REMOVE_HEAD(&sc->sc_rxreq_head, rxreq_next);
729 		sc->sc_free_rxreql--;
730 
731 		if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
732 			/* unmap the page */
733 			MULTI_update_va_mapping(&rx_mcl[i],
734 			    req->rxreq_va, 0, 0);
735 			/*
736 			 * Remove this page from pseudo phys map before
737 			 * passing back to Xen.
738 			 */
739 			xennet_pages[i] =
740 			    xpmap_ptom(req->rxreq_pa) >> PAGE_SHIFT;
741 			xpmap_ptom_unmap(req->rxreq_pa);
742 		}
743 	}
744 
745 out_loop:
746 	if (i == 0) {
747 		return;
748 	}
749 
750 	if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
751 		/* also make sure to flush all TLB entries */
752 		rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
753 		    UVMF_TLB_FLUSH | UVMF_ALL;
754 		/*
755 		 * We may have allocated buffers which have entries
756 		 * outstanding in the page update queue -- make sure we flush
757 		 * those first!
758 		 */
759 		s = splvm(); /* XXXSMP */
760 		xpq_flush_queue();
761 		splx(s);
762 		/* now decrease reservation */
763 		set_xen_guest_handle(reservation.extent_start, xennet_pages);
764 		reservation.nr_extents = i;
765 		reservation.extent_order = 0;
766 		reservation.address_bits = 0;
767 		reservation.domid = DOMID_SELF;
768 		rx_mcl[i].op = __HYPERVISOR_memory_op;
769 		rx_mcl[i].args[0] = XENMEM_decrease_reservation;
770 		rx_mcl[i].args[1] = (unsigned long)&reservation;
771 		HYPERVISOR_multicall(rx_mcl, i+1);
772 		if (__predict_false(rx_mcl[i].result != i)) {
773 			panic("xennet_alloc_rx_buffer: "
774 			    "XENMEM_decrease_reservation");
775 		}
776 	}
777 
778 	sc->sc_rx_ring.req_prod_pvt = req_prod + i;
779 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_rx_ring, notify);
780 	if (notify)
781 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
782 	return;
783 }
784 
785 /*
786  * Reclaim all RX buffers used by the I/O ring between frontend and backend
787  */
788 static void
789 xennet_free_rx_buffer(struct xennet_xenbus_softc *sc)
790 {
791 	paddr_t ma, pa;
792 	vaddr_t va;
793 	RING_IDX i;
794 	mmu_update_t mmu[1];
795 	multicall_entry_t mcl[2];
796 
797 	mutex_enter(&sc->sc_rx_lock);
798 
799 	DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev)));
800 	/* get back memory from RX ring */
801 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
802 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
803 
804 		if (rxreq->rxreq_gntref != GRANT_INVALID_REF) {
805 			/*
806 			 * this req is still granted. Get back the page or
807 			 * allocate a new one, and remap it.
808 			 */
809 			SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq,
810 			    rxreq_next);
811 			sc->sc_free_rxreql++;
812 
813 			switch (sc->sc_rx_feature) {
814 			case FEATURE_RX_COPY:
815 				xengnt_revoke_access(rxreq->rxreq_gntref);
816 				rxreq->rxreq_gntref = GRANT_INVALID_REF;
817 				break;
818 			case FEATURE_RX_FLIP:
819 				ma = xengnt_revoke_transfer(
820 				    rxreq->rxreq_gntref);
821 				rxreq->rxreq_gntref = GRANT_INVALID_REF;
822 				if (ma == 0) {
823 					u_long pfn;
824 					struct xen_memory_reservation xenres;
825 					/*
826 					 * Transfer not complete, we lost the page.
827 					 * Get one from the hypervisor.
828 					 */
829 					set_xen_guest_handle(
830 					    xenres.extent_start, &pfn);
831 					xenres.nr_extents = 1;
832 					xenres.extent_order = 0;
833 					xenres.address_bits = 31;
834 					xenres.domid = DOMID_SELF;
835 					if (HYPERVISOR_memory_op(
836 					    XENMEM_increase_reservation, &xenres) < 0) {
837 						panic("xennet_free_rx_buffer: "
838 						    "can't get memory back");
839 					}
840 					ma = pfn;
841 					KASSERT(ma != 0);
842 				}
843 				pa = rxreq->rxreq_pa;
844 				va = rxreq->rxreq_va;
845 				/* remap the page */
846 				mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
847 				mmu[0].val = pa >> PAGE_SHIFT;
848 				MULTI_update_va_mapping(&mcl[0], va,
849 				    (ma << PAGE_SHIFT) | PG_V | PG_KW | xpmap_pg_nx,
850 				    UVMF_TLB_FLUSH|UVMF_ALL);
851 				xpmap_ptom_map(pa, ptoa(ma));
852 				mcl[1].op = __HYPERVISOR_mmu_update;
853 				mcl[1].args[0] = (unsigned long)mmu;
854 				mcl[1].args[1] = 1;
855 				mcl[1].args[2] = 0;
856 				mcl[1].args[3] = DOMID_SELF;
857 				HYPERVISOR_multicall(mcl, 2);
858 				break;
859 			default:
860 				panic("%s: unsupported RX feature mode: %ld\n",
861 				    __func__, sc->sc_rx_feature);
862 			}
863 		}
864 
865 	}
866 	mutex_exit(&sc->sc_rx_lock);
867 	DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev)));
868 }
869 
870 /*
871  * Clears a used RX request when its associated mbuf has been processed
872  */
873 static void
874 xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg)
875 {
876 	int s = splnet();
877 	KASSERT(buf == m->m_ext.ext_buf);
878 	KASSERT(arg == NULL);
879 	KASSERT(m != NULL);
880 	vaddr_t va = (vaddr_t)(buf) & ~((vaddr_t)PAGE_MASK);
881 	pool_cache_put_paddr(if_xennetrxbuf_cache,
882 	    (void *)va, m->m_ext.ext_paddr);
883 	pool_cache_put(mb_cache, m);
884 	splx(s);
885 }
886 
887 static void
888 xennet_rx_free_req(struct xennet_rxreq *req)
889 {
890 	struct xennet_xenbus_softc *sc = req->rxreq_sc;
891 
892 	KASSERT(mutex_owned(&sc->sc_rx_lock));
893 
894 	/* puts back the RX request in the list of free RX requests */
895 	SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next);
896 	sc->sc_free_rxreql++;
897 
898 	/*
899 	 * If the ring needs more requests pushed in, allocate some
900 	 * RX buffers to catch up with the backend's consumption
901 	 */
902 	req->rxreq_gntref = GRANT_INVALID_REF;
903 
904 	if (sc->sc_free_rxreql >= (NET_RX_RING_SIZE * 4 / 5) &&
905 	    __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
906 		xennet_alloc_rx_buffer(sc);
907 	}
908 }
909 
910 /*
911  * Process responses associated with the TX mbufs sent previously through
912  * xennet_softstart().
913  * Called at splnet.
914  */
915 static void
916 xennet_tx_complete(struct xennet_xenbus_softc *sc)
917 {
918 	struct xennet_txreq *req;
919 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
920 	RING_IDX resp_prod, i;
921 
922 	DPRINTFN(XEDB_EVENT, ("xennet_tx_complete prod %d cons %d\n",
923 	    sc->sc_tx_ring.sring->rsp_prod, sc->sc_tx_ring.rsp_cons));
924 
925 again:
926 	resp_prod = sc->sc_tx_ring.sring->rsp_prod;
927 	xen_rmb();
928 	mutex_enter(&sc->sc_tx_lock);
929 	for (i = sc->sc_tx_ring.rsp_cons; i != resp_prod; i++) {
930 		req = &sc->sc_txreqs[RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id];
931 		KASSERT(req->txreq_id ==
932 		    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id);
933 		if (__predict_false(xengnt_status(req->txreq_gntref))) {
934 			aprint_verbose_dev(sc->sc_dev,
935 			    "grant still used by backend\n");
936 			sc->sc_tx_ring.rsp_cons = i;
937 			goto end;
938 		}
939 		if (__predict_false(
940 		    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->status !=
941 		    NETIF_RSP_OKAY))
942 			ifp->if_oerrors++;
943 		else
944 			ifp->if_opackets++;
945 		xengnt_revoke_access(req->txreq_gntref);
946 		m_freem(req->txreq_m);
947 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
948 	}
949 	mutex_exit(&sc->sc_tx_lock);
950 
951 	sc->sc_tx_ring.rsp_cons = resp_prod;
952 	/* set new event and check for race with rsp_cons update */
953 	sc->sc_tx_ring.sring->rsp_event =
954 	    resp_prod + ((sc->sc_tx_ring.sring->req_prod - resp_prod) >> 1) + 1;
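	/*
	 * i.e. ask to be notified again once about half of the
	 * still-outstanding requests have been answered.
	 */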
955 	ifp->if_timer = 0;
956 	xen_wmb();
957 	if (resp_prod != sc->sc_tx_ring.sring->rsp_prod)
958 		goto again;
959 end:
960 	if (ifp->if_flags & IFF_OACTIVE) {
961 		ifp->if_flags &= ~IFF_OACTIVE;
962 		softint_schedule(sc->sc_softintr);
963 	}
964 }
965 
966 /*
967  * Xennet event handler.
968  * Get outstanding responses for TX packets, then collect all responses for
969  * pending RX packets.
970  * Called at splnet.
971  */
972 static int
973 xennet_handler(void *arg)
974 {
975 	struct xennet_xenbus_softc *sc = arg;
976 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
977 	RING_IDX resp_prod, i;
978 	struct xennet_rxreq *req;
979 	paddr_t ma, pa;
980 	vaddr_t va;
981 	mmu_update_t mmu[1];
982 	multicall_entry_t mcl[2];
983 	struct mbuf *m;
984 	void *pktp;
985 	int more_to_do;
986 
987 	if (sc->sc_backend_status != BEST_CONNECTED)
988 		return 1;
989 
990 	xennet_tx_complete(sc);
991 
992 	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
993 
994 again:
995 	DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n",
996 	    sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons));
997 
998 	mutex_enter(&sc->sc_rx_lock);
999 	resp_prod = sc->sc_rx_ring.sring->rsp_prod;
1000 	xen_rmb(); /* ensure we see replies up to resp_prod */
1001 
1002 	for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
1003 		netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
1004 		req = &sc->sc_rxreqs[rx->id];
1005 		KASSERT(req->rxreq_gntref != GRANT_INVALID_REF);
1006 		KASSERT(req->rxreq_id == rx->id);
1007 
1008 		ma = 0;
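		/*
		 * In copy mode the backend copied the packet into our granted
		 * page, so just revoke the grant; in flip mode it transferred
		 * a new machine page, which is remapped below.
		 */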
1009 		switch (sc->sc_rx_feature) {
1010 		case FEATURE_RX_COPY:
1011 			xengnt_revoke_access(req->rxreq_gntref);
1012 			break;
1013 		case FEATURE_RX_FLIP:
1014 			ma = xengnt_revoke_transfer(req->rxreq_gntref);
1015 			if (ma == 0) {
1016 				DPRINTFN(XEDB_EVENT, ("xennet_handler ma == 0\n"));
1017 				/*
1018 				 * The remote couldn't send us a packet.
1019 				 * We can't free this rxreq as no page will be mapped
1020 				 * here. Instead give it back immediately to the backend.
1021 				 */
1022 				ifp->if_ierrors++;
1023 				RING_GET_REQUEST(&sc->sc_rx_ring,
1024 				    sc->sc_rx_ring.req_prod_pvt)->id = req->rxreq_id;
1025 				RING_GET_REQUEST(&sc->sc_rx_ring,
1026 				    sc->sc_rx_ring.req_prod_pvt)->gref =
1027 					req->rxreq_gntref;
1028 				sc->sc_rx_ring.req_prod_pvt++;
1029 				RING_PUSH_REQUESTS(&sc->sc_rx_ring);
1030 				continue;
1031 			}
1032 			break;
1033 		default:
1034 			panic("%s: unsupported RX feature mode: %ld\n",
1035 			    __func__, sc->sc_rx_feature);
1036 		}
1037 
1038 		pa = req->rxreq_pa;
1039 		va = req->rxreq_va;
1040 
1041 		if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
1042 			/* remap the page */
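			/*
			 * Hook the transferred machine page under the existing
			 * va/pa: update the M2P entry (MMU_MACHPHYS_UPDATE),
			 * the kernel VA mapping and the P2M.
			 */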
1043 			mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
1044 			mmu[0].val = pa >> PAGE_SHIFT;
1045 			MULTI_update_va_mapping(&mcl[0], va,
1046 			    (ma << PAGE_SHIFT) | PG_V | PG_KW | xpmap_pg_nx,
1047 			    UVMF_TLB_FLUSH|UVMF_ALL);
1048 			xpmap_ptom_map(pa, ptoa(ma));
1049 			mcl[1].op = __HYPERVISOR_mmu_update;
1050 			mcl[1].args[0] = (unsigned long)mmu;
1051 			mcl[1].args[1] = 1;
1052 			mcl[1].args[2] = 0;
1053 			mcl[1].args[3] = DOMID_SELF;
1054 			HYPERVISOR_multicall(mcl, 2);
1055 		}
1056 
1057 		pktp = (void *)(va + rx->offset);
1058 #ifdef XENNET_DEBUG_DUMP
1059 		xennet_hex_dump(pktp, rx->status, "r", rx->id);
1060 #endif
1061 		if ((ifp->if_flags & IFF_PROMISC) == 0) {
1062 			struct ether_header *eh = pktp;
1063 			if (ETHER_IS_MULTICAST(eh->ether_dhost) == 0 &&
1064 			    memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
1065 			    ETHER_ADDR_LEN) != 0) {
1066 				DPRINTFN(XEDB_EVENT,
1067 				    ("xennet_handler bad dest\n"));
1068 				/* packet not for us */
1069 				xennet_rx_free_req(req);
1070 				continue;
1071 			}
1072 		}
1073 		MGETHDR(m, M_DONTWAIT, MT_DATA);
1074 		if (__predict_false(m == NULL)) {
1075 			printf("%s: rx no mbuf\n", ifp->if_xname);
1076 			ifp->if_ierrors++;
1077 			xennet_rx_free_req(req);
1078 			continue;
1079 		}
1080 		MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
1081 
1082 		m_set_rcvif(m, ifp);
1083 		if (rx->status <= MHLEN) {
1084 			/* small packet; copy to mbuf data area */
1085 			m_copyback(m, 0, rx->status, pktp);
1086 			KASSERT(m->m_pkthdr.len == rx->status);
1087 			KASSERT(m->m_len == rx->status);
1088 		} else {
1089 			/* large packet; attach buffer to mbuf */
1090 			req->rxreq_va = (vaddr_t)pool_cache_get_paddr(
1091 			    if_xennetrxbuf_cache, PR_NOWAIT, &req->rxreq_pa);
1092 			if (__predict_false(req->rxreq_va == 0)) {
1093 				printf("%s: rx no buf\n", ifp->if_xname);
1094 				ifp->if_ierrors++;
1095 				req->rxreq_va = va;
1096 				req->rxreq_pa = pa;
1097 				xennet_rx_free_req(req);
1098 				m_freem(m);
1099 				continue;
1100 			}
1101 			m->m_len = m->m_pkthdr.len = rx->status;
1102 			MEXTADD(m, pktp, rx->status,
1103 			    M_DEVBUF, xennet_rx_mbuf_free, NULL);
1104 			m->m_ext.ext_paddr = pa;
1105 			m->m_flags |= M_EXT_RW; /* we own the buffer */
1106 		}
1107 		if ((rx->flags & NETRXF_csum_blank) != 0) {
1108 			xennet_checksum_fill(&m);
1109 			if (m == NULL) {
1110 				ifp->if_ierrors++;
1111 				xennet_rx_free_req(req);
1112 				continue;
1113 			}
1114 		}
1115 		/* freeing the req may overwrite *rx, so better to do it late */
1116 		xennet_rx_free_req(req);
1117 
1118 		/* Pass the packet up. */
1119 		if_percpuq_enqueue(ifp->if_percpuq, m);
1120 	}
1121 	xen_rmb();
1122 	sc->sc_rx_ring.rsp_cons = i;
1123 	RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_rx_ring, more_to_do);
1124 	mutex_exit(&sc->sc_rx_lock);
1125 
1126 	if (more_to_do)
1127 		goto again;
1128 
1129 	return 1;
1130 }
1131 
1132 /*
1133  * The output routine of a xennet interface
1134  * Called at splnet.
1135  */
1136 void
1137 xennet_start(struct ifnet *ifp)
1138 {
1139 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1140 
1141 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start()\n", device_xname(sc->sc_dev)));
1142 
1143 	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
1144 
1145 	xennet_tx_complete(sc);
1146 
1147 	if (__predict_false(
1148 	    (ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING))
1149 		return;
1150 
1151 	/*
1152 	 * The Xen communication channel is much more efficient if we can
1153 	 * schedule a batch of packets for domain0. To achieve this, we
1154 	 * schedule a soft interrupt, and just return. This way, the network
1155 	 * stack will enqueue all pending mbufs in the interface's send queue
1156 	 * before it is processed by xennet_softstart().
1157 	 */
1158 	softint_schedule(sc->sc_softintr);
1159 	return;
1160 }
1161 
1162 /*
1163  * Prepares mbufs for TX, and notifies the backend when finished.
1164  * Called at splsoftnet.
1165  */
1166 void
1167 xennet_softstart(void *arg)
1168 {
1169 	struct xennet_xenbus_softc *sc = arg;
1170 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1171 	struct mbuf *m, *new_m;
1172 	netif_tx_request_t *txreq;
1173 	RING_IDX req_prod;
1174 	paddr_t pa, pa2;
1175 	struct xennet_txreq *req;
1176 	int notify;
1177 	int do_notify = 0;
1178 
1179 	mutex_enter(&sc->sc_tx_lock);
1180 	if (__predict_false(
1181 	    (ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)) {
1182 		mutex_exit(&sc->sc_tx_lock);
1183 		return;
1184 	}
1185 
1186 	req_prod = sc->sc_tx_ring.req_prod_pvt;
1187 	while (/*CONSTCOND*/1) {
1188 		uint16_t txflags;
1189 
1190 		req = SLIST_FIRST(&sc->sc_txreq_head);
1191 		if (__predict_false(req == NULL)) {
1192 			ifp->if_flags |= IFF_OACTIVE;
1193 			break;
1194 		}
1195 		IFQ_POLL(&ifp->if_snd, m);
1196 		if (m == NULL)
1197 			break;
1198 
1199 		switch (m->m_flags & (M_EXT|M_EXT_CLUSTER)) {
1200 		case M_EXT|M_EXT_CLUSTER:
1201 			KASSERT(m->m_ext.ext_paddr != M_PADDR_INVALID);
1202 			pa = m->m_ext.ext_paddr +
1203 				(m->m_data - m->m_ext.ext_buf);
1204 			break;
1205 		case 0:
1206 			KASSERT(m->m_paddr != M_PADDR_INVALID);
1207 			pa = m->m_paddr + M_BUFOFFSET(m) +
1208 				(m->m_data - M_BUFADDR(m));
1209 			break;
1210 		default:
1211 			if (__predict_false(
1212 			    !pmap_extract(pmap_kernel(), (vaddr_t)m->m_data,
1213 			    &pa))) {
1214 				panic("xennet_start: no pa");
1215 			}
1216 			break;
1217 		}
1218 
1219 		if ((m->m_pkthdr.csum_flags &
1220 		    (M_CSUM_TCPv4 | M_CSUM_UDPv4)) != 0) {
1221 			txflags = NETTXF_csum_blank;
1222 		} else {
1223 			txflags = 0;
1224 		}
1225 
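		/*
		 * A chained mbuf, or data crossing a page boundary, cannot be
		 * covered by a single grant; copy it into a fresh contiguous
		 * mbuf below.
		 */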
1226 		if (m->m_pkthdr.len != m->m_len ||
1227 		    (pa ^ (pa + m->m_pkthdr.len - 1)) & PG_FRAME) {
1228 
1229 			MGETHDR(new_m, M_DONTWAIT, MT_DATA);
1230 			if (__predict_false(new_m == NULL)) {
1231 				printf("%s: cannot allocate new mbuf\n",
1232 				       device_xname(sc->sc_dev));
1233 				break;
1234 			}
1235 			if (m->m_pkthdr.len > MHLEN) {
1236 				MCLGET(new_m, M_DONTWAIT);
1237 				if (__predict_false(
1238 				    (new_m->m_flags & M_EXT) == 0)) {
1239 					DPRINTF(("%s: no mbuf cluster\n",
1240 					    device_xname(sc->sc_dev)));
1241 					m_freem(new_m);
1242 					break;
1243 				}
1244 			}
1245 
1246 			m_copydata(m, 0, m->m_pkthdr.len, mtod(new_m, void *));
1247 			new_m->m_len = new_m->m_pkthdr.len = m->m_pkthdr.len;
1248 
1249 			if ((new_m->m_flags & M_EXT) != 0) {
1250 				pa = new_m->m_ext.ext_paddr;
1251 				KASSERT(new_m->m_data == new_m->m_ext.ext_buf);
1252 				KASSERT(pa != M_PADDR_INVALID);
1253 			} else {
1254 				pa = new_m->m_paddr;
1255 				KASSERT(pa != M_PADDR_INVALID);
1256 				KASSERT(new_m->m_data == M_BUFADDR(new_m));
1257 				pa += M_BUFOFFSET(new_m);
1258 			}
1259 			if (__predict_false(xengnt_grant_access(
1260 			    sc->sc_xbusd->xbusd_otherend_id,
1261 			    xpmap_ptom_masked(pa),
1262 			    GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1263 				m_freem(new_m);
1264 				ifp->if_flags |= IFF_OACTIVE;
1265 				break;
1266 			}
1267 			/* we will be able to send new_m */
1268 			IFQ_DEQUEUE(&ifp->if_snd, m);
1269 			m_freem(m);
1270 			m = new_m;
1271 		} else {
1272 			if (__predict_false(xengnt_grant_access(
1273 			    sc->sc_xbusd->xbusd_otherend_id,
1274 			    xpmap_ptom_masked(pa),
1275 			    GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1276 				ifp->if_flags |= IFF_OACTIVE;
1277 				break;
1278 			}
1279 			/* we will be able to send m */
1280 			IFQ_DEQUEUE(&ifp->if_snd, m);
1281 		}
1282 		MCLAIM(m, &sc->sc_ethercom.ec_tx_mowner);
1283 
1284 		KASSERT(((pa ^ (pa + m->m_pkthdr.len -  1)) & PG_FRAME) == 0);
1285 
1286 		SLIST_REMOVE_HEAD(&sc->sc_txreq_head, txreq_next);
1287 		req->txreq_m = m;
1288 
1289 		DPRINTFN(XEDB_MBUF, ("xennet_start id %d, "
1290 		    "mbuf %p, buf %p/%p/%p, size %d\n",
1291 		    req->txreq_id, m, mtod(m, void *), (void *)pa,
1292 		    (void *)xpmap_ptom_masked(pa), m->m_pkthdr.len));
1293 		pmap_extract_ma(pmap_kernel(), mtod(m, vaddr_t), &pa2);
1294 		DPRINTFN(XEDB_MBUF, ("xennet_start pa %p ma %p/%p\n",
1295 		    (void *)pa, (void *)xpmap_ptom_masked(pa), (void *)pa2));
1296 #ifdef XENNET_DEBUG_DUMP
1297 		xennet_hex_dump(mtod(m, u_char *), m->m_pkthdr.len, "s",
1298 			       	req->txreq_id);
1299 #endif
1300 
1301 		txreq = RING_GET_REQUEST(&sc->sc_tx_ring, req_prod);
1302 		txreq->id = req->txreq_id;
1303 		txreq->gref = req->txreq_gntref;
1304 		txreq->offset = pa & ~PG_FRAME;
1305 		txreq->size = m->m_pkthdr.len;
1306 		txreq->flags = txflags;
1307 
1308 		req_prod++;
1309 		sc->sc_tx_ring.req_prod_pvt = req_prod;
1310 		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_tx_ring, notify);
1311 		if (notify)
1312 			do_notify = 1;
1313 
1314 #ifdef XENNET_DEBUG
1315 		DPRINTFN(XEDB_MEM, ("packet addr %p/%p, physical %p/%p, "
1316 		    "m_paddr %p, len %d/%d\n", M_BUFADDR(m), mtod(m, void *),
1317 		    (void *)*kvtopte(mtod(m, vaddr_t)),
1318 		    (void *)xpmap_mtop(*kvtopte(mtod(m, vaddr_t))),
1319 		    (void *)m->m_paddr, m->m_pkthdr.len, m->m_len));
1320 		DPRINTFN(XEDB_MEM, ("id %d gref %d offset %d size %d flags %d"
1321 		    " prod %d\n",
1322 		    txreq->id, txreq->gref, txreq->offset, txreq->size,
1323 		    txreq->flags, req_prod));
1324 #endif
1325 
1326 		/*
1327 		 * Pass packet to bpf if there is a listener.
1328 		 */
1329 		bpf_mtap(ifp, m, BPF_D_OUT);
1330 	}
1331 
1332 	if (do_notify) {
1333 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
1334 		ifp->if_timer = 5;
1335 	}
1336 
1337 	mutex_exit(&sc->sc_tx_lock);
1338 
1339 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start() done\n",
1340 	    device_xname(sc->sc_dev)));
1341 }
1342 
1343 int
1344 xennet_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1345 {
1346 #ifdef XENNET_DEBUG
1347 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1348 #endif
1349 	int s, error = 0;
1350 
1351 	s = splnet();
1352 
1353 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl()\n",
1354 	    device_xname(sc->sc_dev)));
1355 	error = ether_ioctl(ifp, cmd, data);
1356 	if (error == ENETRESET)
1357 		error = 0;
1358 	splx(s);
1359 
1360 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() returning %d\n",
1361 	    device_xname(sc->sc_dev), error));
1362 
1363 	return error;
1364 }
1365 
1366 void
1367 xennet_watchdog(struct ifnet *ifp)
1368 {
1369 	aprint_verbose_ifnet(ifp, "xennet_watchdog\n");
1370 }
1371 
1372 int
1373 xennet_init(struct ifnet *ifp)
1374 {
1375 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1376 	mutex_enter(&sc->sc_rx_lock);
1377 
1378 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_init()\n",
1379 	    device_xname(sc->sc_dev)));
1380 
1381 	if ((ifp->if_flags & IFF_RUNNING) == 0) {
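		/*
		 * Interface was not running: ask for an event on the next RX
		 * response, unmask the event channel and kick the backend.
		 */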
1382 		sc->sc_rx_ring.sring->rsp_event =
1383 		    sc->sc_rx_ring.rsp_cons + 1;
1384 		hypervisor_enable_event(sc->sc_evtchn);
1385 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
1386 		xennet_reset(sc);
1387 	}
1388 	ifp->if_flags |= IFF_RUNNING;
1389 	ifp->if_flags &= ~IFF_OACTIVE;
1390 	ifp->if_timer = 0;
1391 	mutex_exit(&sc->sc_rx_lock);
1392 	return 0;
1393 }
1394 
1395 void
1396 xennet_stop(struct ifnet *ifp, int disable)
1397 {
1398 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1399 
1400 	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1401 	hypervisor_mask_event(sc->sc_evtchn);
1402 	xennet_reset(sc);
1403 }
1404 
1405 void
1406 xennet_reset(struct xennet_xenbus_softc *sc)
1407 {
1408 
1409 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_reset()\n",
1410 	    device_xname(sc->sc_dev)));
1411 }
1412 
1413 #if defined(NFS_BOOT_BOOTSTATIC)
1414 int
1415 xennet_bootstatic_callback(struct nfs_diskless *nd)
1416 {
1417 #if 0
1418 	struct ifnet *ifp = nd->nd_ifp;
1419 	struct xennet_xenbus_softc *sc =
1420 	    (struct xennet_xenbus_softc *)ifp->if_softc;
1421 #endif
1422 	int flags = 0;
1423 	union xen_cmdline_parseinfo xcp;
1424 	struct sockaddr_in *sin;
1425 
1426 	memset(&xcp, 0, sizeof(xcp.xcp_netinfo));
1427 	xcp.xcp_netinfo.xi_ifno = /* XXX sc->sc_ifno */ 0;
1428 	xcp.xcp_netinfo.xi_root = nd->nd_root.ndm_host;
1429 	xen_parse_cmdline(XEN_PARSE_NETINFO, &xcp);
1430 
1431 	if (xcp.xcp_netinfo.xi_root[0] != '\0') {
1432 		flags |= NFS_BOOT_HAS_SERVER;
1433 		if (strchr(xcp.xcp_netinfo.xi_root, ':') != NULL)
1434 			flags |= NFS_BOOT_HAS_ROOTPATH;
1435 	}
1436 
1437 	nd->nd_myip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[0]);
1438 	nd->nd_gwip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[2]);
1439 	nd->nd_mask.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[3]);
1440 
1441 	sin = (struct sockaddr_in *) &nd->nd_root.ndm_saddr;
1442 	memset((void *)sin, 0, sizeof(*sin));
1443 	sin->sin_len = sizeof(*sin);
1444 	sin->sin_family = AF_INET;
1445 	sin->sin_addr.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[1]);
1446 
1447 	if (nd->nd_myip.s_addr)
1448 		flags |= NFS_BOOT_HAS_MYIP;
1449 	if (nd->nd_gwip.s_addr)
1450 		flags |= NFS_BOOT_HAS_GWIP;
1451 	if (nd->nd_mask.s_addr)
1452 		flags |= NFS_BOOT_HAS_MASK;
1453 	if (sin->sin_addr.s_addr)
1454 		flags |= NFS_BOOT_HAS_SERVADDR;
1455 
1456 	return flags;
1457 }
1458 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
1459 
1460 #ifdef XENNET_DEBUG_DUMP
1461 #define XCHR(x) hexdigits[(x) & 0xf]
1462 static void
1463 xennet_hex_dump(const unsigned char *pkt, size_t len, const char *type, int id)
1464 {
1465 	size_t i, j;
1466 
1467 	printf("pkt %p len %zd/%zx type %s id %d\n", pkt, len, len, type, id);
1468 	printf("00000000  ");
1469 	for(i=0; i<len; i++) {
1470 		printf("%c%c ", XCHR(pkt[i]>>4), XCHR(pkt[i]));
1471 		if ((i+1) % 16 == 8)
1472 			printf(" ");
1473 		if ((i+1) % 16 == 0) {
1474 			printf(" %c", '|');
1475 			for(j=0; j<16; j++)
1476 				printf("%c", pkt[i-15+j]>=32 &&
1477 				    pkt[i-15+j]<127?pkt[i-15+j]:'.');
1478 			printf("%c\n%c%c%c%c%c%c%c%c  ", '|',
1479 			    XCHR((i+1)>>28), XCHR((i+1)>>24),
1480 			    XCHR((i+1)>>20), XCHR((i+1)>>16),
1481 			    XCHR((i+1)>>12), XCHR((i+1)>>8),
1482 			    XCHR((i+1)>>4), XCHR(i+1));
1483 		}
1484 	}
1485 	printf("\n");
1486 }
1487 #undef XCHR
1488 #endif
1489