xref: /netbsd-src/sys/arch/xen/xen/if_xennet_xenbus.c (revision e89934bbf778a6d6d6894877c4da59d0c7835b0f)
1 /*      $NetBSD: if_xennet_xenbus.c,v 1.69 2016/12/15 09:28:04 ozaki-r Exp $      */
2 
3 /*
4  * Copyright (c) 2006 Manuel Bouyer.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  *
26  */
27 
28 /*
29  * Copyright (c) 2004 Christian Limpach.
30  * All rights reserved.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  *
41  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
42  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
43  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
44  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
45  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
46  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
48  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
50  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51  */
52 
53 /*
54  * This file contains the xennet frontend code required for the network
55  * communication between two Xen domains.
56  * It resembles xbd, but is a little more complex as it must deal with two
57  * rings:
58  * - the TX ring, to transmit packets to backend (inside => outside)
59  * - the RX ring, to receive packets from backend (outside => inside)
60  *
61  * The principles are as follows.
62  *
63  * For TX:
64  * The purpose is to transmit packets to the outside. Everything starts in
65  * xennet_start() (the default output routine of xennet), which schedules a
66  * softint, xennet_softstart(). xennet_softstart() generates the requests
67  * associated with the queued TX mbufs (see altq(9)).
68  * The backend's responses are processed by xennet_tx_complete(), called either
69  * from:
70  * - xennet_start()
71  * - xennet_handler(), during an asynchronous event notification from backend
72  *   (similar to an IRQ).
73  *
74  * For RX:
75  * The purpose is to process the packets received from the outside. RX buffers
76  * are pre-allocated through xennet_alloc_rx_buffer(), during xennet autoconf
77  * attach. During pre-allocation, the frontend pushes requests into the I/O
78  * ring, in preparation for incoming packets from the backend.
79  * When RX packets need to be processed, the backend takes the requests
80  * previously offered by the frontend and pushes the associated responses into
81  * the I/O ring. When done, it notifies the frontend through an event
82  * notification, which asynchronously calls xennet_handler() in the frontend.
83  * xennet_handler() processes the responses, generates the associated mbufs,
84  * and passes them to the MI layer for further processing.
85  */
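
/*
 * Quick reference for the two shared rings used below:
 *
 * TX: xennet_softstart() grants each packet's page read-only and queues a
 *     netif_tx_request_t (id, gref, offset, size, flags); the backend answers
 *     with a netif_tx_response_t, which xennet_tx_complete() uses to revoke
 *     the grant and free the mbuf.
 * RX: xennet_alloc_rx_buffer() posts netif_rx_request_t entries, each backed
 *     by a page-sized buffer allocated at attach time; the backend fills a
 *     page and answers with a netif_rx_response_t (id, offset, status, flags),
 *     which xennet_handler() turns into an mbuf and passes up the stack.
 *
 * Both rings are single-page shared rings, granted to the backend in
 * xennet_xenbus_resume(); the ring references and the event channel are
 * published to the backend via xenstore in xennet_talk_to_backend().
 */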
86 
87 #include <sys/cdefs.h>
88 __KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.69 2016/12/15 09:28:04 ozaki-r Exp $");
89 
90 #include "opt_xen.h"
91 #include "opt_nfs_boot.h"
92 
93 #include <sys/param.h>
94 #include <sys/device.h>
95 #include <sys/conf.h>
96 #include <sys/kernel.h>
97 #include <sys/proc.h>
98 #include <sys/systm.h>
99 #include <sys/intr.h>
100 #include <sys/rndsource.h>
101 
102 #include <net/if.h>
103 #include <net/if_dl.h>
104 #include <net/if_ether.h>
105 #include <net/bpf.h>
106 #include <net/bpfdesc.h>
107 
108 #if defined(NFS_BOOT_BOOTSTATIC)
109 #include <sys/fstypes.h>
110 #include <sys/mount.h>
111 #include <sys/statvfs.h>
112 #include <netinet/in.h>
113 #include <nfs/rpcv2.h>
114 #include <nfs/nfsproto.h>
115 #include <nfs/nfs.h>
116 #include <nfs/nfsmount.h>
117 #include <nfs/nfsdiskless.h>
118 #include <xen/if_xennetvar.h>
119 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
120 
121 #include <xen/xennet_checksum.h>
122 
123 #include <uvm/uvm.h>
124 
125 #include <xen/hypervisor.h>
126 #include <xen/evtchn.h>
127 #include <xen/granttables.h>
128 #include <xen/xen-public/io/netif.h>
129 #include <xen/xenpmap.h>
130 
131 #include <xen/xenbus.h>
132 #include "locators.h"
133 
134 #undef XENNET_DEBUG_DUMP
135 #undef XENNET_DEBUG
136 #ifdef XENNET_DEBUG
137 #define XEDB_FOLLOW     0x01
138 #define XEDB_INIT       0x02
139 #define XEDB_EVENT      0x04
140 #define XEDB_MBUF       0x08
141 #define XEDB_MEM        0x10
142 int xennet_debug = 0xff;
143 #define DPRINTF(x) if (xennet_debug) printf x;
144 #define DPRINTFN(n,x) if (xennet_debug & (n)) printf x;
145 #else
146 #define DPRINTF(x)
147 #define DPRINTFN(n,x)
148 #endif
149 
150 #define GRANT_INVALID_REF -1 /* entry is free */
151 
152 #define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
153 #define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
154 
155 struct xennet_txreq {
156 	SLIST_ENTRY(xennet_txreq) txreq_next;
157 	uint16_t txreq_id; /* ID passed to backend */
158 	grant_ref_t txreq_gntref; /* grant ref of this request */
159 	struct mbuf *txreq_m; /* mbuf being transmitted */
160 };
161 
162 struct xennet_rxreq {
163 	SLIST_ENTRY(xennet_rxreq) rxreq_next;
164 	uint16_t rxreq_id; /* ID passed to backend */
165 	grant_ref_t rxreq_gntref; /* grant ref of this request */
166 /* va/pa for this receive buf. ma will be provided by backend */
167 	paddr_t rxreq_pa;
168 	vaddr_t rxreq_va;
169 	struct xennet_xenbus_softc *rxreq_sc; /* pointer to our interface */
170 };
171 
172 struct xennet_xenbus_softc {
173 	device_t sc_dev;
174 	struct ethercom sc_ethercom;
175 	uint8_t sc_enaddr[6];
176 	struct xenbus_device *sc_xbusd;
177 
178 	netif_tx_front_ring_t sc_tx_ring;
179 	netif_rx_front_ring_t sc_rx_ring;
180 
181 	unsigned int sc_evtchn;
182 	void *sc_softintr;
183 
184 	grant_ref_t sc_tx_ring_gntref;
185 	grant_ref_t sc_rx_ring_gntref;
186 
187 	kmutex_t sc_tx_lock; /* protects free TX list, below */
188 	kmutex_t sc_rx_lock; /* protects free RX list, below */
189 	struct xennet_txreq sc_txreqs[NET_TX_RING_SIZE];
190 	struct xennet_rxreq sc_rxreqs[NET_RX_RING_SIZE];
191 	SLIST_HEAD(,xennet_txreq) sc_txreq_head; /* list of free TX requests */
192 	SLIST_HEAD(,xennet_rxreq) sc_rxreq_head; /* list of free RX requests */
193 	int sc_free_rxreql; /* number of free receive request structs */
194 
195 	int sc_backend_status; /* our status with backend */
196 #define BEST_CLOSED		0
197 #define BEST_DISCONNECTED	1
198 #define BEST_CONNECTED		2
199 #define BEST_SUSPENDED		3
200 	unsigned long sc_rx_feature;
201 #define FEATURE_RX_FLIP		0
202 #define FEATURE_RX_COPY		1
203 	krndsource_t     sc_rnd_source;
204 };
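
/*
 * Two RX modes are negotiated with the backend (see xennet_talk_to_backend()):
 * - FEATURE_RX_COPY: the RX pages stay owned and mapped by this domain; the
 *   backend copies packet data into them and the frontend only revokes the
 *   access grant when it consumes the response.
 * - FEATURE_RX_FLIP: the frontend posts transfer grants, returns the backing
 *   pages to the hypervisor (XENMEM_decrease_reservation), and for each
 *   received packet accepts a page transferred in by the backend, which must
 *   be mapped back into the pseudo-physical map (see xennet_handler()).
 */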
205 #define SC_NLIVEREQ(sc) ((sc)->sc_rx_ring.req_prod_pvt - \
206 			    (sc)->sc_rx_ring.sring->rsp_prod)
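
/*
 * SC_NLIVEREQ() is the number of RX requests produced by the frontend
 * (req_prod_pvt) for which the backend has not yet produced a response
 * (rsp_prod), i.e. the RX buffers currently outstanding.
 */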
207 
208 /* too big to be on stack */
209 static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
210 static u_long xennet_pages[NET_RX_RING_SIZE];
211 
212 static pool_cache_t if_xennetrxbuf_cache;
213 static int if_xennetrxbuf_cache_inited=0;
214 
215 static int  xennet_xenbus_match(device_t, cfdata_t, void *);
216 static void xennet_xenbus_attach(device_t, device_t, void *);
217 static int  xennet_xenbus_detach(device_t, int);
218 static void xennet_backend_changed(void *, XenbusState);
219 
220 static void xennet_alloc_rx_buffer(struct xennet_xenbus_softc *);
221 static void xennet_free_rx_buffer(struct xennet_xenbus_softc *);
222 static void xennet_tx_complete(struct xennet_xenbus_softc *);
223 static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *);
224 static void xennet_rx_free_req(struct xennet_rxreq *);
225 static int  xennet_handler(void *);
226 static bool xennet_talk_to_backend(struct xennet_xenbus_softc *);
227 #ifdef XENNET_DEBUG_DUMP
228 static void xennet_hex_dump(const unsigned char *, size_t, const char *, int);
229 #endif
230 
231 static int  xennet_init(struct ifnet *);
232 static void xennet_stop(struct ifnet *, int);
233 static void xennet_reset(struct xennet_xenbus_softc *);
234 static void xennet_softstart(void *);
235 static void xennet_start(struct ifnet *);
236 static int  xennet_ioctl(struct ifnet *, u_long, void *);
237 static void xennet_watchdog(struct ifnet *);
238 
239 static bool xennet_xenbus_suspend(device_t dev, const pmf_qual_t *);
240 static bool xennet_xenbus_resume (device_t dev, const pmf_qual_t *);
241 
242 CFATTACH_DECL_NEW(xennet, sizeof(struct xennet_xenbus_softc),
243    xennet_xenbus_match, xennet_xenbus_attach, xennet_xenbus_detach, NULL);
244 
245 static int
246 xennet_xenbus_match(device_t parent, cfdata_t match, void *aux)
247 {
248 	struct xenbusdev_attach_args *xa = aux;
249 
250 	if (strcmp(xa->xa_type, "vif") != 0)
251 		return 0;
252 
253 	if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
254 	    match->cf_loc[XENBUSCF_ID] != xa->xa_id)
255 		return 0;
256 
257 	return 1;
258 }
259 
260 static void
261 xennet_xenbus_attach(device_t parent, device_t self, void *aux)
262 {
263 	struct xennet_xenbus_softc *sc = device_private(self);
264 	struct xenbusdev_attach_args *xa = aux;
265 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
266 	int err;
267 	netif_tx_sring_t *tx_ring;
268 	netif_rx_sring_t *rx_ring;
269 	RING_IDX i;
270 	char *val, *e, *p;
271 	int s;
272 	extern int ifqmaxlen; /* XXX */
273 #ifdef XENNET_DEBUG
274 	char **dir;
275 	int dir_n = 0;
276 	char id_str[20];
277 #endif
278 
279 	aprint_normal(": Xen Virtual Network Interface\n");
280 	sc->sc_dev = self;
281 
282 #ifdef XENNET_DEBUG
283 	printf("path: %s\n", xa->xa_xbusd->xbusd_path);
284 	snprintf(id_str, sizeof(id_str), "%d", xa->xa_id);
285 	err = xenbus_directory(NULL, "device/vif", id_str, &dir_n, &dir);
286 	if (err) {
287 		aprint_error_dev(self, "xenbus_directory err %d\n", err);
288 	} else {
289 		printf("%s/\n", xa->xa_xbusd->xbusd_path);
290 		for (i = 0; i < dir_n; i++) {
291 			printf("\t/%s", dir[i]);
292 			err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path,
293 				          dir[i], NULL, &val);
294 			if (err) {
295 				aprint_error_dev(self, "xenbus_read err %d\n",
296 					         err);
297 			} else {
298 				printf(" = %s\n", val);
299 				free(val, M_DEVBUF);
300 			}
301 		}
302 	}
303 #endif /* XENNET_DEBUG */
304 	sc->sc_xbusd = xa->xa_xbusd;
305 	sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;
306 
307 	/* xenbus ensures that two devices can't be probed at the same time */
308 	if (if_xennetrxbuf_cache_inited == 0) {
309 		if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
310 		    "xnfrx", NULL, IPL_VM, NULL, NULL, NULL);
311 		if_xennetrxbuf_cache_inited = 1;
312 	}
313 
314 
315 	/* initialize free TX and RX request lists */
316 	mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET);
317 	SLIST_INIT(&sc->sc_txreq_head);
318 	for (i = 0; i < NET_TX_RING_SIZE; i++) {
319 		sc->sc_txreqs[i].txreq_id = i;
320 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, &sc->sc_txreqs[i],
321 		    txreq_next);
322 	}
323 	mutex_init(&sc->sc_rx_lock, MUTEX_DEFAULT, IPL_NET);
324 	SLIST_INIT(&sc->sc_rxreq_head);
325 	s = splvm();
326 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
327 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
328 		rxreq->rxreq_id = i;
329 		rxreq->rxreq_sc = sc;
330 		rxreq->rxreq_va = (vaddr_t)pool_cache_get_paddr(
331 		    if_xennetrxbuf_cache, PR_WAITOK, &rxreq->rxreq_pa);
332 		if (rxreq->rxreq_va == 0)
333 			break;
334 		rxreq->rxreq_gntref = GRANT_INVALID_REF;
335 		SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next);
336 	}
337 	splx(s);
338 	sc->sc_free_rxreql = i;
339 	if (sc->sc_free_rxreql == 0) {
340 		aprint_error_dev(self, "failed to allocate rx memory\n");
341 		return;
342 	}
343 
344 	/* read mac address */
345 	err = xenbus_read(NULL, xa->xa_xbusd->xbusd_path, "mac", NULL, &val);
346 	if (err) {
347 		aprint_error_dev(self, "can't read mac address, err %d\n", err);
348 		return;
349 	}
350 	for (i = 0, p = val; i < 6; i++) {
351 		sc->sc_enaddr[i] = strtoul(p, &e, 16);
352 		if ((e[0] == '\0' && i != 5) && e[0] != ':') {
353 			aprint_error_dev(self,
354 			    "%s is not a valid mac address\n", val);
355 			free(val, M_DEVBUF);
356 			return;
357 		}
358 		p = &e[1];
359 	}
360 	free(val, M_DEVBUF);
361 	aprint_normal_dev(self, "MAC address %s\n",
362 	    ether_sprintf(sc->sc_enaddr));
363 	/* Initialize ifnet structure and attach interface */
364 	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
365 	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
366 	ifp->if_softc = sc;
367 	ifp->if_start = xennet_start;
368 	ifp->if_ioctl = xennet_ioctl;
369 	ifp->if_watchdog = xennet_watchdog;
370 	ifp->if_init = xennet_init;
371 	ifp->if_stop = xennet_stop;
372 	ifp->if_flags = IFF_BROADCAST|IFF_SIMPLEX|IFF_NOTRAILERS|IFF_MULTICAST;
373 	ifp->if_timer = 0;
374 	ifp->if_snd.ifq_maxlen = max(ifqmaxlen, NET_TX_RING_SIZE * 2);
375 	ifp->if_capabilities = IFCAP_CSUM_TCPv4_Tx | IFCAP_CSUM_UDPv4_Tx;
376 	IFQ_SET_READY(&ifp->if_snd);
377 	if_attach(ifp);
378 	ether_ifattach(ifp, sc->sc_enaddr);
379 	sc->sc_softintr = softint_establish(SOFTINT_NET, xennet_softstart, sc);
380 	if (sc->sc_softintr == NULL)
381 		panic("%s: can't establish soft interrupt",
382 			device_xname(self));
383 
384 	/* alloc shared rings */
385 	tx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
386 	    UVM_KMF_WIRED);
387 	rx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
388 	    UVM_KMF_WIRED);
389 	if (tx_ring == NULL || rx_ring == NULL)
390 		panic("%s: can't alloc rings", device_xname(self));
391 
392 	sc->sc_tx_ring.sring = tx_ring;
393 	sc->sc_rx_ring.sring = rx_ring;
394 
395 	/* resume shared structures and tell backend that we are ready */
396 	if (xennet_xenbus_resume(self, PMF_Q_NONE) == false) {
397 		uvm_km_free(kernel_map, (vaddr_t)tx_ring, PAGE_SIZE,
398 		    UVM_KMF_WIRED);
399 		uvm_km_free(kernel_map, (vaddr_t)rx_ring, PAGE_SIZE,
400 		    UVM_KMF_WIRED);
401 		return;
402 	}
403 
404 	rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev),
405 	    RND_TYPE_NET, RND_FLAG_DEFAULT);
406 
407 	if (!pmf_device_register(self, xennet_xenbus_suspend,
408 	    xennet_xenbus_resume))
409 		aprint_error_dev(self, "couldn't establish power handler\n");
410 	else
411 		pmf_class_network_register(self, ifp);
412 }
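
/*
 * Note on xennet_xenbus_attach(): the attach routine only sets up the
 * frontend resources (free request lists, RX page buffers, shared ring
 * pages) and grants the rings via xennet_xenbus_resume().  The xenstore
 * handshake that actually connects the interface is performed later, from
 * xennet_backend_changed(), once the backend reports XenbusStateInitWait.
 */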
413 
414 static int
415 xennet_xenbus_detach(device_t self, int flags)
416 {
417 	struct xennet_xenbus_softc *sc = device_private(self);
418 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
419 	int s0, s1;
420 	RING_IDX i;
421 
422 	DPRINTF(("%s: xennet_xenbus_detach\n", device_xname(self)));
423 	s0 = splnet();
424 	xennet_stop(ifp, 1);
425 	event_remove_handler(sc->sc_evtchn, &xennet_handler, sc);
426 	/* wait for pending TX to complete, and collect pending RX packets */
427 	xennet_handler(sc);
428 	while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
429 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_detach", hz/2);
430 		xennet_handler(sc);
431 	}
432 	xennet_free_rx_buffer(sc);
433 
434 	s1 = splvm();
435 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
436 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
437 		uvm_km_free(kernel_map, rxreq->rxreq_va, PAGE_SIZE,
438 		    UVM_KMF_WIRED);
439 	}
440 	splx(s1);
441 
442 	ether_ifdetach(ifp);
443 	if_detach(ifp);
444 
445 	/* Unhook the entropy source. */
446 	rnd_detach_source(&sc->sc_rnd_source);
447 
448 	while (xengnt_status(sc->sc_tx_ring_gntref)) {
449 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_txref", hz/2);
450 	}
451 	xengnt_revoke_access(sc->sc_tx_ring_gntref);
452 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_tx_ring.sring, PAGE_SIZE,
453 	    UVM_KMF_WIRED);
454 	while (xengnt_status(sc->sc_rx_ring_gntref)) {
455 		tsleep(xennet_xenbus_detach, PRIBIO, "xnet_rxref", hz/2);
456 	}
457 	xengnt_revoke_access(sc->sc_rx_ring_gntref);
458 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_rx_ring.sring, PAGE_SIZE,
459 	    UVM_KMF_WIRED);
460 	softint_disestablish(sc->sc_softintr);
461 	splx(s0);
462 
463 	pmf_device_deregister(self);
464 
465 	DPRINTF(("%s: xennet_xenbus_detach done\n", device_xname(self)));
466 	return 0;
467 }
468 
469 static bool
470 xennet_xenbus_resume(device_t dev, const pmf_qual_t *qual)
471 {
472 	struct xennet_xenbus_softc *sc = device_private(dev);
473 	int error;
474 	netif_tx_sring_t *tx_ring;
475 	netif_rx_sring_t *rx_ring;
476 	paddr_t ma;
477 
478 	/* invalidate the RX and TX rings */
479 	if (sc->sc_backend_status == BEST_SUSPENDED) {
480 		/*
481 		 * Device was suspended, so ensure that the grant access
482 		 * associated with the previous RX and TX rings is revoked.
483 		 */
484 		xengnt_revoke_access(sc->sc_tx_ring_gntref);
485 		xengnt_revoke_access(sc->sc_rx_ring_gntref);
486 	}
487 
488 	sc->sc_tx_ring_gntref = GRANT_INVALID_REF;
489 	sc->sc_rx_ring_gntref = GRANT_INVALID_REF;
490 
491 	tx_ring = sc->sc_tx_ring.sring;
492 	rx_ring = sc->sc_rx_ring.sring;
493 
494 	/* Initialize rings */
495 	memset(tx_ring, 0, PAGE_SIZE);
496 	SHARED_RING_INIT(tx_ring);
497 	FRONT_RING_INIT(&sc->sc_tx_ring, tx_ring, PAGE_SIZE);
498 
499 	memset(rx_ring, 0, PAGE_SIZE);
500 	SHARED_RING_INIT(rx_ring);
501 	FRONT_RING_INIT(&sc->sc_rx_ring, rx_ring, PAGE_SIZE);
502 
503 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)tx_ring, &ma);
504 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_tx_ring_gntref);
505 	if (error)
506 		goto abort_resume;
507 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)rx_ring, &ma);
508 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_rx_ring_gntref);
509 	if (error)
510 		goto abort_resume;
511 	error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
512 	if (error)
513 		goto abort_resume;
514 	aprint_verbose_dev(dev, "using event channel %d\n",
515 	    sc->sc_evtchn);
516 	event_set_handler(sc->sc_evtchn, &xennet_handler, sc,
517 	    IPL_NET, device_xname(dev));
518 	return true;
519 
520 abort_resume:
521 	xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device");
522 	return false;
523 }
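
/*
 * Note on xennet_xenbus_resume(): it (re)initializes both shared rings,
 * grants them to the backend and binds a fresh event channel, but it does
 * not publish anything in xenstore; that is done by xennet_talk_to_backend()
 * when the backend reaches XenbusStateInitWait.
 */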
524 
525 static bool
526 xennet_talk_to_backend(struct xennet_xenbus_softc *sc)
527 {
528 	int error;
529 	unsigned long rx_copy;
530 	struct xenbus_transaction *xbt;
531 	const char *errmsg;
532 
533 	error = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
534 	    "feature-rx-copy", &rx_copy, 10);
535 	if (error)
536 		rx_copy = 0; /* default value if key is absent */
537 
538 	if (rx_copy == 1) {
539 		aprint_normal_dev(sc->sc_dev, "using RX copy mode\n");
540 		sc->sc_rx_feature = FEATURE_RX_COPY;
541 	} else {
542 		aprint_normal_dev(sc->sc_dev, "using RX flip mode\n");
543 		sc->sc_rx_feature = FEATURE_RX_FLIP;
544 	}
545 
546 again:
547 	xbt = xenbus_transaction_start();
548 	if (xbt == NULL)
549 		return false;
550 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
551 	    "vifname", "%s", device_xname(sc->sc_dev));
552 	if (error) {
553 		errmsg = "vifname";
554 		goto abort_transaction;
555 	}
556 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
557 	    "tx-ring-ref","%u", sc->sc_tx_ring_gntref);
558 	if (error) {
559 		errmsg = "writing tx ring-ref";
560 		goto abort_transaction;
561 	}
562 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
563 	    "rx-ring-ref","%u", sc->sc_rx_ring_gntref);
564 	if (error) {
565 		errmsg = "writing rx ring-ref";
566 		goto abort_transaction;
567 	}
568 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
569 	    "request-rx-copy", "%lu", rx_copy);
570 	if (error) {
571 		errmsg = "writing request-rx-copy";
572 		goto abort_transaction;
573 	}
574 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
575 	    "feature-rx-notify", "%u", 1);
576 	if (error) {
577 		errmsg = "writing feature-rx-notify";
578 		goto abort_transaction;
579 	}
580 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
581 	    "event-channel", "%u", sc->sc_evtchn);
582 	if (error) {
583 		errmsg = "writing event channel";
584 		goto abort_transaction;
585 	}
586 	error = xenbus_transaction_end(xbt, 0);
587 	if (error == EAGAIN)
588 		goto again;
589 	if (error) {
590 		xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
591 		return false;
592 	}
593 	mutex_enter(&sc->sc_rx_lock);
594 	xennet_alloc_rx_buffer(sc);
595 	mutex_exit(&sc->sc_rx_lock);
596 
597 	if (sc->sc_backend_status == BEST_SUSPENDED) {
598 		xenbus_device_resume(sc->sc_xbusd);
599 	}
600 
601 	sc->sc_backend_status = BEST_CONNECTED;
602 
603 	return true;
604 
605 abort_transaction:
606 	xenbus_transaction_end(xbt, 1);
607 	xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg);
608 	return false;
609 }
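
/*
 * xennet_talk_to_backend() publishes the frontend parameters under the
 * device's xenstore directory, e.g. (illustrative values only):
 *
 *	device/vif/0/vifname           = "xennet0"
 *	device/vif/0/tx-ring-ref       = "8"
 *	device/vif/0/rx-ring-ref       = "9"
 *	device/vif/0/request-rx-copy   = "1"
 *	device/vif/0/feature-rx-notify = "1"
 *	device/vif/0/event-channel     = "5"
 *
 * EAGAIN from xenbus_transaction_end() means the transaction raced with
 * another xenstore update and is simply retried from the start.
 */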
610 
611 static bool
612 xennet_xenbus_suspend(device_t dev, const pmf_qual_t *qual)
613 {
614 	int s;
615 	struct xennet_xenbus_softc *sc = device_private(dev);
616 
617 	/*
618 	 * xennet_stop() is called by pmf(9) before xennet_xenbus_suspend(),
619 	 * so we do not mask the event channel here.
620 	 */
621 
622 	s = splnet();
623 	/* process any outstanding TX responses, then collect RX packets */
624 	xennet_handler(sc);
625 	while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
626 		tsleep(xennet_xenbus_suspend, PRIBIO, "xnet_suspend", hz/2);
627 		xennet_handler(sc);
628 	}
629 
630 	/*
631 	 * dom0 may still use references to the grants we gave away
632 	 * earlier during RX buffer allocation. So we do not free RX buffers
633 	 * here, as dom0 does not expect the guest domain to suddenly revoke
634 	 * access to these grants.
635 	 */
636 
637 	sc->sc_backend_status = BEST_SUSPENDED;
638 	event_remove_handler(sc->sc_evtchn, &xennet_handler, sc);
639 
640 	splx(s);
641 
642 	xenbus_device_suspend(sc->sc_xbusd);
643 	aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn);
644 
645 	return true;
646 }
647 
648 static void xennet_backend_changed(void *arg, XenbusState new_state)
649 {
650 	struct xennet_xenbus_softc *sc = device_private((device_t)arg);
651 	DPRINTF(("%s: new backend state %d\n",
652 	    device_xname(sc->sc_dev), new_state));
653 
654 	switch (new_state) {
655 	case XenbusStateInitialising:
656 	case XenbusStateInitialised:
657 	case XenbusStateConnected:
658 		break;
659 	case XenbusStateClosing:
660 		sc->sc_backend_status = BEST_CLOSED;
661 		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
662 		break;
663 	case XenbusStateInitWait:
664 		if (sc->sc_backend_status == BEST_CONNECTED)
665 			break;
666 		if (xennet_talk_to_backend(sc))
667 			xenbus_switch_state(sc->sc_xbusd, NULL,
668 			    XenbusStateConnected);
669 		break;
670 	case XenbusStateUnknown:
671 	default:
672 		panic("bad backend state %d", new_state);
673 	}
674 }
675 
676 /*
677  * Allocate RX buffers and put the associated request structures
678  * in the ring. This allows the backend to use them to communicate with
679  * the frontend when some data is destined for the frontend.
680  */
681 
682 static void
683 xennet_alloc_rx_buffer(struct xennet_xenbus_softc *sc)
684 {
685 	RING_IDX req_prod = sc->sc_rx_ring.req_prod_pvt;
686 	RING_IDX i;
687 	struct xennet_rxreq *req;
688 	struct xen_memory_reservation reservation;
689 	int s, otherend_id, notify;
690 
691 	otherend_id = sc->sc_xbusd->xbusd_otherend_id;
692 
693 	KASSERT(mutex_owned(&sc->sc_rx_lock));
694 	for (i = 0; sc->sc_free_rxreql != 0; i++) {
695 		req  = SLIST_FIRST(&sc->sc_rxreq_head);
696 		KASSERT(req != NULL);
697 		KASSERT(req == &sc->sc_rxreqs[req->rxreq_id]);
698 		RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->id =
699 		    req->rxreq_id;
700 
701 		switch (sc->sc_rx_feature) {
702 		case FEATURE_RX_COPY:
703 			if (xengnt_grant_access(otherend_id,
704 			    xpmap_ptom_masked(req->rxreq_pa),
705 			    0, &req->rxreq_gntref) != 0) {
706 				goto out_loop;
707 			}
708 			break;
709 		case FEATURE_RX_FLIP:
710 			if (xengnt_grant_transfer(otherend_id,
711 			    &req->rxreq_gntref) != 0) {
712 				goto out_loop;
713 			}
714 			break;
715 		default:
716 			panic("%s: unsupported RX feature mode: %ld\n",
717 			    __func__, sc->sc_rx_feature);
718 		}
719 
720 		RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i)->gref =
721 		    req->rxreq_gntref;
722 
723 		SLIST_REMOVE_HEAD(&sc->sc_rxreq_head, rxreq_next);
724 		sc->sc_free_rxreql--;
725 
726 		if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
727 			/* unmap the page */
728 			MULTI_update_va_mapping(&rx_mcl[i],
729 			    req->rxreq_va, 0, 0);
730 			/*
731 			 * Remove this page from pseudo phys map before
732 			 * passing back to Xen.
733 			 */
734 			xennet_pages[i] =
735 			    xpmap_ptom(req->rxreq_pa) >> PAGE_SHIFT;
736 			xpmap_ptom_unmap(req->rxreq_pa);
737 		}
738 	}
739 
740 out_loop:
741 	if (i == 0) {
742 		return;
743 	}
744 
745 	if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
746 		/* also make sure to flush all TLB entries */
747 		rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
748 		    UVMF_TLB_FLUSH | UVMF_ALL;
749 		/*
750 		 * We may have allocated buffers which have entries
751 		 * outstanding in the page update queue -- make sure we flush
752 		 * those first!
753 		 */
754 		s = splvm();
755 		xpq_flush_queue();
756 		splx(s);
757 		/* now decrease reservation */
758 		set_xen_guest_handle(reservation.extent_start, xennet_pages);
759 		reservation.nr_extents = i;
760 		reservation.extent_order = 0;
761 		reservation.address_bits = 0;
762 		reservation.domid = DOMID_SELF;
763 		rx_mcl[i].op = __HYPERVISOR_memory_op;
764 		rx_mcl[i].args[0] = XENMEM_decrease_reservation;
765 		rx_mcl[i].args[1] = (unsigned long)&reservation;
766 		HYPERVISOR_multicall(rx_mcl, i+1);
767 		if (__predict_false(rx_mcl[i].result != i)) {
768 			panic("xennet_alloc_rx_buffer: "
769 			    "XENMEM_decrease_reservation");
770 		}
771 	}
772 
773 	sc->sc_rx_ring.req_prod_pvt = req_prod + i;
774 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_rx_ring, notify);
775 	if (notify)
776 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
777 	return;
778 }
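
/*
 * Note on the FEATURE_RX_FLIP path in xennet_alloc_rx_buffer(): instead of
 * granting access to the RX pages, the frontend sets up transfer grants and
 * gives the backing pages back to the hypervisor: each page is unmapped,
 * removed from the pseudo-physical map, and a XENMEM_decrease_reservation is
 * batched into the same multicall.  The backend later transfers in a machine
 * page holding the packet, which xennet_handler() maps at the request's
 * VA/PA.
 */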
779 
780 /*
781  * Reclaim all RX buffers used by the I/O ring between frontend and backend
782  */
783 static void
784 xennet_free_rx_buffer(struct xennet_xenbus_softc *sc)
785 {
786 	paddr_t ma, pa;
787 	vaddr_t va;
788 	RING_IDX i;
789 	mmu_update_t mmu[1];
790 	multicall_entry_t mcl[2];
791 
792 	mutex_enter(&sc->sc_rx_lock);
793 
794 	DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev)));
795 	/* get back memory from RX ring */
796 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
797 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
798 
799 		if (rxreq->rxreq_gntref != GRANT_INVALID_REF) {
800 			/*
801 			 * this req is still granted. Get back the page or
802 			 * allocate a new one, and remap it.
803 			 */
804 			SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq,
805 			    rxreq_next);
806 			sc->sc_free_rxreql++;
807 
808 			switch (sc->sc_rx_feature) {
809 			case FEATURE_RX_COPY:
810 				xengnt_revoke_access(rxreq->rxreq_gntref);
811 				rxreq->rxreq_gntref = GRANT_INVALID_REF;
812 				break;
813 			case FEATURE_RX_FLIP:
814 				ma = xengnt_revoke_transfer(
815 				    rxreq->rxreq_gntref);
816 				rxreq->rxreq_gntref = GRANT_INVALID_REF;
817 				if (ma == 0) {
818 					u_long pfn;
819 					struct xen_memory_reservation xenres;
820 					/*
821 					 * transfer not complete, we lost the page.
822 					 * Get one from hypervisor
823 					 */
824 					set_xen_guest_handle(
825 					    xenres.extent_start, &pfn);
826 					xenres.nr_extents = 1;
827 					xenres.extent_order = 0;
828 					xenres.address_bits = 31;
829 					xenres.domid = DOMID_SELF;
830 					if (HYPERVISOR_memory_op(
831 					    XENMEM_increase_reservation, &xenres) < 0) {
832 						panic("xennet_free_rx_buffer: "
833 						    "can't get memory back");
834 					}
835 					ma = pfn;
836 					KASSERT(ma != 0);
837 				}
838 				pa = rxreq->rxreq_pa;
839 				va = rxreq->rxreq_va;
840 				/* remap the page */
841 				mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
842 				mmu[0].val = pa >> PAGE_SHIFT;
843 				MULTI_update_va_mapping(&mcl[0], va,
844 				    (ma << PAGE_SHIFT) | PG_V | PG_KW,
845 				    UVMF_TLB_FLUSH|UVMF_ALL);
846 				xpmap_ptom_map(pa, ptoa(ma));
847 				mcl[1].op = __HYPERVISOR_mmu_update;
848 				mcl[1].args[0] = (unsigned long)mmu;
849 				mcl[1].args[1] = 1;
850 				mcl[1].args[2] = 0;
851 				mcl[1].args[3] = DOMID_SELF;
852 				HYPERVISOR_multicall(mcl, 2);
853 				break;
854 			default:
855 				panic("%s: unsupported RX feature mode: %ld\n",
856 				    __func__, sc->sc_rx_feature);
857 			}
858 		}
859 
860 	}
861 	mutex_exit(&sc->sc_rx_lock);
862 	DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev)));
863 }
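
/*
 * Note on xennet_free_rx_buffer() in FEATURE_RX_FLIP mode: if the backend
 * never completed the page transfer, xengnt_revoke_transfer() returns 0 and
 * the backing machine page is simply gone; a replacement frame is then
 * requested with XENMEM_increase_reservation, and in all cases the frame is
 * mapped back at the request's VA/PA with an MMU_MACHPHYS_UPDATE plus
 * update_va_mapping multicall.
 */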
864 
865 /*
866  * Clears a used RX request when its associated mbuf has been processed
867  */
868 static void
869 xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg)
870 {
871 	int s = splnet();
872 	KASSERT(buf == m->m_ext.ext_buf);
873 	KASSERT(arg == NULL);
874 	KASSERT(m != NULL);
875 	vaddr_t va = (vaddr_t)(buf) & ~((vaddr_t)PAGE_MASK);
876 	pool_cache_put_paddr(if_xennetrxbuf_cache,
877 	    (void *)va, m->m_ext.ext_paddr);
878 	pool_cache_put(mb_cache, m);
879 	splx(s);
880 }
881 
882 static void
883 xennet_rx_free_req(struct xennet_rxreq *req)
884 {
885 	struct xennet_xenbus_softc *sc = req->rxreq_sc;
886 
887 	KASSERT(mutex_owned(&sc->sc_rx_lock));
888 
889 	/* puts back the RX request in the list of free RX requests */
890 	SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next);
891 	sc->sc_free_rxreql++;
892 
893 	/*
894 	 * If the ring needs more requests to be pushed in, allocate some
895 	 * RX buffers to catch up with the backend's consumption.
896 	 */
897 	req->rxreq_gntref = GRANT_INVALID_REF;
898 
899 	if (sc->sc_free_rxreql >= (NET_RX_RING_SIZE * 4 / 5) &&
900 	    __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
901 		xennet_alloc_rx_buffer(sc);
902 	}
903 }
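
/*
 * Note on xennet_rx_free_req(): freed requests are not re-posted one by one;
 * xennet_alloc_rx_buffer() is only called again once at least 4/5 of the
 * ring's requests are free, so ring replenishment (and the associated event
 * channel notification) happens in batches.
 */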
904 
905 /*
906  * Process the responses associated with the TX mbufs previously sent through
907  * xennet_softstart().
908  * Called at splnet.
909  */
910 static void
911 xennet_tx_complete(struct xennet_xenbus_softc *sc)
912 {
913 	struct xennet_txreq *req;
914 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
915 	RING_IDX resp_prod, i;
916 
917 	DPRINTFN(XEDB_EVENT, ("xennet_tx_complete prod %d cons %d\n",
918 	    sc->sc_tx_ring.sring->rsp_prod, sc->sc_tx_ring.rsp_cons));
919 
920 again:
921 	resp_prod = sc->sc_tx_ring.sring->rsp_prod;
922 	xen_rmb();
923 	mutex_enter(&sc->sc_tx_lock);
924 	for (i = sc->sc_tx_ring.rsp_cons; i != resp_prod; i++) {
925 		req = &sc->sc_txreqs[RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id];
926 		KASSERT(req->txreq_id ==
927 		    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id);
928 		if (__predict_false(xengnt_status(req->txreq_gntref))) {
929 			aprint_verbose_dev(sc->sc_dev,
930 			    "grant still used by backend\n");
931 			sc->sc_tx_ring.rsp_cons = i;
932 			goto end;
933 		}
934 		if (__predict_false(
935 		    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->status !=
936 		    NETIF_RSP_OKAY))
937 			ifp->if_oerrors++;
938 		else
939 			ifp->if_opackets++;
940 		xengnt_revoke_access(req->txreq_gntref);
941 		m_freem(req->txreq_m);
942 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
943 	}
944 	mutex_exit(&sc->sc_tx_lock);
945 
946 	sc->sc_tx_ring.rsp_cons = resp_prod;
947 	/* set new event and check for race with rsp_cons update */
948 	sc->sc_tx_ring.sring->rsp_event =
949 	    resp_prod + ((sc->sc_tx_ring.sring->req_prod - resp_prod) >> 1) + 1;
950 	ifp->if_timer = 0;
951 	xen_wmb();
952 	if (resp_prod != sc->sc_tx_ring.sring->rsp_prod)
953 		goto again;
954 end:
955 	if (ifp->if_flags & IFF_OACTIVE) {
956 		ifp->if_flags &= ~IFF_OACTIVE;
957 		xennet_softstart(sc);
958 	}
959 }
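
/*
 * Note on the rsp_event handling in xennet_tx_complete(): the frontend sets
 * sring->rsp_event roughly halfway between the last response consumed and the
 * last request queued, then re-reads rsp_prod to catch responses that arrived
 * while the threshold was being updated; this is the usual lock-free
 * producer/consumer handshake on a Xen shared ring.
 */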
960 
961 /*
962  * Xennet event handler.
963  * Get outstanding responses for TX packets, then collect all responses for
964  * pending RX packets.
965  * Called at splnet.
966  */
967 static int
968 xennet_handler(void *arg)
969 {
970 	struct xennet_xenbus_softc *sc = arg;
971 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
972 	RING_IDX resp_prod, i;
973 	struct xennet_rxreq *req;
974 	paddr_t ma, pa;
975 	vaddr_t va;
976 	mmu_update_t mmu[1];
977 	multicall_entry_t mcl[2];
978 	struct mbuf *m;
979 	void *pktp;
980 	int more_to_do;
981 
982 	if (sc->sc_backend_status != BEST_CONNECTED)
983 		return 1;
984 
985 	xennet_tx_complete(sc);
986 
987 	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
988 
989 again:
990 	DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n",
991 	    sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons));
992 
993 	mutex_enter(&sc->sc_rx_lock);
994 	resp_prod = sc->sc_rx_ring.sring->rsp_prod;
995 	xen_rmb(); /* ensure we see replies up to resp_prod */
996 
997 	for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
998 		netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
999 		req = &sc->sc_rxreqs[rx->id];
1000 		KASSERT(req->rxreq_gntref != GRANT_INVALID_REF);
1001 		KASSERT(req->rxreq_id == rx->id);
1002 
1003 		ma = 0;
1004 		switch (sc->sc_rx_feature) {
1005 		case FEATURE_RX_COPY:
1006 			xengnt_revoke_access(req->rxreq_gntref);
1007 			break;
1008 		case FEATURE_RX_FLIP:
1009 			ma = xengnt_revoke_transfer(req->rxreq_gntref);
1010 			if (ma == 0) {
1011 				DPRINTFN(XEDB_EVENT, ("xennet_handler ma == 0\n"));
1012 				/*
1013 				 * The remote couldn't send us a packet.
1014 				 * We can't free this rxreq as no page will be mapped
1015 				 * here. Instead give it back immediately to the backend.
1016 				 */
1017 				ifp->if_ierrors++;
1018 				RING_GET_REQUEST(&sc->sc_rx_ring,
1019 				    sc->sc_rx_ring.req_prod_pvt)->id = req->rxreq_id;
1020 				RING_GET_REQUEST(&sc->sc_rx_ring,
1021 				    sc->sc_rx_ring.req_prod_pvt)->gref =
1022 					req->rxreq_gntref;
1023 				sc->sc_rx_ring.req_prod_pvt++;
1024 				RING_PUSH_REQUESTS(&sc->sc_rx_ring);
1025 				continue;
1026 			}
1027 			break;
1028 		default:
1029 			panic("%s: unsupported RX feature mode: %ld\n",
1030 			    __func__, sc->sc_rx_feature);
1031 		}
1032 
1033 		pa = req->rxreq_pa;
1034 		va = req->rxreq_va;
1035 
1036 		if (sc->sc_rx_feature == FEATURE_RX_FLIP) {
1037 			/* remap the page */
1038 			mmu[0].ptr = (ma << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
1039 			mmu[0].val = pa >> PAGE_SHIFT;
1040 			MULTI_update_va_mapping(&mcl[0], va,
1041 			    (ma << PAGE_SHIFT) | PG_V | PG_KW, UVMF_TLB_FLUSH|UVMF_ALL);
1042 			xpmap_ptom_map(pa, ptoa(ma));
1043 			mcl[1].op = __HYPERVISOR_mmu_update;
1044 			mcl[1].args[0] = (unsigned long)mmu;
1045 			mcl[1].args[1] = 1;
1046 			mcl[1].args[2] = 0;
1047 			mcl[1].args[3] = DOMID_SELF;
1048 			HYPERVISOR_multicall(mcl, 2);
1049 		}
1050 
1051 		pktp = (void *)(va + rx->offset);
1052 #ifdef XENNET_DEBUG_DUMP
1053 		xennet_hex_dump(pktp, rx->status, "r", rx->id);
1054 #endif
1055 		if ((ifp->if_flags & IFF_PROMISC) == 0) {
1056 			struct ether_header *eh = pktp;
1057 			if (ETHER_IS_MULTICAST(eh->ether_dhost) == 0 &&
1058 			    memcmp(CLLADDR(ifp->if_sadl), eh->ether_dhost,
1059 			    ETHER_ADDR_LEN) != 0) {
1060 				DPRINTFN(XEDB_EVENT,
1061 				    ("xennet_handler bad dest\n"));
1062 				/* packet not for us */
1063 				xennet_rx_free_req(req);
1064 				continue;
1065 			}
1066 		}
1067 		MGETHDR(m, M_DONTWAIT, MT_DATA);
1068 		if (__predict_false(m == NULL)) {
1069 			printf("%s: rx no mbuf\n", ifp->if_xname);
1070 			ifp->if_ierrors++;
1071 			xennet_rx_free_req(req);
1072 			continue;
1073 		}
1074 		MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
1075 
1076 		m_set_rcvif(m, ifp);
1077 		req->rxreq_va = (vaddr_t)pool_cache_get_paddr(
1078 		    if_xennetrxbuf_cache, PR_NOWAIT, &req->rxreq_pa);
1079 		if (__predict_false(req->rxreq_va == 0)) {
1080 			printf("%s: rx no buf\n", ifp->if_xname);
1081 			ifp->if_ierrors++;
1082 			req->rxreq_va = va;
1083 			req->rxreq_pa = pa;
1084 			xennet_rx_free_req(req);
1085 			m_freem(m);
1086 			continue;
1087 		}
1088 		m->m_len = m->m_pkthdr.len = rx->status;
1089 		MEXTADD(m, pktp, rx->status,
1090 		    M_DEVBUF, xennet_rx_mbuf_free, NULL);
1091 		m->m_flags |= M_EXT_RW; /* we own the buffer */
1092 		m->m_ext.ext_paddr = pa;
1093 		if ((rx->flags & NETRXF_csum_blank) != 0) {
1094 			xennet_checksum_fill(&m);
1095 			if (m == NULL) {
1096 				ifp->if_ierrors++;
1097 				continue;
1098 			}
1099 		}
1100 		/* freeing the req may overwrite *rx, better to do it late */
1101 		xennet_rx_free_req(req);
1102 
1103 		/* Pass the packet up. */
1104 		if_percpuq_enqueue(ifp->if_percpuq, m);
1105 	}
1106 	xen_rmb();
1107 	sc->sc_rx_ring.rsp_cons = i;
1108 	RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_rx_ring, more_to_do);
1109 	mutex_exit(&sc->sc_rx_lock);
1110 
1111 	if (more_to_do)
1112 		goto again;
1113 
1114 	return 1;
1115 }
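
/*
 * Note on xennet_handler(): RING_FINAL_CHECK_FOR_RESPONSES() both updates
 * sring->rsp_event (so the backend will raise another event for the next
 * response) and reports whether more responses were produced in the
 * meantime, in which case the loop is run again before returning.
 */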
1116 
1117 /*
1118  * The output routine of a xennet interface.
1119  * Called at splnet.
1120  */
1121 void
1122 xennet_start(struct ifnet *ifp)
1123 {
1124 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1125 
1126 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start()\n", device_xname(sc->sc_dev)));
1127 
1128 	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
1129 
1130 	xennet_tx_complete(sc);
1131 
1132 	if (__predict_false(
1133 	    (ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING))
1134 		return;
1135 
1136 	/*
1137 	 * The Xen communication channel is much more efficient if we can
1138 	 * schedule a batch of packets for domain0. To achieve this, we
1139 	 * schedule a soft interrupt, and just return. This way, the network
1140 	 * stack will enqueue all pending mbufs in the interface's send queue
1141 	 * before it is processed by xennet_softstart().
1142 	 */
1143 	softint_schedule(sc->sc_softintr);
1144 	return;
1145 }
1146 
1147 /*
1148  * Prepare mbufs for TX, and notify the backend when finished.
1149  * Called at splsoftnet.
1150  */
1151 void
1152 xennet_softstart(void *arg)
1153 {
1154 	struct xennet_xenbus_softc *sc = arg;
1155 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
1156 	struct mbuf *m, *new_m;
1157 	netif_tx_request_t *txreq;
1158 	RING_IDX req_prod;
1159 	paddr_t pa, pa2;
1160 	struct xennet_txreq *req;
1161 	int notify;
1162 	int do_notify = 0;
1163 
1164 	mutex_enter(&sc->sc_tx_lock);
1165 	if (__predict_false(
1166 	    (ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)) {
1167 		mutex_exit(&sc->sc_tx_lock);
1168 		return;
1169 	}
1170 
1171 	req_prod = sc->sc_tx_ring.req_prod_pvt;
1172 	while (/*CONSTCOND*/1) {
1173 		uint16_t txflags;
1174 
1175 		req = SLIST_FIRST(&sc->sc_txreq_head);
1176 		if (__predict_false(req == NULL)) {
1177 			ifp->if_flags |= IFF_OACTIVE;
1178 			break;
1179 		}
1180 		IFQ_POLL(&ifp->if_snd, m);
1181 		if (m == NULL)
1182 			break;
1183 
1184 		switch (m->m_flags & (M_EXT|M_EXT_CLUSTER)) {
1185 		case M_EXT|M_EXT_CLUSTER:
1186 			KASSERT(m->m_ext.ext_paddr != M_PADDR_INVALID);
1187 			pa = m->m_ext.ext_paddr +
1188 				(m->m_data - m->m_ext.ext_buf);
1189 			break;
1190 		case 0:
1191 			KASSERT(m->m_paddr != M_PADDR_INVALID);
1192 			pa = m->m_paddr + M_BUFOFFSET(m) +
1193 				(m->m_data - M_BUFADDR(m));
1194 			break;
1195 		default:
1196 			if (__predict_false(
1197 			    !pmap_extract(pmap_kernel(), (vaddr_t)m->m_data,
1198 			    &pa))) {
1199 				panic("xennet_start: no pa");
1200 			}
1201 			break;
1202 		}
1203 
1204 		if ((m->m_pkthdr.csum_flags &
1205 		    (M_CSUM_TCPv4 | M_CSUM_UDPv4)) != 0) {
1206 			txflags = NETTXF_csum_blank;
1207 		} else {
1208 			txflags = 0;
1209 		}
1210 
1211 		if (m->m_pkthdr.len != m->m_len ||
1212 		    (pa ^ (pa + m->m_pkthdr.len - 1)) & PG_FRAME) {
1213 
1214 			MGETHDR(new_m, M_DONTWAIT, MT_DATA);
1215 			if (__predict_false(new_m == NULL)) {
1216 				printf("%s: cannot allocate new mbuf\n",
1217 				       device_xname(sc->sc_dev));
1218 				break;
1219 			}
1220 			if (m->m_pkthdr.len > MHLEN) {
1221 				MCLGET(new_m, M_DONTWAIT);
1222 				if (__predict_false(
1223 				    (new_m->m_flags & M_EXT) == 0)) {
1224 					DPRINTF(("%s: no mbuf cluster\n",
1225 					    device_xname(sc->sc_dev)));
1226 					m_freem(new_m);
1227 					break;
1228 				}
1229 			}
1230 
1231 			m_copydata(m, 0, m->m_pkthdr.len, mtod(new_m, void *));
1232 			new_m->m_len = new_m->m_pkthdr.len = m->m_pkthdr.len;
1233 
1234 			if ((new_m->m_flags & M_EXT) != 0) {
1235 				pa = new_m->m_ext.ext_paddr;
1236 				KASSERT(new_m->m_data == new_m->m_ext.ext_buf);
1237 				KASSERT(pa != M_PADDR_INVALID);
1238 			} else {
1239 				pa = new_m->m_paddr;
1240 				KASSERT(pa != M_PADDR_INVALID);
1241 				KASSERT(new_m->m_data == M_BUFADDR(new_m));
1242 				pa += M_BUFOFFSET(new_m);
1243 			}
1244 			if (__predict_false(xengnt_grant_access(
1245 			    sc->sc_xbusd->xbusd_otherend_id,
1246 			    xpmap_ptom_masked(pa),
1247 			    GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1248 				m_freem(new_m);
1249 				ifp->if_flags |= IFF_OACTIVE;
1250 				break;
1251 			}
1252 			/* we will be able to send new_m */
1253 			IFQ_DEQUEUE(&ifp->if_snd, m);
1254 			m_freem(m);
1255 			m = new_m;
1256 		} else {
1257 			if (__predict_false(xengnt_grant_access(
1258 			    sc->sc_xbusd->xbusd_otherend_id,
1259 			    xpmap_ptom_masked(pa),
1260 			    GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1261 				ifp->if_flags |= IFF_OACTIVE;
1262 				break;
1263 			}
1264 			/* we will be able to send m */
1265 			IFQ_DEQUEUE(&ifp->if_snd, m);
1266 		}
1267 		MCLAIM(m, &sc->sc_ethercom.ec_tx_mowner);
1268 
1269 		KASSERT(((pa ^ (pa + m->m_pkthdr.len -  1)) & PG_FRAME) == 0);
1270 
1271 		SLIST_REMOVE_HEAD(&sc->sc_txreq_head, txreq_next);
1272 		req->txreq_m = m;
1273 
1274 		DPRINTFN(XEDB_MBUF, ("xennet_start id %d, "
1275 		    "mbuf %p, buf %p/%p/%p, size %d\n",
1276 		    req->txreq_id, m, mtod(m, void *), (void *)pa,
1277 		    (void *)xpmap_ptom_masked(pa), m->m_pkthdr.len));
1278 		pmap_extract_ma(pmap_kernel(), mtod(m, vaddr_t), &pa2);
1279 		DPRINTFN(XEDB_MBUF, ("xennet_start pa %p ma %p/%p\n",
1280 		    (void *)pa, (void *)xpmap_ptom_masked(pa), (void *)pa2));
1281 #ifdef XENNET_DEBUG_DUMP
1282 		xennet_hex_dump(mtod(m, u_char *), m->m_pkthdr.len, "s",
1283 			       	req->txreq_id);
1284 #endif
1285 
1286 		txreq = RING_GET_REQUEST(&sc->sc_tx_ring, req_prod);
1287 		txreq->id = req->txreq_id;
1288 		txreq->gref = req->txreq_gntref;
1289 		txreq->offset = pa & ~PG_FRAME;
1290 		txreq->size = m->m_pkthdr.len;
1291 		txreq->flags = txflags;
1292 
1293 		req_prod++;
1294 		sc->sc_tx_ring.req_prod_pvt = req_prod;
1295 		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_tx_ring, notify);
1296 		if (notify)
1297 			do_notify = 1;
1298 
1299 #ifdef XENNET_DEBUG
1300 		DPRINTFN(XEDB_MEM, ("packet addr %p/%p, physical %p/%p, "
1301 		    "m_paddr %p, len %d/%d\n", M_BUFADDR(m), mtod(m, void *),
1302 		    (void *)*kvtopte(mtod(m, vaddr_t)),
1303 		    (void *)xpmap_mtop(*kvtopte(mtod(m, vaddr_t))),
1304 		    (void *)m->m_paddr, m->m_pkthdr.len, m->m_len));
1305 		DPRINTFN(XEDB_MEM, ("id %d gref %d offset %d size %d flags %d"
1306 		    " prod %d\n",
1307 		    txreq->id, txreq->gref, txreq->offset, txreq->size,
1308 		    txreq->flags, req_prod));
1309 #endif
1310 
1311 		/*
1312 		 * Pass packet to bpf if there is a listener.
1313 		 */
1314 		bpf_mtap(ifp, m);
1315 	}
1316 
1317 	if (do_notify) {
1318 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
1319 		ifp->if_timer = 5;
1320 	}
1321 
1322 	mutex_exit(&sc->sc_tx_lock);
1323 
1324 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start() done\n",
1325 	    device_xname(sc->sc_dev)));
1326 }
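
/*
 * Note on xennet_softstart(): a single netif_tx_request_t carries one grant
 * reference plus an (offset, size) pair, so a packet can only be sent
 * directly if it fits in one contiguous chunk within a single page.  Packets
 * spread over several mbufs, or straddling a page boundary (the PG_FRAME
 * test above), are first copied into a fresh mbuf (cluster) before being
 * granted to the backend.
 */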
1327 
1328 int
1329 xennet_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1330 {
1331 #ifdef XENNET_DEBUG
1332 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1333 #endif
1334 	int s, error = 0;
1335 
1336 	s = splnet();
1337 
1338 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl()\n",
1339 	    device_xname(sc->sc_dev)));
1340 	error = ether_ioctl(ifp, cmd, data);
1341 	if (error == ENETRESET)
1342 		error = 0;
1343 	splx(s);
1344 
1345 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() returning %d\n",
1346 	    device_xname(sc->sc_dev), error));
1347 
1348 	return error;
1349 }
1350 
1351 void
1352 xennet_watchdog(struct ifnet *ifp)
1353 {
1354 	aprint_verbose_ifnet(ifp, "xennet_watchdog\n");
1355 }
1356 
1357 int
1358 xennet_init(struct ifnet *ifp)
1359 {
1360 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1361 	mutex_enter(&sc->sc_rx_lock);
1362 
1363 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_init()\n",
1364 	    device_xname(sc->sc_dev)));
1365 
1366 	if ((ifp->if_flags & IFF_RUNNING) == 0) {
1367 		sc->sc_rx_ring.sring->rsp_event =
1368 		    sc->sc_rx_ring.rsp_cons + 1;
1369 		hypervisor_enable_event(sc->sc_evtchn);
1370 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
1371 		xennet_reset(sc);
1372 	}
1373 	ifp->if_flags |= IFF_RUNNING;
1374 	ifp->if_flags &= ~IFF_OACTIVE;
1375 	ifp->if_timer = 0;
1376 	mutex_exit(&sc->sc_rx_lock);
1377 	return 0;
1378 }
1379 
1380 void
1381 xennet_stop(struct ifnet *ifp, int disable)
1382 {
1383 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1384 
1385 	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
1386 	hypervisor_mask_event(sc->sc_evtchn);
1387 	xennet_reset(sc);
1388 }
1389 
1390 void
1391 xennet_reset(struct xennet_xenbus_softc *sc)
1392 {
1393 
1394 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_reset()\n",
1395 	    device_xname(sc->sc_dev)));
1396 }
1397 
1398 #if defined(NFS_BOOT_BOOTSTATIC)
1399 int
1400 xennet_bootstatic_callback(struct nfs_diskless *nd)
1401 {
1402 #if 0
1403 	struct ifnet *ifp = nd->nd_ifp;
1404 	struct xennet_xenbus_softc *sc =
1405 	    (struct xennet_xenbus_softc *)ifp->if_softc;
1406 #endif
1407 	int flags = 0;
1408 	union xen_cmdline_parseinfo xcp;
1409 	struct sockaddr_in *sin;
1410 
1411 	memset(&xcp, 0, sizeof(xcp.xcp_netinfo));
1412 	xcp.xcp_netinfo.xi_ifno = /* XXX sc->sc_ifno */ 0;
1413 	xcp.xcp_netinfo.xi_root = nd->nd_root.ndm_host;
1414 	xen_parse_cmdline(XEN_PARSE_NETINFO, &xcp);
1415 
1416 	if (xcp.xcp_netinfo.xi_root[0] != '\0') {
1417 		flags |= NFS_BOOT_HAS_SERVER;
1418 		if (strchr(xcp.xcp_netinfo.xi_root, ':') != NULL)
1419 			flags |= NFS_BOOT_HAS_ROOTPATH;
1420 	}
1421 
1422 	nd->nd_myip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[0]);
1423 	nd->nd_gwip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[2]);
1424 	nd->nd_mask.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[3]);
1425 
1426 	sin = (struct sockaddr_in *) &nd->nd_root.ndm_saddr;
1427 	memset((void *)sin, 0, sizeof(*sin));
1428 	sin->sin_len = sizeof(*sin);
1429 	sin->sin_family = AF_INET;
1430 	sin->sin_addr.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[1]);
1431 
1432 	if (nd->nd_myip.s_addr)
1433 		flags |= NFS_BOOT_HAS_MYIP;
1434 	if (nd->nd_gwip.s_addr)
1435 		flags |= NFS_BOOT_HAS_GWIP;
1436 	if (nd->nd_mask.s_addr)
1437 		flags |= NFS_BOOT_HAS_MASK;
1438 	if (sin->sin_addr.s_addr)
1439 		flags |= NFS_BOOT_HAS_SERVADDR;
1440 
1441 	return flags;
1442 }
1443 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
1444 
1445 #ifdef XENNET_DEBUG_DUMP
1446 #define XCHR(x) hexdigits[(x) & 0xf]
1447 static void
1448 xennet_hex_dump(const unsigned char *pkt, size_t len, const char *type, int id)
1449 {
1450 	size_t i, j;
1451 
1452 	printf("pkt %p len %zd/%zx type %s id %d\n", pkt, len, len, type, id);
1453 	printf("00000000  ");
1454 	for(i=0; i<len; i++) {
1455 		printf("%c%c ", XCHR(pkt[i]>>4), XCHR(pkt[i]));
1456 		if ((i+1) % 16 == 8)
1457 			printf(" ");
1458 		if ((i+1) % 16 == 0) {
1459 			printf(" %c", '|');
1460 			for(j=0; j<16; j++)
1461 				printf("%c", pkt[i-15+j]>=32 &&
1462 				    pkt[i-15+j]<127?pkt[i-15+j]:'.');
1463 			printf("%c\n%c%c%c%c%c%c%c%c  ", '|',
1464 			    XCHR((i+1)>>28), XCHR((i+1)>>24),
1465 			    XCHR((i+1)>>20), XCHR((i+1)>>16),
1466 			    XCHR((i+1)>>12), XCHR((i+1)>>8),
1467 			    XCHR((i+1)>>4), XCHR(i+1));
1468 		}
1469 	}
1470 	printf("\n");
1471 }
1472 #undef XCHR
1473 #endif
1474