1 /*      $NetBSD: if_xennet_xenbus.c,v 1.130 2024/01/09 18:39:53 jdolecek Exp $      */
2 
3 /*
4  * Copyright (c) 2006 Manuel Bouyer.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /*
28  * Copyright (c) 2004 Christian Limpach.
29  * All rights reserved.
30  *
31  * Redistribution and use in source and binary forms, with or without
32  * modification, are permitted provided that the following conditions
33  * are met:
34  * 1. Redistributions of source code must retain the above copyright
35  *    notice, this list of conditions and the following disclaimer.
36  * 2. Redistributions in binary form must reproduce the above copyright
37  *    notice, this list of conditions and the following disclaimer in the
38  *    documentation and/or other materials provided with the distribution.
39  *
40  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
41  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
42  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
43  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
44  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
46  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
47  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
48  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
49  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50  */
51 
52 /*
53  * This file contains the xennet frontend code required for the network
54  * communication between two Xen domains.
55  * It resembles xbd, but is a little more complex as it must deal with two
56  * rings:
57  * - the TX ring, to transmit packets to backend (inside => outside)
58  * - the RX ring, to receive packets from backend (outside => inside)
59  *
60  * The principles are as follows.
61  *
62  * For TX:
63  * The purpose is to transmit packets to the outside. The start of day is in
64  * xennet_start() (the output routine of xennet), scheduled via a softint.
65  * xennet_start() generates the requests associated
66  * with the TX mbufs queued (see altq(9)).
67  * The backend's responses are processed by xennet_tx_complete(), called
68  * from xennet_start().
69  *
70  * For RX:
71  * The purpose is to process the packets received from the outside. RX buffers
72  * are pre-allocated through xennet_alloc_rx_buffer(), during xennet autoconf
73  * attach. During pre-allocation, the frontend pushes requests into the I/O
74  * ring, in preparation for incoming packets from the backend.
75  * When RX packets need to be processed, the backend takes the requests
76  * previously offered by the frontend and pushes the associated responses
77  * into the I/O ring. When done, it notifies the frontend through an event
78  * notification, which asynchronously calls xennet_handler() in the frontend.
79  * xennet_handler() processes the responses, generates the associated mbuf,
80  * and passes it to the MI layer for further processing.
81  */
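/*
 * A rough sketch of the call flow, in terms of the functions defined below:
 *
 * TX:  xennet_start() -> bus_dmamap_load_mbuf() -> xennet_submit_tx_request()
 *      -> RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() / hypervisor_notify_via_evtchn()
 *      backend event -> xennet_handler() -> if_schedule_deferred_start()
 *      -> xennet_start() -> xennet_tx_complete() (reclaims grants and mbufs)
 *
 * RX:  xennet_alloc_rx_buffer() posts granted buffers; the backend fills them
 *      and sends an event -> xennet_handler() builds the mbuf (chain) and
 *      calls if_percpuq_enqueue(); xennet_rx_free_req() recycles the slot.
 */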
82 
83 #include <sys/cdefs.h>
84 __KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.130 2024/01/09 18:39:53 jdolecek Exp $");
85 
86 #include "opt_xen.h"
87 #include "opt_nfs_boot.h"
88 #include "opt_net_mpsafe.h"
89 
90 #include <sys/param.h>
91 #include <sys/device.h>
92 #include <sys/conf.h>
93 #include <sys/kernel.h>
94 #include <sys/proc.h>
95 #include <sys/systm.h>
96 #include <sys/intr.h>
97 #include <sys/rndsource.h>
98 
99 #include <net/if.h>
100 #include <net/if_dl.h>
101 #include <net/if_ether.h>
102 #include <net/bpf.h>
103 
104 #if defined(NFS_BOOT_BOOTSTATIC)
105 #include <sys/fstypes.h>
106 #include <sys/mount.h>
107 #include <sys/statvfs.h>
108 #include <netinet/in.h>
109 #include <nfs/rpcv2.h>
110 #include <nfs/nfsproto.h>
111 #include <nfs/nfs.h>
112 #include <nfs/nfsmount.h>
113 #include <nfs/nfsdiskless.h>
114 #include <xen/if_xennetvar.h>
115 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
116 
117 #include <xen/xennet_checksum.h>
118 
119 #include <uvm/uvm.h>
120 
121 #include <xen/intr.h>
122 #include <xen/hypervisor.h>
123 #include <xen/evtchn.h>
124 #include <xen/granttables.h>
125 #include <xen/include/public/io/netif.h>
126 #include <xen/xenpmap.h>
127 
128 #include <xen/xenbus.h>
129 #include "locators.h"
130 
131 #undef XENNET_DEBUG_DUMP
132 #undef XENNET_DEBUG
133 
134 #ifdef XENNET_DEBUG
135 #define XEDB_FOLLOW     0x01
136 #define XEDB_INIT       0x02
137 #define XEDB_EVENT      0x04
138 #define XEDB_MBUF       0x08
139 #define XEDB_MEM        0x10
140 int xennet_debug = 0xff;
141 #define DPRINTF(x) if (xennet_debug) printf x;
142 #define DPRINTFN(n,x) if (xennet_debug & (n)) printf x;
143 #else
144 #define DPRINTF(x)
145 #define DPRINTFN(n,x)
146 #endif
147 
148 #define GRANT_INVALID_REF -1 /* entry is free */
149 
150 #define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
151 #define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
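/*
 * With the standard netif ring layout and 4KB pages, both of the above
 * evaluate to 256 slots per ring.
 */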
152 
153 struct xennet_txreq {
154 	SLIST_ENTRY(xennet_txreq) txreq_next;
155 	uint16_t txreq_id; /* ID passed to backend */
156 	grant_ref_t txreq_gntref; /* grant ref of this request */
157 	struct mbuf *txreq_m; /* mbuf being transmitted */
158 	bus_dmamap_t txreq_dmamap;
159 };
160 
161 struct xennet_rxreq {
162 	SLIST_ENTRY(xennet_rxreq) rxreq_next;
163 	uint16_t rxreq_id; /* ID passed to backend */
164 	grant_ref_t rxreq_gntref; /* grant ref of this request */
165 	struct mbuf *rxreq_m;
166 	bus_dmamap_t rxreq_dmamap;
167 };
168 
169 struct xennet_xenbus_softc {
170 	device_t sc_dev;
171 	struct ethercom sc_ethercom;
172 	uint8_t sc_enaddr[ETHER_ADDR_LEN];
173 	struct xenbus_device *sc_xbusd;
174 
175 	netif_tx_front_ring_t sc_tx_ring;
176 	netif_rx_front_ring_t sc_rx_ring;
177 
178 	unsigned int sc_evtchn;
179 	struct intrhand *sc_ih;
180 
181 	grant_ref_t sc_tx_ring_gntref;
182 	grant_ref_t sc_rx_ring_gntref;
183 
184 	kmutex_t sc_tx_lock; /* protects free TX list, TX ring */
185 	kmutex_t sc_rx_lock; /* protects free RX list, RX ring, rxreql */
186 	struct xennet_txreq sc_txreqs[NET_TX_RING_SIZE];
187 	struct xennet_rxreq sc_rxreqs[NET_RX_RING_SIZE];
188 	SLIST_HEAD(,xennet_txreq) sc_txreq_head; /* list of free TX requests */
189 	SLIST_HEAD(,xennet_rxreq) sc_rxreq_head; /* list of free RX requests */
190 	int sc_free_txreql; /* number of free transmit request structs */
191 	int sc_free_rxreql; /* number of free receive request structs */
192 
193 	int sc_backend_status; /* our status with backend */
194 #define BEST_CLOSED		0
195 #define BEST_DISCONNECTED	1
196 #define BEST_CONNECTED		2
197 #define BEST_SUSPENDED		3
198 	int sc_features;
199 #define FEATURE_IPV6CSUM	0x01	/* IPv6 checksum offload */
200 #define FEATURE_SG		0x02	/* scatter-gather */
201 #define FEATURE_RX_COPY		0x04	/* RX-copy */
202 #define FEATURE_BITS		"\20\1IPV6-CSUM\2SG\3RX-COPY"
203 	krndsource_t sc_rnd_source;
204 	struct evcnt sc_cnt_tx_defrag;
205 	struct evcnt sc_cnt_tx_queue_full;
206 	struct evcnt sc_cnt_tx_drop;
207 	struct evcnt sc_cnt_tx_frag;
208 	struct evcnt sc_cnt_rx_frag;
209 	struct evcnt sc_cnt_rx_cksum_blank;
210 	struct evcnt sc_cnt_rx_cksum_undefer;
211 };
212 
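/*
 * Page-sized RX buffer pool, shared by all xennet(4) instances; created
 * lazily by the first instance to attach (see xennet_xenbus_attach()).
 */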
213 static pool_cache_t if_xennetrxbuf_cache;
214 static int if_xennetrxbuf_cache_inited = 0;
215 
216 static int  xennet_xenbus_match(device_t, cfdata_t, void *);
217 static void xennet_xenbus_attach(device_t, device_t, void *);
218 static int  xennet_xenbus_detach(device_t, int);
219 static void xennet_backend_changed(void *, XenbusState);
220 
221 static void xennet_alloc_rx_buffer(struct xennet_xenbus_softc *);
222 static void xennet_free_rx_buffer(struct xennet_xenbus_softc *, bool);
223 static void xennet_tx_complete(struct xennet_xenbus_softc *);
224 static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *);
225 static int  xennet_handler(void *);
226 static bool xennet_talk_to_backend(struct xennet_xenbus_softc *);
227 #ifdef XENNET_DEBUG_DUMP
228 static void xennet_hex_dump(const unsigned char *, size_t, const char *, int);
229 #endif
230 
231 static int  xennet_init(struct ifnet *);
232 static void xennet_stop(struct ifnet *, int);
233 static void xennet_start(struct ifnet *);
234 static int  xennet_ioctl(struct ifnet *, u_long, void *);
235 
236 static bool xennet_xenbus_suspend(device_t dev, const pmf_qual_t *);
237 static bool xennet_xenbus_resume(device_t dev, const pmf_qual_t *);
238 
239 CFATTACH_DECL3_NEW(xennet, sizeof(struct xennet_xenbus_softc),
240    xennet_xenbus_match, xennet_xenbus_attach, xennet_xenbus_detach, NULL,
241    NULL, NULL, DVF_DETACH_SHUTDOWN);
242 
243 static int
244 xennet_xenbus_match(device_t parent, cfdata_t match, void *aux)
245 {
246 	struct xenbusdev_attach_args *xa = aux;
247 
248 	if (strcmp(xa->xa_type, "vif") != 0)
249 		return 0;
250 
251 	if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
252 	    match->cf_loc[XENBUSCF_ID] != xa->xa_id)
253 		return 0;
254 
255 	return 1;
256 }
257 
258 static void
259 xennet_xenbus_attach(device_t parent, device_t self, void *aux)
260 {
261 	struct xennet_xenbus_softc *sc = device_private(self);
262 	struct xenbusdev_attach_args *xa = aux;
263 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
264 	int err;
265 	netif_tx_sring_t *tx_ring;
266 	netif_rx_sring_t *rx_ring;
267 	RING_IDX i;
268 	char *e, *p;
269 	unsigned long uval;
270 	extern int ifqmaxlen; /* XXX */
271 	char mac[32];
272 	char buf[64];
273 	bus_size_t maxsz;
274 	int nsegs;
275 
276 	aprint_normal(": Xen Virtual Network Interface\n");
277 	sc->sc_dev = self;
278 
279 	sc->sc_xbusd = xa->xa_xbusd;
280 	sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;
281 
282 	/* read feature support flags */
283 	err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
284 	    "feature-rx-copy", &uval, 10);
285 	if (!err && uval == 1)
286 		sc->sc_features |= FEATURE_RX_COPY;
287 	err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
288 	    "feature-ipv6-csum-offload", &uval, 10);
289 	if (!err && uval == 1)
290 		sc->sc_features |= FEATURE_IPV6CSUM;
291 	err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
292 	    "feature-sg", &uval, 10);
293 	if (!err && uval == 1)
294 		sc->sc_features |= FEATURE_SG;
295 	snprintb(buf, sizeof(buf), FEATURE_BITS, sc->sc_features);
296 	aprint_normal_dev(sc->sc_dev, "backend features %s\n", buf);
297 
298 	/* xenbus ensures two devices can't be probed at the same time */
299 	if (if_xennetrxbuf_cache_inited == 0) {
300 		if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
301 		    "xnfrx", NULL, IPL_NET, NULL, NULL, NULL);
302 		if_xennetrxbuf_cache_inited = 1;
303 	}
304 
305 	/* initialize free TX and RX request lists */
306 	if (sc->sc_features & FEATURE_SG) {
307 		maxsz = ETHER_MAX_LEN_JUMBO;
308 		/*
309 		 * Linux netback drops the packet if the request has more
310 		 * segments than XEN_NETIF_NR_SLOTS_MIN (== 18). With 2KB
311 		 * MCLBYTES this means a maximum packet size of 36KB, in reality
312 		 * less due to mbuf chain fragmentation.
313 		 */
314 		nsegs = XEN_NETIF_NR_SLOTS_MIN;
315 	} else {
316 		maxsz = PAGE_SIZE;
317 		nsegs = 1;
318 	}
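	/*
	 * Each DMA segment must stay within a single page (both maxsegsz and
	 * boundary are PAGE_SIZE), since one grant reference covers exactly
	 * one page of the buffer handed to the backend.
	 */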
319 	mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET);
320 	SLIST_INIT(&sc->sc_txreq_head);
321 	for (i = 0; i < NET_TX_RING_SIZE; i++) {
322 		struct xennet_txreq *txreq = &sc->sc_txreqs[i];
323 
324 		txreq->txreq_id = i;
325 		if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, maxsz, nsegs,
326 		    PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
327 		    &txreq->txreq_dmamap) != 0)
328 			break;
329 
330 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, &sc->sc_txreqs[i],
331 		    txreq_next);
332 	}
333 	sc->sc_free_txreql = i;
334 
335 	mutex_init(&sc->sc_rx_lock, MUTEX_DEFAULT, IPL_NET);
336 	SLIST_INIT(&sc->sc_rxreq_head);
337 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
338 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
339 		rxreq->rxreq_id = i;
340 		if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, maxsz, nsegs,
341 		    PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
342 		    &rxreq->rxreq_dmamap) != 0)
343 			break;
344 		rxreq->rxreq_gntref = GRANT_INVALID_REF;
345 		SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next);
346 	}
347 	sc->sc_free_rxreql = i;
348 	if (sc->sc_free_rxreql == 0) {
349 		aprint_error_dev(self, "failed to allocate rx memory\n");
350 		return;
351 	}
352 
353 	/* read mac address */
354 	err = xenbus_read(NULL, sc->sc_xbusd->xbusd_path, "mac",
355 	    mac, sizeof(mac));
356 	if (err) {
357 		aprint_error_dev(self, "can't read mac address, err %d\n", err);
358 		return;
359 	}
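	/* Parse the colon-separated "xx:xx:xx:xx:xx:xx" string from xenstore. */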
360 	for (i = 0, p = mac; i < ETHER_ADDR_LEN; i++) {
361 		sc->sc_enaddr[i] = strtoul(p, &e, 16);
362 		if ((e[0] == '\0' && i != 5) && e[0] != ':') {
363 			aprint_error_dev(self,
364 			    "%s is not a valid mac address\n", mac);
365 			return;
366 		}
367 		p = &e[1];
368 	}
369 	aprint_normal_dev(self, "MAC address %s\n",
370 	    ether_sprintf(sc->sc_enaddr));
371 
372 	/* Initialize ifnet structure and attach interface */
373 	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
374 	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
375 	if (sc->sc_features & FEATURE_SG)
376 		sc->sc_ethercom.ec_capabilities |= ETHERCAP_JUMBO_MTU;
377 	ifp->if_softc = sc;
378 	ifp->if_start = xennet_start;
379 	ifp->if_ioctl = xennet_ioctl;
380 	ifp->if_init = xennet_init;
381 	ifp->if_stop = xennet_stop;
382 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
383 	ifp->if_extflags = IFEF_MPSAFE;
384 	ifp->if_snd.ifq_maxlen = uimax(ifqmaxlen, NET_TX_RING_SIZE * 2);
385 	ifp->if_capabilities =
386 		IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx
387 		| IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx
388 		| IFCAP_CSUM_UDPv6_Rx
389 		| IFCAP_CSUM_TCPv6_Rx;
390 #define XN_M_CSUM_SUPPORTED						\
391 	(M_CSUM_TCPv4 | M_CSUM_UDPv4 | M_CSUM_TCPv6 | M_CSUM_UDPv6)
392 
393 	if (sc->sc_features & FEATURE_IPV6CSUM) {
394 		/*
395 		 * If backend supports IPv6 csum offloading, we can skip
396 		 * IPv6 csum for Tx packets. Rx packet validation can
397 		 * be skipped regardless.
398 		 */
399 		ifp->if_capabilities |=
400 		    IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_TCPv6_Tx;
401 	}
402 
403 	IFQ_SET_MAXLEN(&ifp->if_snd, uimax(2 * NET_TX_RING_SIZE, IFQ_MAXLEN));
404 	IFQ_SET_READY(&ifp->if_snd);
405 	if_attach(ifp);
406 	if_deferred_start_init(ifp, NULL);
407 	ether_ifattach(ifp, sc->sc_enaddr);
408 
409 	/* alloc shared rings */
410 	tx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
411 	    UVM_KMF_WIRED);
412 	rx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
413 	    UVM_KMF_WIRED);
414 	if (tx_ring == NULL || rx_ring == NULL)
415 		panic("%s: can't alloc rings", device_xname(self));
416 
417 	sc->sc_tx_ring.sring = tx_ring;
418 	sc->sc_rx_ring.sring = rx_ring;
419 
420 	rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev),
421 	    RND_TYPE_NET, RND_FLAG_DEFAULT);
422 
423 	evcnt_attach_dynamic(&sc->sc_cnt_tx_defrag, EVCNT_TYPE_MISC,
424 	    NULL, device_xname(sc->sc_dev), "Tx packet defrag");
425 	evcnt_attach_dynamic(&sc->sc_cnt_tx_frag, EVCNT_TYPE_MISC,
426 	    NULL, device_xname(sc->sc_dev), "Tx multi-segment packet");
427 	evcnt_attach_dynamic(&sc->sc_cnt_tx_drop, EVCNT_TYPE_MISC,
428 	    NULL, device_xname(sc->sc_dev), "Tx packet dropped");
429 	evcnt_attach_dynamic(&sc->sc_cnt_tx_queue_full, EVCNT_TYPE_MISC,
430 	    NULL, device_xname(sc->sc_dev), "Tx queue full");
431 	evcnt_attach_dynamic(&sc->sc_cnt_rx_frag, EVCNT_TYPE_MISC,
432 	    NULL, device_xname(sc->sc_dev), "Rx multi-segment packet");
433 	evcnt_attach_dynamic(&sc->sc_cnt_rx_cksum_blank, EVCNT_TYPE_MISC,
434 	    NULL, device_xname(sc->sc_dev), "Rx csum blank");
435 	evcnt_attach_dynamic(&sc->sc_cnt_rx_cksum_undefer, EVCNT_TYPE_MISC,
436 	    NULL, device_xname(sc->sc_dev), "Rx csum undeferred");
437 
438 	if (!pmf_device_register(self, xennet_xenbus_suspend,
439 	    xennet_xenbus_resume))
440 		aprint_error_dev(self, "couldn't establish power handler\n");
441 	else
442 		pmf_class_network_register(self, ifp);
443 
444 	/* resume shared structures and tell backend that we are ready */
445 	if (xennet_xenbus_resume(self, PMF_Q_NONE) == false) {
446 		uvm_km_free(kernel_map, (vaddr_t)tx_ring, PAGE_SIZE,
447 		    UVM_KMF_WIRED);
448 		uvm_km_free(kernel_map, (vaddr_t)rx_ring, PAGE_SIZE,
449 		    UVM_KMF_WIRED);
450 		return;
451 	}
452 }
453 
454 static int
455 xennet_xenbus_detach(device_t self, int flags)
456 {
457 	struct xennet_xenbus_softc *sc = device_private(self);
458 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
459 
460 	if ((flags & (DETACH_SHUTDOWN | DETACH_FORCE)) == DETACH_SHUTDOWN) {
461 		/* Trigger state transition with backend */
462 		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosing);
463 		return EBUSY;
464 	}
465 
466 	DPRINTF(("%s: xennet_xenbus_detach\n", device_xname(self)));
467 
468 	/* stop interface */
469 	IFNET_LOCK(ifp);
470 	xennet_stop(ifp, 1);
471 	IFNET_UNLOCK(ifp);
472 	if (sc->sc_ih != NULL) {
473 		xen_intr_disestablish(sc->sc_ih);
474 		sc->sc_ih = NULL;
475 	}
476 
477 	/* collect any outstanding TX responses */
478 	mutex_enter(&sc->sc_tx_lock);
479 	xennet_tx_complete(sc);
480 	while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
481 		kpause("xndetach", true, hz/2, &sc->sc_tx_lock);
482 		xennet_tx_complete(sc);
483 	}
484 	mutex_exit(&sc->sc_tx_lock);
485 
486 	mutex_enter(&sc->sc_rx_lock);
487 	xennet_free_rx_buffer(sc, true);
488 	mutex_exit(&sc->sc_rx_lock);
489 
490 	ether_ifdetach(ifp);
491 	if_detach(ifp);
492 
493 	evcnt_detach(&sc->sc_cnt_tx_defrag);
494 	evcnt_detach(&sc->sc_cnt_tx_frag);
495 	evcnt_detach(&sc->sc_cnt_tx_drop);
496 	evcnt_detach(&sc->sc_cnt_tx_queue_full);
497 	evcnt_detach(&sc->sc_cnt_rx_frag);
498 	evcnt_detach(&sc->sc_cnt_rx_cksum_blank);
499 	evcnt_detach(&sc->sc_cnt_rx_cksum_undefer);
500 
501 	/* Unhook the entropy source. */
502 	rnd_detach_source(&sc->sc_rnd_source);
503 
504 	/* Wait until the tx/rx rings stop being used by backend */
505 	mutex_enter(&sc->sc_tx_lock);
506 	while (xengnt_status(sc->sc_tx_ring_gntref))
507 		kpause("xntxref", true, hz/2, &sc->sc_tx_lock);
508 	xengnt_revoke_access(sc->sc_tx_ring_gntref);
509 	mutex_exit(&sc->sc_tx_lock);
510 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_tx_ring.sring, PAGE_SIZE,
511 	    UVM_KMF_WIRED);
512 	mutex_enter(&sc->sc_rx_lock);
513 	while (xengnt_status(sc->sc_rx_ring_gntref))
514 		kpause("xnrxref", true, hz/2, &sc->sc_rx_lock);
515 	xengnt_revoke_access(sc->sc_rx_ring_gntref);
516 	mutex_exit(&sc->sc_rx_lock);
517 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_rx_ring.sring, PAGE_SIZE,
518 	    UVM_KMF_WIRED);
519 
520 	pmf_device_deregister(self);
521 
522 	sc->sc_backend_status = BEST_DISCONNECTED;
523 
524 	DPRINTF(("%s: xennet_xenbus_detach done\n", device_xname(self)));
525 	return 0;
526 }
527 
528 static bool
529 xennet_xenbus_resume(device_t dev, const pmf_qual_t *qual)
530 {
531 	struct xennet_xenbus_softc *sc = device_private(dev);
532 	int error;
533 	netif_tx_sring_t *tx_ring;
534 	netif_rx_sring_t *rx_ring;
535 	paddr_t ma;
536 
537 	/* All grants were removed during suspend */
538 	sc->sc_tx_ring_gntref = GRANT_INVALID_REF;
539 	sc->sc_rx_ring_gntref = GRANT_INVALID_REF;
540 
541 	mutex_enter(&sc->sc_rx_lock);
542 	/* Free but don't revoke, the grant is gone */
543 	xennet_free_rx_buffer(sc, false);
544 	KASSERT(sc->sc_free_rxreql == NET_RX_RING_SIZE);
545 	mutex_exit(&sc->sc_rx_lock);
546 
547 	tx_ring = sc->sc_tx_ring.sring;
548 	rx_ring = sc->sc_rx_ring.sring;
549 
550 	/* Initialize rings */
551 	memset(tx_ring, 0, PAGE_SIZE);
552 	SHARED_RING_INIT(tx_ring);
553 	FRONT_RING_INIT(&sc->sc_tx_ring, tx_ring, PAGE_SIZE);
554 
555 	memset(rx_ring, 0, PAGE_SIZE);
556 	SHARED_RING_INIT(rx_ring);
557 	FRONT_RING_INIT(&sc->sc_rx_ring, rx_ring, PAGE_SIZE);
558 
559 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)tx_ring, &ma);
560 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_tx_ring_gntref);
561 	if (error)
562 		goto abort_resume;
563 	(void)pmap_extract_ma(pmap_kernel(), (vaddr_t)rx_ring, &ma);
564 	error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_rx_ring_gntref);
565 	if (error)
566 		goto abort_resume;
567 
568 	if (sc->sc_ih != NULL) {
569 		xen_intr_disestablish(sc->sc_ih);
570 		sc->sc_ih = NULL;
571 	}
572 	error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
573 	if (error)
574 		goto abort_resume;
575 	aprint_verbose_dev(dev, "using event channel %d\n",
576 	    sc->sc_evtchn);
577 	sc->sc_ih = xen_intr_establish_xname(-1, &xen_pic, sc->sc_evtchn,
578 	    IST_LEVEL, IPL_NET, &xennet_handler, sc, true, device_xname(dev));
579 	KASSERT(sc->sc_ih != NULL);
580 
581 	/* Re-fill Rx ring */
582 	mutex_enter(&sc->sc_rx_lock);
583 	xennet_alloc_rx_buffer(sc);
584 	KASSERT(sc->sc_free_rxreql == 0);
585 	mutex_exit(&sc->sc_rx_lock);
586 
587 	xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateInitialised);
588 
589 	if (sc->sc_backend_status == BEST_SUSPENDED) {
590 		if (xennet_talk_to_backend(sc)) {
591 			xenbus_device_resume(sc->sc_xbusd);
592 			hypervisor_unmask_event(sc->sc_evtchn);
593 			xenbus_switch_state(sc->sc_xbusd, NULL,
594 			    XenbusStateConnected);
595 		}
596 	}
597 
598 	return true;
599 
600 abort_resume:
601 	xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device");
602 	return false;
603 }
604 
605 static bool
606 xennet_talk_to_backend(struct xennet_xenbus_softc *sc)
607 {
608 	int error;
609 	struct xenbus_transaction *xbt;
610 	const char *errmsg;
611 
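	/*
	 * Write all frontend parameters in a single xenstore transaction;
	 * if the transaction ends with EAGAIN, simply retry it from scratch.
	 */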
612 again:
613 	xbt = xenbus_transaction_start();
614 	if (xbt == NULL)
615 		return false;
616 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
617 	    "vifname", "%s", device_xname(sc->sc_dev));
618 	if (error) {
619 		errmsg = "vifname";
620 		goto abort_transaction;
621 	}
622 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
623 	    "tx-ring-ref","%u", sc->sc_tx_ring_gntref);
624 	if (error) {
625 		errmsg = "writing tx ring-ref";
626 		goto abort_transaction;
627 	}
628 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
629 	    "rx-ring-ref","%u", sc->sc_rx_ring_gntref);
630 	if (error) {
631 		errmsg = "writing rx ring-ref";
632 		goto abort_transaction;
633 	}
634 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
635 	    "request-rx-copy", "%u", 1);
636 	if (error) {
637 		errmsg = "writing request-rx-copy";
638 		goto abort_transaction;
639 	}
640 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
641 	    "feature-rx-notify", "%u", 1);
642 	if (error) {
643 		errmsg = "writing feature-rx-notify";
644 		goto abort_transaction;
645 	}
646 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
647 	    "feature-ipv6-csum-offload", "%u", 1);
648 	if (error) {
649 		errmsg = "writing feature-ipv6-csum-offload";
650 		goto abort_transaction;
651 	}
652 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
653 	    "feature-sg", "%u", 1);
654 	if (error) {
655 		errmsg = "writing feature-sg";
656 		goto abort_transaction;
657 	}
658 	error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
659 	    "event-channel", "%u", sc->sc_evtchn);
660 	if (error) {
661 		errmsg = "writing event channel";
662 		goto abort_transaction;
663 	}
664 	error = xenbus_transaction_end(xbt, 0);
665 	if (error == EAGAIN)
666 		goto again;
667 	if (error) {
668 		xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
669 		return false;
670 	}
671 	mutex_enter(&sc->sc_rx_lock);
672 	xennet_alloc_rx_buffer(sc);
673 	mutex_exit(&sc->sc_rx_lock);
674 
675 	sc->sc_backend_status = BEST_CONNECTED;
676 
677 	return true;
678 
679 abort_transaction:
680 	xenbus_transaction_end(xbt, 1);
681 	xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg);
682 	return false;
683 }
684 
685 static bool
686 xennet_xenbus_suspend(device_t dev, const pmf_qual_t *qual)
687 {
688 	struct xennet_xenbus_softc *sc = device_private(dev);
689 
690 	/*
691 	 * xennet_stop() is called by pmf(9) before xennet_xenbus_suspend(),
692 	 * so we do not mask event channel here
693 	 */
694 
695 	mutex_enter(&sc->sc_tx_lock);
696 
697 	/* collect any outstanding TX responses */
698 	xennet_tx_complete(sc);
699 	while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
700 		kpause("xnsuspend", true, hz/2, &sc->sc_tx_lock);
701 		xennet_tx_complete(sc);
702 	}
703 	KASSERT(sc->sc_free_txreql == NET_TX_RING_SIZE);
704 	mutex_exit(&sc->sc_tx_lock);
705 
706 	/*
707 	 * dom0 may still use references to the grants we gave away
708 	 * earlier during RX buffer allocation. So we do not free RX buffers
709 	 * here, as dom0 does not expect the guest domain to suddenly revoke
710 	 * access to these grants.
711 	 */
712 	sc->sc_backend_status = BEST_SUSPENDED;
713 
714 	xenbus_device_suspend(sc->sc_xbusd);
715 	aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn);
716 
717 	return true;
718 }
719 
720 static void xennet_backend_changed(void *arg, XenbusState new_state)
721 {
722 	struct xennet_xenbus_softc *sc = device_private((device_t)arg);
723 	DPRINTF(("%s: new backend state %d\n",
724 	    device_xname(sc->sc_dev), new_state));
725 
726 	switch (new_state) {
727 	case XenbusStateInitialising:
728 	case XenbusStateInitialised:
729 	case XenbusStateConnected:
730 		break;
731 	case XenbusStateClosing:
732 		sc->sc_backend_status = BEST_CLOSED;
733 		xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
734 		break;
735 	case XenbusStateInitWait:
736 		if (sc->sc_backend_status == BEST_CONNECTED
737 		   || sc->sc_backend_status == BEST_SUSPENDED)
738 			break;
739 
740 		if (xennet_talk_to_backend(sc))
741 			xenbus_switch_state(sc->sc_xbusd, NULL,
742 			    XenbusStateConnected);
743 		break;
744 	case XenbusStateUnknown:
745 	default:
746 		panic("bad backend state %d", new_state);
747 	}
748 }
749 
750 /*
751  * Allocate RX buffers and put the associated request structures
752  * in the ring. This allows the backend to use them to communicate with
753  * the frontend when some data is destined for the frontend.
754  */
755 static void
756 xennet_alloc_rx_buffer(struct xennet_xenbus_softc *sc)
757 {
758 	RING_IDX req_prod = sc->sc_rx_ring.req_prod_pvt;
759 	RING_IDX i;
760 	netif_rx_request_t *rxreq;
761 	struct xennet_rxreq *req;
762 	int otherend_id, notify;
763 	struct mbuf *m;
764 	vaddr_t va;
765 	paddr_t pa, ma;
766 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
767 
768 	KASSERT(mutex_owned(&sc->sc_rx_lock));
769 
770 	otherend_id = sc->sc_xbusd->xbusd_otherend_id;
771 
772 	for (i = 0; sc->sc_free_rxreql != 0; i++) {
773 		req  = SLIST_FIRST(&sc->sc_rxreq_head);
774 		KASSERT(req != NULL);
775 		KASSERT(req == &sc->sc_rxreqs[req->rxreq_id]);
776 		KASSERT(req->rxreq_m == NULL);
777 		KASSERT(req->rxreq_gntref == GRANT_INVALID_REF);
778 
779 		MGETHDR(m, M_DONTWAIT, MT_DATA);
780 		if (__predict_false(m == NULL)) {
781 			printf("%s: rx no mbuf\n", ifp->if_xname);
782 			break;
783 		}
784 
785 		va = (vaddr_t)pool_cache_get_paddr(
786 		    if_xennetrxbuf_cache, PR_NOWAIT, &pa);
787 		if (__predict_false(va == 0)) {
788 			printf("%s: rx no cluster\n", ifp->if_xname);
789 			m_freem(m);
790 			break;
791 		}
792 
793 		MEXTADD(m, va, PAGE_SIZE,
794 		    M_DEVBUF, xennet_rx_mbuf_free, NULL);
795 		m->m_len = m->m_pkthdr.len = PAGE_SIZE;
796 		m->m_ext.ext_paddr = pa;
797 		m->m_flags |= M_EXT_RW; /* we own the buffer */
798 
799 		/* Set M_EXT_CLUSTER so that load_mbuf uses m_ext.ext_paddr */
800 		m->m_flags |= M_EXT_CLUSTER;
801 		if (__predict_false(bus_dmamap_load_mbuf(
802 		    sc->sc_xbusd->xbusd_dmat,
803 		    req->rxreq_dmamap, m, BUS_DMA_NOWAIT) != 0)) {
804 			printf("%s: rx mbuf load failed", ifp->if_xname);
805 			m->m_flags &= ~M_EXT_CLUSTER;
806 			m_freem(m);
807 			break;
808 		}
809 		m->m_flags &= ~M_EXT_CLUSTER;
810 
811 		KASSERT(req->rxreq_dmamap->dm_nsegs == 1);
812 		ma = req->rxreq_dmamap->dm_segs[0].ds_addr;
813 
814 		if (xengnt_grant_access(otherend_id, trunc_page(ma),
815 		    0, &req->rxreq_gntref) != 0) {
816 			m_freem(m);
817 			break;
818 		}
819 
820 		req->rxreq_m = m;
821 
822 		rxreq = RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i);
823 		rxreq->id = req->rxreq_id;
824 		rxreq->gref = req->rxreq_gntref;
825 
826 		SLIST_REMOVE_HEAD(&sc->sc_rxreq_head, rxreq_next);
827 		sc->sc_free_rxreql--;
828 	}
829 
830 	/* Notify backend if more Rx is possible */
831 	if (i > 0) {
832 		sc->sc_rx_ring.req_prod_pvt = req_prod + i;
833 		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_rx_ring, notify);
834 		if (notify)
835 			hypervisor_notify_via_evtchn(sc->sc_evtchn);
836 	}
837 }
838 
839 /*
840  * Reclaim all RX buffers used by the I/O ring between frontend and backend
841  */
842 static void
843 xennet_free_rx_buffer(struct xennet_xenbus_softc *sc, bool revoke)
844 {
845 	RING_IDX i;
846 
847 	KASSERT(mutex_owned(&sc->sc_rx_lock));
848 
849 	DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev)));
850 	/* get back memory from RX ring */
851 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
852 		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
853 
854 		if (rxreq->rxreq_gntref != GRANT_INVALID_REF) {
855 			/*
856 			 * this req is still granted. Get back the page or
857 			 * allocate a new one, and remap it.
858 			 */
859 			SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq,
860 			    rxreq_next);
861 			sc->sc_free_rxreql++;
862 
863 			if (revoke)
864 				xengnt_revoke_access(rxreq->rxreq_gntref);
865 			rxreq->rxreq_gntref = GRANT_INVALID_REF;
866 		}
867 
868 		if (rxreq->rxreq_m != NULL) {
869 			m_freem(rxreq->rxreq_m);
870 			rxreq->rxreq_m = NULL;
871 		}
872 	}
873 	DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev)));
874 }
875 
876 /*
877  * Clears a used RX request when its associated mbuf has been processed
878  */
879 static void
880 xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg)
881 {
882 	KASSERT(buf == m->m_ext.ext_buf);
883 	KASSERT(arg == NULL);
884 	KASSERT(m != NULL);
885 	vaddr_t va = (vaddr_t)(buf) & ~((vaddr_t)PAGE_MASK);
886 	pool_cache_put_paddr(if_xennetrxbuf_cache,
887 	    (void *)va, m->m_ext.ext_paddr);
888 	pool_cache_put(mb_cache, m);
889 };
890 
891 static void
892 xennet_rx_free_req(struct xennet_xenbus_softc *sc, struct xennet_rxreq *req)
893 {
894 	KASSERT(mutex_owned(&sc->sc_rx_lock));
895 
896 	/* puts back the RX request in the list of free RX requests */
897 	SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next);
898 	sc->sc_free_rxreql++;
899 
900 	/*
901 	 * The ring needs more requests pushed in; allocate a batch of RX
902 	 * buffers to catch up with the backend's consumption.
903 	 */
904 	if (sc->sc_free_rxreql >= (NET_RX_RING_SIZE * 4 / 5) &&
905 	    __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
906 		xennet_alloc_rx_buffer(sc);
907 	}
908 }
909 
910 /*
911  * Process the responses associated with the TX mbufs sent previously through
912  * xennet_start().
913  * Called at splsoftnet.
914  */
915 static void
916 xennet_tx_complete(struct xennet_xenbus_softc *sc)
917 {
918 	struct xennet_txreq *req;
919 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
920 	RING_IDX resp_prod, i;
921 
922 	DPRINTFN(XEDB_EVENT, ("xennet_tx_complete prod %d cons %d\n",
923 	    sc->sc_tx_ring.sring->rsp_prod, sc->sc_tx_ring.rsp_cons));
924 
925 	KASSERT(mutex_owned(&sc->sc_tx_lock));
926 again:
927 	resp_prod = sc->sc_tx_ring.sring->rsp_prod;
928 	xen_rmb();
929 	for (i = sc->sc_tx_ring.rsp_cons; i != resp_prod; i++) {
930 		req = &sc->sc_txreqs[RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id];
931 		KASSERT(req->txreq_id ==
932 		    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id);
933 		KASSERT(xengnt_status(req->txreq_gntref) == 0);
934 		xengnt_revoke_access(req->txreq_gntref);
935 		req->txreq_gntref = GRANT_INVALID_REF;
936 
937 		/* Cleanup/statistics if this is the master req of a chain */
938 		if (req->txreq_m) {
939 			if (__predict_false(
940 			    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->status !=
941 			    NETIF_RSP_OKAY))
942 				if_statinc(ifp, if_oerrors);
943 			else
944 				if_statinc(ifp, if_opackets);
945 			bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
946 			    req->txreq_dmamap);
947 			m_freem(req->txreq_m);
948 			req->txreq_m = NULL;
949 		}
950 
951 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
952 		sc->sc_free_txreql++;
953 	}
954 	sc->sc_tx_ring.rsp_cons = resp_prod;
955 	/* set new event and check for race with rsp_cons update */
956 	xen_wmb();
957 	sc->sc_tx_ring.sring->rsp_event =
958 	    resp_prod + ((sc->sc_tx_ring.sring->req_prod - resp_prod) >> 1) + 1;
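	/*
	 * The above asks for an event once the backend has produced responses
	 * about halfway between the current response and request producers, so
	 * that TX completions are batched rather than signalled per packet.
	 */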
959 	xen_mb();
960 	if (resp_prod != sc->sc_tx_ring.sring->rsp_prod)
961 		goto again;
962 }
963 
964 /*
965  * Xennet event handler.
966  * Get outstanding responses of TX packets, then collect all responses of
967  * pending RX packets.
968  * Called at splnet.
969  */
970 static int
971 xennet_handler(void *arg)
972 {
973 	struct xennet_xenbus_softc *sc = arg;
974 	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
975 	RING_IDX resp_prod, i;
976 	struct xennet_rxreq *req;
977 	struct mbuf *m, *m0;
978 	int rxflags, m0_rxflags;
979 	int more_to_do;
980 
981 	if (sc->sc_backend_status != BEST_CONNECTED)
982 		return 1;
983 
984 	/* Poke the Tx queue if we ran out of Tx buffers earlier */
985 	if_schedule_deferred_start(ifp);
986 
987 	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
988 
989 again:
990 	DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n",
991 	    sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons));
992 
993 	mutex_enter(&sc->sc_rx_lock);
994 	resp_prod = sc->sc_rx_ring.sring->rsp_prod;
995 	xen_rmb(); /* ensure we see replies up to resp_prod */
996 
997 	m0 = NULL;
998 	for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
999 		netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
1000 		req = &sc->sc_rxreqs[rx->id];
1001 		KASSERT(req->rxreq_gntref != GRANT_INVALID_REF);
1002 		KASSERT(req->rxreq_id == rx->id);
1003 
1004 		xengnt_revoke_access(req->rxreq_gntref);
1005 		req->rxreq_gntref = GRANT_INVALID_REF;
1006 
1007 		m = req->rxreq_m;
1008 		req->rxreq_m = NULL;
1009 
1010 		m->m_len = m->m_pkthdr.len = rx->status;
1011 		bus_dmamap_sync(sc->sc_xbusd->xbusd_dmat, req->rxreq_dmamap, 0,
1012 		     m->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1013 
1014 		if (m0 == NULL) {
1015 			MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
1016 			m_set_rcvif(m, ifp);
1017 		}
1018 
1019 		rxflags = rx->flags;
1020 
1021 		if (m0 || rxflags & NETRXF_more_data) {
1022 			/*
1023 			 * On Rx, every fragment (even the first one) contains
1024 			 * just the length of the data in that fragment.
1025 			 */
1026 			if (m0 == NULL) {
1027 				m0 = m;
1028 				m0_rxflags = rxflags;
1029 			} else {
1030 				m_cat(m0, m);
1031 				m0->m_pkthdr.len += m->m_len;
1032 			}
1033 
1034 			if (rxflags & NETRXF_more_data) {
1035 				/* Still more fragments to receive */
1036 				xennet_rx_free_req(sc, req);
1037 				continue;
1038 			}
1039 
1040 			sc->sc_cnt_rx_frag.ev_count++;
1041 			m = m0;
1042 			m0 = NULL;
1043 			rxflags = m0_rxflags;
1044 		}
1045 
1046 		if (rxflags & NETRXF_csum_blank) {
1047 			xennet_checksum_fill(ifp, m, &sc->sc_cnt_rx_cksum_blank,
1048 			    &sc->sc_cnt_rx_cksum_undefer);
1049 		} else if (rxflags & NETRXF_data_validated)
1050 			m->m_pkthdr.csum_flags = XN_M_CSUM_SUPPORTED;
1051 
1052 		/* We're done with this req */
1053 		xennet_rx_free_req(sc, req);
1054 
1055 		/* Pass the packet up. */
1056 		if_percpuq_enqueue(ifp->if_percpuq, m);
1057 	}
1058 	/* If the queued Rx fragments did not finish the packet, drop it */
1059 	if (m0) {
1060 		if_statinc(ifp, if_iqdrops);
1061 		m_freem(m0);
1062 	}
1063 	sc->sc_rx_ring.rsp_cons = i;
1064 	xen_wmb();
1065 	RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_rx_ring, more_to_do);
1066 	mutex_exit(&sc->sc_rx_lock);
1067 
1068 	if (more_to_do) {
1069 		DPRINTF(("%s: %s more_to_do\n", ifp->if_xname, __func__));
1070 		goto again;
1071 	}
1072 
1073 	return 1;
1074 }
1075 
1076 static bool
1077 xennet_submit_tx_request(struct xennet_xenbus_softc *sc, struct mbuf *m,
1078     struct xennet_txreq *req0, int *req_prod)
1079 {
1080 	struct xennet_txreq *req = req0;
1081 	netif_tx_request_t *txreq;
1082 	int i, prod = *req_prod;
1083 	const bool multiseg = (req0->txreq_dmamap->dm_nsegs > 1);
1084 	const int lastseg = req0->txreq_dmamap->dm_nsegs - 1;
1085 	bus_dma_segment_t *ds;
1086 	SLIST_HEAD(, xennet_txreq) txchain;
1087 
1088 	KASSERT(mutex_owned(&sc->sc_tx_lock));
1089 	KASSERT(req0->txreq_dmamap->dm_nsegs > 0);
1090 
1091 	bus_dmamap_sync(sc->sc_xbusd->xbusd_dmat, req->txreq_dmamap, 0,
1092 	     m->m_pkthdr.len, BUS_DMASYNC_POSTWRITE);
1093 	MCLAIM(m, &sc->sc_ethercom.ec_tx_mowner);
1094 	SLIST_INIT(&txchain);
1095 
1096 	for (i = 0; i < req0->txreq_dmamap->dm_nsegs; i++) {
1097 		KASSERT(req != NULL);
1098 
1099 		ds = &req0->txreq_dmamap->dm_segs[i];
1100 
1101 		if (__predict_false(xengnt_grant_access(
1102 		    sc->sc_xbusd->xbusd_otherend_id,
1103 		    trunc_page(ds->ds_addr),
1104 		    GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1105 			goto grant_fail;
1106 		}
1107 
1108 		KASSERT(SLIST_FIRST(&sc->sc_txreq_head) == req);
1109 		SLIST_REMOVE_HEAD(&sc->sc_txreq_head, txreq_next);
1110 		SLIST_INSERT_HEAD(&txchain, req, txreq_next);
1111 		sc->sc_free_txreql--;
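		/*
		 * Only the head request of the chain keeps the mbuf pointer;
		 * xennet_tx_complete() frees the mbuf once, when it sees it.
		 */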
1112 		req->txreq_m = (req == req0) ? m : NULL;
1113 
1114 		txreq = RING_GET_REQUEST(&sc->sc_tx_ring, prod + i);
1115 		txreq->id = req->txreq_id;
1116 		txreq->gref = req->txreq_gntref;
1117 		txreq->offset = ds->ds_addr & PAGE_MASK;
1118 		/* For Tx, first fragment size is always set to total size */
1119 		txreq->size = (i == 0) ? m->m_pkthdr.len : ds->ds_len;
1120 		txreq->flags = 0;
1121 		if (i == 0) {
1122 			if (m->m_pkthdr.csum_flags & XN_M_CSUM_SUPPORTED) {
1123 				txreq->flags |= NETTXF_csum_blank;
1124 			} else {
1125 #if 0
1126 				/*
1127 				 * XXX Checksum optimization disabled
1128 				 * to avoid port-xen/57743.
1129 				 */
1130 				txreq->flags |= NETTXF_data_validated;
1131 #endif
1132 			}
1133 		}
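		/* Every segment but the last announces that more data follows */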
1134 		if (multiseg && i < lastseg)
1135 			txreq->flags |= NETTXF_more_data;
1136 
1137 		req = SLIST_FIRST(&sc->sc_txreq_head);
1138 	}
1139 
1140 	if (i > 1)
1141 		sc->sc_cnt_tx_frag.ev_count++;
1142 
1143 	/* All done */
1144 	*req_prod += i;
1145 	return true;
1146 
1147 grant_fail:
1148 	printf("%s: grant_access failed\n", device_xname(sc->sc_dev));
1149 	while (!SLIST_EMPTY(&txchain)) {
1150 		req = SLIST_FIRST(&txchain);
1151 		SLIST_REMOVE_HEAD(&txchain, txreq_next);
1152 		xengnt_revoke_access(req->txreq_gntref);
1153 		req->txreq_gntref = GRANT_INVALID_REF;
1154 		SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
1155 		sc->sc_free_txreql++;
1156 	}
1157 	req0->txreq_m = NULL;
1158 	return false;
1159 }
1160 
1161 /*
1162  * The output routine of a xennet interface. Prepares mbufs for TX,
1163  * and notifies the backend when finished.
1164  * Called at splsoftnet.
1165  */
1166 void
1167 xennet_start(struct ifnet *ifp)
1168 {
1169 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1170 	struct mbuf *m;
1171 	RING_IDX req_prod;
1172 	struct xennet_txreq *req;
1173 	int notify;
1174 
1175 	mutex_enter(&sc->sc_tx_lock);
1176 
1177 	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
1178 
1179 	xennet_tx_complete(sc);
1180 
1181 	req_prod = sc->sc_tx_ring.req_prod_pvt;
1182 	while (/*CONSTCOND*/1) {
1183 		req = SLIST_FIRST(&sc->sc_txreq_head);
1184 		if (__predict_false(req == NULL)) {
1185 			if (!IFQ_IS_EMPTY(&ifp->if_snd))
1186 				sc->sc_cnt_tx_queue_full.ev_count++;
1187 			break;
1188 		}
1189 		IFQ_DEQUEUE(&ifp->if_snd, m);
1190 		if (m == NULL)
1191 			break;
1192 
1193 		/*
1194 		 * For short packets it's always much faster to pass a
1195 		 * single defragmented packet, even with feature-sg.
1196 		 * Try to defragment first if the result is likely to fit
1197 		 * into a single mbuf.
1198 		 */
1199 		if (m->m_pkthdr.len < MCLBYTES && m->m_next)
1200 			(void)m_defrag(m, M_DONTWAIT);
1201 
1202 		/* Try to load the mbuf as-is, if that fails defrag */
1203 		if (__predict_false(bus_dmamap_load_mbuf(
1204 		    sc->sc_xbusd->xbusd_dmat,
1205 		    req->txreq_dmamap, m, BUS_DMA_NOWAIT) != 0)) {
1206 			sc->sc_cnt_tx_defrag.ev_count++;
1207 			if (__predict_false(m_defrag(m, M_DONTWAIT) == NULL)) {
1208 				DPRINTF(("%s: defrag failed\n",
1209 				    device_xname(sc->sc_dev)));
1210 				m_freem(m);
1211 				break;
1212 			}
1213 
1214 			if (__predict_false(bus_dmamap_load_mbuf(
1215 			    sc->sc_xbusd->xbusd_dmat,
1216 			    req->txreq_dmamap, m, BUS_DMA_NOWAIT) != 0)) {
1217 				printf("%s: cannot load new mbuf len %d\n",
1218 				    device_xname(sc->sc_dev),
1219 				    m->m_pkthdr.len);
1220 				m_freem(m);
1221 				break;
1222 			}
1223 		}
1224 
1225 		if (req->txreq_dmamap->dm_nsegs > sc->sc_free_txreql) {
1226 			/* Not enough slots right now, drop the packet */
1227 			sc->sc_cnt_tx_queue_full.ev_count++;
1228 			sc->sc_cnt_tx_drop.ev_count++;
1229 			bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
1230 			    req->txreq_dmamap);
1231 			m_freem(m);
1232 			break;
1233 		}
1234 
1235 		DPRINTFN(XEDB_MBUF, ("xennet_start id %d, "
1236 		    "mbuf %p, buf %p, size %d\n",
1237 		    req->txreq_id, m, mtod(m, void *), m->m_pkthdr.len));
1238 
1239 #ifdef XENNET_DEBUG_DUMP
1240 		xennet_hex_dump(mtod(m, u_char *), m->m_pkthdr.len, "s",
1241 		    req->txreq_id);
1242 #endif
1243 
1244 		if (!xennet_submit_tx_request(sc, m, req, &req_prod)) {
1245 			/* Grant failed, drop the packet */
1246 			sc->sc_cnt_tx_drop.ev_count++;
1247 			bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
1248 			    req->txreq_dmamap);
1249 			m_freem(m);
1250 			break;
1251 		}
1252 
1253 		/*
1254 		 * Pass packet to bpf if there is a listener.
1255 		 */
1256 		bpf_mtap(ifp, m, BPF_D_OUT);
1257 	}
1258 
1259 	sc->sc_tx_ring.req_prod_pvt = req_prod;
1260 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_tx_ring, notify);
1261 	if (notify)
1262 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
1263 
1264 	mutex_exit(&sc->sc_tx_lock);
1265 
1266 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start() done\n",
1267 	    device_xname(sc->sc_dev)));
1268 }
1269 
1270 int
1271 xennet_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1272 {
1273 #ifdef XENNET_DEBUG
1274 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1275 #endif
1276 	int error = 0;
1277 
1278 #ifdef NET_MPSAFE
1279 #ifdef notyet
1280 	/* XXX IFNET_LOCK() is not taken in some cases e.g. multicast ioctls */
1281 	KASSERT(IFNET_LOCKED(ifp));
1282 #endif
1283 #endif
1284 	int s = splnet();
1285 
1286 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl()\n",
1287 	    device_xname(sc->sc_dev)));
1288 	error = ether_ioctl(ifp, cmd, data);
1289 	if (error == ENETRESET)
1290 		error = 0;
1291 
1292 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() returning %d\n",
1293 	    device_xname(sc->sc_dev), error));
1294 
1295 	splx(s);
1296 
1297 	return error;
1298 }
1299 
1300 int
1301 xennet_init(struct ifnet *ifp)
1302 {
1303 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1304 
1305 	KASSERT(IFNET_LOCKED(ifp));
1306 
1307 	DPRINTFN(XEDB_FOLLOW, ("%s: xennet_init()\n",
1308 	    device_xname(sc->sc_dev)));
1309 
1310 	if ((ifp->if_flags & IFF_RUNNING) == 0) {
1311 		mutex_enter(&sc->sc_rx_lock);
1312 		sc->sc_rx_ring.sring->rsp_event =
1313 		    sc->sc_rx_ring.rsp_cons + 1;
1314 		mutex_exit(&sc->sc_rx_lock);
1315 		hypervisor_unmask_event(sc->sc_evtchn);
1316 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
1317 	}
1318 	ifp->if_flags |= IFF_RUNNING;
1319 
1320 	return 0;
1321 }
1322 
1323 void
1324 xennet_stop(struct ifnet *ifp, int disable)
1325 {
1326 	struct xennet_xenbus_softc *sc = ifp->if_softc;
1327 
1328 	KASSERT(IFNET_LOCKED(ifp));
1329 
1330 	ifp->if_flags &= ~IFF_RUNNING;
1331 	hypervisor_mask_event(sc->sc_evtchn);
1332 }
1333 
1334 #if defined(NFS_BOOT_BOOTSTATIC)
1335 int
1336 xennet_bootstatic_callback(struct nfs_diskless *nd)
1337 {
1338 #if 0
1339 	struct ifnet *ifp = nd->nd_ifp;
1340 	struct xennet_xenbus_softc *sc =
1341 	    (struct xennet_xenbus_softc *)ifp->if_softc;
1342 #endif
1343 	int flags = 0;
1344 	union xen_cmdline_parseinfo xcp;
1345 	struct sockaddr_in *sin;
1346 
1347 	memset(&xcp, 0, sizeof(xcp.xcp_netinfo));
1348 	xcp.xcp_netinfo.xi_ifno = /* XXX sc->sc_ifno */ 0;
1349 	xcp.xcp_netinfo.xi_root = nd->nd_root.ndm_host;
1350 	xen_parse_cmdline(XEN_PARSE_NETINFO, &xcp);
1351 
1352 	if (xcp.xcp_netinfo.xi_root[0] != '\0') {
1353 		flags |= NFS_BOOT_HAS_SERVER;
1354 		if (strchr(xcp.xcp_netinfo.xi_root, ':') != NULL)
1355 			flags |= NFS_BOOT_HAS_ROOTPATH;
1356 	}
1357 
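	/*
	 * As consumed below: xi_ip[0] is the local address, xi_ip[1] the NFS
	 * server, xi_ip[2] the gateway and xi_ip[3] the netmask.
	 */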
1358 	nd->nd_myip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[0]);
1359 	nd->nd_gwip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[2]);
1360 	nd->nd_mask.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[3]);
1361 
1362 	sin = (struct sockaddr_in *) &nd->nd_root.ndm_saddr;
1363 	memset((void *)sin, 0, sizeof(*sin));
1364 	sin->sin_len = sizeof(*sin);
1365 	sin->sin_family = AF_INET;
1366 	sin->sin_addr.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[1]);
1367 
1368 	if (nd->nd_myip.s_addr)
1369 		flags |= NFS_BOOT_HAS_MYIP;
1370 	if (nd->nd_gwip.s_addr)
1371 		flags |= NFS_BOOT_HAS_GWIP;
1372 	if (nd->nd_mask.s_addr)
1373 		flags |= NFS_BOOT_HAS_MASK;
1374 	if (sin->sin_addr.s_addr)
1375 		flags |= NFS_BOOT_HAS_SERVADDR;
1376 
1377 	return flags;
1378 }
1379 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
1380 
1381 #ifdef XENNET_DEBUG_DUMP
1382 #define XCHR(x) hexdigits[(x) & 0xf]
1383 static void
1384 xennet_hex_dump(const unsigned char *pkt, size_t len, const char *type, int id)
1385 {
1386 	size_t i, j;
1387 
1388 	printf("pkt %p len %zd/%zx type %s id %d\n", pkt, len, len, type, id);
1389 	printf("00000000  ");
1390 	for(i=0; i<len; i++) {
1391 		printf("%c%c ", XCHR(pkt[i]>>4), XCHR(pkt[i]));
1392 		if ((i+1) % 16 == 8)
1393 			printf(" ");
1394 		if ((i+1) % 16 == 0) {
1395 			printf(" %c", '|');
1396 			for(j=0; j<16; j++)
1397 				printf("%c", pkt[i-15+j]>=32 &&
1398 				    pkt[i-15+j]<127?pkt[i-15+j]:'.');
1399 			printf("%c\n%c%c%c%c%c%c%c%c  ", '|',
1400 			    XCHR((i+1)>>28), XCHR((i+1)>>24),
1401 			    XCHR((i+1)>>20), XCHR((i+1)>>16),
1402 			    XCHR((i+1)>>12), XCHR((i+1)>>8),
1403 			    XCHR((i+1)>>4), XCHR(i+1));
1404 		}
1405 	}
1406 	printf("\n");
1407 }
1408 #undef XCHR
1409 #endif
1410