xref: /netbsd-src/sys/arch/xen/xen/xennetback_xenbus.c (revision 481d3881954fd794ca5f2d880b68c53a5db8620e)
1 /*      $NetBSD: xennetback_xenbus.c,v 1.126 2024/07/05 04:31:50 rin Exp $      */
2 
3 /*
4  * Copyright (c) 2006 Manuel Bouyer.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __KERNEL_RCSID(0, "$NetBSD: xennetback_xenbus.c,v 1.126 2024/07/05 04:31:50 rin Exp $");
29 
30 #include <sys/types.h>
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/kmem.h>
34 #include <sys/queue.h>
35 #include <sys/kernel.h>
36 #include <sys/mbuf.h>
37 #include <sys/protosw.h>
38 #include <sys/socket.h>
39 #include <sys/ioctl.h>
40 #include <sys/errno.h>
41 #include <sys/device.h>
42 
43 #include <net/if.h>
44 #include <net/if_types.h>
45 #include <net/if_dl.h>
46 #include <net/route.h>
47 #include <net/bpf.h>
48 
49 #include <net/if_ether.h>
50 
51 #include <xen/intr.h>
52 #include <xen/hypervisor.h>
53 #include <xen/xen.h>
54 #include <xen/xen_shm.h>
55 #include <xen/evtchn.h>
56 #include <xen/xenbus.h>
57 #include <xen/xennet_checksum.h>
58 
59 #include <uvm/uvm.h>
60 
61 /*
62  * Backend network device driver for Xen.
63  */
64 
65 #ifdef XENDEBUG_NET
66 #define XENPRINTF(x) printf x
67 #else
68 #define XENPRINTF(x)
69 #endif
70 
71 #define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
72 #define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
73 
74 /*
75  * Number of packets to transmit in one hypercall (= number of pages to
76  * transmit at once).
77  */
78 #define NB_XMIT_PAGES_BATCH 64
79 CTASSERT(NB_XMIT_PAGES_BATCH >= XEN_NETIF_NR_SLOTS_MIN);
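/*
 * A multi-fragment packet may occupy up to XEN_NETIF_NR_SLOTS_MIN slots,
 * so one batch must be able to hold at least one whole packet.
 */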
80 
81 /* ratecheck(9) for pool allocation failures */
82 static const struct timeval xni_pool_errintvl = { 30, 0 };  /* 30s, each */
83 
84 /* state of a xnetback instance */
85 typedef enum {
86 	CONNECTED,
87 	DISCONNECTING,
88 	DISCONNECTED
89 } xnetback_state_t;
90 
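/*
 * Per-slot transfer state, shared by the Tx and Rx paths: a DMA map used
 * to obtain the mbuf's physical segments, plus (on the Tx path) a copy of
 * the guest's request used later to build the response.
 */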
91 struct xnetback_xstate {
92 	bus_dmamap_t xs_dmamap;
93 	bool xs_loaded;
94 	struct mbuf *xs_m;
95 	struct netif_tx_request xs_tx;
96 	uint16_t xs_tx_size;		/* Size of data in this Tx fragment */
97 };
98 
99 /* we keep the xnetback instances in a linked list */
100 struct xnetback_instance {
101 	SLIST_ENTRY(xnetback_instance) next;
102 	struct xenbus_device *xni_xbusd; /* our xenstore entry */
103 	domid_t xni_domid;		/* attached to this domain */
104 	uint32_t xni_handle;	/* domain-specific handle */
105 	xnetback_state_t xni_status;
106 
107 	/* network interface stuff */
108 	struct ethercom xni_ec;
109 	struct callout xni_restart;
110 	uint8_t xni_enaddr[ETHER_ADDR_LEN];
111 
112 	/* remote domain communication stuff */
113 	unsigned int xni_evtchn; /* our event channel */
114 	struct intrhand *xni_ih;
115 	netif_tx_back_ring_t xni_txring;
116 	netif_rx_back_ring_t xni_rxring;
117 	grant_handle_t xni_tx_ring_handle; /* to unmap the ring */
118 	grant_handle_t xni_rx_ring_handle;
119 	vaddr_t xni_tx_ring_va; /* to unmap the ring */
120 	vaddr_t xni_rx_ring_va;
121 
122 	/* scratch arrays for batched grant copies, used by both Rx and Tx paths */
123 	gnttab_copy_t     	xni_gop_copy[NB_XMIT_PAGES_BATCH];
124 	struct xnetback_xstate	xni_xstate[NB_XMIT_PAGES_BATCH];
125 
126 	/* event counters */
127 	struct evcnt xni_cnt_rx_cksum_blank;
128 	struct evcnt xni_cnt_rx_cksum_undefer;
129 };
130 #define xni_if    xni_ec.ec_if
131 #define xni_bpf   xni_if.if_bpf
132 
133        void xvifattach(int);
134 static int  xennetback_ifioctl(struct ifnet *, u_long, void *);
135 static void xennetback_ifstart(struct ifnet *);
136 static void xennetback_ifsoftstart_copy(struct xnetback_instance *);
137 static void xennetback_ifwatchdog(struct ifnet *);
138 static int  xennetback_ifinit(struct ifnet *);
139 static void xennetback_ifstop(struct ifnet *, int);
140 
141 static int  xennetback_xenbus_create(struct xenbus_device *);
142 static int  xennetback_xenbus_destroy(void *);
143 static void xennetback_frontend_changed(void *, XenbusState);
144 
145 static inline void xennetback_tx_response(struct xnetback_instance *,
146     int, int);
147 
148 static SLIST_HEAD(, xnetback_instance) xnetback_instances;
149 static kmutex_t xnetback_lock;
150 
151 static bool xnetif_lookup(domid_t, uint32_t);
152 static int  xennetback_evthandler(void *);
153 
154 static struct xenbus_backend_driver xvif_backend_driver = {
155 	.xbakd_create = xennetback_xenbus_create,
156 	.xbakd_type = "vif"
157 };
158 
159 void
160 xvifattach(int n)
161 {
162 	XENPRINTF(("xennetback_init\n"));
163 
164 	SLIST_INIT(&xnetback_instances);
165 	mutex_init(&xnetback_lock, MUTEX_DEFAULT, IPL_NONE);
166 
167 	xenbus_backend_register(&xvif_backend_driver);
168 }
169 
170 static int
171 xennetback_xenbus_create(struct xenbus_device *xbusd)
172 {
173 	struct xnetback_instance *xneti;
174 	long domid, handle;
175 	struct ifnet *ifp;
176 	extern int ifqmaxlen; /* XXX */
177 	char *e, *p;
178 	char mac[32];
179 	int i, err;
180 	struct xenbus_transaction *xbt;
181 
182 	if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
183 	    "frontend-id", &domid, 10)) != 0) {
184 		aprint_error("xvif: can't read %s/frontend-id: %d\n",
185 		    xbusd->xbusd_path, err);
186 		return err;
187 	}
188 	if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
189 	    "handle", &handle, 10)) != 0) {
190 		aprint_error("xvif: can't read %s/handle: %d\n",
191 		    xbusd->xbusd_path, err);
192 		return err;
193 	}
194 
195 	xneti = kmem_zalloc(sizeof(*xneti), KM_SLEEP);
196 	xneti->xni_domid = domid;
197 	xneti->xni_handle = handle;
198 	xneti->xni_status = DISCONNECTED;
199 
200 	/* Need to keep the lock for lookup and the list update */
201 	mutex_enter(&xnetback_lock);
202 	if (xnetif_lookup(domid, handle)) {
203 		mutex_exit(&xnetback_lock);
204 		kmem_free(xneti, sizeof(*xneti));
205 		return EEXIST;
206 	}
207 	SLIST_INSERT_HEAD(&xnetback_instances, xneti, next);
208 	mutex_exit(&xnetback_lock);
209 
210 	xbusd->xbusd_u.b.b_cookie = xneti;
211 	xbusd->xbusd_u.b.b_detach = xennetback_xenbus_destroy;
212 	xneti->xni_xbusd = xbusd;
213 
214 	ifp = &xneti->xni_if;
215 	ifp->if_softc = xneti;
216 	snprintf(ifp->if_xname, IFNAMSIZ, "xvif%di%d",
217 	    (int)domid, (int)handle);
218 
219 	/* read mac address */
220 	err = xenbus_read(NULL, xbusd->xbusd_path, "mac", mac, sizeof(mac));
221 	if (err) {
222 		aprint_error_ifnet(ifp, "can't read %s/mac: %d\n",
223 		    xbusd->xbusd_path, err);
224 		goto fail;
225 	}
226 	for (i = 0, p = mac; i < ETHER_ADDR_LEN; i++) {
227 		xneti->xni_enaddr[i] = strtoul(p, &e, 16);
228 		if ((e[0] == '\0' && i != 5) && e[0] != ':') {
229 			aprint_error_ifnet(ifp,
230 			    "%s is not a valid mac address\n", mac);
231 			err = EINVAL;
232 			goto fail;
233 		}
234 		p = &e[1];
235 	}
236 
237 	/* we can't use the same MAC addr as our guest */
238 	xneti->xni_enaddr[3]++;
239 
240 	/* Initialize DMA map, used only for loading PA */
241 	for (i = 0; i < __arraycount(xneti->xni_xstate); i++) {
242 		if (bus_dmamap_create(xneti->xni_xbusd->xbusd_dmat,
243 		    ETHER_MAX_LEN_JUMBO, XEN_NETIF_NR_SLOTS_MIN,
244 		    PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
245 		    &xneti->xni_xstate[i].xs_dmamap)
246 		    != 0) {
247 			aprint_error_ifnet(ifp,
248 			    "failed to allocate dma map\n");
249 			err = ENOMEM;
250 			goto fail;
251 		}
252 	}
253 
254 	evcnt_attach_dynamic(&xneti->xni_cnt_rx_cksum_blank, EVCNT_TYPE_MISC,
255 	    NULL, ifp->if_xname, "Rx csum blank");
256 	evcnt_attach_dynamic(&xneti->xni_cnt_rx_cksum_undefer, EVCNT_TYPE_MISC,
257 	    NULL, ifp->if_xname, "Rx csum undeferred");
258 
259 	/* create pseudo-interface */
260 	aprint_verbose_ifnet(ifp, "Ethernet address %s\n",
261 	    ether_sprintf(xneti->xni_enaddr));
262 	xneti->xni_ec.ec_capabilities |= ETHERCAP_VLAN_MTU | ETHERCAP_JUMBO_MTU;
263 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
264 	ifp->if_snd.ifq_maxlen =
265 	    uimax(ifqmaxlen, NET_TX_RING_SIZE * 2);
266 	ifp->if_capabilities =
267 		IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx
268 		| IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx
269 		| IFCAP_CSUM_UDPv6_Rx | IFCAP_CSUM_UDPv6_Tx
270 		| IFCAP_CSUM_TCPv6_Rx | IFCAP_CSUM_TCPv6_Tx;
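/*
 * csum_flags covered by the capabilities advertised above; used to mark
 * frontend-validated packets as checked, and to request checksum deferral
 * when passing packets back to the frontend.
 */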
271 #define XN_M_CSUM_SUPPORTED						\
272 	(M_CSUM_TCPv4 | M_CSUM_UDPv4 | M_CSUM_TCPv6 | M_CSUM_UDPv6)
273 
274 	ifp->if_ioctl = xennetback_ifioctl;
275 	ifp->if_start = xennetback_ifstart;
276 	ifp->if_watchdog = xennetback_ifwatchdog;
277 	ifp->if_init = xennetback_ifinit;
278 	ifp->if_stop = xennetback_ifstop;
279 	ifp->if_timer = 0;
280 	IFQ_SET_MAXLEN(&ifp->if_snd, uimax(2 * NET_TX_RING_SIZE, IFQ_MAXLEN));
281 	IFQ_SET_READY(&ifp->if_snd);
282 	if_attach(ifp);
283 	if_deferred_start_init(ifp, NULL);
284 	ether_ifattach(&xneti->xni_if, xneti->xni_enaddr);
285 
286 	xbusd->xbusd_otherend_changed = xennetback_frontend_changed;
287 
288 	do {
289 		xbt = xenbus_transaction_start();
290 		if (xbt == NULL) {
291 			aprint_error_ifnet(ifp,
292 			    "%s: can't start transaction\n",
293 			    xbusd->xbusd_path);
294 			goto fail;
295 		}
296 		err = xenbus_printf(xbt, xbusd->xbusd_path,
297 		    "vifname", "%s", ifp->if_xname);
298 		if (err) {
299 			aprint_error_ifnet(ifp,
300 			    "failed to write %s/vifname: %d\n",
301 			    xbusd->xbusd_path, err);
302 			goto abort_xbt;
303 		}
304 		err = xenbus_printf(xbt, xbusd->xbusd_path,
305 		    "feature-rx-copy", "%d", 1);
306 		if (err) {
307 			aprint_error_ifnet(ifp,
308 			    "failed to write %s/feature-rx-copy: %d\n",
309 			    xbusd->xbusd_path, err);
310 			goto abort_xbt;
311 		}
312 		err = xenbus_printf(xbt, xbusd->xbusd_path,
313 		    "feature-ipv6-csum-offload", "%d", 1);
314 		if (err) {
315 			aprint_error_ifnet(ifp,
316 			    "failed to write %s/feature-ipv6-csum-offload: %d\n",
317 			    xbusd->xbusd_path, err);
318 			goto abort_xbt;
319 		}
320 		err = xenbus_printf(xbt, xbusd->xbusd_path,
321 		    "feature-sg", "%d", 1);
322 		if (err) {
323 			aprint_error_ifnet(ifp,
324 			    "failed to write %s/feature-sg: %d\n",
325 			    xbusd->xbusd_path, err);
326 			goto abort_xbt;
327 		}
328 	} while ((err = xenbus_transaction_end(xbt, 0)) == EAGAIN);
329 	if (err) {
330 		aprint_error_ifnet(ifp,
331 		    "%s: can't end transaction: %d\n",
332 		    xbusd->xbusd_path, err);
333 	}
334 
335 	err = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait);
336 	if (err) {
337 		aprint_error_ifnet(ifp,
338 		    "failed to switch state on %s: %d\n",
339 		    xbusd->xbusd_path, err);
340 		goto fail;
341 	}
342 	return 0;
343 
344 abort_xbt:
345 	xenbus_transaction_end(xbt, 1);
346 fail:
347 	kmem_free(xneti, sizeof(*xneti));
348 	return err;
349 }
350 
351 int
352 xennetback_xenbus_destroy(void *arg)
353 {
354 	struct xnetback_instance *xneti = arg;
355 
356 	aprint_verbose_ifnet(&xneti->xni_if, "disconnecting\n");
357 
358 	if (xneti->xni_ih != NULL) {
359 		hypervisor_mask_event(xneti->xni_evtchn);
360 		xen_intr_disestablish(xneti->xni_ih);
361 		xneti->xni_ih = NULL;
362 	}
363 
364 	mutex_enter(&xnetback_lock);
365 	SLIST_REMOVE(&xnetback_instances,
366 	    xneti, xnetback_instance, next);
367 	mutex_exit(&xnetback_lock);
368 
369 	ether_ifdetach(&xneti->xni_if);
370 	if_detach(&xneti->xni_if);
371 
372 	evcnt_detach(&xneti->xni_cnt_rx_cksum_blank);
373 	evcnt_detach(&xneti->xni_cnt_rx_cksum_undefer);
374 
375 	/* Destroy DMA maps */
376 	for (int i = 0; i < __arraycount(xneti->xni_xstate); i++) {
377 		if (xneti->xni_xstate[i].xs_dmamap != NULL) {
378 			bus_dmamap_destroy(xneti->xni_xbusd->xbusd_dmat,
379 			    xneti->xni_xstate[i].xs_dmamap);
380 			xneti->xni_xstate[i].xs_dmamap = NULL;
381 		}
382 	}
383 
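	/* Unmap and release the shared rings, if they were ever mapped. */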
384 	if (xneti->xni_txring.sring) {
385 		xen_shm_unmap(xneti->xni_tx_ring_va, 1,
386 		    &xneti->xni_tx_ring_handle);
387 	}
388 	if (xneti->xni_rxring.sring) {
389 		xen_shm_unmap(xneti->xni_rx_ring_va, 1,
390 		    &xneti->xni_rx_ring_handle);
391 	}
392 	if (xneti->xni_tx_ring_va != 0) {
393 		uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
394 		    PAGE_SIZE, UVM_KMF_VAONLY);
395 		xneti->xni_tx_ring_va = 0;
396 	}
397 	if (xneti->xni_rx_ring_va != 0) {
398 		uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
399 		    PAGE_SIZE, UVM_KMF_VAONLY);
400 		xneti->xni_rx_ring_va = 0;
401 	}
402 	kmem_free(xneti, sizeof(*xneti));
403 	return 0;
404 }
405 
406 static int
407 xennetback_connect(struct xnetback_instance *xneti)
408 {
409 	int err;
410 	netif_tx_sring_t *tx_ring;
411 	netif_rx_sring_t *rx_ring;
412 	evtchn_op_t evop;
413 	u_long tx_ring_ref, rx_ring_ref;
414 	grant_ref_t gtx_ring_ref, grx_ring_ref;
415 	u_long revtchn, rx_copy;
416 	struct xenbus_device *xbusd = xneti->xni_xbusd;
417 
418 	/* read communication information */
419 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
420 	    "tx-ring-ref", &tx_ring_ref, 10);
421 	if (err) {
422 		xenbus_dev_fatal(xbusd, err, "reading %s/tx-ring-ref",
423 		    xbusd->xbusd_otherend);
424 		return -1;
425 	}
426 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
427 	    "rx-ring-ref", &rx_ring_ref, 10);
428 	if (err) {
429 		xenbus_dev_fatal(xbusd, err, "reading %s/rx-ring-ref",
430 		    xbusd->xbusd_otherend);
431 		return -1;
432 	}
433 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
434 	    "event-channel", &revtchn, 10);
435 	if (err) {
436 		xenbus_dev_fatal(xbusd, err, "reading %s/event-channel",
437 		    xbusd->xbusd_otherend);
438 		return -1;
439 	}
440 	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
441 	    "request-rx-copy", &rx_copy, 10);
442 	if (err == ENOENT || !rx_copy) {
443 		xenbus_dev_fatal(xbusd, err,
444 		    "%s/request-rx-copy not supported by frontend",
445 		    xbusd->xbusd_otherend);
446 		return -1;
447 	} else if (err) {
448 		xenbus_dev_fatal(xbusd, err, "reading %s/request-rx-copy",
449 		    xbusd->xbusd_otherend);
450 		return -1;
451 	}
452 
453 	/* allocate VA space and map rings */
454 	xneti->xni_tx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
455 	    UVM_KMF_VAONLY);
456 	if (xneti->xni_tx_ring_va == 0) {
457 		xenbus_dev_fatal(xbusd, ENOMEM,
458 		    "can't get VA for TX ring %s", xbusd->xbusd_otherend);
459 		goto err1;
460 	}
461 	tx_ring = (void *)xneti->xni_tx_ring_va;
462 
463 	xneti->xni_rx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
464 	    UVM_KMF_VAONLY);
465 	if (xneti->xni_rx_ring_va == 0) {
466 		xenbus_dev_fatal(xbusd, ENOMEM,
467 		    "can't get VA for RX ring %s", xbusd->xbusd_otherend);
468 		goto err1;
469 	}
470 	rx_ring = (void *)xneti->xni_rx_ring_va;
471 
472 	gtx_ring_ref = tx_ring_ref;
473 	if (xen_shm_map(1, xneti->xni_domid, &gtx_ring_ref,
474 	    xneti->xni_tx_ring_va, &xneti->xni_tx_ring_handle, 0) != 0) {
475 		aprint_error_ifnet(&xneti->xni_if,
476 		    "can't map TX grant ref\n");
477 		goto err2;
478 	}
479 	BACK_RING_INIT(&xneti->xni_txring, tx_ring, PAGE_SIZE);
480 
481 	grx_ring_ref = rx_ring_ref;
482 	if (xen_shm_map(1, xneti->xni_domid, &grx_ring_ref,
483 	    xneti->xni_rx_ring_va, &xneti->xni_rx_ring_handle, 0) != 0) {
484 		aprint_error_ifnet(&xneti->xni_if,
485 		    "can't map RX grant ref\n");
486 		goto err2;
487 	}
488 	BACK_RING_INIT(&xneti->xni_rxring, rx_ring, PAGE_SIZE);
489 
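	/*
	 * Bind the frontend's event channel so its notifications arrive
	 * as interrupts in this domain.
	 */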
490 	evop.cmd = EVTCHNOP_bind_interdomain;
491 	evop.u.bind_interdomain.remote_dom = xneti->xni_domid;
492 	evop.u.bind_interdomain.remote_port = revtchn;
493 	err = HYPERVISOR_event_channel_op(&evop);
494 	if (err) {
495 		aprint_error_ifnet(&xneti->xni_if,
496 		    "can't get event channel: %d\n", err);
497 		goto err2;
498 	}
499 	xneti->xni_evtchn = evop.u.bind_interdomain.local_port;
500 	xneti->xni_status = CONNECTED;
501 
502 	xneti->xni_ih = xen_intr_establish_xname(-1, &xen_pic,
503 	    xneti->xni_evtchn, IST_LEVEL, IPL_NET, xennetback_evthandler,
504 	    xneti, false, xneti->xni_if.if_xname);
505 	KASSERT(xneti->xni_ih != NULL);
506 	xennetback_ifinit(&xneti->xni_if);
507 	hypervisor_unmask_event(xneti->xni_evtchn);
508 	hypervisor_notify_via_evtchn(xneti->xni_evtchn);
509 	return 0;
510 
511 err2:
512 	/* unmap rings */
513 	if (xneti->xni_tx_ring_handle != 0) {
514 		xen_shm_unmap(xneti->xni_tx_ring_va, 1,
515 		    &xneti->xni_tx_ring_handle);
516 	}
517 
518 	if (xneti->xni_rx_ring_handle != 0) {
519 		xen_shm_unmap(xneti->xni_rx_ring_va, 1,
520 		    &xneti->xni_rx_ring_handle);
521 	}
522 err1:
523 	/* free rings VA space */
524 	if (xneti->xni_rx_ring_va != 0)
525 		uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
526 		    PAGE_SIZE, UVM_KMF_VAONLY);
527 
528 	if (xneti->xni_tx_ring_va != 0)
529 		uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
530 		    PAGE_SIZE, UVM_KMF_VAONLY);
531 
532 	return -1;
533 
534 }
535 
536 static void
537 xennetback_frontend_changed(void *arg, XenbusState new_state)
538 {
539 	struct xnetback_instance *xneti = arg;
540 	struct xenbus_device *xbusd = xneti->xni_xbusd;
541 
542 	XENPRINTF(("%s: new state %d\n", xneti->xni_if.if_xname, new_state));
543 	switch(new_state) {
544 	case XenbusStateInitialising:
545 	case XenbusStateInitialised:
546 		break;
547 
548 	case XenbusStateConnected:
549 		if (xneti->xni_status == CONNECTED)
550 			break;
551 		if (xennetback_connect(xneti) == 0)
552 			xenbus_switch_state(xbusd, NULL, XenbusStateConnected);
553 		break;
554 
555 	case XenbusStateClosing:
556 		xneti->xni_status = DISCONNECTING;
557 		xneti->xni_if.if_flags &= ~IFF_RUNNING;
558 		xneti->xni_if.if_timer = 0;
559 		xenbus_switch_state(xbusd, NULL, XenbusStateClosing);
560 		break;
561 
562 	case XenbusStateClosed:
563 		/* otherend_changed() should handle it for us */
564 		panic("xennetback_frontend_changed: closed\n");
565 	case XenbusStateUnknown:
566 	case XenbusStateInitWait:
567 	default:
568 		aprint_error("%s: invalid frontend state %d\n",
569 		    xneti->xni_if.if_xname, new_state);
570 		break;
571 	}
572 	return;
573 
574 }
575 
576 /* lookup a xneti based on domain id and interface handle */
577 static bool
578 xnetif_lookup(domid_t dom, uint32_t handle)
579 {
580 	struct xnetback_instance *xneti;
581 	bool found = false;
582 
583 	KASSERT(mutex_owned(&xnetback_lock));
584 
585 	SLIST_FOREACH(xneti, &xnetback_instances, next) {
586 		if (xneti->xni_domid == dom && xneti->xni_handle == handle) {
587 			found = true;
588 			break;
589 		}
590 	}
591 
592 	return found;
593 }
594 
595 static inline void
596 xennetback_tx_response(struct xnetback_instance *xneti, int id, int status)
597 {
598 	RING_IDX resp_prod;
599 	netif_tx_response_t *txresp;
600 	int do_event;
601 
602 	resp_prod = xneti->xni_txring.rsp_prod_pvt;
603 	txresp = RING_GET_RESPONSE(&xneti->xni_txring, resp_prod);
604 
605 	txresp->id = id;
606 	txresp->status = status;
607 	xneti->xni_txring.rsp_prod_pvt++;
608 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xneti->xni_txring, do_event);
609 	if (do_event) {
610 		XENPRINTF(("%s send event\n", xneti->xni_if.if_xname));
611 		hypervisor_notify_via_evtchn(xneti->xni_evtchn);
612 	}
613 }
614 
615 static const char *
616 xennetback_tx_check_packet(const netif_tx_request_t *txreq, bool first)
617 {
618 	if (__predict_false((txreq->flags & NETTXF_more_data) == 0 &&
619 	    txreq->offset + txreq->size > PAGE_SIZE))
620 		return "crossing page boundary";
621 
622 	if (__predict_false(txreq->size > ETHER_MAX_LEN_JUMBO))
623 		return "bigger than jumbo";
624 
625 	if (first &&
626 	    __predict_false(txreq->size < ETHER_HDR_LEN))
627 		return "too short";
628 
629 	return NULL;
630 }
631 
632 static int
633 xennetback_copy(struct ifnet *ifp, gnttab_copy_t *gop, int copycnt,
634     const char *dir)
635 {
636 	/*
637 	 * Copy the data and ack it. Delaying it until the mbuf is
638 	 * freed will stall transmit.
639 	 */
640 	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, gop, copycnt) != 0) {
641 		printf("%s: GNTTABOP_copy %s failed\n", ifp->if_xname, dir);
642 		return EINVAL;
643 	}
644 
645 	for (int i = 0; i < copycnt; i++) {
646 		if (gop->status != GNTST_okay) {
647 			printf("%s GNTTABOP_copy[%d] %s %d\n",
648 			    ifp->if_xname, i, dir, gop->status);
649 			return EINVAL;
650 		}
651 	}
652 
653 	return 0;
654 }
655 
656 static void
657 xennetback_tx_copy_abort(struct ifnet *ifp, struct xnetback_instance *xneti,
658 	int queued)
659 {
660 	struct xnetback_xstate *xst;
661 
662 	for (int i = 0; i < queued; i++) {
663 		xst = &xneti->xni_xstate[i];
664 
665 		if (xst->xs_loaded) {
666 			KASSERT(xst->xs_m != NULL);
667 			bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
668 			    xst->xs_dmamap);
669 			xst->xs_loaded = false;
670 			m_freem(xst->xs_m);
671 		}
672 
673 		xennetback_tx_response(xneti, xst->xs_tx.id, NETIF_RSP_ERROR);
674 		if_statinc(ifp, if_ierrors);
675 	}
676 }
677 
678 static void
679 xennetback_tx_copy_process(struct ifnet *ifp, struct xnetback_instance *xneti,
680 	int queued)
681 {
682 	gnttab_copy_t *gop;
683 	struct xnetback_xstate *xst;
684 	int copycnt = 0, seg = 0;
685 	size_t goff = 0, segoff = 0, gsize, take;
686 	bus_dmamap_t dm = NULL;
687 	paddr_t ma;
688 
689 	for (int i = 0; i < queued; i++) {
690 		xst = &xneti->xni_xstate[i];
691 
692 		if (xst->xs_m != NULL) {
693 			KASSERT(xst->xs_m->m_pkthdr.len == xst->xs_tx.size);
694 			if (__predict_false(bus_dmamap_load_mbuf(
695 			    xneti->xni_xbusd->xbusd_dmat,
696 			    xst->xs_dmamap, xst->xs_m, BUS_DMA_NOWAIT) != 0))
697 				goto abort;
698 			xst->xs_loaded = true;
699 			dm = xst->xs_dmamap;
700 			seg = 0;
701 			goff = segoff = 0;
702 		}
703 
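		/*
		 * xs_m == NULL means this slot continues the previous packet:
		 * keep using the DMA map and segment cursor left over from
		 * the previous iteration.
		 */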
704 		gsize = xst->xs_tx_size;
705 		goff = 0;
706 		for (; seg < dm->dm_nsegs && gsize > 0; seg++) {
707 			bus_dma_segment_t *ds = &dm->dm_segs[seg];
708 			ma = ds->ds_addr;
709 			take = uimin(gsize, ds->ds_len);
710 
711 			KASSERT(copycnt <= NB_XMIT_PAGES_BATCH);
712 			if (copycnt == NB_XMIT_PAGES_BATCH) {
713 				if (xennetback_copy(ifp, xneti->xni_gop_copy,
714 				    copycnt, "Tx") != 0)
715 					goto abort;
716 				copycnt = 0;
717 			}
718 
719 			/* Queue for the copy */
720 			gop = &xneti->xni_gop_copy[copycnt++];
721 			memset(gop, 0, sizeof(*gop));
722 			gop->flags = GNTCOPY_source_gref;
723 			gop->len = take;
724 
725 			gop->source.u.ref = xst->xs_tx.gref;
726 			gop->source.offset = xst->xs_tx.offset + goff;
727 			gop->source.domid = xneti->xni_domid;
728 
729 			gop->dest.offset = (ma & PAGE_MASK) + segoff;
730 			KASSERT(gop->dest.offset <= PAGE_SIZE);
731 			gop->dest.domid = DOMID_SELF;
732 			gop->dest.u.gmfn = ma >> PAGE_SHIFT;
733 
734 			goff += take;
735 			gsize -= take;
736 			if (take + segoff < ds->ds_len) {
737 				segoff += take;
738 				/* Segment not completely consumed yet */
739 				break;
740 			}
741 			segoff = 0;
742 		}
743 		KASSERT(gsize == 0);
744 		KASSERT(goff == xst->xs_tx_size);
745 	}
746 	if (copycnt > 0) {
747 		if (xennetback_copy(ifp, xneti->xni_gop_copy, copycnt, "Tx"))
748 			goto abort;
749 		copycnt = 0;
750 	}
751 
752 	/* If we got here, the whole copy was successful */
753 	for (int i = 0; i < queued; i++) {
754 		xst = &xneti->xni_xstate[i];
755 
756 		xennetback_tx_response(xneti, xst->xs_tx.id, NETIF_RSP_OKAY);
757 
758 		if (xst->xs_m != NULL) {
759 			KASSERT(xst->xs_loaded);
760 			bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
761 			    xst->xs_dmamap);
762 
763 			if (xst->xs_tx.flags & NETTXF_csum_blank) {
764 				xennet_checksum_fill(ifp, xst->xs_m,
765 				    &xneti->xni_cnt_rx_cksum_blank,
766 				    &xneti->xni_cnt_rx_cksum_undefer);
767 			} else if (xst->xs_tx.flags & NETTXF_data_validated) {
768 				xst->xs_m->m_pkthdr.csum_flags =
769 				    XN_M_CSUM_SUPPORTED;
770 			}
771 			m_set_rcvif(xst->xs_m, ifp);
772 
773 			if_percpuq_enqueue(ifp->if_percpuq, xst->xs_m);
774 		}
775 	}
776 
777 	return;
778 
779 abort:
780 	xennetback_tx_copy_abort(ifp, xneti, queued);
781 }
782 
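/*
 * The first slot of a multi-fragment Tx request carries the total packet
 * length; walk the remaining slots and subtract their sizes to recover the
 * length of the first fragment itself.  Returns -1 if the chain is
 * inconsistent or too long.
 */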
783 static int
784 xennetback_tx_m0len_fragment(struct xnetback_instance *xneti,
785     int m0_len, int req_cons, int *cntp)
786 {
787 	netif_tx_request_t *txreq;
788 
789 	/* This assumes all the requests are already pushed into the ring */
790 	*cntp = 1;
791 	do {
792 		txreq = RING_GET_REQUEST(&xneti->xni_txring, req_cons);
793 		if (m0_len <= txreq->size || *cntp > XEN_NETIF_NR_SLOTS_MIN)
794 			return -1;
795 		if (RING_REQUEST_CONS_OVERFLOW(&xneti->xni_txring, req_cons))
796 			return -1;
797 
798 		m0_len -= txreq->size;
799 		req_cons++;
800 		(*cntp)++;
801 	} while (txreq->flags & NETTXF_more_data);
802 
803 	return m0_len;
804 }
805 
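/*
 * Event channel interrupt handler: consume the guest's Tx requests
 * (packets received by the backend), assemble them into mbuf chains and
 * queue grant copies in batches of up to NB_XMIT_PAGES_BATCH.
 */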
806 static int
807 xennetback_evthandler(void *arg)
808 {
809 	struct xnetback_instance *xneti = arg;
810 	struct ifnet *ifp = &xneti->xni_if;
811 	netif_tx_request_t txreq;
812 	struct mbuf *m, *m0 = NULL, *mlast = NULL;
813 	int receive_pending;
814 	int queued = 0, m0_len = 0;
815 	struct xnetback_xstate *xst;
816 	const bool nupnrun = ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
817 	    (IFF_UP | IFF_RUNNING));
818 	bool discard = false;
819 
820 	XENPRINTF(("xennetback_evthandler "));
821 again:
822 	while (RING_HAS_UNCONSUMED_REQUESTS(&xneti->xni_txring)) {
823 		/*
824 		 * Ensure we have read the producer's queue index in
825 		 * RING_FINAL_CHECK_FOR_REQUESTS before we read the
826 		 * content of the producer's next request in
827 		 * RING_COPY_REQUEST.
828 		 */
829 		xen_rmb();
830 		RING_COPY_REQUEST(&xneti->xni_txring,
831 		    xneti->xni_txring.req_cons,
832 		    &txreq);
833 		XENPRINTF(("%s pkt size %d\n", xneti->xni_if.if_xname,
834 		    txreq.size));
835 		xneti->xni_txring.req_cons++;
836 		if (__predict_false(nupnrun || discard)) {
837 			/* interface not up, drop all requests */
838 			if_statinc(ifp, if_iqdrops);
839 			discard = (txreq.flags & NETTXF_more_data) != 0;
840 			xennetback_tx_response(xneti, txreq.id,
841 			    NETIF_RSP_DROPPED);
842 			continue;
843 		}
844 
845 		/*
846 		 * Do some sanity checks, and queue copy of the data.
847 		 */
848 		const char *msg = xennetback_tx_check_packet(&txreq,
849 		    m0 == NULL);
850 		if (__predict_false(msg != NULL)) {
851 			printf("%s: packet with size %d is %s\n",
852 			    ifp->if_xname, txreq.size, msg);
853 			discard = (txreq.flags & NETTXF_more_data) != 0;
854 			xennetback_tx_response(xneti, txreq.id,
855 			    NETIF_RSP_ERROR);
856 			if_statinc(ifp, if_ierrors);
857 			continue;
858 		}
859 
860 		/* get a mbuf for this fragment */
861 		MGETHDR(m, M_DONTWAIT, MT_DATA);
862 		if (__predict_false(m == NULL)) {
863 			static struct timeval lasttime;
864 mbuf_fail:
865 			if (ratecheck(&lasttime, &xni_pool_errintvl))
866 				printf("%s: mbuf alloc failed\n",
867 				    ifp->if_xname);
868 			xennetback_tx_copy_abort(ifp, xneti, queued);
869 			queued = 0;
870 			m0 = NULL;
871 			discard = (txreq.flags & NETTXF_more_data) != 0;
872 			xennetback_tx_response(xneti, txreq.id,
873 			    NETIF_RSP_DROPPED);
874 			if_statinc(ifp, if_ierrors);
875 			continue;
876 		}
877 		m->m_len = m->m_pkthdr.len = txreq.size;
878 
879 		if (!m0 && (txreq.flags & NETTXF_more_data)) {
880 			/*
881 			 * The first fragment of a multi-fragment Tx request
882 			 * carries the total packet size.  The whole chain must
883 			 * be read to determine the actual size of the first
884 			 * (i.e. current) fragment.
885 			 */
886 			int cnt;
887 			m0_len = xennetback_tx_m0len_fragment(xneti,
888 			    txreq.size, xneti->xni_txring.req_cons, &cnt);
889 			if (m0_len < 0) {
890 				m_freem(m);
891 				discard = true;
892 				xennetback_tx_response(xneti, txreq.id,
893 				    NETIF_RSP_DROPPED);
894 				if_statinc(ifp, if_ierrors);
895 				continue;
896 			}
897 			m->m_len = m0_len;
898 			KASSERT(cnt <= XEN_NETIF_NR_SLOTS_MIN);
899 
900 			if (queued + cnt >= NB_XMIT_PAGES_BATCH) {
901 				/*
902 				 * Flush queue if too full to fit this
903 				 * new packet whole.
904 				 */
905 				xennetback_tx_copy_process(ifp, xneti, queued);
906 				queued = 0;
907 			}
908 		}
909 
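		/*
		 * A single fragment can be larger than an mbuf cluster
		 * (e.g. jumbo frames); spill the excess into a chained mbuf.
		 */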
910 		if (m->m_len > MHLEN) {
911 			MCLGET(m, M_DONTWAIT);
912 			if (__predict_false((m->m_flags & M_EXT) == 0)) {
913 				m_freem(m);
914 				goto mbuf_fail;
915 			}
916 			if (__predict_false(m->m_len > MCLBYTES)) {
917 				/* one more mbuf necessary */
918 				struct mbuf *mn;
919 				MGET(mn, M_DONTWAIT, MT_DATA);
920 				if (__predict_false(mn == NULL)) {
921 					m_freem(m);
922 					goto mbuf_fail;
923 				}
924 				if (m->m_len - MCLBYTES > MLEN) {
925 					MCLGET(mn, M_DONTWAIT);
926 					if ((mn->m_flags & M_EXT) == 0) {
927 						m_freem(mn);
928 						m_freem(m);
929 						goto mbuf_fail;
930 					}
931 				}
932 				mn->m_len = m->m_len - MCLBYTES;
933 				m->m_len = MCLBYTES;
934 				m->m_next = mn;
935 				KASSERT(mn->m_len <= MCLBYTES);
936 			}
937 			KASSERT(m->m_len <= MCLBYTES);
938 		}
939 
940 		if (m0 || (txreq.flags & NETTXF_more_data)) {
941 			if (m0 == NULL) {
942 				m0 = m;
943 				mlast = (m->m_next) ? m->m_next : m;
944 				KASSERT(mlast->m_next == NULL);
945 			} else {
946 				/* Coalesce like m_cat(), but without copy */
947 				KASSERT(mlast != NULL);
948 				if (M_TRAILINGSPACE(mlast) >= m->m_pkthdr.len) {
949 					mlast->m_len +=  m->m_pkthdr.len;
950 					m_freem(m);
951 				} else {
952 					mlast->m_next = m;
953 					mlast = (m->m_next) ? m->m_next : m;
954 					KASSERT(mlast->m_next == NULL);
955 				}
956 			}
957 		}
958 
959 		XENPRINTF(("%s pkt offset %d size %d id %d req_cons %d\n",
960 		    xneti->xni_if.if_xname, txreq.offset,
961 		    txreq.size, txreq.id,
962 		    xneti->xni_txring.req_cons & (RING_SIZE(&xneti->xni_txring) - 1)));
963 
964 		xst = &xneti->xni_xstate[queued];
965 		xst->xs_m = (m0 == NULL || m == m0) ? m : NULL;
966 		xst->xs_tx = txreq;
967 		/* Fill the length of _this_ fragment */
968 		xst->xs_tx_size = (m == m0) ? m0_len : m->m_pkthdr.len;
969 		queued++;
970 
971 		KASSERT(queued <= NB_XMIT_PAGES_BATCH);
972 		if (__predict_false(m0 &&
973 		    (txreq.flags & NETTXF_more_data) == 0)) {
974 			/* Last fragment, stop appending mbufs */
975 			m0 = NULL;
976 		}
977 		if (queued == NB_XMIT_PAGES_BATCH) {
978 			KASSERT(m0 == NULL);
979 			xennetback_tx_copy_process(ifp, xneti, queued);
980 			queued = 0;
981 		}
982 	}
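	/*
	 * Re-check for requests that raced in while we were processing,
	 * so a notification is not missed.
	 */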
983 	RING_FINAL_CHECK_FOR_REQUESTS(&xneti->xni_txring, receive_pending);
984 	if (receive_pending)
985 		goto again;
986 	if (m0) {
987 		/* Queue empty, and still unfinished multi-fragment request */
988 		printf("%s: dropped unfinished multi-fragment\n",
989 		    ifp->if_xname);
990 		xennetback_tx_copy_abort(ifp, xneti, queued);
991 		queued = 0;
992 		m0 = NULL;
993 	}
994 	if (queued > 0)
995 		xennetback_tx_copy_process(ifp, xneti, queued);
996 
997 	/* check to see if we can transmit more packets */
998 	if_schedule_deferred_start(ifp);
999 
1000 	return 1;
1001 }
1002 
1003 static int
1004 xennetback_ifioctl(struct ifnet *ifp, u_long cmd, void *data)
1005 {
1006 	//struct xnetback_instance *xneti = ifp->if_softc;
1007 	//struct ifreq *ifr = (struct ifreq *)data;
1008 	int s, error;
1009 
1010 	s = splnet();
1011 	error = ether_ioctl(ifp, cmd, data);
1012 	if (error == ENETRESET)
1013 		error = 0;
1014 	splx(s);
1015 	return error;
1016 }
1017 
1018 static void
1019 xennetback_ifstart(struct ifnet *ifp)
1020 {
1021 	struct xnetback_instance *xneti = ifp->if_softc;
1022 
1023 	/*
1024 	 * The Xen communication channel is much more efficient if we can
1025 	 * schedule a batch of packets for the domain.  Deferred start by the
1026 	 * network stack will enqueue all pending mbufs in the interface's send
1027 	 * queue before it is processed by the soft interrupt handler.
1028 	 */
1029 	xennetback_ifsoftstart_copy(xneti);
1030 }
1031 
1032 static void
1033 xennetback_rx_copy_process(struct ifnet *ifp, struct xnetback_instance *xneti,
1034 	int queued, int copycnt)
1035 {
1036 	int notify;
1037 	struct xnetback_xstate *xst;
1038 
1039 	if (xennetback_copy(ifp, xneti->xni_gop_copy, copycnt, "Rx") != 0) {
1040 		/* message already displayed */
1041 		goto free_mbufs;
1042 	}
1043 
1044 	/* update pointer */
1045 	xneti->xni_rxring.req_cons += queued;
1046 	xneti->xni_rxring.rsp_prod_pvt += queued;
1047 	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xneti->xni_rxring, notify);
1048 
1049 	/* send event */
1050 	if (notify) {
1051 		XENPRINTF(("%s receive event\n",
1052 		    xneti->xni_if.if_xname));
1053 		hypervisor_notify_via_evtchn(xneti->xni_evtchn);
1054 	}
1055 
1056 free_mbufs:
1057 	/* now that data was copied we can free the mbufs */
1058 	for (int j = 0; j < queued; j++) {
1059 		xst = &xneti->xni_xstate[j];
1060 		if (xst->xs_loaded) {
1061 			bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
1062 			    xst->xs_dmamap);
1063 			xst->xs_loaded = false;
1064 		}
1065 		m_freem(xst->xs_m);
1066 		xst->xs_m = NULL;
1067 	}
1068 }
1069 
1070 static void
1071 xennetback_rx_copy_queue(struct xnetback_instance *xneti,
1072     struct xnetback_xstate *xst0, int rsp_prod_pvt, int *queued, int *copycntp)
1073 {
1074 	struct xnetback_xstate *xst = xst0;
1075 	gnttab_copy_t *gop;
1076 	struct netif_rx_request rxreq;
1077 	netif_rx_response_t *rxresp;
1078 	paddr_t ma;
1079 	size_t goff, segoff, segsize, take, totsize;
1080 	int copycnt = *copycntp, reqcnt = *queued;
1081 	const bus_dmamap_t dm = xst0->xs_dmamap;
1082 	const bool multiseg = (dm->dm_nsegs > 1);
1083 
1084 	KASSERT(xst0 == &xneti->xni_xstate[reqcnt]);
1085 
1086 	RING_COPY_REQUEST(&xneti->xni_rxring,
1087 	    xneti->xni_rxring.req_cons + reqcnt, &rxreq);
1088 	goff = 0;
1089 	rxresp = RING_GET_RESPONSE(&xneti->xni_rxring, rsp_prod_pvt + reqcnt);
1090 	reqcnt++;
1091 
1092 	rxresp->id = rxreq.id;
1093 	rxresp->offset = 0;
1094 	if ((xst0->xs_m->m_pkthdr.csum_flags & XN_M_CSUM_SUPPORTED) != 0) {
1095 		rxresp->flags = NETRXF_csum_blank;
1096 	} else {
1097 		rxresp->flags = NETRXF_data_validated;
1098 	}
1099 	if (multiseg)
1100 		rxresp->flags |= NETRXF_more_data;
1101 
1102 	totsize = xst0->xs_m->m_pkthdr.len;
1103 
1104 	/*
1105 	 * Arrange for the mbuf contents to be copied into one or more
1106 	 * provided memory pages.
1107 	 */
1108 	for (int seg = 0; seg < dm->dm_nsegs; seg++) {
1109 		ma = dm->dm_segs[seg].ds_addr;
1110 		segsize = dm->dm_segs[seg].ds_len;
1111 		segoff = 0;
1112 
1113 		while (segoff < segsize) {
1114 			take = uimin(PAGE_SIZE - goff, segsize - segoff);
1115 			KASSERT(take <= totsize);
1116 
1117 			/* add copy request */
1118 			gop = &xneti->xni_gop_copy[copycnt++];
1119 			gop->flags = GNTCOPY_dest_gref;
1120 			gop->source.offset = (ma & PAGE_MASK) + segoff;
1121 			gop->source.domid = DOMID_SELF;
1122 			gop->source.u.gmfn = ma >> PAGE_SHIFT;
1123 
1124 			gop->dest.u.ref = rxreq.gref;
1125 			gop->dest.offset = goff;
1126 			gop->dest.domid = xneti->xni_domid;
1127 
1128 			gop->len = take;
1129 
1130 			segoff += take;
1131 			goff += take;
1132 			totsize -= take;
1133 
1134 			if (goff == PAGE_SIZE && totsize > 0) {
1135 				rxresp->status = goff;
1136 
1137 				/* Take next grant */
1138 				RING_COPY_REQUEST(&xneti->xni_rxring,
1139 				    xneti->xni_rxring.req_cons + reqcnt,
1140 				    &rxreq);
1141 				goff = 0;
1142 				rxresp = RING_GET_RESPONSE(&xneti->xni_rxring,
1143 				    rsp_prod_pvt + reqcnt);
1144 				reqcnt++;
1145 
1146 				rxresp->id = rxreq.id;
1147 				rxresp->offset = 0;
1148 				rxresp->flags = NETRXF_more_data;
1149 
1150 				xst++;
1151 				xst->xs_m = NULL;
1152 			}
1153 		}
1154 	}
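	/*
	 * Last response slot: clear the continuation flag and record the
	 * number of bytes placed in the final page.
	 */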
1155 	rxresp->flags &= ~NETRXF_more_data;
1156 	rxresp->status = goff;
1157 	KASSERT(totsize == 0);
1158 
1159 	KASSERT(copycnt > *copycntp);
1160 	KASSERT(reqcnt > *queued);
1161 	*copycntp = copycnt;
1162 	*queued = reqcnt;
1163 }
1164 
1165 static void
1166 xennetback_ifsoftstart_copy(struct xnetback_instance *xneti)
1167 {
1168 	struct ifnet *ifp = &xneti->xni_if;
1169 	struct mbuf *m;
1170 	int queued = 0;
1171 	RING_IDX req_prod, rsp_prod_pvt;
1172 	struct xnetback_xstate *xst;
1173 	int copycnt = 0;
1174 	bool abort;
1175 
1176 	XENPRINTF(("xennetback_ifsoftstart_copy "));
1177 	int s = splnet();
1178 	if (__predict_false((ifp->if_flags & IFF_RUNNING) == 0)) {
1179 		splx(s);
1180 		return;
1181 	}
1182 
1183 	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
1184 		XENPRINTF(("pkt\n"));
1185 		req_prod = xneti->xni_rxring.sring->req_prod;
1186 		rsp_prod_pvt = xneti->xni_rxring.rsp_prod_pvt;
1187 		xen_rmb();
1188 
1189 		abort = false;
1190 		KASSERT(queued == 0);
1191 		KASSERT(copycnt == 0);
1192 		while (copycnt < NB_XMIT_PAGES_BATCH) {
1193 #define XN_RING_FULL(cnt)	\
1194 			req_prod == xneti->xni_rxring.req_cons + (cnt) ||  \
1195 			xneti->xni_rxring.req_cons - (rsp_prod_pvt + cnt) ==  \
1196 			NET_RX_RING_SIZE
1197 
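			/*
			 * XN_RING_FULL(cnt): no room left in the frontend's
			 * Rx ring for cnt more requests/responses.
			 */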
1198 			if (__predict_false(XN_RING_FULL(1))) {
1199 				/* out of ring space */
1200 				XENPRINTF(("xennetback_ifstart: ring full "
1201 				    "req_prod 0x%x req_cons 0x%x rsp_prod_pvt "
1202 				    "0x%x\n",
1203 				    req_prod,
1204 				    xneti->xni_rxring.req_cons + queued,
1205 				    rsp_prod_pvt + queued));
1206 				abort = true;
1207 				break;
1208 			}
1209 
1210 			IFQ_DEQUEUE(&ifp->if_snd, m);
1211 			if (m == NULL)
1212 				break;
1213 
1214 again:
1215 			xst = &xneti->xni_xstate[queued];
1216 
1217 			/*
1218 			 * For short packets it's always way faster passing
1219 			 * single defragmented packet, even with feature-sg.
1220 			 * Try to defragment first if the result is likely
1221 			 * to fit into a single mbuf.
1222 			 */
1223 			if (m->m_pkthdr.len < MCLBYTES && m->m_next)
1224 				(void)m_defrag(m, M_DONTWAIT);
1225 
1226 			if (bus_dmamap_load_mbuf(
1227 			    xneti->xni_xbusd->xbusd_dmat,
1228 			    xst->xs_dmamap, m, BUS_DMA_NOWAIT) != 0) {
1229 				if (m_defrag(m, M_DONTWAIT) == NULL) {
1230 					m_freem(m);
1231 					static struct timeval lasttime;
1232 					if (ratecheck(&lasttime, &xni_pool_errintvl))
1233 						printf("%s: fail defrag mbuf\n",
1234 						    ifp->if_xname);
1235 					continue;
1236 				}
1237 
1238 				if (__predict_false(bus_dmamap_load_mbuf(
1239 				    xneti->xni_xbusd->xbusd_dmat,
1240 				    xst->xs_dmamap, m, BUS_DMA_NOWAIT) != 0)) {
1241 					printf("%s: cannot load mbuf\n",
1242 					    ifp->if_xname);
1243 					m_freem(m);
1244 					continue;
1245 				}
1246 			}
1247 			KASSERT(xst->xs_dmamap->dm_nsegs < NB_XMIT_PAGES_BATCH);
1248 			KASSERTMSG(queued <= copycnt, "queued %d > copycnt %d",
1249 			    queued, copycnt);
1250 
1251 			if (__predict_false(XN_RING_FULL(
1252 			    xst->xs_dmamap->dm_nsegs))) {
1253 				/* Ring too full to fit the packet */
1254 				bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
1255 				    xst->xs_dmamap);
1256 				m_freem(m);
1257 				abort = true;
1258 				break;
1259 			}
1260 			if (__predict_false(copycnt + xst->xs_dmamap->dm_nsegs >
1261 			    NB_XMIT_PAGES_BATCH)) {
1262 				/* Batch already too full, flush and retry */
1263 				bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
1264 				    xst->xs_dmamap);
1265 				xennetback_rx_copy_process(ifp, xneti, queued,
1266 				    copycnt);
1267 				queued = copycnt = 0;
1268 				goto again;
1269 			}
1270 
1271 			/* Now committed to send */
1272 			xst->xs_loaded = true;
1273 			xst->xs_m = m;
1274 			xennetback_rx_copy_queue(xneti, xst,
1275 			    rsp_prod_pvt, &queued, &copycnt);
1276 
1277 			if_statinc(ifp, if_opackets);
1278 			bpf_mtap(ifp, m, BPF_D_OUT);
1279 		}
1280 		KASSERT(copycnt <= NB_XMIT_PAGES_BATCH);
1281 		KASSERT(queued <= copycnt);
1282 		if (copycnt > 0) {
1283 			xennetback_rx_copy_process(ifp, xneti, queued, copycnt);
1284 			queued = copycnt = 0;
1285 		}
1286 		/*
1287 		 * note that we don't use RING_FINAL_CHECK_FOR_REQUESTS()
1288 		 * here, as the frontend doesn't notify when adding
1289 		 * requests anyway
1290 		 */
1291 		if (__predict_false(abort ||
1292 		    !RING_HAS_UNCONSUMED_REQUESTS(&xneti->xni_rxring))) {
1293 			/* ring full */
1294 			ifp->if_timer = 1;
1295 			break;
1296 		}
1297 	}
1298 	splx(s);
1299 }
1300 
1301 static void
1302 xennetback_ifwatchdog(struct ifnet *ifp)
1303 {
1304 	/*
1305 	 * We can end up in the following situation: transmit stalls because
1306 	 * the ring is full while the ifq is full too.
1307 	 *
1308 	 * In this case (as, unfortunately, we don't get an interrupt from xen
1309 	 * on transmit) nothing will ever call xennetback_ifstart() again.
1310 	 * Here we abuse the watchdog to get out of this condition.
1311 	 */
1312 	XENPRINTF(("xennetback_ifwatchdog\n"));
1313 	xennetback_ifstart(ifp);
1314 }
1315 
1316 static int
xennetback_ifinit(struct ifnet * ifp)1317 xennetback_ifinit(struct ifnet *ifp)
1318 {
1319 	struct xnetback_instance *xneti = ifp->if_softc;
1320 	int s = splnet();
1321 
1322 	if ((ifp->if_flags & IFF_UP) == 0) {
1323 		splx(s);
1324 		return 0;
1325 	}
1326 	if (xneti->xni_status == CONNECTED)
1327 		ifp->if_flags |= IFF_RUNNING;
1328 	splx(s);
1329 	return 0;
1330 }
1331 
1332 static void
1333 xennetback_ifstop(struct ifnet *ifp, int disable)
1334 {
1335 	struct xnetback_instance *xneti = ifp->if_softc;
1336 	int s = splnet();
1337 
1338 	ifp->if_flags &= ~IFF_RUNNING;
1339 	ifp->if_timer = 0;
1340 	if (xneti->xni_status == CONNECTED) {
1341 		xennetback_evthandler(ifp->if_softc); /* flush pending RX requests */
1342 	}
1343 	splx(s);
1344 }
1345