/* $NetBSD: xennetback_xenbus.c,v 1.126 2024/07/05 04:31:50 rin Exp $ */

/*
 * Copyright (c) 2006 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: xennetback_xenbus.c,v 1.126 2024/07/05 04:31:50 rin Exp $");

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/queue.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/errno.h>
#include <sys/device.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/bpf.h>

#include <net/if_ether.h>

#include <xen/intr.h>
#include <xen/hypervisor.h>
#include <xen/xen.h>
#include <xen/xen_shm.h>
#include <xen/evtchn.h>
#include <xen/xenbus.h>
#include <xen/xennet_checksum.h>

#include <uvm/uvm.h>

/*
 * Backend network device driver for Xen.
 */

#ifdef XENDEBUG_NET
#define XENPRINTF(x) printf x
#else
#define XENPRINTF(x)
#endif

#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)

/*
 * Number of packets to transmit in one hypercall (= number of pages to
 * transmit at once).
 */
#define NB_XMIT_PAGES_BATCH 64
CTASSERT(NB_XMIT_PAGES_BATCH >= XEN_NETIF_NR_SLOTS_MIN);
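/*
 * The assertion above guarantees that a batch can hold a complete packet:
 * a frontend may split one packet over up to XEN_NETIF_NR_SLOTS_MIN Tx
 * request slots, and the event handler below always keeps all fragments
 * of a packet in the same batch.
 */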

/* ratecheck(9) for pool allocation failures */
static const struct timeval xni_pool_errintvl = { 30, 0 }; /* 30s, each */

/* state of a xnetback instance */
typedef enum {
	CONNECTED,
	DISCONNECTING,
	DISCONNECTED
} xnetback_state_t;

struct xnetback_xstate {
	bus_dmamap_t xs_dmamap;
	bool xs_loaded;
	struct mbuf *xs_m;
	struct netif_tx_request xs_tx;
	uint16_t xs_tx_size;	/* Size of data in this Tx fragment */
};

/* we keep the xnetback instances in a linked list */
struct xnetback_instance {
	SLIST_ENTRY(xnetback_instance) next;
	struct xenbus_device *xni_xbusd; /* our xenstore entry */
	domid_t xni_domid;		/* attached to this domain */
	uint32_t xni_handle;		/* domain-specific handle */
	xnetback_state_t xni_status;

	/* network interface stuff */
	struct ethercom xni_ec;
	struct callout xni_restart;
	uint8_t xni_enaddr[ETHER_ADDR_LEN];

	/* remote domain communication stuff */
	unsigned int xni_evtchn; /* our event channel */
	struct intrhand *xni_ih;
	netif_tx_back_ring_t xni_txring;
	netif_rx_back_ring_t xni_rxring;
	grant_handle_t xni_tx_ring_handle; /* to unmap the ring */
	grant_handle_t xni_rx_ring_handle;
	vaddr_t xni_tx_ring_va; /* to unmap the ring */
	vaddr_t xni_rx_ring_va;

	/* arrays used in xennetback_ifstart(), used for both Rx and Tx */
	gnttab_copy_t xni_gop_copy[NB_XMIT_PAGES_BATCH];
	struct xnetback_xstate xni_xstate[NB_XMIT_PAGES_BATCH];

	/* event counters */
	struct evcnt xni_cnt_rx_cksum_blank;
	struct evcnt xni_cnt_rx_cksum_undefer;
};
#define xni_if xni_ec.ec_if
#define xni_bpf xni_if.if_bpf

void xvifattach(int);
static int xennetback_ifioctl(struct ifnet *, u_long, void *);
static void xennetback_ifstart(struct ifnet *);
static void xennetback_ifsoftstart_copy(struct xnetback_instance *);
static void xennetback_ifwatchdog(struct ifnet *);
static int xennetback_ifinit(struct ifnet *);
static void xennetback_ifstop(struct ifnet *, int);

static int xennetback_xenbus_create(struct xenbus_device *);
static int xennetback_xenbus_destroy(void *);
static void xennetback_frontend_changed(void *, XenbusState);

static inline void xennetback_tx_response(struct xnetback_instance *,
    int, int);

static SLIST_HEAD(, xnetback_instance) xnetback_instances;
static kmutex_t xnetback_lock;

static bool xnetif_lookup(domid_t, uint32_t);
static int xennetback_evthandler(void *);

static struct xenbus_backend_driver xvif_backend_driver = {
	.xbakd_create = xennetback_xenbus_create,
	.xbakd_type = "vif"
};

void
xvifattach(int n)
{
	XENPRINTF(("xennetback_init\n"));

	SLIST_INIT(&xnetback_instances);
	mutex_init(&xnetback_lock, MUTEX_DEFAULT, IPL_NONE);

	xenbus_backend_register(&xvif_backend_driver);
}

static int
xennetback_xenbus_create(struct xenbus_device *xbusd)
{
	struct xnetback_instance *xneti;
	long domid, handle;
	struct ifnet *ifp;
	extern int ifqmaxlen; /* XXX */
	char *e, *p;
	char mac[32];
	int i, err;
	struct xenbus_transaction *xbt;

	if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
	    "frontend-id", &domid, 10)) != 0) {
		aprint_error("xvif: can't read %s/frontend-id: %d\n",
		    xbusd->xbusd_path, err);
		return err;
	}
	if ((err = xenbus_read_ul(NULL, xbusd->xbusd_path,
	    "handle", &handle, 10)) != 0) {
		aprint_error("xvif: can't read %s/handle: %d\n",
		    xbusd->xbusd_path, err);
		return err;
	}

	xneti = kmem_zalloc(sizeof(*xneti), KM_SLEEP);
	xneti->xni_domid = domid;
	xneti->xni_handle = handle;
	xneti->xni_status = DISCONNECTED;

	/* Need to keep the lock for lookup and the list update */
	mutex_enter(&xnetback_lock);
	if (xnetif_lookup(domid, handle)) {
		mutex_exit(&xnetback_lock);
		kmem_free(xneti, sizeof(*xneti));
		return EEXIST;
	}
	SLIST_INSERT_HEAD(&xnetback_instances, xneti, next);
	mutex_exit(&xnetback_lock);

	xbusd->xbusd_u.b.b_cookie = xneti;
	xbusd->xbusd_u.b.b_detach = xennetback_xenbus_destroy;
	xneti->xni_xbusd = xbusd;

	ifp = &xneti->xni_if;
	ifp->if_softc = xneti;
	snprintf(ifp->if_xname, IFNAMSIZ, "xvif%di%d",
	    (int)domid, (int)handle);

	/* read mac address */
	err = xenbus_read(NULL, xbusd->xbusd_path, "mac", mac, sizeof(mac));
	if (err) {
		aprint_error_ifnet(ifp, "can't read %s/mac: %d\n",
		    xbusd->xbusd_path, err);
		goto fail;
	}
	for (i = 0, p = mac; i < ETHER_ADDR_LEN; i++) {
		xneti->xni_enaddr[i] = strtoul(p, &e, 16);
		if ((e[0] == '\0' && i != 5) && e[0] != ':') {
			aprint_error_ifnet(ifp,
			    "%s is not a valid mac address\n", mac);
			err = EINVAL;
			goto fail;
		}
		p = &e[1];
	}

	/* we can't use the same MAC addr as our guest */
	xneti->xni_enaddr[3]++;
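	/*
	 * (The address read from xenstore is the one the frontend will
	 * use; bumping one octet is enough to make the backend
	 * pseudo-interface distinct from it.)
	 */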

	/* Initialize DMA map, used only for loading PA */
	for (i = 0; i < __arraycount(xneti->xni_xstate); i++) {
		if (bus_dmamap_create(xneti->xni_xbusd->xbusd_dmat,
		    ETHER_MAX_LEN_JUMBO, XEN_NETIF_NR_SLOTS_MIN,
		    PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
		    &xneti->xni_xstate[i].xs_dmamap)
		    != 0) {
			aprint_error_ifnet(ifp,
			    "failed to allocate dma map\n");
			err = ENOMEM;
			goto fail;
		}
	}

	evcnt_attach_dynamic(&xneti->xni_cnt_rx_cksum_blank, EVCNT_TYPE_MISC,
	    NULL, ifp->if_xname, "Rx csum blank");
	evcnt_attach_dynamic(&xneti->xni_cnt_rx_cksum_undefer, EVCNT_TYPE_MISC,
	    NULL, ifp->if_xname, "Rx csum undeferred");

	/* create pseudo-interface */
	aprint_verbose_ifnet(ifp, "Ethernet address %s\n",
	    ether_sprintf(xneti->xni_enaddr));
	xneti->xni_ec.ec_capabilities |= ETHERCAP_VLAN_MTU | ETHERCAP_JUMBO_MTU;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_snd.ifq_maxlen =
	    uimax(ifqmaxlen, NET_TX_RING_SIZE * 2);
	ifp->if_capabilities =
	    IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx
	    | IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx
	    | IFCAP_CSUM_UDPv6_Rx | IFCAP_CSUM_UDPv6_Tx
	    | IFCAP_CSUM_TCPv6_Rx | IFCAP_CSUM_TCPv6_Tx;
#define XN_M_CSUM_SUPPORTED \
	(M_CSUM_TCPv4 | M_CSUM_UDPv4 | M_CSUM_TCPv6 | M_CSUM_UDPv6)
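/*
 * XN_M_CSUM_SUPPORTED is the set of m_pkthdr.csum_flags bits matching the
 * IFCAP_CSUM_* capabilities advertised above; it is used below when
 * translating between the NETTXF_ and NETRXF_ checksum bits and the mbuf
 * checksum flags.
 */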

	ifp->if_ioctl = xennetback_ifioctl;
	ifp->if_start = xennetback_ifstart;
	ifp->if_watchdog = xennetback_ifwatchdog;
	ifp->if_init = xennetback_ifinit;
	ifp->if_stop = xennetback_ifstop;
	ifp->if_timer = 0;
	IFQ_SET_MAXLEN(&ifp->if_snd, uimax(2 * NET_TX_RING_SIZE, IFQ_MAXLEN));
	IFQ_SET_READY(&ifp->if_snd);
	if_attach(ifp);
	if_deferred_start_init(ifp, NULL);
	ether_ifattach(&xneti->xni_if, xneti->xni_enaddr);

	xbusd->xbusd_otherend_changed = xennetback_frontend_changed;

	do {
		xbt = xenbus_transaction_start();
		if (xbt == NULL) {
			aprint_error_ifnet(ifp,
			    "%s: can't start transaction\n",
			    xbusd->xbusd_path);
			goto fail;
		}
		err = xenbus_printf(xbt, xbusd->xbusd_path,
		    "vifname", "%s", ifp->if_xname);
		if (err) {
			aprint_error_ifnet(ifp,
			    "failed to write %s/vifname: %d\n",
			    xbusd->xbusd_path, err);
			goto abort_xbt;
		}
		err = xenbus_printf(xbt, xbusd->xbusd_path,
		    "feature-rx-copy", "%d", 1);
		if (err) {
			aprint_error_ifnet(ifp,
			    "failed to write %s/feature-rx-copy: %d\n",
			    xbusd->xbusd_path, err);
			goto abort_xbt;
		}
		err = xenbus_printf(xbt, xbusd->xbusd_path,
		    "feature-ipv6-csum-offload", "%d", 1);
		if (err) {
			aprint_error_ifnet(ifp,
			    "failed to write %s/feature-ipv6-csum-offload: %d\n",
			    xbusd->xbusd_path, err);
			goto abort_xbt;
		}
		err = xenbus_printf(xbt, xbusd->xbusd_path,
		    "feature-sg", "%d", 1);
		if (err) {
			aprint_error_ifnet(ifp,
			    "failed to write %s/feature-sg: %d\n",
			    xbusd->xbusd_path, err);
			goto abort_xbt;
		}
	} while ((err = xenbus_transaction_end(xbt, 0)) == EAGAIN);
	if (err) {
		aprint_error_ifnet(ifp,
		    "%s: can't end transaction: %d\n",
		    xbusd->xbusd_path, err);
	}

	err = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait);
	if (err) {
		aprint_error_ifnet(ifp,
		    "failed to switch state on %s: %d\n",
		    xbusd->xbusd_path, err);
		goto fail;
	}
	return 0;

abort_xbt:
	xenbus_transaction_end(xbt, 1);
fail:
	kmem_free(xneti, sizeof(*xneti));
	return err;
}

int
xennetback_xenbus_destroy(void *arg)
{
	struct xnetback_instance *xneti = arg;

	aprint_verbose_ifnet(&xneti->xni_if, "disconnecting\n");

	if (xneti->xni_ih != NULL) {
		hypervisor_mask_event(xneti->xni_evtchn);
		xen_intr_disestablish(xneti->xni_ih);
		xneti->xni_ih = NULL;
	}

	mutex_enter(&xnetback_lock);
	SLIST_REMOVE(&xnetback_instances,
	    xneti, xnetback_instance, next);
	mutex_exit(&xnetback_lock);

	ether_ifdetach(&xneti->xni_if);
	if_detach(&xneti->xni_if);

	evcnt_detach(&xneti->xni_cnt_rx_cksum_blank);
	evcnt_detach(&xneti->xni_cnt_rx_cksum_undefer);

	/* Destroy DMA maps */
	for (int i = 0; i < __arraycount(xneti->xni_xstate); i++) {
		if (xneti->xni_xstate[i].xs_dmamap != NULL) {
			bus_dmamap_destroy(xneti->xni_xbusd->xbusd_dmat,
			    xneti->xni_xstate[i].xs_dmamap);
			xneti->xni_xstate[i].xs_dmamap = NULL;
		}
	}

	if (xneti->xni_txring.sring) {
		xen_shm_unmap(xneti->xni_tx_ring_va, 1,
		    &xneti->xni_tx_ring_handle);
	}
	if (xneti->xni_rxring.sring) {
		xen_shm_unmap(xneti->xni_rx_ring_va, 1,
		    &xneti->xni_rx_ring_handle);
	}
	if (xneti->xni_tx_ring_va != 0) {
		uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
		    PAGE_SIZE, UVM_KMF_VAONLY);
		xneti->xni_tx_ring_va = 0;
	}
	if (xneti->xni_rx_ring_va != 0) {
		uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
		    PAGE_SIZE, UVM_KMF_VAONLY);
		xneti->xni_rx_ring_va = 0;
	}
	kmem_free(xneti, sizeof(*xneti));
	return 0;
}

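/*
 * Connect to the frontend: read the ring references and the event channel
 * from the frontend's xenstore directory, map both shared rings into
 * kernel VA, bind the interdomain event channel and hook it up to
 * xennetback_evthandler().  Only the rx-copy mode is supported.
 */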
static int
xennetback_connect(struct xnetback_instance *xneti)
{
	int err;
	netif_tx_sring_t *tx_ring;
	netif_rx_sring_t *rx_ring;
	evtchn_op_t evop;
	u_long tx_ring_ref, rx_ring_ref;
	grant_ref_t gtx_ring_ref, grx_ring_ref;
	u_long revtchn, rx_copy;
	struct xenbus_device *xbusd = xneti->xni_xbusd;

	/* read communication information */
	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
	    "tx-ring-ref", &tx_ring_ref, 10);
	if (err) {
		xenbus_dev_fatal(xbusd, err, "reading %s/tx-ring-ref",
		    xbusd->xbusd_otherend);
		return -1;
	}
	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
	    "rx-ring-ref", &rx_ring_ref, 10);
	if (err) {
		xenbus_dev_fatal(xbusd, err, "reading %s/rx-ring-ref",
		    xbusd->xbusd_otherend);
		return -1;
	}
	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
	    "event-channel", &revtchn, 10);
	if (err) {
		xenbus_dev_fatal(xbusd, err, "reading %s/event-channel",
		    xbusd->xbusd_otherend);
		return -1;
	}
	err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
	    "request-rx-copy", &rx_copy, 10);
	if (err == ENOENT || !rx_copy) {
		xenbus_dev_fatal(xbusd, err,
		    "%s/request-rx-copy not supported by frontend",
		    xbusd->xbusd_otherend);
		return -1;
	} else if (err) {
		xenbus_dev_fatal(xbusd, err, "reading %s/request-rx-copy",
		    xbusd->xbusd_otherend);
		return -1;
	}

	/* allocate VA space and map rings */
	xneti->xni_tx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_VAONLY);
	if (xneti->xni_tx_ring_va == 0) {
		xenbus_dev_fatal(xbusd, ENOMEM,
		    "can't get VA for TX ring", xbusd->xbusd_otherend);
		goto err1;
	}
	tx_ring = (void *)xneti->xni_tx_ring_va;

	xneti->xni_rx_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_VAONLY);
	if (xneti->xni_rx_ring_va == 0) {
		xenbus_dev_fatal(xbusd, ENOMEM,
		    "can't get VA for RX ring", xbusd->xbusd_otherend);
		goto err1;
	}
	rx_ring = (void *)xneti->xni_rx_ring_va;

	gtx_ring_ref = tx_ring_ref;
	if (xen_shm_map(1, xneti->xni_domid, &gtx_ring_ref,
	    xneti->xni_tx_ring_va, &xneti->xni_tx_ring_handle, 0) != 0) {
		aprint_error_ifnet(&xneti->xni_if,
		    "can't map TX grant ref\n");
		goto err2;
	}
	BACK_RING_INIT(&xneti->xni_txring, tx_ring, PAGE_SIZE);

	grx_ring_ref = rx_ring_ref;
	if (xen_shm_map(1, xneti->xni_domid, &grx_ring_ref,
	    xneti->xni_rx_ring_va, &xneti->xni_rx_ring_handle, 0) != 0) {
		aprint_error_ifnet(&xneti->xni_if,
		    "can't map RX grant ref\n");
		goto err2;
	}
	BACK_RING_INIT(&xneti->xni_rxring, rx_ring, PAGE_SIZE);

	evop.cmd = EVTCHNOP_bind_interdomain;
	evop.u.bind_interdomain.remote_dom = xneti->xni_domid;
	evop.u.bind_interdomain.remote_port = revtchn;
	err = HYPERVISOR_event_channel_op(&evop);
	if (err) {
		aprint_error_ifnet(&xneti->xni_if,
		    "can't get event channel: %d\n", err);
		goto err2;
	}
	xneti->xni_evtchn = evop.u.bind_interdomain.local_port;
	xneti->xni_status = CONNECTED;

	xneti->xni_ih = xen_intr_establish_xname(-1, &xen_pic,
	    xneti->xni_evtchn, IST_LEVEL, IPL_NET, xennetback_evthandler,
	    xneti, false, xneti->xni_if.if_xname);
	KASSERT(xneti->xni_ih != NULL);
	xennetback_ifinit(&xneti->xni_if);
	hypervisor_unmask_event(xneti->xni_evtchn);
	hypervisor_notify_via_evtchn(xneti->xni_evtchn);
	return 0;

err2:
	/* unmap rings */
	if (xneti->xni_tx_ring_handle != 0) {
		xen_shm_unmap(xneti->xni_tx_ring_va, 1,
		    &xneti->xni_tx_ring_handle);
	}

	if (xneti->xni_rx_ring_handle != 0) {
		xen_shm_unmap(xneti->xni_rx_ring_va, 1,
		    &xneti->xni_rx_ring_handle);
	}
err1:
	/* free rings VA space */
	if (xneti->xni_rx_ring_va != 0)
		uvm_km_free(kernel_map, xneti->xni_rx_ring_va,
		    PAGE_SIZE, UVM_KMF_VAONLY);

	if (xneti->xni_tx_ring_va != 0)
		uvm_km_free(kernel_map, xneti->xni_tx_ring_va,
		    PAGE_SIZE, UVM_KMF_VAONLY);

	return -1;

}

static void
xennetback_frontend_changed(void *arg, XenbusState new_state)
{
	struct xnetback_instance *xneti = arg;
	struct xenbus_device *xbusd = xneti->xni_xbusd;

	XENPRINTF(("%s: new state %d\n", xneti->xni_if.if_xname, new_state));
	switch(new_state) {
	case XenbusStateInitialising:
	case XenbusStateInitialised:
		break;

	case XenbusStateConnected:
		if (xneti->xni_status == CONNECTED)
			break;
		if (xennetback_connect(xneti) == 0)
			xenbus_switch_state(xbusd, NULL, XenbusStateConnected);
		break;

	case XenbusStateClosing:
		xneti->xni_status = DISCONNECTING;
		xneti->xni_if.if_flags &= ~IFF_RUNNING;
		xneti->xni_if.if_timer = 0;
		xenbus_switch_state(xbusd, NULL, XenbusStateClosing);
		break;

	case XenbusStateClosed:
		/* otherend_changed() should handle it for us */
		panic("xennetback_frontend_changed: closed\n");
	case XenbusStateUnknown:
	case XenbusStateInitWait:
	default:
		aprint_error("%s: invalid frontend state %d\n",
		    xneti->xni_if.if_xname, new_state);
		break;
	}
	return;

}

/* lookup a xneti based on domain id and interface handle */
static bool
xnetif_lookup(domid_t dom, uint32_t handle)
{
	struct xnetback_instance *xneti;
	bool found = false;

	KASSERT(mutex_owned(&xnetback_lock));

	SLIST_FOREACH(xneti, &xnetback_instances, next) {
		if (xneti->xni_domid == dom && xneti->xni_handle == handle) {
			found = true;
			break;
		}
	}

	return found;
}

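/*
 * Queue a Tx response for the given request id and notify the frontend
 * through the event channel if it asked for a notification.
 */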
static inline void
xennetback_tx_response(struct xnetback_instance *xneti, int id, int status)
{
	RING_IDX resp_prod;
	netif_tx_response_t *txresp;
	int do_event;

	resp_prod = xneti->xni_txring.rsp_prod_pvt;
	txresp = RING_GET_RESPONSE(&xneti->xni_txring, resp_prod);

	txresp->id = id;
	txresp->status = status;
	xneti->xni_txring.rsp_prod_pvt++;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xneti->xni_txring, do_event);
	if (do_event) {
		XENPRINTF(("%s send event\n", xneti->xni_if.if_xname));
		hypervisor_notify_via_evtchn(xneti->xni_evtchn);
	}
}

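/*
 * Sanity-check one Tx request.  Returns a string describing the problem,
 * or NULL if the request looks valid.
 */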
static const char *
xennetback_tx_check_packet(const netif_tx_request_t *txreq, bool first)
{
	if (__predict_false((txreq->flags & NETTXF_more_data) == 0 &&
	    txreq->offset + txreq->size > PAGE_SIZE))
		return "crossing page boundary";

	if (__predict_false(txreq->size > ETHER_MAX_LEN_JUMBO))
		return "bigger than jumbo";

	if (first &&
	    __predict_false(txreq->size < ETHER_HDR_LEN))
		return "too short";

	return NULL;
}

static int
xennetback_copy(struct ifnet *ifp, gnttab_copy_t *gop, int copycnt,
    const char *dir)
{
	/*
	 * Copy the data and ack it. Delaying it until the mbuf is
	 * freed will stall transmit.
	 */
	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, gop, copycnt) != 0) {
		printf("%s: GNTTABOP_copy %s failed", ifp->if_xname, dir);
		return EINVAL;
	}

	/* check the status of every entry in the batch */
	for (int i = 0; i < copycnt; i++, gop++) {
		if (gop->status != GNTST_okay) {
			printf("%s GNTTABOP_copy[%d] %s %d\n",
			    ifp->if_xname, i, dir, gop->status);
			return EINVAL;
		}
	}

	return 0;
}

static void
xennetback_tx_copy_abort(struct ifnet *ifp, struct xnetback_instance *xneti,
    int queued)
{
	struct xnetback_xstate *xst;

	for (int i = 0; i < queued; i++) {
		xst = &xneti->xni_xstate[i];

		if (xst->xs_loaded) {
			KASSERT(xst->xs_m != NULL);
			bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
			    xst->xs_dmamap);
			xst->xs_loaded = false;
			m_freem(xst->xs_m);
		}

		xennetback_tx_response(xneti, xst->xs_tx.id, NETIF_RSP_ERROR);
		if_statinc(ifp, if_ierrors);
	}
}

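/*
 * Copy the queued Tx fragments from the frontend's grant pages into the
 * waiting mbufs.  The first pass loads the DMA maps and queues grant-copy
 * operations (flushed whenever NB_XMIT_PAGES_BATCH entries are pending);
 * once all copies have succeeded, the second pass acknowledges every
 * request and passes the completed packets up the stack, applying the
 * requested checksum handling.
 */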
static void
xennetback_tx_copy_process(struct ifnet *ifp, struct xnetback_instance *xneti,
    int queued)
{
	gnttab_copy_t *gop;
	struct xnetback_xstate *xst;
	int copycnt = 0, seg = 0;
	size_t goff = 0, segoff = 0, gsize, take;
	bus_dmamap_t dm = NULL;
	paddr_t ma;

	for (int i = 0; i < queued; i++) {
		xst = &xneti->xni_xstate[i];

		if (xst->xs_m != NULL) {
			KASSERT(xst->xs_m->m_pkthdr.len == xst->xs_tx.size);
			if (__predict_false(bus_dmamap_load_mbuf(
			    xneti->xni_xbusd->xbusd_dmat,
			    xst->xs_dmamap, xst->xs_m, BUS_DMA_NOWAIT) != 0))
				goto abort;
			xst->xs_loaded = true;
			dm = xst->xs_dmamap;
			seg = 0;
			goff = segoff = 0;
		}

		gsize = xst->xs_tx_size;
		goff = 0;
		for (; seg < dm->dm_nsegs && gsize > 0; seg++) {
			bus_dma_segment_t *ds = &dm->dm_segs[seg];
			ma = ds->ds_addr;
			take = uimin(gsize, ds->ds_len);

			KASSERT(copycnt <= NB_XMIT_PAGES_BATCH);
			if (copycnt == NB_XMIT_PAGES_BATCH) {
				if (xennetback_copy(ifp, xneti->xni_gop_copy,
				    copycnt, "Tx") != 0)
					goto abort;
				copycnt = 0;
			}

			/* Queue for the copy */
			gop = &xneti->xni_gop_copy[copycnt++];
			memset(gop, 0, sizeof(*gop));
			gop->flags = GNTCOPY_source_gref;
			gop->len = take;

			gop->source.u.ref = xst->xs_tx.gref;
			gop->source.offset = xst->xs_tx.offset + goff;
			gop->source.domid = xneti->xni_domid;

			gop->dest.offset = (ma & PAGE_MASK) + segoff;
			KASSERT(gop->dest.offset <= PAGE_SIZE);
			gop->dest.domid = DOMID_SELF;
			gop->dest.u.gmfn = ma >> PAGE_SHIFT;

			goff += take;
			gsize -= take;
			if (take + segoff < ds->ds_len) {
				segoff += take;
				/* Segment not completely consumed yet */
				break;
			}
			segoff = 0;
		}
		KASSERT(gsize == 0);
		KASSERT(goff == xst->xs_tx_size);
	}
	if (copycnt > 0) {
		if (xennetback_copy(ifp, xneti->xni_gop_copy, copycnt, "Tx"))
			goto abort;
		copycnt = 0;
	}

	/* If we got here, the whole copy was successful */
	for (int i = 0; i < queued; i++) {
		xst = &xneti->xni_xstate[i];

		xennetback_tx_response(xneti, xst->xs_tx.id, NETIF_RSP_OKAY);

		if (xst->xs_m != NULL) {
			KASSERT(xst->xs_loaded);
			bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
			    xst->xs_dmamap);

			if (xst->xs_tx.flags & NETTXF_csum_blank) {
				xennet_checksum_fill(ifp, xst->xs_m,
				    &xneti->xni_cnt_rx_cksum_blank,
				    &xneti->xni_cnt_rx_cksum_undefer);
			} else if (xst->xs_tx.flags & NETTXF_data_validated) {
				xst->xs_m->m_pkthdr.csum_flags =
				    XN_M_CSUM_SUPPORTED;
			}
			m_set_rcvif(xst->xs_m, ifp);

			if_percpuq_enqueue(ifp->if_percpuq, xst->xs_m);
		}
	}

	return;

abort:
	xennetback_tx_copy_abort(ifp, xneti, queued);
}

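/*
 * For a multi-fragment packet, the size in the first Tx request is the
 * total packet size.  Walk the following requests (already pushed on the
 * ring) to deduce how many bytes actually belong to the first fragment,
 * counting the fragments along the way.  Returns -1 if the chain is
 * malformed or longer than XEN_NETIF_NR_SLOTS_MIN slots.
 */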
static int
xennetback_tx_m0len_fragment(struct xnetback_instance *xneti,
    int m0_len, int req_cons, int *cntp)
{
	netif_tx_request_t *txreq;

	/* This assumes all the requests are already pushed into the ring */
	*cntp = 1;
	do {
		txreq = RING_GET_REQUEST(&xneti->xni_txring, req_cons);
		if (m0_len <= txreq->size || *cntp > XEN_NETIF_NR_SLOTS_MIN)
			return -1;
		if (RING_REQUEST_CONS_OVERFLOW(&xneti->xni_txring, req_cons))
			return -1;

		m0_len -= txreq->size;
		req_cons++;
		(*cntp)++;
	} while (txreq->flags & NETTXF_more_data);

	return m0_len;
}

static int
xennetback_evthandler(void *arg)
{
	struct xnetback_instance *xneti = arg;
	struct ifnet *ifp = &xneti->xni_if;
	netif_tx_request_t txreq;
	struct mbuf *m, *m0 = NULL, *mlast = NULL;
	int receive_pending;
	int queued = 0, m0_len = 0;
	struct xnetback_xstate *xst;
	const bool nupnrun = ((ifp->if_flags & (IFF_UP | IFF_RUNNING)) !=
	    (IFF_UP | IFF_RUNNING));
	bool discard = 0;

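	/*
	 * Consume the Tx requests posted by the frontend.  Single-fragment
	 * requests map to one mbuf each; multi-fragment requests (flagged
	 * with NETTXF_more_data) are assembled into an mbuf chain headed by
	 * m0.  Completed packets accumulate in xni_xstate[] and are handed
	 * to xennetback_tx_copy_process() in batches of at most
	 * NB_XMIT_PAGES_BATCH requests.
	 */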
	XENPRINTF(("xennetback_evthandler "));
again:
	while (RING_HAS_UNCONSUMED_REQUESTS(&xneti->xni_txring)) {
		/*
		 * Ensure we have read the producer's queue index in
		 * RING_FINAL_CHECK_FOR_REQUESTS before we read the
		 * content of the producer's next request in
		 * RING_COPY_REQUEST.
		 */
		xen_rmb();
		RING_COPY_REQUEST(&xneti->xni_txring,
		    xneti->xni_txring.req_cons,
		    &txreq);
		XENPRINTF(("%s pkt size %d\n", xneti->xni_if.if_xname,
		    txreq.size));
		xneti->xni_txring.req_cons++;
		if (__predict_false(nupnrun || discard)) {
			/* interface not up, drop all requests */
			if_statinc(ifp, if_iqdrops);
			discard = (txreq.flags & NETTXF_more_data) != 0;
			xennetback_tx_response(xneti, txreq.id,
			    NETIF_RSP_DROPPED);
			continue;
		}

		/*
		 * Do some sanity checks, and queue copy of the data.
		 */
		const char *msg = xennetback_tx_check_packet(&txreq,
		    m0 == NULL);
		if (__predict_false(msg != NULL)) {
			printf("%s: packet with size %d is %s\n",
			    ifp->if_xname, txreq.size, msg);
			discard = (txreq.flags & NETTXF_more_data) != 0;
			xennetback_tx_response(xneti, txreq.id,
			    NETIF_RSP_ERROR);
			if_statinc(ifp, if_ierrors);
			continue;
		}

		/* get a mbuf for this fragment */
		MGETHDR(m, M_DONTWAIT, MT_DATA);
		if (__predict_false(m == NULL)) {
			static struct timeval lasttime;
mbuf_fail:
			if (ratecheck(&lasttime, &xni_pool_errintvl))
				printf("%s: mbuf alloc failed\n",
				    ifp->if_xname);
			xennetback_tx_copy_abort(ifp, xneti, queued);
			queued = 0;
			m0 = NULL;
			discard = (txreq.flags & NETTXF_more_data) != 0;
			xennetback_tx_response(xneti, txreq.id,
			    NETIF_RSP_DROPPED);
			if_statinc(ifp, if_ierrors);
			continue;
		}
		m->m_len = m->m_pkthdr.len = txreq.size;

		if (!m0 && (txreq.flags & NETTXF_more_data)) {
			/*
			 * The first fragment of multi-fragment Tx request
			 * contains total size. Need to read whole
			 * chain to determine actual size of the first
			 * (i.e. current) fragment.
			 */
			int cnt;
			m0_len = xennetback_tx_m0len_fragment(xneti,
			    txreq.size, xneti->xni_txring.req_cons, &cnt);
			if (m0_len < 0) {
				m_freem(m);
				discard = 1;
				xennetback_tx_response(xneti, txreq.id,
				    NETIF_RSP_DROPPED);
				if_statinc(ifp, if_ierrors);
				continue;
			}
			m->m_len = m0_len;
			KASSERT(cnt <= XEN_NETIF_NR_SLOTS_MIN);

			if (queued + cnt >= NB_XMIT_PAGES_BATCH) {
				/*
				 * Flush queue if too full to fit this
				 * new packet whole.
				 */
				xennetback_tx_copy_process(ifp, xneti, queued);
				queued = 0;
			}
		}

		if (m->m_len > MHLEN) {
			MCLGET(m, M_DONTWAIT);
			if (__predict_false((m->m_flags & M_EXT) == 0)) {
				m_freem(m);
				goto mbuf_fail;
			}
			if (__predict_false(m->m_len > MCLBYTES)) {
				/* one more mbuf necessary */
				struct mbuf *mn;
				MGET(mn, M_DONTWAIT, MT_DATA);
				if (__predict_false(mn == NULL)) {
					m_freem(m);
					goto mbuf_fail;
				}
				if (m->m_len - MCLBYTES > MLEN) {
					MCLGET(mn, M_DONTWAIT);
					if ((mn->m_flags & M_EXT) == 0) {
						m_freem(mn);
						m_freem(m);
						goto mbuf_fail;
					}
				}
				mn->m_len = m->m_len - MCLBYTES;
				m->m_len = MCLBYTES;
				m->m_next = mn;
				KASSERT(mn->m_len <= MCLBYTES);
			}
			KASSERT(m->m_len <= MCLBYTES);
		}

		if (m0 || (txreq.flags & NETTXF_more_data)) {
			if (m0 == NULL) {
				m0 = m;
				mlast = (m->m_next) ? m->m_next : m;
				KASSERT(mlast->m_next == NULL);
			} else {
				/* Coalesce like m_cat(), but without copy */
				KASSERT(mlast != NULL);
				if (M_TRAILINGSPACE(mlast) >= m->m_pkthdr.len) {
					mlast->m_len += m->m_pkthdr.len;
					m_freem(m);
				} else {
					mlast->m_next = m;
					mlast = (m->m_next) ? m->m_next : m;
					KASSERT(mlast->m_next == NULL);
				}
			}
		}

		XENPRINTF(("%s pkt offset %d size %d id %d req_cons %d\n",
		    xneti->xni_if.if_xname, txreq.offset,
		    txreq.size, txreq.id,
		    xneti->xni_txring.req_cons & (RING_SIZE(&xneti->xni_txring) - 1)));

		xst = &xneti->xni_xstate[queued];
		xst->xs_m = (m0 == NULL || m == m0) ? m : NULL;
		xst->xs_tx = txreq;
		/* Fill the length of _this_ fragment */
		xst->xs_tx_size = (m == m0) ? m0_len : m->m_pkthdr.len;
		queued++;

		KASSERT(queued <= NB_XMIT_PAGES_BATCH);
		if (__predict_false(m0 &&
		    (txreq.flags & NETTXF_more_data) == 0)) {
			/* Last fragment, stop appending mbufs */
			m0 = NULL;
		}
		if (queued == NB_XMIT_PAGES_BATCH) {
			KASSERT(m0 == NULL);
			xennetback_tx_copy_process(ifp, xneti, queued);
			queued = 0;
		}
	}
	RING_FINAL_CHECK_FOR_REQUESTS(&xneti->xni_txring, receive_pending);
	if (receive_pending)
		goto again;
	if (m0) {
		/* Queue empty, and still unfinished multi-fragment request */
		printf("%s: dropped unfinished multi-fragment\n",
		    ifp->if_xname);
		xennetback_tx_copy_abort(ifp, xneti, queued);
		queued = 0;
		m0 = NULL;
	}
	if (queued > 0)
		xennetback_tx_copy_process(ifp, xneti, queued);

	/* check to see if we can transmit more packets */
	if_schedule_deferred_start(ifp);

	return 1;
}

static int
xennetback_ifioctl(struct ifnet *ifp, u_long cmd, void *data)
{
	//struct xnetback_instance *xneti = ifp->if_softc;
	//struct ifreq *ifr = (struct ifreq *)data;
	int s, error;

	s = splnet();
	error = ether_ioctl(ifp, cmd, data);
	if (error == ENETRESET)
		error = 0;
	splx(s);
	return error;
}

static void
xennetback_ifstart(struct ifnet *ifp)
{
	struct xnetback_instance *xneti = ifp->if_softc;

	/*
	 * The Xen communication channel is much more efficient if we can
	 * schedule a batch of packets for the domain.  Deferred start by the
	 * network stack will enqueue all pending mbufs in the interface's
	 * send queue before the queue is processed by the soft interrupt
	 * handler.
	 */
	xennetback_ifsoftstart_copy(xneti);
}

static void
xennetback_rx_copy_process(struct ifnet *ifp, struct xnetback_instance *xneti,
    int queued, int copycnt)
{
	int notify;
	struct xnetback_xstate *xst;

	if (xennetback_copy(ifp, xneti->xni_gop_copy, copycnt, "Rx") != 0) {
		/* message already displayed */
		goto free_mbufs;
	}

	/* update pointer */
	xneti->xni_rxring.req_cons += queued;
	xneti->xni_rxring.rsp_prod_pvt += queued;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xneti->xni_rxring, notify);

	/* send event */
	if (notify) {
		XENPRINTF(("%s receive event\n",
		    xneti->xni_if.if_xname));
		hypervisor_notify_via_evtchn(xneti->xni_evtchn);
	}

free_mbufs:
	/* now that data was copied we can free the mbufs */
	for (int j = 0; j < queued; j++) {
		xst = &xneti->xni_xstate[j];
		if (xst->xs_loaded) {
			bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
			    xst->xs_dmamap);
			xst->xs_loaded = false;
		}
		m_freem(xst->xs_m);
		xst->xs_m = NULL;
	}
}

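/*
 * Queue the grant-copy operations needed to transfer one loaded mbuf
 * (described by the DMA map in xst0) into the frontend's Rx buffers.
 * A packet that does not fit in a single page consumes several Rx
 * requests; the corresponding responses are chained with
 * NETRXF_more_data.  The caller's queued/copycnt counters are advanced
 * accordingly.
 */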
static void
xennetback_rx_copy_queue(struct xnetback_instance *xneti,
    struct xnetback_xstate *xst0, int rsp_prod_pvt, int *queued, int *copycntp)
{
	struct xnetback_xstate *xst = xst0;
	gnttab_copy_t *gop;
	struct netif_rx_request rxreq;
	netif_rx_response_t *rxresp;
	paddr_t ma;
	size_t goff, segoff, segsize, take, totsize;
	int copycnt = *copycntp, reqcnt = *queued;
	const bus_dmamap_t dm = xst0->xs_dmamap;
	const bool multiseg = (dm->dm_nsegs > 1);

	KASSERT(xst0 == &xneti->xni_xstate[reqcnt]);

	RING_COPY_REQUEST(&xneti->xni_rxring,
	    xneti->xni_rxring.req_cons + reqcnt, &rxreq);
	goff = 0;
	rxresp = RING_GET_RESPONSE(&xneti->xni_rxring, rsp_prod_pvt + reqcnt);
	reqcnt++;

	rxresp->id = rxreq.id;
	rxresp->offset = 0;
	if ((xst0->xs_m->m_pkthdr.csum_flags & XN_M_CSUM_SUPPORTED) != 0) {
		rxresp->flags = NETRXF_csum_blank;
	} else {
		rxresp->flags = NETRXF_data_validated;
	}
	if (multiseg)
		rxresp->flags |= NETRXF_more_data;

	totsize = xst0->xs_m->m_pkthdr.len;

	/*
	 * Arrange for the mbuf contents to be copied into one or more
	 * provided memory pages.
	 */
	for (int seg = 0; seg < dm->dm_nsegs; seg++) {
		ma = dm->dm_segs[seg].ds_addr;
		segsize = dm->dm_segs[seg].ds_len;
		segoff = 0;

		while (segoff < segsize) {
			take = uimin(PAGE_SIZE - goff, segsize - segoff);
			KASSERT(take <= totsize);

			/* add copy request */
			gop = &xneti->xni_gop_copy[copycnt++];
			gop->flags = GNTCOPY_dest_gref;
			gop->source.offset = (ma & PAGE_MASK) + segoff;
			gop->source.domid = DOMID_SELF;
			gop->source.u.gmfn = ma >> PAGE_SHIFT;

			gop->dest.u.ref = rxreq.gref;
			gop->dest.offset = goff;
			gop->dest.domid = xneti->xni_domid;

			gop->len = take;

			segoff += take;
			goff += take;
			totsize -= take;

			if (goff == PAGE_SIZE && totsize > 0) {
				rxresp->status = goff;

				/* Take next grant */
				RING_COPY_REQUEST(&xneti->xni_rxring,
				    xneti->xni_rxring.req_cons + reqcnt,
				    &rxreq);
				goff = 0;
				rxresp = RING_GET_RESPONSE(&xneti->xni_rxring,
				    rsp_prod_pvt + reqcnt);
				reqcnt++;

				rxresp->id = rxreq.id;
				rxresp->offset = 0;
				rxresp->flags = NETRXF_more_data;

				xst++;
				xst->xs_m = NULL;
			}
		}
	}
	rxresp->flags &= ~NETRXF_more_data;
	rxresp->status = goff;
	KASSERT(totsize == 0);

	KASSERT(copycnt > *copycntp);
	KASSERT(reqcnt > *queued);
	*copycntp = copycnt;
	*queued = reqcnt;
}

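/*
 * Move packets from the interface's send queue to the frontend.  Each
 * dequeued mbuf is defragmented when useful, loaded into a DMA map, and
 * turned into a batch of grant copies by xennetback_rx_copy_queue();
 * batches are flushed with xennetback_rx_copy_process().  When the Rx
 * ring runs out of space, the watchdog timer is armed so we retry later
 * (the frontend does not send an event when it posts new Rx requests).
 */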
static void
xennetback_ifsoftstart_copy(struct xnetback_instance *xneti)
{
	struct ifnet *ifp = &xneti->xni_if;
	struct mbuf *m;
	int queued = 0;
	RING_IDX req_prod, rsp_prod_pvt;
	struct xnetback_xstate *xst;
	int copycnt = 0;
	bool abort;

	XENPRINTF(("xennetback_ifsoftstart_copy "));
	int s = splnet();
	if (__predict_false((ifp->if_flags & IFF_RUNNING) == 0)) {
		splx(s);
		return;
	}

	while (!IFQ_IS_EMPTY(&ifp->if_snd)) {
		XENPRINTF(("pkt\n"));
		req_prod = xneti->xni_rxring.sring->req_prod;
		rsp_prod_pvt = xneti->xni_rxring.rsp_prod_pvt;
		xen_rmb();

		abort = false;
		KASSERT(queued == 0);
		KASSERT(copycnt == 0);
		while (copycnt < NB_XMIT_PAGES_BATCH) {
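			/*
			 * XN_RING_FULL(cnt) is true when the frontend has
			 * not posted enough Rx requests for cnt more slots,
			 * or when cnt more responses would leave the ring
			 * completely occupied by outstanding responses.
			 */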
#define XN_RING_FULL(cnt)	\
			req_prod == xneti->xni_rxring.req_cons + (cnt) || \
			xneti->xni_rxring.req_cons - (rsp_prod_pvt + cnt) == \
			NET_RX_RING_SIZE

			if (__predict_false(XN_RING_FULL(1))) {
				/* out of ring space */
				XENPRINTF(("xennetback_ifstart: ring full "
				    "req_prod 0x%x req_cons 0x%x rsp_prod_pvt "
				    "0x%x\n",
				    req_prod,
				    xneti->xni_rxring.req_cons + queued,
				    rsp_prod_pvt + queued));
				abort = true;
				break;
			}

			IFQ_DEQUEUE(&ifp->if_snd, m);
			if (m == NULL)
				break;

again:
			xst = &xneti->xni_xstate[queued];

			/*
			 * For short packets it's always much faster to pass
			 * a single defragmented packet, even with feature-sg.
			 * Try to defragment first if the result is likely
			 * to fit into a single mbuf.
			 */
			if (m->m_pkthdr.len < MCLBYTES && m->m_next)
				(void)m_defrag(m, M_DONTWAIT);

			if (bus_dmamap_load_mbuf(
			    xneti->xni_xbusd->xbusd_dmat,
			    xst->xs_dmamap, m, BUS_DMA_NOWAIT) != 0) {
				if (m_defrag(m, M_DONTWAIT) == NULL) {
					m_freem(m);
					static struct timeval lasttime;
					if (ratecheck(&lasttime, &xni_pool_errintvl))
						printf("%s: fail defrag mbuf\n",
						    ifp->if_xname);
					continue;
				}

				if (__predict_false(bus_dmamap_load_mbuf(
				    xneti->xni_xbusd->xbusd_dmat,
				    xst->xs_dmamap, m, BUS_DMA_NOWAIT) != 0)) {
					printf("%s: cannot load mbuf\n",
					    ifp->if_xname);
					m_freem(m);
					continue;
				}
			}
			KASSERT(xst->xs_dmamap->dm_nsegs < NB_XMIT_PAGES_BATCH);
			KASSERTMSG(queued <= copycnt, "queued %d > copycnt %d",
			    queued, copycnt);

			if (__predict_false(XN_RING_FULL(
			    xst->xs_dmamap->dm_nsegs))) {
				/* Ring too full to fit the packet */
				bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
				    xst->xs_dmamap);
				m_freem(m);
				abort = true;
				break;
			}
			if (__predict_false(copycnt + xst->xs_dmamap->dm_nsegs >
			    NB_XMIT_PAGES_BATCH)) {
				/* Batch already too full, flush and retry */
				bus_dmamap_unload(xneti->xni_xbusd->xbusd_dmat,
				    xst->xs_dmamap);
				xennetback_rx_copy_process(ifp, xneti, queued,
				    copycnt);
				queued = copycnt = 0;
				goto again;
			}

			/* Now committed to send */
			xst->xs_loaded = true;
			xst->xs_m = m;
			xennetback_rx_copy_queue(xneti, xst,
			    rsp_prod_pvt, &queued, &copycnt);

			if_statinc(ifp, if_opackets);
			bpf_mtap(ifp, m, BPF_D_OUT);
		}
		KASSERT(copycnt <= NB_XMIT_PAGES_BATCH);
		KASSERT(queued <= copycnt);
		if (copycnt > 0) {
			xennetback_rx_copy_process(ifp, xneti, queued, copycnt);
			queued = copycnt = 0;
		}
		/*
		 * note that we don't use RING_FINAL_CHECK_FOR_REQUESTS()
		 * here, as the frontend doesn't notify when adding
		 * requests anyway
		 */
		if (__predict_false(abort ||
		    !RING_HAS_UNCONSUMED_REQUESTS(&xneti->xni_rxring))) {
			/* ring full */
			ifp->if_timer = 1;
			break;
		}
	}
	splx(s);
}

static void
xennetback_ifwatchdog(struct ifnet *ifp)
{
	/*
	 * We can get to the following condition: transmit stalls because the
	 * ring is full when the ifq is full too.
	 *
	 * In this case (as, unfortunately, we don't get an interrupt from xen
	 * on transmit) nothing will ever call xennetback_ifstart() again.
	 * Here we abuse the watchdog to get out of this condition.
	 */
	XENPRINTF(("xennetback_ifwatchdog\n"));
	xennetback_ifstart(ifp);
}

static int
xennetback_ifinit(struct ifnet *ifp)
{
	struct xnetback_instance *xneti = ifp->if_softc;
	int s = splnet();

	if ((ifp->if_flags & IFF_UP) == 0) {
		splx(s);
		return 0;
	}
	if (xneti->xni_status == CONNECTED)
		ifp->if_flags |= IFF_RUNNING;
	splx(s);
	return 0;
}

static void
xennetback_ifstop(struct ifnet *ifp, int disable)
{
	struct xnetback_instance *xneti = ifp->if_softc;
	int s = splnet();

	ifp->if_flags &= ~IFF_RUNNING;
	ifp->if_timer = 0;
	if (xneti->xni_status == CONNECTED) {
		xennetback_evthandler(ifp->if_softc); /* flush pending RX requests */
	}
	splx(s);
}