1 /* $NetBSD: if_xennet_xenbus.c,v 1.130 2024/01/09 18:39:53 jdolecek Exp $ */
2
3 /*
4 * Copyright (c) 2006 Manuel Bouyer.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 /*
28 * Copyright (c) 2004 Christian Limpach.
29 * All rights reserved.
30 *
31 * Redistribution and use in source and binary forms, with or without
32 * modification, are permitted provided that the following conditions
33 * are met:
34 * 1. Redistributions of source code must retain the above copyright
35 * notice, this list of conditions and the following disclaimer.
36 * 2. Redistributions in binary form must reproduce the above copyright
37 * notice, this list of conditions and the following disclaimer in the
38 * documentation and/or other materials provided with the distribution.
39 *
40 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
41 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
42 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
43 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
44 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
46 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
47 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
48 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
49 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50 */
51
52 /*
53 * This file contains the xennet frontend code required for the network
54 * communication between two Xen domains.
55 * It resembles xbd, but is a little more complex as it must deal with two
56 * rings:
57 * - the TX ring, to transmit packets to backend (inside => outside)
58 * - the RX ring, to receive packets from backend (outside => inside)
59 *
60 * The principles are as follows.
61 *
62 * For TX:
63 * The purpose is to transmit packets to the outside. The starting point is
64 * xennet_start() (the output routine of xennet), scheduled via a softint.
65 * xennet_start() generates the requests associated
66 * with the queued TX mbufs (see altq(9)).
67 * The backend's responses are processed by xennet_tx_complete(), called
68 * from xennet_start().
69 *
70 * For RX:
71 * The purpose is to process the packets received from the outside. RX buffers
72 * are pre-allocated through xennet_alloc_rx_buffer(), during xennet autoconf
73 * attach. During pre-allocation, the frontend pushes requests into the I/O
74 * ring, in preparation for incoming packets from the backend.
75 * When RX packets need to be processed, the backend takes the requests
76 * previously offered by the frontend and pushes the associated responses
77 * into the I/O ring. When done, it notifies the frontend through an event
78 * notification, which asynchronously calls xennet_handler() in the frontend.
79 * xennet_handler() processes the responses, generates the associated mbuf,
80 * and passes it to the MI layer for further processing.
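 *
 * Locking:
 * sc_tx_lock protects the free TX request list and the TX ring;
 * sc_rx_lock protects the free RX request list, the RX ring and
 * sc_free_rxreql (see struct xennet_xenbus_softc below).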
81 */
82
83 #include <sys/cdefs.h>
84 __KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.130 2024/01/09 18:39:53 jdolecek Exp $");
85
86 #include "opt_xen.h"
87 #include "opt_nfs_boot.h"
88 #include "opt_net_mpsafe.h"
89
90 #include <sys/param.h>
91 #include <sys/device.h>
92 #include <sys/conf.h>
93 #include <sys/kernel.h>
94 #include <sys/proc.h>
95 #include <sys/systm.h>
96 #include <sys/intr.h>
97 #include <sys/rndsource.h>
98
99 #include <net/if.h>
100 #include <net/if_dl.h>
101 #include <net/if_ether.h>
102 #include <net/bpf.h>
103
104 #if defined(NFS_BOOT_BOOTSTATIC)
105 #include <sys/fstypes.h>
106 #include <sys/mount.h>
107 #include <sys/statvfs.h>
108 #include <netinet/in.h>
109 #include <nfs/rpcv2.h>
110 #include <nfs/nfsproto.h>
111 #include <nfs/nfs.h>
112 #include <nfs/nfsmount.h>
113 #include <nfs/nfsdiskless.h>
114 #include <xen/if_xennetvar.h>
115 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
116
117 #include <xen/xennet_checksum.h>
118
119 #include <uvm/uvm.h>
120
121 #include <xen/intr.h>
122 #include <xen/hypervisor.h>
123 #include <xen/evtchn.h>
124 #include <xen/granttables.h>
125 #include <xen/include/public/io/netif.h>
126 #include <xen/xenpmap.h>
127
128 #include <xen/xenbus.h>
129 #include "locators.h"
130
131 #undef XENNET_DEBUG_DUMP
132 #undef XENNET_DEBUG
133
134 #ifdef XENNET_DEBUG
135 #define XEDB_FOLLOW 0x01
136 #define XEDB_INIT 0x02
137 #define XEDB_EVENT 0x04
138 #define XEDB_MBUF 0x08
139 #define XEDB_MEM 0x10
140 int xennet_debug = 0xff;
141 #define DPRINTF(x) if (xennet_debug) printf x;
142 #define DPRINTFN(n,x) if (xennet_debug & (n)) printf x;
143 #else
144 #define DPRINTF(x)
145 #define DPRINTFN(n,x)
146 #endif
147
148 #define GRANT_INVALID_REF -1 /* entry is free */
149
150 #define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
151 #define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)
152
153 struct xennet_txreq {
154 SLIST_ENTRY(xennet_txreq) txreq_next;
155 uint16_t txreq_id; /* ID passed to backend */
156 grant_ref_t txreq_gntref; /* grant ref of this request */
157 struct mbuf *txreq_m; /* mbuf being transmitted */
158 bus_dmamap_t txreq_dmamap;
159 };
160
161 struct xennet_rxreq {
162 SLIST_ENTRY(xennet_rxreq) rxreq_next;
163 uint16_t rxreq_id; /* ID passed to backend */
164 grant_ref_t rxreq_gntref; /* grant ref of this request */
165 struct mbuf *rxreq_m;
166 bus_dmamap_t rxreq_dmamap;
167 };
168
169 struct xennet_xenbus_softc {
170 device_t sc_dev;
171 struct ethercom sc_ethercom;
172 uint8_t sc_enaddr[ETHER_ADDR_LEN];
173 struct xenbus_device *sc_xbusd;
174
175 netif_tx_front_ring_t sc_tx_ring;
176 netif_rx_front_ring_t sc_rx_ring;
177
178 unsigned int sc_evtchn;
179 struct intrhand *sc_ih;
180
181 grant_ref_t sc_tx_ring_gntref;
182 grant_ref_t sc_rx_ring_gntref;
183
184 kmutex_t sc_tx_lock; /* protects free TX list, TX ring */
185 kmutex_t sc_rx_lock; /* protects free RX list, RX ring, rxreql */
186 struct xennet_txreq sc_txreqs[NET_TX_RING_SIZE];
187 struct xennet_rxreq sc_rxreqs[NET_RX_RING_SIZE];
188 SLIST_HEAD(,xennet_txreq) sc_txreq_head; /* list of free TX requests */
189 SLIST_HEAD(,xennet_rxreq) sc_rxreq_head; /* list of free RX requests */
190 int sc_free_txreql; /* number of free transmit request structs */
191 int sc_free_rxreql; /* number of free receive request structs */
192
193 int sc_backend_status; /* our status with backend */
194 #define BEST_CLOSED 0
195 #define BEST_DISCONNECTED 1
196 #define BEST_CONNECTED 2
197 #define BEST_SUSPENDED 3
198 int sc_features;
199 #define FEATURE_IPV6CSUM 0x01 /* IPv6 checksum offload */
200 #define FEATURE_SG 0x02 /* scatter-gather */
201 #define FEATURE_RX_COPY 0x04 /* RX-copy */
202 #define FEATURE_BITS "\20\1IPV6-CSUM\2SG\3RX-COPY"
203 krndsource_t sc_rnd_source;
204 struct evcnt sc_cnt_tx_defrag;
205 struct evcnt sc_cnt_tx_queue_full;
206 struct evcnt sc_cnt_tx_drop;
207 struct evcnt sc_cnt_tx_frag;
208 struct evcnt sc_cnt_rx_frag;
209 struct evcnt sc_cnt_rx_cksum_blank;
210 struct evcnt sc_cnt_rx_cksum_undefer;
211 };
212
213 static pool_cache_t if_xennetrxbuf_cache;
214 static int if_xennetrxbuf_cache_inited = 0;
215
216 static int xennet_xenbus_match(device_t, cfdata_t, void *);
217 static void xennet_xenbus_attach(device_t, device_t, void *);
218 static int xennet_xenbus_detach(device_t, int);
219 static void xennet_backend_changed(void *, XenbusState);
220
221 static void xennet_alloc_rx_buffer(struct xennet_xenbus_softc *);
222 static void xennet_free_rx_buffer(struct xennet_xenbus_softc *, bool);
223 static void xennet_tx_complete(struct xennet_xenbus_softc *);
224 static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *);
225 static int xennet_handler(void *);
226 static bool xennet_talk_to_backend(struct xennet_xenbus_softc *);
227 #ifdef XENNET_DEBUG_DUMP
228 static void xennet_hex_dump(const unsigned char *, size_t, const char *, int);
229 #endif
230
231 static int xennet_init(struct ifnet *);
232 static void xennet_stop(struct ifnet *, int);
233 static void xennet_start(struct ifnet *);
234 static int xennet_ioctl(struct ifnet *, u_long, void *);
235
236 static bool xennet_xenbus_suspend(device_t dev, const pmf_qual_t *);
237 static bool xennet_xenbus_resume(device_t dev, const pmf_qual_t *);
238
239 CFATTACH_DECL3_NEW(xennet, sizeof(struct xennet_xenbus_softc),
240 xennet_xenbus_match, xennet_xenbus_attach, xennet_xenbus_detach, NULL,
241 NULL, NULL, DVF_DETACH_SHUTDOWN);
242
243 static int
244 xennet_xenbus_match(device_t parent, cfdata_t match, void *aux)
245 {
246 struct xenbusdev_attach_args *xa = aux;
247
248 if (strcmp(xa->xa_type, "vif") != 0)
249 return 0;
250
251 if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
252 match->cf_loc[XENBUSCF_ID] != xa->xa_id)
253 return 0;
254
255 return 1;
256 }
257
258 static void
259 xennet_xenbus_attach(device_t parent, device_t self, void *aux)
260 {
261 struct xennet_xenbus_softc *sc = device_private(self);
262 struct xenbusdev_attach_args *xa = aux;
263 struct ifnet *ifp = &sc->sc_ethercom.ec_if;
264 int err;
265 netif_tx_sring_t *tx_ring;
266 netif_rx_sring_t *rx_ring;
267 RING_IDX i;
268 char *e, *p;
269 unsigned long uval;
270 extern int ifqmaxlen; /* XXX */
271 char mac[32];
272 char buf[64];
273 bus_size_t maxsz;
274 int nsegs;
275
276 aprint_normal(": Xen Virtual Network Interface\n");
277 sc->sc_dev = self;
278
279 sc->sc_xbusd = xa->xa_xbusd;
280 sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;
281
282 /* read feature support flags */
283 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
284 "feature-rx-copy", &uval, 10);
285 if (!err && uval == 1)
286 sc->sc_features |= FEATURE_RX_COPY;
287 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
288 "feature-ipv6-csum-offload", &uval, 10);
289 if (!err && uval == 1)
290 sc->sc_features |= FEATURE_IPV6CSUM;
291 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
292 "feature-sg", &uval, 10);
293 if (!err && uval == 1)
294 sc->sc_features |= FEATURE_SG;
295 snprintb(buf, sizeof(buf), FEATURE_BITS, sc->sc_features);
296 aprint_normal_dev(sc->sc_dev, "backend features %s\n", buf);
297
298 /* xenbus ensures two devices can't be probed at the same time */
299 if (if_xennetrxbuf_cache_inited == 0) {
300 if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
301 "xnfrx", NULL, IPL_NET, NULL, NULL, NULL);
302 if_xennetrxbuf_cache_inited = 1;
303 }
304
305 /* initialize free TX and RX request lists */
306 if (sc->sc_features & FEATURE_SG) {
307 maxsz = ETHER_MAX_LEN_JUMBO;
308 /*
309 * Linux netback drops the packet if the request has more
310 * segments than XEN_NETIF_NR_SLOTS_MIN (== 18). With 2KB
311 * MCLBYTES this means maximum packet size 36KB, in reality
312 * less due to mbuf chain fragmentation.
313 */
314 nsegs = XEN_NETIF_NR_SLOTS_MIN;
315 } else {
316 maxsz = PAGE_SIZE;
317 nsegs = 1;
318 }
319 mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET);
320 SLIST_INIT(&sc->sc_txreq_head);
321 for (i = 0; i < NET_TX_RING_SIZE; i++) {
322 struct xennet_txreq *txreq = &sc->sc_txreqs[i];
323
324 txreq->txreq_id = i;
325 if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, maxsz, nsegs,
326 PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
327 &txreq->txreq_dmamap) != 0)
328 break;
329
330 SLIST_INSERT_HEAD(&sc->sc_txreq_head, &sc->sc_txreqs[i],
331 txreq_next);
332 }
333 sc->sc_free_txreql = i;
334
335 mutex_init(&sc->sc_rx_lock, MUTEX_DEFAULT, IPL_NET);
336 SLIST_INIT(&sc->sc_rxreq_head);
337 for (i = 0; i < NET_RX_RING_SIZE; i++) {
338 struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
339 rxreq->rxreq_id = i;
340 if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, maxsz, nsegs,
341 PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
342 &rxreq->rxreq_dmamap) != 0)
343 break;
344 rxreq->rxreq_gntref = GRANT_INVALID_REF;
345 SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next);
346 }
347 sc->sc_free_rxreql = i;
348 if (sc->sc_free_rxreql == 0) {
349 aprint_error_dev(self, "failed to allocate rx memory\n");
350 return;
351 }
352
353 /* read mac address */
354 err = xenbus_read(NULL, sc->sc_xbusd->xbusd_path, "mac",
355 mac, sizeof(mac));
356 if (err) {
357 aprint_error_dev(self, "can't read mac address, err %d\n", err);
358 return;
359 }
360 for (i = 0, p = mac; i < ETHER_ADDR_LEN; i++) {
361 sc->sc_enaddr[i] = strtoul(p, &e, 16);
362 if ((e[0] == '\0' && i != 5) && e[0] != ':') {
363 aprint_error_dev(self,
364 "%s is not a valid mac address\n", mac);
365 return;
366 }
367 p = &e[1];
368 }
369 aprint_normal_dev(self, "MAC address %s\n",
370 ether_sprintf(sc->sc_enaddr));
371
372 /* Initialize ifnet structure and attach interface */
373 strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
374 sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
375 if (sc->sc_features & FEATURE_SG)
376 sc->sc_ethercom.ec_capabilities |= ETHERCAP_JUMBO_MTU;
377 ifp->if_softc = sc;
378 ifp->if_start = xennet_start;
379 ifp->if_ioctl = xennet_ioctl;
380 ifp->if_init = xennet_init;
381 ifp->if_stop = xennet_stop;
382 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
383 ifp->if_extflags = IFEF_MPSAFE;
384 ifp->if_snd.ifq_maxlen = uimax(ifqmaxlen, NET_TX_RING_SIZE * 2);
385 ifp->if_capabilities =
386 IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx
387 | IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx
388 | IFCAP_CSUM_UDPv6_Rx
389 | IFCAP_CSUM_TCPv6_Rx;
390 #define XN_M_CSUM_SUPPORTED \
391 (M_CSUM_TCPv4 | M_CSUM_UDPv4 | M_CSUM_TCPv6 | M_CSUM_UDPv6)
392
393 if (sc->sc_features & FEATURE_IPV6CSUM) {
394 /*
395 * If backend supports IPv6 csum offloading, we can skip
396 * IPv6 csum for Tx packets. Rx packet validation can
397 * be skipped regardless.
398 */
399 ifp->if_capabilities |=
400 IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_TCPv6_Tx;
401 }
402
403 IFQ_SET_MAXLEN(&ifp->if_snd, uimax(2 * NET_TX_RING_SIZE, IFQ_MAXLEN));
404 IFQ_SET_READY(&ifp->if_snd);
405 if_attach(ifp);
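/*
 * xennet_handler() kicks transmission from interrupt context via
 * if_schedule_deferred_start(), so initialize deferred start here
 * (NULL selects the default handler, which calls if_start).
 */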
406 if_deferred_start_init(ifp, NULL);
407 ether_ifattach(ifp, sc->sc_enaddr);
408
409 /* alloc shared rings */
410 tx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
411 UVM_KMF_WIRED);
412 rx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
413 UVM_KMF_WIRED);
414 if (tx_ring == NULL || rx_ring == NULL)
415 panic("%s: can't alloc rings", device_xname(self));
416
417 sc->sc_tx_ring.sring = tx_ring;
418 sc->sc_rx_ring.sring = rx_ring;
419
420 rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev),
421 RND_TYPE_NET, RND_FLAG_DEFAULT);
422
423 evcnt_attach_dynamic(&sc->sc_cnt_tx_defrag, EVCNT_TYPE_MISC,
424 NULL, device_xname(sc->sc_dev), "Tx packet defrag");
425 evcnt_attach_dynamic(&sc->sc_cnt_tx_frag, EVCNT_TYPE_MISC,
426 NULL, device_xname(sc->sc_dev), "Tx multi-segment packet");
427 evcnt_attach_dynamic(&sc->sc_cnt_tx_drop, EVCNT_TYPE_MISC,
428 NULL, device_xname(sc->sc_dev), "Tx packet dropped");
429 evcnt_attach_dynamic(&sc->sc_cnt_tx_queue_full, EVCNT_TYPE_MISC,
430 NULL, device_xname(sc->sc_dev), "Tx queue full");
431 evcnt_attach_dynamic(&sc->sc_cnt_rx_frag, EVCNT_TYPE_MISC,
432 NULL, device_xname(sc->sc_dev), "Rx multi-segment packet");
433 evcnt_attach_dynamic(&sc->sc_cnt_rx_cksum_blank, EVCNT_TYPE_MISC,
434 NULL, device_xname(sc->sc_dev), "Rx csum blank");
435 evcnt_attach_dynamic(&sc->sc_cnt_rx_cksum_undefer, EVCNT_TYPE_MISC,
436 NULL, device_xname(sc->sc_dev), "Rx csum undeferred");
437
438 if (!pmf_device_register(self, xennet_xenbus_suspend,
439 xennet_xenbus_resume))
440 aprint_error_dev(self, "couldn't establish power handler\n");
441 else
442 pmf_class_network_register(self, ifp);
443
444 /* resume shared structures and tell backend that we are ready */
445 if (xennet_xenbus_resume(self, PMF_Q_NONE) == false) {
446 uvm_km_free(kernel_map, (vaddr_t)tx_ring, PAGE_SIZE,
447 UVM_KMF_WIRED);
448 uvm_km_free(kernel_map, (vaddr_t)rx_ring, PAGE_SIZE,
449 UVM_KMF_WIRED);
450 return;
451 }
452 }
453
454 static int
455 xennet_xenbus_detach(device_t self, int flags)
456 {
457 struct xennet_xenbus_softc *sc = device_private(self);
458 struct ifnet *ifp = &sc->sc_ethercom.ec_if;
459
460 if ((flags & (DETACH_SHUTDOWN | DETACH_FORCE)) == DETACH_SHUTDOWN) {
461 /* Trigger state transition with backend */
462 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosing);
463 return EBUSY;
464 }
465
466 DPRINTF(("%s: xennet_xenbus_detach\n", device_xname(self)));
467
468 /* stop interface */
469 IFNET_LOCK(ifp);
470 xennet_stop(ifp, 1);
471 IFNET_UNLOCK(ifp);
472 if (sc->sc_ih != NULL) {
473 xen_intr_disestablish(sc->sc_ih);
474 sc->sc_ih = NULL;
475 }
476
477 /* collect any outstanding TX responses */
478 mutex_enter(&sc->sc_tx_lock);
479 xennet_tx_complete(sc);
480 while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
481 kpause("xndetach", true, hz/2, &sc->sc_tx_lock);
482 xennet_tx_complete(sc);
483 }
484 mutex_exit(&sc->sc_tx_lock);
485
486 mutex_enter(&sc->sc_rx_lock);
487 xennet_free_rx_buffer(sc, true);
488 mutex_exit(&sc->sc_rx_lock);
489
490 ether_ifdetach(ifp);
491 if_detach(ifp);
492
493 evcnt_detach(&sc->sc_cnt_tx_defrag);
494 evcnt_detach(&sc->sc_cnt_tx_frag);
495 evcnt_detach(&sc->sc_cnt_tx_drop);
496 evcnt_detach(&sc->sc_cnt_tx_queue_full);
497 evcnt_detach(&sc->sc_cnt_rx_frag);
498 evcnt_detach(&sc->sc_cnt_rx_cksum_blank);
499 evcnt_detach(&sc->sc_cnt_rx_cksum_undefer);
500
501 /* Unhook the entropy source. */
502 rnd_detach_source(&sc->sc_rnd_source);
503
504 /* Wait until the tx/rx rings stop being used by backend */
505 mutex_enter(&sc->sc_tx_lock);
506 while (xengnt_status(sc->sc_tx_ring_gntref))
507 kpause("xntxref", true, hz/2, &sc->sc_tx_lock);
508 xengnt_revoke_access(sc->sc_tx_ring_gntref);
509 mutex_exit(&sc->sc_tx_lock);
510 uvm_km_free(kernel_map, (vaddr_t)sc->sc_tx_ring.sring, PAGE_SIZE,
511 UVM_KMF_WIRED);
512 mutex_enter(&sc->sc_rx_lock);
513 while (xengnt_status(sc->sc_rx_ring_gntref))
514 kpause("xnrxref", true, hz/2, &sc->sc_rx_lock);
515 xengnt_revoke_access(sc->sc_rx_ring_gntref);
516 mutex_exit(&sc->sc_rx_lock);
517 uvm_km_free(kernel_map, (vaddr_t)sc->sc_rx_ring.sring, PAGE_SIZE,
518 UVM_KMF_WIRED);
519
520 pmf_device_deregister(self);
521
522 sc->sc_backend_status = BEST_DISCONNECTED;
523
524 DPRINTF(("%s: xennet_xenbus_detach done\n", device_xname(self)));
525 return 0;
526 }
527
528 static bool
529 xennet_xenbus_resume(device_t dev, const pmf_qual_t *qual)
530 {
531 struct xennet_xenbus_softc *sc = device_private(dev);
532 int error;
533 netif_tx_sring_t *tx_ring;
534 netif_rx_sring_t *rx_ring;
535 paddr_t ma;
536
537 /* All grants were removed during suspend */
538 sc->sc_tx_ring_gntref = GRANT_INVALID_REF;
539 sc->sc_rx_ring_gntref = GRANT_INVALID_REF;
540
541 mutex_enter(&sc->sc_rx_lock);
542 /* Free but don't revoke, the grant is gone */
543 xennet_free_rx_buffer(sc, false);
544 KASSERT(sc->sc_free_rxreql == NET_RX_RING_SIZE);
545 mutex_exit(&sc->sc_rx_lock);
546
547 tx_ring = sc->sc_tx_ring.sring;
548 rx_ring = sc->sc_rx_ring.sring;
549
550 /* Initialize rings */
551 memset(tx_ring, 0, PAGE_SIZE);
552 SHARED_RING_INIT(tx_ring);
553 FRONT_RING_INIT(&sc->sc_tx_ring, tx_ring, PAGE_SIZE);
554
555 memset(rx_ring, 0, PAGE_SIZE);
556 SHARED_RING_INIT(rx_ring);
557 FRONT_RING_INIT(&sc->sc_rx_ring, rx_ring, PAGE_SIZE);
558
559 (void)pmap_extract_ma(pmap_kernel(), (vaddr_t)tx_ring, &ma);
560 error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_tx_ring_gntref);
561 if (error)
562 goto abort_resume;
563 (void)pmap_extract_ma(pmap_kernel(), (vaddr_t)rx_ring, &ma);
564 error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_rx_ring_gntref);
565 if (error)
566 goto abort_resume;
567
568 if (sc->sc_ih != NULL) {
569 xen_intr_disestablish(sc->sc_ih);
570 sc->sc_ih = NULL;
571 }
572 error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
573 if (error)
574 goto abort_resume;
575 aprint_verbose_dev(dev, "using event channel %d\n",
576 sc->sc_evtchn);
577 sc->sc_ih = xen_intr_establish_xname(-1, &xen_pic, sc->sc_evtchn,
578 IST_LEVEL, IPL_NET, &xennet_handler, sc, true, device_xname(dev));
579 KASSERT(sc->sc_ih != NULL);
580
581 /* Re-fill Rx ring */
582 mutex_enter(&sc->sc_rx_lock);
583 xennet_alloc_rx_buffer(sc);
584 KASSERT(sc->sc_free_rxreql == 0);
585 mutex_exit(&sc->sc_rx_lock);
586
587 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateInitialised);
588
589 if (sc->sc_backend_status == BEST_SUSPENDED) {
590 if (xennet_talk_to_backend(sc)) {
591 xenbus_device_resume(sc->sc_xbusd);
592 hypervisor_unmask_event(sc->sc_evtchn);
593 xenbus_switch_state(sc->sc_xbusd, NULL,
594 XenbusStateConnected);
595 }
596 }
597
598 return true;
599
600 abort_resume:
601 xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device");
602 return false;
603 }
604
605 static bool
606 xennet_talk_to_backend(struct xennet_xenbus_softc *sc)
607 {
608 int error;
609 struct xenbus_transaction *xbt;
610 const char *errmsg;
611
612 again:
613 xbt = xenbus_transaction_start();
614 if (xbt == NULL)
615 return false;
616 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
617 "vifname", "%s", device_xname(sc->sc_dev));
618 if (error) {
619 errmsg = "vifname";
620 goto abort_transaction;
621 }
622 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
623 "tx-ring-ref","%u", sc->sc_tx_ring_gntref);
624 if (error) {
625 errmsg = "writing tx ring-ref";
626 goto abort_transaction;
627 }
628 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
629 "rx-ring-ref","%u", sc->sc_rx_ring_gntref);
630 if (error) {
631 errmsg = "writing rx ring-ref";
632 goto abort_transaction;
633 }
634 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
635 "request-rx-copy", "%u", 1);
636 if (error) {
637 errmsg = "writing request-rx-copy";
638 goto abort_transaction;
639 }
640 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
641 "feature-rx-notify", "%u", 1);
642 if (error) {
643 errmsg = "writing feature-rx-notify";
644 goto abort_transaction;
645 }
646 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
647 "feature-ipv6-csum-offload", "%u", 1);
648 if (error) {
649 errmsg = "writing feature-ipv6-csum-offload";
650 goto abort_transaction;
651 }
652 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
653 "feature-sg", "%u", 1);
654 if (error) {
655 errmsg = "writing feature-sg";
656 goto abort_transaction;
657 }
658 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
659 "event-channel", "%u", sc->sc_evtchn);
660 if (error) {
661 errmsg = "writing event channel";
662 goto abort_transaction;
663 }
664 error = xenbus_transaction_end(xbt, 0);
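/* EAGAIN means the transaction raced with another xenstore update; retry. */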
665 if (error == EAGAIN)
666 goto again;
667 if (error) {
668 xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction");
669 return false;
670 }
671 mutex_enter(&sc->sc_rx_lock);
672 xennet_alloc_rx_buffer(sc);
673 mutex_exit(&sc->sc_rx_lock);
674
675 sc->sc_backend_status = BEST_CONNECTED;
676
677 return true;
678
679 abort_transaction:
680 xenbus_transaction_end(xbt, 1);
681 xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg);
682 return false;
683 }
684
685 static bool
686 xennet_xenbus_suspend(device_t dev, const pmf_qual_t *qual)
687 {
688 struct xennet_xenbus_softc *sc = device_private(dev);
689
690 /*
691 * xennet_stop() is called by pmf(9) before xennet_xenbus_suspend(),
692 * so we do not mask event channel here
693 */
694
695 mutex_enter(&sc->sc_tx_lock);
696
697 /* collect any outstanding TX responses */
698 xennet_tx_complete(sc);
699 while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) {
700 kpause("xnsuspend", true, hz/2, &sc->sc_tx_lock);
701 xennet_tx_complete(sc);
702 }
703 KASSERT(sc->sc_free_txreql == NET_TX_RING_SIZE);
704 mutex_exit(&sc->sc_tx_lock);
705
706 /*
707 * dom0 may still use references to the grants we gave away
708 * earlier during RX buffer allocation. So we do not free RX buffers
709 * here, as dom0 does not expect the guest domain to suddenly revoke
710 * access to these grants.
711 */
712 sc->sc_backend_status = BEST_SUSPENDED;
713
714 xenbus_device_suspend(sc->sc_xbusd);
715 aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn);
716
717 return true;
718 }
719
720 static void xennet_backend_changed(void *arg, XenbusState new_state)
721 {
722 struct xennet_xenbus_softc *sc = device_private((device_t)arg);
723 DPRINTF(("%s: new backend state %d\n",
724 device_xname(sc->sc_dev), new_state));
725
726 switch (new_state) {
727 case XenbusStateInitialising:
728 case XenbusStateInitialised:
729 case XenbusStateConnected:
730 break;
731 case XenbusStateClosing:
732 sc->sc_backend_status = BEST_CLOSED;
733 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
734 break;
735 case XenbusStateInitWait:
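/*
 * The backend is waiting for us: publish our ring references and
 * event channel via xennet_talk_to_backend(), unless we are already
 * connected or suspended (the resume path handles the latter).
 */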
736 if (sc->sc_backend_status == BEST_CONNECTED
737 || sc->sc_backend_status == BEST_SUSPENDED)
738 break;
739
740 if (xennet_talk_to_backend(sc))
741 xenbus_switch_state(sc->sc_xbusd, NULL,
742 XenbusStateConnected);
743 break;
744 case XenbusStateUnknown:
745 default:
746 panic("bad backend state %d", new_state);
747 }
748 }
749
750 /*
751 * Allocate RX buffers and put the associated request structures
752 * in the ring. This allows the backend to use them to communicate with the
753 * frontend when some data is destined for the frontend.
754 */
755 static void
756 xennet_alloc_rx_buffer(struct xennet_xenbus_softc *sc)
757 {
758 RING_IDX req_prod = sc->sc_rx_ring.req_prod_pvt;
759 RING_IDX i;
760 netif_rx_request_t *rxreq;
761 struct xennet_rxreq *req;
762 int otherend_id, notify;
763 struct mbuf *m;
764 vaddr_t va;
765 paddr_t pa, ma;
766 struct ifnet *ifp = &sc->sc_ethercom.ec_if;
767
768 KASSERT(mutex_owned(&sc->sc_rx_lock));
769
770 otherend_id = sc->sc_xbusd->xbusd_otherend_id;
771
772 for (i = 0; sc->sc_free_rxreql != 0; i++) {
773 req = SLIST_FIRST(&sc->sc_rxreq_head);
774 KASSERT(req != NULL);
775 KASSERT(req == &sc->sc_rxreqs[req->rxreq_id]);
776 KASSERT(req->rxreq_m == NULL);
777 KASSERT(req->rxreq_gntref == GRANT_INVALID_REF);
778
779 MGETHDR(m, M_DONTWAIT, MT_DATA);
780 if (__predict_false(m == NULL)) {
781 printf("%s: rx no mbuf\n", ifp->if_xname);
782 break;
783 }
784
785 va = (vaddr_t)pool_cache_get_paddr(
786 if_xennetrxbuf_cache, PR_NOWAIT, &pa);
787 if (__predict_false(va == 0)) {
788 printf("%s: rx no cluster\n", ifp->if_xname);
789 m_freem(m);
790 break;
791 }
792
793 MEXTADD(m, va, PAGE_SIZE,
794 M_DEVBUF, xennet_rx_mbuf_free, NULL);
795 m->m_len = m->m_pkthdr.len = PAGE_SIZE;
796 m->m_ext.ext_paddr = pa;
797 m->m_flags |= M_EXT_RW; /* we own the buffer */
798
799 /* Set M_EXT_CLUSTER so that load_mbuf uses m_ext.ext_paddr */
800 m->m_flags |= M_EXT_CLUSTER;
801 if (__predict_false(bus_dmamap_load_mbuf(
802 sc->sc_xbusd->xbusd_dmat,
803 req->rxreq_dmamap, m, BUS_DMA_NOWAIT) != 0)) {
804 printf("%s: rx mbuf load failed", ifp->if_xname);
805 m->m_flags &= ~M_EXT_CLUSTER;
806 m_freem(m);
807 break;
808 }
809 m->m_flags &= ~M_EXT_CLUSTER;
810
811 KASSERT(req->rxreq_dmamap->dm_nsegs == 1);
812 ma = req->rxreq_dmamap->dm_segs[0].ds_addr;
813
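/*
 * Grant the backend write access to the page backing this buffer
 * (flag 0, i.e. not GNTMAP_readonly): with RX-copy the backend
 * copies the received packet into it.
 */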
814 if (xengnt_grant_access(otherend_id, trunc_page(ma),
815 0, &req->rxreq_gntref) != 0) {
816 m_freem(m);
817 break;
818 }
819
820 req->rxreq_m = m;
821
822 rxreq = RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i);
823 rxreq->id = req->rxreq_id;
824 rxreq->gref = req->rxreq_gntref;
825
826 SLIST_REMOVE_HEAD(&sc->sc_rxreq_head, rxreq_next);
827 sc->sc_free_rxreql--;
828 }
829
830 /* Notify backend if more Rx is possible */
831 if (i > 0) {
832 sc->sc_rx_ring.req_prod_pvt = req_prod + i;
833 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_rx_ring, notify);
834 if (notify)
835 hypervisor_notify_via_evtchn(sc->sc_evtchn);
836 }
837 }
838
839 /*
840 * Reclaim all RX buffers used by the I/O ring between frontend and backend
841 */
842 static void
843 xennet_free_rx_buffer(struct xennet_xenbus_softc *sc, bool revoke)
844 {
845 RING_IDX i;
846
847 KASSERT(mutex_owned(&sc->sc_rx_lock));
848
849 DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev)));
850 /* get back memory from RX ring */
851 for (i = 0; i < NET_RX_RING_SIZE; i++) {
852 struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
853
854 if (rxreq->rxreq_gntref != GRANT_INVALID_REF) {
855 /*
856 * This req is still granted. Return it to the free list
857 * and revoke the grant if requested.
858 */
859 SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq,
860 rxreq_next);
861 sc->sc_free_rxreql++;
862
863 if (revoke)
864 xengnt_revoke_access(rxreq->rxreq_gntref);
865 rxreq->rxreq_gntref = GRANT_INVALID_REF;
866 }
867
868 if (rxreq->rxreq_m != NULL) {
869 m_freem(rxreq->rxreq_m);
870 rxreq->rxreq_m = NULL;
871 }
872 }
873 DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev)));
874 }
875
876 /*
877 * Returns the RX buffer to the pool cache once its associated mbuf has been freed
878 */
879 static void
880 xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg)
881 {
882 KASSERT(buf == m->m_ext.ext_buf);
883 KASSERT(arg == NULL);
884 KASSERT(m != NULL);
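/*
 * The cluster is a page-sized item from if_xennetrxbuf_cache, so
 * masking off the page offset recovers the address originally
 * obtained from pool_cache_get_paddr().
 */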
885 vaddr_t va = (vaddr_t)(buf) & ~((vaddr_t)PAGE_MASK);
886 pool_cache_put_paddr(if_xennetrxbuf_cache,
887 (void *)va, m->m_ext.ext_paddr);
888 pool_cache_put(mb_cache, m);
889 };
890
891 static void
892 xennet_rx_free_req(struct xennet_xenbus_softc *sc, struct xennet_rxreq *req)
893 {
894 KASSERT(mutex_owned(&sc->sc_rx_lock));
895
896 /* puts back the RX request in the list of free RX requests */
897 SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next);
898 sc->sc_free_rxreql++;
899
900 /*
901 * The ring needs more requests to be pushed in; allocate some
902 * RX buffers to catch up with the backend's consumption.
903 */
904 if (sc->sc_free_rxreql >= (NET_RX_RING_SIZE * 4 / 5) &&
905 __predict_true(sc->sc_backend_status == BEST_CONNECTED)) {
906 xennet_alloc_rx_buffer(sc);
907 }
908 }
909
910 /*
911 * Process the responses associated with the TX mbufs sent previously through
912 * xennet_start().
913 * Called at splsoftnet.
914 */
915 static void
916 xennet_tx_complete(struct xennet_xenbus_softc *sc)
917 {
918 struct xennet_txreq *req;
919 struct ifnet *ifp = &sc->sc_ethercom.ec_if;
920 RING_IDX resp_prod, i;
921
922 DPRINTFN(XEDB_EVENT, ("xennet_tx_complete prod %d cons %d\n",
923 sc->sc_tx_ring.sring->rsp_prod, sc->sc_tx_ring.rsp_cons));
924
925 KASSERT(mutex_owned(&sc->sc_tx_lock));
926 again:
927 resp_prod = sc->sc_tx_ring.sring->rsp_prod;
928 xen_rmb();
929 for (i = sc->sc_tx_ring.rsp_cons; i != resp_prod; i++) {
930 req = &sc->sc_txreqs[RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id];
931 KASSERT(req->txreq_id ==
932 RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id);
933 KASSERT(xengnt_status(req->txreq_gntref) == 0);
934 xengnt_revoke_access(req->txreq_gntref);
935 req->txreq_gntref = GRANT_INVALID_REF;
936
937 /* Cleanup/statistics if this is the master req of a chain */
938 if (req->txreq_m) {
939 if (__predict_false(
940 RING_GET_RESPONSE(&sc->sc_tx_ring, i)->status !=
941 NETIF_RSP_OKAY))
942 if_statinc(ifp, if_oerrors);
943 else
944 if_statinc(ifp, if_opackets);
945 bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
946 req->txreq_dmamap);
947 m_freem(req->txreq_m);
948 req->txreq_m = NULL;
949 }
950
951 SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
952 sc->sc_free_txreql++;
953 }
954 sc->sc_tx_ring.rsp_cons = resp_prod;
955 /* set new event and check for race with rsp_cons update */
956 xen_wmb();
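/*
 * Ask the backend for the next TX-completion event only once roughly
 * half of the currently outstanding requests have been answered,
 * which keeps the notification rate down.
 */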
957 sc->sc_tx_ring.sring->rsp_event =
958 resp_prod + ((sc->sc_tx_ring.sring->req_prod - resp_prod) >> 1) + 1;
959 xen_mb();
960 if (resp_prod != sc->sc_tx_ring.sring->rsp_prod)
961 goto again;
962 }
963
964 /*
965 * Xennet event handler.
966 * Collect outstanding responses for TX packets, then collect all responses
967 * for pending RX packets.
968 * Called at splnet.
969 */
970 static int
971 xennet_handler(void *arg)
972 {
973 struct xennet_xenbus_softc *sc = arg;
974 struct ifnet *ifp = &sc->sc_ethercom.ec_if;
975 RING_IDX resp_prod, i;
976 struct xennet_rxreq *req;
977 struct mbuf *m, *m0;
978 int rxflags, m0_rxflags;
979 int more_to_do;
980
981 if (sc->sc_backend_status != BEST_CONNECTED)
982 return 1;
983
984 /* Poke the Tx queue in case we ran out of Tx buffers earlier */
985 if_schedule_deferred_start(ifp);
986
987 rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
988
989 again:
990 DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n",
991 sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons));
992
993 mutex_enter(&sc->sc_rx_lock);
994 resp_prod = sc->sc_rx_ring.sring->rsp_prod;
995 xen_rmb(); /* ensure we see replies up to resp_prod */
996
997 m0 = NULL;
998 for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
999 netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
1000 req = &sc->sc_rxreqs[rx->id];
1001 KASSERT(req->rxreq_gntref != GRANT_INVALID_REF);
1002 KASSERT(req->rxreq_id == rx->id);
1003
1004 xengnt_revoke_access(req->rxreq_gntref);
1005 req->rxreq_gntref = GRANT_INVALID_REF;
1006
1007 m = req->rxreq_m;
1008 req->rxreq_m = NULL;
1009
1010 m->m_len = m->m_pkthdr.len = rx->status;
1011 bus_dmamap_sync(sc->sc_xbusd->xbusd_dmat, req->rxreq_dmamap, 0,
1012 m->m_pkthdr.len, BUS_DMASYNC_PREREAD);
1013
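/*
 * Only the head mbuf of a packet gets the mbuf owner and receiving
 * interface set; later fragments are concatenated onto it below.
 */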
1014 if (m0 == NULL) {
1015 MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
1016 m_set_rcvif(m, ifp);
1017 }
1018
1019 rxflags = rx->flags;
1020
1021 if (m0 || rxflags & NETRXF_more_data) {
1022 /*
1023 * On Rx, every fragment (even the first one) contains
1024 * just the length of the data in that fragment.
1025 */
1026 if (m0 == NULL) {
1027 m0 = m;
1028 m0_rxflags = rxflags;
1029 } else {
1030 m_cat(m0, m);
1031 m0->m_pkthdr.len += m->m_len;
1032 }
1033
1034 if (rxflags & NETRXF_more_data) {
1035 /* Still more fragments to receive */
1036 xennet_rx_free_req(sc, req);
1037 continue;
1038 }
1039
1040 sc->sc_cnt_rx_frag.ev_count++;
1041 m = m0;
1042 m0 = NULL;
1043 rxflags = m0_rxflags;
1044 }
1045
1046 if (rxflags & NETRXF_csum_blank) {
1047 xennet_checksum_fill(ifp, m, &sc->sc_cnt_rx_cksum_blank,
1048 &sc->sc_cnt_rx_cksum_undefer);
1049 } else if (rxflags & NETRXF_data_validated)
1050 m->m_pkthdr.csum_flags = XN_M_CSUM_SUPPORTED;
1051
1052 /* We're done with req */
1053 xennet_rx_free_req(sc, req);
1054
1055 /* Pass the packet up. */
1056 if_percpuq_enqueue(ifp->if_percpuq, m);
1057 }
1058 /* If the queued Rx fragments did not finish the packet, drop it */
1059 if (m0) {
1060 if_statinc(ifp, if_iqdrops);
1061 m_freem(m0);
1062 }
1063 sc->sc_rx_ring.rsp_cons = i;
1064 xen_wmb();
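/*
 * RING_FINAL_CHECK_FOR_RESPONSES() re-arms rsp_event and reports
 * whether responses arrived while we were processing; if so, loop
 * again instead of waiting for another event.
 */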
1065 RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_rx_ring, more_to_do);
1066 mutex_exit(&sc->sc_rx_lock);
1067
1068 if (more_to_do) {
1069 DPRINTF(("%s: %s more_to_do\n", ifp->if_xname, __func__));
1070 goto again;
1071 }
1072
1073 return 1;
1074 }
1075
1076 static bool
1077 xennet_submit_tx_request(struct xennet_xenbus_softc *sc, struct mbuf *m,
1078 struct xennet_txreq *req0, int *req_prod)
1079 {
1080 struct xennet_txreq *req = req0;
1081 netif_tx_request_t *txreq;
1082 int i, prod = *req_prod;
1083 const bool multiseg = (req0->txreq_dmamap->dm_nsegs > 1);
1084 const int lastseg = req0->txreq_dmamap->dm_nsegs - 1;
1085 bus_dma_segment_t *ds;
1086 SLIST_HEAD(, xennet_txreq) txchain;
1087
1088 KASSERT(mutex_owned(&sc->sc_tx_lock));
1089 KASSERT(req0->txreq_dmamap->dm_nsegs > 0);
1090
1091 bus_dmamap_sync(sc->sc_xbusd->xbusd_dmat, req->txreq_dmamap, 0,
1092 m->m_pkthdr.len, BUS_DMASYNC_POSTWRITE);
1093 MCLAIM(m, &sc->sc_ethercom.ec_tx_mowner);
1094 SLIST_INIT(&txchain);
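/*
 * Queue one TX request per DMA segment. Requests taken from the free
 * list are kept on a local txchain so they can all be returned if
 * granting any segment fails; only the first request (req0) records
 * the mbuf, which is freed later in xennet_tx_complete().
 */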
1095
1096 for (i = 0; i < req0->txreq_dmamap->dm_nsegs; i++) {
1097 KASSERT(req != NULL);
1098
1099 ds = &req0->txreq_dmamap->dm_segs[i];
1100
1101 if (__predict_false(xengnt_grant_access(
1102 sc->sc_xbusd->xbusd_otherend_id,
1103 trunc_page(ds->ds_addr),
1104 GNTMAP_readonly, &req->txreq_gntref) != 0)) {
1105 goto grant_fail;
1106 }
1107
1108 KASSERT(SLIST_FIRST(&sc->sc_txreq_head) == req);
1109 SLIST_REMOVE_HEAD(&sc->sc_txreq_head, txreq_next);
1110 SLIST_INSERT_HEAD(&txchain, req, txreq_next);
1111 sc->sc_free_txreql--;
1112 req->txreq_m = (req == req0) ? m : NULL;
1113
1114 txreq = RING_GET_REQUEST(&sc->sc_tx_ring, prod + i);
1115 txreq->id = req->txreq_id;
1116 txreq->gref = req->txreq_gntref;
1117 txreq->offset = ds->ds_addr & PAGE_MASK;
1118 /* For Tx, the first fragment's size is always set to the total packet size */
1119 txreq->size = (i == 0) ? m->m_pkthdr.len : ds->ds_len;
1120 txreq->flags = 0;
1121 if (i == 0) {
1122 if (m->m_pkthdr.csum_flags & XN_M_CSUM_SUPPORTED) {
1123 txreq->flags |= NETTXF_csum_blank;
1124 } else {
1125 #if 0
1126 /*
1127 * XXX Checksum optimization disabled
1128 * to avoid port-xen/57743.
1129 */
1130 txreq->flags |= NETTXF_data_validated;
1131 #endif
1132 }
1133 }
1134 if (multiseg && i < lastseg)
1135 txreq->flags |= NETTXF_more_data;
1136
1137 req = SLIST_FIRST(&sc->sc_txreq_head);
1138 }
1139
1140 if (i > 1)
1141 sc->sc_cnt_tx_frag.ev_count++;
1142
1143 /* All done */
1144 *req_prod += i;
1145 return true;
1146
1147 grant_fail:
1148 printf("%s: grant_access failed\n", device_xname(sc->sc_dev));
1149 while (!SLIST_EMPTY(&txchain)) {
1150 req = SLIST_FIRST(&txchain);
1151 SLIST_REMOVE_HEAD(&txchain, txreq_next);
1152 xengnt_revoke_access(req->txreq_gntref);
1153 req->txreq_gntref = GRANT_INVALID_REF;
1154 SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
1155 sc->sc_free_txreql++;
1156 }
1157 req0->txreq_m = NULL;
1158 return false;
1159 }
1160
1161 /*
1162 * The output routine of a xennet interface. Prepares mbufs for TX,
1163 * and notifies the backend when finished.
1164 * Called at splsoftnet.
1165 */
1166 void
1167 xennet_start(struct ifnet *ifp)
1168 {
1169 struct xennet_xenbus_softc *sc = ifp->if_softc;
1170 struct mbuf *m;
1171 RING_IDX req_prod;
1172 struct xennet_txreq *req;
1173 int notify;
1174
1175 mutex_enter(&sc->sc_tx_lock);
1176
1177 rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);
1178
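/* Reclaim completed transmissions first so their slots can be reused below. */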
1179 xennet_tx_complete(sc);
1180
1181 req_prod = sc->sc_tx_ring.req_prod_pvt;
1182 while (/*CONSTCOND*/1) {
1183 req = SLIST_FIRST(&sc->sc_txreq_head);
1184 if (__predict_false(req == NULL)) {
1185 if (!IFQ_IS_EMPTY(&ifp->if_snd))
1186 sc->sc_cnt_tx_queue_full.ev_count++;
1187 break;
1188 }
1189 IFQ_DEQUEUE(&ifp->if_snd, m);
1190 if (m == NULL)
1191 break;
1192
1193 /*
1194 * For short packets it's always much faster to pass a
1195 * single defragmented packet, even with feature-sg.
1196 * Try to defragment first if the result is likely to fit
1197 * into a single mbuf.
1198 */
1199 if (m->m_pkthdr.len < MCLBYTES && m->m_next)
1200 (void)m_defrag(m, M_DONTWAIT);
1201
1202 /* Try to load the mbuf as-is, if that fails defrag */
1203 if (__predict_false(bus_dmamap_load_mbuf(
1204 sc->sc_xbusd->xbusd_dmat,
1205 req->txreq_dmamap, m, BUS_DMA_NOWAIT) != 0)) {
1206 sc->sc_cnt_tx_defrag.ev_count++;
1207 if (__predict_false(m_defrag(m, M_DONTWAIT) == NULL)) {
1208 DPRINTF(("%s: defrag failed\n",
1209 device_xname(sc->sc_dev)));
1210 m_freem(m);
1211 break;
1212 }
1213
1214 if (__predict_false(bus_dmamap_load_mbuf(
1215 sc->sc_xbusd->xbusd_dmat,
1216 req->txreq_dmamap, m, BUS_DMA_NOWAIT) != 0)) {
1217 printf("%s: cannot load new mbuf len %d\n",
1218 device_xname(sc->sc_dev),
1219 m->m_pkthdr.len);
1220 m_freem(m);
1221 break;
1222 }
1223 }
1224
1225 if (req->txreq_dmamap->dm_nsegs > sc->sc_free_txreql) {
1226 /* Not enough free slots right now; drop the packet */
1227 sc->sc_cnt_tx_queue_full.ev_count++;
1228 sc->sc_cnt_tx_drop.ev_count++;
1229 bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
1230 req->txreq_dmamap);
1231 m_freem(m);
1232 break;
1233 }
1234
1235 DPRINTFN(XEDB_MBUF, ("xennet_start id %d, "
1236 "mbuf %p, buf %p, size %d\n",
1237 req->txreq_id, m, mtod(m, void *), m->m_pkthdr.len));
1238
1239 #ifdef XENNET_DEBUG_DUMP
1240 xennet_hex_dump(mtod(m, u_char *), m->m_pkthdr.len, "s",
1241 req->txreq_id);
1242 #endif
1243
1244 if (!xennet_submit_tx_request(sc, m, req, &req_prod)) {
1245 /* Grant failed; drop the packet */
1246 sc->sc_cnt_tx_drop.ev_count++;
1247 bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
1248 req->txreq_dmamap);
1249 m_freem(m);
1250 break;
1251 }
1252
1253 /*
1254 * Pass packet to bpf if there is a listener.
1255 */
1256 bpf_mtap(ifp, m, BPF_D_OUT);
1257 }
1258
1259 sc->sc_tx_ring.req_prod_pvt = req_prod;
1260 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_tx_ring, notify);
1261 if (notify)
1262 hypervisor_notify_via_evtchn(sc->sc_evtchn);
1263
1264 mutex_exit(&sc->sc_tx_lock);
1265
1266 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start() done\n",
1267 device_xname(sc->sc_dev)));
1268 }
1269
1270 int
1271 xennet_ioctl(struct ifnet *ifp, u_long cmd, void *data)
1272 {
1273 #ifdef XENNET_DEBUG
1274 struct xennet_xenbus_softc *sc = ifp->if_softc;
1275 #endif
1276 int error = 0;
1277
1278 #ifdef NET_MPSAFE
1279 #ifdef notyet
1280 /* XXX IFNET_LOCK() is not taken in some cases e.g. multicast ioctls */
1281 KASSERT(IFNET_LOCKED(ifp));
1282 #endif
1283 #endif
1284 int s = splnet();
1285
1286 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl()\n",
1287 device_xname(sc->sc_dev)));
1288 error = ether_ioctl(ifp, cmd, data);
1289 if (error == ENETRESET)
1290 error = 0;
1291
1292 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() returning %d\n",
1293 device_xname(sc->sc_dev), error));
1294
1295 splx(s);
1296
1297 return error;
1298 }
1299
1300 int
1301 xennet_init(struct ifnet *ifp)
1302 {
1303 struct xennet_xenbus_softc *sc = ifp->if_softc;
1304
1305 KASSERT(IFNET_LOCKED(ifp));
1306
1307 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_init()\n",
1308 device_xname(sc->sc_dev)));
1309
1310 if ((ifp->if_flags & IFF_RUNNING) == 0) {
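/*
 * Ask for an event on the next RX response, then unmask and kick
 * the event channel so any pending responses get processed.
 */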
1311 mutex_enter(&sc->sc_rx_lock);
1312 sc->sc_rx_ring.sring->rsp_event =
1313 sc->sc_rx_ring.rsp_cons + 1;
1314 mutex_exit(&sc->sc_rx_lock);
1315 hypervisor_unmask_event(sc->sc_evtchn);
1316 hypervisor_notify_via_evtchn(sc->sc_evtchn);
1317 }
1318 ifp->if_flags |= IFF_RUNNING;
1319
1320 return 0;
1321 }
1322
1323 void
1324 xennet_stop(struct ifnet *ifp, int disable)
1325 {
1326 struct xennet_xenbus_softc *sc = ifp->if_softc;
1327
1328 KASSERT(IFNET_LOCKED(ifp));
1329
1330 ifp->if_flags &= ~IFF_RUNNING;
1331 hypervisor_mask_event(sc->sc_evtchn);
1332 }
1333
1334 #if defined(NFS_BOOT_BOOTSTATIC)
1335 int
1336 xennet_bootstatic_callback(struct nfs_diskless *nd)
1337 {
1338 #if 0
1339 struct ifnet *ifp = nd->nd_ifp;
1340 struct xennet_xenbus_softc *sc =
1341 (struct xennet_xenbus_softc *)ifp->if_softc;
1342 #endif
1343 int flags = 0;
1344 union xen_cmdline_parseinfo xcp;
1345 struct sockaddr_in *sin;
1346
1347 memset(&xcp, 0, sizeof(xcp.xcp_netinfo));
1348 xcp.xcp_netinfo.xi_ifno = /* XXX sc->sc_ifno */ 0;
1349 xcp.xcp_netinfo.xi_root = nd->nd_root.ndm_host;
1350 xen_parse_cmdline(XEN_PARSE_NETINFO, &xcp);
1351
1352 if (xcp.xcp_netinfo.xi_root[0] != '\0') {
1353 flags |= NFS_BOOT_HAS_SERVER;
1354 if (strchr(xcp.xcp_netinfo.xi_root, ':') != NULL)
1355 flags |= NFS_BOOT_HAS_ROOTPATH;
1356 }
1357
1358 nd->nd_myip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[0]);
1359 nd->nd_gwip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[2]);
1360 nd->nd_mask.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[3]);
1361
1362 sin = (struct sockaddr_in *) &nd->nd_root.ndm_saddr;
1363 memset((void *)sin, 0, sizeof(*sin));
1364 sin->sin_len = sizeof(*sin);
1365 sin->sin_family = AF_INET;
1366 sin->sin_addr.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[1]);
1367
1368 if (nd->nd_myip.s_addr)
1369 flags |= NFS_BOOT_HAS_MYIP;
1370 if (nd->nd_gwip.s_addr)
1371 flags |= NFS_BOOT_HAS_GWIP;
1372 if (nd->nd_mask.s_addr)
1373 flags |= NFS_BOOT_HAS_MASK;
1374 if (sin->sin_addr.s_addr)
1375 flags |= NFS_BOOT_HAS_SERVADDR;
1376
1377 return flags;
1378 }
1379 #endif /* defined(NFS_BOOT_BOOTSTATIC) */
1380
1381 #ifdef XENNET_DEBUG_DUMP
1382 #define XCHR(x) hexdigits[(x) & 0xf]
1383 static void
1384 xennet_hex_dump(const unsigned char *pkt, size_t len, const char *type, int id)
1385 {
1386 size_t i, j;
1387
1388 printf("pkt %p len %zd/%zx type %s id %d\n", pkt, len, len, type, id);
1389 printf("00000000 ");
1390 for(i=0; i<len; i++) {
1391 printf("%c%c ", XCHR(pkt[i]>>4), XCHR(pkt[i]));
1392 if ((i+1) % 16 == 8)
1393 printf(" ");
1394 if ((i+1) % 16 == 0) {
1395 printf(" %c", '|');
1396 for(j=0; j<16; j++)
1397 printf("%c", pkt[i-15+j]>=32 &&
1398 pkt[i-15+j]<127?pkt[i-15+j]:'.');
1399 printf("%c\n%c%c%c%c%c%c%c%c ", '|',
1400 XCHR((i+1)>>28), XCHR((i+1)>>24),
1401 XCHR((i+1)>>20), XCHR((i+1)>>16),
1402 XCHR((i+1)>>12), XCHR((i+1)>>8),
1403 XCHR((i+1)>>4), XCHR(i+1));
1404 }
1405 }
1406 printf("\n");
1407 }
1408 #undef XCHR
1409 #endif
1410