1 /* $NetBSD: if_xennet_xenbus.c,v 1.128 2020/08/26 15:54:10 riastradh Exp $ */
2 
3 /*
4  * Copyright (c) 2006 Manuel Bouyer.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /*
28  * Copyright (c) 2004 Christian Limpach.
29  * All rights reserved.
30  *
31  * Redistribution and use in source and binary forms, with or without
32  * modification, are permitted provided that the following conditions
33  * are met:
34  * 1. Redistributions of source code must retain the above copyright
35  *    notice, this list of conditions and the following disclaimer.
36  * 2. Redistributions in binary form must reproduce the above copyright
37  *    notice, this list of conditions and the following disclaimer in the
38  *    documentation and/or other materials provided with the distribution.
39  *
40  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
41  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
42  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
43  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
44  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
45  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
46  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
47  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
48  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
49  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50  */
51 
52 /*
53  * This file contains the xennet frontend code required for the network
54  * communication between two Xen domains.
55  * It resembles xbd, but is a little more complex as it must deal with two
56  * rings:
57  * - the TX ring, to transmit packets to the backend (inside => outside)
58  * - the RX ring, to receive packets from the backend (outside => inside)
59  *
60  * The principles are as follows.
61  *
62  * For TX:
63  * The purpose is to transmit packets to the outside world. Processing starts
64  * in xennet_start() (the output routine of xennet), scheduled via a softint.
65  * xennet_start() generates the requests associated
66  * with the queued TX mbufs (see altq(9)).
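 *
 * Each queued mbuf is loaded with bus_dmamap_load_mbuf(); every resulting
 * DMA segment consumes one free struct xennet_txreq and one slot in the
 * TX ring, and its page is granted read-only to the backend (see
 * xennet_submit_tx_request()).  Multi-segment packets are chained with
 * NETTXF_more_data.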
67 * The backend's responses are processed by xennet_tx_complete(), called 68 * from xennet_start() 69 * 70 * for RX: 71 * Purpose is to process the packets received from the outside. RX buffers 72 * are pre-allocated through xennet_alloc_rx_buffer(), during xennet autoconf 73 * attach. During pre-allocation, frontend pushes requests in the I/O ring, in 74 * preparation for incoming packets from backend. 75 * When RX packets need to be processed, backend takes the requests previously 76 * offered by frontend and pushes the associated responses inside the I/O ring. 77 * When done, it notifies frontend through an event notification, which will 78 * asynchronously call xennet_handler() in frontend. 79 * xennet_handler() processes the responses, generates the associated mbuf, and 80 * passes it to the MI layer for further processing. 81 */ 82 83 #include <sys/cdefs.h> 84 __KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.128 2020/08/26 15:54:10 riastradh Exp $"); 85 86 #include "opt_xen.h" 87 #include "opt_nfs_boot.h" 88 #include "opt_net_mpsafe.h" 89 90 #include <sys/param.h> 91 #include <sys/device.h> 92 #include <sys/conf.h> 93 #include <sys/kernel.h> 94 #include <sys/proc.h> 95 #include <sys/systm.h> 96 #include <sys/intr.h> 97 #include <sys/rndsource.h> 98 99 #include <net/if.h> 100 #include <net/if_dl.h> 101 #include <net/if_ether.h> 102 #include <net/bpf.h> 103 104 #if defined(NFS_BOOT_BOOTSTATIC) 105 #include <sys/fstypes.h> 106 #include <sys/mount.h> 107 #include <sys/statvfs.h> 108 #include <netinet/in.h> 109 #include <nfs/rpcv2.h> 110 #include <nfs/nfsproto.h> 111 #include <nfs/nfs.h> 112 #include <nfs/nfsmount.h> 113 #include <nfs/nfsdiskless.h> 114 #include <xen/if_xennetvar.h> 115 #endif /* defined(NFS_BOOT_BOOTSTATIC) */ 116 117 #include <xen/xennet_checksum.h> 118 119 #include <uvm/uvm.h> 120 121 #include <xen/intr.h> 122 #include <xen/hypervisor.h> 123 #include <xen/evtchn.h> 124 #include <xen/granttables.h> 125 #include <xen/include/public/io/netif.h> 126 #include <xen/xenpmap.h> 127 128 #include <xen/xenbus.h> 129 #include "locators.h" 130 131 #undef XENNET_DEBUG_DUMP 132 #undef XENNET_DEBUG 133 134 #ifdef XENNET_DEBUG 135 #define XEDB_FOLLOW 0x01 136 #define XEDB_INIT 0x02 137 #define XEDB_EVENT 0x04 138 #define XEDB_MBUF 0x08 139 #define XEDB_MEM 0x10 140 int xennet_debug = 0xff; 141 #define DPRINTF(x) if (xennet_debug) printf x; 142 #define DPRINTFN(n,x) if (xennet_debug & (n)) printf x; 143 #else 144 #define DPRINTF(x) 145 #define DPRINTFN(n,x) 146 #endif 147 148 #define GRANT_INVALID_REF -1 /* entry is free */ 149 150 #define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE) 151 #define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE) 152 153 struct xennet_txreq { 154 SLIST_ENTRY(xennet_txreq) txreq_next; 155 uint16_t txreq_id; /* ID passed to backend */ 156 grant_ref_t txreq_gntref; /* grant ref of this request */ 157 struct mbuf *txreq_m; /* mbuf being transmitted */ 158 bus_dmamap_t txreq_dmamap; 159 }; 160 161 struct xennet_rxreq { 162 SLIST_ENTRY(xennet_rxreq) rxreq_next; 163 uint16_t rxreq_id; /* ID passed to backend */ 164 grant_ref_t rxreq_gntref; /* grant ref of this request */ 165 struct mbuf *rxreq_m; 166 bus_dmamap_t rxreq_dmamap; 167 }; 168 169 struct xennet_xenbus_softc { 170 device_t sc_dev; 171 struct ethercom sc_ethercom; 172 uint8_t sc_enaddr[ETHER_ADDR_LEN]; 173 struct xenbus_device *sc_xbusd; 174 175 netif_tx_front_ring_t sc_tx_ring; 176 netif_rx_front_ring_t sc_rx_ring; 177 178 unsigned int sc_evtchn; 179 struct intrhand *sc_ih; 180 181 
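	/*
	 * Grant references of the shared TX/RX ring pages, obtained via
	 * xenbus_grant_ring() each time the frontend (re)connects to
	 * the backend.
	 */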
grant_ref_t sc_tx_ring_gntref; 182 grant_ref_t sc_rx_ring_gntref; 183 184 kmutex_t sc_tx_lock; /* protects free TX list, TX ring */ 185 kmutex_t sc_rx_lock; /* protects free RX list, RX ring, rxreql */ 186 struct xennet_txreq sc_txreqs[NET_TX_RING_SIZE]; 187 struct xennet_rxreq sc_rxreqs[NET_RX_RING_SIZE]; 188 SLIST_HEAD(,xennet_txreq) sc_txreq_head; /* list of free TX requests */ 189 SLIST_HEAD(,xennet_rxreq) sc_rxreq_head; /* list of free RX requests */ 190 int sc_free_txreql; /* number of free transmit request structs */ 191 int sc_free_rxreql; /* number of free receive request structs */ 192 193 int sc_backend_status; /* our status with backend */ 194 #define BEST_CLOSED 0 195 #define BEST_DISCONNECTED 1 196 #define BEST_CONNECTED 2 197 #define BEST_SUSPENDED 3 198 int sc_features; 199 #define FEATURE_IPV6CSUM 0x01 /* IPv6 checksum offload */ 200 #define FEATURE_SG 0x02 /* scatter-gatter */ 201 #define FEATURE_RX_COPY 0x04 /* RX-copy */ 202 #define FEATURE_BITS "\20\1IPV6-CSUM\2SG\3RX-COPY" 203 krndsource_t sc_rnd_source; 204 struct evcnt sc_cnt_tx_defrag; 205 struct evcnt sc_cnt_tx_queue_full; 206 struct evcnt sc_cnt_tx_drop; 207 struct evcnt sc_cnt_tx_frag; 208 struct evcnt sc_cnt_rx_frag; 209 struct evcnt sc_cnt_rx_cksum_blank; 210 struct evcnt sc_cnt_rx_cksum_undefer; 211 }; 212 213 static pool_cache_t if_xennetrxbuf_cache; 214 static int if_xennetrxbuf_cache_inited = 0; 215 216 static int xennet_xenbus_match(device_t, cfdata_t, void *); 217 static void xennet_xenbus_attach(device_t, device_t, void *); 218 static int xennet_xenbus_detach(device_t, int); 219 static void xennet_backend_changed(void *, XenbusState); 220 221 static void xennet_alloc_rx_buffer(struct xennet_xenbus_softc *); 222 static void xennet_free_rx_buffer(struct xennet_xenbus_softc *, bool); 223 static void xennet_tx_complete(struct xennet_xenbus_softc *); 224 static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *); 225 static int xennet_handler(void *); 226 static bool xennet_talk_to_backend(struct xennet_xenbus_softc *); 227 #ifdef XENNET_DEBUG_DUMP 228 static void xennet_hex_dump(const unsigned char *, size_t, const char *, int); 229 #endif 230 231 static int xennet_init(struct ifnet *); 232 static void xennet_stop(struct ifnet *, int); 233 static void xennet_start(struct ifnet *); 234 static int xennet_ioctl(struct ifnet *, u_long, void *); 235 236 static bool xennet_xenbus_suspend(device_t dev, const pmf_qual_t *); 237 static bool xennet_xenbus_resume(device_t dev, const pmf_qual_t *); 238 239 CFATTACH_DECL3_NEW(xennet, sizeof(struct xennet_xenbus_softc), 240 xennet_xenbus_match, xennet_xenbus_attach, xennet_xenbus_detach, NULL, 241 NULL, NULL, DVF_DETACH_SHUTDOWN); 242 243 static int 244 xennet_xenbus_match(device_t parent, cfdata_t match, void *aux) 245 { 246 struct xenbusdev_attach_args *xa = aux; 247 248 if (strcmp(xa->xa_type, "vif") != 0) 249 return 0; 250 251 if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT && 252 match->cf_loc[XENBUSCF_ID] != xa->xa_id) 253 return 0; 254 255 return 1; 256 } 257 258 static void 259 xennet_xenbus_attach(device_t parent, device_t self, void *aux) 260 { 261 struct xennet_xenbus_softc *sc = device_private(self); 262 struct xenbusdev_attach_args *xa = aux; 263 struct ifnet *ifp = &sc->sc_ethercom.ec_if; 264 int err; 265 netif_tx_sring_t *tx_ring; 266 netif_rx_sring_t *rx_ring; 267 RING_IDX i; 268 char *e, *p; 269 unsigned long uval; 270 extern int ifqmaxlen; /* XXX */ 271 char mac[32]; 272 char buf[64]; 273 bus_size_t maxsz; 274 int nsegs; 275 276 
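	/*
	 * Attach sequence: read the backend feature flags, create DMA
	 * maps for the TX/RX request pools, parse the MAC address from
	 * the xenstore "mac" node, attach the ifnet, allocate the
	 * shared ring pages, then go through xennet_xenbus_resume() to
	 * grant the rings, bind the event channel and announce
	 * ourselves to the backend.
	 */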
aprint_normal(": Xen Virtual Network Interface\n"); 277 sc->sc_dev = self; 278 279 sc->sc_xbusd = xa->xa_xbusd; 280 sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed; 281 282 /* read feature support flags */ 283 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend, 284 "feature-rx-copy", &uval, 10); 285 if (!err && uval == 1) 286 sc->sc_features |= FEATURE_RX_COPY; 287 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend, 288 "feature-ipv6-csum-offload", &uval, 10); 289 if (!err && uval == 1) 290 sc->sc_features |= FEATURE_IPV6CSUM; 291 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend, 292 "feature-sg", &uval, 10); 293 if (!err && uval == 1) 294 sc->sc_features |= FEATURE_SG; 295 snprintb(buf, sizeof(buf), FEATURE_BITS, sc->sc_features); 296 aprint_normal_dev(sc->sc_dev, "backend features %s\n", buf); 297 298 /* xenbus ensure 2 devices can't be probed at the same time */ 299 if (if_xennetrxbuf_cache_inited == 0) { 300 if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0, 301 "xnfrx", NULL, IPL_NET, NULL, NULL, NULL); 302 if_xennetrxbuf_cache_inited = 1; 303 } 304 305 /* initialize free RX and RX request lists */ 306 if (sc->sc_features & FEATURE_SG) { 307 maxsz = ETHER_MAX_LEN_JUMBO; 308 /* 309 * Linux netback drops the packet if the request has more 310 * segments than XEN_NETIF_NR_SLOTS_MIN (== 18). With 2KB 311 * MCLBYTES this means maximum packet size 36KB, in reality 312 * less due to mbuf chain fragmentation. 313 */ 314 nsegs = XEN_NETIF_NR_SLOTS_MIN; 315 } else { 316 maxsz = PAGE_SIZE; 317 nsegs = 1; 318 } 319 mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET); 320 SLIST_INIT(&sc->sc_txreq_head); 321 for (i = 0; i < NET_TX_RING_SIZE; i++) { 322 struct xennet_txreq *txreq = &sc->sc_txreqs[i]; 323 324 txreq->txreq_id = i; 325 if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, maxsz, nsegs, 326 PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 327 &txreq->txreq_dmamap) != 0) 328 break; 329 330 SLIST_INSERT_HEAD(&sc->sc_txreq_head, &sc->sc_txreqs[i], 331 txreq_next); 332 } 333 sc->sc_free_txreql = i; 334 335 mutex_init(&sc->sc_rx_lock, MUTEX_DEFAULT, IPL_NET); 336 SLIST_INIT(&sc->sc_rxreq_head); 337 for (i = 0; i < NET_RX_RING_SIZE; i++) { 338 struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i]; 339 rxreq->rxreq_id = i; 340 if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, maxsz, nsegs, 341 PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 342 &rxreq->rxreq_dmamap) != 0) 343 break; 344 rxreq->rxreq_gntref = GRANT_INVALID_REF; 345 SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next); 346 } 347 sc->sc_free_rxreql = i; 348 if (sc->sc_free_rxreql == 0) { 349 aprint_error_dev(self, "failed to allocate rx memory\n"); 350 return; 351 } 352 353 /* read mac address */ 354 err = xenbus_read(NULL, sc->sc_xbusd->xbusd_path, "mac", 355 mac, sizeof(mac)); 356 if (err) { 357 aprint_error_dev(self, "can't read mac address, err %d\n", err); 358 return; 359 } 360 for (i = 0, p = mac; i < ETHER_ADDR_LEN; i++) { 361 sc->sc_enaddr[i] = strtoul(p, &e, 16); 362 if ((e[0] == '\0' && i != 5) && e[0] != ':') { 363 aprint_error_dev(self, 364 "%s is not a valid mac address\n", mac); 365 return; 366 } 367 p = &e[1]; 368 } 369 aprint_normal_dev(self, "MAC address %s\n", 370 ether_sprintf(sc->sc_enaddr)); 371 372 /* Initialize ifnet structure and attach interface */ 373 strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ); 374 sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU; 375 if (sc->sc_features & FEATURE_SG) 376 sc->sc_ethercom.ec_capabilities |= ETHERCAP_JUMBO_MTU; 377 
ifp->if_softc = sc; 378 ifp->if_start = xennet_start; 379 ifp->if_ioctl = xennet_ioctl; 380 ifp->if_init = xennet_init; 381 ifp->if_stop = xennet_stop; 382 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 383 ifp->if_extflags = IFEF_MPSAFE; 384 ifp->if_snd.ifq_maxlen = uimax(ifqmaxlen, NET_TX_RING_SIZE * 2); 385 ifp->if_capabilities = 386 IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx 387 | IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx 388 | IFCAP_CSUM_UDPv6_Rx 389 | IFCAP_CSUM_TCPv6_Rx; 390 #define XN_M_CSUM_SUPPORTED \ 391 (M_CSUM_TCPv4 | M_CSUM_UDPv4 | M_CSUM_TCPv6 | M_CSUM_UDPv6) 392 393 if (sc->sc_features & FEATURE_IPV6CSUM) { 394 /* 395 * If backend supports IPv6 csum offloading, we can skip 396 * IPv6 csum for Tx packets. Rx packet validation can 397 * be skipped regardless. 398 */ 399 ifp->if_capabilities |= 400 IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_TCPv6_Tx; 401 } 402 403 IFQ_SET_MAXLEN(&ifp->if_snd, uimax(2 * NET_TX_RING_SIZE, IFQ_MAXLEN)); 404 IFQ_SET_READY(&ifp->if_snd); 405 if_attach(ifp); 406 if_deferred_start_init(ifp, NULL); 407 ether_ifattach(ifp, sc->sc_enaddr); 408 409 /* alloc shared rings */ 410 tx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 411 UVM_KMF_WIRED); 412 rx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 413 UVM_KMF_WIRED); 414 if (tx_ring == NULL || rx_ring == NULL) 415 panic("%s: can't alloc rings", device_xname(self)); 416 417 sc->sc_tx_ring.sring = tx_ring; 418 sc->sc_rx_ring.sring = rx_ring; 419 420 rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev), 421 RND_TYPE_NET, RND_FLAG_DEFAULT); 422 423 evcnt_attach_dynamic(&sc->sc_cnt_tx_defrag, EVCNT_TYPE_MISC, 424 NULL, device_xname(sc->sc_dev), "Tx packet defrag"); 425 evcnt_attach_dynamic(&sc->sc_cnt_tx_frag, EVCNT_TYPE_MISC, 426 NULL, device_xname(sc->sc_dev), "Tx multi-segment packet"); 427 evcnt_attach_dynamic(&sc->sc_cnt_tx_drop, EVCNT_TYPE_MISC, 428 NULL, device_xname(sc->sc_dev), "Tx packet dropped"); 429 evcnt_attach_dynamic(&sc->sc_cnt_tx_queue_full, EVCNT_TYPE_MISC, 430 NULL, device_xname(sc->sc_dev), "Tx queue full"); 431 evcnt_attach_dynamic(&sc->sc_cnt_rx_frag, EVCNT_TYPE_MISC, 432 NULL, device_xname(sc->sc_dev), "Rx multi-segment packet"); 433 evcnt_attach_dynamic(&sc->sc_cnt_rx_cksum_blank, EVCNT_TYPE_MISC, 434 NULL, device_xname(sc->sc_dev), "Rx csum blank"); 435 evcnt_attach_dynamic(&sc->sc_cnt_rx_cksum_undefer, EVCNT_TYPE_MISC, 436 NULL, device_xname(sc->sc_dev), "Rx csum undeferred"); 437 438 if (!pmf_device_register(self, xennet_xenbus_suspend, 439 xennet_xenbus_resume)) 440 aprint_error_dev(self, "couldn't establish power handler\n"); 441 else 442 pmf_class_network_register(self, ifp); 443 444 /* resume shared structures and tell backend that we are ready */ 445 if (xennet_xenbus_resume(self, PMF_Q_NONE) == false) { 446 uvm_km_free(kernel_map, (vaddr_t)tx_ring, PAGE_SIZE, 447 UVM_KMF_WIRED); 448 uvm_km_free(kernel_map, (vaddr_t)rx_ring, PAGE_SIZE, 449 UVM_KMF_WIRED); 450 return; 451 } 452 } 453 454 static int 455 xennet_xenbus_detach(device_t self, int flags) 456 { 457 struct xennet_xenbus_softc *sc = device_private(self); 458 struct ifnet *ifp = &sc->sc_ethercom.ec_if; 459 460 if ((flags & (DETACH_SHUTDOWN | DETACH_FORCE)) == DETACH_SHUTDOWN) { 461 /* Trigger state transition with backend */ 462 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosing); 463 return EBUSY; 464 } 465 466 DPRINTF(("%s: xennet_xenbus_detach\n", device_xname(self))); 467 468 /* stop interface */ 469 IFNET_LOCK(ifp); 470 xennet_stop(ifp, 1); 471 IFNET_UNLOCK(ifp); 472 if (sc->sc_ih 
!= NULL) { 473 xen_intr_disestablish(sc->sc_ih); 474 sc->sc_ih = NULL; 475 } 476 477 /* collect any outstanding TX responses */ 478 mutex_enter(&sc->sc_tx_lock); 479 xennet_tx_complete(sc); 480 while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) { 481 kpause("xndetach", true, hz/2, &sc->sc_tx_lock); 482 xennet_tx_complete(sc); 483 } 484 mutex_exit(&sc->sc_tx_lock); 485 486 mutex_enter(&sc->sc_rx_lock); 487 xennet_free_rx_buffer(sc, true); 488 mutex_exit(&sc->sc_rx_lock); 489 490 ether_ifdetach(ifp); 491 if_detach(ifp); 492 493 evcnt_detach(&sc->sc_cnt_tx_defrag); 494 evcnt_detach(&sc->sc_cnt_tx_frag); 495 evcnt_detach(&sc->sc_cnt_tx_drop); 496 evcnt_detach(&sc->sc_cnt_tx_queue_full); 497 evcnt_detach(&sc->sc_cnt_rx_frag); 498 evcnt_detach(&sc->sc_cnt_rx_cksum_blank); 499 evcnt_detach(&sc->sc_cnt_rx_cksum_undefer); 500 501 /* Unhook the entropy source. */ 502 rnd_detach_source(&sc->sc_rnd_source); 503 504 /* Wait until the tx/rx rings stop being used by backend */ 505 mutex_enter(&sc->sc_tx_lock); 506 while (xengnt_status(sc->sc_tx_ring_gntref)) 507 kpause("xntxref", true, hz/2, &sc->sc_tx_lock); 508 xengnt_revoke_access(sc->sc_tx_ring_gntref); 509 mutex_exit(&sc->sc_tx_lock); 510 uvm_km_free(kernel_map, (vaddr_t)sc->sc_tx_ring.sring, PAGE_SIZE, 511 UVM_KMF_WIRED); 512 mutex_enter(&sc->sc_rx_lock); 513 while (xengnt_status(sc->sc_rx_ring_gntref)) 514 kpause("xnrxref", true, hz/2, &sc->sc_rx_lock); 515 xengnt_revoke_access(sc->sc_rx_ring_gntref); 516 mutex_exit(&sc->sc_rx_lock); 517 uvm_km_free(kernel_map, (vaddr_t)sc->sc_rx_ring.sring, PAGE_SIZE, 518 UVM_KMF_WIRED); 519 520 pmf_device_deregister(self); 521 522 sc->sc_backend_status = BEST_DISCONNECTED; 523 524 DPRINTF(("%s: xennet_xenbus_detach done\n", device_xname(self))); 525 return 0; 526 } 527 528 static bool 529 xennet_xenbus_resume(device_t dev, const pmf_qual_t *qual) 530 { 531 struct xennet_xenbus_softc *sc = device_private(dev); 532 int error; 533 netif_tx_sring_t *tx_ring; 534 netif_rx_sring_t *rx_ring; 535 paddr_t ma; 536 537 /* All grants were removed during suspend */ 538 sc->sc_tx_ring_gntref = GRANT_INVALID_REF; 539 sc->sc_rx_ring_gntref = GRANT_INVALID_REF; 540 541 mutex_enter(&sc->sc_rx_lock); 542 /* Free but don't revoke, the grant is gone */ 543 xennet_free_rx_buffer(sc, false); 544 KASSERT(sc->sc_free_rxreql == NET_TX_RING_SIZE); 545 mutex_exit(&sc->sc_rx_lock); 546 547 tx_ring = sc->sc_tx_ring.sring; 548 rx_ring = sc->sc_rx_ring.sring; 549 550 /* Initialize rings */ 551 memset(tx_ring, 0, PAGE_SIZE); 552 SHARED_RING_INIT(tx_ring); 553 FRONT_RING_INIT(&sc->sc_tx_ring, tx_ring, PAGE_SIZE); 554 555 memset(rx_ring, 0, PAGE_SIZE); 556 SHARED_RING_INIT(rx_ring); 557 FRONT_RING_INIT(&sc->sc_rx_ring, rx_ring, PAGE_SIZE); 558 559 (void)pmap_extract_ma(pmap_kernel(), (vaddr_t)tx_ring, &ma); 560 error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_tx_ring_gntref); 561 if (error) 562 goto abort_resume; 563 (void)pmap_extract_ma(pmap_kernel(), (vaddr_t)rx_ring, &ma); 564 error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_rx_ring_gntref); 565 if (error) 566 goto abort_resume; 567 568 if (sc->sc_ih != NULL) { 569 xen_intr_disestablish(sc->sc_ih); 570 sc->sc_ih = NULL; 571 } 572 error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn); 573 if (error) 574 goto abort_resume; 575 aprint_verbose_dev(dev, "using event channel %d\n", 576 sc->sc_evtchn); 577 sc->sc_ih = xen_intr_establish_xname(-1, &xen_pic, sc->sc_evtchn, 578 IST_LEVEL, IPL_NET, &xennet_handler, sc, true, device_xname(dev)); 579 KASSERT(sc->sc_ih != NULL); 
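	/*
	 * Both ring pages are granted and the event channel is bound,
	 * but the backend has not been told about them yet; that
	 * happens in xennet_talk_to_backend().
	 */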
580 581 /* Re-fill Rx ring */ 582 mutex_enter(&sc->sc_rx_lock); 583 xennet_alloc_rx_buffer(sc); 584 KASSERT(sc->sc_free_rxreql == 0); 585 mutex_exit(&sc->sc_rx_lock); 586 587 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateInitialised); 588 589 if (sc->sc_backend_status == BEST_SUSPENDED) { 590 if (xennet_talk_to_backend(sc)) { 591 xenbus_device_resume(sc->sc_xbusd); 592 hypervisor_unmask_event(sc->sc_evtchn); 593 xenbus_switch_state(sc->sc_xbusd, NULL, 594 XenbusStateConnected); 595 } 596 } 597 598 return true; 599 600 abort_resume: 601 xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device"); 602 return false; 603 } 604 605 static bool 606 xennet_talk_to_backend(struct xennet_xenbus_softc *sc) 607 { 608 int error; 609 struct xenbus_transaction *xbt; 610 const char *errmsg; 611 612 again: 613 xbt = xenbus_transaction_start(); 614 if (xbt == NULL) 615 return false; 616 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 617 "vifname", "%s", device_xname(sc->sc_dev)); 618 if (error) { 619 errmsg = "vifname"; 620 goto abort_transaction; 621 } 622 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 623 "tx-ring-ref","%u", sc->sc_tx_ring_gntref); 624 if (error) { 625 errmsg = "writing tx ring-ref"; 626 goto abort_transaction; 627 } 628 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 629 "rx-ring-ref","%u", sc->sc_rx_ring_gntref); 630 if (error) { 631 errmsg = "writing rx ring-ref"; 632 goto abort_transaction; 633 } 634 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 635 "request-rx-copy", "%u", 1); 636 if (error) { 637 errmsg = "writing request-rx-copy"; 638 goto abort_transaction; 639 } 640 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 641 "feature-rx-notify", "%u", 1); 642 if (error) { 643 errmsg = "writing feature-rx-notify"; 644 goto abort_transaction; 645 } 646 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 647 "feature-ipv6-csum-offload", "%u", 1); 648 if (error) { 649 errmsg = "writing feature-ipv6-csum-offload"; 650 goto abort_transaction; 651 } 652 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 653 "feature-sg", "%u", 1); 654 if (error) { 655 errmsg = "writing feature-sg"; 656 goto abort_transaction; 657 } 658 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 659 "event-channel", "%u", sc->sc_evtchn); 660 if (error) { 661 errmsg = "writing event channel"; 662 goto abort_transaction; 663 } 664 error = xenbus_transaction_end(xbt, 0); 665 if (error == EAGAIN) 666 goto again; 667 if (error) { 668 xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction"); 669 return false; 670 } 671 mutex_enter(&sc->sc_rx_lock); 672 xennet_alloc_rx_buffer(sc); 673 mutex_exit(&sc->sc_rx_lock); 674 675 sc->sc_backend_status = BEST_CONNECTED; 676 677 return true; 678 679 abort_transaction: 680 xenbus_transaction_end(xbt, 1); 681 xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg); 682 return false; 683 } 684 685 static bool 686 xennet_xenbus_suspend(device_t dev, const pmf_qual_t *qual) 687 { 688 struct xennet_xenbus_softc *sc = device_private(dev); 689 690 /* 691 * xennet_stop() is called by pmf(9) before xennet_xenbus_suspend(), 692 * so we do not mask event channel here 693 */ 694 695 mutex_enter(&sc->sc_tx_lock); 696 697 /* collect any outstanding TX responses */ 698 xennet_tx_complete(sc); 699 while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) { 700 kpause("xnsuspend", true, hz/2, &sc->sc_tx_lock); 701 xennet_tx_complete(sc); 702 } 703 KASSERT(sc->sc_free_txreql == NET_RX_RING_SIZE); 704 mutex_exit(&sc->sc_tx_lock); 705 706 /* 707 * dom0 may still 
use references to the grants we gave away 708 * earlier during RX buffers allocation. So we do not free RX buffers 709 * here, as dom0 does not expect the guest domain to suddenly revoke 710 * access to these grants. 711 */ 712 sc->sc_backend_status = BEST_SUSPENDED; 713 714 xenbus_device_suspend(sc->sc_xbusd); 715 aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn); 716 717 return true; 718 } 719 720 static void xennet_backend_changed(void *arg, XenbusState new_state) 721 { 722 struct xennet_xenbus_softc *sc = device_private((device_t)arg); 723 DPRINTF(("%s: new backend state %d\n", 724 device_xname(sc->sc_dev), new_state)); 725 726 switch (new_state) { 727 case XenbusStateInitialising: 728 case XenbusStateInitialised: 729 case XenbusStateConnected: 730 break; 731 case XenbusStateClosing: 732 sc->sc_backend_status = BEST_CLOSED; 733 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed); 734 break; 735 case XenbusStateInitWait: 736 if (sc->sc_backend_status == BEST_CONNECTED 737 || sc->sc_backend_status == BEST_SUSPENDED) 738 break; 739 740 if (xennet_talk_to_backend(sc)) 741 xenbus_switch_state(sc->sc_xbusd, NULL, 742 XenbusStateConnected); 743 break; 744 case XenbusStateUnknown: 745 default: 746 panic("bad backend state %d", new_state); 747 } 748 } 749 750 /* 751 * Allocate RX buffers and put the associated request structures 752 * in the ring. This allows the backend to use them to communicate with 753 * frontend when some data is destined to frontend 754 */ 755 static void 756 xennet_alloc_rx_buffer(struct xennet_xenbus_softc *sc) 757 { 758 RING_IDX req_prod = sc->sc_rx_ring.req_prod_pvt; 759 RING_IDX i; 760 netif_rx_request_t *rxreq; 761 struct xennet_rxreq *req; 762 int otherend_id, notify; 763 struct mbuf *m; 764 vaddr_t va; 765 paddr_t pa, ma; 766 struct ifnet *ifp = &sc->sc_ethercom.ec_if; 767 768 KASSERT(mutex_owned(&sc->sc_rx_lock)); 769 770 otherend_id = sc->sc_xbusd->xbusd_otherend_id; 771 772 for (i = 0; sc->sc_free_rxreql != 0; i++) { 773 req = SLIST_FIRST(&sc->sc_rxreq_head); 774 KASSERT(req != NULL); 775 KASSERT(req == &sc->sc_rxreqs[req->rxreq_id]); 776 KASSERT(req->rxreq_m == NULL); 777 KASSERT(req->rxreq_gntref == GRANT_INVALID_REF); 778 779 MGETHDR(m, M_DONTWAIT, MT_DATA); 780 if (__predict_false(m == NULL)) { 781 printf("%s: rx no mbuf\n", ifp->if_xname); 782 break; 783 } 784 785 va = (vaddr_t)pool_cache_get_paddr( 786 if_xennetrxbuf_cache, PR_NOWAIT, &pa); 787 if (__predict_false(va == 0)) { 788 printf("%s: rx no cluster\n", ifp->if_xname); 789 m_freem(m); 790 break; 791 } 792 793 MEXTADD(m, va, PAGE_SIZE, 794 M_DEVBUF, xennet_rx_mbuf_free, NULL); 795 m->m_len = m->m_pkthdr.len = PAGE_SIZE; 796 m->m_ext.ext_paddr = pa; 797 m->m_flags |= M_EXT_RW; /* we own the buffer */ 798 799 /* Set M_EXT_CLUSTER so that load_mbuf uses m_ext.ext_paddr */ 800 m->m_flags |= M_EXT_CLUSTER; 801 if (__predict_false(bus_dmamap_load_mbuf( 802 sc->sc_xbusd->xbusd_dmat, 803 req->rxreq_dmamap, m, BUS_DMA_NOWAIT) != 0)) { 804 printf("%s: rx mbuf load failed", ifp->if_xname); 805 m->m_flags &= ~M_EXT_CLUSTER; 806 m_freem(m); 807 break; 808 } 809 m->m_flags &= ~M_EXT_CLUSTER; 810 811 KASSERT(req->rxreq_dmamap->dm_nsegs == 1); 812 ma = req->rxreq_dmamap->dm_segs[0].ds_addr; 813 814 if (xengnt_grant_access(otherend_id, trunc_page(ma), 815 0, &req->rxreq_gntref) != 0) { 816 m_freem(m); 817 break; 818 } 819 820 req->rxreq_m = m; 821 822 rxreq = RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i); 823 rxreq->id = req->rxreq_id; 824 rxreq->gref = req->rxreq_gntref; 825 826 
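		/*
		 * The ring slot now carries the grant for this page; in
		 * RX-copy mode the backend copies the incoming frame
		 * into it and hands the id back, with the frame length,
		 * in its response.
		 */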
SLIST_REMOVE_HEAD(&sc->sc_rxreq_head, rxreq_next); 827 sc->sc_free_rxreql--; 828 } 829 830 /* Notify backend if more Rx is possible */ 831 if (i > 0) { 832 sc->sc_rx_ring.req_prod_pvt = req_prod + i; 833 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_rx_ring, notify); 834 if (notify) 835 hypervisor_notify_via_evtchn(sc->sc_evtchn); 836 } 837 } 838 839 /* 840 * Reclaim all RX buffers used by the I/O ring between frontend and backend 841 */ 842 static void 843 xennet_free_rx_buffer(struct xennet_xenbus_softc *sc, bool revoke) 844 { 845 RING_IDX i; 846 847 KASSERT(mutex_owned(&sc->sc_rx_lock)); 848 849 DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev))); 850 /* get back memory from RX ring */ 851 for (i = 0; i < NET_RX_RING_SIZE; i++) { 852 struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i]; 853 854 if (rxreq->rxreq_gntref != GRANT_INVALID_REF) { 855 /* 856 * this req is still granted. Get back the page or 857 * allocate a new one, and remap it. 858 */ 859 SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, 860 rxreq_next); 861 sc->sc_free_rxreql++; 862 863 if (revoke) 864 xengnt_revoke_access(rxreq->rxreq_gntref); 865 rxreq->rxreq_gntref = GRANT_INVALID_REF; 866 } 867 868 if (rxreq->rxreq_m != NULL) { 869 m_freem(rxreq->rxreq_m); 870 rxreq->rxreq_m = NULL; 871 } 872 } 873 DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev))); 874 } 875 876 /* 877 * Clears a used RX request when its associated mbuf has been processed 878 */ 879 static void 880 xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg) 881 { 882 KASSERT(buf == m->m_ext.ext_buf); 883 KASSERT(arg == NULL); 884 KASSERT(m != NULL); 885 vaddr_t va = (vaddr_t)(buf) & ~((vaddr_t)PAGE_MASK); 886 pool_cache_put_paddr(if_xennetrxbuf_cache, 887 (void *)va, m->m_ext.ext_paddr); 888 pool_cache_put(mb_cache, m); 889 }; 890 891 static void 892 xennet_rx_free_req(struct xennet_xenbus_softc *sc, struct xennet_rxreq *req) 893 { 894 KASSERT(mutex_owned(&sc->sc_rx_lock)); 895 896 /* puts back the RX request in the list of free RX requests */ 897 SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next); 898 sc->sc_free_rxreql++; 899 900 /* 901 * ring needs more requests to be pushed in, allocate some 902 * RX buffers to catch-up with backend's consumption 903 */ 904 if (sc->sc_free_rxreql >= (NET_RX_RING_SIZE * 4 / 5) && 905 __predict_true(sc->sc_backend_status == BEST_CONNECTED)) { 906 xennet_alloc_rx_buffer(sc); 907 } 908 } 909 910 /* 911 * Process responses associated to the TX mbufs sent previously through 912 * xennet_start() 913 * Called at splsoftnet. 
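 * Completed grants are revoked, the DMA maps unloaded and the request
 * structures returned to the free list; rsp_event is then re-armed and
 * rsp_prod re-checked to close the race with the backend.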
914 */ 915 static void 916 xennet_tx_complete(struct xennet_xenbus_softc *sc) 917 { 918 struct xennet_txreq *req; 919 struct ifnet *ifp = &sc->sc_ethercom.ec_if; 920 RING_IDX resp_prod, i; 921 922 DPRINTFN(XEDB_EVENT, ("xennet_tx_complete prod %d cons %d\n", 923 sc->sc_tx_ring.sring->rsp_prod, sc->sc_tx_ring.rsp_cons)); 924 925 KASSERT(mutex_owned(&sc->sc_tx_lock)); 926 again: 927 resp_prod = sc->sc_tx_ring.sring->rsp_prod; 928 xen_rmb(); 929 for (i = sc->sc_tx_ring.rsp_cons; i != resp_prod; i++) { 930 req = &sc->sc_txreqs[RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id]; 931 KASSERT(req->txreq_id == 932 RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id); 933 KASSERT(xengnt_status(req->txreq_gntref) == 0); 934 xengnt_revoke_access(req->txreq_gntref); 935 req->txreq_gntref = GRANT_INVALID_REF; 936 937 /* Cleanup/statistics if this is the master req of a chain */ 938 if (req->txreq_m) { 939 if (__predict_false( 940 RING_GET_RESPONSE(&sc->sc_tx_ring, i)->status != 941 NETIF_RSP_OKAY)) 942 if_statinc(ifp, if_oerrors); 943 else 944 if_statinc(ifp, if_opackets); 945 bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat, 946 req->txreq_dmamap); 947 m_freem(req->txreq_m); 948 req->txreq_m = NULL; 949 } 950 951 SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next); 952 sc->sc_free_txreql++; 953 } 954 955 sc->sc_tx_ring.rsp_cons = resp_prod; 956 /* set new event and check for race with rsp_cons update */ 957 sc->sc_tx_ring.sring->rsp_event = 958 resp_prod + ((sc->sc_tx_ring.sring->req_prod - resp_prod) >> 1) + 1; 959 xen_wmb(); 960 if (resp_prod != sc->sc_tx_ring.sring->rsp_prod) 961 goto again; 962 } 963 964 /* 965 * Xennet event handler. 966 * Get outstanding responses of TX packets, then collect all responses of 967 * pending RX packets 968 * Called at splnet. 969 */ 970 static int 971 xennet_handler(void *arg) 972 { 973 struct xennet_xenbus_softc *sc = arg; 974 struct ifnet *ifp = &sc->sc_ethercom.ec_if; 975 RING_IDX resp_prod, i; 976 struct xennet_rxreq *req; 977 struct mbuf *m, *m0; 978 int rxflags, m0_rxflags; 979 int more_to_do; 980 981 if (sc->sc_backend_status != BEST_CONNECTED) 982 return 1; 983 984 /* Poke Tx queue if we run out of Tx buffers earlier */ 985 if_schedule_deferred_start(ifp); 986 987 rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt); 988 989 again: 990 DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n", 991 sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons)); 992 993 mutex_enter(&sc->sc_rx_lock); 994 resp_prod = sc->sc_rx_ring.sring->rsp_prod; 995 xen_rmb(); /* ensure we see replies up to resp_prod */ 996 997 m0 = NULL; 998 for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) { 999 netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i); 1000 req = &sc->sc_rxreqs[rx->id]; 1001 KASSERT(req->rxreq_gntref != GRANT_INVALID_REF); 1002 KASSERT(req->rxreq_id == rx->id); 1003 1004 xengnt_revoke_access(req->rxreq_gntref); 1005 req->rxreq_gntref = GRANT_INVALID_REF; 1006 1007 m = req->rxreq_m; 1008 req->rxreq_m = NULL; 1009 1010 m->m_len = m->m_pkthdr.len = rx->status; 1011 bus_dmamap_sync(sc->sc_xbusd->xbusd_dmat, req->rxreq_dmamap, 0, 1012 m->m_pkthdr.len, BUS_DMASYNC_PREREAD); 1013 1014 if (m0 == NULL) { 1015 MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner); 1016 m_set_rcvif(m, ifp); 1017 } 1018 1019 rxflags = rx->flags; 1020 1021 if (m0 || rxflags & NETRXF_more_data) { 1022 /* 1023 * On Rx, every fragment (even first one) contain 1024 * just length of data in the fragment. 
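			 * The total packet length is accumulated by
			 * m_cat()ing the fragments into m0 below; only the
			 * flags of the first fragment (m0_rxflags) are kept
			 * for the checksum handling.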
1025 */ 1026 if (m0 == NULL) { 1027 m0 = m; 1028 m0_rxflags = rxflags; 1029 } else { 1030 m_cat(m0, m); 1031 m0->m_pkthdr.len += m->m_len; 1032 } 1033 1034 if (rxflags & NETRXF_more_data) { 1035 /* Still more fragments to receive */ 1036 xennet_rx_free_req(sc, req); 1037 continue; 1038 } 1039 1040 sc->sc_cnt_rx_frag.ev_count++; 1041 m = m0; 1042 m0 = NULL; 1043 rxflags = m0_rxflags; 1044 } 1045 1046 if (rxflags & NETRXF_csum_blank) { 1047 xennet_checksum_fill(ifp, m, &sc->sc_cnt_rx_cksum_blank, 1048 &sc->sc_cnt_rx_cksum_undefer); 1049 } else if (rxflags & NETRXF_data_validated) 1050 m->m_pkthdr.csum_flags = XN_M_CSUM_SUPPORTED; 1051 1052 /* We'are done with req */ 1053 xennet_rx_free_req(sc, req); 1054 1055 /* Pass the packet up. */ 1056 if_percpuq_enqueue(ifp->if_percpuq, m); 1057 } 1058 /* If the queued Rx fragments did not finish the packet, drop it */ 1059 if (m0) { 1060 if_statinc(ifp, if_iqdrops); 1061 m_freem(m0); 1062 } 1063 xen_rmb(); 1064 sc->sc_rx_ring.rsp_cons = i; 1065 RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_rx_ring, more_to_do); 1066 mutex_exit(&sc->sc_rx_lock); 1067 1068 if (more_to_do) { 1069 DPRINTF(("%s: %s more_to_do\n", ifp->if_xname, __func__)); 1070 goto again; 1071 } 1072 1073 return 1; 1074 } 1075 1076 static bool 1077 xennet_submit_tx_request(struct xennet_xenbus_softc *sc, struct mbuf *m, 1078 struct xennet_txreq *req0, int *req_prod) 1079 { 1080 struct xennet_txreq *req = req0; 1081 netif_tx_request_t *txreq; 1082 int i, prod = *req_prod; 1083 const bool multiseg = (req0->txreq_dmamap->dm_nsegs > 1); 1084 const int lastseg = req0->txreq_dmamap->dm_nsegs - 1; 1085 bus_dma_segment_t *ds; 1086 SLIST_HEAD(, xennet_txreq) txchain; 1087 1088 KASSERT(mutex_owned(&sc->sc_tx_lock)); 1089 KASSERT(req0->txreq_dmamap->dm_nsegs > 0); 1090 1091 bus_dmamap_sync(sc->sc_xbusd->xbusd_dmat, req->txreq_dmamap, 0, 1092 m->m_pkthdr.len, BUS_DMASYNC_POSTWRITE); 1093 MCLAIM(m, &sc->sc_ethercom.ec_tx_mowner); 1094 SLIST_INIT(&txchain); 1095 1096 for (i = 0; i < req0->txreq_dmamap->dm_nsegs; i++) { 1097 KASSERT(req != NULL); 1098 1099 ds = &req0->txreq_dmamap->dm_segs[i]; 1100 1101 if (__predict_false(xengnt_grant_access( 1102 sc->sc_xbusd->xbusd_otherend_id, 1103 trunc_page(ds->ds_addr), 1104 GNTMAP_readonly, &req->txreq_gntref) != 0)) { 1105 goto grant_fail; 1106 } 1107 1108 KASSERT(SLIST_FIRST(&sc->sc_txreq_head) == req); 1109 SLIST_REMOVE_HEAD(&sc->sc_txreq_head, txreq_next); 1110 SLIST_INSERT_HEAD(&txchain, req, txreq_next); 1111 sc->sc_free_txreql--; 1112 req->txreq_m = (req == req0) ? m : NULL; 1113 1114 txreq = RING_GET_REQUEST(&sc->sc_tx_ring, prod + i); 1115 txreq->id = req->txreq_id; 1116 txreq->gref = req->txreq_gntref; 1117 txreq->offset = ds->ds_addr & PAGE_MASK; 1118 /* For Tx, first fragment size is always set to total size */ 1119 txreq->size = (i == 0) ? 
m->m_pkthdr.len : ds->ds_len; 1120 txreq->flags = 0; 1121 if (i == 0) { 1122 if (m->m_pkthdr.csum_flags & XN_M_CSUM_SUPPORTED) { 1123 txreq->flags |= NETTXF_csum_blank; 1124 } else { 1125 txreq->flags |= NETTXF_data_validated; 1126 } 1127 } 1128 if (multiseg && i < lastseg) 1129 txreq->flags |= NETTXF_more_data; 1130 1131 req = SLIST_FIRST(&sc->sc_txreq_head); 1132 } 1133 1134 if (i > 1) 1135 sc->sc_cnt_tx_frag.ev_count++; 1136 1137 /* All done */ 1138 *req_prod += i; 1139 return true; 1140 1141 grant_fail: 1142 printf("%s: grant_access failed\n", device_xname(sc->sc_dev)); 1143 while (!SLIST_EMPTY(&txchain)) { 1144 req = SLIST_FIRST(&txchain); 1145 SLIST_REMOVE_HEAD(&txchain, txreq_next); 1146 xengnt_revoke_access(req->txreq_gntref); 1147 req->txreq_gntref = GRANT_INVALID_REF; 1148 SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next); 1149 sc->sc_free_txreql++; 1150 } 1151 req0->txreq_m = NULL; 1152 return false; 1153 } 1154 1155 /* 1156 * The output routine of a xennet interface. Prepares mbufs for TX, 1157 * and notify backend when finished. 1158 * Called at splsoftnet. 1159 */ 1160 void 1161 xennet_start(struct ifnet *ifp) 1162 { 1163 struct xennet_xenbus_softc *sc = ifp->if_softc; 1164 struct mbuf *m; 1165 RING_IDX req_prod; 1166 struct xennet_txreq *req; 1167 int notify; 1168 1169 mutex_enter(&sc->sc_tx_lock); 1170 1171 rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt); 1172 1173 xennet_tx_complete(sc); 1174 1175 req_prod = sc->sc_tx_ring.req_prod_pvt; 1176 while (/*CONSTCOND*/1) { 1177 req = SLIST_FIRST(&sc->sc_txreq_head); 1178 if (__predict_false(req == NULL)) { 1179 if (!IFQ_IS_EMPTY(&ifp->if_snd)) 1180 sc->sc_cnt_tx_queue_full.ev_count++; 1181 break; 1182 } 1183 IFQ_DEQUEUE(&ifp->if_snd, m); 1184 if (m == NULL) 1185 break; 1186 1187 /* 1188 * For short packets it's always way faster passing 1189 * single defragmented packet, even with feature-sg. 1190 * Try to defragment first if the result is likely to fit 1191 * into a single mbuf. 
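		 * A failed m_defrag() is not fatal here: the chain is
		 * simply loaded as-is, and defragmentation is retried
		 * only if that load fails.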
1192 */ 1193 if (m->m_pkthdr.len < MCLBYTES && m->m_next) 1194 (void)m_defrag(m, M_DONTWAIT); 1195 1196 /* Try to load the mbuf as-is, if that fails defrag */ 1197 if (__predict_false(bus_dmamap_load_mbuf( 1198 sc->sc_xbusd->xbusd_dmat, 1199 req->txreq_dmamap, m, BUS_DMA_NOWAIT) != 0)) { 1200 sc->sc_cnt_tx_defrag.ev_count++; 1201 if (__predict_false(m_defrag(m, M_DONTWAIT) == NULL)) { 1202 DPRINTF(("%s: defrag failed\n", 1203 device_xname(sc->sc_dev))); 1204 m_freem(m); 1205 break; 1206 } 1207 1208 if (__predict_false(bus_dmamap_load_mbuf( 1209 sc->sc_xbusd->xbusd_dmat, 1210 req->txreq_dmamap, m, BUS_DMA_NOWAIT) != 0)) { 1211 printf("%s: cannot load new mbuf len %d\n", 1212 device_xname(sc->sc_dev), 1213 m->m_pkthdr.len); 1214 m_freem(m); 1215 break; 1216 } 1217 } 1218 1219 if (req->txreq_dmamap->dm_nsegs > sc->sc_free_txreql) { 1220 /* Not enough slots right now, postpone */ 1221 sc->sc_cnt_tx_queue_full.ev_count++; 1222 sc->sc_cnt_tx_drop.ev_count++; 1223 bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat, 1224 req->txreq_dmamap); 1225 m_freem(m); 1226 break; 1227 } 1228 1229 DPRINTFN(XEDB_MBUF, ("xennet_start id %d, " 1230 "mbuf %p, buf %p, size %d\n", 1231 req->txreq_id, m, mtod(m, void *), m->m_pkthdr.len)); 1232 1233 #ifdef XENNET_DEBUG_DUMP 1234 xennet_hex_dump(mtod(m, u_char *), m->m_pkthdr.len, "s", 1235 req->txreq_id); 1236 #endif 1237 1238 if (!xennet_submit_tx_request(sc, m, req, &req_prod)) { 1239 /* Grant failed, postpone */ 1240 sc->sc_cnt_tx_drop.ev_count++; 1241 bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat, 1242 req->txreq_dmamap); 1243 m_freem(m); 1244 break; 1245 } 1246 1247 /* 1248 * Pass packet to bpf if there is a listener. 1249 */ 1250 bpf_mtap(ifp, m, BPF_D_OUT); 1251 } 1252 1253 sc->sc_tx_ring.req_prod_pvt = req_prod; 1254 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_tx_ring, notify); 1255 if (notify) 1256 hypervisor_notify_via_evtchn(sc->sc_evtchn); 1257 1258 mutex_exit(&sc->sc_tx_lock); 1259 1260 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start() done\n", 1261 device_xname(sc->sc_dev))); 1262 } 1263 1264 int 1265 xennet_ioctl(struct ifnet *ifp, u_long cmd, void *data) 1266 { 1267 #ifdef XENNET_DEBUG 1268 struct xennet_xenbus_softc *sc = ifp->if_softc; 1269 #endif 1270 int error = 0; 1271 1272 #ifdef NET_MPSAFE 1273 #ifdef notyet 1274 /* XXX IFNET_LOCK() is not taken in some cases e.g. 
multicast ioctls */ 1275 KASSERT(IFNET_LOCKED(ifp)); 1276 #endif 1277 #endif 1278 int s = splnet(); 1279 1280 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl()\n", 1281 device_xname(sc->sc_dev))); 1282 error = ether_ioctl(ifp, cmd, data); 1283 if (error == ENETRESET) 1284 error = 0; 1285 1286 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() returning %d\n", 1287 device_xname(sc->sc_dev), error)); 1288 1289 splx(s); 1290 1291 return error; 1292 } 1293 1294 int 1295 xennet_init(struct ifnet *ifp) 1296 { 1297 struct xennet_xenbus_softc *sc = ifp->if_softc; 1298 1299 KASSERT(IFNET_LOCKED(ifp)); 1300 1301 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_init()\n", 1302 device_xname(sc->sc_dev))); 1303 1304 if ((ifp->if_flags & IFF_RUNNING) == 0) { 1305 mutex_enter(&sc->sc_rx_lock); 1306 sc->sc_rx_ring.sring->rsp_event = 1307 sc->sc_rx_ring.rsp_cons + 1; 1308 mutex_exit(&sc->sc_rx_lock); 1309 hypervisor_unmask_event(sc->sc_evtchn); 1310 hypervisor_notify_via_evtchn(sc->sc_evtchn); 1311 } 1312 ifp->if_flags |= IFF_RUNNING; 1313 1314 return 0; 1315 } 1316 1317 void 1318 xennet_stop(struct ifnet *ifp, int disable) 1319 { 1320 struct xennet_xenbus_softc *sc = ifp->if_softc; 1321 1322 KASSERT(IFNET_LOCKED(ifp)); 1323 1324 ifp->if_flags &= ~IFF_RUNNING; 1325 hypervisor_mask_event(sc->sc_evtchn); 1326 } 1327 1328 #if defined(NFS_BOOT_BOOTSTATIC) 1329 int 1330 xennet_bootstatic_callback(struct nfs_diskless *nd) 1331 { 1332 #if 0 1333 struct ifnet *ifp = nd->nd_ifp; 1334 struct xennet_xenbus_softc *sc = 1335 (struct xennet_xenbus_softc *)ifp->if_softc; 1336 #endif 1337 int flags = 0; 1338 union xen_cmdline_parseinfo xcp; 1339 struct sockaddr_in *sin; 1340 1341 memset(&xcp, 0, sizeof(xcp.xcp_netinfo)); 1342 xcp.xcp_netinfo.xi_ifno = /* XXX sc->sc_ifno */ 0; 1343 xcp.xcp_netinfo.xi_root = nd->nd_root.ndm_host; 1344 xen_parse_cmdline(XEN_PARSE_NETINFO, &xcp); 1345 1346 if (xcp.xcp_netinfo.xi_root[0] != '\0') { 1347 flags |= NFS_BOOT_HAS_SERVER; 1348 if (strchr(xcp.xcp_netinfo.xi_root, ':') != NULL) 1349 flags |= NFS_BOOT_HAS_ROOTPATH; 1350 } 1351 1352 nd->nd_myip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[0]); 1353 nd->nd_gwip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[2]); 1354 nd->nd_mask.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[3]); 1355 1356 sin = (struct sockaddr_in *) &nd->nd_root.ndm_saddr; 1357 memset((void *)sin, 0, sizeof(*sin)); 1358 sin->sin_len = sizeof(*sin); 1359 sin->sin_family = AF_INET; 1360 sin->sin_addr.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[1]); 1361 1362 if (nd->nd_myip.s_addr) 1363 flags |= NFS_BOOT_HAS_MYIP; 1364 if (nd->nd_gwip.s_addr) 1365 flags |= NFS_BOOT_HAS_GWIP; 1366 if (nd->nd_mask.s_addr) 1367 flags |= NFS_BOOT_HAS_MASK; 1368 if (sin->sin_addr.s_addr) 1369 flags |= NFS_BOOT_HAS_SERVADDR; 1370 1371 return flags; 1372 } 1373 #endif /* defined(NFS_BOOT_BOOTSTATIC) */ 1374 1375 #ifdef XENNET_DEBUG_DUMP 1376 #define XCHR(x) hexdigits[(x) & 0xf] 1377 static void 1378 xennet_hex_dump(const unsigned char *pkt, size_t len, const char *type, int id) 1379 { 1380 size_t i, j; 1381 1382 printf("pkt %p len %zd/%zx type %s id %d\n", pkt, len, len, type, id); 1383 printf("00000000 "); 1384 for(i=0; i<len; i++) { 1385 printf("%c%c ", XCHR(pkt[i]>>4), XCHR(pkt[i])); 1386 if ((i+1) % 16 == 8) 1387 printf(" "); 1388 if ((i+1) % 16 == 0) { 1389 printf(" %c", '|'); 1390 for(j=0; j<16; j++) 1391 printf("%c", pkt[i-15+j]>=32 && 1392 pkt[i-15+j]<127?pkt[i-15+j]:'.'); 1393 printf("%c\n%c%c%c%c%c%c%c%c ", '|', 1394 XCHR((i+1)>>28), XCHR((i+1)>>24), 1395 XCHR((i+1)>>20), XCHR((i+1)>>16), 1396 XCHR((i+1)>>12), XCHR((i+1)>>8), 1397 XCHR((i+1)>>4), 
XCHR(i+1)); 1398 } 1399 } 1400 printf("\n"); 1401 } 1402 #undef XCHR 1403 #endif 1404