/*	$NetBSD: if_xennet_xenbus.c,v 1.130 2024/01/09 18:39:53 jdolecek Exp $	*/

/*
 * Copyright (c) 2006 Manuel Bouyer.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2004 Christian Limpach.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * This file contains the xennet frontend code required for the network
 * communication between two Xen domains.
 * It resembles xbd, but is a little more complex as it must deal with two
 * rings:
 * - the TX ring, to transmit packets to the backend (inside => outside)
 * - the RX ring, to receive packets from the backend (outside => inside)
 *
 * The principles are as follows.
 *
 * For TX:
 * The purpose is to transmit packets to the outside. The starting point is
 * xennet_start() (the output routine of xennet), scheduled via a softint.
 * xennet_start() generates the requests associated with the queued TX
 * mbufs (see altq(9)).
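 * When the backend advertises feature-sg, a single packet may span several
 * TX ring slots: xennet_submit_tx_request() issues one request per DMA
 * segment and chains them with NETTXF_more_data.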
 * The backend's responses are processed by xennet_tx_complete(), called
 * from xennet_start().
 *
 * For RX:
 * The purpose is to process the packets received from the outside. RX
 * buffers are pre-allocated through xennet_alloc_rx_buffer(), during xennet
 * autoconf attach. During pre-allocation, the frontend pushes requests into
 * the I/O ring, in preparation for incoming packets from the backend.
 * When RX packets need to be processed, the backend takes the requests
 * previously offered by the frontend and pushes the associated responses
 * into the I/O ring. When done, it notifies the frontend through an event
 * notification, which asynchronously calls xennet_handler() in the frontend.
 * xennet_handler() processes the responses, generates the associated mbuf,
 * and passes it to the MI layer for further processing.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_xennet_xenbus.c,v 1.130 2024/01/09 18:39:53 jdolecek Exp $");

#include "opt_xen.h"
#include "opt_nfs_boot.h"
#include "opt_net_mpsafe.h"

#include <sys/param.h>
#include <sys/device.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/intr.h>
#include <sys/rndsource.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_ether.h>
#include <net/bpf.h>

#if defined(NFS_BOOT_BOOTSTATIC)
#include <sys/fstypes.h>
#include <sys/mount.h>
#include <sys/statvfs.h>
#include <netinet/in.h>
#include <nfs/rpcv2.h>
#include <nfs/nfsproto.h>
#include <nfs/nfs.h>
#include <nfs/nfsmount.h>
#include <nfs/nfsdiskless.h>
#include <xen/if_xennetvar.h>
#endif /* defined(NFS_BOOT_BOOTSTATIC) */

#include <xen/xennet_checksum.h>

#include <uvm/uvm.h>

#include <xen/intr.h>
#include <xen/hypervisor.h>
#include <xen/evtchn.h>
#include <xen/granttables.h>
#include <xen/include/public/io/netif.h>
#include <xen/xenpmap.h>

#include <xen/xenbus.h>
#include "locators.h"

#undef XENNET_DEBUG_DUMP
#undef XENNET_DEBUG

#ifdef XENNET_DEBUG
#define XEDB_FOLLOW	0x01
#define XEDB_INIT	0x02
#define XEDB_EVENT	0x04
#define XEDB_MBUF	0x08
#define XEDB_MEM	0x10
int xennet_debug = 0xff;
#define DPRINTF(x) if (xennet_debug) printf x;
#define DPRINTFN(n,x) if (xennet_debug & (n)) printf x;
#else
#define DPRINTF(x)
#define DPRINTFN(n,x)
#endif

#define GRANT_INVALID_REF	-1 /* entry is free */

#define NET_TX_RING_SIZE __CONST_RING_SIZE(netif_tx, PAGE_SIZE)
#define NET_RX_RING_SIZE __CONST_RING_SIZE(netif_rx, PAGE_SIZE)

struct xennet_txreq {
	SLIST_ENTRY(xennet_txreq) txreq_next;
	uint16_t txreq_id;		/* ID passed to backend */
	grant_ref_t txreq_gntref;	/* grant ref of this request */
	struct mbuf *txreq_m;		/* mbuf being transmitted */
	bus_dmamap_t txreq_dmamap;
};

struct xennet_rxreq {
	SLIST_ENTRY(xennet_rxreq) rxreq_next;
	uint16_t rxreq_id;		/* ID passed to backend */
	grant_ref_t rxreq_gntref;	/* grant ref of this request */
	struct mbuf *rxreq_m;
	bus_dmamap_t rxreq_dmamap;
};

struct xennet_xenbus_softc {
	device_t sc_dev;
	struct ethercom sc_ethercom;
	uint8_t sc_enaddr[ETHER_ADDR_LEN];
	struct xenbus_device *sc_xbusd;

	netif_tx_front_ring_t sc_tx_ring;
	netif_rx_front_ring_t sc_rx_ring;

	unsigned int sc_evtchn;
	struct intrhand *sc_ih;

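	/*
	 * Grant references for the shared TX/RX ring pages; granted in
	 * xennet_xenbus_resume() and advertised to the backend as
	 * tx-ring-ref/rx-ring-ref in xennet_talk_to_backend().
	 */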
grant_ref_t sc_tx_ring_gntref; 182 grant_ref_t sc_rx_ring_gntref; 183 184 kmutex_t sc_tx_lock; /* protects free TX list, TX ring */ 185 kmutex_t sc_rx_lock; /* protects free RX list, RX ring, rxreql */ 186 struct xennet_txreq sc_txreqs[NET_TX_RING_SIZE]; 187 struct xennet_rxreq sc_rxreqs[NET_RX_RING_SIZE]; 188 SLIST_HEAD(,xennet_txreq) sc_txreq_head; /* list of free TX requests */ 189 SLIST_HEAD(,xennet_rxreq) sc_rxreq_head; /* list of free RX requests */ 190 int sc_free_txreql; /* number of free transmit request structs */ 191 int sc_free_rxreql; /* number of free receive request structs */ 192 193 int sc_backend_status; /* our status with backend */ 194 #define BEST_CLOSED 0 195 #define BEST_DISCONNECTED 1 196 #define BEST_CONNECTED 2 197 #define BEST_SUSPENDED 3 198 int sc_features; 199 #define FEATURE_IPV6CSUM 0x01 /* IPv6 checksum offload */ 200 #define FEATURE_SG 0x02 /* scatter-gatter */ 201 #define FEATURE_RX_COPY 0x04 /* RX-copy */ 202 #define FEATURE_BITS "\20\1IPV6-CSUM\2SG\3RX-COPY" 203 krndsource_t sc_rnd_source; 204 struct evcnt sc_cnt_tx_defrag; 205 struct evcnt sc_cnt_tx_queue_full; 206 struct evcnt sc_cnt_tx_drop; 207 struct evcnt sc_cnt_tx_frag; 208 struct evcnt sc_cnt_rx_frag; 209 struct evcnt sc_cnt_rx_cksum_blank; 210 struct evcnt sc_cnt_rx_cksum_undefer; 211 }; 212 213 static pool_cache_t if_xennetrxbuf_cache; 214 static int if_xennetrxbuf_cache_inited = 0; 215 216 static int xennet_xenbus_match(device_t, cfdata_t, void *); 217 static void xennet_xenbus_attach(device_t, device_t, void *); 218 static int xennet_xenbus_detach(device_t, int); 219 static void xennet_backend_changed(void *, XenbusState); 220 221 static void xennet_alloc_rx_buffer(struct xennet_xenbus_softc *); 222 static void xennet_free_rx_buffer(struct xennet_xenbus_softc *, bool); 223 static void xennet_tx_complete(struct xennet_xenbus_softc *); 224 static void xennet_rx_mbuf_free(struct mbuf *, void *, size_t, void *); 225 static int xennet_handler(void *); 226 static bool xennet_talk_to_backend(struct xennet_xenbus_softc *); 227 #ifdef XENNET_DEBUG_DUMP 228 static void xennet_hex_dump(const unsigned char *, size_t, const char *, int); 229 #endif 230 231 static int xennet_init(struct ifnet *); 232 static void xennet_stop(struct ifnet *, int); 233 static void xennet_start(struct ifnet *); 234 static int xennet_ioctl(struct ifnet *, u_long, void *); 235 236 static bool xennet_xenbus_suspend(device_t dev, const pmf_qual_t *); 237 static bool xennet_xenbus_resume(device_t dev, const pmf_qual_t *); 238 239 CFATTACH_DECL3_NEW(xennet, sizeof(struct xennet_xenbus_softc), 240 xennet_xenbus_match, xennet_xenbus_attach, xennet_xenbus_detach, NULL, 241 NULL, NULL, DVF_DETACH_SHUTDOWN); 242 243 static int 244 xennet_xenbus_match(device_t parent, cfdata_t match, void *aux) 245 { 246 struct xenbusdev_attach_args *xa = aux; 247 248 if (strcmp(xa->xa_type, "vif") != 0) 249 return 0; 250 251 if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT && 252 match->cf_loc[XENBUSCF_ID] != xa->xa_id) 253 return 0; 254 255 return 1; 256 } 257 258 static void 259 xennet_xenbus_attach(device_t parent, device_t self, void *aux) 260 { 261 struct xennet_xenbus_softc *sc = device_private(self); 262 struct xenbusdev_attach_args *xa = aux; 263 struct ifnet *ifp = &sc->sc_ethercom.ec_if; 264 int err; 265 netif_tx_sring_t *tx_ring; 266 netif_rx_sring_t *rx_ring; 267 RING_IDX i; 268 char *e, *p; 269 unsigned long uval; 270 extern int ifqmaxlen; /* XXX */ 271 char mac[32]; 272 char buf[64]; 273 bus_size_t maxsz; 274 int nsegs; 275 276 
	aprint_normal(": Xen Virtual Network Interface\n");
	sc->sc_dev = self;

	sc->sc_xbusd = xa->xa_xbusd;
	sc->sc_xbusd->xbusd_otherend_changed = xennet_backend_changed;

	/* read feature support flags */
	err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
	    "feature-rx-copy", &uval, 10);
	if (!err && uval == 1)
		sc->sc_features |= FEATURE_RX_COPY;
	err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
	    "feature-ipv6-csum-offload", &uval, 10);
	if (!err && uval == 1)
		sc->sc_features |= FEATURE_IPV6CSUM;
	err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
	    "feature-sg", &uval, 10);
	if (!err && uval == 1)
		sc->sc_features |= FEATURE_SG;
	snprintb(buf, sizeof(buf), FEATURE_BITS, sc->sc_features);
	aprint_normal_dev(sc->sc_dev, "backend features %s\n", buf);

	/* xenbus ensures 2 devices can't be probed at the same time */
	if (if_xennetrxbuf_cache_inited == 0) {
		if_xennetrxbuf_cache = pool_cache_init(PAGE_SIZE, 0, 0, 0,
		    "xnfrx", NULL, IPL_NET, NULL, NULL, NULL);
		if_xennetrxbuf_cache_inited = 1;
	}

	/* initialize free TX and RX request lists */
	if (sc->sc_features & FEATURE_SG) {
		maxsz = ETHER_MAX_LEN_JUMBO;
		/*
		 * Linux netback drops the packet if the request has more
		 * segments than XEN_NETIF_NR_SLOTS_MIN (== 18). With 2KB
		 * MCLBYTES this means maximum packet size 36KB, in reality
		 * less due to mbuf chain fragmentation.
		 */
		nsegs = XEN_NETIF_NR_SLOTS_MIN;
	} else {
		maxsz = PAGE_SIZE;
		nsegs = 1;
	}
	mutex_init(&sc->sc_tx_lock, MUTEX_DEFAULT, IPL_NET);
	SLIST_INIT(&sc->sc_txreq_head);
	for (i = 0; i < NET_TX_RING_SIZE; i++) {
		struct xennet_txreq *txreq = &sc->sc_txreqs[i];

		txreq->txreq_id = i;
		if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, maxsz, nsegs,
		    PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
		    &txreq->txreq_dmamap) != 0)
			break;

		SLIST_INSERT_HEAD(&sc->sc_txreq_head, &sc->sc_txreqs[i],
		    txreq_next);
	}
	sc->sc_free_txreql = i;

	mutex_init(&sc->sc_rx_lock, MUTEX_DEFAULT, IPL_NET);
	SLIST_INIT(&sc->sc_rxreq_head);
	for (i = 0; i < NET_RX_RING_SIZE; i++) {
		struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i];
		rxreq->rxreq_id = i;
		if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, maxsz, nsegs,
		    PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
		    &rxreq->rxreq_dmamap) != 0)
			break;
		rxreq->rxreq_gntref = GRANT_INVALID_REF;
		SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, rxreq_next);
	}
	sc->sc_free_rxreql = i;
	if (sc->sc_free_rxreql == 0) {
		aprint_error_dev(self, "failed to allocate rx memory\n");
		return;
	}

	/* read mac address */
	err = xenbus_read(NULL, sc->sc_xbusd->xbusd_path, "mac",
	    mac, sizeof(mac));
	if (err) {
		aprint_error_dev(self, "can't read mac address, err %d\n", err);
		return;
	}
	for (i = 0, p = mac; i < ETHER_ADDR_LEN; i++) {
		sc->sc_enaddr[i] = strtoul(p, &e, 16);
		if ((e[0] == '\0' && i != 5) && e[0] != ':') {
			aprint_error_dev(self,
			    "%s is not a valid mac address\n", mac);
			return;
		}
		p = &e[1];
	}
	aprint_normal_dev(self, "MAC address %s\n",
	    ether_sprintf(sc->sc_enaddr));

	/* Initialize ifnet structure and attach interface */
	strlcpy(ifp->if_xname, device_xname(self), IFNAMSIZ);
	sc->sc_ethercom.ec_capabilities |= ETHERCAP_VLAN_MTU;
	if (sc->sc_features & FEATURE_SG)
		sc->sc_ethercom.ec_capabilities |= ETHERCAP_JUMBO_MTU;
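	/*
	 * The interface is registered MP-safe (IFEF_MPSAFE below); the TX
	 * and RX paths are serialized by sc_tx_lock and sc_rx_lock.
	 */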
ifp->if_softc = sc; 378 ifp->if_start = xennet_start; 379 ifp->if_ioctl = xennet_ioctl; 380 ifp->if_init = xennet_init; 381 ifp->if_stop = xennet_stop; 382 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 383 ifp->if_extflags = IFEF_MPSAFE; 384 ifp->if_snd.ifq_maxlen = uimax(ifqmaxlen, NET_TX_RING_SIZE * 2); 385 ifp->if_capabilities = 386 IFCAP_CSUM_UDPv4_Rx | IFCAP_CSUM_UDPv4_Tx 387 | IFCAP_CSUM_TCPv4_Rx | IFCAP_CSUM_TCPv4_Tx 388 | IFCAP_CSUM_UDPv6_Rx 389 | IFCAP_CSUM_TCPv6_Rx; 390 #define XN_M_CSUM_SUPPORTED \ 391 (M_CSUM_TCPv4 | M_CSUM_UDPv4 | M_CSUM_TCPv6 | M_CSUM_UDPv6) 392 393 if (sc->sc_features & FEATURE_IPV6CSUM) { 394 /* 395 * If backend supports IPv6 csum offloading, we can skip 396 * IPv6 csum for Tx packets. Rx packet validation can 397 * be skipped regardless. 398 */ 399 ifp->if_capabilities |= 400 IFCAP_CSUM_UDPv6_Tx | IFCAP_CSUM_TCPv6_Tx; 401 } 402 403 IFQ_SET_MAXLEN(&ifp->if_snd, uimax(2 * NET_TX_RING_SIZE, IFQ_MAXLEN)); 404 IFQ_SET_READY(&ifp->if_snd); 405 if_attach(ifp); 406 if_deferred_start_init(ifp, NULL); 407 ether_ifattach(ifp, sc->sc_enaddr); 408 409 /* alloc shared rings */ 410 tx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 411 UVM_KMF_WIRED); 412 rx_ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 413 UVM_KMF_WIRED); 414 if (tx_ring == NULL || rx_ring == NULL) 415 panic("%s: can't alloc rings", device_xname(self)); 416 417 sc->sc_tx_ring.sring = tx_ring; 418 sc->sc_rx_ring.sring = rx_ring; 419 420 rnd_attach_source(&sc->sc_rnd_source, device_xname(sc->sc_dev), 421 RND_TYPE_NET, RND_FLAG_DEFAULT); 422 423 evcnt_attach_dynamic(&sc->sc_cnt_tx_defrag, EVCNT_TYPE_MISC, 424 NULL, device_xname(sc->sc_dev), "Tx packet defrag"); 425 evcnt_attach_dynamic(&sc->sc_cnt_tx_frag, EVCNT_TYPE_MISC, 426 NULL, device_xname(sc->sc_dev), "Tx multi-segment packet"); 427 evcnt_attach_dynamic(&sc->sc_cnt_tx_drop, EVCNT_TYPE_MISC, 428 NULL, device_xname(sc->sc_dev), "Tx packet dropped"); 429 evcnt_attach_dynamic(&sc->sc_cnt_tx_queue_full, EVCNT_TYPE_MISC, 430 NULL, device_xname(sc->sc_dev), "Tx queue full"); 431 evcnt_attach_dynamic(&sc->sc_cnt_rx_frag, EVCNT_TYPE_MISC, 432 NULL, device_xname(sc->sc_dev), "Rx multi-segment packet"); 433 evcnt_attach_dynamic(&sc->sc_cnt_rx_cksum_blank, EVCNT_TYPE_MISC, 434 NULL, device_xname(sc->sc_dev), "Rx csum blank"); 435 evcnt_attach_dynamic(&sc->sc_cnt_rx_cksum_undefer, EVCNT_TYPE_MISC, 436 NULL, device_xname(sc->sc_dev), "Rx csum undeferred"); 437 438 if (!pmf_device_register(self, xennet_xenbus_suspend, 439 xennet_xenbus_resume)) 440 aprint_error_dev(self, "couldn't establish power handler\n"); 441 else 442 pmf_class_network_register(self, ifp); 443 444 /* resume shared structures and tell backend that we are ready */ 445 if (xennet_xenbus_resume(self, PMF_Q_NONE) == false) { 446 uvm_km_free(kernel_map, (vaddr_t)tx_ring, PAGE_SIZE, 447 UVM_KMF_WIRED); 448 uvm_km_free(kernel_map, (vaddr_t)rx_ring, PAGE_SIZE, 449 UVM_KMF_WIRED); 450 return; 451 } 452 } 453 454 static int 455 xennet_xenbus_detach(device_t self, int flags) 456 { 457 struct xennet_xenbus_softc *sc = device_private(self); 458 struct ifnet *ifp = &sc->sc_ethercom.ec_if; 459 460 if ((flags & (DETACH_SHUTDOWN | DETACH_FORCE)) == DETACH_SHUTDOWN) { 461 /* Trigger state transition with backend */ 462 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosing); 463 return EBUSY; 464 } 465 466 DPRINTF(("%s: xennet_xenbus_detach\n", device_xname(self))); 467 468 /* stop interface */ 469 IFNET_LOCK(ifp); 470 xennet_stop(ifp, 1); 471 IFNET_UNLOCK(ifp); 472 if (sc->sc_ih 
!= NULL) { 473 xen_intr_disestablish(sc->sc_ih); 474 sc->sc_ih = NULL; 475 } 476 477 /* collect any outstanding TX responses */ 478 mutex_enter(&sc->sc_tx_lock); 479 xennet_tx_complete(sc); 480 while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) { 481 kpause("xndetach", true, hz/2, &sc->sc_tx_lock); 482 xennet_tx_complete(sc); 483 } 484 mutex_exit(&sc->sc_tx_lock); 485 486 mutex_enter(&sc->sc_rx_lock); 487 xennet_free_rx_buffer(sc, true); 488 mutex_exit(&sc->sc_rx_lock); 489 490 ether_ifdetach(ifp); 491 if_detach(ifp); 492 493 evcnt_detach(&sc->sc_cnt_tx_defrag); 494 evcnt_detach(&sc->sc_cnt_tx_frag); 495 evcnt_detach(&sc->sc_cnt_tx_drop); 496 evcnt_detach(&sc->sc_cnt_tx_queue_full); 497 evcnt_detach(&sc->sc_cnt_rx_frag); 498 evcnt_detach(&sc->sc_cnt_rx_cksum_blank); 499 evcnt_detach(&sc->sc_cnt_rx_cksum_undefer); 500 501 /* Unhook the entropy source. */ 502 rnd_detach_source(&sc->sc_rnd_source); 503 504 /* Wait until the tx/rx rings stop being used by backend */ 505 mutex_enter(&sc->sc_tx_lock); 506 while (xengnt_status(sc->sc_tx_ring_gntref)) 507 kpause("xntxref", true, hz/2, &sc->sc_tx_lock); 508 xengnt_revoke_access(sc->sc_tx_ring_gntref); 509 mutex_exit(&sc->sc_tx_lock); 510 uvm_km_free(kernel_map, (vaddr_t)sc->sc_tx_ring.sring, PAGE_SIZE, 511 UVM_KMF_WIRED); 512 mutex_enter(&sc->sc_rx_lock); 513 while (xengnt_status(sc->sc_rx_ring_gntref)) 514 kpause("xnrxref", true, hz/2, &sc->sc_rx_lock); 515 xengnt_revoke_access(sc->sc_rx_ring_gntref); 516 mutex_exit(&sc->sc_rx_lock); 517 uvm_km_free(kernel_map, (vaddr_t)sc->sc_rx_ring.sring, PAGE_SIZE, 518 UVM_KMF_WIRED); 519 520 pmf_device_deregister(self); 521 522 sc->sc_backend_status = BEST_DISCONNECTED; 523 524 DPRINTF(("%s: xennet_xenbus_detach done\n", device_xname(self))); 525 return 0; 526 } 527 528 static bool 529 xennet_xenbus_resume(device_t dev, const pmf_qual_t *qual) 530 { 531 struct xennet_xenbus_softc *sc = device_private(dev); 532 int error; 533 netif_tx_sring_t *tx_ring; 534 netif_rx_sring_t *rx_ring; 535 paddr_t ma; 536 537 /* All grants were removed during suspend */ 538 sc->sc_tx_ring_gntref = GRANT_INVALID_REF; 539 sc->sc_rx_ring_gntref = GRANT_INVALID_REF; 540 541 mutex_enter(&sc->sc_rx_lock); 542 /* Free but don't revoke, the grant is gone */ 543 xennet_free_rx_buffer(sc, false); 544 KASSERT(sc->sc_free_rxreql == NET_TX_RING_SIZE); 545 mutex_exit(&sc->sc_rx_lock); 546 547 tx_ring = sc->sc_tx_ring.sring; 548 rx_ring = sc->sc_rx_ring.sring; 549 550 /* Initialize rings */ 551 memset(tx_ring, 0, PAGE_SIZE); 552 SHARED_RING_INIT(tx_ring); 553 FRONT_RING_INIT(&sc->sc_tx_ring, tx_ring, PAGE_SIZE); 554 555 memset(rx_ring, 0, PAGE_SIZE); 556 SHARED_RING_INIT(rx_ring); 557 FRONT_RING_INIT(&sc->sc_rx_ring, rx_ring, PAGE_SIZE); 558 559 (void)pmap_extract_ma(pmap_kernel(), (vaddr_t)tx_ring, &ma); 560 error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_tx_ring_gntref); 561 if (error) 562 goto abort_resume; 563 (void)pmap_extract_ma(pmap_kernel(), (vaddr_t)rx_ring, &ma); 564 error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_rx_ring_gntref); 565 if (error) 566 goto abort_resume; 567 568 if (sc->sc_ih != NULL) { 569 xen_intr_disestablish(sc->sc_ih); 570 sc->sc_ih = NULL; 571 } 572 error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn); 573 if (error) 574 goto abort_resume; 575 aprint_verbose_dev(dev, "using event channel %d\n", 576 sc->sc_evtchn); 577 sc->sc_ih = xen_intr_establish_xname(-1, &xen_pic, sc->sc_evtchn, 578 IST_LEVEL, IPL_NET, &xennet_handler, sc, true, device_xname(dev)); 579 KASSERT(sc->sc_ih != NULL); 
580 581 /* Re-fill Rx ring */ 582 mutex_enter(&sc->sc_rx_lock); 583 xennet_alloc_rx_buffer(sc); 584 KASSERT(sc->sc_free_rxreql == 0); 585 mutex_exit(&sc->sc_rx_lock); 586 587 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateInitialised); 588 589 if (sc->sc_backend_status == BEST_SUSPENDED) { 590 if (xennet_talk_to_backend(sc)) { 591 xenbus_device_resume(sc->sc_xbusd); 592 hypervisor_unmask_event(sc->sc_evtchn); 593 xenbus_switch_state(sc->sc_xbusd, NULL, 594 XenbusStateConnected); 595 } 596 } 597 598 return true; 599 600 abort_resume: 601 xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device"); 602 return false; 603 } 604 605 static bool 606 xennet_talk_to_backend(struct xennet_xenbus_softc *sc) 607 { 608 int error; 609 struct xenbus_transaction *xbt; 610 const char *errmsg; 611 612 again: 613 xbt = xenbus_transaction_start(); 614 if (xbt == NULL) 615 return false; 616 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 617 "vifname", "%s", device_xname(sc->sc_dev)); 618 if (error) { 619 errmsg = "vifname"; 620 goto abort_transaction; 621 } 622 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 623 "tx-ring-ref","%u", sc->sc_tx_ring_gntref); 624 if (error) { 625 errmsg = "writing tx ring-ref"; 626 goto abort_transaction; 627 } 628 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 629 "rx-ring-ref","%u", sc->sc_rx_ring_gntref); 630 if (error) { 631 errmsg = "writing rx ring-ref"; 632 goto abort_transaction; 633 } 634 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 635 "request-rx-copy", "%u", 1); 636 if (error) { 637 errmsg = "writing request-rx-copy"; 638 goto abort_transaction; 639 } 640 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 641 "feature-rx-notify", "%u", 1); 642 if (error) { 643 errmsg = "writing feature-rx-notify"; 644 goto abort_transaction; 645 } 646 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 647 "feature-ipv6-csum-offload", "%u", 1); 648 if (error) { 649 errmsg = "writing feature-ipv6-csum-offload"; 650 goto abort_transaction; 651 } 652 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 653 "feature-sg", "%u", 1); 654 if (error) { 655 errmsg = "writing feature-sg"; 656 goto abort_transaction; 657 } 658 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 659 "event-channel", "%u", sc->sc_evtchn); 660 if (error) { 661 errmsg = "writing event channel"; 662 goto abort_transaction; 663 } 664 error = xenbus_transaction_end(xbt, 0); 665 if (error == EAGAIN) 666 goto again; 667 if (error) { 668 xenbus_dev_fatal(sc->sc_xbusd, error, "completing transaction"); 669 return false; 670 } 671 mutex_enter(&sc->sc_rx_lock); 672 xennet_alloc_rx_buffer(sc); 673 mutex_exit(&sc->sc_rx_lock); 674 675 sc->sc_backend_status = BEST_CONNECTED; 676 677 return true; 678 679 abort_transaction: 680 xenbus_transaction_end(xbt, 1); 681 xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg); 682 return false; 683 } 684 685 static bool 686 xennet_xenbus_suspend(device_t dev, const pmf_qual_t *qual) 687 { 688 struct xennet_xenbus_softc *sc = device_private(dev); 689 690 /* 691 * xennet_stop() is called by pmf(9) before xennet_xenbus_suspend(), 692 * so we do not mask event channel here 693 */ 694 695 mutex_enter(&sc->sc_tx_lock); 696 697 /* collect any outstanding TX responses */ 698 xennet_tx_complete(sc); 699 while (sc->sc_tx_ring.sring->rsp_prod != sc->sc_tx_ring.rsp_cons) { 700 kpause("xnsuspend", true, hz/2, &sc->sc_tx_lock); 701 xennet_tx_complete(sc); 702 } 703 KASSERT(sc->sc_free_txreql == NET_RX_RING_SIZE); 704 mutex_exit(&sc->sc_tx_lock); 705 706 /* 707 * dom0 may still 
use references to the grants we gave away 708 * earlier during RX buffers allocation. So we do not free RX buffers 709 * here, as dom0 does not expect the guest domain to suddenly revoke 710 * access to these grants. 711 */ 712 sc->sc_backend_status = BEST_SUSPENDED; 713 714 xenbus_device_suspend(sc->sc_xbusd); 715 aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn); 716 717 return true; 718 } 719 720 static void xennet_backend_changed(void *arg, XenbusState new_state) 721 { 722 struct xennet_xenbus_softc *sc = device_private((device_t)arg); 723 DPRINTF(("%s: new backend state %d\n", 724 device_xname(sc->sc_dev), new_state)); 725 726 switch (new_state) { 727 case XenbusStateInitialising: 728 case XenbusStateInitialised: 729 case XenbusStateConnected: 730 break; 731 case XenbusStateClosing: 732 sc->sc_backend_status = BEST_CLOSED; 733 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed); 734 break; 735 case XenbusStateInitWait: 736 if (sc->sc_backend_status == BEST_CONNECTED 737 || sc->sc_backend_status == BEST_SUSPENDED) 738 break; 739 740 if (xennet_talk_to_backend(sc)) 741 xenbus_switch_state(sc->sc_xbusd, NULL, 742 XenbusStateConnected); 743 break; 744 case XenbusStateUnknown: 745 default: 746 panic("bad backend state %d", new_state); 747 } 748 } 749 750 /* 751 * Allocate RX buffers and put the associated request structures 752 * in the ring. This allows the backend to use them to communicate with 753 * frontend when some data is destined to frontend 754 */ 755 static void 756 xennet_alloc_rx_buffer(struct xennet_xenbus_softc *sc) 757 { 758 RING_IDX req_prod = sc->sc_rx_ring.req_prod_pvt; 759 RING_IDX i; 760 netif_rx_request_t *rxreq; 761 struct xennet_rxreq *req; 762 int otherend_id, notify; 763 struct mbuf *m; 764 vaddr_t va; 765 paddr_t pa, ma; 766 struct ifnet *ifp = &sc->sc_ethercom.ec_if; 767 768 KASSERT(mutex_owned(&sc->sc_rx_lock)); 769 770 otherend_id = sc->sc_xbusd->xbusd_otherend_id; 771 772 for (i = 0; sc->sc_free_rxreql != 0; i++) { 773 req = SLIST_FIRST(&sc->sc_rxreq_head); 774 KASSERT(req != NULL); 775 KASSERT(req == &sc->sc_rxreqs[req->rxreq_id]); 776 KASSERT(req->rxreq_m == NULL); 777 KASSERT(req->rxreq_gntref == GRANT_INVALID_REF); 778 779 MGETHDR(m, M_DONTWAIT, MT_DATA); 780 if (__predict_false(m == NULL)) { 781 printf("%s: rx no mbuf\n", ifp->if_xname); 782 break; 783 } 784 785 va = (vaddr_t)pool_cache_get_paddr( 786 if_xennetrxbuf_cache, PR_NOWAIT, &pa); 787 if (__predict_false(va == 0)) { 788 printf("%s: rx no cluster\n", ifp->if_xname); 789 m_freem(m); 790 break; 791 } 792 793 MEXTADD(m, va, PAGE_SIZE, 794 M_DEVBUF, xennet_rx_mbuf_free, NULL); 795 m->m_len = m->m_pkthdr.len = PAGE_SIZE; 796 m->m_ext.ext_paddr = pa; 797 m->m_flags |= M_EXT_RW; /* we own the buffer */ 798 799 /* Set M_EXT_CLUSTER so that load_mbuf uses m_ext.ext_paddr */ 800 m->m_flags |= M_EXT_CLUSTER; 801 if (__predict_false(bus_dmamap_load_mbuf( 802 sc->sc_xbusd->xbusd_dmat, 803 req->rxreq_dmamap, m, BUS_DMA_NOWAIT) != 0)) { 804 printf("%s: rx mbuf load failed", ifp->if_xname); 805 m->m_flags &= ~M_EXT_CLUSTER; 806 m_freem(m); 807 break; 808 } 809 m->m_flags &= ~M_EXT_CLUSTER; 810 811 KASSERT(req->rxreq_dmamap->dm_nsegs == 1); 812 ma = req->rxreq_dmamap->dm_segs[0].ds_addr; 813 814 if (xengnt_grant_access(otherend_id, trunc_page(ma), 815 0, &req->rxreq_gntref) != 0) { 816 m_freem(m); 817 break; 818 } 819 820 req->rxreq_m = m; 821 822 rxreq = RING_GET_REQUEST(&sc->sc_rx_ring, req_prod + i); 823 rxreq->id = req->rxreq_id; 824 rxreq->gref = req->rxreq_gntref; 825 826 
SLIST_REMOVE_HEAD(&sc->sc_rxreq_head, rxreq_next); 827 sc->sc_free_rxreql--; 828 } 829 830 /* Notify backend if more Rx is possible */ 831 if (i > 0) { 832 sc->sc_rx_ring.req_prod_pvt = req_prod + i; 833 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_rx_ring, notify); 834 if (notify) 835 hypervisor_notify_via_evtchn(sc->sc_evtchn); 836 } 837 } 838 839 /* 840 * Reclaim all RX buffers used by the I/O ring between frontend and backend 841 */ 842 static void 843 xennet_free_rx_buffer(struct xennet_xenbus_softc *sc, bool revoke) 844 { 845 RING_IDX i; 846 847 KASSERT(mutex_owned(&sc->sc_rx_lock)); 848 849 DPRINTF(("%s: xennet_free_rx_buffer\n", device_xname(sc->sc_dev))); 850 /* get back memory from RX ring */ 851 for (i = 0; i < NET_RX_RING_SIZE; i++) { 852 struct xennet_rxreq *rxreq = &sc->sc_rxreqs[i]; 853 854 if (rxreq->rxreq_gntref != GRANT_INVALID_REF) { 855 /* 856 * this req is still granted. Get back the page or 857 * allocate a new one, and remap it. 858 */ 859 SLIST_INSERT_HEAD(&sc->sc_rxreq_head, rxreq, 860 rxreq_next); 861 sc->sc_free_rxreql++; 862 863 if (revoke) 864 xengnt_revoke_access(rxreq->rxreq_gntref); 865 rxreq->rxreq_gntref = GRANT_INVALID_REF; 866 } 867 868 if (rxreq->rxreq_m != NULL) { 869 m_freem(rxreq->rxreq_m); 870 rxreq->rxreq_m = NULL; 871 } 872 } 873 DPRINTF(("%s: xennet_free_rx_buffer done\n", device_xname(sc->sc_dev))); 874 } 875 876 /* 877 * Clears a used RX request when its associated mbuf has been processed 878 */ 879 static void 880 xennet_rx_mbuf_free(struct mbuf *m, void *buf, size_t size, void *arg) 881 { 882 KASSERT(buf == m->m_ext.ext_buf); 883 KASSERT(arg == NULL); 884 KASSERT(m != NULL); 885 vaddr_t va = (vaddr_t)(buf) & ~((vaddr_t)PAGE_MASK); 886 pool_cache_put_paddr(if_xennetrxbuf_cache, 887 (void *)va, m->m_ext.ext_paddr); 888 pool_cache_put(mb_cache, m); 889 }; 890 891 static void 892 xennet_rx_free_req(struct xennet_xenbus_softc *sc, struct xennet_rxreq *req) 893 { 894 KASSERT(mutex_owned(&sc->sc_rx_lock)); 895 896 /* puts back the RX request in the list of free RX requests */ 897 SLIST_INSERT_HEAD(&sc->sc_rxreq_head, req, rxreq_next); 898 sc->sc_free_rxreql++; 899 900 /* 901 * ring needs more requests to be pushed in, allocate some 902 * RX buffers to catch-up with backend's consumption 903 */ 904 if (sc->sc_free_rxreql >= (NET_RX_RING_SIZE * 4 / 5) && 905 __predict_true(sc->sc_backend_status == BEST_CONNECTED)) { 906 xennet_alloc_rx_buffer(sc); 907 } 908 } 909 910 /* 911 * Process responses associated to the TX mbufs sent previously through 912 * xennet_start() 913 * Called at splsoftnet. 
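 * The shared ring's rsp_event is re-armed and rsp_prod re-checked after each
 * pass, so responses posted by the backend while this runs are not missed.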
 */
static void
xennet_tx_complete(struct xennet_xenbus_softc *sc)
{
	struct xennet_txreq *req;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	RING_IDX resp_prod, i;

	DPRINTFN(XEDB_EVENT, ("xennet_tx_complete prod %d cons %d\n",
	    sc->sc_tx_ring.sring->rsp_prod, sc->sc_tx_ring.rsp_cons));

	KASSERT(mutex_owned(&sc->sc_tx_lock));
again:
	resp_prod = sc->sc_tx_ring.sring->rsp_prod;
	xen_rmb();
	for (i = sc->sc_tx_ring.rsp_cons; i != resp_prod; i++) {
		req = &sc->sc_txreqs[RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id];
		KASSERT(req->txreq_id ==
		    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->id);
		KASSERT(xengnt_status(req->txreq_gntref) == 0);
		xengnt_revoke_access(req->txreq_gntref);
		req->txreq_gntref = GRANT_INVALID_REF;

		/* Cleanup/statistics if this is the master req of a chain */
		if (req->txreq_m) {
			if (__predict_false(
			    RING_GET_RESPONSE(&sc->sc_tx_ring, i)->status !=
			    NETIF_RSP_OKAY))
				if_statinc(ifp, if_oerrors);
			else
				if_statinc(ifp, if_opackets);
			bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
			    req->txreq_dmamap);
			m_freem(req->txreq_m);
			req->txreq_m = NULL;
		}

		SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
		sc->sc_free_txreql++;
	}
	sc->sc_tx_ring.rsp_cons = resp_prod;
	/* set new event and check for race with rsp_cons update */
	xen_wmb();
	sc->sc_tx_ring.sring->rsp_event =
	    resp_prod + ((sc->sc_tx_ring.sring->req_prod - resp_prod) >> 1) + 1;
	xen_mb();
	if (resp_prod != sc->sc_tx_ring.sring->rsp_prod)
		goto again;
}

/*
 * Xennet event handler.
 * Get outstanding responses of TX packets, then collect all responses of
 * pending RX packets.
 * Called at splnet.
 */
static int
xennet_handler(void *arg)
{
	struct xennet_xenbus_softc *sc = arg;
	struct ifnet *ifp = &sc->sc_ethercom.ec_if;
	RING_IDX resp_prod, i;
	struct xennet_rxreq *req;
	struct mbuf *m, *m0;
	int rxflags, m0_rxflags;
	int more_to_do;

	if (sc->sc_backend_status != BEST_CONNECTED)
		return 1;

	/* Poke Tx queue if we ran out of Tx buffers earlier */
	if_schedule_deferred_start(ifp);

	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);

again:
	DPRINTFN(XEDB_EVENT, ("xennet_handler prod %d cons %d\n",
	    sc->sc_rx_ring.sring->rsp_prod, sc->sc_rx_ring.rsp_cons));

	mutex_enter(&sc->sc_rx_lock);
	resp_prod = sc->sc_rx_ring.sring->rsp_prod;
	xen_rmb(); /* ensure we see replies up to resp_prod */

	m0 = NULL;
	for (i = sc->sc_rx_ring.rsp_cons; i != resp_prod; i++) {
		netif_rx_response_t *rx = RING_GET_RESPONSE(&sc->sc_rx_ring, i);
		req = &sc->sc_rxreqs[rx->id];
		KASSERT(req->rxreq_gntref != GRANT_INVALID_REF);
		KASSERT(req->rxreq_id == rx->id);

		xengnt_revoke_access(req->rxreq_gntref);
		req->rxreq_gntref = GRANT_INVALID_REF;

		m = req->rxreq_m;
		req->rxreq_m = NULL;

		m->m_len = m->m_pkthdr.len = rx->status;
		bus_dmamap_sync(sc->sc_xbusd->xbusd_dmat, req->rxreq_dmamap, 0,
		    m->m_pkthdr.len, BUS_DMASYNC_PREREAD);

		if (m0 == NULL) {
			MCLAIM(m, &sc->sc_ethercom.ec_rx_mowner);
			m_set_rcvif(m, ifp);
		}

		rxflags = rx->flags;

		if (m0 || rxflags & NETRXF_more_data) {
			/*
			 * On Rx, every fragment (even the first one) contains
			 * just the length of the data in that fragment.
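			 * Fragments are accumulated into m0 until a response
			 * without NETRXF_more_data arrives; the flags saved
			 * from the first fragment (m0_rxflags) then drive the
			 * checksum handling for the whole packet.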
1025 */ 1026 if (m0 == NULL) { 1027 m0 = m; 1028 m0_rxflags = rxflags; 1029 } else { 1030 m_cat(m0, m); 1031 m0->m_pkthdr.len += m->m_len; 1032 } 1033 1034 if (rxflags & NETRXF_more_data) { 1035 /* Still more fragments to receive */ 1036 xennet_rx_free_req(sc, req); 1037 continue; 1038 } 1039 1040 sc->sc_cnt_rx_frag.ev_count++; 1041 m = m0; 1042 m0 = NULL; 1043 rxflags = m0_rxflags; 1044 } 1045 1046 if (rxflags & NETRXF_csum_blank) { 1047 xennet_checksum_fill(ifp, m, &sc->sc_cnt_rx_cksum_blank, 1048 &sc->sc_cnt_rx_cksum_undefer); 1049 } else if (rxflags & NETRXF_data_validated) 1050 m->m_pkthdr.csum_flags = XN_M_CSUM_SUPPORTED; 1051 1052 /* We'are done with req */ 1053 xennet_rx_free_req(sc, req); 1054 1055 /* Pass the packet up. */ 1056 if_percpuq_enqueue(ifp->if_percpuq, m); 1057 } 1058 /* If the queued Rx fragments did not finish the packet, drop it */ 1059 if (m0) { 1060 if_statinc(ifp, if_iqdrops); 1061 m_freem(m0); 1062 } 1063 sc->sc_rx_ring.rsp_cons = i; 1064 xen_wmb(); 1065 RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_rx_ring, more_to_do); 1066 mutex_exit(&sc->sc_rx_lock); 1067 1068 if (more_to_do) { 1069 DPRINTF(("%s: %s more_to_do\n", ifp->if_xname, __func__)); 1070 goto again; 1071 } 1072 1073 return 1; 1074 } 1075 1076 static bool 1077 xennet_submit_tx_request(struct xennet_xenbus_softc *sc, struct mbuf *m, 1078 struct xennet_txreq *req0, int *req_prod) 1079 { 1080 struct xennet_txreq *req = req0; 1081 netif_tx_request_t *txreq; 1082 int i, prod = *req_prod; 1083 const bool multiseg = (req0->txreq_dmamap->dm_nsegs > 1); 1084 const int lastseg = req0->txreq_dmamap->dm_nsegs - 1; 1085 bus_dma_segment_t *ds; 1086 SLIST_HEAD(, xennet_txreq) txchain; 1087 1088 KASSERT(mutex_owned(&sc->sc_tx_lock)); 1089 KASSERT(req0->txreq_dmamap->dm_nsegs > 0); 1090 1091 bus_dmamap_sync(sc->sc_xbusd->xbusd_dmat, req->txreq_dmamap, 0, 1092 m->m_pkthdr.len, BUS_DMASYNC_POSTWRITE); 1093 MCLAIM(m, &sc->sc_ethercom.ec_tx_mowner); 1094 SLIST_INIT(&txchain); 1095 1096 for (i = 0; i < req0->txreq_dmamap->dm_nsegs; i++) { 1097 KASSERT(req != NULL); 1098 1099 ds = &req0->txreq_dmamap->dm_segs[i]; 1100 1101 if (__predict_false(xengnt_grant_access( 1102 sc->sc_xbusd->xbusd_otherend_id, 1103 trunc_page(ds->ds_addr), 1104 GNTMAP_readonly, &req->txreq_gntref) != 0)) { 1105 goto grant_fail; 1106 } 1107 1108 KASSERT(SLIST_FIRST(&sc->sc_txreq_head) == req); 1109 SLIST_REMOVE_HEAD(&sc->sc_txreq_head, txreq_next); 1110 SLIST_INSERT_HEAD(&txchain, req, txreq_next); 1111 sc->sc_free_txreql--; 1112 req->txreq_m = (req == req0) ? m : NULL; 1113 1114 txreq = RING_GET_REQUEST(&sc->sc_tx_ring, prod + i); 1115 txreq->id = req->txreq_id; 1116 txreq->gref = req->txreq_gntref; 1117 txreq->offset = ds->ds_addr & PAGE_MASK; 1118 /* For Tx, first fragment size is always set to total size */ 1119 txreq->size = (i == 0) ? m->m_pkthdr.len : ds->ds_len; 1120 txreq->flags = 0; 1121 if (i == 0) { 1122 if (m->m_pkthdr.csum_flags & XN_M_CSUM_SUPPORTED) { 1123 txreq->flags |= NETTXF_csum_blank; 1124 } else { 1125 #if 0 1126 /* 1127 * XXX Checksum optimization disabled 1128 * to avoid port-xen/57743. 
				 */
				txreq->flags |= NETTXF_data_validated;
#endif
			}
		}
		if (multiseg && i < lastseg)
			txreq->flags |= NETTXF_more_data;

		req = SLIST_FIRST(&sc->sc_txreq_head);
	}

	if (i > 1)
		sc->sc_cnt_tx_frag.ev_count++;

	/* All done */
	*req_prod += i;
	return true;

grant_fail:
	printf("%s: grant_access failed\n", device_xname(sc->sc_dev));
	while (!SLIST_EMPTY(&txchain)) {
		req = SLIST_FIRST(&txchain);
		SLIST_REMOVE_HEAD(&txchain, txreq_next);
		xengnt_revoke_access(req->txreq_gntref);
		req->txreq_gntref = GRANT_INVALID_REF;
		SLIST_INSERT_HEAD(&sc->sc_txreq_head, req, txreq_next);
		sc->sc_free_txreql++;
	}
	req0->txreq_m = NULL;
	return false;
}

/*
 * The output routine of a xennet interface. Prepares mbufs for TX,
 * and notifies the backend when finished.
 * Called at splsoftnet.
 */
void
xennet_start(struct ifnet *ifp)
{
	struct xennet_xenbus_softc *sc = ifp->if_softc;
	struct mbuf *m;
	RING_IDX req_prod;
	struct xennet_txreq *req;
	int notify;

	mutex_enter(&sc->sc_tx_lock);

	rnd_add_uint32(&sc->sc_rnd_source, sc->sc_tx_ring.req_prod_pvt);

	xennet_tx_complete(sc);

	req_prod = sc->sc_tx_ring.req_prod_pvt;
	while (/*CONSTCOND*/1) {
		req = SLIST_FIRST(&sc->sc_txreq_head);
		if (__predict_false(req == NULL)) {
			if (!IFQ_IS_EMPTY(&ifp->if_snd))
				sc->sc_cnt_tx_queue_full.ev_count++;
			break;
		}
		IFQ_DEQUEUE(&ifp->if_snd, m);
		if (m == NULL)
			break;

		/*
		 * For short packets it is always much faster to pass a
		 * single defragmented packet, even with feature-sg.
		 * Try to defragment first if the result is likely to fit
		 * into a single mbuf.
		 */
		if (m->m_pkthdr.len < MCLBYTES && m->m_next)
			(void)m_defrag(m, M_DONTWAIT);

		/* Try to load the mbuf as-is, if that fails defrag */
		if (__predict_false(bus_dmamap_load_mbuf(
		    sc->sc_xbusd->xbusd_dmat,
		    req->txreq_dmamap, m, BUS_DMA_NOWAIT) != 0)) {
			sc->sc_cnt_tx_defrag.ev_count++;
			if (__predict_false(m_defrag(m, M_DONTWAIT) == NULL)) {
				DPRINTF(("%s: defrag failed\n",
				    device_xname(sc->sc_dev)));
				m_freem(m);
				break;
			}

			if (__predict_false(bus_dmamap_load_mbuf(
			    sc->sc_xbusd->xbusd_dmat,
			    req->txreq_dmamap, m, BUS_DMA_NOWAIT) != 0)) {
				printf("%s: cannot load new mbuf len %d\n",
				    device_xname(sc->sc_dev),
				    m->m_pkthdr.len);
				m_freem(m);
				break;
			}
		}

		if (req->txreq_dmamap->dm_nsegs > sc->sc_free_txreql) {
			/* Not enough slots right now, postpone */
			sc->sc_cnt_tx_queue_full.ev_count++;
			sc->sc_cnt_tx_drop.ev_count++;
			bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
			    req->txreq_dmamap);
			m_freem(m);
			break;
		}

		DPRINTFN(XEDB_MBUF, ("xennet_start id %d, "
		    "mbuf %p, buf %p, size %d\n",
		    req->txreq_id, m, mtod(m, void *), m->m_pkthdr.len));

#ifdef XENNET_DEBUG_DUMP
		xennet_hex_dump(mtod(m, u_char *), m->m_pkthdr.len, "s",
		    req->txreq_id);
#endif

		if (!xennet_submit_tx_request(sc, m, req, &req_prod)) {
			/* Grant failed, postpone */
			sc->sc_cnt_tx_drop.ev_count++;
			bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
			    req->txreq_dmamap);
			m_freem(m);
			break;
		}

		/*
		 * Pass the packet to bpf if there is a listener.
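		 * The mbuf is still owned by the driver at this point; it is
		 * freed by xennet_tx_complete() once the backend's response
		 * for this request comes back.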
1255 */ 1256 bpf_mtap(ifp, m, BPF_D_OUT); 1257 } 1258 1259 sc->sc_tx_ring.req_prod_pvt = req_prod; 1260 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_tx_ring, notify); 1261 if (notify) 1262 hypervisor_notify_via_evtchn(sc->sc_evtchn); 1263 1264 mutex_exit(&sc->sc_tx_lock); 1265 1266 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_start() done\n", 1267 device_xname(sc->sc_dev))); 1268 } 1269 1270 int 1271 xennet_ioctl(struct ifnet *ifp, u_long cmd, void *data) 1272 { 1273 #ifdef XENNET_DEBUG 1274 struct xennet_xenbus_softc *sc = ifp->if_softc; 1275 #endif 1276 int error = 0; 1277 1278 #ifdef NET_MPSAFE 1279 #ifdef notyet 1280 /* XXX IFNET_LOCK() is not taken in some cases e.g. multicast ioctls */ 1281 KASSERT(IFNET_LOCKED(ifp)); 1282 #endif 1283 #endif 1284 int s = splnet(); 1285 1286 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl()\n", 1287 device_xname(sc->sc_dev))); 1288 error = ether_ioctl(ifp, cmd, data); 1289 if (error == ENETRESET) 1290 error = 0; 1291 1292 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_ioctl() returning %d\n", 1293 device_xname(sc->sc_dev), error)); 1294 1295 splx(s); 1296 1297 return error; 1298 } 1299 1300 int 1301 xennet_init(struct ifnet *ifp) 1302 { 1303 struct xennet_xenbus_softc *sc = ifp->if_softc; 1304 1305 KASSERT(IFNET_LOCKED(ifp)); 1306 1307 DPRINTFN(XEDB_FOLLOW, ("%s: xennet_init()\n", 1308 device_xname(sc->sc_dev))); 1309 1310 if ((ifp->if_flags & IFF_RUNNING) == 0) { 1311 mutex_enter(&sc->sc_rx_lock); 1312 sc->sc_rx_ring.sring->rsp_event = 1313 sc->sc_rx_ring.rsp_cons + 1; 1314 mutex_exit(&sc->sc_rx_lock); 1315 hypervisor_unmask_event(sc->sc_evtchn); 1316 hypervisor_notify_via_evtchn(sc->sc_evtchn); 1317 } 1318 ifp->if_flags |= IFF_RUNNING; 1319 1320 return 0; 1321 } 1322 1323 void 1324 xennet_stop(struct ifnet *ifp, int disable) 1325 { 1326 struct xennet_xenbus_softc *sc = ifp->if_softc; 1327 1328 KASSERT(IFNET_LOCKED(ifp)); 1329 1330 ifp->if_flags &= ~IFF_RUNNING; 1331 hypervisor_mask_event(sc->sc_evtchn); 1332 } 1333 1334 #if defined(NFS_BOOT_BOOTSTATIC) 1335 int 1336 xennet_bootstatic_callback(struct nfs_diskless *nd) 1337 { 1338 #if 0 1339 struct ifnet *ifp = nd->nd_ifp; 1340 struct xennet_xenbus_softc *sc = 1341 (struct xennet_xenbus_softc *)ifp->if_softc; 1342 #endif 1343 int flags = 0; 1344 union xen_cmdline_parseinfo xcp; 1345 struct sockaddr_in *sin; 1346 1347 memset(&xcp, 0, sizeof(xcp.xcp_netinfo)); 1348 xcp.xcp_netinfo.xi_ifno = /* XXX sc->sc_ifno */ 0; 1349 xcp.xcp_netinfo.xi_root = nd->nd_root.ndm_host; 1350 xen_parse_cmdline(XEN_PARSE_NETINFO, &xcp); 1351 1352 if (xcp.xcp_netinfo.xi_root[0] != '\0') { 1353 flags |= NFS_BOOT_HAS_SERVER; 1354 if (strchr(xcp.xcp_netinfo.xi_root, ':') != NULL) 1355 flags |= NFS_BOOT_HAS_ROOTPATH; 1356 } 1357 1358 nd->nd_myip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[0]); 1359 nd->nd_gwip.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[2]); 1360 nd->nd_mask.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[3]); 1361 1362 sin = (struct sockaddr_in *) &nd->nd_root.ndm_saddr; 1363 memset((void *)sin, 0, sizeof(*sin)); 1364 sin->sin_len = sizeof(*sin); 1365 sin->sin_family = AF_INET; 1366 sin->sin_addr.s_addr = ntohl(xcp.xcp_netinfo.xi_ip[1]); 1367 1368 if (nd->nd_myip.s_addr) 1369 flags |= NFS_BOOT_HAS_MYIP; 1370 if (nd->nd_gwip.s_addr) 1371 flags |= NFS_BOOT_HAS_GWIP; 1372 if (nd->nd_mask.s_addr) 1373 flags |= NFS_BOOT_HAS_MASK; 1374 if (sin->sin_addr.s_addr) 1375 flags |= NFS_BOOT_HAS_SERVADDR; 1376 1377 return flags; 1378 } 1379 #endif /* defined(NFS_BOOT_BOOTSTATIC) */ 1380 1381 #ifdef XENNET_DEBUG_DUMP 1382 #define XCHR(x) hexdigits[(x) & 0xf] 1383 static void 1384 
xennet_hex_dump(const unsigned char *pkt, size_t len, const char *type, int id) 1385 { 1386 size_t i, j; 1387 1388 printf("pkt %p len %zd/%zx type %s id %d\n", pkt, len, len, type, id); 1389 printf("00000000 "); 1390 for(i=0; i<len; i++) { 1391 printf("%c%c ", XCHR(pkt[i]>>4), XCHR(pkt[i])); 1392 if ((i+1) % 16 == 8) 1393 printf(" "); 1394 if ((i+1) % 16 == 0) { 1395 printf(" %c", '|'); 1396 for(j=0; j<16; j++) 1397 printf("%c", pkt[i-15+j]>=32 && 1398 pkt[i-15+j]<127?pkt[i-15+j]:'.'); 1399 printf("%c\n%c%c%c%c%c%c%c%c ", '|', 1400 XCHR((i+1)>>28), XCHR((i+1)>>24), 1401 XCHR((i+1)>>20), XCHR((i+1)>>16), 1402 XCHR((i+1)>>12), XCHR((i+1)>>8), 1403 XCHR((i+1)>>4), XCHR(i+1)); 1404 } 1405 } 1406 printf("\n"); 1407 } 1408 #undef XCHR 1409 #endif 1410