/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2023-2024 Google LLC
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "gve.h"
#include "gve_adminq.h"
#include "gve_dqo.h"

static void
gve_rx_free_ring_gqi(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];

	if (rx->page_info != NULL) {
		free(rx->page_info, M_GVE);
		rx->page_info = NULL;
	}

	if (rx->data_ring != NULL) {
		gve_dma_free_coherent(&rx->data_ring_mem);
		rx->data_ring = NULL;
	}

	if (rx->desc_ring != NULL) {
		gve_dma_free_coherent(&rx->desc_ring_mem);
		rx->desc_ring = NULL;
	}
}

static void
gve_rx_free_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	/* Safe to call even if never allocated */
	gve_free_counters((counter_u64_t *)&rx->stats, NUM_RX_STATS);

	if (gve_is_gqi(priv))
		gve_rx_free_ring_gqi(priv, i);
	else
		gve_rx_free_ring_dqo(priv, i);

	if (com->q_resources != NULL) {
		gve_dma_free_coherent(&com->q_resources_mem);
		com->q_resources = NULL;
	}
}

static void
gve_prefill_rx_slots(struct gve_rx_ring *rx)
{
	struct gve_ring_com *com = &rx->com;
	struct gve_dma_handle *dma;
	int i;

	for (i = 0; i < com->priv->rx_desc_cnt; i++) {
		rx->data_ring[i].qpl_offset = htobe64(PAGE_SIZE * i);
		rx->page_info[i].page_offset = 0;
		rx->page_info[i].page_address = com->qpl->dmas[i].cpu_addr;
		rx->page_info[i].page = com->qpl->pages[i];

		dma = &com->qpl->dmas[i];
		bus_dmamap_sync(dma->tag, dma->map, BUS_DMASYNC_PREREAD);
	}

	bus_dmamap_sync(rx->data_ring_mem.tag, rx->data_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static int
gve_rx_alloc_ring_gqi(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;
	int err;

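	/*
	 * Allocate the descriptor ring (completions the driver reads from the
	 * device) here; the data ring (QPL offsets the driver posts for the
	 * device) and the page_info array are allocated further below.
	 */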
	err = gve_dma_alloc_coherent(priv,
	    sizeof(struct gve_rx_desc) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->desc_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc desc ring for rx ring %d", i);
		goto abort;
	}

	rx->mask = priv->rx_pages_per_qpl - 1;
	rx->desc_ring = rx->desc_ring_mem.cpu_addr;

	com->qpl = &priv->qpls[priv->tx_cfg.max_queues + i];
	if (com->qpl == NULL) {
		device_printf(priv->dev, "No QPL left for rx ring %d", i);
		return (ENOMEM);
	}

	rx->page_info = malloc(priv->rx_desc_cnt * sizeof(*rx->page_info),
	    M_GVE, M_WAITOK | M_ZERO);

	err = gve_dma_alloc_coherent(priv,
	    sizeof(union gve_rx_data_slot) * priv->rx_desc_cnt,
	    CACHE_LINE_SIZE, &rx->data_ring_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc data ring for rx ring %d", i);
		goto abort;
	}
	rx->data_ring = rx->data_ring_mem.cpu_addr;

	gve_prefill_rx_slots(rx);
	return (0);

abort:
	gve_rx_free_ring_gqi(priv, i);
	return (err);
}

static int
gve_rx_alloc_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;
	int err;

	com->priv = priv;
	com->id = i;

	gve_alloc_counters((counter_u64_t *)&rx->stats, NUM_RX_STATS);

	err = gve_dma_alloc_coherent(priv, sizeof(struct gve_queue_resources),
	    PAGE_SIZE, &com->q_resources_mem);
	if (err != 0) {
		device_printf(priv->dev,
		    "Failed to alloc queue resources for rx ring %d", i);
		goto abort;
	}
	com->q_resources = com->q_resources_mem.cpu_addr;

	if (gve_is_gqi(priv))
		err = gve_rx_alloc_ring_gqi(priv, i);
	else
		err = gve_rx_alloc_ring_dqo(priv, i);
	if (err != 0)
		goto abort;

	return (0);

abort:
	gve_rx_free_ring(priv, i);
	return (err);
}

int
gve_alloc_rx_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	priv->rx = malloc(sizeof(struct gve_rx_ring) * priv->rx_cfg.num_queues,
	    M_GVE, M_WAITOK | M_ZERO);

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring(priv, i);
		if (err != 0)
			goto free_rings;
	}

	return (0);

free_rings:
	while (i--)
		gve_rx_free_ring(priv, i);
	free(priv->rx, M_GVE);
	return (err);
}

void
gve_free_rx_rings(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring(priv, i);

	free(priv->rx, M_GVE);
}

static void
gve_rx_clear_data_ring(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->com.priv;
	int i;

	/*
	 * The Rx data ring has this invariant: "the networking stack is not
	 * using the buffer beginning at any page_offset". This invariant is
	 * established initially by gve_prefill_rx_slots at alloc-time and is
	 * maintained by the cleanup taskqueue. This invariant implies that the
	 * ring can be considered to be fully posted with buffers at this point,
	 * even if there are unfreed mbufs still being processed, which is why we
	 * can fill the ring without waiting on can_flip at each slot to become true.
	 */
	for (i = 0; i < priv->rx_desc_cnt; i++) {
		rx->data_ring[i].qpl_offset = htobe64(PAGE_SIZE * i +
		    rx->page_info[i].page_offset);
		rx->fill_cnt++;
	}

	bus_dmamap_sync(rx->data_ring_mem.tag, rx->data_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_rx_clear_desc_ring(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->com.priv;
	int i;

	for (i = 0; i < priv->rx_desc_cnt; i++)
		rx->desc_ring[i] = (struct gve_rx_desc){};

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);
}

static void
gve_clear_rx_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];

	if (!gve_is_gqi(priv)) {
		gve_clear_rx_ring_dqo(priv, i);
		return;
	}

	rx->seq_no = 1;
	rx->cnt = 0;
	rx->fill_cnt = 0;
	rx->mask = priv->rx_desc_cnt - 1;

	gve_rx_clear_desc_ring(rx);
	gve_rx_clear_data_ring(rx);
}

static void
gve_start_rx_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	if ((if_getcapenable(priv->ifp) & IFCAP_LRO) != 0) {
		if (tcp_lro_init(&rx->lro) != 0)
			device_printf(priv->dev, "Failed to init lro for rx ring %d", i);
		rx->lro.ifp = priv->ifp;
	}

	if (gve_is_gqi(priv))
		NET_TASK_INIT(&com->cleanup_task, 0, gve_rx_cleanup_tq, rx);
	else
		NET_TASK_INIT(&com->cleanup_task, 0, gve_rx_cleanup_tq_dqo, rx);
	com->cleanup_tq = taskqueue_create_fast("gve rx", M_WAITOK,
	    taskqueue_thread_enqueue, &com->cleanup_tq);

	taskqueue_start_threads(&com->cleanup_tq, 1, PI_NET,
	    "%s rxq %d", device_get_nameunit(priv->dev), i);

	if (gve_is_gqi(priv)) {
		/* GQ RX bufs are prefilled at ring alloc time */
		gve_db_bar_write_4(priv, com->db_offset, rx->fill_cnt);
	} else
		gve_rx_prefill_buffers_dqo(rx);
}

int
gve_create_rx_rings(struct gve_priv *priv)
{
	struct gve_ring_com *com;
	struct gve_rx_ring *rx;
	int err;
	int i;

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK))
		return (0);

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_clear_rx_ring(priv, i);

	err = gve_adminq_create_rx_queues(priv, priv->rx_cfg.num_queues);
	if (err != 0)
		return (err);

	bus_dmamap_sync(priv->irqs_db_mem.tag, priv->irqs_db_mem.map,
	    BUS_DMASYNC_POSTREAD);

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		rx = &priv->rx[i];
		com = &rx->com;

		com->irq_db_offset = 4 * be32toh(priv->irq_db_indices[com->ntfy_id].index);

		bus_dmamap_sync(com->q_resources_mem.tag, com->q_resources_mem.map,
		    BUS_DMASYNC_POSTREAD);
		com->db_offset = 4 * be32toh(com->q_resources->db_index);
		com->counter_idx = be32toh(com->q_resources->counter_index);

		gve_start_rx_ring(priv, i);
	}

	gve_set_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK);
	return (0);
}

static void
gve_stop_rx_ring(struct gve_priv *priv, int i)
{
	struct gve_rx_ring *rx = &priv->rx[i];
	struct gve_ring_com *com = &rx->com;

	if (com->cleanup_tq != NULL) {
		taskqueue_quiesce(com->cleanup_tq);
		taskqueue_free(com->cleanup_tq);
		com->cleanup_tq = NULL;
	}

	tcp_lro_free(&rx->lro);
	rx->ctx = (struct gve_rx_ctx){};
}

int
gve_destroy_rx_rings(struct gve_priv *priv)
{
	int err;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_stop_rx_ring(priv, i);

	if (gve_get_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK)) {
		err = gve_adminq_destroy_rx_queues(priv, priv->rx_cfg.num_queues);
		if (err != 0)
			return (err);
		gve_clear_state_flag(priv, GVE_STATE_FLAG_RX_RINGS_OK);
	}

	return (0);
}

int
gve_rx_intr(void *arg)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;
	struct gve_ring_com *com = &rx->com;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return (FILTER_STRAY);

	gve_db_bar_write_4(priv, com->irq_db_offset, GVE_IRQ_MASK);
	taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
	return (FILTER_HANDLED);
}

static inline void
gve_set_rss_type(__be16 flag, struct mbuf *mbuf)
{
	if ((flag & GVE_RXF_IPV4) != 0) {
		if ((flag & GVE_RXF_TCP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV4);
		else if ((flag & GVE_RXF_UDP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV4);
		else
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV4);
		return;
	}

	if ((flag & GVE_RXF_IPV6) != 0) {
		if ((flag & GVE_RXF_TCP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_TCP_IPV6);
		else if ((flag & GVE_RXF_UDP) != 0)
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_UDP_IPV6);
		else
			M_HASHTYPE_SET(mbuf, M_HASHTYPE_RSS_IPV6);
		return;
	}
}

static void
gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info, __be64 *slot_addr)
{
	const __be64 offset = htobe64(GVE_DEFAULT_RX_BUFFER_OFFSET);
	page_info->page_offset ^= GVE_DEFAULT_RX_BUFFER_OFFSET;
	*(slot_addr) ^= offset;
}

static struct mbuf *
gve_rx_create_mbuf(struct gve_priv *priv, struct gve_rx_ring *rx,
    struct gve_rx_slot_page_info *page_info, uint16_t len,
    union gve_rx_data_slot *data_slot, bool is_only_frag)
{
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct mbuf *mbuf;
	u_int ref_count;
	bool can_flip;

	uint32_t offset = page_info->page_offset + page_info->pad;
	void *va = (char *)page_info->page_address + offset;

	if (len <= priv->rx_copybreak && is_only_frag) {
		mbuf = m_get2(len, M_NOWAIT, MT_DATA, M_PKTHDR);
		if (__predict_false(mbuf == NULL))
			return (NULL);

		m_copyback(mbuf, 0, len, va);
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_copybreak_cnt, 1);
		counter_exit();
		ctx->mbuf_head = mbuf;
		ctx->mbuf_tail = mbuf;
	} else {
		struct mbuf *mbuf_tail = ctx->mbuf_tail;
		KASSERT(len <= MCLBYTES, ("gve rx fragment bigger than cluster mbuf"));

		/*
		 * This page was created with VM_ALLOC_WIRED, thus the lowest
		 * wire count experienced by the page until the interface is
		 * destroyed is 1.
		 *
		 * We wire the page again before supplying an mbuf pointing to
		 * it to the networking stack, so before the mbuf leaves the
		 * driver, the wire count rises to 2.
		 *
		 * If it is 1 again, it necessarily means that the mbuf has been
		 * consumed and it was gve_mextadd_free that brought down the wire
		 * count back to 1. We only need to eventually observe the 1.
		 */
		ref_count = atomic_load_int(&page_info->page->ref_count);
		can_flip = VPRC_WIRE_COUNT(ref_count) == 1;

		if (mbuf_tail == NULL) {
			if (can_flip)
				mbuf = m_gethdr(M_NOWAIT, MT_DATA);
			else
				mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);

			ctx->mbuf_head = mbuf;
			ctx->mbuf_tail = mbuf;
		} else {
			if (can_flip)
				mbuf = m_get(M_NOWAIT, MT_DATA);
			else
				mbuf = m_getcl(M_NOWAIT, MT_DATA, 0);

			mbuf_tail->m_next = mbuf;
			ctx->mbuf_tail = mbuf;
		}

		if (__predict_false(mbuf == NULL))
			return (NULL);

		if (can_flip) {
			MEXTADD(mbuf, va, len, gve_mextadd_free,
			    page_info->page, page_info->page_address,
			    0, EXT_NET_DRV);

			counter_enter();
			counter_u64_add_protected(rx->stats.rx_frag_flip_cnt, 1);
			counter_exit();

			/*
			 * Grab an extra ref to the page so that gve_mextadd_free
			 * does not end up freeing the page while the interface exists.
			 */
			vm_page_wire(page_info->page);

			gve_rx_flip_buff(page_info, &data_slot->qpl_offset);
		} else {
			m_copyback(mbuf, 0, len, va);
			counter_enter();
			counter_u64_add_protected(rx->stats.rx_frag_copy_cnt, 1);
			counter_exit();
		}
	}

	mbuf->m_len = len;
	ctx->total_size += len;

	return (mbuf);
}

static inline bool
gve_needs_rss(__be16 flag)
{
	if ((flag & GVE_RXF_FRAG) != 0)
		return (false);
	if ((flag & (GVE_RXF_IPV4 | GVE_RXF_IPV6)) != 0)
		return (true);
	return (false);
}

static void
gve_rx(struct gve_priv *priv, struct gve_rx_ring *rx, struct gve_rx_desc *desc,
    uint32_t idx)
{
	struct gve_rx_slot_page_info *page_info;
	struct gve_dma_handle *page_dma_handle;
	union gve_rx_data_slot *data_slot;
	struct gve_rx_ctx *ctx = &rx->ctx;
	struct mbuf *mbuf = NULL;
	if_t ifp = priv->ifp;
	bool do_if_input;
	uint16_t len;

	bool is_first_frag = ctx->frag_cnt == 0;
	bool is_last_frag = !(GVE_RXF_PKT_CONT & desc->flags_seq);
	bool is_only_frag = is_first_frag && is_last_frag;

	if (__predict_false(ctx->drop_pkt))
		goto finish_frag;

	if ((desc->flags_seq & GVE_RXF_ERR) != 0) {
		ctx->drop_pkt = true;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_desc_err, 1);
		counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
		counter_exit();
		m_freem(ctx->mbuf_head);
		goto finish_frag;
	}

	page_info = &rx->page_info[idx];
	data_slot = &rx->data_ring[idx];
	page_dma_handle = &(rx->com.qpl->dmas[idx]);

	page_info->pad = is_first_frag ? GVE_RX_PAD : 0;
	len = be16toh(desc->len) - page_info->pad;

	bus_dmamap_sync(page_dma_handle->tag, page_dma_handle->map,
	    BUS_DMASYNC_POSTREAD);

	mbuf = gve_rx_create_mbuf(priv, rx, page_info, len, data_slot,
	    is_only_frag);
	if (mbuf == NULL) {
		ctx->drop_pkt = true;
		counter_enter();
		counter_u64_add_protected(rx->stats.rx_dropped_pkt_mbuf_alloc_fail, 1);
		counter_u64_add_protected(rx->stats.rx_dropped_pkt, 1);
		counter_exit();
		m_freem(ctx->mbuf_head);
		goto finish_frag;
	}

	if (is_first_frag) {
		mbuf->m_pkthdr.rcvif = priv->ifp;
		ctx->is_tcp = desc->flags_seq & GVE_RXF_TCP;

		if (gve_needs_rss(desc->flags_seq)) {
			gve_set_rss_type(desc->flags_seq, mbuf);
			mbuf->m_pkthdr.flowid = be32toh(desc->rss_hash);
		}

		if ((desc->csum != 0) && ((desc->flags_seq & GVE_RXF_FRAG) == 0)) {
			mbuf->m_pkthdr.csum_flags = CSUM_IP_CHECKED |
			    CSUM_IP_VALID |
			    CSUM_DATA_VALID |
			    CSUM_PSEUDO_HDR;
			mbuf->m_pkthdr.csum_data = 0xffff;
		}
	}

	if (is_last_frag) {
		mbuf = ctx->mbuf_head;
		mbuf->m_pkthdr.len = ctx->total_size;
		do_if_input = true;

		if (((if_getcapenable(priv->ifp) & IFCAP_LRO) != 0) &&	/* LRO is enabled */
		    (ctx->is_tcp) &&					/* pkt is a TCP pkt */
		    ((mbuf->m_pkthdr.csum_flags & CSUM_DATA_VALID) != 0) && /* NIC verified csum */
		    (rx->lro.lro_cnt != 0) &&				/* LRO resources exist */
		    (tcp_lro_rx(&rx->lro, mbuf, 0) == 0))
			do_if_input = false;

		if (do_if_input)
			if_input(ifp, mbuf);

		counter_enter();
		counter_u64_add_protected(rx->stats.rbytes, ctx->total_size);
		counter_u64_add_protected(rx->stats.rpackets, 1);
		counter_exit();
	}

finish_frag:
	ctx->frag_cnt++;
	if (is_last_frag)
		rx->ctx = (struct gve_rx_ctx){};
}

static bool
gve_rx_work_pending(struct gve_rx_ring *rx)
{
	struct gve_rx_desc *desc;
	__be16 flags_seq;
	uint32_t next_idx;
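
	/*
	 * A newly completed descriptor is identified by its sequence number
	 * matching the sequence number the ring expects next (rx->seq_no).
	 */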
	next_idx = rx->cnt & rx->mask;
	desc = rx->desc_ring + next_idx;

	flags_seq = desc->flags_seq;

	return (GVE_SEQNO(flags_seq) == rx->seq_no);
}

static inline uint8_t
gve_next_seqno(uint8_t seq)
{
	return ((seq + 1) == 8 ? 1 : seq + 1);
}

static void
gve_rx_cleanup(struct gve_priv *priv, struct gve_rx_ring *rx, int budget)
{
	uint32_t idx = rx->cnt & rx->mask;
	struct gve_rx_desc *desc;
	struct gve_rx_ctx *ctx = &rx->ctx;
	uint32_t work_done = 0;

	NET_EPOCH_ASSERT();

	bus_dmamap_sync(rx->desc_ring_mem.tag, rx->desc_ring_mem.map,
	    BUS_DMASYNC_POSTREAD);
	desc = &rx->desc_ring[idx];

	while ((work_done < budget || ctx->frag_cnt) &&
	    (GVE_SEQNO(desc->flags_seq) == rx->seq_no)) {

		gve_rx(priv, rx, desc, idx);

		rx->cnt++;
		idx = rx->cnt & rx->mask;
		desc = &rx->desc_ring[idx];
		rx->seq_no = gve_next_seqno(rx->seq_no);
		work_done++;
	}

	/* The device will only send whole packets. */
	if (__predict_false(ctx->frag_cnt)) {
		m_freem(ctx->mbuf_head);
		rx->ctx = (struct gve_rx_ctx){};
		device_printf(priv->dev,
		    "Unexpected seq number %d with incomplete packet, expected %d, scheduling reset",
		    GVE_SEQNO(desc->flags_seq), rx->seq_no);
		gve_schedule_reset(priv);
	}

	if (work_done != 0)
		tcp_lro_flush_all(&rx->lro);

	bus_dmamap_sync(rx->data_ring_mem.tag, rx->data_ring_mem.map,
	    BUS_DMASYNC_PREWRITE);

	/* Buffers are refilled as the descs are processed */
	rx->fill_cnt += work_done;
	gve_db_bar_write_4(priv, rx->com.db_offset, rx->fill_cnt);
}

void
gve_rx_cleanup_tq(void *arg, int pending)
{
	struct gve_rx_ring *rx = arg;
	struct gve_priv *priv = rx->com.priv;

	if (__predict_false((if_getdrvflags(priv->ifp) & IFF_DRV_RUNNING) == 0))
		return;

	gve_rx_cleanup(priv, rx, /*budget=*/128);

	gve_db_bar_write_4(priv, rx->com.irq_db_offset,
	    GVE_IRQ_ACK | GVE_IRQ_EVENT);

	/*
	 * Fragments received before this barrier MAY NOT cause the NIC to send an
	 * interrupt but they will still be handled by the enqueue below.
	 * Fragments received after the barrier WILL trigger an interrupt.
	 */
	mb();

	if (gve_rx_work_pending(rx)) {
		gve_db_bar_write_4(priv, rx->com.irq_db_offset, GVE_IRQ_MASK);
		taskqueue_enqueue(rx->com.cleanup_tq, &rx->com.cleanup_task);
	}
}