1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright(c) 2020-2021 HiSilicon Limited. 3 */ 4 5 #include <rte_io.h> 6 #include <ethdev_driver.h> 7 8 #include "hns3_ethdev.h" 9 #include "hns3_rxtx.h" 10 #include "hns3_rxtx_vec.h" 11 12 #if defined RTE_ARCH_ARM64 13 #include "hns3_rxtx_vec_neon.h" 14 #endif 15 16 int 17 hns3_tx_check_vec_support(struct rte_eth_dev *dev) 18 { 19 struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode; 20 21 struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); 22 if (hns3_dev_get_support(hw, PTP)) 23 return -ENOTSUP; 24 25 /* Only support DEV_TX_OFFLOAD_MBUF_FAST_FREE */ 26 if (txmode->offloads != DEV_TX_OFFLOAD_MBUF_FAST_FREE) 27 return -ENOTSUP; 28 29 return 0; 30 } 31 32 uint16_t 33 hns3_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) 34 { 35 struct hns3_tx_queue *txq = (struct hns3_tx_queue *)tx_queue; 36 uint16_t nb_tx = 0; 37 38 while (nb_pkts) { 39 uint16_t ret, new_burst; 40 41 new_burst = RTE_MIN(nb_pkts, txq->tx_rs_thresh); 42 ret = hns3_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx], 43 new_burst); 44 nb_tx += ret; 45 nb_pkts -= ret; 46 if (ret < new_burst) 47 break; 48 } 49 50 return nb_tx; 51 } 52 53 static inline void 54 hns3_rxq_rearm_mbuf(struct hns3_rx_queue *rxq) 55 { 56 #define REARM_LOOP_STEP_NUM 4 57 struct hns3_entry *rxep = &rxq->sw_ring[rxq->rx_rearm_start]; 58 struct hns3_desc *rxdp = rxq->rx_ring + rxq->rx_rearm_start; 59 uint64_t dma_addr; 60 int i; 61 62 if (unlikely(rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep, 63 HNS3_DEFAULT_RXQ_REARM_THRESH) < 0)) { 64 rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++; 65 return; 66 } 67 68 for (i = 0; i < HNS3_DEFAULT_RXQ_REARM_THRESH; i += REARM_LOOP_STEP_NUM, 69 rxep += REARM_LOOP_STEP_NUM, rxdp += REARM_LOOP_STEP_NUM) { 70 if (likely(i < 71 HNS3_DEFAULT_RXQ_REARM_THRESH - REARM_LOOP_STEP_NUM)) { 72 rte_prefetch_non_temporal(rxep[4].mbuf); 73 rte_prefetch_non_temporal(rxep[5].mbuf); 74 rte_prefetch_non_temporal(rxep[6].mbuf); 75 rte_prefetch_non_temporal(rxep[7].mbuf); 76 } 77 78 dma_addr = rte_mbuf_data_iova_default(rxep[0].mbuf); 79 rxdp[0].addr = rte_cpu_to_le_64(dma_addr); 80 rxdp[0].rx.bd_base_info = 0; 81 82 dma_addr = rte_mbuf_data_iova_default(rxep[1].mbuf); 83 rxdp[1].addr = rte_cpu_to_le_64(dma_addr); 84 rxdp[1].rx.bd_base_info = 0; 85 86 dma_addr = rte_mbuf_data_iova_default(rxep[2].mbuf); 87 rxdp[2].addr = rte_cpu_to_le_64(dma_addr); 88 rxdp[2].rx.bd_base_info = 0; 89 90 dma_addr = rte_mbuf_data_iova_default(rxep[3].mbuf); 91 rxdp[3].addr = rte_cpu_to_le_64(dma_addr); 92 rxdp[3].rx.bd_base_info = 0; 93 } 94 95 rxq->rx_rearm_start += HNS3_DEFAULT_RXQ_REARM_THRESH; 96 if (rxq->rx_rearm_start >= rxq->nb_rx_desc) 97 rxq->rx_rearm_start = 0; 98 99 rxq->rx_rearm_nb -= HNS3_DEFAULT_RXQ_REARM_THRESH; 100 101 hns3_write_reg_opt(rxq->io_head_reg, HNS3_DEFAULT_RXQ_REARM_THRESH); 102 } 103 104 uint16_t 105 hns3_recv_pkts_vec(void *__restrict rx_queue, 106 struct rte_mbuf **__restrict rx_pkts, 107 uint16_t nb_pkts) 108 { 109 struct hns3_rx_queue *rxq = rx_queue; 110 struct hns3_desc *rxdp = &rxq->rx_ring[rxq->next_to_use]; 111 uint64_t pkt_err_mask; /* bit mask indicate whick pkts is error */ 112 uint16_t nb_rx; 113 114 rte_prefetch_non_temporal(rxdp); 115 116 nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_DEFAULT_DESCS_PER_LOOP); 117 118 if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH) 119 hns3_rxq_rearm_mbuf(rxq); 120 121 if (unlikely(!(rxdp->rx.bd_base_info & 122 rte_cpu_to_le_32(1u << HNS3_RXD_VLD_B)))) 123 return 0; 124 125 rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 0].mbuf); 126 rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 1].mbuf); 127 rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 2].mbuf); 128 rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 3].mbuf); 129 130 if (likely(nb_pkts <= HNS3_DEFAULT_RX_BURST)) { 131 pkt_err_mask = 0; 132 nb_rx = hns3_recv_burst_vec(rxq, rx_pkts, nb_pkts, 133 &pkt_err_mask); 134 nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, pkt_err_mask); 135 return nb_rx; 136 } 137 138 nb_rx = 0; 139 while (nb_pkts > 0) { 140 uint16_t ret, n; 141 142 n = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST); 143 pkt_err_mask = 0; 144 ret = hns3_recv_burst_vec(rxq, &rx_pkts[nb_rx], n, 145 &pkt_err_mask); 146 nb_pkts -= ret; 147 nb_rx += hns3_rx_reassemble_pkts(&rx_pkts[nb_rx], ret, 148 pkt_err_mask); 149 if (ret < n) 150 break; 151 152 if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH) 153 hns3_rxq_rearm_mbuf(rxq); 154 } 155 156 return nb_rx; 157 } 158 159 static void 160 hns3_rxq_vec_setup_rearm_data(struct hns3_rx_queue *rxq) 161 { 162 uintptr_t p; 163 struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */ 164 165 mb_def.nb_segs = 1; 166 mb_def.data_off = RTE_PKTMBUF_HEADROOM; 167 mb_def.port = rxq->port_id; 168 rte_mbuf_refcnt_set(&mb_def, 1); 169 170 /* compile-time verifies the rearm_data first 8bytes */ 171 RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) < 172 offsetof(struct rte_mbuf, rearm_data)); 173 RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, refcnt) < 174 offsetof(struct rte_mbuf, rearm_data)); 175 RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, nb_segs) < 176 offsetof(struct rte_mbuf, rearm_data)); 177 RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, port) < 178 offsetof(struct rte_mbuf, rearm_data)); 179 RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) - 180 offsetof(struct rte_mbuf, rearm_data) > 6); 181 RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, refcnt) - 182 offsetof(struct rte_mbuf, rearm_data) > 6); 183 RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, nb_segs) - 184 offsetof(struct rte_mbuf, rearm_data) > 6); 185 RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, port) - 186 offsetof(struct rte_mbuf, rearm_data) > 6); 187 188 /* prevent compiler reordering: rearm_data covers previous fields */ 189 rte_compiler_barrier(); 190 p = (uintptr_t)&mb_def.rearm_data; 191 rxq->mbuf_initializer = *(uint64_t *)p; 192 } 193 194 void 195 hns3_rxq_vec_setup(struct hns3_rx_queue *rxq) 196 { 197 struct hns3_entry *sw_ring = &rxq->sw_ring[rxq->nb_rx_desc]; 198 unsigned int i; 199 200 memset(&rxq->rx_ring[rxq->nb_rx_desc], 0, 201 sizeof(struct hns3_desc) * HNS3_DEFAULT_RX_BURST); 202 203 memset(&rxq->fake_mbuf, 0, sizeof(rxq->fake_mbuf)); 204 for (i = 0; i < HNS3_DEFAULT_RX_BURST; i++) 205 sw_ring[i].mbuf = &rxq->fake_mbuf; 206 207 hns3_rxq_vec_setup_rearm_data(rxq); 208 209 memset(rxq->offset_table, 0, sizeof(rxq->offset_table)); 210 } 211 212 static int 213 hns3_rxq_vec_check(struct hns3_rx_queue *rxq, void *arg) 214 { 215 uint32_t min_vec_bds = HNS3_DEFAULT_RXQ_REARM_THRESH + 216 HNS3_DEFAULT_RX_BURST; 217 218 if (rxq->nb_rx_desc < min_vec_bds) 219 return -ENOTSUP; 220 221 if (rxq->nb_rx_desc % HNS3_DEFAULT_RXQ_REARM_THRESH) 222 return -ENOTSUP; 223 224 RTE_SET_USED(arg); 225 return 0; 226 } 227 228 int 229 hns3_rx_check_vec_support(struct rte_eth_dev *dev) 230 { 231 struct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf; 232 struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; 233 uint64_t offloads_mask = DEV_RX_OFFLOAD_TCP_LRO | 234 DEV_RX_OFFLOAD_VLAN; 235 236 struct hns3_hw *hw = HNS3_DEV_PRIVATE_TO_HW(dev->data->dev_private); 237 if (hns3_dev_get_support(hw, PTP)) 238 return -ENOTSUP; 239 240 if (dev->data->scattered_rx) 241 return -ENOTSUP; 242 243 if (fconf->mode != RTE_FDIR_MODE_NONE) 244 return -ENOTSUP; 245 246 if (rxmode->offloads & offloads_mask) 247 return -ENOTSUP; 248 249 if (hns3_rxq_iterate(dev, hns3_rxq_vec_check, NULL) != 0) 250 return -ENOTSUP; 251 252 return 0; 253 } 254