/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2015-2024 Beijing WangXun Technology Co., Ltd.
 * Copyright(c) 2010-2015 Intel Corporation
 */

#include <ethdev_driver.h>
#include <rte_malloc.h>
#include <rte_vect.h>

#include "ngbe_type.h"
#include "ngbe_ethdev.h"
#include "ngbe_rxtx.h"
#include "ngbe_rxtx_vec_common.h"

static inline void
ngbe_rxq_rearm(struct ngbe_rx_queue *rxq)
{
	int i;
	uint16_t rx_id;
	volatile struct ngbe_rx_desc *rxdp;
	struct ngbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
	struct rte_mbuf *mb0, *mb1;
	uint64x2_t dma_addr0, dma_addr1;
	uint64x2_t zero = vdupq_n_u64(0);
	uint64_t paddr;
	uint8x8_t p;

	rxdp = rxq->rx_ring + rxq->rxrearm_start;

	/* Pull 'n' more MBUFs into the software ring */
	if (unlikely(rte_mempool_get_bulk(rxq->mb_pool,
					  (void *)rxep,
					  RTE_NGBE_RXQ_REARM_THRESH) < 0)) {
		if (rxq->rxrearm_nb + RTE_NGBE_RXQ_REARM_THRESH >=
		    rxq->nb_rx_desc) {
			for (i = 0; i < RTE_NGBE_DESCS_PER_LOOP; i++) {
				rxep[i].mbuf = &rxq->fake_mbuf;
				vst1q_u64(RTE_CAST_PTR(uint64_t *, &rxdp[i]), zero);
			}
		}
		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
			RTE_NGBE_RXQ_REARM_THRESH;
		return;
	}

	p = vld1_u8((uint8_t *)&rxq->mbuf_initializer);

	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
	for (i = 0; i < RTE_NGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
		mb0 = rxep[0].mbuf;
		mb1 = rxep[1].mbuf;

		/*
		 * Flush mbuf with pkt template.
		 * Data to be rearmed is 6 bytes long.
		 */
		vst1_u8((uint8_t *)&mb0->rearm_data, p);
		paddr = mb0->buf_iova + RTE_PKTMBUF_HEADROOM;
		dma_addr0 = vsetq_lane_u64(paddr, zero, 0);
		/* flush desc with pa dma_addr */
		vst1q_u64(RTE_CAST_PTR(uint64_t *, rxdp++), dma_addr0);

		vst1_u8((uint8_t *)&mb1->rearm_data, p);
		paddr = mb1->buf_iova + RTE_PKTMBUF_HEADROOM;
		dma_addr1 = vsetq_lane_u64(paddr, zero, 0);
		vst1q_u64(RTE_CAST_PTR(uint64_t *, rxdp++), dma_addr1);
	}

	rxq->rxrearm_start += RTE_NGBE_RXQ_REARM_THRESH;
	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
		rxq->rxrearm_start = 0;

	rxq->rxrearm_nb -= RTE_NGBE_RXQ_REARM_THRESH;

	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
			   (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));

	/* Update the tail pointer on the NIC */
	ngbe_set32(rxq->rdt_reg_addr, rx_id);
}
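
/*
 * Rough scalar equivalent of desc_to_olflags_v() below (illustration only,
 * not compiled in; rss_type, vlan_ipe_l4e_bits and l4_csum_good_bit are
 * placeholder names): for each of the four packets,
 *
 *	ol  = rss_flags[rss_type & 0x0F];          // RSS hash / FDIR flag
 *	ol |= vlan_csum_map_lo[vlan_ipe_l4e_bits]; // VLAN + IP/L4 csum flags
 *	ol |= l4_csum_good_bit << 8;               // from vlan_csum_map_hi
 *	rx_pkts[i]->ol_flags = ol;
 *
 * The NEON code performs these lookups for four descriptors at once with
 * byte shuffles (vqtbl1q_u8) over the tables defined in the function, so
 * the exact bit manipulation differs from this sketch.
 */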
static inline void
desc_to_olflags_v(uint8x16x2_t sterr_tmp1, uint8x16x2_t sterr_tmp2,
		  uint8x16_t staterr, uint8_t vlan_flags,
		  struct rte_mbuf **rx_pkts)
{
	uint8x16_t ptype;
	uint8x16_t vtag_lo, vtag_hi, vtag;
	uint8x16_t temp_csum, temp_vp;
	uint8x16_t vtag_mask = vdupq_n_u8(0x0F);
	uint32x4_t csum = {0, 0, 0, 0};

	union {
		uint16_t e[4];
		uint64_t word;
	} vol;

	const uint8x16_t rsstype_msk = {
			0x0F, 0x0F, 0x0F, 0x0F,
			0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00};

	const uint8x16_t rss_flags = {
			0, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH,
			0, RTE_MBUF_F_RX_RSS_HASH, 0, RTE_MBUF_F_RX_RSS_HASH,
			RTE_MBUF_F_RX_RSS_HASH, 0, 0, 0,
			0, 0, 0, RTE_MBUF_F_RX_FDIR};

	/* mask everything except vlan present and l4/ip csum error */
	const uint8x16_t vlan_csum_msk = {
			NGBE_RXD_STAT_VLAN, NGBE_RXD_STAT_VLAN,
			NGBE_RXD_STAT_VLAN, NGBE_RXD_STAT_VLAN,
			0, 0, 0, 0,
			0, 0, 0, 0,
			(NGBE_RXD_ERR_L4CS | NGBE_RXD_ERR_IPCS) >> 24,
			(NGBE_RXD_ERR_L4CS | NGBE_RXD_ERR_IPCS) >> 24,
			(NGBE_RXD_ERR_L4CS | NGBE_RXD_ERR_IPCS) >> 24,
			(NGBE_RXD_ERR_L4CS | NGBE_RXD_ERR_IPCS) >> 24};

	/* map vlan present and l4/ip csum error to ol_flags */
	const uint8x16_t vlan_csum_map_lo = {
			RTE_MBUF_F_RX_IP_CKSUM_GOOD,
			RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
			RTE_MBUF_F_RX_IP_CKSUM_BAD,
			RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
			0, 0, 0, 0,
			vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_GOOD,
			vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
			vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_BAD,
			vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
			0, 0, 0, 0};

	const uint8x16_t vlan_csum_map_hi = {
			RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
			RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
			0, 0, 0, 0,
			RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
			RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
			0, 0, 0, 0};

	ptype = vzipq_u8(sterr_tmp1.val[0], sterr_tmp2.val[0]).val[0];
	ptype = vandq_u8(ptype, rsstype_msk);
	ptype = vqtbl1q_u8(rss_flags, ptype);

	/* extract vlan_flags and csum_error from staterr */
	vtag = vandq_u8(staterr, vlan_csum_msk);

	/* The csum bits are in the most significant positions; to use them
	 * in a shuffle we shift them down, changing the mask from 0xc0 to 0x03.
	 */
	temp_csum = vshrq_n_u8(vtag, 6);

	/* Change the vlan present mask from 0x20 to 0x08. */
	temp_vp = vshrq_n_u8(vtag, 2);

	/* 'OR' the most significant 32 bits containing the checksum flags
	 * with the vlan present flags. The bit layout of each 8-bit lane is
	 * then 'xxxx,VLAN,x,ERR_IPCS,ERR_L4CS'.
	 */
	csum = vsetq_lane_u32(vgetq_lane_u32(vreinterpretq_u32_u8(temp_csum), 3), csum, 0);
	vtag = vorrq_u8(vreinterpretq_u8_u32(csum), vtag);
	vtag = vorrq_u8(vtag, temp_vp);
	vtag = vandq_u8(vtag, vtag_mask);

	/* convert L4 checksum correct type to vtag_hi */
	vtag_hi = vqtbl1q_u8(vlan_csum_map_hi, vtag);
	vtag_hi = vshrq_n_u8(vtag_hi, 7);

	/* convert VP, IPE, L4E to vtag_lo */
	vtag_lo = vqtbl1q_u8(vlan_csum_map_lo, vtag);
	vtag_lo = vorrq_u8(ptype, vtag_lo);

	vtag = vzipq_u8(vtag_lo, vtag_hi).val[0];
	vol.word = vgetq_lane_u64(vreinterpretq_u64_u8(vtag), 0);

	rx_pkts[0]->ol_flags = vol.e[0];
	rx_pkts[1]->ol_flags = vol.e[1];
	rx_pkts[2]->ol_flags = vol.e[2];
	rx_pkts[3]->ol_flags = vol.e[3];
}
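
/* NGBE_VPMD_DESC_EOP_MASK selects the end-of-packet status bit of each of
 * the four per-packet status bytes once they are packed into one 32-bit
 * lane; NGBE_UINT8_BIT is the number of bits in a byte, used when expanding
 * and counting the DD bits in the receive loop.
 */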
#define NGBE_VPMD_DESC_EOP_MASK	0x02020202
#define NGBE_UINT8_BIT		(CHAR_BIT * sizeof(uint8_t))
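
/*
 * Illustrative scalar view of desc_to_ptype_v() (not compiled in;
 * desc_word0 is a placeholder for the first 32-bit word of a descriptor):
 *
 *	ptid = (desc_word0 >> NGBE_RXD_PTID_SHIFT) & NGBE_PTID_MASK;
 *	mb->packet_type = ngbe_decode_ptype(ptid);
 *
 * The vector version gathers that word from four descriptors into one
 * register and performs the shift, mask and decode for all of them.
 */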
static inline void
desc_to_ptype_v(uint64x2_t descs[4], uint16_t pkt_type_mask,
		struct rte_mbuf **rx_pkts)
{
	uint32x4_t ptype_mask = vdupq_n_u32((uint32_t)pkt_type_mask);
	uint32x4_t ptype0 = vzipq_u32(vreinterpretq_u32_u64(descs[0]),
				      vreinterpretq_u32_u64(descs[2])).val[0];
	uint32x4_t ptype1 = vzipq_u32(vreinterpretq_u32_u64(descs[1]),
				      vreinterpretq_u32_u64(descs[3])).val[0];

	/* interleave low 32 bits,
	 * now we have 4 ptypes in a NEON register
	 */
	ptype0 = vzipq_u32(ptype0, ptype1).val[0];

	/* shift right by NGBE_RXD_PTID_SHIFT, and apply ptype mask */
	ptype0 = vandq_u32(vshrq_n_u32(ptype0, NGBE_RXD_PTID_SHIFT), ptype_mask);

	rx_pkts[0]->packet_type = ngbe_decode_ptype(vgetq_lane_u32(ptype0, 0));
	rx_pkts[1]->packet_type = ngbe_decode_ptype(vgetq_lane_u32(ptype0, 1));
	rx_pkts[2]->packet_type = ngbe_decode_ptype(vgetq_lane_u32(ptype0, 2));
	rx_pkts[3]->packet_type = ngbe_decode_ptype(vgetq_lane_u32(ptype0, 3));
}

/**
 * vPMD raw receive routine; only accepts nb_pkts >= RTE_NGBE_DESCS_PER_LOOP.
 *
 * Notice:
 * - if nb_pkts < RTE_NGBE_DESCS_PER_LOOP, no packets are returned
 * - nb_pkts is floor-aligned to a multiple of RTE_NGBE_DESCS_PER_LOOP
 */
static inline uint16_t
_recv_raw_pkts_vec(struct ngbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
		   uint16_t nb_pkts, uint8_t *split_packet)
{
	volatile struct ngbe_rx_desc *rxdp;
	struct ngbe_rx_entry *sw_ring;
	uint16_t nb_pkts_recd;
	int pos;
	uint8x16_t shuf_msk = {
		0xFF, 0xFF,
		0xFF, 0xFF,  /* skip 32 bits pkt_type */
		12, 13,      /* octet 12~13, low 16 bits pkt_len */
		0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
		12, 13,      /* octet 12~13, 16 bits data_len */
		14, 15,      /* octet 14~15, low 16 bits vlan_macip */
		4, 5, 6, 7   /* octet 4~7, 32bits rss */
		};
	uint16x8_t crc_adjust = {0, 0, rxq->crc_len, 0,
				 rxq->crc_len, 0, 0, 0};
	uint8_t vlan_flags;

	/* nb_pkts has to be floor-aligned to RTE_NGBE_DESCS_PER_LOOP */
	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_NGBE_DESCS_PER_LOOP);

	/* Just the act of getting into the function from the application is
	 * going to cost about 7 cycles
	 */
	rxdp = rxq->rx_ring + rxq->rx_tail;

	rte_prefetch_non_temporal(rxdp);

	/* See if we need to rearm the RX queue - gives the prefetch a bit
	 * of time to act
	 */
	if (rxq->rxrearm_nb > RTE_NGBE_RXQ_REARM_THRESH)
		ngbe_rxq_rearm(rxq);

	/* Before we start moving massive data around, check to see if
	 * there is actually a packet available
	 */
	if (!(rxdp->qw1.lo.status & rte_cpu_to_le_32(NGBE_RXD_STAT_DD)))
		return 0;

	/* Cache is empty -> need to scan the buffer rings, but first move
	 * the next 'n' mbufs into the cache
	 */
	sw_ring = &rxq->sw_ring[rxq->rx_tail];

	/* ensure these 2 flags are in the lower 8 bits */
	RTE_BUILD_BUG_ON((RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED) > UINT8_MAX);
	vlan_flags = rxq->vlan_flags & UINT8_MAX;

	/* A. load 4 packets' descriptors in each loop
	 * B. copy 4 mbuf pointers from sw_ring to rx_pkts
	 * C. calculate the number of DD bits among the 4 packets
	 * [C*. extract the end-of-packet bit, if requested]
	 * D. fill info. from desc to mbuf
	 */
	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
			pos += RTE_NGBE_DESCS_PER_LOOP,
			rxdp += RTE_NGBE_DESCS_PER_LOOP) {
		uint64x2_t descs[RTE_NGBE_DESCS_PER_LOOP];
		uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
		uint8x16x2_t sterr_tmp1, sterr_tmp2;
		uint64x2_t mbp1, mbp2;
		uint8x16_t staterr;
		uint16x8_t tmp;
		uint32_t stat;

		/* B.1 load 2 mbuf pointers */
		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);

		/* B.2 copy 2 mbuf pointers into rx_pkts */
		vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);

		/* B.1 load 2 more mbuf pointers */
		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);

		/* A. load 4 packet descriptors */
		descs[0] = vld1q_u64((uint64_t *)(uintptr_t)(rxdp));
		descs[1] = vld1q_u64((uint64_t *)(uintptr_t)(rxdp + 1));
		descs[2] = vld1q_u64((uint64_t *)(uintptr_t)(rxdp + 2));
		descs[3] = vld1q_u64((uint64_t *)(uintptr_t)(rxdp + 3));

		/* B.2 copy 2 mbuf pointers into rx_pkts */
		vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);

		if (split_packet) {
			rte_mbuf_prefetch_part2(rx_pkts[pos]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
		}

		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
		pkt_mb4 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[3]), shuf_msk);
		pkt_mb3 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[2]), shuf_msk);

		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
		pkt_mb2 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[1]), shuf_msk);
		pkt_mb1 = vqtbl1q_u8(vreinterpretq_u8_u64(descs[0]), shuf_msk);

		/* C.1 4=>2 filter staterr info only */
		sterr_tmp2 = vzipq_u8(vreinterpretq_u8_u64(descs[1]),
				      vreinterpretq_u8_u64(descs[3]));
		/* C.1 4=>2 filter staterr info only */
		sterr_tmp1 = vzipq_u8(vreinterpretq_u8_u64(descs[0]),
				      vreinterpretq_u8_u64(descs[2]));

		/* C.2 get 4 pkts staterr value */
		staterr = vzipq_u8(sterr_tmp1.val[1], sterr_tmp2.val[1]).val[0];

		/* set ol_flags with vlan packet type */
		desc_to_olflags_v(sterr_tmp1, sterr_tmp2, staterr, vlan_flags,
				  &rx_pkts[pos]);

		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
		pkt_mb4 = vreinterpretq_u8_u16(tmp);
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
		pkt_mb3 = vreinterpretq_u8_u16(tmp);

		/* D.3 copy final 3,4 data to rx_pkts */
		vst1q_u8((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
			 pkt_mb4);
		vst1q_u8((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
			 pkt_mb3);

		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);
		pkt_mb2 = vreinterpretq_u8_u16(tmp);
		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);
		pkt_mb1 = vreinterpretq_u8_u16(tmp);

		/* C* extract and record EOP bit */
		if (split_packet) {
			stat = vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);
			/* and with mask to extract bits, flipping 1-0 */
			*(int *)split_packet = ~stat & NGBE_VPMD_DESC_EOP_MASK;

			split_packet += RTE_NGBE_DESCS_PER_LOOP;
		}
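
		/* The DD handling below (C.4/C.5) works as follows: each
		 * descriptor's DD bit is replicated across its whole status
		 * byte (shift left to bit 7, then arithmetic shift right),
		 * the packed 32-bit result is inverted, and the number of
		 * trailing zero bits divided by 8 gives the count of
		 * contiguous completed descriptors in this group of four.
		 */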
		/* C.4 expand DD bit to saturate UINT8 */
		staterr = vshlq_n_u8(staterr, NGBE_UINT8_BIT - 1);
		staterr = vreinterpretq_u8_s8(vshrq_n_s8(vreinterpretq_s8_u8(staterr),
							 NGBE_UINT8_BIT - 1));
		stat = ~vgetq_lane_u32(vreinterpretq_u32_u8(staterr), 0);

		rte_prefetch_non_temporal(rxdp + RTE_NGBE_DESCS_PER_LOOP);

		/* D.3 copy final 1,2 data to rx_pkts */
		vst1q_u8((uint8_t *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
			 pkt_mb2);
		vst1q_u8((uint8_t *)&rx_pkts[pos]->rx_descriptor_fields1,
			 pkt_mb1);

		desc_to_ptype_v(descs, NGBE_PTID_MASK, &rx_pkts[pos]);

		/* C.5 calculate the number of available descriptors */
		if (unlikely(stat == 0)) {
			nb_pkts_recd += RTE_NGBE_DESCS_PER_LOOP;
		} else {
			nb_pkts_recd += rte_ctz32(stat) / NGBE_UINT8_BIT;
			break;
		}
	}

	/* Update our internal tail pointer */
	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);

	return nb_pkts_recd;
}

/**
 * vPMD receive routine; only accepts nb_pkts >= RTE_NGBE_DESCS_PER_LOOP.
 *
 * Notice:
 * - if nb_pkts < RTE_NGBE_DESCS_PER_LOOP, no packets are returned
 * - nb_pkts is floor-aligned to a multiple of RTE_NGBE_DESCS_PER_LOOP
 */
uint16_t
ngbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
		   uint16_t nb_pkts)
{
	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
}

/**
 * vPMD receive routine that reassembles scattered packets
 *
 * Notice:
 * - if nb_pkts < RTE_NGBE_DESCS_PER_LOOP, no packets are returned
 * - nb_pkts is floor-aligned to a multiple of RTE_NGBE_DESCS_PER_LOOP
 */
static uint16_t
ngbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
			      uint16_t nb_pkts)
{
	struct ngbe_rx_queue *rxq = rx_queue;
	uint8_t split_flags[RTE_NGBE_MAX_RX_BURST] = {0};

	/* get some new buffers */
	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
					      split_flags);
	if (nb_bufs == 0)
		return 0;

	/* happy day case, full burst + no packets to be joined */
	const uint64_t *split_fl64 = (uint64_t *)split_flags;
	if (rxq->pkt_first_seg == NULL &&
	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
	    split_fl64[2] == 0 && split_fl64[3] == 0)
		return nb_bufs;

	/* reassemble any packets that need reassembly */
	unsigned int i = 0;
	if (rxq->pkt_first_seg == NULL) {
		/* find the first split flag, and only reassemble from there */
		while (i < nb_bufs && !split_flags[i])
			i++;
		if (i == nb_bufs)
			return nb_bufs;
		rxq->pkt_first_seg = rx_pkts[i];
	}
	return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,
				      &split_flags[i]);
}

/**
 * vPMD receive routine that reassembles scattered packets.
 */
uint16_t
ngbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
			     uint16_t nb_pkts)
{
	uint16_t retval = 0;

	while (nb_pkts > RTE_NGBE_MAX_RX_BURST) {
		uint16_t burst;

		burst = ngbe_recv_scattered_burst_vec(rx_queue,
						      rx_pkts + retval,
						      RTE_NGBE_MAX_RX_BURST);
		retval += burst;
		nb_pkts -= burst;
		if (burst < RTE_NGBE_MAX_RX_BURST)
			return retval;
	}

	return retval + ngbe_recv_scattered_burst_vec(rx_queue,
						      rx_pkts + retval,
						      nb_pkts);
}
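
/*
 * vtx1() builds one 16-byte TX descriptor as two 64-bit words: the low word
 * holds the buffer DMA address (buf_iova + data_off), the high word combines
 * the command flags with the packet length, which is also shifted into the
 * upper bits ((uint64_t)pkt_len << 45). Frames shorter than an Ethernet
 * header are programmed with the default frame size instead.
 */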
static inline void
vtx1(volatile struct ngbe_tx_desc *txdp,
		struct rte_mbuf *pkt, uint64_t flags)
{
	uint16_t pkt_len = pkt->data_len;

	if (pkt_len < RTE_ETHER_HDR_LEN)
		pkt_len = NGBE_FRAME_SIZE_DFT;

	uint64x2_t descriptor = {pkt->buf_iova + pkt->data_off,
				 (uint64_t)pkt_len << 45 | flags | pkt_len};

	vst1q_u64(RTE_CAST_PTR(uint64_t *, txdp), descriptor);
}

static inline void
vtx(volatile struct ngbe_tx_desc *txdp,
		struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
	int i;

	for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt)
		vtx1(txdp, *pkt, flags);
}

uint16_t
ngbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
			  uint16_t nb_pkts)
{
	struct ngbe_tx_queue *txq = (struct ngbe_tx_queue *)tx_queue;
	volatile struct ngbe_tx_desc *txdp;
	struct ngbe_tx_entry_v *txep;
	uint16_t n, nb_commit, tx_id;
	uint64_t flags = NGBE_TXD_FLAGS;
	uint64_t rs = NGBE_TXD_FLAGS;
	int i;

	/* crossing the tx_free_thresh boundary is not allowed */
	nb_pkts = RTE_MIN(nb_pkts, txq->tx_free_thresh);

	if (txq->nb_tx_free < txq->tx_free_thresh)
		ngbe_tx_free_bufs(txq);

	nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
	if (unlikely(nb_pkts == 0))
		return 0;

	tx_id = txq->tx_tail;
	txdp = &txq->tx_ring[tx_id];
	txep = &txq->sw_ring_v[tx_id];

	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);

	n = (uint16_t)(txq->nb_tx_desc - tx_id);
	nb_commit = nb_pkts;
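
	/* n is the number of descriptors left before the ring wraps. If the
	 * burst does not fit, it is written in two chunks so the descriptor
	 * writes never run past the end of the ring; tx_id is reset to 0
	 * before the second chunk.
	 */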
	if (nb_commit >= n) {
		tx_backlog_entry(txep, tx_pkts, n);

		for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
			vtx1(txdp, *tx_pkts, flags);

		vtx1(txdp, *tx_pkts++, rs);

		nb_commit = (uint16_t)(nb_commit - n);

		tx_id = 0;

		/* avoid reaching the end of the ring */
		txdp = &txq->tx_ring[tx_id];
		txep = &txq->sw_ring_v[tx_id];
	}

	tx_backlog_entry(txep, tx_pkts, nb_commit);

	vtx(txdp, tx_pkts, nb_commit, flags);

	tx_id = (uint16_t)(tx_id + nb_commit);

	txq->tx_tail = tx_id;

	ngbe_set32(txq->tdt_reg_addr, txq->tx_tail);

	return nb_pkts;
}

static void __rte_cold
ngbe_tx_queue_release_mbufs_vec(struct ngbe_tx_queue *txq)
{
	_ngbe_tx_queue_release_mbufs_vec(txq);
}

void __rte_cold
ngbe_rx_queue_release_mbufs_vec(struct ngbe_rx_queue *rxq)
{
	_ngbe_rx_queue_release_mbufs_vec(rxq);
}

static void __rte_cold
ngbe_tx_free_swring(struct ngbe_tx_queue *txq)
{
	_ngbe_tx_free_swring_vec(txq);
}

static void __rte_cold
ngbe_reset_tx_queue(struct ngbe_tx_queue *txq)
{
	_ngbe_reset_tx_queue_vec(txq);
}

static const struct ngbe_txq_ops vec_txq_ops = {
	.release_mbufs = ngbe_tx_queue_release_mbufs_vec,
	.free_swring = ngbe_tx_free_swring,
	.reset = ngbe_reset_tx_queue,
};

int __rte_cold
ngbe_rxq_vec_setup(struct ngbe_rx_queue *rxq)
{
	return ngbe_rxq_vec_setup_default(rxq);
}

int __rte_cold
ngbe_txq_vec_setup(struct ngbe_tx_queue *txq)
{
	return ngbe_txq_vec_setup_default(txq, &vec_txq_ops);
}

int __rte_cold
ngbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
{
	return ngbe_rx_vec_dev_conf_condition_check_default(dev);
}