/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010 - 2015 Intel Corporation
 * Copyright(c) 2017 IBM Corporation.
 */

#include <stdint.h>
#include <ethdev_driver.h>
#include <rte_malloc.h>

#include "base/i40e_prototype.h"
#include "base/i40e_type.h"
#include "i40e_ethdev.h"
#include "i40e_rxtx.h"
#include "i40e_rxtx_vec_common.h"

#include <rte_altivec.h>

static inline void
i40e_rxq_rearm(struct i40e_rx_queue *rxq)
{
	int i;
	uint16_t rx_id;
	volatile union i40e_rx_desc *rxdp;

	struct i40e_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
	struct rte_mbuf *mb0, *mb1;

	__vector unsigned long hdr_room = (__vector unsigned long){
						RTE_PKTMBUF_HEADROOM,
						RTE_PKTMBUF_HEADROOM};
	__vector unsigned long dma_addr0, dma_addr1;

	rxdp = rxq->rx_ring + rxq->rxrearm_start;

	/* Pull 'n' more MBUFs into the software ring */
	if (rte_mempool_get_bulk(rxq->mp,
				 (void *)rxep,
				 RTE_I40E_RXQ_REARM_THRESH) < 0) {
		if (rxq->rxrearm_nb + RTE_I40E_RXQ_REARM_THRESH >=
		    rxq->nb_rx_desc) {
			dma_addr0 = (__vector unsigned long){};
			for (i = 0; i < RTE_I40E_DESCS_PER_LOOP; i++) {
				rxep[i].mbuf = &rxq->fake_mbuf;
				vec_st(dma_addr0, 0,
				       RTE_CAST_PTR(__vector unsigned long *, &rxdp[i].read));
			}
		}
		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
			RTE_I40E_RXQ_REARM_THRESH;
		return;
	}

	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
	for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) {
		__vector unsigned long vaddr0, vaddr1;
		uintptr_t p0, p1;

		mb0 = rxep[0].mbuf;
		mb1 = rxep[1].mbuf;

		/* Flush mbuf with pkt template.
		 * Data to be rearmed is 6 bytes long.
		 * Though, RX will overwrite ol_flags that are coming next
		 * anyway. So overwrite whole 8 bytes with one load:
		 * 6 bytes of rearm_data plus first 2 bytes of ol_flags.
		 */
		p0 = (uintptr_t)&mb0->rearm_data;
		*(uint64_t *)p0 = rxq->mbuf_initializer;
		p1 = (uintptr_t)&mb1->rearm_data;
		*(uint64_t *)p1 = rxq->mbuf_initializer;

		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
		vaddr0 = vec_ld(0, (__vector unsigned long *)&mb0->buf_addr);
		vaddr1 = vec_ld(0, (__vector unsigned long *)&mb1->buf_addr);

		/* convert pa to dma_addr hdr/data */
		dma_addr0 = vec_mergel(vaddr0, vaddr0);
		dma_addr1 = vec_mergel(vaddr1, vaddr1);

		/* add headroom to pa values */
		dma_addr0 = vec_add(dma_addr0, hdr_room);
		dma_addr1 = vec_add(dma_addr1, hdr_room);

		/* flush desc with pa dma_addr */
		vec_st(dma_addr0, 0, RTE_CAST_PTR(__vector unsigned long *, &rxdp++->read));
		vec_st(dma_addr1, 0, RTE_CAST_PTR(__vector unsigned long *, &rxdp++->read));
	}

	rxq->rxrearm_start += RTE_I40E_RXQ_REARM_THRESH;
	rx_id = rxq->rxrearm_start - 1;

	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
		rxq->rxrearm_start = 0;
		rx_id = rxq->nb_rx_desc - 1;
	}

	rxq->rxrearm_nb -= RTE_I40E_RXQ_REARM_THRESH;

	/* Update the tail pointer on the NIC */
	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
}
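
/*
 * Illustrative sketch (not part of the driver): for a single descriptor the
 * scalar equivalent of the vector rearm above would be roughly
 *
 *	struct rte_mbuf *mb = rxep[i].mbuf;
 *	uint64_t dma = rte_mbuf_data_iova_default(mb); // buf_iova + headroom
 *	rxdp[i].read.pkt_addr = rte_cpu_to_le_64(dma);
 *	rxdp[i].read.hdr_addr = rte_cpu_to_le_64(dma);
 *
 * vec_mergel() replicates buf_iova into both 64-bit lanes and vec_add()
 * folds in RTE_PKTMBUF_HEADROOM, so a single 16-byte store fills pkt_addr
 * and hdr_addr at once.
 */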

static inline void
desc_to_olflags_v(__vector unsigned long descs[4], struct rte_mbuf **rx_pkts)
{
	__vector unsigned int vlan0, vlan1, rss, l3_l4e;

	/* mask everything except RSS, flow director and VLAN flags
	 * bit2 is for VLAN tag, bit11 for flow director indication
	 * bit13:12 for RSS indication.
	 */
	const __vector unsigned int rss_vlan_msk = (__vector unsigned int){
			(int32_t)0x1c03804, (int32_t)0x1c03804,
			(int32_t)0x1c03804, (int32_t)0x1c03804};

	/* map rss and vlan type to rss hash and vlan flag */
	const __vector unsigned char vlan_flags = (__vector unsigned char){
			0, 0, 0, 0,
			RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED, 0, 0, 0,
			0, 0, 0, 0,
			0, 0, 0, 0};

	const __vector unsigned char rss_flags = (__vector unsigned char){
			0, RTE_MBUF_F_RX_FDIR, 0, 0,
			0, 0, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH | RTE_MBUF_F_RX_FDIR,
			0, 0, 0, 0,
			0, 0, 0, 0};

	const __vector unsigned char l3_l4e_flags = (__vector unsigned char){
			0,
			RTE_MBUF_F_RX_IP_CKSUM_BAD,
			RTE_MBUF_F_RX_L4_CKSUM_BAD,
			RTE_MBUF_F_RX_L4_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_BAD,
			RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD,
			RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | RTE_MBUF_F_RX_IP_CKSUM_BAD,
			RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
			RTE_MBUF_F_RX_OUTER_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD
					| RTE_MBUF_F_RX_IP_CKSUM_BAD,
			0, 0, 0, 0, 0, 0, 0, 0};

	vlan0 = (__vector unsigned int)vec_mergel(descs[0], descs[1]);
	vlan1 = (__vector unsigned int)vec_mergel(descs[2], descs[3]);
	vlan0 = (__vector unsigned int)vec_mergeh(vlan0, vlan1);

	vlan1 = vec_and(vlan0, rss_vlan_msk);
	vlan0 = (__vector unsigned int)vec_perm(vlan_flags,
						(__vector unsigned char){},
						*(__vector unsigned char *)&vlan1);

	rss = vec_sr(vlan1, (__vector unsigned int){11, 11, 11, 11});
	rss = (__vector unsigned int)vec_perm(rss_flags, (__vector unsigned char){},
					      *(__vector unsigned char *)&rss);

	l3_l4e = vec_sr(vlan1, (__vector unsigned int){22, 22, 22, 22});
	l3_l4e = (__vector unsigned int)vec_perm(l3_l4e_flags,
						 (__vector unsigned char){},
						 *(__vector unsigned char *)&l3_l4e);

	vlan0 = vec_or(vlan0, rss);
	vlan0 = vec_or(vlan0, l3_l4e);

	rx_pkts[0]->ol_flags = (uint64_t)vlan0[2];
	rx_pkts[1]->ol_flags = (uint64_t)vlan0[3];
	rx_pkts[2]->ol_flags = (uint64_t)vlan0[0];
	rx_pkts[3]->ol_flags = (uint64_t)vlan0[1];
}

#define PKTLEN_SHIFT 10

static inline void
desc_to_ptype_v(__vector unsigned long descs[4], struct rte_mbuf **rx_pkts,
		uint32_t *ptype_tbl)
{
	__vector unsigned long ptype0 = vec_mergel(descs[0], descs[1]);
	__vector unsigned long ptype1 = vec_mergel(descs[2], descs[3]);

	ptype0 = vec_sr(ptype0, (__vector unsigned long){30, 30});
	ptype1 = vec_sr(ptype1, (__vector unsigned long){30, 30});

	rx_pkts[0]->packet_type =
		ptype_tbl[(*(__vector unsigned char *)&ptype0)[0]];
	rx_pkts[1]->packet_type =
		ptype_tbl[(*(__vector unsigned char *)&ptype0)[8]];
	rx_pkts[2]->packet_type =
		ptype_tbl[(*(__vector unsigned char *)&ptype1)[0]];
	rx_pkts[3]->packet_type =
		ptype_tbl[(*(__vector unsigned char *)&ptype1)[8]];
}
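
/*
 * Illustrative sketch (not part of the driver): the ptype lookup above is
 * the vector form of reading QW1 of each write-back descriptor and indexing
 * the adapter's ptype table, roughly
 *
 *	uint64_t qw1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
 *	uint8_t ptype = (qw1 & I40E_RXD_QW1_PTYPE_MASK) >>
 *			I40E_RXD_QW1_PTYPE_SHIFT;
 *	mb->packet_type = ptype_tbl[ptype];
 *
 * The vec_sr() by 30 bits plays the role of the mask-and-shift, and byte 0
 * (or byte 8) of the shifted vector is the 8-bit ptype used as table index.
 */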

/**
 * vPMD raw receive routine (only accepts nb_pkts >= RTE_I40E_DESCS_PER_LOOP)
 *
 * Notice:
 * - if nb_pkts < RTE_I40E_DESCS_PER_LOOP, no packet is received
 * - nb_pkts is floor-aligned to a multiple of RTE_I40E_DESCS_PER_LOOP
 */
static inline uint16_t
_recv_raw_pkts_vec(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
		   uint16_t nb_pkts, uint8_t *split_packet)
{
	volatile union i40e_rx_desc *rxdp;
	struct i40e_rx_entry *sw_ring;
	uint16_t nb_pkts_recd;
	int pos;
	uint64_t var;
	__vector unsigned char shuf_msk;
	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;

	__vector unsigned short crc_adjust = (__vector unsigned short){
		0, 0,         /* ignore pkt_type field */
		rxq->crc_len, /* sub crc on pkt_len */
		0,            /* ignore high-16bits of pkt_len */
		rxq->crc_len, /* sub crc on data_len */
		0, 0, 0       /* ignore non-length fields */
		};
	__vector unsigned long dd_check, eop_check;

	/* nb_pkts has to be floor-aligned to RTE_I40E_DESCS_PER_LOOP */
	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_I40E_DESCS_PER_LOOP);

	/* Just the act of getting into the function from the application is
	 * going to cost about 7 cycles
	 */
	rxdp = rxq->rx_ring + rxq->rx_tail;

	rte_prefetch0(rxdp);

	/* See if we need to rearm the RX queue - gives the prefetch a bit
	 * of time to act
	 */
	if (rxq->rxrearm_nb > RTE_I40E_RXQ_REARM_THRESH)
		i40e_rxq_rearm(rxq);

	/* Before we start moving massive data around, check to see if
	 * there is actually a packet available
	 */
	if (!(rxdp->wb.qword1.status_error_len &
			rte_cpu_to_le_32(1 << I40E_RX_DESC_STATUS_DD_SHIFT)))
		return 0;

	/* 4 packets DD mask */
	dd_check = (__vector unsigned long){0x0000000100000001ULL,
					    0x0000000100000001ULL};

	/* 4 packets EOP mask */
	eop_check = (__vector unsigned long){0x0000000200000002ULL,
					     0x0000000200000002ULL};

	/* mask to shuffle from desc. to mbuf */
	shuf_msk = (__vector unsigned char){
		0xFF, 0xFF,   /* pkt_type set as unknown */
		0xFF, 0xFF,   /* pkt_type set as unknown */
		14, 15,       /* octet 15~14, low 16 bits pkt_len */
		0xFF, 0xFF,   /* skip high 16 bits pkt_len, zero out */
		14, 15,       /* octet 15~14, 16 bits data_len */
		2, 3,         /* octet 2~3, low 16 bits vlan_macip */
		4, 5, 6, 7    /* octet 4~7, 32bits rss */
		};
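
	/* The shuffle above rearranges descriptor bytes straight into the
	 * mbuf's rx_descriptor_fields1 area: bytes 0-3 are zeroed
	 * (packet_type is filled later by desc_to_ptype_v()), bytes 4-7
	 * become pkt_len with the upper half zeroed, bytes 8-9 data_len,
	 * 10-11 vlan_tci and 12-15 the RSS hash, all taken from the
	 * little-endian descriptor write-back.
	 */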

	/* Cache is empty -> need to scan the buffer rings, but first move
	 * the next 'n' mbufs into the cache
	 */
	sw_ring = &rxq->sw_ring[rxq->rx_tail];

	/* A. load 4 packets in one loop
	 * [A*. mask out 4 unused dirty fields in desc]
	 * B. copy 4 mbuf pointers from sw_ring to rx_pkts
	 * C. calc the number of DD bits among the 4 packets
	 * [C*. extract the end-of-packet bit, if requested]
	 * D. fill info. from desc to mbuf
	 */

	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
			pos += RTE_I40E_DESCS_PER_LOOP,
			rxdp += RTE_I40E_DESCS_PER_LOOP) {
		__vector unsigned long descs[RTE_I40E_DESCS_PER_LOOP];
		__vector unsigned char pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
		__vector unsigned short staterr, sterr_tmp1, sterr_tmp2;
		__vector unsigned long mbp1, mbp2; /* two mbuf pointers
						    * in one XMM reg.
						    */

		/* B.1 load 2 mbuf pointers */
		mbp1 = *(__vector unsigned long *)&sw_ring[pos];
		/* Read desc statuses backwards to avoid race condition */
		/* A.1 load desc[3] */
		descs[3] = *RTE_CAST_PTR(__vector unsigned long *, rxdp + 3);
		rte_compiler_barrier();

		/* B.2 copy 2 mbuf pointers into rx_pkts */
		*(__vector unsigned long *)&rx_pkts[pos] = mbp1;

		/* B.1 load 2 mbuf pointers */
		mbp2 = *(__vector unsigned long *)&sw_ring[pos + 2];

		/* A.1 load desc[2-0] */
		descs[2] = *RTE_CAST_PTR(__vector unsigned long *, rxdp + 2);
		rte_compiler_barrier();
		descs[1] = *RTE_CAST_PTR(__vector unsigned long *, rxdp + 1);
		rte_compiler_barrier();
		descs[0] = *RTE_CAST_PTR(__vector unsigned long *, rxdp);

		/* B.2 copy 2 mbuf pointers into rx_pkts */
		*(__vector unsigned long *)&rx_pkts[pos + 2] = mbp2;

		if (split_packet) {
			rte_mbuf_prefetch_part2(rx_pkts[pos]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
		}

		/* avoid compiler reorder optimization */
		rte_compiler_barrier();

		/* pkt 3,4 shift the pktlen field to be 16-bit aligned */
		const __vector unsigned int len3 = vec_sl(
			vec_ld(0, (__vector unsigned int *)&descs[3]),
			(__vector unsigned int){0, 0, 0, PKTLEN_SHIFT});

		const __vector unsigned int len2 = vec_sl(
			vec_ld(0, (__vector unsigned int *)&descs[2]),
			(__vector unsigned int){0, 0, 0, PKTLEN_SHIFT});

		/* merge the now-aligned packet length fields back in */
		descs[3] = (__vector unsigned long)len3;
		descs[2] = (__vector unsigned long)len2;

		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
		pkt_mb4 = vec_perm((__vector unsigned char)descs[3],
				   (__vector unsigned char){}, shuf_msk);
		pkt_mb3 = vec_perm((__vector unsigned char)descs[2],
				   (__vector unsigned char){}, shuf_msk);

		/* C.1 4=>2 filter staterr info only */
		sterr_tmp2 = vec_mergel((__vector unsigned short)descs[3],
					(__vector unsigned short)descs[2]);
		/* C.1 4=>2 filter staterr info only */
		sterr_tmp1 = vec_mergel((__vector unsigned short)descs[1],
					(__vector unsigned short)descs[0]);
		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
		pkt_mb4 = (__vector unsigned char)vec_sub(
				(__vector unsigned short)pkt_mb4, crc_adjust);
		pkt_mb3 = (__vector unsigned char)vec_sub(
				(__vector unsigned short)pkt_mb3, crc_adjust);

		/* pkt 1,2 shift the pktlen field to be 16-bit aligned */
		const __vector unsigned int len1 = vec_sl(
			vec_ld(0, (__vector unsigned int *)&descs[1]),
			(__vector unsigned int){0, 0, 0, PKTLEN_SHIFT});
		const __vector unsigned int len0 = vec_sl(
			vec_ld(0, (__vector unsigned int *)&descs[0]),
			(__vector unsigned int){0, 0, 0, PKTLEN_SHIFT});

		/* merge the now-aligned packet length fields back in */
		descs[1] = (__vector unsigned long)len1;
		descs[0] = (__vector unsigned long)len0;

		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
		pkt_mb2 = vec_perm((__vector unsigned char)descs[1],
				   (__vector unsigned char){}, shuf_msk);
		pkt_mb1 = vec_perm((__vector unsigned char)descs[0],
				   (__vector unsigned char){}, shuf_msk);

		/* C.2 get 4 pkts staterr value */
		staterr = (__vector unsigned short)vec_mergeh(
				sterr_tmp1, sterr_tmp2);

		/* D.3 copy final 3,4 data to rx_pkts */
		vec_st(pkt_mb4, 0,
		       (__vector unsigned char *)&rx_pkts[pos + 3]
				->rx_descriptor_fields1
		      );
		vec_st(pkt_mb3, 0,
		       (__vector unsigned char *)&rx_pkts[pos + 2]
				->rx_descriptor_fields1
		      );

		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
		pkt_mb2 = (__vector unsigned char)vec_sub(
				(__vector unsigned short)pkt_mb2, crc_adjust);
		pkt_mb1 = (__vector unsigned char)vec_sub(
				(__vector unsigned short)pkt_mb1, crc_adjust);

		/* C* extract and record EOP bit */
		if (split_packet) {
			__vector unsigned char eop_shuf_mask =
				(__vector unsigned char){
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF,
					0x04, 0x0C, 0x00, 0x08
				};

			/* and with mask to extract bits, flipping 1-0 */
			__vector unsigned char eop_bits = vec_and(
				(__vector unsigned char)vec_nor(staterr, staterr),
				(__vector unsigned char)eop_check);
			/* the staterr values are not in order; the count of
			 * DD bits does not care, but end-of-packet tracking
			 * does, so shuffle. This also compresses the 32-bit
			 * values to 8-bit
			 */
			eop_bits = vec_perm(eop_bits, (__vector unsigned char){},
					    eop_shuf_mask);
			/* store the resulting 32-bit value */
			*split_packet = (vec_ld(0,
					 (__vector unsigned int *)&eop_bits))[0];
			split_packet += RTE_I40E_DESCS_PER_LOOP;

			/* zero-out next pointers */
			rx_pkts[pos]->next = NULL;
			rx_pkts[pos + 1]->next = NULL;
			rx_pkts[pos + 2]->next = NULL;
			rx_pkts[pos + 3]->next = NULL;
		}

		/* C.3 calc available number of desc */
		staterr = vec_and(staterr, (__vector unsigned short)dd_check);

		/* D.3 copy final 1,2 data to rx_pkts */
		vec_st(pkt_mb2, 0,
		       (__vector unsigned char *)&rx_pkts[pos + 1]
				->rx_descriptor_fields1
		      );
		vec_st(pkt_mb1, 0,
		       (__vector unsigned char *)&rx_pkts[pos]->rx_descriptor_fields1
		      );
		desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
		desc_to_olflags_v(descs, &rx_pkts[pos]);

		/* C.4 calc available number of desc */
		var = rte_popcount64((vec_ld(0,
			(__vector unsigned long *)&staterr)[0]));
		nb_pkts_recd += var;
		if (likely(var != RTE_I40E_DESCS_PER_LOOP))
			break;
	}

	/* Update our internal tail pointer */
	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);

	return nb_pkts_recd;
}

/* Notice:
 * - if nb_pkts < RTE_I40E_DESCS_PER_LOOP, no packet is received
 */
uint16_t
i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
		   uint16_t nb_pkts)
{
	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
}
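
/*
 * Usage sketch (illustrative, not part of the driver): when the vector RX
 * path is selected, i40e_recv_pkts_vec() is what ends up behind the normal
 * ethdev receive call, e.g.
 *
 *	struct rte_mbuf *bufs[RTE_I40E_VPMD_RX_BURST];
 *	uint16_t nb_rx = rte_eth_rx_burst(port_id, queue_id, bufs,
 *					  RTE_I40E_VPMD_RX_BURST);
 *
 * port_id and queue_id are placeholders for an already configured and
 * started port/queue.
 */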

/**
 * vPMD receive routine that reassembles a single burst of 32 scattered packets
 *
 * Notice:
 * - if nb_pkts < RTE_I40E_DESCS_PER_LOOP, no packet is received
 */
static uint16_t
i40e_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
			      uint16_t nb_pkts)
{
	struct i40e_rx_queue *rxq = rx_queue;
	uint8_t split_flags[RTE_I40E_VPMD_RX_BURST] = {0};

	/* get some new buffers */
	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
			split_flags);
	if (nb_bufs == 0)
		return 0;

	/* happy day case, full burst + no packets to be joined */
	const uint64_t *split_fl64 = (uint64_t *)split_flags;

	if (rxq->pkt_first_seg == NULL &&
	    split_fl64[0] == 0 && split_fl64[1] == 0 &&
	    split_fl64[2] == 0 && split_fl64[3] == 0)
		return nb_bufs;

	/* reassemble any packets that need reassembly */
	unsigned int i = 0;

	if (!rxq->pkt_first_seg) {
		/* find the first split flag, and only reassemble from there */
		while (i < nb_bufs && !split_flags[i])
			i++;
		if (i == nb_bufs)
			return nb_bufs;
	}
	return i + ci_rx_reassemble_packets(&rx_pkts[i], nb_bufs - i, &split_flags[i],
		&rxq->pkt_first_seg, &rxq->pkt_last_seg, rxq->crc_len);
}

/**
 * vPMD receive routine that reassembles scattered packets.
 */
uint16_t
i40e_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
			     uint16_t nb_pkts)
{
	uint16_t retval = 0;

	while (nb_pkts > RTE_I40E_VPMD_RX_BURST) {
		uint16_t burst;

		burst = i40e_recv_scattered_burst_vec(rx_queue,
						      rx_pkts + retval,
						      RTE_I40E_VPMD_RX_BURST);
		retval += burst;
		nb_pkts -= burst;
		if (burst < RTE_I40E_VPMD_RX_BURST)
			return retval;
	}

	return retval + i40e_recv_scattered_burst_vec(rx_queue,
						      rx_pkts + retval,
						      nb_pkts);
}
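
/*
 * The TX helpers below pack one mbuf into one 16-byte data descriptor.
 * Illustrative scalar equivalent (not part of the driver) of what vtx1()
 * stores with a single vector write:
 *
 *	txdp->buffer_addr = rte_cpu_to_le_64(rte_mbuf_data_iova(pkt));
 *	txdp->cmd_type_offset_bsz = rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DATA |
 *		((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) |
 *		((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));
 */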

static inline void
vtx1(volatile struct i40e_tx_desc *txdp,
		struct rte_mbuf *pkt, uint64_t flags)
{
	uint64_t high_qw = (I40E_TX_DESC_DTYPE_DATA |
			((uint64_t)flags << I40E_TXD_QW1_CMD_SHIFT) |
			((uint64_t)pkt->data_len << I40E_TXD_QW1_TX_BUF_SZ_SHIFT));

	__vector unsigned long descriptor = (__vector unsigned long){
		pkt->buf_iova + pkt->data_off, high_qw};
	*RTE_CAST_PTR(__vector unsigned long *, txdp) = descriptor;
}

static inline void
vtx(volatile struct i40e_tx_desc *txdp,
		struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
	int i;

	for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt)
		vtx1(txdp, *pkt, flags);
}

uint16_t
i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
			  uint16_t nb_pkts)
{
	struct i40e_tx_queue *txq = (struct i40e_tx_queue *)tx_queue;
	volatile struct i40e_tx_desc *txdp;
	struct ci_tx_entry *txep;
	uint16_t n, nb_commit, tx_id;
	uint64_t flags = I40E_TD_CMD;
	uint64_t rs = I40E_TX_DESC_CMD_RS | I40E_TD_CMD;
	int i;

	if (txq->nb_tx_free < txq->tx_free_thresh)
		i40e_tx_free_bufs(txq);

	nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
	nb_commit = nb_pkts;
	if (unlikely(nb_pkts == 0))
		return 0;

	tx_id = txq->tx_tail;
	txdp = &txq->tx_ring[tx_id];
	txep = &txq->sw_ring[tx_id];

	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);

	n = (uint16_t)(txq->nb_tx_desc - tx_id);
	if (nb_commit >= n) {
		tx_backlog_entry(txep, tx_pkts, n);

		for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
			vtx1(txdp, *tx_pkts, flags);

		vtx1(txdp, *tx_pkts++, rs);

		nb_commit = (uint16_t)(nb_commit - n);

		tx_id = 0;
		txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);

		/* avoid reaching the end of the ring */
		txdp = &txq->tx_ring[tx_id];
		txep = &txq->sw_ring[tx_id];
	}

	tx_backlog_entry(txep, tx_pkts, nb_commit);

	vtx(txdp, tx_pkts, nb_commit, flags);

	tx_id = (uint16_t)(tx_id + nb_commit);
	if (tx_id > txq->tx_next_rs) {
		txq->tx_ring[txq->tx_next_rs].cmd_type_offset_bsz |=
			rte_cpu_to_le_64(((uint64_t)I40E_TX_DESC_CMD_RS) <<
					 I40E_TXD_QW1_CMD_SHIFT);
		txq->tx_next_rs =
			(uint16_t)(txq->tx_next_rs + txq->tx_rs_thresh);
	}

	txq->tx_tail = tx_id;

	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);

	return nb_pkts;
}
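
/*
 * Worked example (illustrative, assuming nb_tx_desc = 512 and enough free
 * descriptors): with tx_tail = 510 and nb_pkts = 8, n = 2, so two
 * descriptors are written at the end of the ring (the second carrying the
 * RS bit), the remaining six are written starting at index 0, and the tail
 * register is bumped to 6.
 */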

void __rte_cold
i40e_rx_queue_release_mbufs_vec(struct i40e_rx_queue *rxq)
{
	_i40e_rx_queue_release_mbufs_vec(rxq);
}

int __rte_cold
i40e_rxq_vec_setup(struct i40e_rx_queue *rxq)
{
	return i40e_rxq_vec_setup_default(rxq);
}

int __rte_cold
i40e_txq_vec_setup(struct i40e_tx_queue __rte_unused *txq)
{
	return 0;
}

int __rte_cold
i40e_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
{
	return i40e_rx_vec_dev_conf_condition_check_default(dev);
}