/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2015 Intel Corporation
 */

#include <stdint.h>
#include <ethdev_driver.h>
#include <rte_malloc.h>

#include "ixgbe_ethdev.h"
#include "ixgbe_rxtx.h"
#include "ixgbe_rxtx_vec_common.h"

#include <rte_vect.h>

static inline void
ixgbe_rxq_rearm(struct ixgbe_rx_queue *rxq)
{
	int i;
	uint16_t rx_id;
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct ixgbe_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
	struct rte_mbuf *mb0, *mb1;
	__m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM,
			RTE_PKTMBUF_HEADROOM);
	__m128i dma_addr0, dma_addr1;

	const __m128i hba_msk = _mm_set_epi64x(0, UINT64_MAX);

	rxdp = rxq->rx_ring + rxq->rxrearm_start;

	/* Pull 'n' more MBUFs into the software ring */
	if (rte_mempool_get_bulk(rxq->mb_pool,
				 (void *)rxep,
				 RTE_IXGBE_RXQ_REARM_THRESH) < 0) {
		if (rxq->rxrearm_nb + RTE_IXGBE_RXQ_REARM_THRESH >=
		    rxq->nb_rx_desc) {
			dma_addr0 = _mm_setzero_si128();
			for (i = 0; i < RTE_IXGBE_DESCS_PER_LOOP; i++) {
				rxep[i].mbuf = &rxq->fake_mbuf;
				_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp[i].read),
						dma_addr0);
			}
		}
		rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
			RTE_IXGBE_RXQ_REARM_THRESH;
		return;
	}

	/* Initialize the mbufs in vector, process 2 mbufs in one loop */
	for (i = 0; i < RTE_IXGBE_RXQ_REARM_THRESH; i += 2, rxep += 2) {
		__m128i vaddr0, vaddr1;

		mb0 = rxep[0].mbuf;
		mb1 = rxep[1].mbuf;

		/* load buf_addr(lo 64bit) and buf_iova(hi 64bit) */
		RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_iova) !=
				offsetof(struct rte_mbuf, buf_addr) + 8);
		vaddr0 = _mm_loadu_si128((__m128i *)&(mb0->buf_addr));
		vaddr1 = _mm_loadu_si128((__m128i *)&(mb1->buf_addr));

		/* convert pa to dma_addr hdr/data */
		dma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);
		dma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);

		/* add headroom to pa values */
		dma_addr0 = _mm_add_epi64(dma_addr0, hdr_room);
		dma_addr1 = _mm_add_epi64(dma_addr1, hdr_room);

		/* set Header Buffer Address to zero */
		dma_addr0 = _mm_and_si128(dma_addr0, hba_msk);
		dma_addr1 = _mm_and_si128(dma_addr1, hba_msk);

		/* flush desc with pa dma_addr */
		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr0);
		_mm_store_si128(RTE_CAST_PTR(__m128i *, &rxdp++->read), dma_addr1);
	}

	rxq->rxrearm_start += RTE_IXGBE_RXQ_REARM_THRESH;
	if (rxq->rxrearm_start >= rxq->nb_rx_desc)
		rxq->rxrearm_start = 0;

	rxq->rxrearm_nb -= RTE_IXGBE_RXQ_REARM_THRESH;

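	/*
	 * Point the tail at the last descriptor just rearmed: one slot
	 * behind rxrearm_start, wrapping to the end of the ring when
	 * rxrearm_start is zero.
	 */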
	rx_id = (uint16_t)((rxq->rxrearm_start == 0) ?
			(rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));

	/* Update the tail pointer on the NIC */
	IXGBE_PCI_REG_WC_WRITE(rxq->rdt_reg_addr, rx_id);
}

#ifdef RTE_LIB_SECURITY
static inline void
desc_to_olflags_v_ipsec(__m128i descs[4], struct rte_mbuf **rx_pkts)
{
	__m128i sterr, rearm, tmp_e, tmp_p;
	uint32_t *rearm0 = (uint32_t *)rx_pkts[0]->rearm_data + 2;
	uint32_t *rearm1 = (uint32_t *)rx_pkts[1]->rearm_data + 2;
	uint32_t *rearm2 = (uint32_t *)rx_pkts[2]->rearm_data + 2;
	uint32_t *rearm3 = (uint32_t *)rx_pkts[3]->rearm_data + 2;
	const __m128i ipsec_sterr_msk =
		_mm_set1_epi32(IXGBE_RXDADV_IPSEC_STATUS_SECP |
			       IXGBE_RXDADV_IPSEC_ERROR_AUTH_FAILED);
	const __m128i ipsec_proc_msk =
		_mm_set1_epi32(IXGBE_RXDADV_IPSEC_STATUS_SECP);
	const __m128i ipsec_err_flag =
		_mm_set1_epi32(RTE_MBUF_F_RX_SEC_OFFLOAD_FAILED |
			       RTE_MBUF_F_RX_SEC_OFFLOAD);
	const __m128i ipsec_proc_flag = _mm_set1_epi32(RTE_MBUF_F_RX_SEC_OFFLOAD);

	rearm = _mm_set_epi32(*rearm3, *rearm2, *rearm1, *rearm0);
	sterr = _mm_set_epi32(_mm_extract_epi32(descs[3], 2),
			      _mm_extract_epi32(descs[2], 2),
			      _mm_extract_epi32(descs[1], 2),
			      _mm_extract_epi32(descs[0], 2));
	sterr = _mm_and_si128(sterr, ipsec_sterr_msk);
	tmp_e = _mm_cmpeq_epi32(sterr, ipsec_sterr_msk);
	tmp_p = _mm_cmpeq_epi32(sterr, ipsec_proc_msk);
	sterr = _mm_or_si128(_mm_and_si128(tmp_e, ipsec_err_flag),
				_mm_and_si128(tmp_p, ipsec_proc_flag));
	rearm = _mm_or_si128(rearm, sterr);
	*rearm0 = _mm_extract_epi32(rearm, 0);
	*rearm1 = _mm_extract_epi32(rearm, 1);
	*rearm2 = _mm_extract_epi32(rearm, 2);
	*rearm3 = _mm_extract_epi32(rearm, 3);
}
#endif

static inline void
desc_to_olflags_v(__m128i descs[4], __m128i mbuf_init, uint8_t vlan_flags,
		uint16_t udp_p_flag, struct rte_mbuf **rx_pkts)
{
	__m128i ptype0, ptype1, vtag0, vtag1, csum, udp_csum_skip;
	__m128i rearm0, rearm1, rearm2, rearm3;

	/* mask everything except rss type */
	const __m128i rsstype_msk = _mm_set_epi16(
			0x0000, 0x0000, 0x0000, 0x0000,
			0x000F, 0x000F, 0x000F, 0x000F);

	/* mask the lower byte of ol_flags */
	const __m128i ol_flags_msk = _mm_set_epi16(
			0x0000, 0x0000, 0x0000, 0x0000,
			0x00FF, 0x00FF, 0x00FF, 0x00FF);

	/* map rss type to rss hash flag */
	const __m128i rss_flags = _mm_set_epi8(RTE_MBUF_F_RX_FDIR, 0, 0, 0,
			0, 0, 0, RTE_MBUF_F_RX_RSS_HASH,
			RTE_MBUF_F_RX_RSS_HASH, 0, RTE_MBUF_F_RX_RSS_HASH, 0,
			RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, RTE_MBUF_F_RX_RSS_HASH, 0);

	/* mask everything except vlan present and l4/ip csum error */
	const __m128i vlan_csum_msk = _mm_set_epi16(
		(IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16,
		(IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16,
		(IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16,
		(IXGBE_RXDADV_ERR_TCPE | IXGBE_RXDADV_ERR_IPE) >> 16,
		IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP,
		IXGBE_RXD_STAT_VP, IXGBE_RXD_STAT_VP);

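	/*
	 * The two tables below are used with _mm_shuffle_epi8() as 16-entry
	 * lookup tables: the VP/IPE/L4E nibble assembled from each
	 * descriptor's status word selects the matching ol_flags byte.
	 */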
	/* map vlan present (0x8), IPE (0x2), L4E (0x1) to ol_flags */
	const __m128i vlan_csum_map_lo = _mm_set_epi8(
		0, 0, 0, 0,
		vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
		vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_BAD,
		vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
		vlan_flags | RTE_MBUF_F_RX_IP_CKSUM_GOOD,
		0, 0, 0, 0,
		RTE_MBUF_F_RX_IP_CKSUM_BAD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
		RTE_MBUF_F_RX_IP_CKSUM_BAD,
		RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD,
		RTE_MBUF_F_RX_IP_CKSUM_GOOD);

	const __m128i vlan_csum_map_hi = _mm_set_epi8(
		0, 0, 0, 0,
		0, RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
		RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(uint8_t),
		0, 0, 0, 0,
		0, RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(uint8_t), 0,
		RTE_MBUF_F_RX_L4_CKSUM_GOOD >> sizeof(uint8_t));

	/* mask everything except UDP header present if specified */
	const __m128i udp_hdr_p_msk = _mm_set_epi16
		(0, 0, 0, 0,
		 udp_p_flag, udp_p_flag, udp_p_flag, udp_p_flag);

	const __m128i udp_csum_bad_shuf = _mm_set_epi8
		(0, 0, 0, 0, 0, 0, 0, 0,
		 0, 0, 0, 0, 0, 0, ~(uint8_t)RTE_MBUF_F_RX_L4_CKSUM_BAD, 0xFF);

	ptype0 = _mm_unpacklo_epi16(descs[0], descs[1]);
	ptype1 = _mm_unpacklo_epi16(descs[2], descs[3]);
	vtag0 = _mm_unpackhi_epi16(descs[0], descs[1]);
	vtag1 = _mm_unpackhi_epi16(descs[2], descs[3]);

	ptype0 = _mm_unpacklo_epi32(ptype0, ptype1);
	/* save the UDP header present information */
	udp_csum_skip = _mm_and_si128(ptype0, udp_hdr_p_msk);
	ptype0 = _mm_and_si128(ptype0, rsstype_msk);
	ptype0 = _mm_shuffle_epi8(rss_flags, ptype0);

	vtag1 = _mm_unpacklo_epi32(vtag0, vtag1);
	vtag1 = _mm_and_si128(vtag1, vlan_csum_msk);

	/* the csum bits sit in the most significant bits of each word; to
	 * use them with a shuffle they must be shifted down, changing the
	 * mask from 0xc000 to 0x0003.
	 */
	csum = _mm_srli_epi16(vtag1, 14);

	/* now or the most significant 64 bits containing the checksum
	 * flags with the vlan present flags.
	 */
	csum = _mm_srli_si128(csum, 8);
	vtag1 = _mm_or_si128(csum, vtag1);

	/* convert VP, IPE, L4E to ol_flags */
	vtag0 = _mm_shuffle_epi8(vlan_csum_map_hi, vtag1);
	vtag0 = _mm_slli_epi16(vtag0, sizeof(uint8_t));

	vtag1 = _mm_shuffle_epi8(vlan_csum_map_lo, vtag1);
	vtag1 = _mm_and_si128(vtag1, ol_flags_msk);
	vtag1 = _mm_or_si128(vtag0, vtag1);

	vtag1 = _mm_or_si128(ptype0, vtag1);

	/* convert the UDP-header-present bit 0x200 to 0x1 so it lines up
	 * with the RTE_MBUF_F_RX_L4_CKSUM_BAD value in the low byte of each
	 * 16-bit ol_flags word in vtag1 (4x16), then clear the bad checksum
	 * flag for those packets via shuffle and bit-mask.
	 */
	udp_csum_skip = _mm_srli_epi16(udp_csum_skip, 9);
	udp_csum_skip = _mm_shuffle_epi8(udp_csum_bad_shuf, udp_csum_skip);
	vtag1 = _mm_and_si128(vtag1, udp_csum_skip);

	/*
	 * At this point, we have the 4 sets of flags in the low 64-bits
	 * of vtag1 (4x16).
	 * We want to extract these, and merge them with the mbuf init data
	 * so we can do a single 16-byte write to the mbuf to set the flags
	 * and all the other initialization fields. Extracting the
	 * appropriate flags means that we have to do a shift and blend for
	 * each mbuf before we do the write.
	 */
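	/*
	 * Blend mask 0x10 replaces 16-bit lane 4 of mbuf_init, i.e. bytes
	 * 8-9 of the 16-byte rearm write, which hold the low 16 bits of
	 * ol_flags; the different shift counts move each packet's flag word
	 * into that lane.
	 */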
	rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(vtag1, 8), 0x10);
	rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(vtag1, 6), 0x10);
	rearm2 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(vtag1, 4), 0x10);
	rearm3 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(vtag1, 2), 0x10);

	/* write the rearm data and the olflags in one write */
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
			offsetof(struct rte_mbuf, rearm_data) + 8);
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) !=
			RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16));
	_mm_store_si128((__m128i *)&rx_pkts[0]->rearm_data, rearm0);
	_mm_store_si128((__m128i *)&rx_pkts[1]->rearm_data, rearm1);
	_mm_store_si128((__m128i *)&rx_pkts[2]->rearm_data, rearm2);
	_mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3);
}

static inline uint32_t get_packet_type(int index,
		uint32_t pkt_info,
		uint32_t etqf_check,
		uint32_t tunnel_check)
{
	if (etqf_check & (0x02 << (index * RTE_IXGBE_DESCS_PER_LOOP)))
		return RTE_PTYPE_UNKNOWN;

	if (tunnel_check & (0x02 << (index * RTE_IXGBE_DESCS_PER_LOOP))) {
		pkt_info &= IXGBE_PACKET_TYPE_MASK_TUNNEL;
		return ptype_table_tn[pkt_info];
	}

	pkt_info &= IXGBE_PACKET_TYPE_MASK_82599;
	return ptype_table[pkt_info];
}

static inline void
desc_to_ptype_v(__m128i descs[4], uint16_t pkt_type_mask,
		struct rte_mbuf **rx_pkts)
{
	__m128i etqf_mask = _mm_set_epi64x(0x800000008000LL, 0x800000008000LL);
	__m128i ptype_mask = _mm_set_epi32(
		pkt_type_mask, pkt_type_mask, pkt_type_mask, pkt_type_mask);
	__m128i tunnel_mask =
		_mm_set_epi64x(0x100000001000LL, 0x100000001000LL);

	uint32_t etqf_check, tunnel_check, pkt_info;

	__m128i ptype0 = _mm_unpacklo_epi32(descs[0], descs[2]);
	__m128i ptype1 = _mm_unpacklo_epi32(descs[1], descs[3]);

	/* interleave low 32 bits,
	 * now we have 4 ptypes in an XMM register
	 */
	ptype0 = _mm_unpacklo_epi32(ptype0, ptype1);

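	/*
	 * _mm_movemask_epi8() packs the top bit of every byte into a 16-bit
	 * mask, so each packet contributes four bits; get_packet_type() then
	 * tests bit 1 of each packet's group (0x02 << (index * 4)) to see
	 * whether the ETQF or tunnel bit was set for that descriptor.
	 */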
	/* create an ETQF bitmask based on the ETQF bit */
	etqf_check = _mm_movemask_epi8(_mm_and_si128(ptype0, etqf_mask));

	/* shift right by IXGBE_PACKET_TYPE_SHIFT, and apply ptype mask */
	ptype0 = _mm_and_si128(_mm_srli_epi32(ptype0, IXGBE_PACKET_TYPE_SHIFT),
			       ptype_mask);

	/* create a tunnel bitmask based on the tunnel bit */
	tunnel_check = _mm_movemask_epi8(
		_mm_slli_epi32(_mm_and_si128(ptype0, tunnel_mask), 0x3));

	pkt_info = _mm_extract_epi32(ptype0, 0);
	rx_pkts[0]->packet_type =
		get_packet_type(0, pkt_info, etqf_check, tunnel_check);
	pkt_info = _mm_extract_epi32(ptype0, 1);
	rx_pkts[1]->packet_type =
		get_packet_type(1, pkt_info, etqf_check, tunnel_check);
	pkt_info = _mm_extract_epi32(ptype0, 2);
	rx_pkts[2]->packet_type =
		get_packet_type(2, pkt_info, etqf_check, tunnel_check);
	pkt_info = _mm_extract_epi32(ptype0, 3);
	rx_pkts[3]->packet_type =
		get_packet_type(3, pkt_info, etqf_check, tunnel_check);
}

/**
 * vPMD raw receive routine; only accepts bursts of
 * nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP.
 *
 * Notice:
 * - if nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, no packets are returned
 * - nb_pkts is floor-aligned to a multiple of RTE_IXGBE_DESCS_PER_LOOP
 */
static inline uint16_t
_recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts, uint8_t *split_packet)
{
	volatile union ixgbe_adv_rx_desc *rxdp;
	struct ixgbe_rx_entry *sw_ring;
	uint16_t nb_pkts_recd;
#ifdef RTE_LIB_SECURITY
	uint8_t use_ipsec = rxq->using_ipsec;
#endif
	int pos;
	uint64_t var;
	__m128i shuf_msk;
	__m128i crc_adjust = _mm_set_epi16(
				0, 0, 0,       /* ignore non-length fields */
				-rxq->crc_len, /* sub crc on data_len */
				0,             /* ignore high-16bits of pkt_len */
				-rxq->crc_len, /* sub crc on pkt_len */
				0, 0           /* ignore pkt_type field */
			);
	/*
	 * compile-time check the above crc_adjust layout is correct.
	 * NOTE: the first field (lowest address) is given last in set_epi16
	 * call above.
	 */
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
	__m128i dd_check, eop_check;
	__m128i mbuf_init;
	uint8_t vlan_flags;
	uint16_t udp_p_flag = 0; /* Rx Descriptor UDP header present */

	/*
	 * If `rx_tail` wraps back to zero and advances faster than
	 * `rxrearm_start`, it can catch up with `rxrearm_start` and surpass
	 * it. This may cause some mbufs to be reused by the application.
	 *
	 * So we need to restrict the burst size to ensure that `rx_tail`
	 * will not exceed `rxrearm_start`.
	 */
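	/*
	 * Limiting each burst to the rearm threshold means rx_tail can
	 * advance by at most RTE_IXGBE_RXQ_REARM_THRESH descriptors per
	 * call, which is what keeps it from overtaking rxrearm_start.
	 */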
	nb_pkts = RTE_MIN(nb_pkts, RTE_IXGBE_RXQ_REARM_THRESH);

	/* nb_pkts has to be floor-aligned to RTE_IXGBE_DESCS_PER_LOOP */
	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);

	/* Just the act of getting into the function from the application is
	 * going to cost about 7 cycles
	 */
	rxdp = rxq->rx_ring + rxq->rx_tail;

	rte_prefetch0(rxdp);

	/* See if we need to rearm the RX queue - gives the prefetch a bit
	 * of time to act
	 */
	if (rxq->rxrearm_nb > RTE_IXGBE_RXQ_REARM_THRESH)
		ixgbe_rxq_rearm(rxq);

	/* Before we start moving massive data around, check to see if
	 * there is actually a packet available
	 */
	if (!(rxdp->wb.upper.status_error &
				rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD)))
		return 0;

	if (rxq->rx_udp_csum_zero_err)
		udp_p_flag = IXGBE_RXDADV_PKTTYPE_UDP;

	/* 4 packets DD mask */
	dd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL);

	/* 4 packets EOP mask */
	eop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL);

	/* mask to shuffle from desc. to mbuf */
	shuf_msk = _mm_set_epi8(
		7, 6, 5, 4,  /* octet 4~7, 32bits rss */
		15, 14,      /* octet 14~15, low 16 bits vlan_macip */
		13, 12,      /* octet 12~13, 16 bits data_len */
		0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */
		13, 12,      /* octet 12~13, low 16 bits pkt_len */
		0xFF, 0xFF,  /* skip 32 bit pkt_type */
		0xFF, 0xFF
		);
	/*
	 * Compile-time verify the shuffle mask
	 * NOTE: some field positions already verified above, but duplicated
	 * here for completeness in case of future modifications.
	 */
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) !=
			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4);
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) !=
			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8);
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) !=
			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10);
	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) !=
			offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12);

	mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);

	/* Cache is empty -> need to scan the buffer rings, but first move
	 * the next 'n' mbufs into the cache
	 */
	sw_ring = &rxq->sw_ring[rxq->rx_tail];

	/* ensure these 2 flags are in the lower 8 bits */
	RTE_BUILD_BUG_ON((RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED) > UINT8_MAX);
	vlan_flags = rxq->vlan_flags & UINT8_MAX;

	/* A. load 4 packets in one loop
	 * [A*. mask out 4 unused dirty fields in desc]
	 * B. copy 4 mbuf pointers from sw_ring to rx_pkts
	 * C. calc the number of DD bits among the 4 packets
	 * [C*. extract the end-of-packet bit, if requested]
	 * D. fill info from desc to mbuf
	 */
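	/*
	 * Each iteration handles RTE_IXGBE_DESCS_PER_LOOP (4) descriptors;
	 * the loop ends as soon as fewer than 4 DD bits are found set
	 * (step C.4 at the bottom of the loop body).
	 */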
	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
			pos += RTE_IXGBE_DESCS_PER_LOOP,
			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
		__m128i descs[RTE_IXGBE_DESCS_PER_LOOP];
		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
		__m128i zero, staterr, sterr_tmp1, sterr_tmp2;
		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
		__m128i mbp1;
#if defined(RTE_ARCH_X86_64)
		__m128i mbp2;
#endif

		/* B.1 load 2 (64 bit) or 4 (32 bit) mbuf pointers */
		mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);

		/* Read desc statuses backwards to avoid race condition */
		/* A.1 load desc[3] */
		descs[3] = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, rxdp + 3));
		rte_compiler_barrier();

		/* B.2 copy 2 64 bit or 4 32 bit mbuf pointers into rx_pkts */
		_mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);

#if defined(RTE_ARCH_X86_64)
		/* B.1 load 2 64 bit mbuf pointers */
		mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos+2]);
#endif

		/* A.1 load desc[2-0] */
		descs[2] = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, rxdp + 2));
		rte_compiler_barrier();
		descs[1] = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, rxdp + 1));
		rte_compiler_barrier();
		descs[0] = _mm_loadu_si128(RTE_CAST_PTR(const __m128i *, rxdp));

#if defined(RTE_ARCH_X86_64)
		/* B.2 copy 2 mbuf pointers into rx_pkts */
		_mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);
#endif

		if (split_packet) {
			rte_mbuf_prefetch_part2(rx_pkts[pos]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 1]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 2]);
			rte_mbuf_prefetch_part2(rx_pkts[pos + 3]);
		}

		/* avoid compiler reorder optimization */
		rte_compiler_barrier();

		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
		pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
		pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);

		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
		pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);

		/* C.1 4=>2 filter staterr info only */
		sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
		/* C.1 4=>2 filter staterr info only */
		sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);

		/* set ol_flags with vlan packet type */
		desc_to_olflags_v(descs, mbuf_init, vlan_flags, udp_p_flag,
				  &rx_pkts[pos]);

#ifdef RTE_LIB_SECURITY
		if (unlikely(use_ipsec))
			desc_to_olflags_v_ipsec(descs, &rx_pkts[pos]);
#endif

		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);

		/* C.2 get 4 pkts staterr value */
		zero = _mm_xor_si128(dd_check, dd_check);
		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);

		/* D.3 copy final 3,4 data to rx_pkts */
		_mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1,
				pkt_mb4);
		_mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1,
				pkt_mb3);

		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);

		/* C* extract and record EOP bit */
		if (split_packet) {
			__m128i eop_shuf_mask = _mm_set_epi8(
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF,
					0xFF, 0xFF, 0xFF, 0xFF,
					0x04, 0x0C, 0x00, 0x08
					);

			/* and with mask to extract bits, flipping 1-0 */
			__m128i eop_bits = _mm_andnot_si128(staterr, eop_check);
			/* the staterr values are not in order; that does not
			 * matter when counting DD bits, but it does for
			 * end-of-packet tracking, so shuffle. This also
			 * compresses the 32-bit values to 8-bit.
			 */
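			/* eop_shuf_mask gathers the low status byte of
			 * packets 0-3 (lanes 2, 0, 3 and 1 of staterr) into
			 * bytes 0-3, so each split_packet[] entry is non-zero
			 * when its packet lacks the EOP bit.
			 */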
			eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);
			/* store the resulting 32-bit value */
			*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);
			split_packet += RTE_IXGBE_DESCS_PER_LOOP;
		}

		/* C.3 calc available number of desc */
		staterr = _mm_and_si128(staterr, dd_check);
		staterr = _mm_packs_epi32(staterr, zero);

		/* D.3 copy final 1,2 data to rx_pkts */
		_mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1,
				pkt_mb2);
		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
				pkt_mb1);

		desc_to_ptype_v(descs, rxq->pkt_type_mask, &rx_pkts[pos]);

		/* C.4 calc available number of desc */
		var = rte_popcount64(_mm_cvtsi128_si64(staterr));
		nb_pkts_recd += var;
		if (likely(var != RTE_IXGBE_DESCS_PER_LOOP))
			break;
	}

	/* Update our internal tail pointer */
	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
	rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);

	return nb_pkts_recd;
}

/**
 * vPMD receive routine; only accepts bursts of
 * nb_pkts >= RTE_IXGBE_DESCS_PER_LOOP.
 *
 * Notice:
 * - if nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, no packets are returned
 * - nb_pkts is floor-aligned to a multiple of RTE_IXGBE_DESCS_PER_LOOP
 */
uint16_t
ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);
}

/**
 * vPMD receive routine that reassembles scattered packets
 *
 * Notice:
 * - if nb_pkts < RTE_IXGBE_DESCS_PER_LOOP, no packets are returned
 * - nb_pkts is floor-aligned to a multiple of RTE_IXGBE_DESCS_PER_LOOP
 */
static uint16_t
ixgbe_recv_scattered_burst_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	struct ixgbe_rx_queue *rxq = rx_queue;
	uint8_t split_flags[RTE_IXGBE_MAX_RX_BURST] = {0};

	/* get some new buffers */
	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
			split_flags);
	if (nb_bufs == 0)
		return 0;

	/* happy day case, full burst + no packets to be joined */
	const uint64_t *split_fl64 = (uint64_t *)split_flags;
	if (rxq->pkt_first_seg == NULL &&
			split_fl64[0] == 0 && split_fl64[1] == 0 &&
			split_fl64[2] == 0 && split_fl64[3] == 0)
		return nb_bufs;

	/* reassemble any packets that need reassembly */
	unsigned int i = 0;
	if (rxq->pkt_first_seg == NULL) {
		/* find the first split flag, and only reassemble from there */
		while (i < nb_bufs && !split_flags[i])
			i++;
		if (i == nb_bufs)
			return nb_bufs;
		rxq->pkt_first_seg = rx_pkts[i];
	}
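	/* packets before index i already end with EOP; merge the remaining
	 * buffers into multi-segment packets and return the total number of
	 * complete packets handed back to the caller.
	 */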
	return i + ci_rx_reassemble_packets(&rx_pkts[i], nb_bufs - i, &split_flags[i],
			&rxq->pkt_first_seg, &rxq->pkt_last_seg, rxq->crc_len);
}

/**
 * vPMD receive routine that reassembles scattered packets.
 */
uint16_t
ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
		uint16_t nb_pkts)
{
	uint16_t retval = 0;

	while (nb_pkts > RTE_IXGBE_MAX_RX_BURST) {
		uint16_t burst;

		burst = ixgbe_recv_scattered_burst_vec(rx_queue,
						       rx_pkts + retval,
						       RTE_IXGBE_MAX_RX_BURST);
		retval += burst;
		nb_pkts -= burst;
		if (burst < RTE_IXGBE_MAX_RX_BURST)
			return retval;
	}

	return retval + ixgbe_recv_scattered_burst_vec(rx_queue,
						       rx_pkts + retval,
						       nb_pkts);
}

/* Build a single Tx descriptor: the buffer DMA address goes in the low
 * quadword; the command/type flags and data_len form cmd_type_len, and
 * pkt_len lands in the PAYLEN field of olinfo_status in the high quadword.
 */
static inline void
vtx1(volatile union ixgbe_adv_tx_desc *txdp,
		struct rte_mbuf *pkt, uint64_t flags)
{
	__m128i descriptor = _mm_set_epi64x((uint64_t)pkt->pkt_len << 46 |
			flags | pkt->data_len,
			pkt->buf_iova + pkt->data_off);
	_mm_store_si128(RTE_CAST_PTR(__m128i *, &txdp->read), descriptor);
}

static inline void
vtx(volatile union ixgbe_adv_tx_desc *txdp,
		struct rte_mbuf **pkt, uint16_t nb_pkts, uint64_t flags)
{
	int i;

	for (i = 0; i < nb_pkts; ++i, ++txdp, ++pkt)
		vtx1(txdp, *pkt, flags);
}

uint16_t
ixgbe_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
		uint16_t nb_pkts)
{
	struct ixgbe_tx_queue *txq = (struct ixgbe_tx_queue *)tx_queue;
	volatile union ixgbe_adv_tx_desc *txdp;
	struct ci_tx_entry_vec *txep;
	uint16_t n, nb_commit, tx_id;
	uint64_t flags = DCMD_DTYP_FLAGS;
	uint64_t rs = IXGBE_ADVTXD_DCMD_RS | DCMD_DTYP_FLAGS;
	int i;

	/* crossing the tx_rs_thresh boundary is not allowed */
	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);

	if (txq->nb_tx_free < txq->tx_free_thresh)
		ixgbe_tx_free_bufs(txq);

	nb_commit = nb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);
	if (unlikely(nb_pkts == 0))
		return 0;

	tx_id = txq->tx_tail;
	txdp = &txq->tx_ring[tx_id];
	txep = &txq->sw_ring_v[tx_id];

	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);

	n = (uint16_t)(txq->nb_tx_desc - tx_id);
	if (nb_commit >= n) {
		/* the request wraps past the end of the ring: fill the tail
		 * of the ring first, setting the RS bit on its last
		 * descriptor, then continue from slot 0.
		 */
		tx_backlog_entry(txep, tx_pkts, n);

		for (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)
			vtx1(txdp, *tx_pkts, flags);

		vtx1(txdp, *tx_pkts++, rs);

		nb_commit = (uint16_t)(nb_commit - n);

		tx_id = 0;
		txq->tx_next_rs = (uint16_t)(txq->tx_rs_thresh - 1);

		/* avoid reaching the end of the ring */
		txdp = &(txq->tx_ring[tx_id]);
		txep = &txq->sw_ring_v[tx_id];
	}

	tx_backlog_entry(txep, tx_pkts, nb_commit);

	vtx(txdp, tx_pkts, nb_commit, flags);

	tx_id = (uint16_t)(tx_id + nb_commit);
	if (tx_id > txq->tx_next_rs) {
		txq->tx_ring[txq->tx_next_rs].read.cmd_type_len |=
			rte_cpu_to_le_32(IXGBE_ADVTXD_DCMD_RS);
		txq->tx_next_rs = (uint16_t)(txq->tx_next_rs +
			txq->tx_rs_thresh);
	}

	txq->tx_tail = tx_id;

	IXGBE_PCI_REG_WC_WRITE(txq->qtx_tail, txq->tx_tail);

	return nb_pkts;
}

static void __rte_cold
ixgbe_tx_queue_release_mbufs_vec(struct ixgbe_tx_queue *txq)
{
	_ixgbe_tx_queue_release_mbufs_vec(txq);
}

void __rte_cold
ixgbe_rx_queue_release_mbufs_vec(struct ixgbe_rx_queue *rxq)
{
	_ixgbe_rx_queue_release_mbufs_vec(rxq);
}

static void __rte_cold
ixgbe_tx_free_swring(struct ixgbe_tx_queue *txq)
{
	_ixgbe_tx_free_swring_vec(txq);
}

static void __rte_cold
ixgbe_reset_tx_queue(struct ixgbe_tx_queue *txq)
{
	_ixgbe_reset_tx_queue_vec(txq);
}

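/* Tx queue operations used when the vector Tx path is selected */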
static const struct ixgbe_txq_ops vec_txq_ops = {
	.release_mbufs = ixgbe_tx_queue_release_mbufs_vec,
	.free_swring = ixgbe_tx_free_swring,
	.reset = ixgbe_reset_tx_queue,
};

int __rte_cold
ixgbe_rxq_vec_setup(struct ixgbe_rx_queue *rxq)
{
	return ixgbe_rxq_vec_setup_default(rxq);
}

int __rte_cold
ixgbe_txq_vec_setup(struct ixgbe_tx_queue *txq)
{
	return ixgbe_txq_vec_setup_default(txq, &vec_txq_ops);
}

int __rte_cold
ixgbe_rx_vec_dev_conf_condition_check(struct rte_eth_dev *dev)
{
	return ixgbe_rx_vec_dev_conf_condition_check_default(dev);
}