1 /*- 2 * BSD LICENSE 3 * 4 * Copyright 2015 6WIND S.A. 5 * Copyright 2015 Mellanox. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of 6WIND S.A. nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <assert.h> 35 #include <stdint.h> 36 #include <string.h> 37 #include <stdlib.h> 38 39 /* Verbs header. */ 40 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 41 #ifdef PEDANTIC 42 #pragma GCC diagnostic ignored "-Wpedantic" 43 #endif 44 #include <infiniband/verbs.h> 45 #include <infiniband/mlx5_hw.h> 46 #include <infiniband/arch.h> 47 #ifdef PEDANTIC 48 #pragma GCC diagnostic error "-Wpedantic" 49 #endif 50 51 /* DPDK headers don't like -pedantic. */ 52 #ifdef PEDANTIC 53 #pragma GCC diagnostic ignored "-Wpedantic" 54 #endif 55 #include <rte_mbuf.h> 56 #include <rte_mempool.h> 57 #include <rte_prefetch.h> 58 #include <rte_common.h> 59 #include <rte_branch_prediction.h> 60 #include <rte_ether.h> 61 #ifdef PEDANTIC 62 #pragma GCC diagnostic error "-Wpedantic" 63 #endif 64 65 #include "mlx5.h" 66 #include "mlx5_utils.h" 67 #include "mlx5_rxtx.h" 68 #include "mlx5_autoconf.h" 69 #include "mlx5_defs.h" 70 #include "mlx5_prm.h" 71 72 static __rte_always_inline uint32_t 73 rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe); 74 75 static __rte_always_inline int 76 mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, 77 uint16_t cqe_cnt, uint32_t *rss_hash); 78 79 static __rte_always_inline uint32_t 80 rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe); 81 82 /* 83 * The index to the array should have: 84 * bit[1:0] = l3_hdr_type, bit[2] = tunneled, bit[3] = outer_l3_type 85 */ 86 const uint32_t mlx5_ptype_table[] = { 87 RTE_PTYPE_UNKNOWN, 88 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, /* b0001 */ 89 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, /* b0010 */ 90 RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, 91 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 92 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, /* b0101 */ 93 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 94 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, /* b0110 */ 95 RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, 96 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, /* b1001 */ 97 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, /* b1010 */ 98 RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN, 99 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 100 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, /* b1101 */ 101 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 102 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, /* b1110 */ 103 RTE_PTYPE_ALL_MASK /* b1111 */ 104 }; 105 106 /** 107 * Return the size of tailroom of WQ. 108 * 109 * @param txq 110 * Pointer to TX queue structure. 111 * @param addr 112 * Pointer to tail of WQ. 113 * 114 * @return 115 * Size of tailroom. 116 */ 117 static inline size_t 118 tx_mlx5_wq_tailroom(struct txq *txq, void *addr) 119 { 120 size_t tailroom; 121 tailroom = (uintptr_t)(txq->wqes) + 122 (1 << txq->wqe_n) * MLX5_WQE_SIZE - 123 (uintptr_t)addr; 124 return tailroom; 125 } 126 127 /** 128 * Copy data to tailroom of circular queue. 129 * 130 * @param dst 131 * Pointer to destination. 132 * @param src 133 * Pointer to source. 134 * @param n 135 * Number of bytes to copy. 136 * @param base 137 * Pointer to head of queue. 138 * @param tailroom 139 * Size of tailroom from dst. 140 * 141 * @return 142 * Pointer after copied data. 143 */ 144 static inline void * 145 mlx5_copy_to_wq(void *dst, const void *src, size_t n, 146 void *base, size_t tailroom) 147 { 148 void *ret; 149 150 if (n > tailroom) { 151 rte_memcpy(dst, src, tailroom); 152 rte_memcpy(base, (void *)((uintptr_t)src + tailroom), 153 n - tailroom); 154 ret = (uint8_t *)base + n - tailroom; 155 } else { 156 rte_memcpy(dst, src, n); 157 ret = (n == tailroom) ? base : (uint8_t *)dst + n; 158 } 159 return ret; 160 } 161 162 /** 163 * DPDK callback to check the status of a tx descriptor. 164 * 165 * @param tx_queue 166 * The tx queue. 167 * @param[in] offset 168 * The index of the descriptor in the ring. 169 * 170 * @return 171 * The status of the tx descriptor. 172 */ 173 int 174 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) 175 { 176 struct txq *txq = tx_queue; 177 uint16_t used; 178 179 mlx5_tx_complete(txq); 180 used = txq->elts_head - txq->elts_tail; 181 if (offset < used) 182 return RTE_ETH_TX_DESC_FULL; 183 return RTE_ETH_TX_DESC_DONE; 184 } 185 186 /** 187 * DPDK callback to check the status of a rx descriptor. 188 * 189 * @param rx_queue 190 * The rx queue. 191 * @param[in] offset 192 * The index of the descriptor in the ring. 193 * 194 * @return 195 * The status of the tx descriptor. 196 */ 197 int 198 mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset) 199 { 200 struct rxq *rxq = rx_queue; 201 struct rxq_zip *zip = &rxq->zip; 202 volatile struct mlx5_cqe *cqe; 203 const unsigned int cqe_n = (1 << rxq->cqe_n); 204 const unsigned int cqe_cnt = cqe_n - 1; 205 unsigned int cq_ci; 206 unsigned int used; 207 208 /* if we are processing a compressed cqe */ 209 if (zip->ai) { 210 used = zip->cqe_cnt - zip->ca; 211 cq_ci = zip->cq_ci; 212 } else { 213 used = 0; 214 cq_ci = rxq->cq_ci; 215 } 216 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; 217 while (check_cqe(cqe, cqe_n, cq_ci) == 0) { 218 int8_t op_own; 219 unsigned int n; 220 221 op_own = cqe->op_own; 222 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) 223 n = ntohl(cqe->byte_cnt); 224 else 225 n = 1; 226 cq_ci += n; 227 used += n; 228 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; 229 } 230 used = RTE_MIN(used, (1U << rxq->elts_n) - 1); 231 if (offset < used) 232 return RTE_ETH_RX_DESC_DONE; 233 return RTE_ETH_RX_DESC_AVAIL; 234 } 235 236 /** 237 * DPDK callback for TX. 238 * 239 * @param dpdk_txq 240 * Generic pointer to TX queue structure. 241 * @param[in] pkts 242 * Packets to transmit. 243 * @param pkts_n 244 * Number of packets in array. 245 * 246 * @return 247 * Number of packets successfully transmitted (<= pkts_n). 248 */ 249 uint16_t 250 mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) 251 { 252 struct txq *txq = (struct txq *)dpdk_txq; 253 uint16_t elts_head = txq->elts_head; 254 const uint16_t elts_n = 1 << txq->elts_n; 255 const uint16_t elts_m = elts_n - 1; 256 unsigned int i = 0; 257 unsigned int j = 0; 258 unsigned int k = 0; 259 uint16_t max_elts; 260 unsigned int max_inline = txq->max_inline; 261 const unsigned int inline_en = !!max_inline && txq->inline_en; 262 uint16_t max_wqe; 263 unsigned int comp; 264 volatile struct mlx5_wqe_v *wqe = NULL; 265 volatile struct mlx5_wqe_ctrl *last_wqe = NULL; 266 unsigned int segs_n = 0; 267 struct rte_mbuf *buf = NULL; 268 uint8_t *raw; 269 270 if (unlikely(!pkts_n)) 271 return 0; 272 /* Prefetch first packet cacheline. */ 273 rte_prefetch0(*pkts); 274 /* Start processing. */ 275 mlx5_tx_complete(txq); 276 max_elts = (elts_n - (elts_head - txq->elts_tail)); 277 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); 278 if (unlikely(!max_wqe)) 279 return 0; 280 do { 281 volatile rte_v128u32_t *dseg = NULL; 282 uint32_t length; 283 unsigned int ds = 0; 284 unsigned int sg = 0; /* counter of additional segs attached. */ 285 uintptr_t addr; 286 uint64_t naddr; 287 uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2; 288 uint16_t tso_header_sz = 0; 289 uint16_t ehdr; 290 uint8_t cs_flags = 0; 291 uint64_t tso = 0; 292 uint16_t tso_segsz = 0; 293 #ifdef MLX5_PMD_SOFT_COUNTERS 294 uint32_t total_length = 0; 295 #endif 296 297 /* first_seg */ 298 buf = *pkts; 299 segs_n = buf->nb_segs; 300 /* 301 * Make sure there is enough room to store this packet and 302 * that one ring entry remains unused. 303 */ 304 assert(segs_n); 305 if (max_elts < segs_n) 306 break; 307 max_elts -= segs_n; 308 --segs_n; 309 if (unlikely(--max_wqe == 0)) 310 break; 311 wqe = (volatile struct mlx5_wqe_v *) 312 tx_mlx5_wqe(txq, txq->wqe_ci); 313 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); 314 if (pkts_n - i > 1) 315 rte_prefetch0(*(pkts + 1)); 316 addr = rte_pktmbuf_mtod(buf, uintptr_t); 317 length = DATA_LEN(buf); 318 ehdr = (((uint8_t *)addr)[1] << 8) | 319 ((uint8_t *)addr)[0]; 320 #ifdef MLX5_PMD_SOFT_COUNTERS 321 total_length = length; 322 #endif 323 if (length < (MLX5_WQE_DWORD_SIZE + 2)) 324 break; 325 /* Update element. */ 326 (*txq->elts)[elts_head & elts_m] = buf; 327 /* Prefetch next buffer data. */ 328 if (pkts_n - i > 1) 329 rte_prefetch0( 330 rte_pktmbuf_mtod(*(pkts + 1), volatile void *)); 331 /* Should we enable HW CKSUM offload */ 332 if (buf->ol_flags & 333 (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) { 334 const uint64_t is_tunneled = buf->ol_flags & 335 (PKT_TX_TUNNEL_GRE | 336 PKT_TX_TUNNEL_VXLAN); 337 338 if (is_tunneled && txq->tunnel_en) { 339 cs_flags = MLX5_ETH_WQE_L3_INNER_CSUM | 340 MLX5_ETH_WQE_L4_INNER_CSUM; 341 if (buf->ol_flags & PKT_TX_OUTER_IP_CKSUM) 342 cs_flags |= MLX5_ETH_WQE_L3_CSUM; 343 } else { 344 cs_flags = MLX5_ETH_WQE_L3_CSUM | 345 MLX5_ETH_WQE_L4_CSUM; 346 } 347 } 348 raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE; 349 /* Replace the Ethernet type by the VLAN if necessary. */ 350 if (buf->ol_flags & PKT_TX_VLAN_PKT) { 351 uint32_t vlan = htonl(0x81000000 | buf->vlan_tci); 352 unsigned int len = 2 * ETHER_ADDR_LEN - 2; 353 354 addr += 2; 355 length -= 2; 356 /* Copy Destination and source mac address. */ 357 memcpy((uint8_t *)raw, ((uint8_t *)addr), len); 358 /* Copy VLAN. */ 359 memcpy((uint8_t *)raw + len, &vlan, sizeof(vlan)); 360 /* Copy missing two bytes to end the DSeg. */ 361 memcpy((uint8_t *)raw + len + sizeof(vlan), 362 ((uint8_t *)addr) + len, 2); 363 addr += len + 2; 364 length -= (len + 2); 365 } else { 366 memcpy((uint8_t *)raw, ((uint8_t *)addr) + 2, 367 MLX5_WQE_DWORD_SIZE); 368 length -= pkt_inline_sz; 369 addr += pkt_inline_sz; 370 } 371 if (txq->tso_en) { 372 tso = buf->ol_flags & PKT_TX_TCP_SEG; 373 if (tso) { 374 uintptr_t end = (uintptr_t) 375 (((uintptr_t)txq->wqes) + 376 (1 << txq->wqe_n) * 377 MLX5_WQE_SIZE); 378 unsigned int copy_b; 379 uint8_t vlan_sz = (buf->ol_flags & 380 PKT_TX_VLAN_PKT) ? 4 : 0; 381 const uint64_t is_tunneled = 382 buf->ol_flags & 383 (PKT_TX_TUNNEL_GRE | 384 PKT_TX_TUNNEL_VXLAN); 385 386 tso_header_sz = buf->l2_len + vlan_sz + 387 buf->l3_len + buf->l4_len; 388 tso_segsz = buf->tso_segsz; 389 390 if (is_tunneled && txq->tunnel_en) { 391 tso_header_sz += buf->outer_l2_len + 392 buf->outer_l3_len; 393 cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; 394 } else { 395 cs_flags |= MLX5_ETH_WQE_L4_CSUM; 396 } 397 if (unlikely(tso_header_sz > 398 MLX5_MAX_TSO_HEADER)) 399 break; 400 copy_b = tso_header_sz - pkt_inline_sz; 401 /* First seg must contain all headers. */ 402 assert(copy_b <= length); 403 raw += MLX5_WQE_DWORD_SIZE; 404 if (copy_b && 405 ((end - (uintptr_t)raw) > copy_b)) { 406 uint16_t n = (MLX5_WQE_DS(copy_b) - 407 1 + 3) / 4; 408 409 if (unlikely(max_wqe < n)) 410 break; 411 max_wqe -= n; 412 rte_memcpy((void *)raw, 413 (void *)addr, copy_b); 414 addr += copy_b; 415 length -= copy_b; 416 pkt_inline_sz += copy_b; 417 /* 418 * Another DWORD will be added 419 * in the inline part. 420 */ 421 raw += MLX5_WQE_DS(copy_b) * 422 MLX5_WQE_DWORD_SIZE - 423 MLX5_WQE_DWORD_SIZE; 424 } else { 425 /* NOP WQE. */ 426 wqe->ctrl = (rte_v128u32_t){ 427 htonl(txq->wqe_ci << 8), 428 htonl(txq->qp_num_8s | 1), 429 0, 430 0, 431 }; 432 ds = 1; 433 total_length = 0; 434 k++; 435 goto next_wqe; 436 } 437 } 438 } 439 /* Inline if enough room. */ 440 if (inline_en || tso) { 441 uintptr_t end = (uintptr_t) 442 (((uintptr_t)txq->wqes) + 443 (1 << txq->wqe_n) * MLX5_WQE_SIZE); 444 unsigned int inline_room = max_inline * 445 RTE_CACHE_LINE_SIZE - 446 (pkt_inline_sz - 2); 447 uintptr_t addr_end = (addr + inline_room) & 448 ~(RTE_CACHE_LINE_SIZE - 1); 449 unsigned int copy_b = (addr_end > addr) ? 450 RTE_MIN((addr_end - addr), length) : 451 0; 452 453 raw += MLX5_WQE_DWORD_SIZE; 454 if (copy_b && ((end - (uintptr_t)raw) > copy_b)) { 455 /* 456 * One Dseg remains in the current WQE. To 457 * keep the computation positive, it is 458 * removed after the bytes to Dseg conversion. 459 */ 460 uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4; 461 462 if (unlikely(max_wqe < n)) 463 break; 464 max_wqe -= n; 465 if (tso) { 466 uint32_t inl = 467 htonl(copy_b | MLX5_INLINE_SEG); 468 469 pkt_inline_sz = 470 MLX5_WQE_DS(tso_header_sz) * 471 MLX5_WQE_DWORD_SIZE; 472 rte_memcpy((void *)raw, 473 (void *)&inl, sizeof(inl)); 474 raw += sizeof(inl); 475 pkt_inline_sz += sizeof(inl); 476 } 477 rte_memcpy((void *)raw, (void *)addr, copy_b); 478 addr += copy_b; 479 length -= copy_b; 480 pkt_inline_sz += copy_b; 481 } 482 /* 483 * 2 DWORDs consumed by the WQE header + ETH segment + 484 * the size of the inline part of the packet. 485 */ 486 ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2); 487 if (length > 0) { 488 if (ds % (MLX5_WQE_SIZE / 489 MLX5_WQE_DWORD_SIZE) == 0) { 490 if (unlikely(--max_wqe == 0)) 491 break; 492 dseg = (volatile rte_v128u32_t *) 493 tx_mlx5_wqe(txq, txq->wqe_ci + 494 ds / 4); 495 } else { 496 dseg = (volatile rte_v128u32_t *) 497 ((uintptr_t)wqe + 498 (ds * MLX5_WQE_DWORD_SIZE)); 499 } 500 goto use_dseg; 501 } else if (!segs_n) { 502 goto next_pkt; 503 } else { 504 /* dseg will be advance as part of next_seg */ 505 dseg = (volatile rte_v128u32_t *) 506 ((uintptr_t)wqe + 507 ((ds - 1) * MLX5_WQE_DWORD_SIZE)); 508 goto next_seg; 509 } 510 } else { 511 /* 512 * No inline has been done in the packet, only the 513 * Ethernet Header as been stored. 514 */ 515 dseg = (volatile rte_v128u32_t *) 516 ((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE)); 517 ds = 3; 518 use_dseg: 519 /* Add the remaining packet as a simple ds. */ 520 naddr = htonll(addr); 521 *dseg = (rte_v128u32_t){ 522 htonl(length), 523 mlx5_tx_mb2mr(txq, buf), 524 naddr, 525 naddr >> 32, 526 }; 527 ++ds; 528 if (!segs_n) 529 goto next_pkt; 530 } 531 next_seg: 532 assert(buf); 533 assert(ds); 534 assert(wqe); 535 /* 536 * Spill on next WQE when the current one does not have 537 * enough room left. Size of WQE must a be a multiple 538 * of data segment size. 539 */ 540 assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE)); 541 if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) { 542 if (unlikely(--max_wqe == 0)) 543 break; 544 dseg = (volatile rte_v128u32_t *) 545 tx_mlx5_wqe(txq, txq->wqe_ci + ds / 4); 546 rte_prefetch0(tx_mlx5_wqe(txq, 547 txq->wqe_ci + ds / 4 + 1)); 548 } else { 549 ++dseg; 550 } 551 ++ds; 552 buf = buf->next; 553 assert(buf); 554 length = DATA_LEN(buf); 555 #ifdef MLX5_PMD_SOFT_COUNTERS 556 total_length += length; 557 #endif 558 /* Store segment information. */ 559 naddr = htonll(rte_pktmbuf_mtod(buf, uintptr_t)); 560 *dseg = (rte_v128u32_t){ 561 htonl(length), 562 mlx5_tx_mb2mr(txq, buf), 563 naddr, 564 naddr >> 32, 565 }; 566 (*txq->elts)[++elts_head & elts_m] = buf; 567 ++sg; 568 /* Advance counter only if all segs are successfully posted. */ 569 if (sg < segs_n) 570 goto next_seg; 571 else 572 j += sg; 573 next_pkt: 574 ++elts_head; 575 ++pkts; 576 ++i; 577 /* Initialize known and common part of the WQE structure. */ 578 if (tso) { 579 wqe->ctrl = (rte_v128u32_t){ 580 htonl((txq->wqe_ci << 8) | MLX5_OPCODE_TSO), 581 htonl(txq->qp_num_8s | ds), 582 0, 583 0, 584 }; 585 wqe->eseg = (rte_v128u32_t){ 586 0, 587 cs_flags | (htons(tso_segsz) << 16), 588 0, 589 (ehdr << 16) | htons(tso_header_sz), 590 }; 591 } else { 592 wqe->ctrl = (rte_v128u32_t){ 593 htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND), 594 htonl(txq->qp_num_8s | ds), 595 0, 596 0, 597 }; 598 wqe->eseg = (rte_v128u32_t){ 599 0, 600 cs_flags, 601 0, 602 (ehdr << 16) | htons(pkt_inline_sz), 603 }; 604 } 605 next_wqe: 606 txq->wqe_ci += (ds + 3) / 4; 607 /* Save the last successful WQE for completion request */ 608 last_wqe = (volatile struct mlx5_wqe_ctrl *)wqe; 609 #ifdef MLX5_PMD_SOFT_COUNTERS 610 /* Increment sent bytes counter. */ 611 txq->stats.obytes += total_length; 612 #endif 613 } while (i < pkts_n); 614 /* Take a shortcut if nothing must be sent. */ 615 if (unlikely((i + k) == 0)) 616 return 0; 617 txq->elts_head += (i + j); 618 /* Check whether completion threshold has been reached. */ 619 comp = txq->elts_comp + i + j + k; 620 if (comp >= MLX5_TX_COMP_THRESH) { 621 /* Request completion on last WQE. */ 622 last_wqe->ctrl2 = htonl(8); 623 /* Save elts_head in unused "immediate" field of WQE. */ 624 last_wqe->ctrl3 = txq->elts_head; 625 txq->elts_comp = 0; 626 } else { 627 txq->elts_comp = comp; 628 } 629 #ifdef MLX5_PMD_SOFT_COUNTERS 630 /* Increment sent packets counter. */ 631 txq->stats.opackets += i; 632 #endif 633 /* Ring QP doorbell. */ 634 mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)last_wqe); 635 return i; 636 } 637 638 /** 639 * Open a MPW session. 640 * 641 * @param txq 642 * Pointer to TX queue structure. 643 * @param mpw 644 * Pointer to MPW session structure. 645 * @param length 646 * Packet length. 647 */ 648 static inline void 649 mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length) 650 { 651 uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1); 652 volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] = 653 (volatile struct mlx5_wqe_data_seg (*)[]) 654 tx_mlx5_wqe(txq, idx + 1); 655 656 mpw->state = MLX5_MPW_STATE_OPENED; 657 mpw->pkts_n = 0; 658 mpw->len = length; 659 mpw->total_len = 0; 660 mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx); 661 mpw->wqe->eseg.mss = htons(length); 662 mpw->wqe->eseg.inline_hdr_sz = 0; 663 mpw->wqe->eseg.rsvd0 = 0; 664 mpw->wqe->eseg.rsvd1 = 0; 665 mpw->wqe->eseg.rsvd2 = 0; 666 mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) | 667 (txq->wqe_ci << 8) | MLX5_OPCODE_TSO); 668 mpw->wqe->ctrl[2] = 0; 669 mpw->wqe->ctrl[3] = 0; 670 mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *) 671 (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE)); 672 mpw->data.dseg[1] = (volatile struct mlx5_wqe_data_seg *) 673 (((uintptr_t)mpw->wqe) + (3 * MLX5_WQE_DWORD_SIZE)); 674 mpw->data.dseg[2] = &(*dseg)[0]; 675 mpw->data.dseg[3] = &(*dseg)[1]; 676 mpw->data.dseg[4] = &(*dseg)[2]; 677 } 678 679 /** 680 * Close a MPW session. 681 * 682 * @param txq 683 * Pointer to TX queue structure. 684 * @param mpw 685 * Pointer to MPW session structure. 686 */ 687 static inline void 688 mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw) 689 { 690 unsigned int num = mpw->pkts_n; 691 692 /* 693 * Store size in multiple of 16 bytes. Control and Ethernet segments 694 * count as 2. 695 */ 696 mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | (2 + num)); 697 mpw->state = MLX5_MPW_STATE_CLOSED; 698 if (num < 3) 699 ++txq->wqe_ci; 700 else 701 txq->wqe_ci += 2; 702 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci)); 703 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); 704 } 705 706 /** 707 * DPDK callback for TX with MPW support. 708 * 709 * @param dpdk_txq 710 * Generic pointer to TX queue structure. 711 * @param[in] pkts 712 * Packets to transmit. 713 * @param pkts_n 714 * Number of packets in array. 715 * 716 * @return 717 * Number of packets successfully transmitted (<= pkts_n). 718 */ 719 uint16_t 720 mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) 721 { 722 struct txq *txq = (struct txq *)dpdk_txq; 723 uint16_t elts_head = txq->elts_head; 724 const uint16_t elts_n = 1 << txq->elts_n; 725 const uint16_t elts_m = elts_n - 1; 726 unsigned int i = 0; 727 unsigned int j = 0; 728 uint16_t max_elts; 729 uint16_t max_wqe; 730 unsigned int comp; 731 struct mlx5_mpw mpw = { 732 .state = MLX5_MPW_STATE_CLOSED, 733 }; 734 735 if (unlikely(!pkts_n)) 736 return 0; 737 /* Prefetch first packet cacheline. */ 738 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci)); 739 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); 740 /* Start processing. */ 741 mlx5_tx_complete(txq); 742 max_elts = (elts_n - (elts_head - txq->elts_tail)); 743 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); 744 if (unlikely(!max_wqe)) 745 return 0; 746 do { 747 struct rte_mbuf *buf = *(pkts++); 748 uint32_t length; 749 unsigned int segs_n = buf->nb_segs; 750 uint32_t cs_flags = 0; 751 752 /* 753 * Make sure there is enough room to store this packet and 754 * that one ring entry remains unused. 755 */ 756 assert(segs_n); 757 if (max_elts < segs_n) 758 break; 759 /* Do not bother with large packets MPW cannot handle. */ 760 if (segs_n > MLX5_MPW_DSEG_MAX) 761 break; 762 max_elts -= segs_n; 763 --pkts_n; 764 /* Should we enable HW CKSUM offload */ 765 if (buf->ol_flags & 766 (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) 767 cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; 768 /* Retrieve packet information. */ 769 length = PKT_LEN(buf); 770 assert(length); 771 /* Start new session if packet differs. */ 772 if ((mpw.state == MLX5_MPW_STATE_OPENED) && 773 ((mpw.len != length) || 774 (segs_n != 1) || 775 (mpw.wqe->eseg.cs_flags != cs_flags))) 776 mlx5_mpw_close(txq, &mpw); 777 if (mpw.state == MLX5_MPW_STATE_CLOSED) { 778 /* 779 * Multi-Packet WQE consumes at most two WQE. 780 * mlx5_mpw_new() expects to be able to use such 781 * resources. 782 */ 783 if (unlikely(max_wqe < 2)) 784 break; 785 max_wqe -= 2; 786 mlx5_mpw_new(txq, &mpw, length); 787 mpw.wqe->eseg.cs_flags = cs_flags; 788 } 789 /* Multi-segment packets must be alone in their MPW. */ 790 assert((segs_n == 1) || (mpw.pkts_n == 0)); 791 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) 792 length = 0; 793 #endif 794 do { 795 volatile struct mlx5_wqe_data_seg *dseg; 796 uintptr_t addr; 797 798 assert(buf); 799 (*txq->elts)[elts_head++ & elts_m] = buf; 800 dseg = mpw.data.dseg[mpw.pkts_n]; 801 addr = rte_pktmbuf_mtod(buf, uintptr_t); 802 *dseg = (struct mlx5_wqe_data_seg){ 803 .byte_count = htonl(DATA_LEN(buf)), 804 .lkey = mlx5_tx_mb2mr(txq, buf), 805 .addr = htonll(addr), 806 }; 807 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) 808 length += DATA_LEN(buf); 809 #endif 810 buf = buf->next; 811 ++mpw.pkts_n; 812 ++j; 813 } while (--segs_n); 814 assert(length == mpw.len); 815 if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) 816 mlx5_mpw_close(txq, &mpw); 817 #ifdef MLX5_PMD_SOFT_COUNTERS 818 /* Increment sent bytes counter. */ 819 txq->stats.obytes += length; 820 #endif 821 ++i; 822 } while (pkts_n); 823 /* Take a shortcut if nothing must be sent. */ 824 if (unlikely(i == 0)) 825 return 0; 826 /* Check whether completion threshold has been reached. */ 827 /* "j" includes both packets and segments. */ 828 comp = txq->elts_comp + j; 829 if (comp >= MLX5_TX_COMP_THRESH) { 830 volatile struct mlx5_wqe *wqe = mpw.wqe; 831 832 /* Request completion on last WQE. */ 833 wqe->ctrl[2] = htonl(8); 834 /* Save elts_head in unused "immediate" field of WQE. */ 835 wqe->ctrl[3] = elts_head; 836 txq->elts_comp = 0; 837 } else { 838 txq->elts_comp = comp; 839 } 840 #ifdef MLX5_PMD_SOFT_COUNTERS 841 /* Increment sent packets counter. */ 842 txq->stats.opackets += i; 843 #endif 844 /* Ring QP doorbell. */ 845 if (mpw.state == MLX5_MPW_STATE_OPENED) 846 mlx5_mpw_close(txq, &mpw); 847 mlx5_tx_dbrec(txq, mpw.wqe); 848 txq->elts_head = elts_head; 849 return i; 850 } 851 852 /** 853 * Open a MPW inline session. 854 * 855 * @param txq 856 * Pointer to TX queue structure. 857 * @param mpw 858 * Pointer to MPW session structure. 859 * @param length 860 * Packet length. 861 */ 862 static inline void 863 mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length) 864 { 865 uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1); 866 struct mlx5_wqe_inl_small *inl; 867 868 mpw->state = MLX5_MPW_INL_STATE_OPENED; 869 mpw->pkts_n = 0; 870 mpw->len = length; 871 mpw->total_len = 0; 872 mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx); 873 mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) | 874 (txq->wqe_ci << 8) | 875 MLX5_OPCODE_TSO); 876 mpw->wqe->ctrl[2] = 0; 877 mpw->wqe->ctrl[3] = 0; 878 mpw->wqe->eseg.mss = htons(length); 879 mpw->wqe->eseg.inline_hdr_sz = 0; 880 mpw->wqe->eseg.cs_flags = 0; 881 mpw->wqe->eseg.rsvd0 = 0; 882 mpw->wqe->eseg.rsvd1 = 0; 883 mpw->wqe->eseg.rsvd2 = 0; 884 inl = (struct mlx5_wqe_inl_small *) 885 (((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE); 886 mpw->data.raw = (uint8_t *)&inl->raw; 887 } 888 889 /** 890 * Close a MPW inline session. 891 * 892 * @param txq 893 * Pointer to TX queue structure. 894 * @param mpw 895 * Pointer to MPW session structure. 896 */ 897 static inline void 898 mlx5_mpw_inline_close(struct txq *txq, struct mlx5_mpw *mpw) 899 { 900 unsigned int size; 901 struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *) 902 (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE)); 903 904 size = MLX5_WQE_SIZE - MLX5_MWQE64_INL_DATA + mpw->total_len; 905 /* 906 * Store size in multiple of 16 bytes. Control and Ethernet segments 907 * count as 2. 908 */ 909 mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | MLX5_WQE_DS(size)); 910 mpw->state = MLX5_MPW_STATE_CLOSED; 911 inl->byte_cnt = htonl(mpw->total_len | MLX5_INLINE_SEG); 912 txq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE; 913 } 914 915 /** 916 * DPDK callback for TX with MPW inline support. 917 * 918 * @param dpdk_txq 919 * Generic pointer to TX queue structure. 920 * @param[in] pkts 921 * Packets to transmit. 922 * @param pkts_n 923 * Number of packets in array. 924 * 925 * @return 926 * Number of packets successfully transmitted (<= pkts_n). 927 */ 928 uint16_t 929 mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, 930 uint16_t pkts_n) 931 { 932 struct txq *txq = (struct txq *)dpdk_txq; 933 uint16_t elts_head = txq->elts_head; 934 const uint16_t elts_n = 1 << txq->elts_n; 935 const uint16_t elts_m = elts_n - 1; 936 unsigned int i = 0; 937 unsigned int j = 0; 938 uint16_t max_elts; 939 uint16_t max_wqe; 940 unsigned int comp; 941 unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE; 942 struct mlx5_mpw mpw = { 943 .state = MLX5_MPW_STATE_CLOSED, 944 }; 945 /* 946 * Compute the maximum number of WQE which can be consumed by inline 947 * code. 948 * - 2 DSEG for: 949 * - 1 control segment, 950 * - 1 Ethernet segment, 951 * - N Dseg from the inline request. 952 */ 953 const unsigned int wqe_inl_n = 954 ((2 * MLX5_WQE_DWORD_SIZE + 955 txq->max_inline * RTE_CACHE_LINE_SIZE) + 956 RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; 957 958 if (unlikely(!pkts_n)) 959 return 0; 960 /* Prefetch first packet cacheline. */ 961 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci)); 962 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); 963 /* Start processing. */ 964 mlx5_tx_complete(txq); 965 max_elts = (elts_n - (elts_head - txq->elts_tail)); 966 do { 967 struct rte_mbuf *buf = *(pkts++); 968 uintptr_t addr; 969 uint32_t length; 970 unsigned int segs_n = buf->nb_segs; 971 uint32_t cs_flags = 0; 972 973 /* 974 * Make sure there is enough room to store this packet and 975 * that one ring entry remains unused. 976 */ 977 assert(segs_n); 978 if (max_elts < segs_n) 979 break; 980 /* Do not bother with large packets MPW cannot handle. */ 981 if (segs_n > MLX5_MPW_DSEG_MAX) 982 break; 983 max_elts -= segs_n; 984 --pkts_n; 985 /* 986 * Compute max_wqe in case less WQE were consumed in previous 987 * iteration. 988 */ 989 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); 990 /* Should we enable HW CKSUM offload */ 991 if (buf->ol_flags & 992 (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) 993 cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; 994 /* Retrieve packet information. */ 995 length = PKT_LEN(buf); 996 /* Start new session if packet differs. */ 997 if (mpw.state == MLX5_MPW_STATE_OPENED) { 998 if ((mpw.len != length) || 999 (segs_n != 1) || 1000 (mpw.wqe->eseg.cs_flags != cs_flags)) 1001 mlx5_mpw_close(txq, &mpw); 1002 } else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) { 1003 if ((mpw.len != length) || 1004 (segs_n != 1) || 1005 (length > inline_room) || 1006 (mpw.wqe->eseg.cs_flags != cs_flags)) { 1007 mlx5_mpw_inline_close(txq, &mpw); 1008 inline_room = 1009 txq->max_inline * RTE_CACHE_LINE_SIZE; 1010 } 1011 } 1012 if (mpw.state == MLX5_MPW_STATE_CLOSED) { 1013 if ((segs_n != 1) || 1014 (length > inline_room)) { 1015 /* 1016 * Multi-Packet WQE consumes at most two WQE. 1017 * mlx5_mpw_new() expects to be able to use 1018 * such resources. 1019 */ 1020 if (unlikely(max_wqe < 2)) 1021 break; 1022 max_wqe -= 2; 1023 mlx5_mpw_new(txq, &mpw, length); 1024 mpw.wqe->eseg.cs_flags = cs_flags; 1025 } else { 1026 if (unlikely(max_wqe < wqe_inl_n)) 1027 break; 1028 max_wqe -= wqe_inl_n; 1029 mlx5_mpw_inline_new(txq, &mpw, length); 1030 mpw.wqe->eseg.cs_flags = cs_flags; 1031 } 1032 } 1033 /* Multi-segment packets must be alone in their MPW. */ 1034 assert((segs_n == 1) || (mpw.pkts_n == 0)); 1035 if (mpw.state == MLX5_MPW_STATE_OPENED) { 1036 assert(inline_room == 1037 txq->max_inline * RTE_CACHE_LINE_SIZE); 1038 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) 1039 length = 0; 1040 #endif 1041 do { 1042 volatile struct mlx5_wqe_data_seg *dseg; 1043 1044 assert(buf); 1045 (*txq->elts)[elts_head++ & elts_m] = buf; 1046 dseg = mpw.data.dseg[mpw.pkts_n]; 1047 addr = rte_pktmbuf_mtod(buf, uintptr_t); 1048 *dseg = (struct mlx5_wqe_data_seg){ 1049 .byte_count = htonl(DATA_LEN(buf)), 1050 .lkey = mlx5_tx_mb2mr(txq, buf), 1051 .addr = htonll(addr), 1052 }; 1053 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) 1054 length += DATA_LEN(buf); 1055 #endif 1056 buf = buf->next; 1057 ++mpw.pkts_n; 1058 ++j; 1059 } while (--segs_n); 1060 assert(length == mpw.len); 1061 if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) 1062 mlx5_mpw_close(txq, &mpw); 1063 } else { 1064 unsigned int max; 1065 1066 assert(mpw.state == MLX5_MPW_INL_STATE_OPENED); 1067 assert(length <= inline_room); 1068 assert(length == DATA_LEN(buf)); 1069 addr = rte_pktmbuf_mtod(buf, uintptr_t); 1070 (*txq->elts)[elts_head++ & elts_m] = buf; 1071 /* Maximum number of bytes before wrapping. */ 1072 max = ((((uintptr_t)(txq->wqes)) + 1073 (1 << txq->wqe_n) * 1074 MLX5_WQE_SIZE) - 1075 (uintptr_t)mpw.data.raw); 1076 if (length > max) { 1077 rte_memcpy((void *)(uintptr_t)mpw.data.raw, 1078 (void *)addr, 1079 max); 1080 mpw.data.raw = (volatile void *)txq->wqes; 1081 rte_memcpy((void *)(uintptr_t)mpw.data.raw, 1082 (void *)(addr + max), 1083 length - max); 1084 mpw.data.raw += length - max; 1085 } else { 1086 rte_memcpy((void *)(uintptr_t)mpw.data.raw, 1087 (void *)addr, 1088 length); 1089 1090 if (length == max) 1091 mpw.data.raw = 1092 (volatile void *)txq->wqes; 1093 else 1094 mpw.data.raw += length; 1095 } 1096 ++mpw.pkts_n; 1097 mpw.total_len += length; 1098 ++j; 1099 if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) { 1100 mlx5_mpw_inline_close(txq, &mpw); 1101 inline_room = 1102 txq->max_inline * RTE_CACHE_LINE_SIZE; 1103 } else { 1104 inline_room -= length; 1105 } 1106 } 1107 #ifdef MLX5_PMD_SOFT_COUNTERS 1108 /* Increment sent bytes counter. */ 1109 txq->stats.obytes += length; 1110 #endif 1111 ++i; 1112 } while (pkts_n); 1113 /* Take a shortcut if nothing must be sent. */ 1114 if (unlikely(i == 0)) 1115 return 0; 1116 /* Check whether completion threshold has been reached. */ 1117 /* "j" includes both packets and segments. */ 1118 comp = txq->elts_comp + j; 1119 if (comp >= MLX5_TX_COMP_THRESH) { 1120 volatile struct mlx5_wqe *wqe = mpw.wqe; 1121 1122 /* Request completion on last WQE. */ 1123 wqe->ctrl[2] = htonl(8); 1124 /* Save elts_head in unused "immediate" field of WQE. */ 1125 wqe->ctrl[3] = elts_head; 1126 txq->elts_comp = 0; 1127 } else { 1128 txq->elts_comp = comp; 1129 } 1130 #ifdef MLX5_PMD_SOFT_COUNTERS 1131 /* Increment sent packets counter. */ 1132 txq->stats.opackets += i; 1133 #endif 1134 /* Ring QP doorbell. */ 1135 if (mpw.state == MLX5_MPW_INL_STATE_OPENED) 1136 mlx5_mpw_inline_close(txq, &mpw); 1137 else if (mpw.state == MLX5_MPW_STATE_OPENED) 1138 mlx5_mpw_close(txq, &mpw); 1139 mlx5_tx_dbrec(txq, mpw.wqe); 1140 txq->elts_head = elts_head; 1141 return i; 1142 } 1143 1144 /** 1145 * Open an Enhanced MPW session. 1146 * 1147 * @param txq 1148 * Pointer to TX queue structure. 1149 * @param mpw 1150 * Pointer to MPW session structure. 1151 * @param length 1152 * Packet length. 1153 */ 1154 static inline void 1155 mlx5_empw_new(struct txq *txq, struct mlx5_mpw *mpw, int padding) 1156 { 1157 uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1); 1158 1159 mpw->state = MLX5_MPW_ENHANCED_STATE_OPENED; 1160 mpw->pkts_n = 0; 1161 mpw->total_len = sizeof(struct mlx5_wqe); 1162 mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx); 1163 mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_ENHANCED_MPSW << 24) | 1164 (txq->wqe_ci << 8) | 1165 MLX5_OPCODE_ENHANCED_MPSW); 1166 mpw->wqe->ctrl[2] = 0; 1167 mpw->wqe->ctrl[3] = 0; 1168 memset((void *)(uintptr_t)&mpw->wqe->eseg, 0, MLX5_WQE_DWORD_SIZE); 1169 if (unlikely(padding)) { 1170 uintptr_t addr = (uintptr_t)(mpw->wqe + 1); 1171 1172 /* Pad the first 2 DWORDs with zero-length inline header. */ 1173 *(volatile uint32_t *)addr = htonl(MLX5_INLINE_SEG); 1174 *(volatile uint32_t *)(addr + MLX5_WQE_DWORD_SIZE) = 1175 htonl(MLX5_INLINE_SEG); 1176 mpw->total_len += 2 * MLX5_WQE_DWORD_SIZE; 1177 /* Start from the next WQEBB. */ 1178 mpw->data.raw = (volatile void *)(tx_mlx5_wqe(txq, idx + 1)); 1179 } else { 1180 mpw->data.raw = (volatile void *)(mpw->wqe + 1); 1181 } 1182 } 1183 1184 /** 1185 * Close an Enhanced MPW session. 1186 * 1187 * @param txq 1188 * Pointer to TX queue structure. 1189 * @param mpw 1190 * Pointer to MPW session structure. 1191 * 1192 * @return 1193 * Number of consumed WQEs. 1194 */ 1195 static inline uint16_t 1196 mlx5_empw_close(struct txq *txq, struct mlx5_mpw *mpw) 1197 { 1198 uint16_t ret; 1199 1200 /* Store size in multiple of 16 bytes. Control and Ethernet segments 1201 * count as 2. 1202 */ 1203 mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | MLX5_WQE_DS(mpw->total_len)); 1204 mpw->state = MLX5_MPW_STATE_CLOSED; 1205 ret = (mpw->total_len + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE; 1206 txq->wqe_ci += ret; 1207 return ret; 1208 } 1209 1210 /** 1211 * DPDK callback for TX with Enhanced MPW support. 1212 * 1213 * @param dpdk_txq 1214 * Generic pointer to TX queue structure. 1215 * @param[in] pkts 1216 * Packets to transmit. 1217 * @param pkts_n 1218 * Number of packets in array. 1219 * 1220 * @return 1221 * Number of packets successfully transmitted (<= pkts_n). 1222 */ 1223 uint16_t 1224 mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) 1225 { 1226 struct txq *txq = (struct txq *)dpdk_txq; 1227 uint16_t elts_head = txq->elts_head; 1228 const uint16_t elts_n = 1 << txq->elts_n; 1229 const uint16_t elts_m = elts_n - 1; 1230 unsigned int i = 0; 1231 unsigned int j = 0; 1232 uint16_t max_elts; 1233 uint16_t max_wqe; 1234 unsigned int max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE; 1235 unsigned int mpw_room = 0; 1236 unsigned int inl_pad = 0; 1237 uint32_t inl_hdr; 1238 struct mlx5_mpw mpw = { 1239 .state = MLX5_MPW_STATE_CLOSED, 1240 }; 1241 1242 if (unlikely(!pkts_n)) 1243 return 0; 1244 /* Start processing. */ 1245 mlx5_tx_complete(txq); 1246 max_elts = (elts_n - (elts_head - txq->elts_tail)); 1247 /* A CQE slot must always be available. */ 1248 assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); 1249 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); 1250 if (unlikely(!max_wqe)) 1251 return 0; 1252 do { 1253 struct rte_mbuf *buf = *(pkts++); 1254 uintptr_t addr; 1255 uint64_t naddr; 1256 unsigned int n; 1257 unsigned int do_inline = 0; /* Whether inline is possible. */ 1258 uint32_t length; 1259 unsigned int segs_n = buf->nb_segs; 1260 uint32_t cs_flags = 0; 1261 1262 /* 1263 * Make sure there is enough room to store this packet and 1264 * that one ring entry remains unused. 1265 */ 1266 assert(segs_n); 1267 if (max_elts - j < segs_n) 1268 break; 1269 /* Do not bother with large packets MPW cannot handle. */ 1270 if (segs_n > MLX5_MPW_DSEG_MAX) 1271 break; 1272 /* Should we enable HW CKSUM offload. */ 1273 if (buf->ol_flags & 1274 (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) 1275 cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM; 1276 /* Retrieve packet information. */ 1277 length = PKT_LEN(buf); 1278 /* Start new session if: 1279 * - multi-segment packet 1280 * - no space left even for a dseg 1281 * - next packet can be inlined with a new WQE 1282 * - cs_flag differs 1283 * It can't be MLX5_MPW_STATE_OPENED as always have a single 1284 * segmented packet. 1285 */ 1286 if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) { 1287 if ((segs_n != 1) || 1288 (inl_pad + sizeof(struct mlx5_wqe_data_seg) > 1289 mpw_room) || 1290 (length <= txq->inline_max_packet_sz && 1291 inl_pad + sizeof(inl_hdr) + length > 1292 mpw_room) || 1293 (mpw.wqe->eseg.cs_flags != cs_flags)) 1294 max_wqe -= mlx5_empw_close(txq, &mpw); 1295 } 1296 if (unlikely(mpw.state == MLX5_MPW_STATE_CLOSED)) { 1297 if (unlikely(segs_n != 1)) { 1298 /* Fall back to legacy MPW. 1299 * A MPW session consumes 2 WQEs at most to 1300 * include MLX5_MPW_DSEG_MAX pointers. 1301 */ 1302 if (unlikely(max_wqe < 2)) 1303 break; 1304 mlx5_mpw_new(txq, &mpw, length); 1305 } else { 1306 /* In Enhanced MPW, inline as much as the budget 1307 * is allowed. The remaining space is to be 1308 * filled with dsegs. If the title WQEBB isn't 1309 * padded, it will have 2 dsegs there. 1310 */ 1311 mpw_room = RTE_MIN(MLX5_WQE_SIZE_MAX, 1312 (max_inline ? max_inline : 1313 pkts_n * MLX5_WQE_DWORD_SIZE) + 1314 MLX5_WQE_SIZE); 1315 if (unlikely(max_wqe * MLX5_WQE_SIZE < 1316 mpw_room)) 1317 break; 1318 /* Don't pad the title WQEBB to not waste WQ. */ 1319 mlx5_empw_new(txq, &mpw, 0); 1320 mpw_room -= mpw.total_len; 1321 inl_pad = 0; 1322 do_inline = 1323 length <= txq->inline_max_packet_sz && 1324 sizeof(inl_hdr) + length <= mpw_room && 1325 !txq->mpw_hdr_dseg; 1326 } 1327 mpw.wqe->eseg.cs_flags = cs_flags; 1328 } else { 1329 /* Evaluate whether the next packet can be inlined. 1330 * Inlininig is possible when: 1331 * - length is less than configured value 1332 * - length fits for remaining space 1333 * - not required to fill the title WQEBB with dsegs 1334 */ 1335 do_inline = 1336 length <= txq->inline_max_packet_sz && 1337 inl_pad + sizeof(inl_hdr) + length <= 1338 mpw_room && 1339 (!txq->mpw_hdr_dseg || 1340 mpw.total_len >= MLX5_WQE_SIZE); 1341 } 1342 /* Multi-segment packets must be alone in their MPW. */ 1343 assert((segs_n == 1) || (mpw.pkts_n == 0)); 1344 if (unlikely(mpw.state == MLX5_MPW_STATE_OPENED)) { 1345 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) 1346 length = 0; 1347 #endif 1348 do { 1349 volatile struct mlx5_wqe_data_seg *dseg; 1350 1351 assert(buf); 1352 (*txq->elts)[elts_head++ & elts_m] = buf; 1353 dseg = mpw.data.dseg[mpw.pkts_n]; 1354 addr = rte_pktmbuf_mtod(buf, uintptr_t); 1355 *dseg = (struct mlx5_wqe_data_seg){ 1356 .byte_count = htonl(DATA_LEN(buf)), 1357 .lkey = mlx5_tx_mb2mr(txq, buf), 1358 .addr = htonll(addr), 1359 }; 1360 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) 1361 length += DATA_LEN(buf); 1362 #endif 1363 buf = buf->next; 1364 ++j; 1365 ++mpw.pkts_n; 1366 } while (--segs_n); 1367 /* A multi-segmented packet takes one MPW session. 1368 * TODO: Pack more multi-segmented packets if possible. 1369 */ 1370 mlx5_mpw_close(txq, &mpw); 1371 if (mpw.pkts_n < 3) 1372 max_wqe--; 1373 else 1374 max_wqe -= 2; 1375 } else if (do_inline) { 1376 /* Inline packet into WQE. */ 1377 unsigned int max; 1378 1379 assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED); 1380 assert(length == DATA_LEN(buf)); 1381 inl_hdr = htonl(length | MLX5_INLINE_SEG); 1382 addr = rte_pktmbuf_mtod(buf, uintptr_t); 1383 mpw.data.raw = (volatile void *) 1384 ((uintptr_t)mpw.data.raw + inl_pad); 1385 max = tx_mlx5_wq_tailroom(txq, 1386 (void *)(uintptr_t)mpw.data.raw); 1387 /* Copy inline header. */ 1388 mpw.data.raw = (volatile void *) 1389 mlx5_copy_to_wq( 1390 (void *)(uintptr_t)mpw.data.raw, 1391 &inl_hdr, 1392 sizeof(inl_hdr), 1393 (void *)(uintptr_t)txq->wqes, 1394 max); 1395 max = tx_mlx5_wq_tailroom(txq, 1396 (void *)(uintptr_t)mpw.data.raw); 1397 /* Copy packet data. */ 1398 mpw.data.raw = (volatile void *) 1399 mlx5_copy_to_wq( 1400 (void *)(uintptr_t)mpw.data.raw, 1401 (void *)addr, 1402 length, 1403 (void *)(uintptr_t)txq->wqes, 1404 max); 1405 ++mpw.pkts_n; 1406 mpw.total_len += (inl_pad + sizeof(inl_hdr) + length); 1407 /* No need to get completion as the entire packet is 1408 * copied to WQ. Free the buf right away. 1409 */ 1410 rte_pktmbuf_free_seg(buf); 1411 mpw_room -= (inl_pad + sizeof(inl_hdr) + length); 1412 /* Add pad in the next packet if any. */ 1413 inl_pad = (((uintptr_t)mpw.data.raw + 1414 (MLX5_WQE_DWORD_SIZE - 1)) & 1415 ~(MLX5_WQE_DWORD_SIZE - 1)) - 1416 (uintptr_t)mpw.data.raw; 1417 } else { 1418 /* No inline. Load a dseg of packet pointer. */ 1419 volatile rte_v128u32_t *dseg; 1420 1421 assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED); 1422 assert((inl_pad + sizeof(*dseg)) <= mpw_room); 1423 assert(length == DATA_LEN(buf)); 1424 if (!tx_mlx5_wq_tailroom(txq, 1425 (void *)((uintptr_t)mpw.data.raw 1426 + inl_pad))) 1427 dseg = (volatile void *)txq->wqes; 1428 else 1429 dseg = (volatile void *) 1430 ((uintptr_t)mpw.data.raw + 1431 inl_pad); 1432 (*txq->elts)[elts_head++ & elts_m] = buf; 1433 addr = rte_pktmbuf_mtod(buf, uintptr_t); 1434 for (n = 0; n * RTE_CACHE_LINE_SIZE < length; n++) 1435 rte_prefetch2((void *)(addr + 1436 n * RTE_CACHE_LINE_SIZE)); 1437 naddr = htonll(addr); 1438 *dseg = (rte_v128u32_t) { 1439 htonl(length), 1440 mlx5_tx_mb2mr(txq, buf), 1441 naddr, 1442 naddr >> 32, 1443 }; 1444 mpw.data.raw = (volatile void *)(dseg + 1); 1445 mpw.total_len += (inl_pad + sizeof(*dseg)); 1446 ++j; 1447 ++mpw.pkts_n; 1448 mpw_room -= (inl_pad + sizeof(*dseg)); 1449 inl_pad = 0; 1450 } 1451 #ifdef MLX5_PMD_SOFT_COUNTERS 1452 /* Increment sent bytes counter. */ 1453 txq->stats.obytes += length; 1454 #endif 1455 ++i; 1456 } while (i < pkts_n); 1457 /* Take a shortcut if nothing must be sent. */ 1458 if (unlikely(i == 0)) 1459 return 0; 1460 /* Check whether completion threshold has been reached. */ 1461 if (txq->elts_comp + j >= MLX5_TX_COMP_THRESH || 1462 (uint16_t)(txq->wqe_ci - txq->mpw_comp) >= 1463 (1 << txq->wqe_n) / MLX5_TX_COMP_THRESH_INLINE_DIV) { 1464 volatile struct mlx5_wqe *wqe = mpw.wqe; 1465 1466 /* Request completion on last WQE. */ 1467 wqe->ctrl[2] = htonl(8); 1468 /* Save elts_head in unused "immediate" field of WQE. */ 1469 wqe->ctrl[3] = elts_head; 1470 txq->elts_comp = 0; 1471 txq->mpw_comp = txq->wqe_ci; 1472 txq->cq_pi++; 1473 } else { 1474 txq->elts_comp += j; 1475 } 1476 #ifdef MLX5_PMD_SOFT_COUNTERS 1477 /* Increment sent packets counter. */ 1478 txq->stats.opackets += i; 1479 #endif 1480 if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) 1481 mlx5_empw_close(txq, &mpw); 1482 else if (mpw.state == MLX5_MPW_STATE_OPENED) 1483 mlx5_mpw_close(txq, &mpw); 1484 /* Ring QP doorbell. */ 1485 mlx5_tx_dbrec(txq, mpw.wqe); 1486 txq->elts_head = elts_head; 1487 return i; 1488 } 1489 1490 /** 1491 * Translate RX completion flags to packet type. 1492 * 1493 * @param[in] cqe 1494 * Pointer to CQE. 1495 * 1496 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 1497 * 1498 * @return 1499 * Packet type for struct rte_mbuf. 1500 */ 1501 static inline uint32_t 1502 rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe) 1503 { 1504 uint32_t pkt_type; 1505 uint16_t flags = ntohs(cqe->hdr_type_etc); 1506 1507 if (cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) { 1508 pkt_type = 1509 TRANSPOSE(flags, 1510 MLX5_CQE_RX_IPV4_PACKET, 1511 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN) | 1512 TRANSPOSE(flags, 1513 MLX5_CQE_RX_IPV6_PACKET, 1514 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN); 1515 pkt_type |= ((cqe->pkt_info & MLX5_CQE_RX_OUTER_PACKET) ? 1516 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN : 1517 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN); 1518 } else { 1519 pkt_type = 1520 TRANSPOSE(flags, 1521 MLX5_CQE_L3_HDR_TYPE_IPV6, 1522 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN) | 1523 TRANSPOSE(flags, 1524 MLX5_CQE_L3_HDR_TYPE_IPV4, 1525 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN); 1526 } 1527 return pkt_type; 1528 } 1529 1530 /** 1531 * Get size of the next packet for a given CQE. For compressed CQEs, the 1532 * consumer index is updated only once all packets of the current one have 1533 * been processed. 1534 * 1535 * @param rxq 1536 * Pointer to RX queue. 1537 * @param cqe 1538 * CQE to process. 1539 * @param[out] rss_hash 1540 * Packet RSS Hash result. 1541 * 1542 * @return 1543 * Packet size in bytes (0 if there is none), -1 in case of completion 1544 * with error. 1545 */ 1546 static inline int 1547 mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe, 1548 uint16_t cqe_cnt, uint32_t *rss_hash) 1549 { 1550 struct rxq_zip *zip = &rxq->zip; 1551 uint16_t cqe_n = cqe_cnt + 1; 1552 int len = 0; 1553 uint16_t idx, end; 1554 1555 /* Process compressed data in the CQE and mini arrays. */ 1556 if (zip->ai) { 1557 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1558 (volatile struct mlx5_mini_cqe8 (*)[8]) 1559 (uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt]); 1560 1561 len = ntohl((*mc)[zip->ai & 7].byte_cnt); 1562 *rss_hash = ntohl((*mc)[zip->ai & 7].rx_hash_result); 1563 if ((++zip->ai & 7) == 0) { 1564 /* Invalidate consumed CQEs */ 1565 idx = zip->ca; 1566 end = zip->na; 1567 while (idx != end) { 1568 (*rxq->cqes)[idx & cqe_cnt].op_own = 1569 MLX5_CQE_INVALIDATE; 1570 ++idx; 1571 } 1572 /* 1573 * Increment consumer index to skip the number of 1574 * CQEs consumed. Hardware leaves holes in the CQ 1575 * ring for software use. 1576 */ 1577 zip->ca = zip->na; 1578 zip->na += 8; 1579 } 1580 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { 1581 /* Invalidate the rest */ 1582 idx = zip->ca; 1583 end = zip->cq_ci; 1584 1585 while (idx != end) { 1586 (*rxq->cqes)[idx & cqe_cnt].op_own = 1587 MLX5_CQE_INVALIDATE; 1588 ++idx; 1589 } 1590 rxq->cq_ci = zip->cq_ci; 1591 zip->ai = 0; 1592 } 1593 /* No compressed data, get next CQE and verify if it is compressed. */ 1594 } else { 1595 int ret; 1596 int8_t op_own; 1597 1598 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 1599 if (unlikely(ret == 1)) 1600 return 0; 1601 ++rxq->cq_ci; 1602 op_own = cqe->op_own; 1603 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { 1604 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1605 (volatile struct mlx5_mini_cqe8 (*)[8]) 1606 (uintptr_t)(&(*rxq->cqes)[rxq->cq_ci & 1607 cqe_cnt]); 1608 1609 /* Fix endianness. */ 1610 zip->cqe_cnt = ntohl(cqe->byte_cnt); 1611 /* 1612 * Current mini array position is the one returned by 1613 * check_cqe64(). 1614 * 1615 * If completion comprises several mini arrays, as a 1616 * special case the second one is located 7 CQEs after 1617 * the initial CQE instead of 8 for subsequent ones. 1618 */ 1619 zip->ca = rxq->cq_ci; 1620 zip->na = zip->ca + 7; 1621 /* Compute the next non compressed CQE. */ 1622 --rxq->cq_ci; 1623 zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; 1624 /* Get packet size to return. */ 1625 len = ntohl((*mc)[0].byte_cnt); 1626 *rss_hash = ntohl((*mc)[0].rx_hash_result); 1627 zip->ai = 1; 1628 /* Prefetch all the entries to be invalidated */ 1629 idx = zip->ca; 1630 end = zip->cq_ci; 1631 while (idx != end) { 1632 rte_prefetch0(&(*rxq->cqes)[(idx) & cqe_cnt]); 1633 ++idx; 1634 } 1635 } else { 1636 len = ntohl(cqe->byte_cnt); 1637 *rss_hash = ntohl(cqe->rx_hash_res); 1638 } 1639 /* Error while receiving packet. */ 1640 if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR)) 1641 return -1; 1642 } 1643 return len; 1644 } 1645 1646 /** 1647 * Translate RX completion flags to offload flags. 1648 * 1649 * @param[in] rxq 1650 * Pointer to RX queue structure. 1651 * @param[in] cqe 1652 * Pointer to CQE. 1653 * 1654 * @return 1655 * Offload flags (ol_flags) for struct rte_mbuf. 1656 */ 1657 static inline uint32_t 1658 rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe) 1659 { 1660 uint32_t ol_flags = 0; 1661 uint16_t flags = ntohs(cqe->hdr_type_etc); 1662 1663 ol_flags = 1664 TRANSPOSE(flags, 1665 MLX5_CQE_RX_L3_HDR_VALID, 1666 PKT_RX_IP_CKSUM_GOOD) | 1667 TRANSPOSE(flags, 1668 MLX5_CQE_RX_L4_HDR_VALID, 1669 PKT_RX_L4_CKSUM_GOOD); 1670 if ((cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) && (rxq->csum_l2tun)) 1671 ol_flags |= 1672 TRANSPOSE(flags, 1673 MLX5_CQE_RX_L3_HDR_VALID, 1674 PKT_RX_IP_CKSUM_GOOD) | 1675 TRANSPOSE(flags, 1676 MLX5_CQE_RX_L4_HDR_VALID, 1677 PKT_RX_L4_CKSUM_GOOD); 1678 return ol_flags; 1679 } 1680 1681 /** 1682 * DPDK callback for RX. 1683 * 1684 * @param dpdk_rxq 1685 * Generic pointer to RX queue structure. 1686 * @param[out] pkts 1687 * Array to store received packets. 1688 * @param pkts_n 1689 * Maximum number of packets in array. 1690 * 1691 * @return 1692 * Number of packets successfully received (<= pkts_n). 1693 */ 1694 uint16_t 1695 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1696 { 1697 struct rxq *rxq = dpdk_rxq; 1698 const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; 1699 const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1; 1700 const unsigned int sges_n = rxq->sges_n; 1701 struct rte_mbuf *pkt = NULL; 1702 struct rte_mbuf *seg = NULL; 1703 volatile struct mlx5_cqe *cqe = 1704 &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1705 unsigned int i = 0; 1706 unsigned int rq_ci = rxq->rq_ci << sges_n; 1707 int len = 0; /* keep its value across iterations. */ 1708 1709 while (pkts_n) { 1710 unsigned int idx = rq_ci & wqe_cnt; 1711 volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx]; 1712 struct rte_mbuf *rep = (*rxq->elts)[idx]; 1713 uint32_t rss_hash_res = 0; 1714 1715 if (pkt) 1716 NEXT(seg) = rep; 1717 seg = rep; 1718 rte_prefetch0(seg); 1719 rte_prefetch0(cqe); 1720 rte_prefetch0(wqe); 1721 rep = rte_mbuf_raw_alloc(rxq->mp); 1722 if (unlikely(rep == NULL)) { 1723 ++rxq->stats.rx_nombuf; 1724 if (!pkt) { 1725 /* 1726 * no buffers before we even started, 1727 * bail out silently. 1728 */ 1729 break; 1730 } 1731 while (pkt != seg) { 1732 assert(pkt != (*rxq->elts)[idx]); 1733 rep = NEXT(pkt); 1734 NEXT(pkt) = NULL; 1735 NB_SEGS(pkt) = 1; 1736 rte_mbuf_raw_free(pkt); 1737 pkt = rep; 1738 } 1739 break; 1740 } 1741 if (!pkt) { 1742 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1743 len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, 1744 &rss_hash_res); 1745 if (!len) { 1746 rte_mbuf_raw_free(rep); 1747 break; 1748 } 1749 if (unlikely(len == -1)) { 1750 /* RX error, packet is likely too large. */ 1751 rte_mbuf_raw_free(rep); 1752 ++rxq->stats.idropped; 1753 goto skip; 1754 } 1755 pkt = seg; 1756 assert(len >= (rxq->crc_present << 2)); 1757 /* Update packet information. */ 1758 pkt->packet_type = 0; 1759 pkt->ol_flags = 0; 1760 if (rss_hash_res && rxq->rss_hash) { 1761 pkt->hash.rss = rss_hash_res; 1762 pkt->ol_flags = PKT_RX_RSS_HASH; 1763 } 1764 if (rxq->mark && 1765 MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) { 1766 pkt->ol_flags |= PKT_RX_FDIR; 1767 if (cqe->sop_drop_qpn != 1768 htonl(MLX5_FLOW_MARK_DEFAULT)) { 1769 uint32_t mark = cqe->sop_drop_qpn; 1770 1771 pkt->ol_flags |= PKT_RX_FDIR_ID; 1772 pkt->hash.fdir.hi = 1773 mlx5_flow_mark_get(mark); 1774 } 1775 } 1776 if (rxq->csum | rxq->csum_l2tun) { 1777 pkt->packet_type = rxq_cq_to_pkt_type(cqe); 1778 pkt->ol_flags |= rxq_cq_to_ol_flags(rxq, cqe); 1779 } 1780 if (rxq->vlan_strip && 1781 (cqe->hdr_type_etc & 1782 htons(MLX5_CQE_VLAN_STRIPPED))) { 1783 pkt->ol_flags |= PKT_RX_VLAN_PKT | 1784 PKT_RX_VLAN_STRIPPED; 1785 pkt->vlan_tci = ntohs(cqe->vlan_info); 1786 } 1787 if (rxq->crc_present) 1788 len -= ETHER_CRC_LEN; 1789 PKT_LEN(pkt) = len; 1790 } 1791 DATA_LEN(rep) = DATA_LEN(seg); 1792 PKT_LEN(rep) = PKT_LEN(seg); 1793 SET_DATA_OFF(rep, DATA_OFF(seg)); 1794 PORT(rep) = PORT(seg); 1795 (*rxq->elts)[idx] = rep; 1796 /* 1797 * Fill NIC descriptor with the new buffer. The lkey and size 1798 * of the buffers are already known, only the buffer address 1799 * changes. 1800 */ 1801 wqe->addr = htonll(rte_pktmbuf_mtod(rep, uintptr_t)); 1802 if (len > DATA_LEN(seg)) { 1803 len -= DATA_LEN(seg); 1804 ++NB_SEGS(pkt); 1805 ++rq_ci; 1806 continue; 1807 } 1808 DATA_LEN(seg) = len; 1809 #ifdef MLX5_PMD_SOFT_COUNTERS 1810 /* Increment bytes counter. */ 1811 rxq->stats.ibytes += PKT_LEN(pkt); 1812 #endif 1813 /* Return packet. */ 1814 *(pkts++) = pkt; 1815 pkt = NULL; 1816 --pkts_n; 1817 ++i; 1818 skip: 1819 /* Align consumer index to the next stride. */ 1820 rq_ci >>= sges_n; 1821 ++rq_ci; 1822 rq_ci <<= sges_n; 1823 } 1824 if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci))) 1825 return 0; 1826 /* Update the consumer index. */ 1827 rxq->rq_ci = rq_ci >> sges_n; 1828 rte_wmb(); 1829 *rxq->cq_db = htonl(rxq->cq_ci); 1830 rte_wmb(); 1831 *rxq->rq_db = htonl(rxq->rq_ci); 1832 #ifdef MLX5_PMD_SOFT_COUNTERS 1833 /* Increment packets counter. */ 1834 rxq->stats.ipackets += i; 1835 #endif 1836 return i; 1837 } 1838 1839 /** 1840 * Dummy DPDK callback for TX. 1841 * 1842 * This function is used to temporarily replace the real callback during 1843 * unsafe control operations on the queue, or in case of error. 1844 * 1845 * @param dpdk_txq 1846 * Generic pointer to TX queue structure. 1847 * @param[in] pkts 1848 * Packets to transmit. 1849 * @param pkts_n 1850 * Number of packets in array. 1851 * 1852 * @return 1853 * Number of packets successfully transmitted (<= pkts_n). 1854 */ 1855 uint16_t 1856 removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) 1857 { 1858 (void)dpdk_txq; 1859 (void)pkts; 1860 (void)pkts_n; 1861 return 0; 1862 } 1863 1864 /** 1865 * Dummy DPDK callback for RX. 1866 * 1867 * This function is used to temporarily replace the real callback during 1868 * unsafe control operations on the queue, or in case of error. 1869 * 1870 * @param dpdk_rxq 1871 * Generic pointer to RX queue structure. 1872 * @param[out] pkts 1873 * Array to store received packets. 1874 * @param pkts_n 1875 * Maximum number of packets in array. 1876 * 1877 * @return 1878 * Number of packets successfully received (<= pkts_n). 1879 */ 1880 uint16_t 1881 removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1882 { 1883 (void)dpdk_rxq; 1884 (void)pkts; 1885 (void)pkts_n; 1886 return 0; 1887 } 1888 1889 /* 1890 * Vectorized Rx/Tx routines are not compiled in when required vector 1891 * instructions are not supported on a target architecture. The following null 1892 * stubs are needed for linkage when those are not included outside of this file 1893 * (e.g. mlx5_rxtx_vec_sse.c for x86). 1894 */ 1895 1896 uint16_t __attribute__((weak)) 1897 mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) 1898 { 1899 (void)dpdk_txq; 1900 (void)pkts; 1901 (void)pkts_n; 1902 return 0; 1903 } 1904 1905 uint16_t __attribute__((weak)) 1906 mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) 1907 { 1908 (void)dpdk_txq; 1909 (void)pkts; 1910 (void)pkts_n; 1911 return 0; 1912 } 1913 1914 uint16_t __attribute__((weak)) 1915 mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1916 { 1917 (void)dpdk_rxq; 1918 (void)pkts; 1919 (void)pkts_n; 1920 return 0; 1921 } 1922 1923 int __attribute__((weak)) 1924 priv_check_raw_vec_tx_support(struct priv *priv) 1925 { 1926 (void)priv; 1927 return -ENOTSUP; 1928 } 1929 1930 int __attribute__((weak)) 1931 priv_check_vec_tx_support(struct priv *priv) 1932 { 1933 (void)priv; 1934 return -ENOTSUP; 1935 } 1936 1937 int __attribute__((weak)) 1938 rxq_check_vec_support(struct rxq *rxq) 1939 { 1940 (void)rxq; 1941 return -ENOTSUP; 1942 } 1943 1944 int __attribute__((weak)) 1945 priv_check_vec_rx_support(struct priv *priv) 1946 { 1947 (void)priv; 1948 return -ENOTSUP; 1949 } 1950 1951 void __attribute__((weak)) 1952 priv_prep_vec_rx_function(struct priv *priv) 1953 { 1954 (void)priv; 1955 } 1956