/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox.
 */

#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>

#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"

static __rte_always_inline uint32_t
rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe);

static __rte_always_inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		 uint16_t cqe_cnt, uint32_t *rss_hash);

static __rte_always_inline uint32_t
rxq_cq_to_ol_flags(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe);

uint32_t mlx5_ptype_table[] __rte_cache_aligned = {
	[0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */
};

/**
 * Build a table to translate Rx completion flags to packet type.
 *
 * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
 */
void
mlx5_set_ptype_table(void)
{
	unsigned int i;
	uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table;

	/* Last entry must not be overwritten, reserved for errored packet. */
	for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i)
		(*p)[i] = RTE_PTYPE_UNKNOWN;
	/*
	 * The index to the array should have:
	 * bit[1:0] = l3_hdr_type
	 * bit[4:2] = l4_hdr_type
	 * bit[5] = ip_frag
	 * bit[6] = tunneled
	 * bit[7] = outer_l3_type
	 */
	/* L2 */
	(*p)[0x00] = RTE_PTYPE_L2_ETHER;
	/* L3 */
	(*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	(*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	/* Fragmented */
	(*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	(*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	/* TCP */
	(*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	/* UDP */
	(*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	(*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	/*
	 * Repeat with outer_l3_type being set. Just in case.
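	 * For illustration, reading the bit layout documented above: index
	 * 0x06 has l3_hdr_type = 2 (IPv4) and l4_hdr_type = 1 (TCP), hence
	 * L2/IPv4/TCP; 0x86 is the same value with bit[7] (outer_l3_type)
	 * set, which is why the entries below mirror the ones above.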
	 */
	(*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	(*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	(*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	(*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	(*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	(*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	/* Tunneled - L3 */
	(*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	(*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	(*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	(*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	/* Tunneled - Fragmented */
	(*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	(*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	(*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	(*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	/* Tunneled - TCP */
	(*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	/* Tunneled - UDP */
	(*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
	(*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
	(*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
	(*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
}

/**
 * Return the size of tailroom of WQ.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param addr
 *   Pointer to tail of WQ.
 *
 * @return
 *   Size of tailroom.
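 *   For example, assuming wqe_n = 8, the WQ ring is 256 * MLX5_WQE_SIZE bytes
 *   long and an addr located 128 bytes before its end yields a tailroom of
 *   128 bytes.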
 */
static inline size_t
tx_mlx5_wq_tailroom(struct mlx5_txq_data *txq, void *addr)
{
	size_t tailroom;
	tailroom = (uintptr_t)(txq->wqes) +
		   (1 << txq->wqe_n) * MLX5_WQE_SIZE -
		   (uintptr_t)addr;
	return tailroom;
}

/**
 * Copy data to tailroom of circular queue.
 *
 * @param dst
 *   Pointer to destination.
 * @param src
 *   Pointer to source.
 * @param n
 *   Number of bytes to copy.
 * @param base
 *   Pointer to head of queue.
 * @param tailroom
 *   Size of tailroom from dst.
 *
 * @return
 *   Pointer after copied data.
 */
static inline void *
mlx5_copy_to_wq(void *dst, const void *src, size_t n,
		void *base, size_t tailroom)
{
	void *ret;

	if (n > tailroom) {
		rte_memcpy(dst, src, tailroom);
		rte_memcpy(base, (void *)((uintptr_t)src + tailroom),
			   n - tailroom);
		ret = (uint8_t *)base + n - tailroom;
	} else {
		rte_memcpy(dst, src, n);
		ret = (n == tailroom) ? base : (uint8_t *)dst + n;
	}
	return ret;
}

/**
 * DPDK callback to check the status of a Tx descriptor.
 *
 * @param tx_queue
 *   The Tx queue.
 * @param[in] offset
 *   The index of the descriptor in the ring.
 *
 * @return
 *   The status of the Tx descriptor.
 */
int
mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
{
	struct mlx5_txq_data *txq = tx_queue;
	uint16_t used;

	mlx5_tx_complete(txq);
	used = txq->elts_head - txq->elts_tail;
	if (offset < used)
		return RTE_ETH_TX_DESC_FULL;
	return RTE_ETH_TX_DESC_DONE;
}

/**
 * DPDK callback to check the status of an Rx descriptor.
 *
 * @param rx_queue
 *   The Rx queue.
 * @param[in] offset
 *   The index of the descriptor in the ring.
 *
 * @return
 *   The status of the Rx descriptor.
 */
int
mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
{
	struct mlx5_rxq_data *rxq = rx_queue;
	struct rxq_zip *zip = &rxq->zip;
	volatile struct mlx5_cqe *cqe;
	const unsigned int cqe_n = (1 << rxq->cqe_n);
	const unsigned int cqe_cnt = cqe_n - 1;
	unsigned int cq_ci;
	unsigned int used;

	/* if we are processing a compressed cqe */
	if (zip->ai) {
		used = zip->cqe_cnt - zip->ca;
		cq_ci = zip->cq_ci;
	} else {
		used = 0;
		cq_ci = rxq->cq_ci;
	}
	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	while (check_cqe(cqe, cqe_n, cq_ci) == 0) {
		int8_t op_own;
		unsigned int n;

		op_own = cqe->op_own;
		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
			n = rte_be_to_cpu_32(cqe->byte_cnt);
		else
			n = 1;
		cq_ci += n;
		used += n;
		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	}
	used = RTE_MIN(used, (1U << rxq->elts_n) - 1);
	if (offset < used)
		return RTE_ETH_RX_DESC_DONE;
	return RTE_ETH_RX_DESC_AVAIL;
}

/**
 * DPDK callback for TX.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
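 *
 * @note
 *   The MPW variants below (mlx5_tx_burst_mpw(), mlx5_tx_burst_mpw_inline()
 *   and mlx5_tx_burst_empw()) follow the same calling convention; which of
 *   them ends up bound to rte_eth_tx_burst() is decided outside this file.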
 */
uint16_t
mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	uint16_t elts_head = txq->elts_head;
	const uint16_t elts_n = 1 << txq->elts_n;
	const uint16_t elts_m = elts_n - 1;
	unsigned int i = 0;
	unsigned int j = 0;
	unsigned int k = 0;
	uint16_t max_elts;
	uint16_t max_wqe;
	unsigned int comp;
	volatile struct mlx5_wqe_ctrl *last_wqe = NULL;
	unsigned int segs_n = 0;
	const unsigned int max_inline = txq->max_inline;

	if (unlikely(!pkts_n))
		return 0;
	/* Prefetch first packet cacheline. */
	rte_prefetch0(*pkts);
	/* Start processing. */
	mlx5_tx_complete(txq);
	max_elts = (elts_n - (elts_head - txq->elts_tail));
	/* A CQE slot must always be available. */
	assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
	if (unlikely(!max_wqe))
		return 0;
	do {
		struct rte_mbuf *buf = NULL;
		uint8_t *raw;
		volatile struct mlx5_wqe_v *wqe = NULL;
		volatile rte_v128u32_t *dseg = NULL;
		uint32_t length;
		unsigned int ds = 0;
		unsigned int sg = 0; /* counter of additional segs attached. */
		uintptr_t addr;
		uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2;
		uint16_t tso_header_sz = 0;
		uint16_t ehdr;
		uint8_t cs_flags;
		uint64_t tso = 0;
		uint16_t tso_segsz = 0;
#ifdef MLX5_PMD_SOFT_COUNTERS
		uint32_t total_length = 0;
#endif

		/* first_seg */
		buf = *pkts;
		segs_n = buf->nb_segs;
		/*
		 * Make sure there is enough room to store this packet and
		 * that one ring entry remains unused.
		 */
		assert(segs_n);
		if (max_elts < segs_n)
			break;
		max_elts -= segs_n;
		sg = --segs_n;
		if (unlikely(--max_wqe == 0))
			break;
		wqe = (volatile struct mlx5_wqe_v *)
			tx_mlx5_wqe(txq, txq->wqe_ci);
		rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
		if (pkts_n - i > 1)
			rte_prefetch0(*(pkts + 1));
		addr = rte_pktmbuf_mtod(buf, uintptr_t);
		length = DATA_LEN(buf);
		ehdr = (((uint8_t *)addr)[1] << 8) |
		       ((uint8_t *)addr)[0];
#ifdef MLX5_PMD_SOFT_COUNTERS
		total_length = length;
#endif
		if (length < (MLX5_WQE_DWORD_SIZE + 2)) {
			txq->stats.oerrors++;
			break;
		}
		/* Update element. */
		(*txq->elts)[elts_head & elts_m] = buf;
		/* Prefetch next buffer data. */
		if (pkts_n - i > 1)
			rte_prefetch0(
			    rte_pktmbuf_mtod(*(pkts + 1), volatile void *));
		cs_flags = txq_ol_cksum_to_cs(txq, buf);
		raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
		/* Replace the Ethernet type by the VLAN if necessary. */
		if (buf->ol_flags & PKT_TX_VLAN_PKT) {
			uint32_t vlan = rte_cpu_to_be_32(0x81000000 |
							 buf->vlan_tci);
			unsigned int len = 2 * ETHER_ADDR_LEN - 2;

			addr += 2;
			length -= 2;
			/* Copy destination and source MAC addresses. */
			memcpy((uint8_t *)raw, ((uint8_t *)addr), len);
			/* Copy VLAN. */
			memcpy((uint8_t *)raw + len, &vlan, sizeof(vlan));
			/*
			 * Copy missing two bytes to end the DSeg.
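			 * Together with the two-byte ehdr stored in the
			 * Ethernet segment this completes the
			 * MLX5_WQE_DWORD_SIZE + 2 bytes of inlined packet
			 * header accounted for by pkt_inline_sz.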
			 */
			memcpy((uint8_t *)raw + len + sizeof(vlan),
			       ((uint8_t *)addr) + len, 2);
			addr += len + 2;
			length -= (len + 2);
		} else {
			memcpy((uint8_t *)raw, ((uint8_t *)addr) + 2,
			       MLX5_WQE_DWORD_SIZE);
			length -= pkt_inline_sz;
			addr += pkt_inline_sz;
		}
		raw += MLX5_WQE_DWORD_SIZE;
		tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG);
		if (tso) {
			uintptr_t end =
				(uintptr_t)(((uintptr_t)txq->wqes) +
					    (1 << txq->wqe_n) * MLX5_WQE_SIZE);
			unsigned int copy_b;
			uint8_t vlan_sz =
				(buf->ol_flags & PKT_TX_VLAN_PKT) ? 4 : 0;
			const uint64_t is_tunneled =
				buf->ol_flags & (PKT_TX_TUNNEL_GRE |
						 PKT_TX_TUNNEL_VXLAN);

			tso_header_sz = buf->l2_len + vlan_sz +
					buf->l3_len + buf->l4_len;
			tso_segsz = buf->tso_segsz;
			if (unlikely(tso_segsz == 0)) {
				txq->stats.oerrors++;
				break;
			}
			if (is_tunneled && txq->tunnel_en) {
				tso_header_sz += buf->outer_l2_len +
						 buf->outer_l3_len;
				cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
			} else {
				cs_flags |= MLX5_ETH_WQE_L4_CSUM;
			}
			if (unlikely(tso_header_sz > MLX5_MAX_TSO_HEADER)) {
				txq->stats.oerrors++;
				break;
			}
			copy_b = tso_header_sz - pkt_inline_sz;
			/* First seg must contain all headers. */
			assert(copy_b <= length);
			if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
				uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;

				if (unlikely(max_wqe < n))
					break;
				max_wqe -= n;
				rte_memcpy((void *)raw, (void *)addr, copy_b);
				addr += copy_b;
				length -= copy_b;
				/* Include padding for TSO header. */
				copy_b = MLX5_WQE_DS(copy_b) *
					 MLX5_WQE_DWORD_SIZE;
				pkt_inline_sz += copy_b;
				raw += copy_b;
			} else {
				/* NOP WQE. */
				wqe->ctrl = (rte_v128u32_t){
					rte_cpu_to_be_32(txq->wqe_ci << 8),
					rte_cpu_to_be_32(txq->qp_num_8s | 1),
					0,
					0,
				};
				ds = 1;
#ifdef MLX5_PMD_SOFT_COUNTERS
				total_length = 0;
#endif
				k++;
				goto next_wqe;
			}
		}
		/* Inline if enough room. */
		if (max_inline || tso) {
			uint32_t inl = 0;
			uintptr_t end = (uintptr_t)
				(((uintptr_t)txq->wqes) +
				 (1 << txq->wqe_n) * MLX5_WQE_SIZE);
			unsigned int inline_room = max_inline *
						   RTE_CACHE_LINE_SIZE -
						   (pkt_inline_sz - 2) -
						   !!tso * sizeof(inl);
			uintptr_t addr_end;
			unsigned int copy_b;

pkt_inline:
			addr_end = RTE_ALIGN_FLOOR(addr + inline_room,
						   RTE_CACHE_LINE_SIZE);
			copy_b = (addr_end > addr) ?
				 RTE_MIN((addr_end - addr), length) : 0;
			if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
				/*
				 * One Dseg remains in the current WQE. To
				 * keep the computation positive, it is
				 * removed after the bytes to Dseg conversion.
				 */
				uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;

				if (unlikely(max_wqe < n))
					break;
				max_wqe -= n;
				if (tso && !inl) {
					inl = rte_cpu_to_be_32(copy_b |
							       MLX5_INLINE_SEG);
					rte_memcpy((void *)raw,
						   (void *)&inl, sizeof(inl));
					raw += sizeof(inl);
					pkt_inline_sz += sizeof(inl);
				}
				rte_memcpy((void *)raw, (void *)addr, copy_b);
				addr += copy_b;
				length -= copy_b;
				pkt_inline_sz += copy_b;
			}
			/*
			 * 2 DWORDs consumed by the WQE header + ETH segment +
			 * the size of the inline part of the packet.
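			 * For example, pkt_inline_sz == MLX5_WQE_DWORD_SIZE +
			 * 2 (only the Ethernet header inlined) gives
			 * MLX5_WQE_DS(16) == 1 and ds == 3, assuming the
			 * usual 16-byte granularity of MLX5_WQE_DS().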
			 */
			ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
			if (length > 0) {
				if (ds % (MLX5_WQE_SIZE /
					  MLX5_WQE_DWORD_SIZE) == 0) {
					if (unlikely(--max_wqe == 0))
						break;
					dseg = (volatile rte_v128u32_t *)
					       tx_mlx5_wqe(txq, txq->wqe_ci +
							   ds / 4);
				} else {
					dseg = (volatile rte_v128u32_t *)
						((uintptr_t)wqe +
						 (ds * MLX5_WQE_DWORD_SIZE));
				}
				goto use_dseg;
			} else if (!segs_n) {
				goto next_pkt;
			} else {
				raw += copy_b;
				inline_room -= copy_b;
				--segs_n;
				buf = buf->next;
				assert(buf);
				addr = rte_pktmbuf_mtod(buf, uintptr_t);
				length = DATA_LEN(buf);
#ifdef MLX5_PMD_SOFT_COUNTERS
				total_length += length;
#endif
				(*txq->elts)[++elts_head & elts_m] = buf;
				goto pkt_inline;
			}
		} else {
			/*
			 * No inline has been done in the packet, only the
			 * Ethernet header has been stored.
			 */
			dseg = (volatile rte_v128u32_t *)
				((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE));
			ds = 3;
use_dseg:
			/* Add the remaining packet as a simple ds. */
			addr = rte_cpu_to_be_64(addr);
			*dseg = (rte_v128u32_t){
				rte_cpu_to_be_32(length),
				mlx5_tx_mb2mr(txq, buf),
				addr,
				addr >> 32,
			};
			++ds;
			if (!segs_n)
				goto next_pkt;
		}
next_seg:
		assert(buf);
		assert(ds);
		assert(wqe);
		/*
		 * Spill on next WQE when the current one does not have
		 * enough room left. Size of WQE must be a multiple
		 * of data segment size.
		 */
		assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE));
		if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) {
			if (unlikely(--max_wqe == 0))
				break;
			dseg = (volatile rte_v128u32_t *)
			       tx_mlx5_wqe(txq, txq->wqe_ci + ds / 4);
			rte_prefetch0(tx_mlx5_wqe(txq,
						  txq->wqe_ci + ds / 4 + 1));
		} else {
			++dseg;
		}
		++ds;
		buf = buf->next;
		assert(buf);
		length = DATA_LEN(buf);
#ifdef MLX5_PMD_SOFT_COUNTERS
		total_length += length;
#endif
		/* Store segment information. */
		addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
		*dseg = (rte_v128u32_t){
			rte_cpu_to_be_32(length),
			mlx5_tx_mb2mr(txq, buf),
			addr,
			addr >> 32,
		};
		(*txq->elts)[++elts_head & elts_m] = buf;
		if (--segs_n)
			goto next_seg;
next_pkt:
		if (ds > MLX5_DSEG_MAX) {
			txq->stats.oerrors++;
			break;
		}
		++elts_head;
		++pkts;
		++i;
		j += sg;
		/* Initialize known and common part of the WQE structure. */
		if (tso) {
			wqe->ctrl = (rte_v128u32_t){
				rte_cpu_to_be_32((txq->wqe_ci << 8) |
						 MLX5_OPCODE_TSO),
				rte_cpu_to_be_32(txq->qp_num_8s | ds),
				0,
				0,
			};
			wqe->eseg = (rte_v128u32_t){
				0,
				cs_flags | (rte_cpu_to_be_16(tso_segsz) << 16),
				0,
				(ehdr << 16) | rte_cpu_to_be_16(tso_header_sz),
			};
		} else {
			wqe->ctrl = (rte_v128u32_t){
				rte_cpu_to_be_32((txq->wqe_ci << 8) |
						 MLX5_OPCODE_SEND),
				rte_cpu_to_be_32(txq->qp_num_8s | ds),
				0,
				0,
			};
			wqe->eseg = (rte_v128u32_t){
				0,
				cs_flags,
				0,
				(ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz),
			};
		}
next_wqe:
		txq->wqe_ci += (ds + 3) / 4;
		/* Save the last successful WQE for completion request. */
		last_wqe = (volatile struct mlx5_wqe_ctrl *)wqe;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += total_length;
#endif
	} while (i < pkts_n);
	/*
	 * Take a shortcut if nothing must be sent.
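	 * ("i" counts packets, "j" additional segments and "k" the NOP WQEs
	 * emitted on the TSO path above, so i + k == 0 means no WQE was
	 * built.)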
	 */
	if (unlikely((i + k) == 0))
		return 0;
	txq->elts_head += (i + j);
	/* Check whether completion threshold has been reached. */
	comp = txq->elts_comp + i + j + k;
	if (comp >= MLX5_TX_COMP_THRESH) {
		/* Request completion on last WQE. */
		last_wqe->ctrl2 = rte_cpu_to_be_32(8);
		/* Save elts_head in unused "immediate" field of WQE. */
		last_wqe->ctrl3 = txq->elts_head;
		txq->elts_comp = 0;
#ifndef NDEBUG
		++txq->cq_pi;
#endif
	} else {
		txq->elts_comp = comp;
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	/* Ring QP doorbell. */
	mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)last_wqe);
	return i;
}

/**
 * Open an MPW session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 * @param length
 *   Packet length.
 */
static inline void
mlx5_mpw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, uint32_t length)
{
	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
	volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
		(volatile struct mlx5_wqe_data_seg (*)[])
		tx_mlx5_wqe(txq, idx + 1);

	mpw->state = MLX5_MPW_STATE_OPENED;
	mpw->pkts_n = 0;
	mpw->len = length;
	mpw->total_len = 0;
	mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
	mpw->wqe->eseg.mss = rte_cpu_to_be_16(length);
	mpw->wqe->eseg.inline_hdr_sz = 0;
	mpw->wqe->eseg.rsvd0 = 0;
	mpw->wqe->eseg.rsvd1 = 0;
	mpw->wqe->eseg.rsvd2 = 0;
	mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
					     (txq->wqe_ci << 8) |
					     MLX5_OPCODE_TSO);
	mpw->wqe->ctrl[2] = 0;
	mpw->wqe->ctrl[3] = 0;
	mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *)
		(((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
	mpw->data.dseg[1] = (volatile struct mlx5_wqe_data_seg *)
		(((uintptr_t)mpw->wqe) + (3 * MLX5_WQE_DWORD_SIZE));
	mpw->data.dseg[2] = &(*dseg)[0];
	mpw->data.dseg[3] = &(*dseg)[1];
	mpw->data.dseg[4] = &(*dseg)[2];
}

/**
 * Close an MPW session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 */
static inline void
mlx5_mpw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
{
	unsigned int num = mpw->pkts_n;

	/*
	 * Store size in multiple of 16 bytes. Control and Ethernet segments
	 * count as 2.
	 */
	mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s | (2 + num));
	mpw->state = MLX5_MPW_STATE_CLOSED;
	if (num < 3)
		++txq->wqe_ci;
	else
		txq->wqe_ci += 2;
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
}

/**
 * DPDK callback for TX with MPW support.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
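 *
 * @note
 *   An MPW session (see mlx5_mpw_new() above) batches up to
 *   MLX5_MPW_DSEG_MAX equally sized packets sharing the same checksum flags
 *   into a single work request spanning at most two WQEBBs.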
 */
uint16_t
mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	uint16_t elts_head = txq->elts_head;
	const uint16_t elts_n = 1 << txq->elts_n;
	const uint16_t elts_m = elts_n - 1;
	unsigned int i = 0;
	unsigned int j = 0;
	uint16_t max_elts;
	uint16_t max_wqe;
	unsigned int comp;
	struct mlx5_mpw mpw = {
		.state = MLX5_MPW_STATE_CLOSED,
	};

	if (unlikely(!pkts_n))
		return 0;
	/* Prefetch first packet cacheline. */
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
	/* Start processing. */
	mlx5_tx_complete(txq);
	max_elts = (elts_n - (elts_head - txq->elts_tail));
	/* A CQE slot must always be available. */
	assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
	if (unlikely(!max_wqe))
		return 0;
	do {
		struct rte_mbuf *buf = *(pkts++);
		uint32_t length;
		unsigned int segs_n = buf->nb_segs;
		uint32_t cs_flags;

		/*
		 * Make sure there is enough room to store this packet and
		 * that one ring entry remains unused.
		 */
		assert(segs_n);
		if (max_elts < segs_n)
			break;
		/* Do not bother with large packets MPW cannot handle. */
		if (segs_n > MLX5_MPW_DSEG_MAX) {
			txq->stats.oerrors++;
			break;
		}
		max_elts -= segs_n;
		--pkts_n;
		cs_flags = txq_ol_cksum_to_cs(txq, buf);
		/* Retrieve packet information. */
		length = PKT_LEN(buf);
		assert(length);
		/* Start new session if packet differs. */
		if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
		    ((mpw.len != length) ||
		     (segs_n != 1) ||
		     (mpw.wqe->eseg.cs_flags != cs_flags)))
			mlx5_mpw_close(txq, &mpw);
		if (mpw.state == MLX5_MPW_STATE_CLOSED) {
			/*
			 * Multi-Packet WQE consumes at most two WQEs.
			 * mlx5_mpw_new() expects to be able to use such
			 * resources.
			 */
			if (unlikely(max_wqe < 2))
				break;
			max_wqe -= 2;
			mlx5_mpw_new(txq, &mpw, length);
			mpw.wqe->eseg.cs_flags = cs_flags;
		}
		/* Multi-segment packets must be alone in their MPW. */
		assert((segs_n == 1) || (mpw.pkts_n == 0));
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
		length = 0;
#endif
		do {
			volatile struct mlx5_wqe_data_seg *dseg;
			uintptr_t addr;

			assert(buf);
			(*txq->elts)[elts_head++ & elts_m] = buf;
			dseg = mpw.data.dseg[mpw.pkts_n];
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			*dseg = (struct mlx5_wqe_data_seg){
				.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
				.lkey = mlx5_tx_mb2mr(txq, buf),
				.addr = rte_cpu_to_be_64(addr),
			};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
			length += DATA_LEN(buf);
#endif
			buf = buf->next;
			++mpw.pkts_n;
			++j;
		} while (--segs_n);
		assert(length == mpw.len);
		if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
			mlx5_mpw_close(txq, &mpw);
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += length;
#endif
		++i;
	} while (pkts_n);
	/* Take a shortcut if nothing must be sent. */
	if (unlikely(i == 0))
		return 0;
	/* Check whether completion threshold has been reached. */
	/* "j" includes both packets and segments. */
	comp = txq->elts_comp + j;
	if (comp >= MLX5_TX_COMP_THRESH) {
		volatile struct mlx5_wqe *wqe = mpw.wqe;

		/*
		 * Request completion on last WQE.
		 */
		wqe->ctrl[2] = rte_cpu_to_be_32(8);
		/* Save elts_head in unused "immediate" field of WQE. */
		wqe->ctrl[3] = elts_head;
		txq->elts_comp = 0;
#ifndef NDEBUG
		++txq->cq_pi;
#endif
	} else {
		txq->elts_comp = comp;
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	/* Ring QP doorbell. */
	if (mpw.state == MLX5_MPW_STATE_OPENED)
		mlx5_mpw_close(txq, &mpw);
	mlx5_tx_dbrec(txq, mpw.wqe);
	txq->elts_head = elts_head;
	return i;
}

/**
 * Open an MPW inline session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 * @param length
 *   Packet length.
 */
static inline void
mlx5_mpw_inline_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw,
		    uint32_t length)
{
	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
	struct mlx5_wqe_inl_small *inl;

	mpw->state = MLX5_MPW_INL_STATE_OPENED;
	mpw->pkts_n = 0;
	mpw->len = length;
	mpw->total_len = 0;
	mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
	mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
					     (txq->wqe_ci << 8) |
					     MLX5_OPCODE_TSO);
	mpw->wqe->ctrl[2] = 0;
	mpw->wqe->ctrl[3] = 0;
	mpw->wqe->eseg.mss = rte_cpu_to_be_16(length);
	mpw->wqe->eseg.inline_hdr_sz = 0;
	mpw->wqe->eseg.cs_flags = 0;
	mpw->wqe->eseg.rsvd0 = 0;
	mpw->wqe->eseg.rsvd1 = 0;
	mpw->wqe->eseg.rsvd2 = 0;
	inl = (struct mlx5_wqe_inl_small *)
		(((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
	mpw->data.raw = (uint8_t *)&inl->raw;
}

/**
 * Close an MPW inline session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 */
static inline void
mlx5_mpw_inline_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
{
	unsigned int size;
	struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *)
		(((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));

	size = MLX5_WQE_SIZE - MLX5_MWQE64_INL_DATA + mpw->total_len;
	/*
	 * Store size in multiple of 16 bytes. Control and Ethernet segments
	 * count as 2.
	 */
	mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s |
					     MLX5_WQE_DS(size));
	mpw->state = MLX5_MPW_STATE_CLOSED;
	inl->byte_cnt = rte_cpu_to_be_32(mpw->total_len | MLX5_INLINE_SEG);
	txq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
}

/**
 * DPDK callback for TX with MPW inline support.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
			 uint16_t pkts_n)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	uint16_t elts_head = txq->elts_head;
	const uint16_t elts_n = 1 << txq->elts_n;
	const uint16_t elts_m = elts_n - 1;
	unsigned int i = 0;
	unsigned int j = 0;
	uint16_t max_elts;
	uint16_t max_wqe;
	unsigned int comp;
	unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE;
	struct mlx5_mpw mpw = {
		.state = MLX5_MPW_STATE_CLOSED,
	};
	/*
	 * Compute the maximum number of WQEs which can be consumed by inline
	 * code.
	 * - 2 DSEG for:
	 *   - 1 control segment,
	 *   - 1 Ethernet segment,
	 * - N Dseg from the inline request.
	 */
	const unsigned int wqe_inl_n =
		((2 * MLX5_WQE_DWORD_SIZE +
		  txq->max_inline * RTE_CACHE_LINE_SIZE) +
		 RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;

	if (unlikely(!pkts_n))
		return 0;
	/* Prefetch first packet cacheline. */
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
	/* Start processing. */
	mlx5_tx_complete(txq);
	max_elts = (elts_n - (elts_head - txq->elts_tail));
	/* A CQE slot must always be available. */
	assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
	do {
		struct rte_mbuf *buf = *(pkts++);
		uintptr_t addr;
		uint32_t length;
		unsigned int segs_n = buf->nb_segs;
		uint8_t cs_flags;

		/*
		 * Make sure there is enough room to store this packet and
		 * that one ring entry remains unused.
		 */
		assert(segs_n);
		if (max_elts < segs_n)
			break;
		/* Do not bother with large packets MPW cannot handle. */
		if (segs_n > MLX5_MPW_DSEG_MAX) {
			txq->stats.oerrors++;
			break;
		}
		max_elts -= segs_n;
		--pkts_n;
		/*
		 * Compute max_wqe in case fewer WQEs were consumed in the
		 * previous iteration.
		 */
		max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
		cs_flags = txq_ol_cksum_to_cs(txq, buf);
		/* Retrieve packet information. */
		length = PKT_LEN(buf);
		/* Start new session if packet differs. */
		if (mpw.state == MLX5_MPW_STATE_OPENED) {
			if ((mpw.len != length) ||
			    (segs_n != 1) ||
			    (mpw.wqe->eseg.cs_flags != cs_flags))
				mlx5_mpw_close(txq, &mpw);
		} else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
			if ((mpw.len != length) ||
			    (segs_n != 1) ||
			    (length > inline_room) ||
			    (mpw.wqe->eseg.cs_flags != cs_flags)) {
				mlx5_mpw_inline_close(txq, &mpw);
				inline_room =
					txq->max_inline * RTE_CACHE_LINE_SIZE;
			}
		}
		if (mpw.state == MLX5_MPW_STATE_CLOSED) {
			if ((segs_n != 1) ||
			    (length > inline_room)) {
				/*
				 * Multi-Packet WQE consumes at most two WQEs.
				 * mlx5_mpw_new() expects to be able to use
				 * such resources.
				 */
				if (unlikely(max_wqe < 2))
					break;
				max_wqe -= 2;
				mlx5_mpw_new(txq, &mpw, length);
				mpw.wqe->eseg.cs_flags = cs_flags;
			} else {
				if (unlikely(max_wqe < wqe_inl_n))
					break;
				max_wqe -= wqe_inl_n;
				mlx5_mpw_inline_new(txq, &mpw, length);
				mpw.wqe->eseg.cs_flags = cs_flags;
			}
		}
		/*
		 * Multi-segment packets must be alone in their MPW.
		 */
		assert((segs_n == 1) || (mpw.pkts_n == 0));
		if (mpw.state == MLX5_MPW_STATE_OPENED) {
			assert(inline_room ==
			       txq->max_inline * RTE_CACHE_LINE_SIZE);
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
			length = 0;
#endif
			do {
				volatile struct mlx5_wqe_data_seg *dseg;

				assert(buf);
				(*txq->elts)[elts_head++ & elts_m] = buf;
				dseg = mpw.data.dseg[mpw.pkts_n];
				addr = rte_pktmbuf_mtod(buf, uintptr_t);
				*dseg = (struct mlx5_wqe_data_seg){
					.byte_count =
					       rte_cpu_to_be_32(DATA_LEN(buf)),
					.lkey = mlx5_tx_mb2mr(txq, buf),
					.addr = rte_cpu_to_be_64(addr),
				};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
				length += DATA_LEN(buf);
#endif
				buf = buf->next;
				++mpw.pkts_n;
				++j;
			} while (--segs_n);
			assert(length == mpw.len);
			if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
				mlx5_mpw_close(txq, &mpw);
		} else {
			unsigned int max;

			assert(mpw.state == MLX5_MPW_INL_STATE_OPENED);
			assert(length <= inline_room);
			assert(length == DATA_LEN(buf));
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			(*txq->elts)[elts_head++ & elts_m] = buf;
			/* Maximum number of bytes before wrapping. */
			max = ((((uintptr_t)(txq->wqes)) +
				(1 << txq->wqe_n) *
				MLX5_WQE_SIZE) -
			       (uintptr_t)mpw.data.raw);
			if (length > max) {
				rte_memcpy((void *)(uintptr_t)mpw.data.raw,
					   (void *)addr,
					   max);
				mpw.data.raw = (volatile void *)txq->wqes;
				rte_memcpy((void *)(uintptr_t)mpw.data.raw,
					   (void *)(addr + max),
					   length - max);
				mpw.data.raw += length - max;
			} else {
				rte_memcpy((void *)(uintptr_t)mpw.data.raw,
					   (void *)addr,
					   length);

				if (length == max)
					mpw.data.raw =
						(volatile void *)txq->wqes;
				else
					mpw.data.raw += length;
			}
			++mpw.pkts_n;
			mpw.total_len += length;
			++j;
			if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) {
				mlx5_mpw_inline_close(txq, &mpw);
				inline_room =
					txq->max_inline * RTE_CACHE_LINE_SIZE;
			} else {
				inline_room -= length;
			}
		}
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += length;
#endif
		++i;
	} while (pkts_n);
	/* Take a shortcut if nothing must be sent. */
	if (unlikely(i == 0))
		return 0;
	/* Check whether completion threshold has been reached. */
	/* "j" includes both packets and segments. */
	comp = txq->elts_comp + j;
	if (comp >= MLX5_TX_COMP_THRESH) {
		volatile struct mlx5_wqe *wqe = mpw.wqe;

		/* Request completion on last WQE. */
		wqe->ctrl[2] = rte_cpu_to_be_32(8);
		/* Save elts_head in unused "immediate" field of WQE. */
		wqe->ctrl[3] = elts_head;
		txq->elts_comp = 0;
#ifndef NDEBUG
		++txq->cq_pi;
#endif
	} else {
		txq->elts_comp = comp;
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	/* Ring QP doorbell. */
	if (mpw.state == MLX5_MPW_INL_STATE_OPENED)
		mlx5_mpw_inline_close(txq, &mpw);
	else if (mpw.state == MLX5_MPW_STATE_OPENED)
		mlx5_mpw_close(txq, &mpw);
	mlx5_tx_dbrec(txq, mpw.wqe);
	txq->elts_head = elts_head;
	return i;
}

/**
 * Open an Enhanced MPW session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 * @param padding
 *   Nonzero to pad the first two DWORDs of the WQE with zero-length inline
 *   headers.
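 *   (Within this file mlx5_empw_new() is only called with padding == 0, see
 *   txq_burst_empw() below.)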
 */
static inline void
mlx5_empw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, int padding)
{
	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);

	mpw->state = MLX5_MPW_ENHANCED_STATE_OPENED;
	mpw->pkts_n = 0;
	mpw->total_len = sizeof(struct mlx5_wqe);
	mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
	mpw->wqe->ctrl[0] =
		rte_cpu_to_be_32((MLX5_OPC_MOD_ENHANCED_MPSW << 24) |
				 (txq->wqe_ci << 8) |
				 MLX5_OPCODE_ENHANCED_MPSW);
	mpw->wqe->ctrl[2] = 0;
	mpw->wqe->ctrl[3] = 0;
	memset((void *)(uintptr_t)&mpw->wqe->eseg, 0, MLX5_WQE_DWORD_SIZE);
	if (unlikely(padding)) {
		uintptr_t addr = (uintptr_t)(mpw->wqe + 1);

		/* Pad the first 2 DWORDs with zero-length inline header. */
		*(volatile uint32_t *)addr = rte_cpu_to_be_32(MLX5_INLINE_SEG);
		*(volatile uint32_t *)(addr + MLX5_WQE_DWORD_SIZE) =
			rte_cpu_to_be_32(MLX5_INLINE_SEG);
		mpw->total_len += 2 * MLX5_WQE_DWORD_SIZE;
		/* Start from the next WQEBB. */
		mpw->data.raw = (volatile void *)(tx_mlx5_wqe(txq, idx + 1));
	} else {
		mpw->data.raw = (volatile void *)(mpw->wqe + 1);
	}
}

/**
 * Close an Enhanced MPW session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 *
 * @return
 *   Number of consumed WQEs.
 */
static inline uint16_t
mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
{
	uint16_t ret;

	/* Store size in multiple of 16 bytes. Control and Ethernet segments
	 * count as 2.
	 */
	mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s |
					     MLX5_WQE_DS(mpw->total_len));
	mpw->state = MLX5_MPW_STATE_CLOSED;
	ret = (mpw->total_len + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
	txq->wqe_ci += ret;
	return ret;
}

/**
 * TX with Enhanced MPW support.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
static inline uint16_t
txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
	       uint16_t pkts_n)
{
	uint16_t elts_head = txq->elts_head;
	const uint16_t elts_n = 1 << txq->elts_n;
	const uint16_t elts_m = elts_n - 1;
	unsigned int i = 0;
	unsigned int j = 0;
	uint16_t max_elts;
	uint16_t max_wqe;
	unsigned int max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;
	unsigned int mpw_room = 0;
	unsigned int inl_pad = 0;
	uint32_t inl_hdr;
	struct mlx5_mpw mpw = {
		.state = MLX5_MPW_STATE_CLOSED,
	};

	if (unlikely(!pkts_n))
		return 0;
	/* Start processing. */
	mlx5_tx_complete(txq);
	max_elts = (elts_n - (elts_head - txq->elts_tail));
	/* A CQE slot must always be available. */
	assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
	if (unlikely(!max_wqe))
		return 0;
	do {
		struct rte_mbuf *buf = *(pkts++);
		uintptr_t addr;
		unsigned int n;
		unsigned int do_inline = 0; /* Whether inline is possible. */
		uint32_t length;
		uint8_t cs_flags;

		/*
		 * Multi-segmented packet is handled in slow-path outside.
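		 * (mlx5_tx_burst_empw() below feeds multi-segment packets to
		 * mlx5_tx_burst() and only single-segment ones to this
		 * routine.)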
		 */
		assert(NB_SEGS(buf) == 1);
		/* Make sure there is enough room to store this packet. */
		if (max_elts - j == 0)
			break;
		cs_flags = txq_ol_cksum_to_cs(txq, buf);
		/* Retrieve packet information. */
		length = PKT_LEN(buf);
		/* Start new session if:
		 * - multi-segment packet
		 * - no space left even for a dseg
		 * - next packet can be inlined with a new WQE
		 * - cs_flags differs
		 */
		if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) {
			if ((inl_pad + sizeof(struct mlx5_wqe_data_seg) >
			     mpw_room) ||
			    (length <= txq->inline_max_packet_sz &&
			     inl_pad + sizeof(inl_hdr) + length >
			     mpw_room) ||
			    (mpw.wqe->eseg.cs_flags != cs_flags))
				max_wqe -= mlx5_empw_close(txq, &mpw);
		}
		if (unlikely(mpw.state == MLX5_MPW_STATE_CLOSED)) {
			/* In Enhanced MPW, inline as much as the budget
			 * allows. The remaining space is to be filled with
			 * dsegs. If the title WQEBB isn't padded, it will have
			 * 2 dsegs there.
			 */
			mpw_room = RTE_MIN(MLX5_WQE_SIZE_MAX,
					   (max_inline ? max_inline :
					    pkts_n * MLX5_WQE_DWORD_SIZE) +
					   MLX5_WQE_SIZE);
			if (unlikely(max_wqe * MLX5_WQE_SIZE < mpw_room))
				break;
			/* Don't pad the title WQEBB to not waste WQ. */
			mlx5_empw_new(txq, &mpw, 0);
			mpw_room -= mpw.total_len;
			inl_pad = 0;
			do_inline = length <= txq->inline_max_packet_sz &&
				    sizeof(inl_hdr) + length <= mpw_room &&
				    !txq->mpw_hdr_dseg;
			mpw.wqe->eseg.cs_flags = cs_flags;
		} else {
			/* Evaluate whether the next packet can be inlined.
			 * Inlining is possible when:
			 * - length is less than configured value
			 * - length fits for remaining space
			 * - not required to fill the title WQEBB with dsegs
			 */
			do_inline =
				length <= txq->inline_max_packet_sz &&
				inl_pad + sizeof(inl_hdr) + length <=
				mpw_room &&
				(!txq->mpw_hdr_dseg ||
				 mpw.total_len >= MLX5_WQE_SIZE);
		}
		if (max_inline && do_inline) {
			/* Inline packet into WQE. */
			unsigned int max;

			assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED);
			assert(length == DATA_LEN(buf));
			inl_hdr = rte_cpu_to_be_32(length | MLX5_INLINE_SEG);
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			mpw.data.raw = (volatile void *)
				((uintptr_t)mpw.data.raw + inl_pad);
			max = tx_mlx5_wq_tailroom(txq,
					(void *)(uintptr_t)mpw.data.raw);
			/* Copy inline header. */
			mpw.data.raw = (volatile void *)
				mlx5_copy_to_wq(
					  (void *)(uintptr_t)mpw.data.raw,
					  &inl_hdr,
					  sizeof(inl_hdr),
					  (void *)(uintptr_t)txq->wqes,
					  max);
			max = tx_mlx5_wq_tailroom(txq,
					(void *)(uintptr_t)mpw.data.raw);
			/* Copy packet data. */
			mpw.data.raw = (volatile void *)
				mlx5_copy_to_wq(
					  (void *)(uintptr_t)mpw.data.raw,
					  (void *)addr,
					  length,
					  (void *)(uintptr_t)txq->wqes,
					  max);
			++mpw.pkts_n;
			mpw.total_len += (inl_pad + sizeof(inl_hdr) + length);
			/* No need to get completion as the entire packet is
			 * copied to WQ. Free the buf right away.
			 */
			rte_pktmbuf_free_seg(buf);
			mpw_room -= (inl_pad + sizeof(inl_hdr) + length);
			/* Add pad in the next packet if any. */
			inl_pad = (((uintptr_t)mpw.data.raw +
				    (MLX5_WQE_DWORD_SIZE - 1)) &
				   ~(MLX5_WQE_DWORD_SIZE - 1)) -
				  (uintptr_t)mpw.data.raw;
		} else {
			/*
			 * No inline. Load a dseg of packet pointer.
			 */
			volatile rte_v128u32_t *dseg;

			assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED);
			assert((inl_pad + sizeof(*dseg)) <= mpw_room);
			assert(length == DATA_LEN(buf));
			if (!tx_mlx5_wq_tailroom(txq,
					(void *)((uintptr_t)mpw.data.raw
						+ inl_pad)))
				dseg = (volatile void *)txq->wqes;
			else
				dseg = (volatile void *)
					((uintptr_t)mpw.data.raw +
					 inl_pad);
			(*txq->elts)[elts_head++ & elts_m] = buf;
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			for (n = 0; n * RTE_CACHE_LINE_SIZE < length; n++)
				rte_prefetch2((void *)(addr +
						n * RTE_CACHE_LINE_SIZE));
			addr = rte_cpu_to_be_64(addr);
			*dseg = (rte_v128u32_t) {
				rte_cpu_to_be_32(length),
				mlx5_tx_mb2mr(txq, buf),
				addr,
				addr >> 32,
			};
			mpw.data.raw = (volatile void *)(dseg + 1);
			mpw.total_len += (inl_pad + sizeof(*dseg));
			++j;
			++mpw.pkts_n;
			mpw_room -= (inl_pad + sizeof(*dseg));
			inl_pad = 0;
		}
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += length;
#endif
		++i;
	} while (i < pkts_n);
	/* Take a shortcut if nothing must be sent. */
	if (unlikely(i == 0))
		return 0;
	/* Check whether completion threshold has been reached. */
	if (txq->elts_comp + j >= MLX5_TX_COMP_THRESH ||
			(uint16_t)(txq->wqe_ci - txq->mpw_comp) >=
			 (1 << txq->wqe_n) / MLX5_TX_COMP_THRESH_INLINE_DIV) {
		volatile struct mlx5_wqe *wqe = mpw.wqe;

		/* Request completion on last WQE. */
		wqe->ctrl[2] = rte_cpu_to_be_32(8);
		/* Save elts_head in unused "immediate" field of WQE. */
		wqe->ctrl[3] = elts_head;
		txq->elts_comp = 0;
		txq->mpw_comp = txq->wqe_ci;
#ifndef NDEBUG
		++txq->cq_pi;
#endif
	} else {
		txq->elts_comp += j;
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED)
		mlx5_empw_close(txq, &mpw);
	/* Ring QP doorbell. */
	mlx5_tx_dbrec(txq, mpw.wqe);
	txq->elts_head = elts_head;
	return i;
}

/**
 * DPDK callback for TX with Enhanced MPW support.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	uint16_t nb_tx = 0;

	while (pkts_n > nb_tx) {
		uint16_t n;
		uint16_t ret;

		n = txq_count_contig_multi_seg(&pkts[nb_tx], pkts_n - nb_tx);
		if (n) {
			ret = mlx5_tx_burst(dpdk_txq, &pkts[nb_tx], n);
			if (!ret)
				break;
			nb_tx += ret;
		}
		n = txq_count_contig_single_seg(&pkts[nb_tx], pkts_n - nb_tx);
		if (n) {
			ret = txq_burst_empw(txq, &pkts[nb_tx], n);
			if (!ret)
				break;
			nb_tx += ret;
		}
	}
	return nb_tx;
}

/**
 * Translate RX completion flags to packet type.
 *
 * @param[in] cqe
 *   Pointer to CQE.
 *
 * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
 *
 * @return
 *   Packet type for struct rte_mbuf.
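 *   (An entry of mlx5_ptype_table[], indexed with the bit layout documented
 *   in mlx5_set_ptype_table() above.)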
 */
static inline uint32_t
rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
{
	uint8_t idx;
	uint8_t pinfo = cqe->pkt_info;
	uint16_t ptype = cqe->hdr_type_etc;

	/*
	 * The index to the array should have:
	 * bit[1:0] = l3_hdr_type
	 * bit[4:2] = l4_hdr_type
	 * bit[5] = ip_frag
	 * bit[6] = tunneled
	 * bit[7] = outer_l3_type
	 */
	idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10);
	return mlx5_ptype_table[idx];
}

/**
 * Get size of the next packet for a given CQE. For compressed CQEs, the
 * consumer index is updated only once all packets of the current one have
 * been processed.
 *
 * @param rxq
 *   Pointer to RX queue.
 * @param cqe
 *   CQE to process.
 * @param cqe_cnt
 *   CQ ring mask (number of CQEs minus one).
 * @param[out] rss_hash
 *   Packet RSS Hash result.
 *
 * @return
 *   Packet size in bytes (0 if there is none), -1 in case of completion
 *   with error.
 */
static inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		 uint16_t cqe_cnt, uint32_t *rss_hash)
{
	struct rxq_zip *zip = &rxq->zip;
	uint16_t cqe_n = cqe_cnt + 1;
	int len = 0;
	uint16_t idx, end;

	/* Process compressed data in the CQE and mini arrays. */
	if (zip->ai) {
		volatile struct mlx5_mini_cqe8 (*mc)[8] =
			(volatile struct mlx5_mini_cqe8 (*)[8])
			(uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].pkt_info);

		len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt);
		*rss_hash = rte_be_to_cpu_32((*mc)[zip->ai & 7].rx_hash_result);
		if ((++zip->ai & 7) == 0) {
			/* Invalidate consumed CQEs */
			idx = zip->ca;
			end = zip->na;
			while (idx != end) {
				(*rxq->cqes)[idx & cqe_cnt].op_own =
					MLX5_CQE_INVALIDATE;
				++idx;
			}
			/*
			 * Increment consumer index to skip the number of
			 * CQEs consumed. Hardware leaves holes in the CQ
			 * ring for software use.
			 */
			zip->ca = zip->na;
			zip->na += 8;
		}
		if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
			/* Invalidate the rest */
			idx = zip->ca;
			end = zip->cq_ci;

			while (idx != end) {
				(*rxq->cqes)[idx & cqe_cnt].op_own =
					MLX5_CQE_INVALIDATE;
				++idx;
			}
			rxq->cq_ci = zip->cq_ci;
			zip->ai = 0;
		}
	/* No compressed data, get next CQE and verify if it is compressed. */
	} else {
		int ret;
		int8_t op_own;

		ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
		if (unlikely(ret == 1))
			return 0;
		++rxq->cq_ci;
		op_own = cqe->op_own;
		rte_cio_rmb();
		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
			volatile struct mlx5_mini_cqe8 (*mc)[8] =
				(volatile struct mlx5_mini_cqe8 (*)[8])
				(uintptr_t)(&(*rxq->cqes)[rxq->cq_ci &
							  cqe_cnt].pkt_info);

			/* Fix endianness. */
			zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt);
			/*
			 * Current mini array position is the one returned by
			 * check_cqe64().
			 *
			 * If completion comprises several mini arrays, as a
			 * special case the second one is located 7 CQEs after
			 * the initial CQE instead of 8 for subsequent ones.
			 */
			zip->ca = rxq->cq_ci;
			zip->na = zip->ca + 7;
			/* Compute the next non compressed CQE. */
			--rxq->cq_ci;
			zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
			/*
			 * Get packet size to return.
			 */
			len = rte_be_to_cpu_32((*mc)[0].byte_cnt);
			*rss_hash = rte_be_to_cpu_32((*mc)[0].rx_hash_result);
			zip->ai = 1;
			/* Prefetch all the entries to be invalidated */
			idx = zip->ca;
			end = zip->cq_ci;
			while (idx != end) {
				rte_prefetch0(&(*rxq->cqes)[(idx) & cqe_cnt]);
				++idx;
			}
		} else {
			len = rte_be_to_cpu_32(cqe->byte_cnt);
			*rss_hash = rte_be_to_cpu_32(cqe->rx_hash_res);
		}
		/* Error while receiving packet. */
		if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR))
			return -1;
	}
	return len;
}

/**
 * Translate RX completion flags to offload flags.
 *
 * @param[in] rxq
 *   Pointer to RX queue structure.
 * @param[in] cqe
 *   Pointer to CQE.
 *
 * @return
 *   Offload flags (ol_flags) for struct rte_mbuf.
 */
static inline uint32_t
rxq_cq_to_ol_flags(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe)
{
	uint32_t ol_flags = 0;
	uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc);

	ol_flags =
		TRANSPOSE(flags,
			  MLX5_CQE_RX_L3_HDR_VALID,
			  PKT_RX_IP_CKSUM_GOOD) |
		TRANSPOSE(flags,
			  MLX5_CQE_RX_L4_HDR_VALID,
			  PKT_RX_L4_CKSUM_GOOD);
	if ((cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
		ol_flags |=
			TRANSPOSE(flags,
				  MLX5_CQE_RX_L3_HDR_VALID,
				  PKT_RX_IP_CKSUM_GOOD) |
			TRANSPOSE(flags,
				  MLX5_CQE_RX_L4_HDR_VALID,
				  PKT_RX_L4_CKSUM_GOOD);
	return ol_flags;
}

/**
 * DPDK callback for RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
	const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
	const unsigned int sges_n = rxq->sges_n;
	struct rte_mbuf *pkt = NULL;
	struct rte_mbuf *seg = NULL;
	volatile struct mlx5_cqe *cqe =
		&(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
	unsigned int i = 0;
	unsigned int rq_ci = rxq->rq_ci << sges_n;
	int len = 0; /* keep its value across iterations. */

	while (pkts_n) {
		unsigned int idx = rq_ci & wqe_cnt;
		volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx];
		struct rte_mbuf *rep = (*rxq->elts)[idx];
		uint32_t rss_hash_res = 0;

		if (pkt)
			NEXT(seg) = rep;
		seg = rep;
		rte_prefetch0(seg);
		rte_prefetch0(cqe);
		rte_prefetch0(wqe);
		rep = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(rep == NULL)) {
			++rxq->stats.rx_nombuf;
			if (!pkt) {
				/*
				 * no buffers before we even started,
				 * bail out silently.
				 */
				break;
			}
			while (pkt != seg) {
				assert(pkt != (*rxq->elts)[idx]);
				rep = NEXT(pkt);
				NEXT(pkt) = NULL;
				NB_SEGS(pkt) = 1;
				rte_mbuf_raw_free(pkt);
				pkt = rep;
			}
			break;
		}
		if (!pkt) {
			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
			len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt,
					       &rss_hash_res);
			if (!len) {
				rte_mbuf_raw_free(rep);
				break;
			}
			if (unlikely(len == -1)) {
				/*
				 * RX error, packet is likely too large.
				 */
				rte_mbuf_raw_free(rep);
				++rxq->stats.idropped;
				goto skip;
			}
			pkt = seg;
			assert(len >= (rxq->crc_present << 2));
			/* Update packet information. */
			pkt->packet_type = rxq_cq_to_pkt_type(cqe);
			pkt->ol_flags = 0;
			if (rss_hash_res && rxq->rss_hash) {
				pkt->hash.rss = rss_hash_res;
				pkt->ol_flags = PKT_RX_RSS_HASH;
			}
			if (rxq->mark &&
			    MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) {
				pkt->ol_flags |= PKT_RX_FDIR;
				if (cqe->sop_drop_qpn !=
				    rte_cpu_to_be_32(MLX5_FLOW_MARK_DEFAULT)) {
					uint32_t mark = cqe->sop_drop_qpn;

					pkt->ol_flags |= PKT_RX_FDIR_ID;
					pkt->hash.fdir.hi =
						mlx5_flow_mark_get(mark);
				}
			}
			if (rxq->csum | rxq->csum_l2tun)
				pkt->ol_flags |= rxq_cq_to_ol_flags(rxq, cqe);
			if (rxq->vlan_strip &&
			    (cqe->hdr_type_etc &
			     rte_cpu_to_be_16(MLX5_CQE_VLAN_STRIPPED))) {
				pkt->ol_flags |= PKT_RX_VLAN |
					PKT_RX_VLAN_STRIPPED;
				pkt->vlan_tci =
					rte_be_to_cpu_16(cqe->vlan_info);
			}
			if (rxq->hw_timestamp) {
				pkt->timestamp =
					rte_be_to_cpu_64(cqe->timestamp);
				pkt->ol_flags |= PKT_RX_TIMESTAMP;
			}
			if (rxq->crc_present)
				len -= ETHER_CRC_LEN;
			PKT_LEN(pkt) = len;
		}
		DATA_LEN(rep) = DATA_LEN(seg);
		PKT_LEN(rep) = PKT_LEN(seg);
		SET_DATA_OFF(rep, DATA_OFF(seg));
		PORT(rep) = PORT(seg);
		(*rxq->elts)[idx] = rep;
		/*
		 * Fill NIC descriptor with the new buffer. The lkey and size
		 * of the buffers are already known, only the buffer address
		 * changes.
		 */
		wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
		if (len > DATA_LEN(seg)) {
			len -= DATA_LEN(seg);
			++NB_SEGS(pkt);
			++rq_ci;
			continue;
		}
		DATA_LEN(seg) = len;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment bytes counter. */
		rxq->stats.ibytes += PKT_LEN(pkt);
#endif
		/* Return packet. */
		*(pkts++) = pkt;
		pkt = NULL;
		--pkts_n;
		++i;
skip:
		/* Align consumer index to the next stride. */
		rq_ci >>= sges_n;
		++rq_ci;
		rq_ci <<= sges_n;
	}
	if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci)))
		return 0;
	/* Update the consumer index. */
	rxq->rq_ci = rq_ci >> sges_n;
	rte_cio_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	rte_cio_wmb();
	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment packets counter. */
	rxq->stats.ipackets += i;
#endif
	return i;
}

/**
 * Dummy DPDK callback for TX.
 *
 * This function is used to temporarily replace the real callback during
 * unsafe control operations on the queue, or in case of error.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
removed_tx_burst(void *dpdk_txq __rte_unused,
		 struct rte_mbuf **pkts __rte_unused,
		 uint16_t pkts_n __rte_unused)
{
	return 0;
}

/**
 * Dummy DPDK callback for RX.
 *
 * This function is used to temporarily replace the real callback during
 * unsafe control operations on the queue, or in case of error.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
removed_rx_burst(void *dpdk_rxq __rte_unused,
		 struct rte_mbuf **pkts __rte_unused,
		 uint16_t pkts_n __rte_unused)
{
	return 0;
}

/*
 * Vectorized Rx/Tx routines are not compiled in when required vector
 * instructions are not supported on a target architecture. The following null
 * stubs are needed for linkage when the vectorized implementations (e.g.
 * mlx5_rxtx_vec_sse.c for x86) are not built into this library.
 */

uint16_t __attribute__((weak))
mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused,
		      struct rte_mbuf **pkts __rte_unused,
		      uint16_t pkts_n __rte_unused)
{
	return 0;
}

uint16_t __attribute__((weak))
mlx5_tx_burst_vec(void *dpdk_txq __rte_unused,
		  struct rte_mbuf **pkts __rte_unused,
		  uint16_t pkts_n __rte_unused)
{
	return 0;
}

uint16_t __attribute__((weak))
mlx5_rx_burst_vec(void *dpdk_txq __rte_unused,
		  struct rte_mbuf **pkts __rte_unused,
		  uint16_t pkts_n __rte_unused)
{
	return 0;
}

int __attribute__((weak))
mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
{
	return -ENOTSUP;
}

int __attribute__((weak))
mlx5_check_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
{
	return -ENOTSUP;
}

int __attribute__((weak))
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
{
	return -ENOTSUP;
}

int __attribute__((weak))
mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)
{
	return -ENOTSUP;
}