/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>

#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"

static __rte_always_inline uint32_t
rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe);

static __rte_always_inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		 uint16_t cqe_cnt, uint32_t *rss_hash);

static __rte_always_inline uint32_t
rxq_cq_to_ol_flags(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe);

/* CQE-flags -> RTE_PTYPE_* lookup table, filled by mlx5_set_ptype_table(). */
uint32_t mlx5_ptype_table[] __rte_cache_aligned = {
	[0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */
};

/**
 * Build a table to translate Rx completion flags to packet type.
 *
 * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
 */
void
mlx5_set_ptype_table(void)
{
	unsigned int i;
	uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table;

	/* Last entry must not be overwritten, reserved for errored packet. */
	for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i)
		(*p)[i] = RTE_PTYPE_UNKNOWN;
	/*
	 * The index to the array should have:
	 * bit[1:0] = l3_hdr_type
	 * bit[4:2] = l4_hdr_type
	 * bit[5] = ip_frag
	 * bit[6] = tunneled
	 * bit[7] = outer_l3_type
	 */
	/* L2 */
	(*p)[0x00] = RTE_PTYPE_L2_ETHER;
	/* L3 */
	(*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	(*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	/* Fragmented */
	(*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	(*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	/* TCP */
	(*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	/* UDP */
	(*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	(*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	/* Repeat with outer_l3_type being set. Just in case. */
	(*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	(*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	(*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	(*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	(*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	(*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	/* Tunneled - L3 */
	(*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	(*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	(*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	(*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	/* Tunneled - Fragmented */
	(*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	(*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	(*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	(*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	/* Tunneled - TCP */
	(*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	/* Tunneled - UDP */
	(*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
	(*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
	(*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
	(*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
}

/**
 * Return the size of tailroom of WQ.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param addr
 *   Pointer to tail of WQ.
 *
 * @return
 *   Size of tailroom.
 */
static inline size_t
tx_mlx5_wq_tailroom(struct mlx5_txq_data *txq, void *addr)
{
	size_t tailroom;

	/* Bytes between addr and the end of the WQ ring buffer. */
	tailroom = (uintptr_t)(txq->wqes) +
		   (1 << txq->wqe_n) * MLX5_WQE_SIZE -
		   (uintptr_t)addr;
	return tailroom;
}

/**
 * Copy data to tailroom of circular queue.
 *
 * @param dst
 *   Pointer to destination.
 * @param src
 *   Pointer to source.
 * @param n
 *   Number of bytes to copy.
 * @param base
 *   Pointer to head of queue.
 * @param tailroom
 *   Size of tailroom from dst.
 *
 * @return
 *   Pointer after copied data.
 */
static inline void *
mlx5_copy_to_wq(void *dst, const void *src, size_t n,
		void *base, size_t tailroom)
{
	void *ret;

	if (n > tailroom) {
		/* Copy wraps: fill the tailroom, continue from the base. */
		rte_memcpy(dst, src, tailroom);
		rte_memcpy(base, (void *)((uintptr_t)src + tailroom),
			   n - tailroom);
		ret = (uint8_t *)base + n - tailroom;
	} else {
		rte_memcpy(dst, src, n);
		/* Exactly filling the tailroom also wraps to the base. */
		ret = (n == tailroom) ? base : (uint8_t *)dst + n;
	}
	return ret;
}

/**
 * DPDK callback to check the status of a tx descriptor.
 *
 * @param tx_queue
 *   The tx queue.
 * @param[in] offset
 *   The index of the descriptor in the ring.
 *
 * @return
 *   The status of the tx descriptor.
 */
int
mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
{
	struct mlx5_txq_data *txq = tx_queue;
	uint16_t used;

	/* Reap completed WQEs so elts_tail reflects current hardware state. */
	mlx5_tx_complete(txq);
	used = txq->elts_head - txq->elts_tail;
	if (offset < used)
		return RTE_ETH_TX_DESC_FULL;
	return RTE_ETH_TX_DESC_DONE;
}

/**
 * DPDK callback to check the status of a rx descriptor.
 *
 * @param rx_queue
 *   The rx queue.
 * @param[in] offset
 *   The index of the descriptor in the ring.
 *
 * @return
 *   The status of the rx descriptor.
 */
int
mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
{
	struct mlx5_rxq_data *rxq = rx_queue;
	struct rxq_zip *zip = &rxq->zip;
	volatile struct mlx5_cqe *cqe;
	const unsigned int cqe_n = (1 << rxq->cqe_n);
	const unsigned int cqe_cnt = cqe_n - 1;
	unsigned int cq_ci;
	unsigned int used;

	/* if we are processing a compressed cqe */
	if (zip->ai) {
		/* Count the CQEs of the current compressed session that have
		 * not been consumed yet, and resume from its CQ index.
		 */
		used = zip->cqe_cnt - zip->ca;
		cq_ci = zip->cq_ci;
	} else {
		used = 0;
		cq_ci = rxq->cq_ci;
	}
	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	/* Walk valid CQEs, summing how many packets they represent. */
	while (check_cqe(cqe, cqe_n, cq_ci) == 0) {
		int8_t op_own;
		unsigned int n;

		op_own = cqe->op_own;
		/* A compressed CQE stands for byte_cnt packets. */
		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
			n = rte_be_to_cpu_32(cqe->byte_cnt);
		else
			n = 1;
		cq_ci += n;
		used += n;
		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	}
	/* Clamp to the Rx ring capacity. */
	used = RTE_MIN(used, (1U << rxq->elts_n) - 1);
	if (offset < used)
		return RTE_ETH_RX_DESC_DONE;
	return RTE_ETH_RX_DESC_AVAIL;
}

/**
 * DPDK callback for TX.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	uint16_t elts_head = txq->elts_head;
	const uint16_t elts_n = 1 << txq->elts_n;
	const uint16_t elts_m = elts_n - 1;
	unsigned int i = 0; /* packets sent */
	unsigned int j = 0; /* extra segments consumed */
	unsigned int k = 0; /* NOP WQEs emitted */
	uint16_t max_elts;
	uint16_t max_wqe;
	unsigned int comp;
	volatile struct mlx5_wqe_ctrl *last_wqe = NULL;
	unsigned int segs_n = 0;
	const unsigned int max_inline = txq->max_inline;

	if (unlikely(!pkts_n))
		return 0;
	/* Prefetch first packet cacheline. */
	rte_prefetch0(*pkts);
	/* Start processing. */
	mlx5_tx_complete(txq);
	max_elts = (elts_n - (elts_head - txq->elts_tail));
	/* A CQE slot must always be available. */
	assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
	if (unlikely(!max_wqe))
		return 0;
	do {
		struct rte_mbuf *buf = NULL;
		uint8_t *raw;
		volatile struct mlx5_wqe_v *wqe = NULL;
		volatile rte_v128u32_t *dseg = NULL;
		uint32_t length;
		unsigned int ds = 0;
		unsigned int sg = 0; /* counter of additional segs attached. */
		uintptr_t addr;
		uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2;
		uint16_t tso_header_sz = 0;
		uint16_t ehdr;
		uint8_t cs_flags;
		uint64_t tso = 0;
		uint16_t tso_segsz = 0;
#ifdef MLX5_PMD_SOFT_COUNTERS
		uint32_t total_length = 0;
#endif

		/* first_seg */
		buf = *pkts;
		segs_n = buf->nb_segs;
		/*
		 * Make sure there is enough room to store this packet and
		 * that one ring entry remains unused.
		 */
		assert(segs_n);
		if (max_elts < segs_n)
			break;
		max_elts -= segs_n;
		sg = --segs_n;
		if (unlikely(--max_wqe == 0))
			break;
		wqe = (volatile struct mlx5_wqe_v *)
			tx_mlx5_wqe(txq, txq->wqe_ci);
		rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
		if (pkts_n - i > 1)
			rte_prefetch0(*(pkts + 1));
		addr = rte_pktmbuf_mtod(buf, uintptr_t);
		length = DATA_LEN(buf);
		/* First two bytes of the frame, kept for the Eth segment. */
		ehdr = (((uint8_t *)addr)[1] << 8) |
		       ((uint8_t *)addr)[0];
#ifdef MLX5_PMD_SOFT_COUNTERS
		total_length = length;
#endif
		if (length < (MLX5_WQE_DWORD_SIZE + 2)) {
			txq->stats.oerrors++;
			break;
		}
		/* Update element. */
		(*txq->elts)[elts_head & elts_m] = buf;
		/* Prefetch next buffer data. */
		if (pkts_n - i > 1)
			rte_prefetch0(
			    rte_pktmbuf_mtod(*(pkts + 1), volatile void *));
		cs_flags = txq_ol_cksum_to_cs(txq, buf);
		raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
		/* Replace the Ethernet type by the VLAN if necessary. */
		if (buf->ol_flags & PKT_TX_VLAN_PKT) {
			uint32_t vlan = rte_cpu_to_be_32(0x81000000 |
							 buf->vlan_tci);
			unsigned int len = 2 * ETHER_ADDR_LEN - 2;

			addr += 2;
			length -= 2;
			/* Copy Destination and source mac address. */
			memcpy((uint8_t *)raw, ((uint8_t *)addr), len);
			/* Copy VLAN. */
			memcpy((uint8_t *)raw + len, &vlan, sizeof(vlan));
			/* Copy missing two bytes to end the DSeg. */
			memcpy((uint8_t *)raw + len + sizeof(vlan),
			       ((uint8_t *)addr) + len, 2);
			addr += len + 2;
			length -= (len + 2);
		} else {
			memcpy((uint8_t *)raw, ((uint8_t *)addr) + 2,
			       MLX5_WQE_DWORD_SIZE);
			length -= pkt_inline_sz;
			addr += pkt_inline_sz;
		}
		raw += MLX5_WQE_DWORD_SIZE;
		tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG);
		if (tso) {
			uintptr_t end =
				(uintptr_t)(((uintptr_t)txq->wqes) +
					    (1 << txq->wqe_n) * MLX5_WQE_SIZE);
			unsigned int copy_b;
			uint8_t vlan_sz =
				(buf->ol_flags & PKT_TX_VLAN_PKT) ? 4 : 0;
			const uint64_t is_tunneled =
				buf->ol_flags & (PKT_TX_TUNNEL_GRE |
						 PKT_TX_TUNNEL_VXLAN);

			tso_header_sz = buf->l2_len + vlan_sz +
					buf->l3_len + buf->l4_len;
			tso_segsz = buf->tso_segsz;
			if (unlikely(tso_segsz == 0)) {
				txq->stats.oerrors++;
				break;
			}
			if (is_tunneled && txq->tunnel_en) {
				tso_header_sz += buf->outer_l2_len +
						 buf->outer_l3_len;
				cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM;
			} else {
				cs_flags |= MLX5_ETH_WQE_L4_CSUM;
			}
			if (unlikely(tso_header_sz > MLX5_MAX_TSO_HEADER)) {
				txq->stats.oerrors++;
				break;
			}
			copy_b = tso_header_sz - pkt_inline_sz;
			/* First seg must contain all headers. */
			assert(copy_b <= length);
			if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
				uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;

				if (unlikely(max_wqe < n))
					break;
				max_wqe -= n;
				rte_memcpy((void *)raw, (void *)addr, copy_b);
				addr += copy_b;
				length -= copy_b;
				/* Include padding for TSO header. */
				copy_b = MLX5_WQE_DS(copy_b) *
					 MLX5_WQE_DWORD_SIZE;
				pkt_inline_sz += copy_b;
				raw += copy_b;
			} else {
				/* NOP WQE. */
				wqe->ctrl = (rte_v128u32_t){
					rte_cpu_to_be_32(txq->wqe_ci << 8),
					rte_cpu_to_be_32(txq->qp_num_8s | 1),
					0,
					0,
				};
				ds = 1;
#ifdef MLX5_PMD_SOFT_COUNTERS
				total_length = 0;
#endif
				k++;
				goto next_wqe;
			}
		}
		/* Inline if enough room. */
		if (max_inline || tso) {
			uint32_t inl = 0;
			uintptr_t end = (uintptr_t)
				(((uintptr_t)txq->wqes) +
				 (1 << txq->wqe_n) * MLX5_WQE_SIZE);
			unsigned int inline_room = max_inline *
						   RTE_CACHE_LINE_SIZE -
						   (pkt_inline_sz - 2) -
						   !!tso * sizeof(inl);
			uintptr_t addr_end;
			unsigned int copy_b;

pkt_inline:
			addr_end = RTE_ALIGN_FLOOR(addr + inline_room,
						   RTE_CACHE_LINE_SIZE);
			copy_b = (addr_end > addr) ?
				 RTE_MIN((addr_end - addr), length) : 0;
			if (copy_b && ((end - (uintptr_t)raw) > copy_b)) {
				/*
				 * One Dseg remains in the current WQE. To
				 * keep the computation positive, it is
				 * removed after the bytes to Dseg conversion.
				 */
				uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;

				if (unlikely(max_wqe < n))
					break;
				max_wqe -= n;
				if (tso && !inl) {
					/* TSO inline data is prefixed by its
					 * byte count (MLX5_INLINE_SEG set).
					 */
					inl = rte_cpu_to_be_32(copy_b |
							       MLX5_INLINE_SEG);
					rte_memcpy((void *)raw,
						   (void *)&inl, sizeof(inl));
					raw += sizeof(inl);
					pkt_inline_sz += sizeof(inl);
				}
				rte_memcpy((void *)raw, (void *)addr, copy_b);
				addr += copy_b;
				length -= copy_b;
				pkt_inline_sz += copy_b;
			}
			/*
			 * 2 DWORDs consumed by the WQE header + ETH segment +
			 * the size of the inline part of the packet.
			 */
			ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
			if (length > 0) {
				if (ds % (MLX5_WQE_SIZE /
					  MLX5_WQE_DWORD_SIZE) == 0) {
					if (unlikely(--max_wqe == 0))
						break;
					dseg = (volatile rte_v128u32_t *)
					       tx_mlx5_wqe(txq, txq->wqe_ci +
							   ds / 4);
				} else {
					dseg = (volatile rte_v128u32_t *)
						((uintptr_t)wqe +
						 (ds * MLX5_WQE_DWORD_SIZE));
				}
				goto use_dseg;
			} else if (!segs_n) {
				goto next_pkt;
			} else {
				/* Current segment fully inlined; continue
				 * inlining from the next mbuf segment.
				 */
				raw += copy_b;
				inline_room -= copy_b;
				--segs_n;
				buf = buf->next;
				assert(buf);
				addr = rte_pktmbuf_mtod(buf, uintptr_t);
				length = DATA_LEN(buf);
#ifdef MLX5_PMD_SOFT_COUNTERS
				total_length += length;
#endif
				(*txq->elts)[++elts_head & elts_m] = buf;
				goto pkt_inline;
			}
		} else {
			/*
			 * No inline has been done in the packet, only the
			 * Ethernet Header as been stored.
			 */
			dseg = (volatile rte_v128u32_t *)
				((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE));
			ds = 3;
use_dseg:
			/* Add the remaining packet as a simple ds. */
			addr = rte_cpu_to_be_64(addr);
			*dseg = (rte_v128u32_t){
				rte_cpu_to_be_32(length),
				mlx5_tx_mb2mr(txq, buf),
				addr,
				addr >> 32,
			};
			++ds;
			if (!segs_n)
				goto next_pkt;
		}
next_seg:
		assert(buf);
		assert(ds);
		assert(wqe);
		/*
		 * Spill on next WQE when the current one does not have
		 * enough room left. Size of WQE must a be a multiple
		 * of data segment size.
		 */
		assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE));
		if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) {
			if (unlikely(--max_wqe == 0))
				break;
			dseg = (volatile rte_v128u32_t *)
			       tx_mlx5_wqe(txq, txq->wqe_ci + ds / 4);
			rte_prefetch0(tx_mlx5_wqe(txq,
						  txq->wqe_ci + ds / 4 + 1));
		} else {
			++dseg;
		}
		++ds;
		buf = buf->next;
		assert(buf);
		length = DATA_LEN(buf);
#ifdef MLX5_PMD_SOFT_COUNTERS
		total_length += length;
#endif
		/* Store segment information. */
		addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
		*dseg = (rte_v128u32_t){
			rte_cpu_to_be_32(length),
			mlx5_tx_mb2mr(txq, buf),
			addr,
			addr >> 32,
		};
		(*txq->elts)[++elts_head & elts_m] = buf;
		if (--segs_n)
			goto next_seg;
next_pkt:
		if (ds > MLX5_DSEG_MAX) {
			txq->stats.oerrors++;
			break;
		}
		++elts_head;
		++pkts;
		++i;
		j += sg;
		/* Initialize known and common part of the WQE structure. */
		if (tso) {
			wqe->ctrl = (rte_v128u32_t){
				rte_cpu_to_be_32((txq->wqe_ci << 8) |
						 MLX5_OPCODE_TSO),
				rte_cpu_to_be_32(txq->qp_num_8s | ds),
				0,
				0,
			};
			wqe->eseg = (rte_v128u32_t){
				0,
				cs_flags | (rte_cpu_to_be_16(tso_segsz) << 16),
				0,
				(ehdr << 16) | rte_cpu_to_be_16(tso_header_sz),
			};
		} else {
			wqe->ctrl = (rte_v128u32_t){
				rte_cpu_to_be_32((txq->wqe_ci << 8) |
						 MLX5_OPCODE_SEND),
				rte_cpu_to_be_32(txq->qp_num_8s | ds),
				0,
				0,
			};
			wqe->eseg = (rte_v128u32_t){
				0,
				cs_flags,
				0,
				(ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz),
			};
		}
next_wqe:
		txq->wqe_ci += (ds + 3) / 4;
		/* Save the last successful WQE for completion request */
		last_wqe = (volatile struct mlx5_wqe_ctrl *)wqe;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += total_length;
#endif
	} while (i < pkts_n);
	/* Take a shortcut if nothing must be sent.
	 */
	if (unlikely((i + k) == 0))
		return 0;
	txq->elts_head += (i + j);
	/* Check whether completion threshold has been reached. */
	comp = txq->elts_comp + i + j + k;
	if (comp >= MLX5_TX_COMP_THRESH) {
		/* Request completion on last WQE. */
		last_wqe->ctrl2 = rte_cpu_to_be_32(8);
		/* Save elts_head in unused "immediate" field of WQE. */
		last_wqe->ctrl3 = txq->elts_head;
		txq->elts_comp = 0;
#ifndef NDEBUG
		++txq->cq_pi;
#endif
	} else {
		txq->elts_comp = comp;
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	/* Ring QP doorbell. */
	mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)last_wqe);
	return i;
}

/**
 * Open a MPW session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 * @param length
 *   Packet length.
 */
static inline void
mlx5_mpw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, uint32_t length)
{
	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
	/* Data segments 2..4 live in the following WQE slot. */
	volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
		(volatile struct mlx5_wqe_data_seg (*)[])
		tx_mlx5_wqe(txq, idx + 1);

	mpw->state = MLX5_MPW_STATE_OPENED;
	mpw->pkts_n = 0;
	mpw->len = length;
	mpw->total_len = 0;
	mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
	mpw->wqe->eseg.mss = rte_cpu_to_be_16(length);
	mpw->wqe->eseg.inline_hdr_sz = 0;
	mpw->wqe->eseg.rsvd0 = 0;
	mpw->wqe->eseg.rsvd1 = 0;
	mpw->wqe->eseg.rsvd2 = 0;
	mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
					     (txq->wqe_ci << 8) |
					     MLX5_OPCODE_TSO);
	mpw->wqe->ctrl[2] = 0;
	mpw->wqe->ctrl[3] = 0;
	mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *)
		(((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
	mpw->data.dseg[1] = (volatile struct mlx5_wqe_data_seg *)
		(((uintptr_t)mpw->wqe) + (3 * MLX5_WQE_DWORD_SIZE));
	mpw->data.dseg[2] = &(*dseg)[0];
	mpw->data.dseg[3] = &(*dseg)[1];
	mpw->data.dseg[4] = &(*dseg)[2];
}

/**
 * Close a MPW session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 */
static inline void
mlx5_mpw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
{
	unsigned int num = mpw->pkts_n;

	/*
	 * Store size in multiple of 16 bytes. Control and Ethernet segments
	 * count as 2.
	 */
	mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s | (2 + num));
	mpw->state = MLX5_MPW_STATE_CLOSED;
	/* Up to 2 data segments fit in the first WQE slot; more than that
	 * spilled into the next slot (see mlx5_mpw_new()).
	 */
	if (num < 3)
		++txq->wqe_ci;
	else
		txq->wqe_ci += 2;
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
}

/**
 * DPDK callback for TX with MPW support.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	uint16_t elts_head = txq->elts_head;
	const uint16_t elts_n = 1 << txq->elts_n;
	const uint16_t elts_m = elts_n - 1;
	unsigned int i = 0; /* packets sent */
	unsigned int j = 0; /* ring entries consumed (packets + segments) */
	uint16_t max_elts;
	uint16_t max_wqe;
	unsigned int comp;
	struct mlx5_mpw mpw = {
		.state = MLX5_MPW_STATE_CLOSED,
	};

	if (unlikely(!pkts_n))
		return 0;
	/* Prefetch first packet cacheline. */
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
	/* Start processing. */
	mlx5_tx_complete(txq);
	max_elts = (elts_n - (elts_head - txq->elts_tail));
	/* A CQE slot must always be available. */
	assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
	if (unlikely(!max_wqe))
		return 0;
	do {
		struct rte_mbuf *buf = *(pkts++);
		uint32_t length;
		unsigned int segs_n = buf->nb_segs;
		uint32_t cs_flags;

		/*
		 * Make sure there is enough room to store this packet and
		 * that one ring entry remains unused.
		 */
		assert(segs_n);
		if (max_elts < segs_n)
			break;
		/* Do not bother with large packets MPW cannot handle. */
		if (segs_n > MLX5_MPW_DSEG_MAX) {
			txq->stats.oerrors++;
			break;
		}
		max_elts -= segs_n;
		--pkts_n;
		cs_flags = txq_ol_cksum_to_cs(txq, buf);
		/* Retrieve packet information. */
		length = PKT_LEN(buf);
		assert(length);
		/* Start new session if packet differs. */
		if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
		    ((mpw.len != length) ||
		     (segs_n != 1) ||
		     (mpw.wqe->eseg.cs_flags != cs_flags)))
			mlx5_mpw_close(txq, &mpw);
		if (mpw.state == MLX5_MPW_STATE_CLOSED) {
			/*
			 * Multi-Packet WQE consumes at most two WQE.
			 * mlx5_mpw_new() expects to be able to use such
			 * resources.
			 */
			if (unlikely(max_wqe < 2))
				break;
			max_wqe -= 2;
			mlx5_mpw_new(txq, &mpw, length);
			mpw.wqe->eseg.cs_flags = cs_flags;
		}
		/* Multi-segment packets must be alone in their MPW. */
		assert((segs_n == 1) || (mpw.pkts_n == 0));
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
		length = 0;
#endif
		do {
			volatile struct mlx5_wqe_data_seg *dseg;
			uintptr_t addr;

			assert(buf);
			(*txq->elts)[elts_head++ & elts_m] = buf;
			dseg = mpw.data.dseg[mpw.pkts_n];
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			*dseg = (struct mlx5_wqe_data_seg){
				.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
				.lkey = mlx5_tx_mb2mr(txq, buf),
				.addr = rte_cpu_to_be_64(addr),
			};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
			length += DATA_LEN(buf);
#endif
			buf = buf->next;
			++mpw.pkts_n;
			++j;
		} while (--segs_n);
		assert(length == mpw.len);
		if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
			mlx5_mpw_close(txq, &mpw);
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += length;
#endif
		++i;
	} while (pkts_n);
	/* Take a shortcut if nothing must be sent. */
	if (unlikely(i == 0))
		return 0;
	/* Check whether completion threshold has been reached. */
	/* "j" includes both packets and segments. */
	comp = txq->elts_comp + j;
	if (comp >= MLX5_TX_COMP_THRESH) {
		volatile struct mlx5_wqe *wqe = mpw.wqe;

		/* Request completion on last WQE. */
		wqe->ctrl[2] = rte_cpu_to_be_32(8);
		/* Save elts_head in unused "immediate" field of WQE. */
		wqe->ctrl[3] = elts_head;
		txq->elts_comp = 0;
#ifndef NDEBUG
		++txq->cq_pi;
#endif
	} else {
		txq->elts_comp = comp;
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	/* Ring QP doorbell. */
	if (mpw.state == MLX5_MPW_STATE_OPENED)
		mlx5_mpw_close(txq, &mpw);
	mlx5_tx_dbrec(txq, mpw.wqe);
	txq->elts_head = elts_head;
	return i;
}

/**
 * Open a MPW inline session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 * @param length
 *   Packet length.
 */
static inline void
mlx5_mpw_inline_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw,
		    uint32_t length)
{
	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
	struct mlx5_wqe_inl_small *inl;

	mpw->state = MLX5_MPW_INL_STATE_OPENED;
	mpw->pkts_n = 0;
	mpw->len = length;
	mpw->total_len = 0;
	mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
	mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
					     (txq->wqe_ci << 8) |
					     MLX5_OPCODE_TSO);
	mpw->wqe->ctrl[2] = 0;
	mpw->wqe->ctrl[3] = 0;
	mpw->wqe->eseg.mss = rte_cpu_to_be_16(length);
	mpw->wqe->eseg.inline_hdr_sz = 0;
	mpw->wqe->eseg.cs_flags = 0;
	mpw->wqe->eseg.rsvd0 = 0;
	mpw->wqe->eseg.rsvd1 = 0;
	mpw->wqe->eseg.rsvd2 = 0;
	/* Inline data follows the control and Ethernet segments. */
	inl = (struct mlx5_wqe_inl_small *)
		(((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
	mpw->data.raw = (uint8_t *)&inl->raw;
}

/**
 * Close a MPW inline session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 */
static inline void
mlx5_mpw_inline_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
{
	unsigned int size;
	struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *)
		(((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));

	size = MLX5_WQE_SIZE - MLX5_MWQE64_INL_DATA + mpw->total_len;
	/*
	 * Store size in multiple of 16 bytes. Control and Ethernet segments
	 * count as 2.
	 */
	mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s |
					     MLX5_WQE_DS(size));
	mpw->state = MLX5_MPW_STATE_CLOSED;
	inl->byte_cnt = rte_cpu_to_be_32(mpw->total_len | MLX5_INLINE_SEG);
	/* Advance by as many WQE slots as the inline data occupied. */
	txq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
}

/**
 * DPDK callback for TX with MPW inline support.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
			 uint16_t pkts_n)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	uint16_t elts_head = txq->elts_head;
	const uint16_t elts_n = 1 << txq->elts_n;
	const uint16_t elts_m = elts_n - 1;
	unsigned int i = 0; /* Packets sent. */
	unsigned int j = 0; /* Mbuf segments consumed (for completion). */
	uint16_t max_elts;
	uint16_t max_wqe;
	unsigned int comp;
	/* Inline budget in bytes for the current session. */
	unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE;
	struct mlx5_mpw mpw = {
		.state = MLX5_MPW_STATE_CLOSED,
	};
	/*
	 * Compute the maximum number of WQE which can be consumed by inline
	 * code.
	 * - 2 DSEG for:
	 *   - 1 control segment,
	 *   - 1 Ethernet segment,
	 * - N Dseg from the inline request.
	 */
	const unsigned int wqe_inl_n =
		((2 * MLX5_WQE_DWORD_SIZE +
		  txq->max_inline * RTE_CACHE_LINE_SIZE) +
		 RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;

	if (unlikely(!pkts_n))
		return 0;
	/* Prefetch first packet cacheline. */
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
	/* Start processing. */
	mlx5_tx_complete(txq);
	max_elts = (elts_n - (elts_head - txq->elts_tail));
	/* A CQE slot must always be available. */
	assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
	do {
		struct rte_mbuf *buf = *(pkts++);
		uintptr_t addr;
		uint32_t length;
		unsigned int segs_n = buf->nb_segs;
		uint8_t cs_flags;

		/*
		 * Make sure there is enough room to store this packet and
		 * that one ring entry remains unused.
		 */
		assert(segs_n);
		if (max_elts < segs_n)
			break;
		/* Do not bother with large packets MPW cannot handle. */
		if (segs_n > MLX5_MPW_DSEG_MAX) {
			txq->stats.oerrors++;
			break;
		}
		max_elts -= segs_n;
		--pkts_n;
		/*
		 * Compute max_wqe in case less WQE were consumed in previous
		 * iteration.
		 */
		max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
		cs_flags = txq_ol_cksum_to_cs(txq, buf);
		/* Retrieve packet information. */
		length = PKT_LEN(buf);
		/*
		 * Start new session if packet differs: an MPW session only
		 * groups packets with identical length and checksum flags.
		 */
		if (mpw.state == MLX5_MPW_STATE_OPENED) {
			if ((mpw.len != length) ||
			    (segs_n != 1) ||
			    (mpw.wqe->eseg.cs_flags != cs_flags))
				mlx5_mpw_close(txq, &mpw);
		} else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
			if ((mpw.len != length) ||
			    (segs_n != 1) ||
			    (length > inline_room) ||
			    (mpw.wqe->eseg.cs_flags != cs_flags)) {
				mlx5_mpw_inline_close(txq, &mpw);
				/* Reset inline budget for the next session. */
				inline_room =
					txq->max_inline * RTE_CACHE_LINE_SIZE;
			}
		}
		if (mpw.state == MLX5_MPW_STATE_CLOSED) {
			if ((segs_n != 1) ||
			    (length > inline_room)) {
				/*
				 * Multi-Packet WQE consumes at most two WQE.
				 * mlx5_mpw_new() expects to be able to use
				 * such resources.
				 */
				if (unlikely(max_wqe < 2))
					break;
				max_wqe -= 2;
				mlx5_mpw_new(txq, &mpw, length);
				mpw.wqe->eseg.cs_flags = cs_flags;
			} else {
				if (unlikely(max_wqe < wqe_inl_n))
					break;
				max_wqe -= wqe_inl_n;
				mlx5_mpw_inline_new(txq, &mpw, length);
				mpw.wqe->eseg.cs_flags = cs_flags;
			}
		}
		/* Multi-segment packets must be alone in their MPW. */
		assert((segs_n == 1) || (mpw.pkts_n == 0));
		if (mpw.state == MLX5_MPW_STATE_OPENED) {
			/* Pointer session: reference mbuf data via dsegs. */
			assert(inline_room ==
			       txq->max_inline * RTE_CACHE_LINE_SIZE);
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
			length = 0;
#endif
			do {
				volatile struct mlx5_wqe_data_seg *dseg;

				assert(buf);
				(*txq->elts)[elts_head++ & elts_m] = buf;
				dseg = mpw.data.dseg[mpw.pkts_n];
				addr = rte_pktmbuf_mtod(buf, uintptr_t);
				*dseg = (struct mlx5_wqe_data_seg){
					.byte_count =
					       rte_cpu_to_be_32(DATA_LEN(buf)),
					.lkey = mlx5_tx_mb2mr(txq, buf),
					.addr = rte_cpu_to_be_64(addr),
				};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
				length += DATA_LEN(buf);
#endif
				buf = buf->next;
				++mpw.pkts_n;
				++j;
			} while (--segs_n);
			assert(length == mpw.len);
			if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
				mlx5_mpw_close(txq, &mpw);
		} else {
			/* Inline session: copy packet data into the WQ. */
			unsigned int max;

			assert(mpw.state == MLX5_MPW_INL_STATE_OPENED);
			assert(length <= inline_room);
			assert(length == DATA_LEN(buf));
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			(*txq->elts)[elts_head++ & elts_m] = buf;
			/* Maximum number of bytes before wrapping. */
			max = ((((uintptr_t)(txq->wqes)) +
				(1 << txq->wqe_n) *
				MLX5_WQE_SIZE) -
			       (uintptr_t)mpw.data.raw);
			if (length > max) {
				/* Copy in two parts around the WQ ring end. */
				rte_memcpy((void *)(uintptr_t)mpw.data.raw,
					   (void *)addr,
					   max);
				mpw.data.raw = (volatile void *)txq->wqes;
				rte_memcpy((void *)(uintptr_t)mpw.data.raw,
					   (void *)(addr + max),
					   length - max);
				mpw.data.raw += length - max;
			} else {
				rte_memcpy((void *)(uintptr_t)mpw.data.raw,
					   (void *)addr,
					   length);

				if (length == max)
					mpw.data.raw =
						(volatile void *)txq->wqes;
				else
					mpw.data.raw += length;
			}
			++mpw.pkts_n;
			mpw.total_len += length;
			++j;
			if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) {
				mlx5_mpw_inline_close(txq, &mpw);
				inline_room =
					txq->max_inline * RTE_CACHE_LINE_SIZE;
			} else {
				inline_room -= length;
			}
		}
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += length;
#endif
		++i;
	} while (pkts_n);
	/* Take a shortcut if nothing must be sent. */
	if (unlikely(i == 0))
		return 0;
	/* Check whether completion threshold has been reached. */
	/* "j" includes both packets and segments. */
	comp = txq->elts_comp + j;
	if (comp >= MLX5_TX_COMP_THRESH) {
		volatile struct mlx5_wqe *wqe = mpw.wqe;

		/* Request completion on last WQE. */
		wqe->ctrl[2] = rte_cpu_to_be_32(8);
		/* Save elts_head in unused "immediate" field of WQE. */
		wqe->ctrl[3] = elts_head;
		txq->elts_comp = 0;
#ifndef NDEBUG
		++txq->cq_pi;
#endif
	} else {
		txq->elts_comp = comp;
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	/* Close any session left open before ringing the doorbell. */
	if (mpw.state == MLX5_MPW_INL_STATE_OPENED)
		mlx5_mpw_inline_close(txq, &mpw);
	else if (mpw.state == MLX5_MPW_STATE_OPENED)
		mlx5_mpw_close(txq, &mpw);
	/* Ring QP doorbell. */
	mlx5_tx_dbrec(txq, mpw.wqe);
	txq->elts_head = elts_head;
	return i;
}

/**
 * Open an Enhanced MPW session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 * @param padding
 *   When non-zero, pad the title WQEBB with two zero-length inline
 *   headers so that data starts on the next WQEBB.
 */
static inline void
mlx5_empw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, int padding)
{
	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);

	mpw->state = MLX5_MPW_ENHANCED_STATE_OPENED;
	mpw->pkts_n = 0;
	/* Session size starts with the title WQE itself. */
	mpw->total_len = sizeof(struct mlx5_wqe);
	mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
	mpw->wqe->ctrl[0] =
		rte_cpu_to_be_32((MLX5_OPC_MOD_ENHANCED_MPSW << 24) |
				 (txq->wqe_ci << 8) |
				 MLX5_OPCODE_ENHANCED_MPSW);
	mpw->wqe->ctrl[2] = 0;
	mpw->wqe->ctrl[3] = 0;
	memset((void *)(uintptr_t)&mpw->wqe->eseg, 0, MLX5_WQE_DWORD_SIZE);
	if (unlikely(padding)) {
		uintptr_t addr = (uintptr_t)(mpw->wqe + 1);

		/* Pad the first 2 DWORDs with zero-length inline header. */
		*(volatile uint32_t *)addr = rte_cpu_to_be_32(MLX5_INLINE_SEG);
		*(volatile uint32_t *)(addr + MLX5_WQE_DWORD_SIZE) =
			rte_cpu_to_be_32(MLX5_INLINE_SEG);
		mpw->total_len += 2 * MLX5_WQE_DWORD_SIZE;
		/* Start from the next WQEBB. */
		mpw->data.raw = (volatile void *)(tx_mlx5_wqe(txq, idx + 1));
	} else {
		mpw->data.raw = (volatile void *)(mpw->wqe + 1);
	}
}

/**
 * Close an Enhanced MPW session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 *
 * @return
 *   Number of consumed WQEs.
 */
static inline uint16_t
mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
{
	uint16_t ret;

	/* Store size in multiple of 16 bytes. Control and Ethernet segments
	 * count as 2.
	 */
	mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s |
					     MLX5_WQE_DS(mpw->total_len));
	mpw->state = MLX5_MPW_STATE_CLOSED;
	/* Round the session size up to a whole number of WQEBBs. */
	ret = (mpw->total_len + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
	txq->wqe_ci += ret;
	return ret;
}

/**
 * TX with Enhanced MPW support.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
static inline uint16_t
txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
	       uint16_t pkts_n)
{
	uint16_t elts_head = txq->elts_head;
	const uint16_t elts_n = 1 << txq->elts_n;
	const uint16_t elts_m = elts_n - 1;
	unsigned int i = 0; /* Packets sent. */
	unsigned int j = 0; /* Non-inlined packets (need completion). */
	uint16_t max_elts;
	uint16_t max_wqe;
	/* Overall inline budget in bytes (0 disables inlining). */
	unsigned int max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;
	/* Bytes remaining in the current eMPW session. */
	unsigned int mpw_room = 0;
	/* Padding needed to align the next entry to a DWORD boundary. */
	unsigned int inl_pad = 0;
	uint32_t inl_hdr;
	struct mlx5_mpw mpw = {
		.state = MLX5_MPW_STATE_CLOSED,
	};

	if (unlikely(!pkts_n))
		return 0;
	/* Start processing. */
	mlx5_tx_complete(txq);
	max_elts = (elts_n - (elts_head - txq->elts_tail));
	/* A CQE slot must always be available. */
	assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
	if (unlikely(!max_wqe))
		return 0;
	do {
		struct rte_mbuf *buf = *(pkts++);
		uintptr_t addr;
		unsigned int do_inline = 0; /* Whether inline is possible. */
		uint32_t length;
		uint8_t cs_flags;

		/* Multi-segmented packet is handled in slow-path outside. */
		assert(NB_SEGS(buf) == 1);
		/* Make sure there is enough room to store this packet. */
		if (max_elts - j == 0)
			break;
		cs_flags = txq_ol_cksum_to_cs(txq, buf);
		/* Retrieve packet information. */
		length = PKT_LEN(buf);
		/* Start new session if:
		 * - multi-segment packet
		 * - no space left even for a dseg
		 * - next packet can be inlined with a new WQE
		 * - cs_flag differs
		 */
		if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) {
			if ((inl_pad + sizeof(struct mlx5_wqe_data_seg) >
			     mpw_room) ||
			    (length <= txq->inline_max_packet_sz &&
			     inl_pad + sizeof(inl_hdr) + length >
			     mpw_room) ||
			    (mpw.wqe->eseg.cs_flags != cs_flags))
				max_wqe -= mlx5_empw_close(txq, &mpw);
		}
		if (unlikely(mpw.state == MLX5_MPW_STATE_CLOSED)) {
			/* In Enhanced MPW, inline as much as the budget is
			 * allowed. The remaining space is to be filled with
			 * dsegs. If the title WQEBB isn't padded, it will have
			 * 2 dsegs there.
			 */
			mpw_room = RTE_MIN(MLX5_WQE_SIZE_MAX,
					   (max_inline ? max_inline :
					    pkts_n * MLX5_WQE_DWORD_SIZE) +
					   MLX5_WQE_SIZE);
			if (unlikely(max_wqe * MLX5_WQE_SIZE < mpw_room))
				break;
			/* Don't pad the title WQEBB to not waste WQ. */
			mlx5_empw_new(txq, &mpw, 0);
			mpw_room -= mpw.total_len;
			inl_pad = 0;
			do_inline = length <= txq->inline_max_packet_sz &&
				    sizeof(inl_hdr) + length <= mpw_room &&
				    !txq->mpw_hdr_dseg;
			mpw.wqe->eseg.cs_flags = cs_flags;
		} else {
			/* Evaluate whether the next packet can be inlined.
			 * Inlining is possible when:
			 * - length is less than configured value
			 * - length fits for remaining space
			 * - not required to fill the title WQEBB with dsegs
			 */
			do_inline =
				length <= txq->inline_max_packet_sz &&
				inl_pad + sizeof(inl_hdr) + length <=
				 mpw_room &&
				(!txq->mpw_hdr_dseg ||
				 mpw.total_len >= MLX5_WQE_SIZE);
		}
		if (max_inline && do_inline) {
			/* Inline packet into WQE. */
			unsigned int max;

			assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED);
			assert(length == DATA_LEN(buf));
			inl_hdr = rte_cpu_to_be_32(length | MLX5_INLINE_SEG);
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			mpw.data.raw = (volatile void *)
				((uintptr_t)mpw.data.raw + inl_pad);
			max = tx_mlx5_wq_tailroom(txq,
					(void *)(uintptr_t)mpw.data.raw);
			/* Copy inline header. */
			mpw.data.raw = (volatile void *)
				mlx5_copy_to_wq(
					  (void *)(uintptr_t)mpw.data.raw,
					  &inl_hdr,
					  sizeof(inl_hdr),
					  (void *)(uintptr_t)txq->wqes,
					  max);
			max = tx_mlx5_wq_tailroom(txq,
					(void *)(uintptr_t)mpw.data.raw);
			/* Copy packet data. */
			mpw.data.raw = (volatile void *)
				mlx5_copy_to_wq(
					  (void *)(uintptr_t)mpw.data.raw,
					  (void *)addr,
					  length,
					  (void *)(uintptr_t)txq->wqes,
					  max);
			++mpw.pkts_n;
			mpw.total_len += (inl_pad + sizeof(inl_hdr) + length);
			/* No need to get completion as the entire packet is
			 * copied to WQ. Free the buf right away.
			 */
			rte_pktmbuf_free_seg(buf);
			mpw_room -= (inl_pad + sizeof(inl_hdr) + length);
			/* Add pad in the next packet if any. */
			inl_pad = (((uintptr_t)mpw.data.raw +
					(MLX5_WQE_DWORD_SIZE - 1)) &
					~(MLX5_WQE_DWORD_SIZE - 1)) -
				  (uintptr_t)mpw.data.raw;
		} else {
			/* No inline. Load a dseg of packet pointer. */
			volatile rte_v128u32_t *dseg;

			assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED);
			assert((inl_pad + sizeof(*dseg)) <= mpw_room);
			assert(length == DATA_LEN(buf));
			/* Wrap around the WQ ring if no tailroom is left. */
			if (!tx_mlx5_wq_tailroom(txq,
					(void *)((uintptr_t)mpw.data.raw
						+ inl_pad)))
				dseg = (volatile void *)txq->wqes;
			else
				dseg = (volatile void *)
					((uintptr_t)mpw.data.raw +
					 inl_pad);
			(*txq->elts)[elts_head++ & elts_m] = buf;
			addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
								 uintptr_t));
			*dseg = (rte_v128u32_t) {
				rte_cpu_to_be_32(length),
				mlx5_tx_mb2mr(txq, buf),
				addr,
				addr >> 32,
			};
			mpw.data.raw = (volatile void *)(dseg + 1);
			mpw.total_len += (inl_pad + sizeof(*dseg));
			++j;
			++mpw.pkts_n;
			mpw_room -= (inl_pad + sizeof(*dseg));
			inl_pad = 0;
		}
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += length;
#endif
		++i;
	} while (i < pkts_n);
	/* Take a shortcut if nothing must be sent. */
	if (unlikely(i == 0))
		return 0;
	/* Check whether completion threshold has been reached. */
	if (txq->elts_comp + j >= MLX5_TX_COMP_THRESH ||
			(uint16_t)(txq->wqe_ci - txq->mpw_comp) >=
			 (1 << txq->wqe_n) / MLX5_TX_COMP_THRESH_INLINE_DIV) {
		volatile struct mlx5_wqe *wqe = mpw.wqe;

		/* Request completion on last WQE. */
		wqe->ctrl[2] = rte_cpu_to_be_32(8);
		/* Save elts_head in unused "immediate" field of WQE. */
		wqe->ctrl[3] = elts_head;
		txq->elts_comp = 0;
		txq->mpw_comp = txq->wqe_ci;
#ifndef NDEBUG
		++txq->cq_pi;
#endif
	} else {
		txq->elts_comp += j;
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED)
		mlx5_empw_close(txq, &mpw);
	/* Ring QP doorbell. */
	mlx5_tx_dbrec(txq, mpw.wqe);
	txq->elts_head = elts_head;
	return i;
}

/**
 * DPDK callback for TX with Enhanced MPW support.
 *
 * Dispatches runs of multi-segment packets to the regular Tx burst and
 * runs of single-segment packets to txq_burst_empw().
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	uint16_t nb_tx = 0;

	while (pkts_n > nb_tx) {
		uint16_t n;
		uint16_t ret;

		n = txq_count_contig_multi_seg(&pkts[nb_tx], pkts_n - nb_tx);
		if (n) {
			ret = mlx5_tx_burst(dpdk_txq, &pkts[nb_tx], n);
			if (!ret)
				break;
			nb_tx += ret;
		}
		n = txq_count_contig_single_seg(&pkts[nb_tx], pkts_n - nb_tx);
		if (n) {
			ret = txq_burst_empw(txq, &pkts[nb_tx], n);
			if (!ret)
				break;
			nb_tx += ret;
		}
	}
	return nb_tx;
}

/**
 * Translate RX completion flags to packet type.
 *
 * @param[in] cqe
 *   Pointer to CQE.
 *
 * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
 *
 * @return
 *   Packet type for struct rte_mbuf.
 */
static inline uint32_t
rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe)
{
	uint8_t idx;
	uint8_t pinfo = cqe->pkt_info;
	uint16_t ptype = cqe->hdr_type_etc;

	/*
	 * The index to the array should have:
	 * bit[1:0] = l3_hdr_type
	 * bit[4:2] = l4_hdr_type
	 * bit[5] = ip_frag
	 * bit[6] = tunneled
	 * bit[7] = outer_l3_type
	 */
	idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10);
	return mlx5_ptype_table[idx];
}

/**
 * Get size of the next packet for a given CQE. For compressed CQEs, the
 * consumer index is updated only once all packets of the current one have
 * been processed.
 *
 * @param rxq
 *   Pointer to RX queue.
 * @param cqe
 *   CQE to process.
 * @param cqe_cnt
 *   CQ ring size minus one, used as an index mask.
 * @param[out] rss_hash
 *   Packet RSS Hash result.
 *
 * @return
 *   Packet size in bytes (0 if there is none), -1 in case of completion
 *   with error.
 */
static inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		 uint16_t cqe_cnt, uint32_t *rss_hash)
{
	struct rxq_zip *zip = &rxq->zip;
	uint16_t cqe_n = cqe_cnt + 1;
	int len = 0;
	uint16_t idx, end;

	/* Process compressed data in the CQE and mini arrays. */
	if (zip->ai) {
		volatile struct mlx5_mini_cqe8 (*mc)[8] =
			(volatile struct mlx5_mini_cqe8 (*)[8])
			(uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].pkt_info);

		len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt);
		*rss_hash = rte_be_to_cpu_32((*mc)[zip->ai & 7].rx_hash_result);
		if ((++zip->ai & 7) == 0) {
			/* Invalidate consumed CQEs */
			idx = zip->ca;
			end = zip->na;
			while (idx != end) {
				(*rxq->cqes)[idx & cqe_cnt].op_own =
					MLX5_CQE_INVALIDATE;
				++idx;
			}
			/*
			 * Increment consumer index to skip the number of
			 * CQEs consumed. Hardware leaves holes in the CQ
			 * ring for software use.
			 */
			zip->ca = zip->na;
			zip->na += 8;
		}
		if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
			/* Invalidate the rest */
			idx = zip->ca;
			end = zip->cq_ci;

			while (idx != end) {
				(*rxq->cqes)[idx & cqe_cnt].op_own =
					MLX5_CQE_INVALIDATE;
				++idx;
			}
			rxq->cq_ci = zip->cq_ci;
			zip->ai = 0;
		}
	/* No compressed data, get next CQE and verify if it is compressed. */
	} else {
		int ret;
		int8_t op_own;

		ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
		if (unlikely(ret == 1))
			return 0;
		++rxq->cq_ci;
		op_own = cqe->op_own;
		/* Make sure CQE contents are read after the ownership check. */
		rte_cio_rmb();
		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
			volatile struct mlx5_mini_cqe8 (*mc)[8] =
				(volatile struct mlx5_mini_cqe8 (*)[8])
				(uintptr_t)(&(*rxq->cqes)[rxq->cq_ci &
							  cqe_cnt].pkt_info);

			/* Fix endianness. */
			zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt);
			/*
			 * Current mini array position is the one returned by
			 * check_cqe64().
			 *
			 * If completion comprises several mini arrays, as a
			 * special case the second one is located 7 CQEs after
			 * the initial CQE instead of 8 for subsequent ones.
			 */
			zip->ca = rxq->cq_ci;
			zip->na = zip->ca + 7;
			/* Compute the next non compressed CQE. */
			--rxq->cq_ci;
			zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
			/* Get packet size to return. */
			len = rte_be_to_cpu_32((*mc)[0].byte_cnt);
			*rss_hash = rte_be_to_cpu_32((*mc)[0].rx_hash_result);
			zip->ai = 1;
			/* Prefetch all the entries to be invalidated */
			idx = zip->ca;
			end = zip->cq_ci;
			while (idx != end) {
				rte_prefetch0(&(*rxq->cqes)[(idx) & cqe_cnt]);
				++idx;
			}
		} else {
			len = rte_be_to_cpu_32(cqe->byte_cnt);
			*rss_hash = rte_be_to_cpu_32(cqe->rx_hash_res);
		}
		/* Error while receiving packet. */
		if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR))
			return -1;
	}
	return len;
}

/**
 * Translate RX completion flags to offload flags.
 *
 * @param[in] rxq
 *   Pointer to RX queue structure.
 * @param[in] cqe
 *   Pointer to CQE.
 *
 * @return
 *   Offload flags (ol_flags) for struct rte_mbuf.
1734 */ 1735 static inline uint32_t 1736 rxq_cq_to_ol_flags(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe) 1737 { 1738 uint32_t ol_flags = 0; 1739 uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc); 1740 1741 ol_flags = 1742 TRANSPOSE(flags, 1743 MLX5_CQE_RX_L3_HDR_VALID, 1744 PKT_RX_IP_CKSUM_GOOD) | 1745 TRANSPOSE(flags, 1746 MLX5_CQE_RX_L4_HDR_VALID, 1747 PKT_RX_L4_CKSUM_GOOD); 1748 if ((cqe->pkt_info & MLX5_CQE_RX_TUNNEL_PACKET) && (rxq->csum_l2tun)) 1749 ol_flags |= 1750 TRANSPOSE(flags, 1751 MLX5_CQE_RX_L3_HDR_VALID, 1752 PKT_RX_IP_CKSUM_GOOD) | 1753 TRANSPOSE(flags, 1754 MLX5_CQE_RX_L4_HDR_VALID, 1755 PKT_RX_L4_CKSUM_GOOD); 1756 return ol_flags; 1757 } 1758 1759 /** 1760 * DPDK callback for RX. 1761 * 1762 * @param dpdk_rxq 1763 * Generic pointer to RX queue structure. 1764 * @param[out] pkts 1765 * Array to store received packets. 1766 * @param pkts_n 1767 * Maximum number of packets in array. 1768 * 1769 * @return 1770 * Number of packets successfully received (<= pkts_n). 1771 */ 1772 uint16_t 1773 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1774 { 1775 struct mlx5_rxq_data *rxq = dpdk_rxq; 1776 const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; 1777 const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1; 1778 const unsigned int sges_n = rxq->sges_n; 1779 struct rte_mbuf *pkt = NULL; 1780 struct rte_mbuf *seg = NULL; 1781 volatile struct mlx5_cqe *cqe = 1782 &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1783 unsigned int i = 0; 1784 unsigned int rq_ci = rxq->rq_ci << sges_n; 1785 int len = 0; /* keep its value across iterations. 
*/ 1786 1787 while (pkts_n) { 1788 unsigned int idx = rq_ci & wqe_cnt; 1789 volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx]; 1790 struct rte_mbuf *rep = (*rxq->elts)[idx]; 1791 uint32_t rss_hash_res = 0; 1792 1793 if (pkt) 1794 NEXT(seg) = rep; 1795 seg = rep; 1796 rte_prefetch0(seg); 1797 rte_prefetch0(cqe); 1798 rte_prefetch0(wqe); 1799 rep = rte_mbuf_raw_alloc(rxq->mp); 1800 if (unlikely(rep == NULL)) { 1801 ++rxq->stats.rx_nombuf; 1802 if (!pkt) { 1803 /* 1804 * no buffers before we even started, 1805 * bail out silently. 1806 */ 1807 break; 1808 } 1809 while (pkt != seg) { 1810 assert(pkt != (*rxq->elts)[idx]); 1811 rep = NEXT(pkt); 1812 NEXT(pkt) = NULL; 1813 NB_SEGS(pkt) = 1; 1814 rte_mbuf_raw_free(pkt); 1815 pkt = rep; 1816 } 1817 break; 1818 } 1819 if (!pkt) { 1820 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1821 len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, 1822 &rss_hash_res); 1823 if (!len) { 1824 rte_mbuf_raw_free(rep); 1825 break; 1826 } 1827 if (unlikely(len == -1)) { 1828 /* RX error, packet is likely too large. */ 1829 rte_mbuf_raw_free(rep); 1830 ++rxq->stats.idropped; 1831 goto skip; 1832 } 1833 pkt = seg; 1834 assert(len >= (rxq->crc_present << 2)); 1835 /* Update packet information. 
*/ 1836 pkt->packet_type = rxq_cq_to_pkt_type(cqe); 1837 pkt->ol_flags = 0; 1838 if (rss_hash_res && rxq->rss_hash) { 1839 pkt->hash.rss = rss_hash_res; 1840 pkt->ol_flags = PKT_RX_RSS_HASH; 1841 } 1842 if (rxq->mark && 1843 MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) { 1844 pkt->ol_flags |= PKT_RX_FDIR; 1845 if (cqe->sop_drop_qpn != 1846 rte_cpu_to_be_32(MLX5_FLOW_MARK_DEFAULT)) { 1847 uint32_t mark = cqe->sop_drop_qpn; 1848 1849 pkt->ol_flags |= PKT_RX_FDIR_ID; 1850 pkt->hash.fdir.hi = 1851 mlx5_flow_mark_get(mark); 1852 } 1853 } 1854 if (rxq->csum | rxq->csum_l2tun) 1855 pkt->ol_flags |= rxq_cq_to_ol_flags(rxq, cqe); 1856 if (rxq->vlan_strip && 1857 (cqe->hdr_type_etc & 1858 rte_cpu_to_be_16(MLX5_CQE_VLAN_STRIPPED))) { 1859 pkt->ol_flags |= PKT_RX_VLAN | 1860 PKT_RX_VLAN_STRIPPED; 1861 pkt->vlan_tci = 1862 rte_be_to_cpu_16(cqe->vlan_info); 1863 } 1864 if (rxq->hw_timestamp) { 1865 pkt->timestamp = 1866 rte_be_to_cpu_64(cqe->timestamp); 1867 pkt->ol_flags |= PKT_RX_TIMESTAMP; 1868 } 1869 if (rxq->crc_present) 1870 len -= ETHER_CRC_LEN; 1871 PKT_LEN(pkt) = len; 1872 } 1873 DATA_LEN(rep) = DATA_LEN(seg); 1874 PKT_LEN(rep) = PKT_LEN(seg); 1875 SET_DATA_OFF(rep, DATA_OFF(seg)); 1876 PORT(rep) = PORT(seg); 1877 (*rxq->elts)[idx] = rep; 1878 /* 1879 * Fill NIC descriptor with the new buffer. The lkey and size 1880 * of the buffers are already known, only the buffer address 1881 * changes. 1882 */ 1883 wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t)); 1884 if (len > DATA_LEN(seg)) { 1885 len -= DATA_LEN(seg); 1886 ++NB_SEGS(pkt); 1887 ++rq_ci; 1888 continue; 1889 } 1890 DATA_LEN(seg) = len; 1891 #ifdef MLX5_PMD_SOFT_COUNTERS 1892 /* Increment bytes counter. */ 1893 rxq->stats.ibytes += PKT_LEN(pkt); 1894 #endif 1895 /* Return packet. */ 1896 *(pkts++) = pkt; 1897 pkt = NULL; 1898 --pkts_n; 1899 ++i; 1900 skip: 1901 /* Align consumer index to the next stride. 
*/ 1902 rq_ci >>= sges_n; 1903 ++rq_ci; 1904 rq_ci <<= sges_n; 1905 } 1906 if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci))) 1907 return 0; 1908 /* Update the consumer index. */ 1909 rxq->rq_ci = rq_ci >> sges_n; 1910 rte_cio_wmb(); 1911 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1912 rte_cio_wmb(); 1913 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1914 #ifdef MLX5_PMD_SOFT_COUNTERS 1915 /* Increment packets counter. */ 1916 rxq->stats.ipackets += i; 1917 #endif 1918 return i; 1919 } 1920 1921 /** 1922 * Dummy DPDK callback for TX. 1923 * 1924 * This function is used to temporarily replace the real callback during 1925 * unsafe control operations on the queue, or in case of error. 1926 * 1927 * @param dpdk_txq 1928 * Generic pointer to TX queue structure. 1929 * @param[in] pkts 1930 * Packets to transmit. 1931 * @param pkts_n 1932 * Number of packets in array. 1933 * 1934 * @return 1935 * Number of packets successfully transmitted (<= pkts_n). 1936 */ 1937 uint16_t 1938 removed_tx_burst(void *dpdk_txq __rte_unused, 1939 struct rte_mbuf **pkts __rte_unused, 1940 uint16_t pkts_n __rte_unused) 1941 { 1942 return 0; 1943 } 1944 1945 /** 1946 * Dummy DPDK callback for RX. 1947 * 1948 * This function is used to temporarily replace the real callback during 1949 * unsafe control operations on the queue, or in case of error. 1950 * 1951 * @param dpdk_rxq 1952 * Generic pointer to RX queue structure. 1953 * @param[out] pkts 1954 * Array to store received packets. 1955 * @param pkts_n 1956 * Maximum number of packets in array. 1957 * 1958 * @return 1959 * Number of packets successfully received (<= pkts_n). 1960 */ 1961 uint16_t 1962 removed_rx_burst(void *dpdk_txq __rte_unused, 1963 struct rte_mbuf **pkts __rte_unused, 1964 uint16_t pkts_n __rte_unused) 1965 { 1966 return 0; 1967 } 1968 1969 /* 1970 * Vectorized Rx/Tx routines are not compiled in when required vector 1971 * instructions are not supported on a target architecture. 
The following null 1972 * stubs are needed for linkage when those are not included outside of this file 1973 * (e.g. mlx5_rxtx_vec_sse.c for x86). 1974 */ 1975 1976 uint16_t __attribute__((weak)) 1977 mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused, 1978 struct rte_mbuf **pkts __rte_unused, 1979 uint16_t pkts_n __rte_unused) 1980 { 1981 return 0; 1982 } 1983 1984 uint16_t __attribute__((weak)) 1985 mlx5_tx_burst_vec(void *dpdk_txq __rte_unused, 1986 struct rte_mbuf **pkts __rte_unused, 1987 uint16_t pkts_n __rte_unused) 1988 { 1989 return 0; 1990 } 1991 1992 uint16_t __attribute__((weak)) 1993 mlx5_rx_burst_vec(void *dpdk_txq __rte_unused, 1994 struct rte_mbuf **pkts __rte_unused, 1995 uint16_t pkts_n __rte_unused) 1996 { 1997 return 0; 1998 } 1999 2000 int __attribute__((weak)) 2001 mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev __rte_unused) 2002 { 2003 return -ENOTSUP; 2004 } 2005 2006 int __attribute__((weak)) 2007 mlx5_check_vec_tx_support(struct rte_eth_dev *dev __rte_unused) 2008 { 2009 return -ENOTSUP; 2010 } 2011 2012 int __attribute__((weak)) 2013 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused) 2014 { 2015 return -ENOTSUP; 2016 } 2017 2018 int __attribute__((weak)) 2019 mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused) 2020 { 2021 return -ENOTSUP; 2022 } 2023