1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <assert.h> 7 #include <stdint.h> 8 #include <string.h> 9 #include <stdlib.h> 10 11 /* Verbs header. */ 12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 13 #ifdef PEDANTIC 14 #pragma GCC diagnostic ignored "-Wpedantic" 15 #endif 16 #include <infiniband/verbs.h> 17 #include <infiniband/mlx5dv.h> 18 #ifdef PEDANTIC 19 #pragma GCC diagnostic error "-Wpedantic" 20 #endif 21 22 #include <rte_mbuf.h> 23 #include <rte_mempool.h> 24 #include <rte_prefetch.h> 25 #include <rte_common.h> 26 #include <rte_branch_prediction.h> 27 #include <rte_ether.h> 28 29 #include "mlx5.h" 30 #include "mlx5_utils.h" 31 #include "mlx5_rxtx.h" 32 #include "mlx5_autoconf.h" 33 #include "mlx5_defs.h" 34 #include "mlx5_prm.h" 35 36 static __rte_always_inline uint32_t 37 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe); 38 39 static __rte_always_inline int 40 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 41 uint16_t cqe_cnt, uint32_t *rss_hash); 42 43 static __rte_always_inline uint32_t 44 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe); 45 46 uint32_t mlx5_ptype_table[] __rte_cache_aligned = { 47 [0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */ 48 }; 49 50 uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned; 51 uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned; 52 53 /** 54 * Build a table to translate Rx completion flags to packet type. 55 * 56 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 57 */ 58 void 59 mlx5_set_ptype_table(void) 60 { 61 unsigned int i; 62 uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table; 63 64 /* Last entry must not be overwritten, reserved for errored packet. */ 65 for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i) 66 (*p)[i] = RTE_PTYPE_UNKNOWN; 67 /* 68 * The index to the array should have: 69 * bit[1:0] = l3_hdr_type 70 * bit[4:2] = l4_hdr_type 71 * bit[5] = ip_frag 72 * bit[6] = tunneled 73 * bit[7] = outer_l3_type 74 */ 75 /* L2 */ 76 (*p)[0x00] = RTE_PTYPE_L2_ETHER; 77 /* L3 */ 78 (*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 79 RTE_PTYPE_L4_NONFRAG; 80 (*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 81 RTE_PTYPE_L4_NONFRAG; 82 /* Fragmented */ 83 (*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 84 RTE_PTYPE_L4_FRAG; 85 (*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 86 RTE_PTYPE_L4_FRAG; 87 /* TCP */ 88 (*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 89 RTE_PTYPE_L4_TCP; 90 (*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 91 RTE_PTYPE_L4_TCP; 92 (*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 93 RTE_PTYPE_L4_TCP; 94 (*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 95 RTE_PTYPE_L4_TCP; 96 (*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 97 RTE_PTYPE_L4_TCP; 98 (*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 99 RTE_PTYPE_L4_TCP; 100 /* UDP */ 101 (*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 102 RTE_PTYPE_L4_UDP; 103 (*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 104 RTE_PTYPE_L4_UDP; 105 /* Repeat with outer_l3_type being set. Just in case. */ 106 (*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 107 RTE_PTYPE_L4_NONFRAG; 108 (*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 109 RTE_PTYPE_L4_NONFRAG; 110 (*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 111 RTE_PTYPE_L4_FRAG; 112 (*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 113 RTE_PTYPE_L4_FRAG; 114 (*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 115 RTE_PTYPE_L4_TCP; 116 (*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 117 RTE_PTYPE_L4_TCP; 118 (*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 119 RTE_PTYPE_L4_TCP; 120 (*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 121 RTE_PTYPE_L4_TCP; 122 (*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 123 RTE_PTYPE_L4_TCP; 124 (*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 125 RTE_PTYPE_L4_TCP; 126 (*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 127 RTE_PTYPE_L4_UDP; 128 (*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 129 RTE_PTYPE_L4_UDP; 130 /* Tunneled - L3 */ 131 (*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 132 (*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 133 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 134 RTE_PTYPE_INNER_L4_NONFRAG; 135 (*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 136 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 137 RTE_PTYPE_INNER_L4_NONFRAG; 138 (*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 139 (*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 140 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 141 RTE_PTYPE_INNER_L4_NONFRAG; 142 (*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 143 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 144 RTE_PTYPE_INNER_L4_NONFRAG; 145 /* Tunneled - Fragmented */ 146 (*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 147 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 148 RTE_PTYPE_INNER_L4_FRAG; 149 (*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 150 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 151 RTE_PTYPE_INNER_L4_FRAG; 152 (*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 153 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 154 RTE_PTYPE_INNER_L4_FRAG; 155 (*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 156 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 157 RTE_PTYPE_INNER_L4_FRAG; 158 /* Tunneled - TCP */ 159 (*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 160 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 161 RTE_PTYPE_INNER_L4_TCP; 162 (*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 163 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 164 RTE_PTYPE_INNER_L4_TCP; 165 (*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 166 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 167 RTE_PTYPE_INNER_L4_TCP; 168 (*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 169 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 170 RTE_PTYPE_INNER_L4_TCP; 171 (*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 172 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 173 RTE_PTYPE_INNER_L4_TCP; 174 (*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 175 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 176 RTE_PTYPE_INNER_L4_TCP; 177 (*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 178 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 179 RTE_PTYPE_INNER_L4_TCP; 180 (*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 181 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 182 RTE_PTYPE_INNER_L4_TCP; 183 (*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 184 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 185 RTE_PTYPE_INNER_L4_TCP; 186 (*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 187 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 188 RTE_PTYPE_INNER_L4_TCP; 189 (*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 190 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 191 RTE_PTYPE_INNER_L4_TCP; 192 (*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 193 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 194 RTE_PTYPE_INNER_L4_TCP; 195 /* Tunneled - UDP */ 196 (*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 197 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 198 RTE_PTYPE_INNER_L4_UDP; 199 (*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 200 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 201 RTE_PTYPE_INNER_L4_UDP; 202 (*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 203 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 204 RTE_PTYPE_INNER_L4_UDP; 205 (*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 206 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 207 RTE_PTYPE_INNER_L4_UDP; 208 } 209 210 /** 211 * Build a table to translate packet to checksum type of Verbs. 212 */ 213 void 214 mlx5_set_cksum_table(void) 215 { 216 unsigned int i; 217 uint8_t v; 218 219 /* 220 * The index should have: 221 * bit[0] = PKT_TX_TCP_SEG 222 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 223 * bit[4] = PKT_TX_IP_CKSUM 224 * bit[8] = PKT_TX_OUTER_IP_CKSUM 225 * bit[9] = tunnel 226 */ 227 for (i = 0; i < RTE_DIM(mlx5_cksum_table); ++i) { 228 v = 0; 229 if (i & (1 << 9)) { 230 /* Tunneled packet. */ 231 if (i & (1 << 8)) /* Outer IP. */ 232 v |= MLX5_ETH_WQE_L3_CSUM; 233 if (i & (1 << 4)) /* Inner IP. */ 234 v |= MLX5_ETH_WQE_L3_INNER_CSUM; 235 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 236 v |= MLX5_ETH_WQE_L4_INNER_CSUM; 237 } else { 238 /* No tunnel. */ 239 if (i & (1 << 4)) /* IP. */ 240 v |= MLX5_ETH_WQE_L3_CSUM; 241 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 242 v |= MLX5_ETH_WQE_L4_CSUM; 243 } 244 mlx5_cksum_table[i] = v; 245 } 246 } 247 248 /** 249 * Build a table to translate packet type of mbuf to SWP type of Verbs. 250 */ 251 void 252 mlx5_set_swp_types_table(void) 253 { 254 unsigned int i; 255 uint8_t v; 256 257 /* 258 * The index should have: 259 * bit[0:1] = PKT_TX_L4_MASK 260 * bit[4] = PKT_TX_IPV6 261 * bit[8] = PKT_TX_OUTER_IPV6 262 * bit[9] = PKT_TX_OUTER_UDP 263 */ 264 for (i = 0; i < RTE_DIM(mlx5_swp_types_table); ++i) { 265 v = 0; 266 if (i & (1 << 8)) 267 v |= MLX5_ETH_WQE_L3_OUTER_IPV6; 268 if (i & (1 << 9)) 269 v |= MLX5_ETH_WQE_L4_OUTER_UDP; 270 if (i & (1 << 4)) 271 v |= MLX5_ETH_WQE_L3_INNER_IPV6; 272 if ((i & 3) == (PKT_TX_UDP_CKSUM >> 52)) 273 v |= MLX5_ETH_WQE_L4_INNER_UDP; 274 mlx5_swp_types_table[i] = v; 275 } 276 } 277 278 /** 279 * Return the size of tailroom of WQ. 280 * 281 * @param txq 282 * Pointer to TX queue structure. 283 * @param addr 284 * Pointer to tail of WQ. 285 * 286 * @return 287 * Size of tailroom. 288 */ 289 static inline size_t 290 tx_mlx5_wq_tailroom(struct mlx5_txq_data *txq, void *addr) 291 { 292 size_t tailroom; 293 tailroom = (uintptr_t)(txq->wqes) + 294 (1 << txq->wqe_n) * MLX5_WQE_SIZE - 295 (uintptr_t)addr; 296 return tailroom; 297 } 298 299 /** 300 * Copy data to tailroom of circular queue. 301 * 302 * @param dst 303 * Pointer to destination. 304 * @param src 305 * Pointer to source. 306 * @param n 307 * Number of bytes to copy. 308 * @param base 309 * Pointer to head of queue. 310 * @param tailroom 311 * Size of tailroom from dst. 312 * 313 * @return 314 * Pointer after copied data. 315 */ 316 static inline void * 317 mlx5_copy_to_wq(void *dst, const void *src, size_t n, 318 void *base, size_t tailroom) 319 { 320 void *ret; 321 322 if (n > tailroom) { 323 rte_memcpy(dst, src, tailroom); 324 rte_memcpy(base, (void *)((uintptr_t)src + tailroom), 325 n - tailroom); 326 ret = (uint8_t *)base + n - tailroom; 327 } else { 328 rte_memcpy(dst, src, n); 329 ret = (n == tailroom) ? base : (uint8_t *)dst + n; 330 } 331 return ret; 332 } 333 334 /** 335 * Inline TSO headers into WQE. 336 * 337 * @return 338 * 0 on success, negative errno value on failure. 339 */ 340 static int 341 inline_tso(struct mlx5_txq_data *txq, struct rte_mbuf *buf, 342 uint32_t *length, 343 uintptr_t *addr, 344 uint16_t *pkt_inline_sz, 345 uint8_t **raw, 346 uint16_t *max_wqe, 347 uint16_t *tso_segsz, 348 uint16_t *tso_header_sz) 349 { 350 uintptr_t end = (uintptr_t)(((uintptr_t)txq->wqes) + 351 (1 << txq->wqe_n) * MLX5_WQE_SIZE); 352 unsigned int copy_b; 353 uint8_t vlan_sz = (buf->ol_flags & PKT_TX_VLAN_PKT) ? 4 : 0; 354 const uint8_t tunneled = txq->tunnel_en && (buf->ol_flags & 355 PKT_TX_TUNNEL_MASK); 356 uint16_t n_wqe; 357 358 *tso_segsz = buf->tso_segsz; 359 *tso_header_sz = buf->l2_len + vlan_sz + buf->l3_len + buf->l4_len; 360 if (unlikely(*tso_segsz == 0 || *tso_header_sz == 0)) { 361 txq->stats.oerrors++; 362 return -EINVAL; 363 } 364 if (tunneled) 365 *tso_header_sz += buf->outer_l2_len + buf->outer_l3_len; 366 /* First seg must contain all TSO headers. */ 367 if (unlikely(*tso_header_sz > MLX5_MAX_TSO_HEADER) || 368 *tso_header_sz > DATA_LEN(buf)) { 369 txq->stats.oerrors++; 370 return -EINVAL; 371 } 372 copy_b = *tso_header_sz - *pkt_inline_sz; 373 if (!copy_b || ((end - (uintptr_t)*raw) < copy_b)) 374 return -EAGAIN; 375 n_wqe = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4; 376 if (unlikely(*max_wqe < n_wqe)) 377 return -EINVAL; 378 *max_wqe -= n_wqe; 379 rte_memcpy((void *)*raw, (void *)*addr, copy_b); 380 *length -= copy_b; 381 *addr += copy_b; 382 copy_b = MLX5_WQE_DS(copy_b) * MLX5_WQE_DWORD_SIZE; 383 *pkt_inline_sz += copy_b; 384 *raw += copy_b; 385 return 0; 386 } 387 388 /** 389 * DPDK callback to check the status of a tx descriptor. 390 * 391 * @param tx_queue 392 * The tx queue. 393 * @param[in] offset 394 * The index of the descriptor in the ring. 395 * 396 * @return 397 * The status of the tx descriptor. 398 */ 399 int 400 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) 401 { 402 struct mlx5_txq_data *txq = tx_queue; 403 uint16_t used; 404 405 mlx5_tx_complete(txq); 406 used = txq->elts_head - txq->elts_tail; 407 if (offset < used) 408 return RTE_ETH_TX_DESC_FULL; 409 return RTE_ETH_TX_DESC_DONE; 410 } 411 412 /** 413 * DPDK callback to check the status of a rx descriptor. 414 * 415 * @param rx_queue 416 * The rx queue. 417 * @param[in] offset 418 * The index of the descriptor in the ring. 419 * 420 * @return 421 * The status of the tx descriptor. 422 */ 423 int 424 mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset) 425 { 426 struct mlx5_rxq_data *rxq = rx_queue; 427 struct rxq_zip *zip = &rxq->zip; 428 volatile struct mlx5_cqe *cqe; 429 const unsigned int cqe_n = (1 << rxq->cqe_n); 430 const unsigned int cqe_cnt = cqe_n - 1; 431 unsigned int cq_ci; 432 unsigned int used; 433 434 /* if we are processing a compressed cqe */ 435 if (zip->ai) { 436 used = zip->cqe_cnt - zip->ca; 437 cq_ci = zip->cq_ci; 438 } else { 439 used = 0; 440 cq_ci = rxq->cq_ci; 441 } 442 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; 443 while (check_cqe(cqe, cqe_n, cq_ci) == 0) { 444 int8_t op_own; 445 unsigned int n; 446 447 op_own = cqe->op_own; 448 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) 449 n = rte_be_to_cpu_32(cqe->byte_cnt); 450 else 451 n = 1; 452 cq_ci += n; 453 used += n; 454 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; 455 } 456 used = RTE_MIN(used, (1U << rxq->elts_n) - 1); 457 if (offset < used) 458 return RTE_ETH_RX_DESC_DONE; 459 return RTE_ETH_RX_DESC_AVAIL; 460 } 461 462 /** 463 * DPDK callback for TX. 464 * 465 * @param dpdk_txq 466 * Generic pointer to TX queue structure. 467 * @param[in] pkts 468 * Packets to transmit. 469 * @param pkts_n 470 * Number of packets in array. 471 * 472 * @return 473 * Number of packets successfully transmitted (<= pkts_n). 474 */ 475 uint16_t 476 mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) 477 { 478 struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq; 479 uint16_t elts_head = txq->elts_head; 480 const uint16_t elts_n = 1 << txq->elts_n; 481 const uint16_t elts_m = elts_n - 1; 482 unsigned int i = 0; 483 unsigned int j = 0; 484 unsigned int k = 0; 485 uint16_t max_elts; 486 uint16_t max_wqe; 487 unsigned int comp; 488 volatile struct mlx5_wqe_ctrl *last_wqe = NULL; 489 unsigned int segs_n = 0; 490 const unsigned int max_inline = txq->max_inline; 491 492 if (unlikely(!pkts_n)) 493 return 0; 494 /* Prefetch first packet cacheline. */ 495 rte_prefetch0(*pkts); 496 /* Start processing. */ 497 mlx5_tx_complete(txq); 498 max_elts = (elts_n - (elts_head - txq->elts_tail)); 499 /* A CQE slot must always be available. */ 500 assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); 501 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); 502 if (unlikely(!max_wqe)) 503 return 0; 504 do { 505 struct rte_mbuf *buf = *pkts; /* First_seg. */ 506 uint8_t *raw; 507 volatile struct mlx5_wqe_v *wqe = NULL; 508 volatile rte_v128u32_t *dseg = NULL; 509 uint32_t length; 510 unsigned int ds = 0; 511 unsigned int sg = 0; /* counter of additional segs attached. */ 512 uintptr_t addr; 513 uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2; 514 uint16_t tso_header_sz = 0; 515 uint16_t ehdr; 516 uint8_t cs_flags; 517 uint8_t tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG); 518 uint8_t is_vlan = !!(buf->ol_flags & PKT_TX_VLAN_PKT); 519 uint32_t swp_offsets = 0; 520 uint8_t swp_types = 0; 521 uint16_t tso_segsz = 0; 522 #ifdef MLX5_PMD_SOFT_COUNTERS 523 uint32_t total_length = 0; 524 #endif 525 int ret; 526 527 segs_n = buf->nb_segs; 528 /* 529 * Make sure there is enough room to store this packet and 530 * that one ring entry remains unused. 531 */ 532 assert(segs_n); 533 if (max_elts < segs_n) 534 break; 535 max_elts -= segs_n; 536 sg = --segs_n; 537 if (unlikely(--max_wqe == 0)) 538 break; 539 wqe = (volatile struct mlx5_wqe_v *) 540 tx_mlx5_wqe(txq, txq->wqe_ci); 541 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); 542 if (pkts_n - i > 1) 543 rte_prefetch0(*(pkts + 1)); 544 addr = rte_pktmbuf_mtod(buf, uintptr_t); 545 length = DATA_LEN(buf); 546 ehdr = (((uint8_t *)addr)[1] << 8) | 547 ((uint8_t *)addr)[0]; 548 #ifdef MLX5_PMD_SOFT_COUNTERS 549 total_length = length; 550 #endif 551 if (length < (MLX5_WQE_DWORD_SIZE + 2)) { 552 txq->stats.oerrors++; 553 break; 554 } 555 /* Update element. */ 556 (*txq->elts)[elts_head & elts_m] = buf; 557 /* Prefetch next buffer data. */ 558 if (pkts_n - i > 1) 559 rte_prefetch0( 560 rte_pktmbuf_mtod(*(pkts + 1), volatile void *)); 561 cs_flags = txq_ol_cksum_to_cs(buf); 562 txq_mbuf_to_swp(txq, buf, tso, is_vlan, 563 (uint8_t *)&swp_offsets, &swp_types); 564 raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE; 565 /* Replace the Ethernet type by the VLAN if necessary. */ 566 if (is_vlan) { 567 uint32_t vlan = rte_cpu_to_be_32(0x81000000 | 568 buf->vlan_tci); 569 unsigned int len = 2 * ETHER_ADDR_LEN - 2; 570 571 addr += 2; 572 length -= 2; 573 /* Copy Destination and source mac address. */ 574 memcpy((uint8_t *)raw, ((uint8_t *)addr), len); 575 /* Copy VLAN. */ 576 memcpy((uint8_t *)raw + len, &vlan, sizeof(vlan)); 577 /* Copy missing two bytes to end the DSeg. */ 578 memcpy((uint8_t *)raw + len + sizeof(vlan), 579 ((uint8_t *)addr) + len, 2); 580 addr += len + 2; 581 length -= (len + 2); 582 } else { 583 memcpy((uint8_t *)raw, ((uint8_t *)addr) + 2, 584 MLX5_WQE_DWORD_SIZE); 585 length -= pkt_inline_sz; 586 addr += pkt_inline_sz; 587 } 588 raw += MLX5_WQE_DWORD_SIZE; 589 if (tso) { 590 ret = inline_tso(txq, buf, &length, 591 &addr, &pkt_inline_sz, 592 &raw, &max_wqe, 593 &tso_segsz, &tso_header_sz); 594 if (ret == -EINVAL) { 595 break; 596 } else if (ret == -EAGAIN) { 597 /* NOP WQE. */ 598 wqe->ctrl = (rte_v128u32_t){ 599 rte_cpu_to_be_32(txq->wqe_ci << 8), 600 rte_cpu_to_be_32(txq->qp_num_8s | 1), 601 0, 602 0, 603 }; 604 ds = 1; 605 #ifdef MLX5_PMD_SOFT_COUNTERS 606 total_length = 0; 607 #endif 608 k++; 609 goto next_wqe; 610 } 611 } 612 /* Inline if enough room. */ 613 if (max_inline || tso) { 614 uint32_t inl = 0; 615 uintptr_t end = (uintptr_t) 616 (((uintptr_t)txq->wqes) + 617 (1 << txq->wqe_n) * MLX5_WQE_SIZE); 618 unsigned int inline_room = max_inline * 619 RTE_CACHE_LINE_SIZE - 620 (pkt_inline_sz - 2) - 621 !!tso * sizeof(inl); 622 uintptr_t addr_end; 623 unsigned int copy_b; 624 625 pkt_inline: 626 addr_end = RTE_ALIGN_FLOOR(addr + inline_room, 627 RTE_CACHE_LINE_SIZE); 628 copy_b = (addr_end > addr) ? 629 RTE_MIN((addr_end - addr), length) : 0; 630 if (copy_b && ((end - (uintptr_t)raw) > copy_b)) { 631 /* 632 * One Dseg remains in the current WQE. To 633 * keep the computation positive, it is 634 * removed after the bytes to Dseg conversion. 635 */ 636 uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4; 637 638 if (unlikely(max_wqe < n)) 639 break; 640 max_wqe -= n; 641 if (tso && !inl) { 642 inl = rte_cpu_to_be_32(copy_b | 643 MLX5_INLINE_SEG); 644 rte_memcpy((void *)raw, 645 (void *)&inl, sizeof(inl)); 646 raw += sizeof(inl); 647 pkt_inline_sz += sizeof(inl); 648 } 649 rte_memcpy((void *)raw, (void *)addr, copy_b); 650 addr += copy_b; 651 length -= copy_b; 652 pkt_inline_sz += copy_b; 653 } 654 /* 655 * 2 DWORDs consumed by the WQE header + ETH segment + 656 * the size of the inline part of the packet. 657 */ 658 ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2); 659 if (length > 0) { 660 if (ds % (MLX5_WQE_SIZE / 661 MLX5_WQE_DWORD_SIZE) == 0) { 662 if (unlikely(--max_wqe == 0)) 663 break; 664 dseg = (volatile rte_v128u32_t *) 665 tx_mlx5_wqe(txq, txq->wqe_ci + 666 ds / 4); 667 } else { 668 dseg = (volatile rte_v128u32_t *) 669 ((uintptr_t)wqe + 670 (ds * MLX5_WQE_DWORD_SIZE)); 671 } 672 goto use_dseg; 673 } else if (!segs_n) { 674 goto next_pkt; 675 } else { 676 raw += copy_b; 677 inline_room -= copy_b; 678 --segs_n; 679 buf = buf->next; 680 assert(buf); 681 addr = rte_pktmbuf_mtod(buf, uintptr_t); 682 length = DATA_LEN(buf); 683 #ifdef MLX5_PMD_SOFT_COUNTERS 684 total_length += length; 685 #endif 686 (*txq->elts)[++elts_head & elts_m] = buf; 687 goto pkt_inline; 688 } 689 } else { 690 /* 691 * No inline has been done in the packet, only the 692 * Ethernet Header as been stored. 693 */ 694 dseg = (volatile rte_v128u32_t *) 695 ((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE)); 696 ds = 3; 697 use_dseg: 698 /* Add the remaining packet as a simple ds. */ 699 addr = rte_cpu_to_be_64(addr); 700 *dseg = (rte_v128u32_t){ 701 rte_cpu_to_be_32(length), 702 mlx5_tx_mb2mr(txq, buf), 703 addr, 704 addr >> 32, 705 }; 706 ++ds; 707 if (!segs_n) 708 goto next_pkt; 709 } 710 next_seg: 711 assert(buf); 712 assert(ds); 713 assert(wqe); 714 /* 715 * Spill on next WQE when the current one does not have 716 * enough room left. Size of WQE must a be a multiple 717 * of data segment size. 718 */ 719 assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE)); 720 if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) { 721 if (unlikely(--max_wqe == 0)) 722 break; 723 dseg = (volatile rte_v128u32_t *) 724 tx_mlx5_wqe(txq, txq->wqe_ci + ds / 4); 725 rte_prefetch0(tx_mlx5_wqe(txq, 726 txq->wqe_ci + ds / 4 + 1)); 727 } else { 728 ++dseg; 729 } 730 ++ds; 731 buf = buf->next; 732 assert(buf); 733 length = DATA_LEN(buf); 734 #ifdef MLX5_PMD_SOFT_COUNTERS 735 total_length += length; 736 #endif 737 /* Store segment information. */ 738 addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t)); 739 *dseg = (rte_v128u32_t){ 740 rte_cpu_to_be_32(length), 741 mlx5_tx_mb2mr(txq, buf), 742 addr, 743 addr >> 32, 744 }; 745 (*txq->elts)[++elts_head & elts_m] = buf; 746 if (--segs_n) 747 goto next_seg; 748 next_pkt: 749 if (ds > MLX5_DSEG_MAX) { 750 txq->stats.oerrors++; 751 break; 752 } 753 ++elts_head; 754 ++pkts; 755 ++i; 756 j += sg; 757 /* Initialize known and common part of the WQE structure. */ 758 if (tso) { 759 wqe->ctrl = (rte_v128u32_t){ 760 rte_cpu_to_be_32((txq->wqe_ci << 8) | 761 MLX5_OPCODE_TSO), 762 rte_cpu_to_be_32(txq->qp_num_8s | ds), 763 0, 764 0, 765 }; 766 wqe->eseg = (rte_v128u32_t){ 767 swp_offsets, 768 cs_flags | (swp_types << 8) | 769 (rte_cpu_to_be_16(tso_segsz) << 16), 770 0, 771 (ehdr << 16) | rte_cpu_to_be_16(tso_header_sz), 772 }; 773 } else { 774 wqe->ctrl = (rte_v128u32_t){ 775 rte_cpu_to_be_32((txq->wqe_ci << 8) | 776 MLX5_OPCODE_SEND), 777 rte_cpu_to_be_32(txq->qp_num_8s | ds), 778 0, 779 0, 780 }; 781 wqe->eseg = (rte_v128u32_t){ 782 swp_offsets, 783 cs_flags | (swp_types << 8), 784 0, 785 (ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz), 786 }; 787 } 788 next_wqe: 789 txq->wqe_ci += (ds + 3) / 4; 790 /* Save the last successful WQE for completion request */ 791 last_wqe = (volatile struct mlx5_wqe_ctrl *)wqe; 792 #ifdef MLX5_PMD_SOFT_COUNTERS 793 /* Increment sent bytes counter. */ 794 txq->stats.obytes += total_length; 795 #endif 796 } while (i < pkts_n); 797 /* Take a shortcut if nothing must be sent. */ 798 if (unlikely((i + k) == 0)) 799 return 0; 800 txq->elts_head += (i + j); 801 /* Check whether completion threshold has been reached. */ 802 comp = txq->elts_comp + i + j + k; 803 if (comp >= MLX5_TX_COMP_THRESH) { 804 /* Request completion on last WQE. */ 805 last_wqe->ctrl2 = rte_cpu_to_be_32(8); 806 /* Save elts_head in unused "immediate" field of WQE. */ 807 last_wqe->ctrl3 = txq->elts_head; 808 txq->elts_comp = 0; 809 #ifndef NDEBUG 810 ++txq->cq_pi; 811 #endif 812 } else { 813 txq->elts_comp = comp; 814 } 815 #ifdef MLX5_PMD_SOFT_COUNTERS 816 /* Increment sent packets counter. */ 817 txq->stats.opackets += i; 818 #endif 819 /* Ring QP doorbell. */ 820 mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)last_wqe); 821 return i; 822 } 823 824 /** 825 * Open a MPW session. 826 * 827 * @param txq 828 * Pointer to TX queue structure. 829 * @param mpw 830 * Pointer to MPW session structure. 831 * @param length 832 * Packet length. 833 */ 834 static inline void 835 mlx5_mpw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, uint32_t length) 836 { 837 uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1); 838 volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] = 839 (volatile struct mlx5_wqe_data_seg (*)[]) 840 tx_mlx5_wqe(txq, idx + 1); 841 842 mpw->state = MLX5_MPW_STATE_OPENED; 843 mpw->pkts_n = 0; 844 mpw->len = length; 845 mpw->total_len = 0; 846 mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx); 847 mpw->wqe->eseg.mss = rte_cpu_to_be_16(length); 848 mpw->wqe->eseg.inline_hdr_sz = 0; 849 mpw->wqe->eseg.rsvd0 = 0; 850 mpw->wqe->eseg.rsvd1 = 0; 851 mpw->wqe->eseg.rsvd2 = 0; 852 mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) | 853 (txq->wqe_ci << 8) | 854 MLX5_OPCODE_TSO); 855 mpw->wqe->ctrl[2] = 0; 856 mpw->wqe->ctrl[3] = 0; 857 mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *) 858 (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE)); 859 mpw->data.dseg[1] = (volatile struct mlx5_wqe_data_seg *) 860 (((uintptr_t)mpw->wqe) + (3 * MLX5_WQE_DWORD_SIZE)); 861 mpw->data.dseg[2] = &(*dseg)[0]; 862 mpw->data.dseg[3] = &(*dseg)[1]; 863 mpw->data.dseg[4] = &(*dseg)[2]; 864 } 865 866 /** 867 * Close a MPW session. 868 * 869 * @param txq 870 * Pointer to TX queue structure. 871 * @param mpw 872 * Pointer to MPW session structure. 873 */ 874 static inline void 875 mlx5_mpw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw) 876 { 877 unsigned int num = mpw->pkts_n; 878 879 /* 880 * Store size in multiple of 16 bytes. Control and Ethernet segments 881 * count as 2. 882 */ 883 mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s | (2 + num)); 884 mpw->state = MLX5_MPW_STATE_CLOSED; 885 if (num < 3) 886 ++txq->wqe_ci; 887 else 888 txq->wqe_ci += 2; 889 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci)); 890 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); 891 } 892 893 /** 894 * DPDK callback for TX with MPW support. 895 * 896 * @param dpdk_txq 897 * Generic pointer to TX queue structure. 898 * @param[in] pkts 899 * Packets to transmit. 900 * @param pkts_n 901 * Number of packets in array. 902 * 903 * @return 904 * Number of packets successfully transmitted (<= pkts_n). 905 */ 906 uint16_t 907 mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) 908 { 909 struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq; 910 uint16_t elts_head = txq->elts_head; 911 const uint16_t elts_n = 1 << txq->elts_n; 912 const uint16_t elts_m = elts_n - 1; 913 unsigned int i = 0; 914 unsigned int j = 0; 915 uint16_t max_elts; 916 uint16_t max_wqe; 917 unsigned int comp; 918 struct mlx5_mpw mpw = { 919 .state = MLX5_MPW_STATE_CLOSED, 920 }; 921 922 if (unlikely(!pkts_n)) 923 return 0; 924 /* Prefetch first packet cacheline. */ 925 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci)); 926 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); 927 /* Start processing. */ 928 mlx5_tx_complete(txq); 929 max_elts = (elts_n - (elts_head - txq->elts_tail)); 930 /* A CQE slot must always be available. */ 931 assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); 932 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); 933 if (unlikely(!max_wqe)) 934 return 0; 935 do { 936 struct rte_mbuf *buf = *(pkts++); 937 uint32_t length; 938 unsigned int segs_n = buf->nb_segs; 939 uint32_t cs_flags; 940 941 /* 942 * Make sure there is enough room to store this packet and 943 * that one ring entry remains unused. 944 */ 945 assert(segs_n); 946 if (max_elts < segs_n) 947 break; 948 /* Do not bother with large packets MPW cannot handle. */ 949 if (segs_n > MLX5_MPW_DSEG_MAX) { 950 txq->stats.oerrors++; 951 break; 952 } 953 max_elts -= segs_n; 954 --pkts_n; 955 cs_flags = txq_ol_cksum_to_cs(buf); 956 /* Retrieve packet information. */ 957 length = PKT_LEN(buf); 958 assert(length); 959 /* Start new session if packet differs. */ 960 if ((mpw.state == MLX5_MPW_STATE_OPENED) && 961 ((mpw.len != length) || 962 (segs_n != 1) || 963 (mpw.wqe->eseg.cs_flags != cs_flags))) 964 mlx5_mpw_close(txq, &mpw); 965 if (mpw.state == MLX5_MPW_STATE_CLOSED) { 966 /* 967 * Multi-Packet WQE consumes at most two WQE. 968 * mlx5_mpw_new() expects to be able to use such 969 * resources. 970 */ 971 if (unlikely(max_wqe < 2)) 972 break; 973 max_wqe -= 2; 974 mlx5_mpw_new(txq, &mpw, length); 975 mpw.wqe->eseg.cs_flags = cs_flags; 976 } 977 /* Multi-segment packets must be alone in their MPW. */ 978 assert((segs_n == 1) || (mpw.pkts_n == 0)); 979 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) 980 length = 0; 981 #endif 982 do { 983 volatile struct mlx5_wqe_data_seg *dseg; 984 uintptr_t addr; 985 986 assert(buf); 987 (*txq->elts)[elts_head++ & elts_m] = buf; 988 dseg = mpw.data.dseg[mpw.pkts_n]; 989 addr = rte_pktmbuf_mtod(buf, uintptr_t); 990 *dseg = (struct mlx5_wqe_data_seg){ 991 .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)), 992 .lkey = mlx5_tx_mb2mr(txq, buf), 993 .addr = rte_cpu_to_be_64(addr), 994 }; 995 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) 996 length += DATA_LEN(buf); 997 #endif 998 buf = buf->next; 999 ++mpw.pkts_n; 1000 ++j; 1001 } while (--segs_n); 1002 assert(length == mpw.len); 1003 if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) 1004 mlx5_mpw_close(txq, &mpw); 1005 #ifdef MLX5_PMD_SOFT_COUNTERS 1006 /* Increment sent bytes counter. */ 1007 txq->stats.obytes += length; 1008 #endif 1009 ++i; 1010 } while (pkts_n); 1011 /* Take a shortcut if nothing must be sent. */ 1012 if (unlikely(i == 0)) 1013 return 0; 1014 /* Check whether completion threshold has been reached. */ 1015 /* "j" includes both packets and segments. */ 1016 comp = txq->elts_comp + j; 1017 if (comp >= MLX5_TX_COMP_THRESH) { 1018 volatile struct mlx5_wqe *wqe = mpw.wqe; 1019 1020 /* Request completion on last WQE. */ 1021 wqe->ctrl[2] = rte_cpu_to_be_32(8); 1022 /* Save elts_head in unused "immediate" field of WQE. */ 1023 wqe->ctrl[3] = elts_head; 1024 txq->elts_comp = 0; 1025 #ifndef NDEBUG 1026 ++txq->cq_pi; 1027 #endif 1028 } else { 1029 txq->elts_comp = comp; 1030 } 1031 #ifdef MLX5_PMD_SOFT_COUNTERS 1032 /* Increment sent packets counter. */ 1033 txq->stats.opackets += i; 1034 #endif 1035 /* Ring QP doorbell. */ 1036 if (mpw.state == MLX5_MPW_STATE_OPENED) 1037 mlx5_mpw_close(txq, &mpw); 1038 mlx5_tx_dbrec(txq, mpw.wqe); 1039 txq->elts_head = elts_head; 1040 return i; 1041 } 1042 1043 /** 1044 * Open a MPW inline session. 1045 * 1046 * @param txq 1047 * Pointer to TX queue structure. 1048 * @param mpw 1049 * Pointer to MPW session structure. 1050 * @param length 1051 * Packet length. 1052 */ 1053 static inline void 1054 mlx5_mpw_inline_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, 1055 uint32_t length) 1056 { 1057 uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1); 1058 struct mlx5_wqe_inl_small *inl; 1059 1060 mpw->state = MLX5_MPW_INL_STATE_OPENED; 1061 mpw->pkts_n = 0; 1062 mpw->len = length; 1063 mpw->total_len = 0; 1064 mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx); 1065 mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) | 1066 (txq->wqe_ci << 8) | 1067 MLX5_OPCODE_TSO); 1068 mpw->wqe->ctrl[2] = 0; 1069 mpw->wqe->ctrl[3] = 0; 1070 mpw->wqe->eseg.mss = rte_cpu_to_be_16(length); 1071 mpw->wqe->eseg.inline_hdr_sz = 0; 1072 mpw->wqe->eseg.cs_flags = 0; 1073 mpw->wqe->eseg.rsvd0 = 0; 1074 mpw->wqe->eseg.rsvd1 = 0; 1075 mpw->wqe->eseg.rsvd2 = 0; 1076 inl = (struct mlx5_wqe_inl_small *) 1077 (((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE); 1078 mpw->data.raw = (uint8_t *)&inl->raw; 1079 } 1080 1081 /** 1082 * Close a MPW inline session. 1083 * 1084 * @param txq 1085 * Pointer to TX queue structure. 1086 * @param mpw 1087 * Pointer to MPW session structure. 1088 */ 1089 static inline void 1090 mlx5_mpw_inline_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw) 1091 { 1092 unsigned int size; 1093 struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *) 1094 (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE)); 1095 1096 size = MLX5_WQE_SIZE - MLX5_MWQE64_INL_DATA + mpw->total_len; 1097 /* 1098 * Store size in multiple of 16 bytes. Control and Ethernet segments 1099 * count as 2. 1100 */ 1101 mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s | 1102 MLX5_WQE_DS(size)); 1103 mpw->state = MLX5_MPW_STATE_CLOSED; 1104 inl->byte_cnt = rte_cpu_to_be_32(mpw->total_len | MLX5_INLINE_SEG); 1105 txq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE; 1106 } 1107 1108 /** 1109 * DPDK callback for TX with MPW inline support. 1110 * 1111 * @param dpdk_txq 1112 * Generic pointer to TX queue structure. 1113 * @param[in] pkts 1114 * Packets to transmit. 1115 * @param pkts_n 1116 * Number of packets in array. 1117 * 1118 * @return 1119 * Number of packets successfully transmitted (<= pkts_n). 1120 */ 1121 uint16_t 1122 mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, 1123 uint16_t pkts_n) 1124 { 1125 struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq; 1126 uint16_t elts_head = txq->elts_head; 1127 const uint16_t elts_n = 1 << txq->elts_n; 1128 const uint16_t elts_m = elts_n - 1; 1129 unsigned int i = 0; 1130 unsigned int j = 0; 1131 uint16_t max_elts; 1132 uint16_t max_wqe; 1133 unsigned int comp; 1134 unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE; 1135 struct mlx5_mpw mpw = { 1136 .state = MLX5_MPW_STATE_CLOSED, 1137 }; 1138 /* 1139 * Compute the maximum number of WQE which can be consumed by inline 1140 * code. 1141 * - 2 DSEG for: 1142 * - 1 control segment, 1143 * - 1 Ethernet segment, 1144 * - N Dseg from the inline request. 1145 */ 1146 const unsigned int wqe_inl_n = 1147 ((2 * MLX5_WQE_DWORD_SIZE + 1148 txq->max_inline * RTE_CACHE_LINE_SIZE) + 1149 RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; 1150 1151 if (unlikely(!pkts_n)) 1152 return 0; 1153 /* Prefetch first packet cacheline. */ 1154 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci)); 1155 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); 1156 /* Start processing. */ 1157 mlx5_tx_complete(txq); 1158 max_elts = (elts_n - (elts_head - txq->elts_tail)); 1159 /* A CQE slot must always be available. */ 1160 assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); 1161 do { 1162 struct rte_mbuf *buf = *(pkts++); 1163 uintptr_t addr; 1164 uint32_t length; 1165 unsigned int segs_n = buf->nb_segs; 1166 uint8_t cs_flags; 1167 1168 /* 1169 * Make sure there is enough room to store this packet and 1170 * that one ring entry remains unused. 1171 */ 1172 assert(segs_n); 1173 if (max_elts < segs_n) 1174 break; 1175 /* Do not bother with large packets MPW cannot handle. */ 1176 if (segs_n > MLX5_MPW_DSEG_MAX) { 1177 txq->stats.oerrors++; 1178 break; 1179 } 1180 max_elts -= segs_n; 1181 --pkts_n; 1182 /* 1183 * Compute max_wqe in case less WQE were consumed in previous 1184 * iteration. 1185 */ 1186 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); 1187 cs_flags = txq_ol_cksum_to_cs(buf); 1188 /* Retrieve packet information. */ 1189 length = PKT_LEN(buf); 1190 /* Start new session if packet differs. */ 1191 if (mpw.state == MLX5_MPW_STATE_OPENED) { 1192 if ((mpw.len != length) || 1193 (segs_n != 1) || 1194 (mpw.wqe->eseg.cs_flags != cs_flags)) 1195 mlx5_mpw_close(txq, &mpw); 1196 } else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) { 1197 if ((mpw.len != length) || 1198 (segs_n != 1) || 1199 (length > inline_room) || 1200 (mpw.wqe->eseg.cs_flags != cs_flags)) { 1201 mlx5_mpw_inline_close(txq, &mpw); 1202 inline_room = 1203 txq->max_inline * RTE_CACHE_LINE_SIZE; 1204 } 1205 } 1206 if (mpw.state == MLX5_MPW_STATE_CLOSED) { 1207 if ((segs_n != 1) || 1208 (length > inline_room)) { 1209 /* 1210 * Multi-Packet WQE consumes at most two WQE. 1211 * mlx5_mpw_new() expects to be able to use 1212 * such resources. 1213 */ 1214 if (unlikely(max_wqe < 2)) 1215 break; 1216 max_wqe -= 2; 1217 mlx5_mpw_new(txq, &mpw, length); 1218 mpw.wqe->eseg.cs_flags = cs_flags; 1219 } else { 1220 if (unlikely(max_wqe < wqe_inl_n)) 1221 break; 1222 max_wqe -= wqe_inl_n; 1223 mlx5_mpw_inline_new(txq, &mpw, length); 1224 mpw.wqe->eseg.cs_flags = cs_flags; 1225 } 1226 } 1227 /* Multi-segment packets must be alone in their MPW. */ 1228 assert((segs_n == 1) || (mpw.pkts_n == 0)); 1229 if (mpw.state == MLX5_MPW_STATE_OPENED) { 1230 assert(inline_room == 1231 txq->max_inline * RTE_CACHE_LINE_SIZE); 1232 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) 1233 length = 0; 1234 #endif 1235 do { 1236 volatile struct mlx5_wqe_data_seg *dseg; 1237 1238 assert(buf); 1239 (*txq->elts)[elts_head++ & elts_m] = buf; 1240 dseg = mpw.data.dseg[mpw.pkts_n]; 1241 addr = rte_pktmbuf_mtod(buf, uintptr_t); 1242 *dseg = (struct mlx5_wqe_data_seg){ 1243 .byte_count = 1244 rte_cpu_to_be_32(DATA_LEN(buf)), 1245 .lkey = mlx5_tx_mb2mr(txq, buf), 1246 .addr = rte_cpu_to_be_64(addr), 1247 }; 1248 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) 1249 length += DATA_LEN(buf); 1250 #endif 1251 buf = buf->next; 1252 ++mpw.pkts_n; 1253 ++j; 1254 } while (--segs_n); 1255 assert(length == mpw.len); 1256 if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) 1257 mlx5_mpw_close(txq, &mpw); 1258 } else { 1259 unsigned int max; 1260 1261 assert(mpw.state == MLX5_MPW_INL_STATE_OPENED); 1262 assert(length <= inline_room); 1263 assert(length == DATA_LEN(buf)); 1264 addr = rte_pktmbuf_mtod(buf, uintptr_t); 1265 (*txq->elts)[elts_head++ & elts_m] = buf; 1266 /* Maximum number of bytes before wrapping. */ 1267 max = ((((uintptr_t)(txq->wqes)) + 1268 (1 << txq->wqe_n) * 1269 MLX5_WQE_SIZE) - 1270 (uintptr_t)mpw.data.raw); 1271 if (length > max) { 1272 rte_memcpy((void *)(uintptr_t)mpw.data.raw, 1273 (void *)addr, 1274 max); 1275 mpw.data.raw = (volatile void *)txq->wqes; 1276 rte_memcpy((void *)(uintptr_t)mpw.data.raw, 1277 (void *)(addr + max), 1278 length - max); 1279 mpw.data.raw += length - max; 1280 } else { 1281 rte_memcpy((void *)(uintptr_t)mpw.data.raw, 1282 (void *)addr, 1283 length); 1284 1285 if (length == max) 1286 mpw.data.raw = 1287 (volatile void *)txq->wqes; 1288 else 1289 mpw.data.raw += length; 1290 } 1291 ++mpw.pkts_n; 1292 mpw.total_len += length; 1293 ++j; 1294 if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) { 1295 mlx5_mpw_inline_close(txq, &mpw); 1296 inline_room = 1297 txq->max_inline * RTE_CACHE_LINE_SIZE; 1298 } else { 1299 inline_room -= length; 1300 } 1301 } 1302 #ifdef MLX5_PMD_SOFT_COUNTERS 1303 /* Increment sent bytes counter. */ 1304 txq->stats.obytes += length; 1305 #endif 1306 ++i; 1307 } while (pkts_n); 1308 /* Take a shortcut if nothing must be sent. */ 1309 if (unlikely(i == 0)) 1310 return 0; 1311 /* Check whether completion threshold has been reached. */ 1312 /* "j" includes both packets and segments. */ 1313 comp = txq->elts_comp + j; 1314 if (comp >= MLX5_TX_COMP_THRESH) { 1315 volatile struct mlx5_wqe *wqe = mpw.wqe; 1316 1317 /* Request completion on last WQE. */ 1318 wqe->ctrl[2] = rte_cpu_to_be_32(8); 1319 /* Save elts_head in unused "immediate" field of WQE. */ 1320 wqe->ctrl[3] = elts_head; 1321 txq->elts_comp = 0; 1322 #ifndef NDEBUG 1323 ++txq->cq_pi; 1324 #endif 1325 } else { 1326 txq->elts_comp = comp; 1327 } 1328 #ifdef MLX5_PMD_SOFT_COUNTERS 1329 /* Increment sent packets counter. */ 1330 txq->stats.opackets += i; 1331 #endif 1332 /* Ring QP doorbell. */ 1333 if (mpw.state == MLX5_MPW_INL_STATE_OPENED) 1334 mlx5_mpw_inline_close(txq, &mpw); 1335 else if (mpw.state == MLX5_MPW_STATE_OPENED) 1336 mlx5_mpw_close(txq, &mpw); 1337 mlx5_tx_dbrec(txq, mpw.wqe); 1338 txq->elts_head = elts_head; 1339 return i; 1340 } 1341 1342 /** 1343 * Open an Enhanced MPW session. 1344 * 1345 * @param txq 1346 * Pointer to TX queue structure. 1347 * @param mpw 1348 * Pointer to MPW session structure. 1349 * @param length 1350 * Packet length. 1351 */ 1352 static inline void 1353 mlx5_empw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, int padding) 1354 { 1355 uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1); 1356 1357 mpw->state = MLX5_MPW_ENHANCED_STATE_OPENED; 1358 mpw->pkts_n = 0; 1359 mpw->total_len = sizeof(struct mlx5_wqe); 1360 mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx); 1361 mpw->wqe->ctrl[0] = 1362 rte_cpu_to_be_32((MLX5_OPC_MOD_ENHANCED_MPSW << 24) | 1363 (txq->wqe_ci << 8) | 1364 MLX5_OPCODE_ENHANCED_MPSW); 1365 mpw->wqe->ctrl[2] = 0; 1366 mpw->wqe->ctrl[3] = 0; 1367 memset((void *)(uintptr_t)&mpw->wqe->eseg, 0, MLX5_WQE_DWORD_SIZE); 1368 if (unlikely(padding)) { 1369 uintptr_t addr = (uintptr_t)(mpw->wqe + 1); 1370 1371 /* Pad the first 2 DWORDs with zero-length inline header. */ 1372 *(volatile uint32_t *)addr = rte_cpu_to_be_32(MLX5_INLINE_SEG); 1373 *(volatile uint32_t *)(addr + MLX5_WQE_DWORD_SIZE) = 1374 rte_cpu_to_be_32(MLX5_INLINE_SEG); 1375 mpw->total_len += 2 * MLX5_WQE_DWORD_SIZE; 1376 /* Start from the next WQEBB. */ 1377 mpw->data.raw = (volatile void *)(tx_mlx5_wqe(txq, idx + 1)); 1378 } else { 1379 mpw->data.raw = (volatile void *)(mpw->wqe + 1); 1380 } 1381 } 1382 1383 /** 1384 * Close an Enhanced MPW session. 1385 * 1386 * @param txq 1387 * Pointer to TX queue structure. 1388 * @param mpw 1389 * Pointer to MPW session structure. 1390 * 1391 * @return 1392 * Number of consumed WQEs. 1393 */ 1394 static inline uint16_t 1395 mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw) 1396 { 1397 uint16_t ret; 1398 1399 /* Store size in multiple of 16 bytes. Control and Ethernet segments 1400 * count as 2. 1401 */ 1402 mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s | 1403 MLX5_WQE_DS(mpw->total_len)); 1404 mpw->state = MLX5_MPW_STATE_CLOSED; 1405 ret = (mpw->total_len + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE; 1406 txq->wqe_ci += ret; 1407 return ret; 1408 } 1409 1410 /** 1411 * TX with Enhanced MPW support. 1412 * 1413 * @param txq 1414 * Pointer to TX queue structure. 1415 * @param[in] pkts 1416 * Packets to transmit. 1417 * @param pkts_n 1418 * Number of packets in array. 1419 * 1420 * @return 1421 * Number of packets successfully transmitted (<= pkts_n). 1422 */ 1423 static inline uint16_t 1424 txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, 1425 uint16_t pkts_n) 1426 { 1427 uint16_t elts_head = txq->elts_head; 1428 const uint16_t elts_n = 1 << txq->elts_n; 1429 const uint16_t elts_m = elts_n - 1; 1430 unsigned int i = 0; 1431 unsigned int j = 0; 1432 uint16_t max_elts; 1433 uint16_t max_wqe; 1434 unsigned int max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE; 1435 unsigned int mpw_room = 0; 1436 unsigned int inl_pad = 0; 1437 uint32_t inl_hdr; 1438 struct mlx5_mpw mpw = { 1439 .state = MLX5_MPW_STATE_CLOSED, 1440 }; 1441 1442 if (unlikely(!pkts_n)) 1443 return 0; 1444 /* Start processing. */ 1445 mlx5_tx_complete(txq); 1446 max_elts = (elts_n - (elts_head - txq->elts_tail)); 1447 /* A CQE slot must always be available. */ 1448 assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); 1449 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); 1450 if (unlikely(!max_wqe)) 1451 return 0; 1452 do { 1453 struct rte_mbuf *buf = *(pkts++); 1454 uintptr_t addr; 1455 unsigned int do_inline = 0; /* Whether inline is possible. */ 1456 uint32_t length; 1457 uint8_t cs_flags; 1458 1459 /* Multi-segmented packet is handled in slow-path outside. */ 1460 assert(NB_SEGS(buf) == 1); 1461 /* Make sure there is enough room to store this packet. */ 1462 if (max_elts - j == 0) 1463 break; 1464 cs_flags = txq_ol_cksum_to_cs(buf); 1465 /* Retrieve packet information. */ 1466 length = PKT_LEN(buf); 1467 /* Start new session if: 1468 * - multi-segment packet 1469 * - no space left even for a dseg 1470 * - next packet can be inlined with a new WQE 1471 * - cs_flag differs 1472 */ 1473 if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) { 1474 if ((inl_pad + sizeof(struct mlx5_wqe_data_seg) > 1475 mpw_room) || 1476 (length <= txq->inline_max_packet_sz && 1477 inl_pad + sizeof(inl_hdr) + length > 1478 mpw_room) || 1479 (mpw.wqe->eseg.cs_flags != cs_flags)) 1480 max_wqe -= mlx5_empw_close(txq, &mpw); 1481 } 1482 if (unlikely(mpw.state == MLX5_MPW_STATE_CLOSED)) { 1483 /* In Enhanced MPW, inline as much as the budget is 1484 * allowed. The remaining space is to be filled with 1485 * dsegs. If the title WQEBB isn't padded, it will have 1486 * 2 dsegs there. 1487 */ 1488 mpw_room = RTE_MIN(MLX5_WQE_SIZE_MAX, 1489 (max_inline ? max_inline : 1490 pkts_n * MLX5_WQE_DWORD_SIZE) + 1491 MLX5_WQE_SIZE); 1492 if (unlikely(max_wqe * MLX5_WQE_SIZE < mpw_room)) 1493 break; 1494 /* Don't pad the title WQEBB to not waste WQ. */ 1495 mlx5_empw_new(txq, &mpw, 0); 1496 mpw_room -= mpw.total_len; 1497 inl_pad = 0; 1498 do_inline = length <= txq->inline_max_packet_sz && 1499 sizeof(inl_hdr) + length <= mpw_room && 1500 !txq->mpw_hdr_dseg; 1501 mpw.wqe->eseg.cs_flags = cs_flags; 1502 } else { 1503 /* Evaluate whether the next packet can be inlined. 1504 * Inlininig is possible when: 1505 * - length is less than configured value 1506 * - length fits for remaining space 1507 * - not required to fill the title WQEBB with dsegs 1508 */ 1509 do_inline = 1510 length <= txq->inline_max_packet_sz && 1511 inl_pad + sizeof(inl_hdr) + length <= 1512 mpw_room && 1513 (!txq->mpw_hdr_dseg || 1514 mpw.total_len >= MLX5_WQE_SIZE); 1515 } 1516 if (max_inline && do_inline) { 1517 /* Inline packet into WQE. */ 1518 unsigned int max; 1519 1520 assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED); 1521 assert(length == DATA_LEN(buf)); 1522 inl_hdr = rte_cpu_to_be_32(length | MLX5_INLINE_SEG); 1523 addr = rte_pktmbuf_mtod(buf, uintptr_t); 1524 mpw.data.raw = (volatile void *) 1525 ((uintptr_t)mpw.data.raw + inl_pad); 1526 max = tx_mlx5_wq_tailroom(txq, 1527 (void *)(uintptr_t)mpw.data.raw); 1528 /* Copy inline header. */ 1529 mpw.data.raw = (volatile void *) 1530 mlx5_copy_to_wq( 1531 (void *)(uintptr_t)mpw.data.raw, 1532 &inl_hdr, 1533 sizeof(inl_hdr), 1534 (void *)(uintptr_t)txq->wqes, 1535 max); 1536 max = tx_mlx5_wq_tailroom(txq, 1537 (void *)(uintptr_t)mpw.data.raw); 1538 /* Copy packet data. */ 1539 mpw.data.raw = (volatile void *) 1540 mlx5_copy_to_wq( 1541 (void *)(uintptr_t)mpw.data.raw, 1542 (void *)addr, 1543 length, 1544 (void *)(uintptr_t)txq->wqes, 1545 max); 1546 ++mpw.pkts_n; 1547 mpw.total_len += (inl_pad + sizeof(inl_hdr) + length); 1548 /* No need to get completion as the entire packet is 1549 * copied to WQ. Free the buf right away. 1550 */ 1551 rte_pktmbuf_free_seg(buf); 1552 mpw_room -= (inl_pad + sizeof(inl_hdr) + length); 1553 /* Add pad in the next packet if any. */ 1554 inl_pad = (((uintptr_t)mpw.data.raw + 1555 (MLX5_WQE_DWORD_SIZE - 1)) & 1556 ~(MLX5_WQE_DWORD_SIZE - 1)) - 1557 (uintptr_t)mpw.data.raw; 1558 } else { 1559 /* No inline. Load a dseg of packet pointer. */ 1560 volatile rte_v128u32_t *dseg; 1561 1562 assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED); 1563 assert((inl_pad + sizeof(*dseg)) <= mpw_room); 1564 assert(length == DATA_LEN(buf)); 1565 if (!tx_mlx5_wq_tailroom(txq, 1566 (void *)((uintptr_t)mpw.data.raw 1567 + inl_pad))) 1568 dseg = (volatile void *)txq->wqes; 1569 else 1570 dseg = (volatile void *) 1571 ((uintptr_t)mpw.data.raw + 1572 inl_pad); 1573 (*txq->elts)[elts_head++ & elts_m] = buf; 1574 addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, 1575 uintptr_t)); 1576 *dseg = (rte_v128u32_t) { 1577 rte_cpu_to_be_32(length), 1578 mlx5_tx_mb2mr(txq, buf), 1579 addr, 1580 addr >> 32, 1581 }; 1582 mpw.data.raw = (volatile void *)(dseg + 1); 1583 mpw.total_len += (inl_pad + sizeof(*dseg)); 1584 ++j; 1585 ++mpw.pkts_n; 1586 mpw_room -= (inl_pad + sizeof(*dseg)); 1587 inl_pad = 0; 1588 } 1589 #ifdef MLX5_PMD_SOFT_COUNTERS 1590 /* Increment sent bytes counter. */ 1591 txq->stats.obytes += length; 1592 #endif 1593 ++i; 1594 } while (i < pkts_n); 1595 /* Take a shortcut if nothing must be sent. */ 1596 if (unlikely(i == 0)) 1597 return 0; 1598 /* Check whether completion threshold has been reached. */ 1599 if (txq->elts_comp + j >= MLX5_TX_COMP_THRESH || 1600 (uint16_t)(txq->wqe_ci - txq->mpw_comp) >= 1601 (1 << txq->wqe_n) / MLX5_TX_COMP_THRESH_INLINE_DIV) { 1602 volatile struct mlx5_wqe *wqe = mpw.wqe; 1603 1604 /* Request completion on last WQE. */ 1605 wqe->ctrl[2] = rte_cpu_to_be_32(8); 1606 /* Save elts_head in unused "immediate" field of WQE. */ 1607 wqe->ctrl[3] = elts_head; 1608 txq->elts_comp = 0; 1609 txq->mpw_comp = txq->wqe_ci; 1610 #ifndef NDEBUG 1611 ++txq->cq_pi; 1612 #endif 1613 } else { 1614 txq->elts_comp += j; 1615 } 1616 #ifdef MLX5_PMD_SOFT_COUNTERS 1617 /* Increment sent packets counter. */ 1618 txq->stats.opackets += i; 1619 #endif 1620 if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) 1621 mlx5_empw_close(txq, &mpw); 1622 /* Ring QP doorbell. */ 1623 mlx5_tx_dbrec(txq, mpw.wqe); 1624 txq->elts_head = elts_head; 1625 return i; 1626 } 1627 1628 /** 1629 * DPDK callback for TX with Enhanced MPW support. 1630 * 1631 * @param dpdk_txq 1632 * Generic pointer to TX queue structure. 1633 * @param[in] pkts 1634 * Packets to transmit. 1635 * @param pkts_n 1636 * Number of packets in array. 1637 * 1638 * @return 1639 * Number of packets successfully transmitted (<= pkts_n). 1640 */ 1641 uint16_t 1642 mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) 1643 { 1644 struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq; 1645 uint16_t nb_tx = 0; 1646 1647 while (pkts_n > nb_tx) { 1648 uint16_t n; 1649 uint16_t ret; 1650 1651 n = txq_count_contig_multi_seg(&pkts[nb_tx], pkts_n - nb_tx); 1652 if (n) { 1653 ret = mlx5_tx_burst(dpdk_txq, &pkts[nb_tx], n); 1654 if (!ret) 1655 break; 1656 nb_tx += ret; 1657 } 1658 n = txq_count_contig_single_seg(&pkts[nb_tx], pkts_n - nb_tx); 1659 if (n) { 1660 ret = txq_burst_empw(txq, &pkts[nb_tx], n); 1661 if (!ret) 1662 break; 1663 nb_tx += ret; 1664 } 1665 } 1666 return nb_tx; 1667 } 1668 1669 /** 1670 * Translate RX completion flags to packet type. 1671 * 1672 * @param[in] rxq 1673 * Pointer to RX queue structure. 1674 * @param[in] cqe 1675 * Pointer to CQE. 1676 * 1677 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 1678 * 1679 * @return 1680 * Packet type for struct rte_mbuf. 1681 */ 1682 static inline uint32_t 1683 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe) 1684 { 1685 uint8_t idx; 1686 uint8_t pinfo = cqe->pkt_info; 1687 uint16_t ptype = cqe->hdr_type_etc; 1688 1689 /* 1690 * The index to the array should have: 1691 * bit[1:0] = l3_hdr_type 1692 * bit[4:2] = l4_hdr_type 1693 * bit[5] = ip_frag 1694 * bit[6] = tunneled 1695 * bit[7] = outer_l3_type 1696 */ 1697 idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10); 1698 return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6)); 1699 } 1700 1701 /** 1702 * Get size of the next packet for a given CQE. For compressed CQEs, the 1703 * consumer index is updated only once all packets of the current one have 1704 * been processed. 1705 * 1706 * @param rxq 1707 * Pointer to RX queue. 1708 * @param cqe 1709 * CQE to process. 1710 * @param[out] rss_hash 1711 * Packet RSS Hash result. 1712 * 1713 * @return 1714 * Packet size in bytes (0 if there is none), -1 in case of completion 1715 * with error. 1716 */ 1717 static inline int 1718 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 1719 uint16_t cqe_cnt, uint32_t *rss_hash) 1720 { 1721 struct rxq_zip *zip = &rxq->zip; 1722 uint16_t cqe_n = cqe_cnt + 1; 1723 int len = 0; 1724 uint16_t idx, end; 1725 1726 /* Process compressed data in the CQE and mini arrays. */ 1727 if (zip->ai) { 1728 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1729 (volatile struct mlx5_mini_cqe8 (*)[8]) 1730 (uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].pkt_info); 1731 1732 len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt); 1733 *rss_hash = rte_be_to_cpu_32((*mc)[zip->ai & 7].rx_hash_result); 1734 if ((++zip->ai & 7) == 0) { 1735 /* Invalidate consumed CQEs */ 1736 idx = zip->ca; 1737 end = zip->na; 1738 while (idx != end) { 1739 (*rxq->cqes)[idx & cqe_cnt].op_own = 1740 MLX5_CQE_INVALIDATE; 1741 ++idx; 1742 } 1743 /* 1744 * Increment consumer index to skip the number of 1745 * CQEs consumed. Hardware leaves holes in the CQ 1746 * ring for software use. 1747 */ 1748 zip->ca = zip->na; 1749 zip->na += 8; 1750 } 1751 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { 1752 /* Invalidate the rest */ 1753 idx = zip->ca; 1754 end = zip->cq_ci; 1755 1756 while (idx != end) { 1757 (*rxq->cqes)[idx & cqe_cnt].op_own = 1758 MLX5_CQE_INVALIDATE; 1759 ++idx; 1760 } 1761 rxq->cq_ci = zip->cq_ci; 1762 zip->ai = 0; 1763 } 1764 /* No compressed data, get next CQE and verify if it is compressed. */ 1765 } else { 1766 int ret; 1767 int8_t op_own; 1768 1769 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 1770 if (unlikely(ret == 1)) 1771 return 0; 1772 ++rxq->cq_ci; 1773 op_own = cqe->op_own; 1774 rte_cio_rmb(); 1775 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { 1776 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1777 (volatile struct mlx5_mini_cqe8 (*)[8]) 1778 (uintptr_t)(&(*rxq->cqes)[rxq->cq_ci & 1779 cqe_cnt].pkt_info); 1780 1781 /* Fix endianness. */ 1782 zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt); 1783 /* 1784 * Current mini array position is the one returned by 1785 * check_cqe64(). 1786 * 1787 * If completion comprises several mini arrays, as a 1788 * special case the second one is located 7 CQEs after 1789 * the initial CQE instead of 8 for subsequent ones. 1790 */ 1791 zip->ca = rxq->cq_ci; 1792 zip->na = zip->ca + 7; 1793 /* Compute the next non compressed CQE. */ 1794 --rxq->cq_ci; 1795 zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; 1796 /* Get packet size to return. */ 1797 len = rte_be_to_cpu_32((*mc)[0].byte_cnt); 1798 *rss_hash = rte_be_to_cpu_32((*mc)[0].rx_hash_result); 1799 zip->ai = 1; 1800 /* Prefetch all the entries to be invalidated */ 1801 idx = zip->ca; 1802 end = zip->cq_ci; 1803 while (idx != end) { 1804 rte_prefetch0(&(*rxq->cqes)[(idx) & cqe_cnt]); 1805 ++idx; 1806 } 1807 } else { 1808 len = rte_be_to_cpu_32(cqe->byte_cnt); 1809 *rss_hash = rte_be_to_cpu_32(cqe->rx_hash_res); 1810 } 1811 /* Error while receiving packet. */ 1812 if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR)) 1813 return -1; 1814 } 1815 return len; 1816 } 1817 1818 /** 1819 * Translate RX completion flags to offload flags. 1820 * 1821 * @param[in] cqe 1822 * Pointer to CQE. 1823 * 1824 * @return 1825 * Offload flags (ol_flags) for struct rte_mbuf. 1826 */ 1827 static inline uint32_t 1828 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe) 1829 { 1830 uint32_t ol_flags = 0; 1831 uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc); 1832 1833 ol_flags = 1834 TRANSPOSE(flags, 1835 MLX5_CQE_RX_L3_HDR_VALID, 1836 PKT_RX_IP_CKSUM_GOOD) | 1837 TRANSPOSE(flags, 1838 MLX5_CQE_RX_L4_HDR_VALID, 1839 PKT_RX_L4_CKSUM_GOOD); 1840 return ol_flags; 1841 } 1842 1843 /** 1844 * DPDK callback for RX. 1845 * 1846 * @param dpdk_rxq 1847 * Generic pointer to RX queue structure. 1848 * @param[out] pkts 1849 * Array to store received packets. 1850 * @param pkts_n 1851 * Maximum number of packets in array. 1852 * 1853 * @return 1854 * Number of packets successfully received (<= pkts_n). 1855 */ 1856 uint16_t 1857 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1858 { 1859 struct mlx5_rxq_data *rxq = dpdk_rxq; 1860 const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; 1861 const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1; 1862 const unsigned int sges_n = rxq->sges_n; 1863 struct rte_mbuf *pkt = NULL; 1864 struct rte_mbuf *seg = NULL; 1865 volatile struct mlx5_cqe *cqe = 1866 &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1867 unsigned int i = 0; 1868 unsigned int rq_ci = rxq->rq_ci << sges_n; 1869 int len = 0; /* keep its value across iterations. */ 1870 1871 while (pkts_n) { 1872 unsigned int idx = rq_ci & wqe_cnt; 1873 volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx]; 1874 struct rte_mbuf *rep = (*rxq->elts)[idx]; 1875 uint32_t rss_hash_res = 0; 1876 1877 if (pkt) 1878 NEXT(seg) = rep; 1879 seg = rep; 1880 rte_prefetch0(seg); 1881 rte_prefetch0(cqe); 1882 rte_prefetch0(wqe); 1883 rep = rte_mbuf_raw_alloc(rxq->mp); 1884 if (unlikely(rep == NULL)) { 1885 ++rxq->stats.rx_nombuf; 1886 if (!pkt) { 1887 /* 1888 * no buffers before we even started, 1889 * bail out silently. 1890 */ 1891 break; 1892 } 1893 while (pkt != seg) { 1894 assert(pkt != (*rxq->elts)[idx]); 1895 rep = NEXT(pkt); 1896 NEXT(pkt) = NULL; 1897 NB_SEGS(pkt) = 1; 1898 rte_mbuf_raw_free(pkt); 1899 pkt = rep; 1900 } 1901 break; 1902 } 1903 if (!pkt) { 1904 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1905 len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, 1906 &rss_hash_res); 1907 if (!len) { 1908 rte_mbuf_raw_free(rep); 1909 break; 1910 } 1911 if (unlikely(len == -1)) { 1912 /* RX error, packet is likely too large. */ 1913 rte_mbuf_raw_free(rep); 1914 ++rxq->stats.idropped; 1915 goto skip; 1916 } 1917 pkt = seg; 1918 assert(len >= (rxq->crc_present << 2)); 1919 /* Update packet information. */ 1920 pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe); 1921 pkt->ol_flags = 0; 1922 if (rss_hash_res && rxq->rss_hash) { 1923 pkt->hash.rss = rss_hash_res; 1924 pkt->ol_flags = PKT_RX_RSS_HASH; 1925 } 1926 if (rxq->mark && 1927 MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) { 1928 pkt->ol_flags |= PKT_RX_FDIR; 1929 if (cqe->sop_drop_qpn != 1930 rte_cpu_to_be_32(MLX5_FLOW_MARK_DEFAULT)) { 1931 uint32_t mark = cqe->sop_drop_qpn; 1932 1933 pkt->ol_flags |= PKT_RX_FDIR_ID; 1934 pkt->hash.fdir.hi = 1935 mlx5_flow_mark_get(mark); 1936 } 1937 } 1938 if (rxq->csum) 1939 pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); 1940 if (rxq->vlan_strip && 1941 (cqe->hdr_type_etc & 1942 rte_cpu_to_be_16(MLX5_CQE_VLAN_STRIPPED))) { 1943 pkt->ol_flags |= PKT_RX_VLAN | 1944 PKT_RX_VLAN_STRIPPED; 1945 pkt->vlan_tci = 1946 rte_be_to_cpu_16(cqe->vlan_info); 1947 } 1948 if (rxq->hw_timestamp) { 1949 pkt->timestamp = 1950 rte_be_to_cpu_64(cqe->timestamp); 1951 pkt->ol_flags |= PKT_RX_TIMESTAMP; 1952 } 1953 if (rxq->crc_present) 1954 len -= ETHER_CRC_LEN; 1955 PKT_LEN(pkt) = len; 1956 } 1957 DATA_LEN(rep) = DATA_LEN(seg); 1958 PKT_LEN(rep) = PKT_LEN(seg); 1959 SET_DATA_OFF(rep, DATA_OFF(seg)); 1960 PORT(rep) = PORT(seg); 1961 (*rxq->elts)[idx] = rep; 1962 /* 1963 * Fill NIC descriptor with the new buffer. The lkey and size 1964 * of the buffers are already known, only the buffer address 1965 * changes. 1966 */ 1967 wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t)); 1968 if (len > DATA_LEN(seg)) { 1969 len -= DATA_LEN(seg); 1970 ++NB_SEGS(pkt); 1971 ++rq_ci; 1972 continue; 1973 } 1974 DATA_LEN(seg) = len; 1975 #ifdef MLX5_PMD_SOFT_COUNTERS 1976 /* Increment bytes counter. */ 1977 rxq->stats.ibytes += PKT_LEN(pkt); 1978 #endif 1979 /* Return packet. */ 1980 *(pkts++) = pkt; 1981 pkt = NULL; 1982 --pkts_n; 1983 ++i; 1984 skip: 1985 /* Align consumer index to the next stride. */ 1986 rq_ci >>= sges_n; 1987 ++rq_ci; 1988 rq_ci <<= sges_n; 1989 } 1990 if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci))) 1991 return 0; 1992 /* Update the consumer index. */ 1993 rxq->rq_ci = rq_ci >> sges_n; 1994 rte_cio_wmb(); 1995 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1996 rte_cio_wmb(); 1997 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1998 #ifdef MLX5_PMD_SOFT_COUNTERS 1999 /* Increment packets counter. */ 2000 rxq->stats.ipackets += i; 2001 #endif 2002 return i; 2003 } 2004 2005 /** 2006 * Dummy DPDK callback for TX. 2007 * 2008 * This function is used to temporarily replace the real callback during 2009 * unsafe control operations on the queue, or in case of error. 2010 * 2011 * @param dpdk_txq 2012 * Generic pointer to TX queue structure. 2013 * @param[in] pkts 2014 * Packets to transmit. 2015 * @param pkts_n 2016 * Number of packets in array. 2017 * 2018 * @return 2019 * Number of packets successfully transmitted (<= pkts_n). 2020 */ 2021 uint16_t 2022 removed_tx_burst(void *dpdk_txq __rte_unused, 2023 struct rte_mbuf **pkts __rte_unused, 2024 uint16_t pkts_n __rte_unused) 2025 { 2026 return 0; 2027 } 2028 2029 /** 2030 * Dummy DPDK callback for RX. 2031 * 2032 * This function is used to temporarily replace the real callback during 2033 * unsafe control operations on the queue, or in case of error. 2034 * 2035 * @param dpdk_rxq 2036 * Generic pointer to RX queue structure. 2037 * @param[out] pkts 2038 * Array to store received packets. 2039 * @param pkts_n 2040 * Maximum number of packets in array. 2041 * 2042 * @return 2043 * Number of packets successfully received (<= pkts_n). 2044 */ 2045 uint16_t 2046 removed_rx_burst(void *dpdk_txq __rte_unused, 2047 struct rte_mbuf **pkts __rte_unused, 2048 uint16_t pkts_n __rte_unused) 2049 { 2050 return 0; 2051 } 2052 2053 /* 2054 * Vectorized Rx/Tx routines are not compiled in when required vector 2055 * instructions are not supported on a target architecture. The following null 2056 * stubs are needed for linkage when those are not included outside of this file 2057 * (e.g. mlx5_rxtx_vec_sse.c for x86). 2058 */ 2059 2060 uint16_t __attribute__((weak)) 2061 mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused, 2062 struct rte_mbuf **pkts __rte_unused, 2063 uint16_t pkts_n __rte_unused) 2064 { 2065 return 0; 2066 } 2067 2068 uint16_t __attribute__((weak)) 2069 mlx5_tx_burst_vec(void *dpdk_txq __rte_unused, 2070 struct rte_mbuf **pkts __rte_unused, 2071 uint16_t pkts_n __rte_unused) 2072 { 2073 return 0; 2074 } 2075 2076 uint16_t __attribute__((weak)) 2077 mlx5_rx_burst_vec(void *dpdk_txq __rte_unused, 2078 struct rte_mbuf **pkts __rte_unused, 2079 uint16_t pkts_n __rte_unused) 2080 { 2081 return 0; 2082 } 2083 2084 int __attribute__((weak)) 2085 mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev __rte_unused) 2086 { 2087 return -ENOTSUP; 2088 } 2089 2090 int __attribute__((weak)) 2091 mlx5_check_vec_tx_support(struct rte_eth_dev *dev __rte_unused) 2092 { 2093 return -ENOTSUP; 2094 } 2095 2096 int __attribute__((weak)) 2097 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused) 2098 { 2099 return -ENOTSUP; 2100 } 2101 2102 int __attribute__((weak)) 2103 mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused) 2104 { 2105 return -ENOTSUP; 2106 } 2107