1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #include <assert.h> 7 #include <stdint.h> 8 #include <string.h> 9 #include <stdlib.h> 10 11 /* Verbs header. */ 12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 13 #ifdef PEDANTIC 14 #pragma GCC diagnostic ignored "-Wpedantic" 15 #endif 16 #include <infiniband/verbs.h> 17 #include <infiniband/mlx5dv.h> 18 #ifdef PEDANTIC 19 #pragma GCC diagnostic error "-Wpedantic" 20 #endif 21 22 #include <rte_mbuf.h> 23 #include <rte_mempool.h> 24 #include <rte_prefetch.h> 25 #include <rte_common.h> 26 #include <rte_branch_prediction.h> 27 #include <rte_ether.h> 28 29 #include "mlx5.h" 30 #include "mlx5_utils.h" 31 #include "mlx5_rxtx.h" 32 #include "mlx5_autoconf.h" 33 #include "mlx5_defs.h" 34 #include "mlx5_prm.h" 35 36 static __rte_always_inline uint32_t 37 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe); 38 39 static __rte_always_inline int 40 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 41 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe); 42 43 static __rte_always_inline uint32_t 44 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe); 45 46 static __rte_always_inline void 47 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 48 volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res); 49 50 static __rte_always_inline void 51 mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx); 52 53 uint32_t mlx5_ptype_table[] __rte_cache_aligned = { 54 [0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */ 55 }; 56 57 uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned; 58 uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned; 59 60 /** 61 * Build a table to translate Rx completion flags to packet type. 62 * 63 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 64 */ 65 void 66 mlx5_set_ptype_table(void) 67 { 68 unsigned int i; 69 uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table; 70 71 /* Last entry must not be overwritten, reserved for errored packet. 
*/ 72 for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i) 73 (*p)[i] = RTE_PTYPE_UNKNOWN; 74 /* 75 * The index to the array should have: 76 * bit[1:0] = l3_hdr_type 77 * bit[4:2] = l4_hdr_type 78 * bit[5] = ip_frag 79 * bit[6] = tunneled 80 * bit[7] = outer_l3_type 81 */ 82 /* L2 */ 83 (*p)[0x00] = RTE_PTYPE_L2_ETHER; 84 /* L3 */ 85 (*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 86 RTE_PTYPE_L4_NONFRAG; 87 (*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 88 RTE_PTYPE_L4_NONFRAG; 89 /* Fragmented */ 90 (*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 91 RTE_PTYPE_L4_FRAG; 92 (*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 93 RTE_PTYPE_L4_FRAG; 94 /* TCP */ 95 (*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 96 RTE_PTYPE_L4_TCP; 97 (*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 98 RTE_PTYPE_L4_TCP; 99 (*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 100 RTE_PTYPE_L4_TCP; 101 (*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 102 RTE_PTYPE_L4_TCP; 103 (*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 104 RTE_PTYPE_L4_TCP; 105 (*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 106 RTE_PTYPE_L4_TCP; 107 /* UDP */ 108 (*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 109 RTE_PTYPE_L4_UDP; 110 (*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 111 RTE_PTYPE_L4_UDP; 112 /* Repeat with outer_l3_type being set. Just in case. */ 113 (*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 114 RTE_PTYPE_L4_NONFRAG; 115 (*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 116 RTE_PTYPE_L4_NONFRAG; 117 (*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 118 RTE_PTYPE_L4_FRAG; 119 (*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 120 RTE_PTYPE_L4_FRAG; 121 (*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 122 RTE_PTYPE_L4_TCP; 123 (*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 124 RTE_PTYPE_L4_TCP; 125 (*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 126 RTE_PTYPE_L4_TCP; 127 (*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 128 RTE_PTYPE_L4_TCP; 129 (*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 130 RTE_PTYPE_L4_TCP; 131 (*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 132 RTE_PTYPE_L4_TCP; 133 (*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 134 RTE_PTYPE_L4_UDP; 135 (*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 136 RTE_PTYPE_L4_UDP; 137 /* Tunneled - L3 */ 138 (*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 139 (*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 140 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 141 RTE_PTYPE_INNER_L4_NONFRAG; 142 (*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 143 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 144 RTE_PTYPE_INNER_L4_NONFRAG; 145 (*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 146 (*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 147 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 148 RTE_PTYPE_INNER_L4_NONFRAG; 149 (*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 150 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 151 RTE_PTYPE_INNER_L4_NONFRAG; 152 /* Tunneled - Fragmented */ 153 (*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 154 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 155 
RTE_PTYPE_INNER_L4_FRAG; 156 (*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 157 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 158 RTE_PTYPE_INNER_L4_FRAG; 159 (*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 160 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 161 RTE_PTYPE_INNER_L4_FRAG; 162 (*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 163 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 164 RTE_PTYPE_INNER_L4_FRAG; 165 /* Tunneled - TCP */ 166 (*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 167 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 168 RTE_PTYPE_INNER_L4_TCP; 169 (*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 170 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 171 RTE_PTYPE_INNER_L4_TCP; 172 (*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 173 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 174 RTE_PTYPE_INNER_L4_TCP; 175 (*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 176 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 177 RTE_PTYPE_INNER_L4_TCP; 178 (*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 179 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 180 RTE_PTYPE_INNER_L4_TCP; 181 (*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 182 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 183 RTE_PTYPE_INNER_L4_TCP; 184 (*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 185 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 186 RTE_PTYPE_INNER_L4_TCP; 187 (*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 188 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 189 RTE_PTYPE_INNER_L4_TCP; 190 (*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 191 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 192 RTE_PTYPE_INNER_L4_TCP; 193 (*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 194 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 195 RTE_PTYPE_INNER_L4_TCP; 196 (*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 197 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 198 RTE_PTYPE_INNER_L4_TCP; 199 (*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 200 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 201 RTE_PTYPE_INNER_L4_TCP; 202 /* Tunneled - UDP */ 203 (*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 204 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 205 RTE_PTYPE_INNER_L4_UDP; 206 (*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 207 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 208 RTE_PTYPE_INNER_L4_UDP; 209 (*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 210 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 211 RTE_PTYPE_INNER_L4_UDP; 212 (*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 213 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 214 RTE_PTYPE_INNER_L4_UDP; 215 } 216 217 /** 218 * Build a table to translate packet to checksum type of Verbs. 219 */ 220 void 221 mlx5_set_cksum_table(void) 222 { 223 unsigned int i; 224 uint8_t v; 225 226 /* 227 * The index should have: 228 * bit[0] = PKT_TX_TCP_SEG 229 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 230 * bit[4] = PKT_TX_IP_CKSUM 231 * bit[8] = PKT_TX_OUTER_IP_CKSUM 232 * bit[9] = tunnel 233 */ 234 for (i = 0; i < RTE_DIM(mlx5_cksum_table); ++i) { 235 v = 0; 236 if (i & (1 << 9)) { 237 /* Tunneled packet. */ 238 if (i & (1 << 8)) /* Outer IP. */ 239 v |= MLX5_ETH_WQE_L3_CSUM; 240 if (i & (1 << 4)) /* Inner IP. */ 241 v |= MLX5_ETH_WQE_L3_INNER_CSUM; 242 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 243 v |= MLX5_ETH_WQE_L4_INNER_CSUM; 244 } else { 245 /* No tunnel. */ 246 if (i & (1 << 4)) /* IP. 
*/ 247 v |= MLX5_ETH_WQE_L3_CSUM; 248 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 249 v |= MLX5_ETH_WQE_L4_CSUM; 250 } 251 mlx5_cksum_table[i] = v; 252 } 253 } 254 255 /** 256 * Build a table to translate packet type of mbuf to SWP type of Verbs. 257 */ 258 void 259 mlx5_set_swp_types_table(void) 260 { 261 unsigned int i; 262 uint8_t v; 263 264 /* 265 * The index should have: 266 * bit[0:1] = PKT_TX_L4_MASK 267 * bit[4] = PKT_TX_IPV6 268 * bit[8] = PKT_TX_OUTER_IPV6 269 * bit[9] = PKT_TX_OUTER_UDP 270 */ 271 for (i = 0; i < RTE_DIM(mlx5_swp_types_table); ++i) { 272 v = 0; 273 if (i & (1 << 8)) 274 v |= MLX5_ETH_WQE_L3_OUTER_IPV6; 275 if (i & (1 << 9)) 276 v |= MLX5_ETH_WQE_L4_OUTER_UDP; 277 if (i & (1 << 4)) 278 v |= MLX5_ETH_WQE_L3_INNER_IPV6; 279 if ((i & 3) == (PKT_TX_UDP_CKSUM >> 52)) 280 v |= MLX5_ETH_WQE_L4_INNER_UDP; 281 mlx5_swp_types_table[i] = v; 282 } 283 } 284 285 /** 286 * Return the size of tailroom of WQ. 287 * 288 * @param txq 289 * Pointer to TX queue structure. 290 * @param addr 291 * Pointer to tail of WQ. 292 * 293 * @return 294 * Size of tailroom. 295 */ 296 static inline size_t 297 tx_mlx5_wq_tailroom(struct mlx5_txq_data *txq, void *addr) 298 { 299 size_t tailroom; 300 tailroom = (uintptr_t)(txq->wqes) + 301 (1 << txq->wqe_n) * MLX5_WQE_SIZE - 302 (uintptr_t)addr; 303 return tailroom; 304 } 305 306 /** 307 * Copy data to tailroom of circular queue. 308 * 309 * @param dst 310 * Pointer to destination. 311 * @param src 312 * Pointer to source. 313 * @param n 314 * Number of bytes to copy. 315 * @param base 316 * Pointer to head of queue. 317 * @param tailroom 318 * Size of tailroom from dst. 319 * 320 * @return 321 * Pointer after copied data. 322 */ 323 static inline void * 324 mlx5_copy_to_wq(void *dst, const void *src, size_t n, 325 void *base, size_t tailroom) 326 { 327 void *ret; 328 329 if (n > tailroom) { 330 rte_memcpy(dst, src, tailroom); 331 rte_memcpy(base, (void *)((uintptr_t)src + tailroom), 332 n - tailroom); 333 ret = (uint8_t *)base + n - tailroom; 334 } else { 335 rte_memcpy(dst, src, n); 336 ret = (n == tailroom) ? base : (uint8_t *)dst + n; 337 } 338 return ret; 339 } 340 341 /** 342 * Inline TSO headers into WQE. 343 * 344 * @return 345 * 0 on success, negative errno value on failure. 346 */ 347 static int 348 inline_tso(struct mlx5_txq_data *txq, struct rte_mbuf *buf, 349 uint32_t *length, 350 uintptr_t *addr, 351 uint16_t *pkt_inline_sz, 352 uint8_t **raw, 353 uint16_t *max_wqe, 354 uint16_t *tso_segsz, 355 uint16_t *tso_header_sz) 356 { 357 uintptr_t end = (uintptr_t)(((uintptr_t)txq->wqes) + 358 (1 << txq->wqe_n) * MLX5_WQE_SIZE); 359 unsigned int copy_b; 360 uint8_t vlan_sz = (buf->ol_flags & PKT_TX_VLAN_PKT) ? 4 : 0; 361 const uint8_t tunneled = txq->tunnel_en && (buf->ol_flags & 362 PKT_TX_TUNNEL_MASK); 363 uint16_t n_wqe; 364 365 *tso_segsz = buf->tso_segsz; 366 *tso_header_sz = buf->l2_len + vlan_sz + buf->l3_len + buf->l4_len; 367 if (unlikely(*tso_segsz == 0 || *tso_header_sz == 0)) { 368 txq->stats.oerrors++; 369 return -EINVAL; 370 } 371 if (tunneled) 372 *tso_header_sz += buf->outer_l2_len + buf->outer_l3_len; 373 /* First seg must contain all TSO headers. 
*/ 374 if (unlikely(*tso_header_sz > MLX5_MAX_TSO_HEADER) || 375 *tso_header_sz > DATA_LEN(buf)) { 376 txq->stats.oerrors++; 377 return -EINVAL; 378 } 379 copy_b = *tso_header_sz - *pkt_inline_sz; 380 if (!copy_b || ((end - (uintptr_t)*raw) < copy_b)) 381 return -EAGAIN; 382 n_wqe = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4; 383 if (unlikely(*max_wqe < n_wqe)) 384 return -EINVAL; 385 *max_wqe -= n_wqe; 386 rte_memcpy((void *)*raw, (void *)*addr, copy_b); 387 *length -= copy_b; 388 *addr += copy_b; 389 copy_b = MLX5_WQE_DS(copy_b) * MLX5_WQE_DWORD_SIZE; 390 *pkt_inline_sz += copy_b; 391 *raw += copy_b; 392 return 0; 393 } 394 395 /** 396 * DPDK callback to check the status of a tx descriptor. 397 * 398 * @param tx_queue 399 * The tx queue. 400 * @param[in] offset 401 * The index of the descriptor in the ring. 402 * 403 * @return 404 * The status of the tx descriptor. 405 */ 406 int 407 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) 408 { 409 struct mlx5_txq_data *txq = tx_queue; 410 uint16_t used; 411 412 mlx5_tx_complete(txq); 413 used = txq->elts_head - txq->elts_tail; 414 if (offset < used) 415 return RTE_ETH_TX_DESC_FULL; 416 return RTE_ETH_TX_DESC_DONE; 417 } 418 419 /** 420 * DPDK callback to check the status of a rx descriptor. 421 * 422 * @param rx_queue 423 * The rx queue. 424 * @param[in] offset 425 * The index of the descriptor in the ring. 426 * 427 * @return 428 * The status of the tx descriptor. 429 */ 430 int 431 mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset) 432 { 433 struct mlx5_rxq_data *rxq = rx_queue; 434 struct rxq_zip *zip = &rxq->zip; 435 volatile struct mlx5_cqe *cqe; 436 const unsigned int cqe_n = (1 << rxq->cqe_n); 437 const unsigned int cqe_cnt = cqe_n - 1; 438 unsigned int cq_ci; 439 unsigned int used; 440 441 /* if we are processing a compressed cqe */ 442 if (zip->ai) { 443 used = zip->cqe_cnt - zip->ca; 444 cq_ci = zip->cq_ci; 445 } else { 446 used = 0; 447 cq_ci = rxq->cq_ci; 448 } 449 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; 450 while (check_cqe(cqe, cqe_n, cq_ci) == 0) { 451 int8_t op_own; 452 unsigned int n; 453 454 op_own = cqe->op_own; 455 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) 456 n = rte_be_to_cpu_32(cqe->byte_cnt); 457 else 458 n = 1; 459 cq_ci += n; 460 used += n; 461 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; 462 } 463 used = RTE_MIN(used, (1U << rxq->elts_n) - 1); 464 if (offset < used) 465 return RTE_ETH_RX_DESC_DONE; 466 return RTE_ETH_RX_DESC_AVAIL; 467 } 468 469 /** 470 * DPDK callback for TX. 471 * 472 * @param dpdk_txq 473 * Generic pointer to TX queue structure. 474 * @param[in] pkts 475 * Packets to transmit. 476 * @param pkts_n 477 * Number of packets in array. 478 * 479 * @return 480 * Number of packets successfully transmitted (<= pkts_n). 481 */ 482 uint16_t 483 mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) 484 { 485 struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq; 486 uint16_t elts_head = txq->elts_head; 487 const uint16_t elts_n = 1 << txq->elts_n; 488 const uint16_t elts_m = elts_n - 1; 489 unsigned int i = 0; 490 unsigned int j = 0; 491 unsigned int k = 0; 492 uint16_t max_elts; 493 uint16_t max_wqe; 494 unsigned int comp; 495 volatile struct mlx5_wqe_ctrl *last_wqe = NULL; 496 unsigned int segs_n = 0; 497 const unsigned int max_inline = txq->max_inline; 498 499 if (unlikely(!pkts_n)) 500 return 0; 501 /* Prefetch first packet cacheline. */ 502 rte_prefetch0(*pkts); 503 /* Start processing. 
*/ 504 mlx5_tx_complete(txq); 505 max_elts = (elts_n - (elts_head - txq->elts_tail)); 506 /* A CQE slot must always be available. */ 507 assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); 508 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); 509 if (unlikely(!max_wqe)) 510 return 0; 511 do { 512 struct rte_mbuf *buf = *pkts; /* First_seg. */ 513 uint8_t *raw; 514 volatile struct mlx5_wqe_v *wqe = NULL; 515 volatile rte_v128u32_t *dseg = NULL; 516 uint32_t length; 517 unsigned int ds = 0; 518 unsigned int sg = 0; /* counter of additional segs attached. */ 519 uintptr_t addr; 520 uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2; 521 uint16_t tso_header_sz = 0; 522 uint16_t ehdr; 523 uint8_t cs_flags; 524 uint8_t tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG); 525 uint32_t swp_offsets = 0; 526 uint8_t swp_types = 0; 527 uint16_t tso_segsz = 0; 528 #ifdef MLX5_PMD_SOFT_COUNTERS 529 uint32_t total_length = 0; 530 #endif 531 int ret; 532 533 segs_n = buf->nb_segs; 534 /* 535 * Make sure there is enough room to store this packet and 536 * that one ring entry remains unused. 537 */ 538 assert(segs_n); 539 if (max_elts < segs_n) 540 break; 541 max_elts -= segs_n; 542 sg = --segs_n; 543 if (unlikely(--max_wqe == 0)) 544 break; 545 wqe = (volatile struct mlx5_wqe_v *) 546 tx_mlx5_wqe(txq, txq->wqe_ci); 547 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); 548 if (pkts_n - i > 1) 549 rte_prefetch0(*(pkts + 1)); 550 addr = rte_pktmbuf_mtod(buf, uintptr_t); 551 length = DATA_LEN(buf); 552 ehdr = (((uint8_t *)addr)[1] << 8) | 553 ((uint8_t *)addr)[0]; 554 #ifdef MLX5_PMD_SOFT_COUNTERS 555 total_length = length; 556 #endif 557 if (length < (MLX5_WQE_DWORD_SIZE + 2)) { 558 txq->stats.oerrors++; 559 break; 560 } 561 /* Update element. */ 562 (*txq->elts)[elts_head & elts_m] = buf; 563 /* Prefetch next buffer data. */ 564 if (pkts_n - i > 1) 565 rte_prefetch0( 566 rte_pktmbuf_mtod(*(pkts + 1), volatile void *)); 567 cs_flags = txq_ol_cksum_to_cs(buf); 568 txq_mbuf_to_swp(txq, buf, (uint8_t *)&swp_offsets, &swp_types); 569 raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE; 570 /* Replace the Ethernet type by the VLAN if necessary. */ 571 if (buf->ol_flags & PKT_TX_VLAN_PKT) { 572 uint32_t vlan = rte_cpu_to_be_32(0x81000000 | 573 buf->vlan_tci); 574 unsigned int len = 2 * ETHER_ADDR_LEN - 2; 575 576 addr += 2; 577 length -= 2; 578 /* Copy Destination and source mac address. */ 579 memcpy((uint8_t *)raw, ((uint8_t *)addr), len); 580 /* Copy VLAN. */ 581 memcpy((uint8_t *)raw + len, &vlan, sizeof(vlan)); 582 /* Copy missing two bytes to end the DSeg. */ 583 memcpy((uint8_t *)raw + len + sizeof(vlan), 584 ((uint8_t *)addr) + len, 2); 585 addr += len + 2; 586 length -= (len + 2); 587 } else { 588 memcpy((uint8_t *)raw, ((uint8_t *)addr) + 2, 589 MLX5_WQE_DWORD_SIZE); 590 length -= pkt_inline_sz; 591 addr += pkt_inline_sz; 592 } 593 raw += MLX5_WQE_DWORD_SIZE; 594 if (tso) { 595 ret = inline_tso(txq, buf, &length, 596 &addr, &pkt_inline_sz, 597 &raw, &max_wqe, 598 &tso_segsz, &tso_header_sz); 599 if (ret == -EINVAL) { 600 break; 601 } else if (ret == -EAGAIN) { 602 /* NOP WQE. */ 603 wqe->ctrl = (rte_v128u32_t){ 604 rte_cpu_to_be_32(txq->wqe_ci << 8), 605 rte_cpu_to_be_32(txq->qp_num_8s | 1), 606 0, 607 0, 608 }; 609 ds = 1; 610 #ifdef MLX5_PMD_SOFT_COUNTERS 611 total_length = 0; 612 #endif 613 k++; 614 goto next_wqe; 615 } 616 } 617 /* Inline if enough room. 
*/ 618 if (max_inline || tso) { 619 uint32_t inl = 0; 620 uintptr_t end = (uintptr_t) 621 (((uintptr_t)txq->wqes) + 622 (1 << txq->wqe_n) * MLX5_WQE_SIZE); 623 unsigned int inline_room = max_inline * 624 RTE_CACHE_LINE_SIZE - 625 (pkt_inline_sz - 2) - 626 !!tso * sizeof(inl); 627 uintptr_t addr_end; 628 unsigned int copy_b; 629 630 pkt_inline: 631 addr_end = RTE_ALIGN_FLOOR(addr + inline_room, 632 RTE_CACHE_LINE_SIZE); 633 copy_b = (addr_end > addr) ? 634 RTE_MIN((addr_end - addr), length) : 0; 635 if (copy_b && ((end - (uintptr_t)raw) > copy_b)) { 636 /* 637 * One Dseg remains in the current WQE. To 638 * keep the computation positive, it is 639 * removed after the bytes to Dseg conversion. 640 */ 641 uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4; 642 643 if (unlikely(max_wqe < n)) 644 break; 645 max_wqe -= n; 646 if (tso) { 647 assert(inl == 0); 648 inl = rte_cpu_to_be_32(copy_b | 649 MLX5_INLINE_SEG); 650 rte_memcpy((void *)raw, 651 (void *)&inl, sizeof(inl)); 652 raw += sizeof(inl); 653 pkt_inline_sz += sizeof(inl); 654 } 655 rte_memcpy((void *)raw, (void *)addr, copy_b); 656 addr += copy_b; 657 length -= copy_b; 658 pkt_inline_sz += copy_b; 659 } 660 /* 661 * 2 DWORDs consumed by the WQE header + ETH segment + 662 * the size of the inline part of the packet. 663 */ 664 ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2); 665 if (length > 0) { 666 if (ds % (MLX5_WQE_SIZE / 667 MLX5_WQE_DWORD_SIZE) == 0) { 668 if (unlikely(--max_wqe == 0)) 669 break; 670 dseg = (volatile rte_v128u32_t *) 671 tx_mlx5_wqe(txq, txq->wqe_ci + 672 ds / 4); 673 } else { 674 dseg = (volatile rte_v128u32_t *) 675 ((uintptr_t)wqe + 676 (ds * MLX5_WQE_DWORD_SIZE)); 677 } 678 goto use_dseg; 679 } else if (!segs_n) { 680 goto next_pkt; 681 } else { 682 /* 683 * Further inline the next segment only for 684 * non-TSO packets. 685 */ 686 if (!tso) { 687 raw += copy_b; 688 inline_room -= copy_b; 689 } else { 690 inline_room = 0; 691 } 692 /* Move to the next segment. */ 693 --segs_n; 694 buf = buf->next; 695 assert(buf); 696 addr = rte_pktmbuf_mtod(buf, uintptr_t); 697 length = DATA_LEN(buf); 698 #ifdef MLX5_PMD_SOFT_COUNTERS 699 total_length += length; 700 #endif 701 (*txq->elts)[++elts_head & elts_m] = buf; 702 goto pkt_inline; 703 } 704 } else { 705 /* 706 * No inline has been done in the packet, only the 707 * Ethernet Header as been stored. 708 */ 709 dseg = (volatile rte_v128u32_t *) 710 ((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE)); 711 ds = 3; 712 use_dseg: 713 /* Add the remaining packet as a simple ds. */ 714 addr = rte_cpu_to_be_64(addr); 715 *dseg = (rte_v128u32_t){ 716 rte_cpu_to_be_32(length), 717 mlx5_tx_mb2mr(txq, buf), 718 addr, 719 addr >> 32, 720 }; 721 ++ds; 722 if (!segs_n) 723 goto next_pkt; 724 } 725 next_seg: 726 assert(buf); 727 assert(ds); 728 assert(wqe); 729 /* 730 * Spill on next WQE when the current one does not have 731 * enough room left. Size of WQE must a be a multiple 732 * of data segment size. 733 */ 734 assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE)); 735 if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) { 736 if (unlikely(--max_wqe == 0)) 737 break; 738 dseg = (volatile rte_v128u32_t *) 739 tx_mlx5_wqe(txq, txq->wqe_ci + ds / 4); 740 rte_prefetch0(tx_mlx5_wqe(txq, 741 txq->wqe_ci + ds / 4 + 1)); 742 } else { 743 ++dseg; 744 } 745 ++ds; 746 buf = buf->next; 747 assert(buf); 748 length = DATA_LEN(buf); 749 #ifdef MLX5_PMD_SOFT_COUNTERS 750 total_length += length; 751 #endif 752 /* Store segment information. 
*/ 753 addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t)); 754 *dseg = (rte_v128u32_t){ 755 rte_cpu_to_be_32(length), 756 mlx5_tx_mb2mr(txq, buf), 757 addr, 758 addr >> 32, 759 }; 760 (*txq->elts)[++elts_head & elts_m] = buf; 761 if (--segs_n) 762 goto next_seg; 763 next_pkt: 764 if (ds > MLX5_DSEG_MAX) { 765 txq->stats.oerrors++; 766 break; 767 } 768 ++elts_head; 769 ++pkts; 770 ++i; 771 j += sg; 772 /* Initialize known and common part of the WQE structure. */ 773 if (tso) { 774 wqe->ctrl = (rte_v128u32_t){ 775 rte_cpu_to_be_32((txq->wqe_ci << 8) | 776 MLX5_OPCODE_TSO), 777 rte_cpu_to_be_32(txq->qp_num_8s | ds), 778 0, 779 0, 780 }; 781 wqe->eseg = (rte_v128u32_t){ 782 swp_offsets, 783 cs_flags | (swp_types << 8) | 784 (rte_cpu_to_be_16(tso_segsz) << 16), 785 0, 786 (ehdr << 16) | rte_cpu_to_be_16(tso_header_sz), 787 }; 788 } else { 789 wqe->ctrl = (rte_v128u32_t){ 790 rte_cpu_to_be_32((txq->wqe_ci << 8) | 791 MLX5_OPCODE_SEND), 792 rte_cpu_to_be_32(txq->qp_num_8s | ds), 793 0, 794 0, 795 }; 796 wqe->eseg = (rte_v128u32_t){ 797 swp_offsets, 798 cs_flags | (swp_types << 8), 799 0, 800 (ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz), 801 }; 802 } 803 next_wqe: 804 txq->wqe_ci += (ds + 3) / 4; 805 /* Save the last successful WQE for completion request */ 806 last_wqe = (volatile struct mlx5_wqe_ctrl *)wqe; 807 #ifdef MLX5_PMD_SOFT_COUNTERS 808 /* Increment sent bytes counter. */ 809 txq->stats.obytes += total_length; 810 #endif 811 } while (i < pkts_n); 812 /* Take a shortcut if nothing must be sent. */ 813 if (unlikely((i + k) == 0)) 814 return 0; 815 txq->elts_head += (i + j); 816 /* Check whether completion threshold has been reached. */ 817 comp = txq->elts_comp + i + j + k; 818 if (comp >= MLX5_TX_COMP_THRESH) { 819 /* Request completion on last WQE. */ 820 last_wqe->ctrl2 = rte_cpu_to_be_32(8); 821 /* Save elts_head in unused "immediate" field of WQE. */ 822 last_wqe->ctrl3 = txq->elts_head; 823 txq->elts_comp = 0; 824 #ifndef NDEBUG 825 ++txq->cq_pi; 826 #endif 827 } else { 828 txq->elts_comp = comp; 829 } 830 #ifdef MLX5_PMD_SOFT_COUNTERS 831 /* Increment sent packets counter. */ 832 txq->stats.opackets += i; 833 #endif 834 /* Ring QP doorbell. */ 835 mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)last_wqe); 836 return i; 837 } 838 839 /** 840 * Open a MPW session. 841 * 842 * @param txq 843 * Pointer to TX queue structure. 844 * @param mpw 845 * Pointer to MPW session structure. 846 * @param length 847 * Packet length. 
848 */ 849 static inline void 850 mlx5_mpw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, uint32_t length) 851 { 852 uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1); 853 volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] = 854 (volatile struct mlx5_wqe_data_seg (*)[]) 855 tx_mlx5_wqe(txq, idx + 1); 856 857 mpw->state = MLX5_MPW_STATE_OPENED; 858 mpw->pkts_n = 0; 859 mpw->len = length; 860 mpw->total_len = 0; 861 mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx); 862 mpw->wqe->eseg.mss = rte_cpu_to_be_16(length); 863 mpw->wqe->eseg.inline_hdr_sz = 0; 864 mpw->wqe->eseg.rsvd0 = 0; 865 mpw->wqe->eseg.rsvd1 = 0; 866 mpw->wqe->eseg.rsvd2 = 0; 867 mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) | 868 (txq->wqe_ci << 8) | 869 MLX5_OPCODE_TSO); 870 mpw->wqe->ctrl[2] = 0; 871 mpw->wqe->ctrl[3] = 0; 872 mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *) 873 (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE)); 874 mpw->data.dseg[1] = (volatile struct mlx5_wqe_data_seg *) 875 (((uintptr_t)mpw->wqe) + (3 * MLX5_WQE_DWORD_SIZE)); 876 mpw->data.dseg[2] = &(*dseg)[0]; 877 mpw->data.dseg[3] = &(*dseg)[1]; 878 mpw->data.dseg[4] = &(*dseg)[2]; 879 } 880 881 /** 882 * Close a MPW session. 883 * 884 * @param txq 885 * Pointer to TX queue structure. 886 * @param mpw 887 * Pointer to MPW session structure. 888 */ 889 static inline void 890 mlx5_mpw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw) 891 { 892 unsigned int num = mpw->pkts_n; 893 894 /* 895 * Store size in multiple of 16 bytes. Control and Ethernet segments 896 * count as 2. 897 */ 898 mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s | (2 + num)); 899 mpw->state = MLX5_MPW_STATE_CLOSED; 900 if (num < 3) 901 ++txq->wqe_ci; 902 else 903 txq->wqe_ci += 2; 904 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci)); 905 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); 906 } 907 908 /** 909 * DPDK callback for TX with MPW support. 910 * 911 * @param dpdk_txq 912 * Generic pointer to TX queue structure. 913 * @param[in] pkts 914 * Packets to transmit. 915 * @param pkts_n 916 * Number of packets in array. 917 * 918 * @return 919 * Number of packets successfully transmitted (<= pkts_n). 920 */ 921 uint16_t 922 mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) 923 { 924 struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq; 925 uint16_t elts_head = txq->elts_head; 926 const uint16_t elts_n = 1 << txq->elts_n; 927 const uint16_t elts_m = elts_n - 1; 928 unsigned int i = 0; 929 unsigned int j = 0; 930 uint16_t max_elts; 931 uint16_t max_wqe; 932 unsigned int comp; 933 struct mlx5_mpw mpw = { 934 .state = MLX5_MPW_STATE_CLOSED, 935 }; 936 937 if (unlikely(!pkts_n)) 938 return 0; 939 /* Prefetch first packet cacheline. */ 940 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci)); 941 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); 942 /* Start processing. */ 943 mlx5_tx_complete(txq); 944 max_elts = (elts_n - (elts_head - txq->elts_tail)); 945 /* A CQE slot must always be available. */ 946 assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); 947 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); 948 if (unlikely(!max_wqe)) 949 return 0; 950 do { 951 struct rte_mbuf *buf = *(pkts++); 952 uint32_t length; 953 unsigned int segs_n = buf->nb_segs; 954 uint32_t cs_flags; 955 956 /* 957 * Make sure there is enough room to store this packet and 958 * that one ring entry remains unused. 
959 */ 960 assert(segs_n); 961 if (max_elts < segs_n) 962 break; 963 /* Do not bother with large packets MPW cannot handle. */ 964 if (segs_n > MLX5_MPW_DSEG_MAX) { 965 txq->stats.oerrors++; 966 break; 967 } 968 max_elts -= segs_n; 969 --pkts_n; 970 cs_flags = txq_ol_cksum_to_cs(buf); 971 /* Retrieve packet information. */ 972 length = PKT_LEN(buf); 973 assert(length); 974 /* Start new session if packet differs. */ 975 if ((mpw.state == MLX5_MPW_STATE_OPENED) && 976 ((mpw.len != length) || 977 (segs_n != 1) || 978 (mpw.wqe->eseg.cs_flags != cs_flags))) 979 mlx5_mpw_close(txq, &mpw); 980 if (mpw.state == MLX5_MPW_STATE_CLOSED) { 981 /* 982 * Multi-Packet WQE consumes at most two WQE. 983 * mlx5_mpw_new() expects to be able to use such 984 * resources. 985 */ 986 if (unlikely(max_wqe < 2)) 987 break; 988 max_wqe -= 2; 989 mlx5_mpw_new(txq, &mpw, length); 990 mpw.wqe->eseg.cs_flags = cs_flags; 991 } 992 /* Multi-segment packets must be alone in their MPW. */ 993 assert((segs_n == 1) || (mpw.pkts_n == 0)); 994 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) 995 length = 0; 996 #endif 997 do { 998 volatile struct mlx5_wqe_data_seg *dseg; 999 uintptr_t addr; 1000 1001 assert(buf); 1002 (*txq->elts)[elts_head++ & elts_m] = buf; 1003 dseg = mpw.data.dseg[mpw.pkts_n]; 1004 addr = rte_pktmbuf_mtod(buf, uintptr_t); 1005 *dseg = (struct mlx5_wqe_data_seg){ 1006 .byte_count = rte_cpu_to_be_32(DATA_LEN(buf)), 1007 .lkey = mlx5_tx_mb2mr(txq, buf), 1008 .addr = rte_cpu_to_be_64(addr), 1009 }; 1010 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) 1011 length += DATA_LEN(buf); 1012 #endif 1013 buf = buf->next; 1014 ++mpw.pkts_n; 1015 ++j; 1016 } while (--segs_n); 1017 assert(length == mpw.len); 1018 if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) 1019 mlx5_mpw_close(txq, &mpw); 1020 #ifdef MLX5_PMD_SOFT_COUNTERS 1021 /* Increment sent bytes counter. */ 1022 txq->stats.obytes += length; 1023 #endif 1024 ++i; 1025 } while (pkts_n); 1026 /* Take a shortcut if nothing must be sent. */ 1027 if (unlikely(i == 0)) 1028 return 0; 1029 /* Check whether completion threshold has been reached. */ 1030 /* "j" includes both packets and segments. */ 1031 comp = txq->elts_comp + j; 1032 if (comp >= MLX5_TX_COMP_THRESH) { 1033 volatile struct mlx5_wqe *wqe = mpw.wqe; 1034 1035 /* Request completion on last WQE. */ 1036 wqe->ctrl[2] = rte_cpu_to_be_32(8); 1037 /* Save elts_head in unused "immediate" field of WQE. */ 1038 wqe->ctrl[3] = elts_head; 1039 txq->elts_comp = 0; 1040 #ifndef NDEBUG 1041 ++txq->cq_pi; 1042 #endif 1043 } else { 1044 txq->elts_comp = comp; 1045 } 1046 #ifdef MLX5_PMD_SOFT_COUNTERS 1047 /* Increment sent packets counter. */ 1048 txq->stats.opackets += i; 1049 #endif 1050 /* Ring QP doorbell. */ 1051 if (mpw.state == MLX5_MPW_STATE_OPENED) 1052 mlx5_mpw_close(txq, &mpw); 1053 mlx5_tx_dbrec(txq, mpw.wqe); 1054 txq->elts_head = elts_head; 1055 return i; 1056 } 1057 1058 /** 1059 * Open a MPW inline session. 1060 * 1061 * @param txq 1062 * Pointer to TX queue structure. 1063 * @param mpw 1064 * Pointer to MPW session structure. 1065 * @param length 1066 * Packet length. 
1067 */ 1068 static inline void 1069 mlx5_mpw_inline_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, 1070 uint32_t length) 1071 { 1072 uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1); 1073 struct mlx5_wqe_inl_small *inl; 1074 1075 mpw->state = MLX5_MPW_INL_STATE_OPENED; 1076 mpw->pkts_n = 0; 1077 mpw->len = length; 1078 mpw->total_len = 0; 1079 mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx); 1080 mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) | 1081 (txq->wqe_ci << 8) | 1082 MLX5_OPCODE_TSO); 1083 mpw->wqe->ctrl[2] = 0; 1084 mpw->wqe->ctrl[3] = 0; 1085 mpw->wqe->eseg.mss = rte_cpu_to_be_16(length); 1086 mpw->wqe->eseg.inline_hdr_sz = 0; 1087 mpw->wqe->eseg.cs_flags = 0; 1088 mpw->wqe->eseg.rsvd0 = 0; 1089 mpw->wqe->eseg.rsvd1 = 0; 1090 mpw->wqe->eseg.rsvd2 = 0; 1091 inl = (struct mlx5_wqe_inl_small *) 1092 (((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE); 1093 mpw->data.raw = (uint8_t *)&inl->raw; 1094 } 1095 1096 /** 1097 * Close a MPW inline session. 1098 * 1099 * @param txq 1100 * Pointer to TX queue structure. 1101 * @param mpw 1102 * Pointer to MPW session structure. 1103 */ 1104 static inline void 1105 mlx5_mpw_inline_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw) 1106 { 1107 unsigned int size; 1108 struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *) 1109 (((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE)); 1110 1111 size = MLX5_WQE_SIZE - MLX5_MWQE64_INL_DATA + mpw->total_len; 1112 /* 1113 * Store size in multiple of 16 bytes. Control and Ethernet segments 1114 * count as 2. 1115 */ 1116 mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s | 1117 MLX5_WQE_DS(size)); 1118 mpw->state = MLX5_MPW_STATE_CLOSED; 1119 inl->byte_cnt = rte_cpu_to_be_32(mpw->total_len | MLX5_INLINE_SEG); 1120 txq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE; 1121 } 1122 1123 /** 1124 * DPDK callback for TX with MPW inline support. 1125 * 1126 * @param dpdk_txq 1127 * Generic pointer to TX queue structure. 1128 * @param[in] pkts 1129 * Packets to transmit. 1130 * @param pkts_n 1131 * Number of packets in array. 1132 * 1133 * @return 1134 * Number of packets successfully transmitted (<= pkts_n). 1135 */ 1136 uint16_t 1137 mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, 1138 uint16_t pkts_n) 1139 { 1140 struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq; 1141 uint16_t elts_head = txq->elts_head; 1142 const uint16_t elts_n = 1 << txq->elts_n; 1143 const uint16_t elts_m = elts_n - 1; 1144 unsigned int i = 0; 1145 unsigned int j = 0; 1146 uint16_t max_elts; 1147 uint16_t max_wqe; 1148 unsigned int comp; 1149 unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE; 1150 struct mlx5_mpw mpw = { 1151 .state = MLX5_MPW_STATE_CLOSED, 1152 }; 1153 /* 1154 * Compute the maximum number of WQE which can be consumed by inline 1155 * code. 1156 * - 2 DSEG for: 1157 * - 1 control segment, 1158 * - 1 Ethernet segment, 1159 * - N Dseg from the inline request. 1160 */ 1161 const unsigned int wqe_inl_n = 1162 ((2 * MLX5_WQE_DWORD_SIZE + 1163 txq->max_inline * RTE_CACHE_LINE_SIZE) + 1164 RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; 1165 1166 if (unlikely(!pkts_n)) 1167 return 0; 1168 /* Prefetch first packet cacheline. */ 1169 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci)); 1170 rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1)); 1171 /* Start processing. */ 1172 mlx5_tx_complete(txq); 1173 max_elts = (elts_n - (elts_head - txq->elts_tail)); 1174 /* A CQE slot must always be available. 
*/ 1175 assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); 1176 do { 1177 struct rte_mbuf *buf = *(pkts++); 1178 uintptr_t addr; 1179 uint32_t length; 1180 unsigned int segs_n = buf->nb_segs; 1181 uint8_t cs_flags; 1182 1183 /* 1184 * Make sure there is enough room to store this packet and 1185 * that one ring entry remains unused. 1186 */ 1187 assert(segs_n); 1188 if (max_elts < segs_n) 1189 break; 1190 /* Do not bother with large packets MPW cannot handle. */ 1191 if (segs_n > MLX5_MPW_DSEG_MAX) { 1192 txq->stats.oerrors++; 1193 break; 1194 } 1195 max_elts -= segs_n; 1196 --pkts_n; 1197 /* 1198 * Compute max_wqe in case less WQE were consumed in previous 1199 * iteration. 1200 */ 1201 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); 1202 cs_flags = txq_ol_cksum_to_cs(buf); 1203 /* Retrieve packet information. */ 1204 length = PKT_LEN(buf); 1205 /* Start new session if packet differs. */ 1206 if (mpw.state == MLX5_MPW_STATE_OPENED) { 1207 if ((mpw.len != length) || 1208 (segs_n != 1) || 1209 (mpw.wqe->eseg.cs_flags != cs_flags)) 1210 mlx5_mpw_close(txq, &mpw); 1211 } else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) { 1212 if ((mpw.len != length) || 1213 (segs_n != 1) || 1214 (length > inline_room) || 1215 (mpw.wqe->eseg.cs_flags != cs_flags)) { 1216 mlx5_mpw_inline_close(txq, &mpw); 1217 inline_room = 1218 txq->max_inline * RTE_CACHE_LINE_SIZE; 1219 } 1220 } 1221 if (mpw.state == MLX5_MPW_STATE_CLOSED) { 1222 if ((segs_n != 1) || 1223 (length > inline_room)) { 1224 /* 1225 * Multi-Packet WQE consumes at most two WQE. 1226 * mlx5_mpw_new() expects to be able to use 1227 * such resources. 1228 */ 1229 if (unlikely(max_wqe < 2)) 1230 break; 1231 max_wqe -= 2; 1232 mlx5_mpw_new(txq, &mpw, length); 1233 mpw.wqe->eseg.cs_flags = cs_flags; 1234 } else { 1235 if (unlikely(max_wqe < wqe_inl_n)) 1236 break; 1237 max_wqe -= wqe_inl_n; 1238 mlx5_mpw_inline_new(txq, &mpw, length); 1239 mpw.wqe->eseg.cs_flags = cs_flags; 1240 } 1241 } 1242 /* Multi-segment packets must be alone in their MPW. */ 1243 assert((segs_n == 1) || (mpw.pkts_n == 0)); 1244 if (mpw.state == MLX5_MPW_STATE_OPENED) { 1245 assert(inline_room == 1246 txq->max_inline * RTE_CACHE_LINE_SIZE); 1247 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) 1248 length = 0; 1249 #endif 1250 do { 1251 volatile struct mlx5_wqe_data_seg *dseg; 1252 1253 assert(buf); 1254 (*txq->elts)[elts_head++ & elts_m] = buf; 1255 dseg = mpw.data.dseg[mpw.pkts_n]; 1256 addr = rte_pktmbuf_mtod(buf, uintptr_t); 1257 *dseg = (struct mlx5_wqe_data_seg){ 1258 .byte_count = 1259 rte_cpu_to_be_32(DATA_LEN(buf)), 1260 .lkey = mlx5_tx_mb2mr(txq, buf), 1261 .addr = rte_cpu_to_be_64(addr), 1262 }; 1263 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG) 1264 length += DATA_LEN(buf); 1265 #endif 1266 buf = buf->next; 1267 ++mpw.pkts_n; 1268 ++j; 1269 } while (--segs_n); 1270 assert(length == mpw.len); 1271 if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) 1272 mlx5_mpw_close(txq, &mpw); 1273 } else { 1274 unsigned int max; 1275 1276 assert(mpw.state == MLX5_MPW_INL_STATE_OPENED); 1277 assert(length <= inline_room); 1278 assert(length == DATA_LEN(buf)); 1279 addr = rte_pktmbuf_mtod(buf, uintptr_t); 1280 (*txq->elts)[elts_head++ & elts_m] = buf; 1281 /* Maximum number of bytes before wrapping. 
*/ 1282 max = ((((uintptr_t)(txq->wqes)) + 1283 (1 << txq->wqe_n) * 1284 MLX5_WQE_SIZE) - 1285 (uintptr_t)mpw.data.raw); 1286 if (length > max) { 1287 rte_memcpy((void *)(uintptr_t)mpw.data.raw, 1288 (void *)addr, 1289 max); 1290 mpw.data.raw = (volatile void *)txq->wqes; 1291 rte_memcpy((void *)(uintptr_t)mpw.data.raw, 1292 (void *)(addr + max), 1293 length - max); 1294 mpw.data.raw += length - max; 1295 } else { 1296 rte_memcpy((void *)(uintptr_t)mpw.data.raw, 1297 (void *)addr, 1298 length); 1299 1300 if (length == max) 1301 mpw.data.raw = 1302 (volatile void *)txq->wqes; 1303 else 1304 mpw.data.raw += length; 1305 } 1306 ++mpw.pkts_n; 1307 mpw.total_len += length; 1308 ++j; 1309 if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) { 1310 mlx5_mpw_inline_close(txq, &mpw); 1311 inline_room = 1312 txq->max_inline * RTE_CACHE_LINE_SIZE; 1313 } else { 1314 inline_room -= length; 1315 } 1316 } 1317 #ifdef MLX5_PMD_SOFT_COUNTERS 1318 /* Increment sent bytes counter. */ 1319 txq->stats.obytes += length; 1320 #endif 1321 ++i; 1322 } while (pkts_n); 1323 /* Take a shortcut if nothing must be sent. */ 1324 if (unlikely(i == 0)) 1325 return 0; 1326 /* Check whether completion threshold has been reached. */ 1327 /* "j" includes both packets and segments. */ 1328 comp = txq->elts_comp + j; 1329 if (comp >= MLX5_TX_COMP_THRESH) { 1330 volatile struct mlx5_wqe *wqe = mpw.wqe; 1331 1332 /* Request completion on last WQE. */ 1333 wqe->ctrl[2] = rte_cpu_to_be_32(8); 1334 /* Save elts_head in unused "immediate" field of WQE. */ 1335 wqe->ctrl[3] = elts_head; 1336 txq->elts_comp = 0; 1337 #ifndef NDEBUG 1338 ++txq->cq_pi; 1339 #endif 1340 } else { 1341 txq->elts_comp = comp; 1342 } 1343 #ifdef MLX5_PMD_SOFT_COUNTERS 1344 /* Increment sent packets counter. */ 1345 txq->stats.opackets += i; 1346 #endif 1347 /* Ring QP doorbell. */ 1348 if (mpw.state == MLX5_MPW_INL_STATE_OPENED) 1349 mlx5_mpw_inline_close(txq, &mpw); 1350 else if (mpw.state == MLX5_MPW_STATE_OPENED) 1351 mlx5_mpw_close(txq, &mpw); 1352 mlx5_tx_dbrec(txq, mpw.wqe); 1353 txq->elts_head = elts_head; 1354 return i; 1355 } 1356 1357 /** 1358 * Open an Enhanced MPW session. 1359 * 1360 * @param txq 1361 * Pointer to TX queue structure. 1362 * @param mpw 1363 * Pointer to MPW session structure. 1364 * @param length 1365 * Packet length. 1366 */ 1367 static inline void 1368 mlx5_empw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, int padding) 1369 { 1370 uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1); 1371 1372 mpw->state = MLX5_MPW_ENHANCED_STATE_OPENED; 1373 mpw->pkts_n = 0; 1374 mpw->total_len = sizeof(struct mlx5_wqe); 1375 mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx); 1376 mpw->wqe->ctrl[0] = 1377 rte_cpu_to_be_32((MLX5_OPC_MOD_ENHANCED_MPSW << 24) | 1378 (txq->wqe_ci << 8) | 1379 MLX5_OPCODE_ENHANCED_MPSW); 1380 mpw->wqe->ctrl[2] = 0; 1381 mpw->wqe->ctrl[3] = 0; 1382 memset((void *)(uintptr_t)&mpw->wqe->eseg, 0, MLX5_WQE_DWORD_SIZE); 1383 if (unlikely(padding)) { 1384 uintptr_t addr = (uintptr_t)(mpw->wqe + 1); 1385 1386 /* Pad the first 2 DWORDs with zero-length inline header. */ 1387 *(volatile uint32_t *)addr = rte_cpu_to_be_32(MLX5_INLINE_SEG); 1388 *(volatile uint32_t *)(addr + MLX5_WQE_DWORD_SIZE) = 1389 rte_cpu_to_be_32(MLX5_INLINE_SEG); 1390 mpw->total_len += 2 * MLX5_WQE_DWORD_SIZE; 1391 /* Start from the next WQEBB. */ 1392 mpw->data.raw = (volatile void *)(tx_mlx5_wqe(txq, idx + 1)); 1393 } else { 1394 mpw->data.raw = (volatile void *)(mpw->wqe + 1); 1395 } 1396 } 1397 1398 /** 1399 * Close an Enhanced MPW session. 
1400 * 1401 * @param txq 1402 * Pointer to TX queue structure. 1403 * @param mpw 1404 * Pointer to MPW session structure. 1405 * 1406 * @return 1407 * Number of consumed WQEs. 1408 */ 1409 static inline uint16_t 1410 mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw) 1411 { 1412 uint16_t ret; 1413 1414 /* Store size in multiple of 16 bytes. Control and Ethernet segments 1415 * count as 2. 1416 */ 1417 mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s | 1418 MLX5_WQE_DS(mpw->total_len)); 1419 mpw->state = MLX5_MPW_STATE_CLOSED; 1420 ret = (mpw->total_len + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE; 1421 txq->wqe_ci += ret; 1422 return ret; 1423 } 1424 1425 /** 1426 * TX with Enhanced MPW support. 1427 * 1428 * @param txq 1429 * Pointer to TX queue structure. 1430 * @param[in] pkts 1431 * Packets to transmit. 1432 * @param pkts_n 1433 * Number of packets in array. 1434 * 1435 * @return 1436 * Number of packets successfully transmitted (<= pkts_n). 1437 */ 1438 static inline uint16_t 1439 txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, 1440 uint16_t pkts_n) 1441 { 1442 uint16_t elts_head = txq->elts_head; 1443 const uint16_t elts_n = 1 << txq->elts_n; 1444 const uint16_t elts_m = elts_n - 1; 1445 unsigned int i = 0; 1446 unsigned int j = 0; 1447 uint16_t max_elts; 1448 uint16_t max_wqe; 1449 unsigned int max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE; 1450 unsigned int mpw_room = 0; 1451 unsigned int inl_pad = 0; 1452 uint32_t inl_hdr; 1453 struct mlx5_mpw mpw = { 1454 .state = MLX5_MPW_STATE_CLOSED, 1455 }; 1456 1457 if (unlikely(!pkts_n)) 1458 return 0; 1459 /* Start processing. */ 1460 mlx5_tx_complete(txq); 1461 max_elts = (elts_n - (elts_head - txq->elts_tail)); 1462 /* A CQE slot must always be available. */ 1463 assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci)); 1464 max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi); 1465 if (unlikely(!max_wqe)) 1466 return 0; 1467 do { 1468 struct rte_mbuf *buf = *(pkts++); 1469 uintptr_t addr; 1470 unsigned int do_inline = 0; /* Whether inline is possible. */ 1471 uint32_t length; 1472 uint8_t cs_flags; 1473 1474 /* Multi-segmented packet is handled in slow-path outside. */ 1475 assert(NB_SEGS(buf) == 1); 1476 /* Make sure there is enough room to store this packet. */ 1477 if (max_elts - j == 0) 1478 break; 1479 cs_flags = txq_ol_cksum_to_cs(buf); 1480 /* Retrieve packet information. */ 1481 length = PKT_LEN(buf); 1482 /* Start new session if: 1483 * - multi-segment packet 1484 * - no space left even for a dseg 1485 * - next packet can be inlined with a new WQE 1486 * - cs_flag differs 1487 */ 1488 if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) { 1489 if ((inl_pad + sizeof(struct mlx5_wqe_data_seg) > 1490 mpw_room) || 1491 (length <= txq->inline_max_packet_sz && 1492 inl_pad + sizeof(inl_hdr) + length > 1493 mpw_room) || 1494 (mpw.wqe->eseg.cs_flags != cs_flags)) 1495 max_wqe -= mlx5_empw_close(txq, &mpw); 1496 } 1497 if (unlikely(mpw.state == MLX5_MPW_STATE_CLOSED)) { 1498 /* In Enhanced MPW, inline as much as the budget is 1499 * allowed. The remaining space is to be filled with 1500 * dsegs. If the title WQEBB isn't padded, it will have 1501 * 2 dsegs there. 1502 */ 1503 mpw_room = RTE_MIN(MLX5_WQE_SIZE_MAX, 1504 (max_inline ? max_inline : 1505 pkts_n * MLX5_WQE_DWORD_SIZE) + 1506 MLX5_WQE_SIZE); 1507 if (unlikely(max_wqe * MLX5_WQE_SIZE < mpw_room)) 1508 break; 1509 /* Don't pad the title WQEBB to not waste WQ. 
*/ 1510 mlx5_empw_new(txq, &mpw, 0); 1511 mpw_room -= mpw.total_len; 1512 inl_pad = 0; 1513 do_inline = length <= txq->inline_max_packet_sz && 1514 sizeof(inl_hdr) + length <= mpw_room && 1515 !txq->mpw_hdr_dseg; 1516 mpw.wqe->eseg.cs_flags = cs_flags; 1517 } else { 1518 /* Evaluate whether the next packet can be inlined. 1519 * Inlininig is possible when: 1520 * - length is less than configured value 1521 * - length fits for remaining space 1522 * - not required to fill the title WQEBB with dsegs 1523 */ 1524 do_inline = 1525 length <= txq->inline_max_packet_sz && 1526 inl_pad + sizeof(inl_hdr) + length <= 1527 mpw_room && 1528 (!txq->mpw_hdr_dseg || 1529 mpw.total_len >= MLX5_WQE_SIZE); 1530 } 1531 if (max_inline && do_inline) { 1532 /* Inline packet into WQE. */ 1533 unsigned int max; 1534 1535 assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED); 1536 assert(length == DATA_LEN(buf)); 1537 inl_hdr = rte_cpu_to_be_32(length | MLX5_INLINE_SEG); 1538 addr = rte_pktmbuf_mtod(buf, uintptr_t); 1539 mpw.data.raw = (volatile void *) 1540 ((uintptr_t)mpw.data.raw + inl_pad); 1541 max = tx_mlx5_wq_tailroom(txq, 1542 (void *)(uintptr_t)mpw.data.raw); 1543 /* Copy inline header. */ 1544 mpw.data.raw = (volatile void *) 1545 mlx5_copy_to_wq( 1546 (void *)(uintptr_t)mpw.data.raw, 1547 &inl_hdr, 1548 sizeof(inl_hdr), 1549 (void *)(uintptr_t)txq->wqes, 1550 max); 1551 max = tx_mlx5_wq_tailroom(txq, 1552 (void *)(uintptr_t)mpw.data.raw); 1553 /* Copy packet data. */ 1554 mpw.data.raw = (volatile void *) 1555 mlx5_copy_to_wq( 1556 (void *)(uintptr_t)mpw.data.raw, 1557 (void *)addr, 1558 length, 1559 (void *)(uintptr_t)txq->wqes, 1560 max); 1561 ++mpw.pkts_n; 1562 mpw.total_len += (inl_pad + sizeof(inl_hdr) + length); 1563 /* No need to get completion as the entire packet is 1564 * copied to WQ. Free the buf right away. 1565 */ 1566 rte_pktmbuf_free_seg(buf); 1567 mpw_room -= (inl_pad + sizeof(inl_hdr) + length); 1568 /* Add pad in the next packet if any. */ 1569 inl_pad = (((uintptr_t)mpw.data.raw + 1570 (MLX5_WQE_DWORD_SIZE - 1)) & 1571 ~(MLX5_WQE_DWORD_SIZE - 1)) - 1572 (uintptr_t)mpw.data.raw; 1573 } else { 1574 /* No inline. Load a dseg of packet pointer. */ 1575 volatile rte_v128u32_t *dseg; 1576 1577 assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED); 1578 assert((inl_pad + sizeof(*dseg)) <= mpw_room); 1579 assert(length == DATA_LEN(buf)); 1580 if (!tx_mlx5_wq_tailroom(txq, 1581 (void *)((uintptr_t)mpw.data.raw 1582 + inl_pad))) 1583 dseg = (volatile void *)txq->wqes; 1584 else 1585 dseg = (volatile void *) 1586 ((uintptr_t)mpw.data.raw + 1587 inl_pad); 1588 (*txq->elts)[elts_head++ & elts_m] = buf; 1589 addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, 1590 uintptr_t)); 1591 *dseg = (rte_v128u32_t) { 1592 rte_cpu_to_be_32(length), 1593 mlx5_tx_mb2mr(txq, buf), 1594 addr, 1595 addr >> 32, 1596 }; 1597 mpw.data.raw = (volatile void *)(dseg + 1); 1598 mpw.total_len += (inl_pad + sizeof(*dseg)); 1599 ++j; 1600 ++mpw.pkts_n; 1601 mpw_room -= (inl_pad + sizeof(*dseg)); 1602 inl_pad = 0; 1603 } 1604 #ifdef MLX5_PMD_SOFT_COUNTERS 1605 /* Increment sent bytes counter. */ 1606 txq->stats.obytes += length; 1607 #endif 1608 ++i; 1609 } while (i < pkts_n); 1610 /* Take a shortcut if nothing must be sent. */ 1611 if (unlikely(i == 0)) 1612 return 0; 1613 /* Check whether completion threshold has been reached. 
*/ 1614 if (txq->elts_comp + j >= MLX5_TX_COMP_THRESH || 1615 (uint16_t)(txq->wqe_ci - txq->mpw_comp) >= 1616 (1 << txq->wqe_n) / MLX5_TX_COMP_THRESH_INLINE_DIV) { 1617 volatile struct mlx5_wqe *wqe = mpw.wqe; 1618 1619 /* Request completion on last WQE. */ 1620 wqe->ctrl[2] = rte_cpu_to_be_32(8); 1621 /* Save elts_head in unused "immediate" field of WQE. */ 1622 wqe->ctrl[3] = elts_head; 1623 txq->elts_comp = 0; 1624 txq->mpw_comp = txq->wqe_ci; 1625 #ifndef NDEBUG 1626 ++txq->cq_pi; 1627 #endif 1628 } else { 1629 txq->elts_comp += j; 1630 } 1631 #ifdef MLX5_PMD_SOFT_COUNTERS 1632 /* Increment sent packets counter. */ 1633 txq->stats.opackets += i; 1634 #endif 1635 if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) 1636 mlx5_empw_close(txq, &mpw); 1637 /* Ring QP doorbell. */ 1638 mlx5_tx_dbrec(txq, mpw.wqe); 1639 txq->elts_head = elts_head; 1640 return i; 1641 } 1642 1643 /** 1644 * DPDK callback for TX with Enhanced MPW support. 1645 * 1646 * @param dpdk_txq 1647 * Generic pointer to TX queue structure. 1648 * @param[in] pkts 1649 * Packets to transmit. 1650 * @param pkts_n 1651 * Number of packets in array. 1652 * 1653 * @return 1654 * Number of packets successfully transmitted (<= pkts_n). 1655 */ 1656 uint16_t 1657 mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n) 1658 { 1659 struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq; 1660 uint16_t nb_tx = 0; 1661 1662 while (pkts_n > nb_tx) { 1663 uint16_t n; 1664 uint16_t ret; 1665 1666 n = txq_count_contig_multi_seg(&pkts[nb_tx], pkts_n - nb_tx); 1667 if (n) { 1668 ret = mlx5_tx_burst(dpdk_txq, &pkts[nb_tx], n); 1669 if (!ret) 1670 break; 1671 nb_tx += ret; 1672 } 1673 n = txq_count_contig_single_seg(&pkts[nb_tx], pkts_n - nb_tx); 1674 if (n) { 1675 ret = txq_burst_empw(txq, &pkts[nb_tx], n); 1676 if (!ret) 1677 break; 1678 nb_tx += ret; 1679 } 1680 } 1681 return nb_tx; 1682 } 1683 1684 /** 1685 * Translate RX completion flags to packet type. 1686 * 1687 * @param[in] rxq 1688 * Pointer to RX queue structure. 1689 * @param[in] cqe 1690 * Pointer to CQE. 1691 * 1692 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 1693 * 1694 * @return 1695 * Packet type for struct rte_mbuf. 1696 */ 1697 static inline uint32_t 1698 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe) 1699 { 1700 uint8_t idx; 1701 uint8_t pinfo = cqe->pkt_info; 1702 uint16_t ptype = cqe->hdr_type_etc; 1703 1704 /* 1705 * The index to the array should have: 1706 * bit[1:0] = l3_hdr_type 1707 * bit[4:2] = l4_hdr_type 1708 * bit[5] = ip_frag 1709 * bit[6] = tunneled 1710 * bit[7] = outer_l3_type 1711 */ 1712 idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10); 1713 return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6)); 1714 } 1715 1716 /** 1717 * Get size of the next packet for a given CQE. For compressed CQEs, the 1718 * consumer index is updated only once all packets of the current one have 1719 * been processed. 1720 * 1721 * @param rxq 1722 * Pointer to RX queue. 1723 * @param cqe 1724 * CQE to process. 1725 * @param[out] mcqe 1726 * Store pointer to mini-CQE if compressed. Otherwise, the pointer is not 1727 * written. 1728 * 1729 * @return 1730 * Packet size in bytes (0 if there is none), -1 in case of completion 1731 * with error. 
1732 */ 1733 static inline int 1734 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 1735 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe) 1736 { 1737 struct rxq_zip *zip = &rxq->zip; 1738 uint16_t cqe_n = cqe_cnt + 1; 1739 int len = 0; 1740 uint16_t idx, end; 1741 1742 /* Process compressed data in the CQE and mini arrays. */ 1743 if (zip->ai) { 1744 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1745 (volatile struct mlx5_mini_cqe8 (*)[8]) 1746 (uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].pkt_info); 1747 1748 len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt); 1749 *mcqe = &(*mc)[zip->ai & 7]; 1750 if ((++zip->ai & 7) == 0) { 1751 /* Invalidate consumed CQEs */ 1752 idx = zip->ca; 1753 end = zip->na; 1754 while (idx != end) { 1755 (*rxq->cqes)[idx & cqe_cnt].op_own = 1756 MLX5_CQE_INVALIDATE; 1757 ++idx; 1758 } 1759 /* 1760 * Increment consumer index to skip the number of 1761 * CQEs consumed. Hardware leaves holes in the CQ 1762 * ring for software use. 1763 */ 1764 zip->ca = zip->na; 1765 zip->na += 8; 1766 } 1767 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { 1768 /* Invalidate the rest */ 1769 idx = zip->ca; 1770 end = zip->cq_ci; 1771 1772 while (idx != end) { 1773 (*rxq->cqes)[idx & cqe_cnt].op_own = 1774 MLX5_CQE_INVALIDATE; 1775 ++idx; 1776 } 1777 rxq->cq_ci = zip->cq_ci; 1778 zip->ai = 0; 1779 } 1780 /* No compressed data, get next CQE and verify if it is compressed. */ 1781 } else { 1782 int ret; 1783 int8_t op_own; 1784 1785 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 1786 if (unlikely(ret == 1)) 1787 return 0; 1788 ++rxq->cq_ci; 1789 op_own = cqe->op_own; 1790 rte_cio_rmb(); 1791 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { 1792 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1793 (volatile struct mlx5_mini_cqe8 (*)[8]) 1794 (uintptr_t)(&(*rxq->cqes)[rxq->cq_ci & 1795 cqe_cnt].pkt_info); 1796 1797 /* Fix endianness. */ 1798 zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt); 1799 /* 1800 * Current mini array position is the one returned by 1801 * check_cqe64(). 1802 * 1803 * If completion comprises several mini arrays, as a 1804 * special case the second one is located 7 CQEs after 1805 * the initial CQE instead of 8 for subsequent ones. 1806 */ 1807 zip->ca = rxq->cq_ci; 1808 zip->na = zip->ca + 7; 1809 /* Compute the next non compressed CQE. */ 1810 --rxq->cq_ci; 1811 zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; 1812 /* Get packet size to return. */ 1813 len = rte_be_to_cpu_32((*mc)[0].byte_cnt); 1814 *mcqe = &(*mc)[0]; 1815 zip->ai = 1; 1816 /* Prefetch all the entries to be invalidated */ 1817 idx = zip->ca; 1818 end = zip->cq_ci; 1819 while (idx != end) { 1820 rte_prefetch0(&(*rxq->cqes)[(idx) & cqe_cnt]); 1821 ++idx; 1822 } 1823 } else { 1824 len = rte_be_to_cpu_32(cqe->byte_cnt); 1825 } 1826 /* Error while receiving packet. */ 1827 if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR)) 1828 return -1; 1829 } 1830 return len; 1831 } 1832 1833 /** 1834 * Translate RX completion flags to offload flags. 1835 * 1836 * @param[in] cqe 1837 * Pointer to CQE. 1838 * 1839 * @return 1840 * Offload flags (ol_flags) for struct rte_mbuf. 
1841 */ 1842 static inline uint32_t 1843 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe) 1844 { 1845 uint32_t ol_flags = 0; 1846 uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc); 1847 1848 ol_flags = 1849 TRANSPOSE(flags, 1850 MLX5_CQE_RX_L3_HDR_VALID, 1851 PKT_RX_IP_CKSUM_GOOD) | 1852 TRANSPOSE(flags, 1853 MLX5_CQE_RX_L4_HDR_VALID, 1854 PKT_RX_L4_CKSUM_GOOD); 1855 return ol_flags; 1856 } 1857 1858 /** 1859 * Fill in mbuf fields from RX completion flags. 1860 * Note that pkt->ol_flags should be initialized outside of this function. 1861 * 1862 * @param rxq 1863 * Pointer to RX queue. 1864 * @param pkt 1865 * mbuf to fill. 1866 * @param cqe 1867 * CQE to process. 1868 * @param rss_hash_res 1869 * Packet RSS Hash result. 1870 */ 1871 static inline void 1872 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 1873 volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res) 1874 { 1875 /* Update packet information. */ 1876 pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe); 1877 if (rss_hash_res && rxq->rss_hash) { 1878 pkt->hash.rss = rss_hash_res; 1879 pkt->ol_flags |= PKT_RX_RSS_HASH; 1880 } 1881 if (rxq->mark && MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) { 1882 pkt->ol_flags |= PKT_RX_FDIR; 1883 if (cqe->sop_drop_qpn != 1884 rte_cpu_to_be_32(MLX5_FLOW_MARK_DEFAULT)) { 1885 uint32_t mark = cqe->sop_drop_qpn; 1886 1887 pkt->ol_flags |= PKT_RX_FDIR_ID; 1888 pkt->hash.fdir.hi = mlx5_flow_mark_get(mark); 1889 } 1890 } 1891 if (rxq->csum) 1892 pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); 1893 if (rxq->vlan_strip && 1894 (cqe->hdr_type_etc & rte_cpu_to_be_16(MLX5_CQE_VLAN_STRIPPED))) { 1895 pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; 1896 pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info); 1897 } 1898 if (rxq->hw_timestamp) { 1899 pkt->timestamp = rte_be_to_cpu_64(cqe->timestamp); 1900 pkt->ol_flags |= PKT_RX_TIMESTAMP; 1901 } 1902 } 1903 1904 /** 1905 * DPDK callback for RX. 1906 * 1907 * @param dpdk_rxq 1908 * Generic pointer to RX queue structure. 1909 * @param[out] pkts 1910 * Array to store received packets. 1911 * @param pkts_n 1912 * Maximum number of packets in array. 1913 * 1914 * @return 1915 * Number of packets successfully received (<= pkts_n). 1916 */ 1917 uint16_t 1918 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1919 { 1920 struct mlx5_rxq_data *rxq = dpdk_rxq; 1921 const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; 1922 const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1; 1923 const unsigned int sges_n = rxq->sges_n; 1924 struct rte_mbuf *pkt = NULL; 1925 struct rte_mbuf *seg = NULL; 1926 volatile struct mlx5_cqe *cqe = 1927 &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1928 unsigned int i = 0; 1929 unsigned int rq_ci = rxq->rq_ci << sges_n; 1930 int len = 0; /* keep its value across iterations. */ 1931 1932 while (pkts_n) { 1933 unsigned int idx = rq_ci & wqe_cnt; 1934 volatile struct mlx5_wqe_data_seg *wqe = 1935 &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx]; 1936 struct rte_mbuf *rep = (*rxq->elts)[idx]; 1937 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1938 uint32_t rss_hash_res; 1939 1940 if (pkt) 1941 NEXT(seg) = rep; 1942 seg = rep; 1943 rte_prefetch0(seg); 1944 rte_prefetch0(cqe); 1945 rte_prefetch0(wqe); 1946 rep = rte_mbuf_raw_alloc(rxq->mp); 1947 if (unlikely(rep == NULL)) { 1948 ++rxq->stats.rx_nombuf; 1949 if (!pkt) { 1950 /* 1951 * no buffers before we even started, 1952 * bail out silently. 
/**
 * DPDK callback for RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
        struct mlx5_rxq_data *rxq = dpdk_rxq;
        const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
        const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
        const unsigned int sges_n = rxq->sges_n;
        struct rte_mbuf *pkt = NULL;
        struct rte_mbuf *seg = NULL;
        volatile struct mlx5_cqe *cqe =
                &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
        unsigned int i = 0;
        unsigned int rq_ci = rxq->rq_ci << sges_n;
        int len = 0; /* keep its value across iterations. */

        while (pkts_n) {
                unsigned int idx = rq_ci & wqe_cnt;
                volatile struct mlx5_wqe_data_seg *wqe =
                        &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
                struct rte_mbuf *rep = (*rxq->elts)[idx];
                volatile struct mlx5_mini_cqe8 *mcqe = NULL;
                uint32_t rss_hash_res;

                if (pkt)
                        NEXT(seg) = rep;
                seg = rep;
                rte_prefetch0(seg);
                rte_prefetch0(cqe);
                rte_prefetch0(wqe);
                rep = rte_mbuf_raw_alloc(rxq->mp);
                if (unlikely(rep == NULL)) {
                        ++rxq->stats.rx_nombuf;
                        if (!pkt) {
                                /*
                                 * No buffers before we even started,
                                 * bail out silently.
                                 */
                                break;
                        }
                        while (pkt != seg) {
                                assert(pkt != (*rxq->elts)[idx]);
                                rep = NEXT(pkt);
                                NEXT(pkt) = NULL;
                                NB_SEGS(pkt) = 1;
                                rte_mbuf_raw_free(pkt);
                                pkt = rep;
                        }
                        break;
                }
                if (!pkt) {
                        cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
                        len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe);
                        if (!len) {
                                rte_mbuf_raw_free(rep);
                                break;
                        }
                        if (unlikely(len == -1)) {
                                /* RX error, packet is likely too large. */
                                rte_mbuf_raw_free(rep);
                                ++rxq->stats.idropped;
                                goto skip;
                        }
                        pkt = seg;
                        assert(len >= (rxq->crc_present << 2));
                        pkt->ol_flags = 0;
                        /* If compressed, take hash result from mini-CQE. */
                        rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ?
                                                        cqe->rx_hash_res :
                                                        mcqe->rx_hash_result);
                        rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
                        if (rxq->crc_present)
                                len -= ETHER_CRC_LEN;
                        PKT_LEN(pkt) = len;
                }
                DATA_LEN(rep) = DATA_LEN(seg);
                PKT_LEN(rep) = PKT_LEN(seg);
                SET_DATA_OFF(rep, DATA_OFF(seg));
                PORT(rep) = PORT(seg);
                (*rxq->elts)[idx] = rep;
                /*
                 * Fill NIC descriptor with the new buffer. The lkey and size
                 * of the buffers are already known, only the buffer address
                 * changes.
                 */
                wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
                /* If there's only one MR, no need to replace LKey in WQE. */
                if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
                        wqe->lkey = mlx5_rx_mb2mr(rxq, rep);
                if (len > DATA_LEN(seg)) {
                        len -= DATA_LEN(seg);
                        ++NB_SEGS(pkt);
                        ++rq_ci;
                        continue;
                }
                DATA_LEN(seg) = len;
#ifdef MLX5_PMD_SOFT_COUNTERS
                /* Increment bytes counter. */
                rxq->stats.ibytes += PKT_LEN(pkt);
#endif
                /* Return packet. */
                *(pkts++) = pkt;
                pkt = NULL;
                --pkts_n;
                ++i;
skip:
                /* Align consumer index to the next stride. */
                rq_ci >>= sges_n;
                ++rq_ci;
                rq_ci <<= sges_n;
        }
        if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci)))
                return 0;
        /* Update the consumer index. */
        rxq->rq_ci = rq_ci >> sges_n;
        rte_cio_wmb();
        *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
        rte_cio_wmb();
        *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
#ifdef MLX5_PMD_SOFT_COUNTERS
        /* Increment packets counter. */
        rxq->stats.ipackets += i;
#endif
        return i;
}

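/*
 * Notes on the scalar Rx burst above (illustrative summary):
 * - rq_ci is handled in units of single SGEs inside the loop (rxq->rq_ci is
 *   shifted left by sges_n on entry), which lets a scattered packet consume
 *   several consecutive SGEs; the code after the "skip" label rounds the
 *   index up so that every new packet starts on a fresh WQE stride, and it
 *   is shifted back before being stored and written to the doorbell.
 * - The rte_cio_wmb() barriers keep the doorbell writes ordered: WQE refills
 *   are flushed before the CQ doorbell, and the CQ doorbell before the RQ
 *   doorbell, so the device never sees an RQ doorbell covering stale buffers.
 */
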
void
mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque)
{
        struct mlx5_mprq_buf *buf = opaque;

        if (rte_atomic16_read(&buf->refcnt) == 1) {
                rte_mempool_put(buf->mp, buf);
        } else if (rte_atomic16_add_return(&buf->refcnt, -1) == 0) {
                rte_atomic16_set(&buf->refcnt, 1);
                rte_mempool_put(buf->mp, buf);
        }
}

void
mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf)
{
        mlx5_mprq_buf_free_cb(NULL, buf);
}

static inline void
mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx)
{
        struct mlx5_mprq_buf *rep = rxq->mprq_repl;
        volatile struct mlx5_wqe_data_seg *wqe =
                &((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg;
        void *addr;

        assert(rep != NULL);
        /* Replace the MPRQ buffer. */
        (*rxq->mprq_bufs)[rq_idx] = rep;
        /* Replace the WQE. */
        addr = mlx5_mprq_buf_addr(rep);
        wqe->addr = rte_cpu_to_be_64((uintptr_t)addr);
        /* If there's only one MR, no need to replace LKey in WQE. */
        if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
                wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr);
        /* Stash a buffer for the next replacement. */
        if (likely(!rte_mempool_get(rxq->mprq_mp, (void **)&rep)))
                rxq->mprq_repl = rep;
        else
                rxq->mprq_repl = NULL;
}

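/*
 * MPRQ buffer lifetime (illustrative): while a chunk sits in the RQ the PMD
 * holds one reference (refcnt == 1).  Every stride handed out to an
 * application mbuf as an external buffer takes an extra reference, released
 * through mlx5_mprq_buf_free_cb() when that mbuf is freed.  Once the WQE has
 * been replaced and the last attached mbuf is gone, the refcount drops back
 * and the chunk is returned to its mempool, which is why mprq_buf_replace()
 * only needs to install a fresh chunk and stash a spare in rxq->mprq_repl.
 */
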
/**
 * DPDK callback for RX with Multi-Packet RQ support.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
        struct mlx5_rxq_data *rxq = dpdk_rxq;
        const unsigned int strd_n = 1 << rxq->strd_num_n;
        const unsigned int strd_sz = 1 << rxq->strd_sz_n;
        const unsigned int strd_shift =
                MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en;
        const unsigned int cq_mask = (1 << rxq->cqe_n) - 1;
        const unsigned int wq_mask = (1 << rxq->elts_n) - 1;
        volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
        unsigned int i = 0;
        uint16_t rq_ci = rxq->rq_ci;
        uint16_t consumed_strd = rxq->consumed_strd;
        struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];

        while (i < pkts_n) {
                struct rte_mbuf *pkt;
                void *addr;
                int ret;
                unsigned int len;
                uint16_t strd_cnt;
                uint16_t strd_idx;
                uint32_t offset;
                uint32_t byte_cnt;
                volatile struct mlx5_mini_cqe8 *mcqe = NULL;
                uint32_t rss_hash_res = 0;

                if (consumed_strd == strd_n) {
                        /* Replace WQE only if the buffer is still in use. */
                        if (rte_atomic16_read(&buf->refcnt) > 1) {
                                mprq_buf_replace(rxq, rq_ci & wq_mask);
                                /* Release the old buffer. */
                                mlx5_mprq_buf_free(buf);
                        } else if (unlikely(rxq->mprq_repl == NULL)) {
                                struct mlx5_mprq_buf *rep;

                                /*
                                 * The MPRQ mempool is out of buffers, so
                                 * packets are being copied regardless of
                                 * their size. Retry allocation to get back
                                 * to normal.
                                 */
                                if (!rte_mempool_get(rxq->mprq_mp,
                                                     (void **)&rep))
                                        rxq->mprq_repl = rep;
                        }
                        /* Advance to the next WQE. */
                        consumed_strd = 0;
                        ++rq_ci;
                        buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
                }
                cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
                ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);
                if (!ret)
                        break;
                if (unlikely(ret == -1)) {
                        /* RX error, packet is likely too large. */
                        ++rxq->stats.idropped;
                        continue;
                }
                byte_cnt = ret;
                strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
                           MLX5_MPRQ_STRIDE_NUM_SHIFT;
                assert(strd_cnt);
                consumed_strd += strd_cnt;
                if (byte_cnt & MLX5_MPRQ_FILLER_MASK)
                        continue;
                if (mcqe == NULL) {
                        rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res);
                        strd_idx = rte_be_to_cpu_16(cqe->wqe_counter);
                } else {
                        /* mini-CQE for MPRQ doesn't have hash result. */
                        strd_idx = rte_be_to_cpu_16(mcqe->stride_idx);
                }
                assert(strd_idx < strd_n);
                assert(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & wq_mask));
                /*
                 * Currently configured to receive one packet per stride. But
                 * if the MTU is adjusted through the kernel interface, the
                 * device could consume multiple strides without raising an
                 * error. In that case the packet must be dropped because it
                 * is bigger than max_rx_pkt_len.
                 */
                if (unlikely(strd_cnt > 1)) {
                        ++rxq->stats.idropped;
                        continue;
                }
                pkt = rte_pktmbuf_alloc(rxq->mp);
                if (unlikely(pkt == NULL)) {
                        ++rxq->stats.rx_nombuf;
                        break;
                }
                len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
                assert((int)len >= (rxq->crc_present << 2));
                if (rxq->crc_present)
                        len -= ETHER_CRC_LEN;
                offset = strd_idx * strd_sz + strd_shift;
                addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf), offset);
                /* Initialize the offload flags. */
                pkt->ol_flags = 0;
                /*
                 * Memcpy packets to the target mbuf if:
                 * - The packet size is no larger than mprq_max_memcpy_len.
                 * - The mempool for Multi-Packet RQ is out of buffers.
                 */
                if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl == NULL) {
                        /*
                         * When memcpy'ing a packet due to out-of-buffer, the
                         * packet must be smaller than the target mbuf.
                         */
                        if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) {
                                rte_pktmbuf_free_seg(pkt);
                                ++rxq->stats.idropped;
                                continue;
                        }
                        rte_memcpy(rte_pktmbuf_mtod(pkt, void *), addr, len);
                } else {
                        rte_iova_t buf_iova;
                        struct rte_mbuf_ext_shared_info *shinfo;
                        uint16_t buf_len = strd_cnt * strd_sz;

                        /* Increment the refcnt of the whole chunk. */
                        rte_atomic16_add_return(&buf->refcnt, 1);
                        assert((uint16_t)rte_atomic16_read(&buf->refcnt) <=
                               strd_n + 1);
                        addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM);
                        /*
                         * The MLX5 device doesn't use IOVA itself, but it is
                         * needed in case the Rx packet is transmitted via a
                         * different PMD.
                         */
                        buf_iova = rte_mempool_virt2iova(buf) +
                                   RTE_PTR_DIFF(addr, buf);
                        shinfo = rte_pktmbuf_ext_shinfo_init_helper(addr,
                                        &buf_len, mlx5_mprq_buf_free_cb, buf);
                        /*
                         * EXT_ATTACHED_MBUF will be set to pkt->ol_flags when
                         * attaching the stride to the mbuf; more offload
                         * flags will be added below by calling
                         * rxq_cq_to_mbuf(). Other fields will be overwritten.
                         */
                        rte_pktmbuf_attach_extbuf(pkt, addr, buf_iova, buf_len,
                                                  shinfo);
                        rte_pktmbuf_reset_headroom(pkt);
                        assert(pkt->ol_flags == EXT_ATTACHED_MBUF);
                        /*
                         * Prevent potential overflow due to MTU change
                         * through the kernel interface.
                         */
                        if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) {
                                rte_pktmbuf_free_seg(pkt);
                                ++rxq->stats.idropped;
                                continue;
                        }
                }
                rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
                PKT_LEN(pkt) = len;
                DATA_LEN(pkt) = len;
                PORT(pkt) = rxq->port_id;
#ifdef MLX5_PMD_SOFT_COUNTERS
                /* Increment bytes counter. */
                rxq->stats.ibytes += PKT_LEN(pkt);
#endif
                /* Return packet. */
                *(pkts++) = pkt;
                ++i;
        }
        /* Update the consumer indexes. */
        rxq->consumed_strd = consumed_strd;
        rte_cio_wmb();
        *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
        if (rq_ci != rxq->rq_ci) {
                rxq->rq_ci = rq_ci;
                rte_cio_wmb();
                *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
        }
#ifdef MLX5_PMD_SOFT_COUNTERS
        /* Increment packets counter. */
        rxq->stats.ipackets += i;
#endif
        return i;
}

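/*
 * Illustrative summary of the two MPRQ delivery paths above: short packets
 * (len <= rxq->mprq_max_memcpy_len), or any packet received while no
 * replacement chunk is available, are copied into a freshly allocated mbuf
 * and leave the chunk's refcount untouched.  Larger packets avoid the copy
 * by attaching the stride as an external buffer: after headroom reset the
 * mbuf data points at mlx5_mprq_buf_addr(buf) + strd_idx * strd_sz +
 * strd_shift, and the extra reference taken on the chunk is released via
 * mlx5_mprq_buf_free_cb() when the application frees the mbuf.
 */
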
/**
 * Dummy DPDK callback for TX.
 *
 * This function is used to temporarily replace the real callback during
 * unsafe control operations on the queue, or in case of error.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
removed_tx_burst(void *dpdk_txq __rte_unused,
                 struct rte_mbuf **pkts __rte_unused,
                 uint16_t pkts_n __rte_unused)
{
        return 0;
}

/**
 * Dummy DPDK callback for RX.
 *
 * This function is used to temporarily replace the real callback during
 * unsafe control operations on the queue, or in case of error.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
removed_rx_burst(void *dpdk_rxq __rte_unused,
                 struct rte_mbuf **pkts __rte_unused,
                 uint16_t pkts_n __rte_unused)
{
        return 0;
}

/*
 * Vectorized Rx/Tx routines are not compiled in when the required vector
 * instructions are not supported on the target architecture. The following
 * null stubs are needed for linkage when the vectorized routines are not
 * built (e.g. mlx5_rxtx_vec_sse.c for x86).
 */

uint16_t __attribute__((weak))
mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused,
                      struct rte_mbuf **pkts __rte_unused,
                      uint16_t pkts_n __rte_unused)
{
        return 0;
}

uint16_t __attribute__((weak))
mlx5_tx_burst_vec(void *dpdk_txq __rte_unused,
                  struct rte_mbuf **pkts __rte_unused,
                  uint16_t pkts_n __rte_unused)
{
        return 0;
}

uint16_t __attribute__((weak))
mlx5_rx_burst_vec(void *dpdk_rxq __rte_unused,
                  struct rte_mbuf **pkts __rte_unused,
                  uint16_t pkts_n __rte_unused)
{
        return 0;
}

int __attribute__((weak))
mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
{
        return -ENOTSUP;
}

int __attribute__((weak))
mlx5_check_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
{
        return -ENOTSUP;
}

int __attribute__((weak))
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
{
        return -ENOTSUP;
}

int __attribute__((weak))
mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)
{
        return -ENOTSUP;
}
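
/*
 * Linkage note (illustrative): the stubs above are weak symbols, so when the
 * vectorized implementation is compiled in, its regular (non-weak)
 * definitions of the same functions take precedence at link time and the
 * stubs are discarded.  On targets without the required vector ISA only the
 * stubs exist, the mlx5_check_vec_*_support() probes return -ENOTSUP, and
 * the burst-function selection logic elsewhere in the PMD falls back to the
 * scalar mlx5_rx_burst()/mlx5_rx_burst_mprq() paths.
 */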