/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_cycles.h>

#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"

static __rte_always_inline uint32_t
rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe);

static __rte_always_inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe);

static __rte_always_inline uint32_t
rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);

static __rte_always_inline void
rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
	       volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res);

static __rte_always_inline void
mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx);

static int
mlx5_queue_state_modify(struct rte_eth_dev *dev,
			struct mlx5_mp_arg_queue_state_modify *sm);

uint32_t mlx5_ptype_table[] __rte_cache_aligned = {
	[0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */
};

uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned;
uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned;

/**
 * Build a table to translate Rx completion flags to packet type.
 *
 * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
 */
void
mlx5_set_ptype_table(void)
{
	unsigned int i;
	uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table;

	/* Last entry must not be overwritten, reserved for errored packet. */
	for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i)
		(*p)[i] = RTE_PTYPE_UNKNOWN;
	/*
	 * The index to the array should have:
	 * bit[1:0] = l3_hdr_type
	 * bit[4:2] = l4_hdr_type
	 * bit[5] = ip_frag
	 * bit[6] = tunneled
	 * bit[7] = outer_l3_type
	 */
	/* L2 */
	(*p)[0x00] = RTE_PTYPE_L2_ETHER;
	/* L3 */
	(*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	(*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	/* Fragmented */
	(*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	(*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	/* TCP */
	(*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	/* UDP */
	(*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	(*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	/* Repeat with outer_l3_type being set. Just in case. */
	(*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	(*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	(*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	(*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	(*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	(*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	/* Tunneled - L3 */
	(*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
	(*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	(*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	(*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
	(*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	(*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	/* Tunneled - Fragmented */
	(*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	(*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	(*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	(*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	/* Tunneled - TCP */
	(*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	/* Tunneled - UDP */
	(*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
	(*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
	(*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
	(*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
}
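
/*
 * Worked example (editorial sketch, not extra driver logic): with the index
 * layout documented above, a plain (non-tunneled, non-fragmented) IPv4/TCP
 * completion reports l3_hdr_type = 2 and l4_hdr_type = 1, which yields
 * index (1 << 2) | 2 = 0x06, i.e.
 *
 *	mlx5_ptype_table[0x06] == (RTE_PTYPE_L2_ETHER |
 *				   RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
 *				   RTE_PTYPE_L4_TCP)
 *
 * Entry 0xff is left untouched by this function and stays reserved for
 * errored packets.
 */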

/**
 * Build a table to translate packet to checksum type of Verbs.
 */
void
mlx5_set_cksum_table(void)
{
	unsigned int i;
	uint8_t v;

	/*
	 * The index should have:
	 * bit[0] = PKT_TX_TCP_SEG
	 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM
	 * bit[4] = PKT_TX_IP_CKSUM
	 * bit[8] = PKT_TX_OUTER_IP_CKSUM
	 * bit[9] = tunnel
	 */
	for (i = 0; i < RTE_DIM(mlx5_cksum_table); ++i) {
		v = 0;
		if (i & (1 << 9)) {
			/* Tunneled packet. */
			if (i & (1 << 8)) /* Outer IP. */
				v |= MLX5_ETH_WQE_L3_CSUM;
			if (i & (1 << 4)) /* Inner IP. */
				v |= MLX5_ETH_WQE_L3_INNER_CSUM;
			if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */
				v |= MLX5_ETH_WQE_L4_INNER_CSUM;
		} else {
			/* No tunnel. */
			if (i & (1 << 4)) /* IP. */
				v |= MLX5_ETH_WQE_L3_CSUM;
			if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */
				v |= MLX5_ETH_WQE_L4_CSUM;
		}
		mlx5_cksum_table[i] = v;
	}
}

/**
 * Build a table to translate packet type of mbuf to SWP type of Verbs.
 */
void
mlx5_set_swp_types_table(void)
{
	unsigned int i;
	uint8_t v;

	/*
	 * The index should have:
	 * bit[0:1] = PKT_TX_L4_MASK
	 * bit[4] = PKT_TX_IPV6
	 * bit[8] = PKT_TX_OUTER_IPV6
	 * bit[9] = PKT_TX_OUTER_UDP
	 */
	for (i = 0; i < RTE_DIM(mlx5_swp_types_table); ++i) {
		v = 0;
		if (i & (1 << 8))
			v |= MLX5_ETH_WQE_L3_OUTER_IPV6;
		if (i & (1 << 9))
			v |= MLX5_ETH_WQE_L4_OUTER_UDP;
		if (i & (1 << 4))
			v |= MLX5_ETH_WQE_L3_INNER_IPV6;
		if ((i & 3) == (PKT_TX_UDP_CKSUM >> 52))
			v |= MLX5_ETH_WQE_L4_INNER_UDP;
		mlx5_swp_types_table[i] = v;
	}
}
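
/*
 * Worked example (editorial sketch derived from the loops above): for a
 * non-tunneled packet requesting IP and L4 checksum offload, the index has
 * bit[4] set plus at least one of bit[0] or bits[3:2], e.g. 0x14, so
 *
 *	mlx5_cksum_table[0x14] == (MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM)
 *
 * With bit[9] (tunnel) set, the same request selects the inner checksum
 * flags instead, and bit[8] additionally enables the outer L3 checksum.
 */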

/**
 * Return the size of tailroom of WQ.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param addr
 *   Pointer to tail of WQ.
 *
 * @return
 *   Size of tailroom.
 */
static inline size_t
tx_mlx5_wq_tailroom(struct mlx5_txq_data *txq, void *addr)
{
	size_t tailroom;
	tailroom = (uintptr_t)(txq->wqes) +
		   (1 << txq->wqe_n) * MLX5_WQE_SIZE -
		   (uintptr_t)addr;
	return tailroom;
}

/**
 * Copy data to tailroom of circular queue.
 *
 * @param dst
 *   Pointer to destination.
 * @param src
 *   Pointer to source.
 * @param n
 *   Number of bytes to copy.
 * @param base
 *   Pointer to head of queue.
 * @param tailroom
 *   Size of tailroom from dst.
 *
 * @return
 *   Pointer after copied data.
 */
static inline void *
mlx5_copy_to_wq(void *dst, const void *src, size_t n,
		void *base, size_t tailroom)
{
	void *ret;

	if (n > tailroom) {
		rte_memcpy(dst, src, tailroom);
		rte_memcpy(base, (void *)((uintptr_t)src + tailroom),
			   n - tailroom);
		ret = (uint8_t *)base + n - tailroom;
	} else {
		rte_memcpy(dst, src, n);
		ret = (n == tailroom) ? base : (uint8_t *)dst + n;
	}
	return ret;
}
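
/*
 * Usage sketch (editorial, illustrating the two helpers above): the WQ ring
 * is circular, so a caller appending "n" bytes at "dst" first asks
 * tx_mlx5_wq_tailroom() how many bytes remain before the end of the ring and
 * then lets mlx5_copy_to_wq() split the copy around the wrap point:
 *
 *	size_t room = tx_mlx5_wq_tailroom(txq, dst);
 *	dst = mlx5_copy_to_wq(dst, src, n, (void *)(uintptr_t)txq->wqes, room);
 *
 * When n exceeds the tailroom, the remainder lands at the base of the ring
 * and the returned pointer already points past it; txq_burst_empw() below
 * uses exactly this pattern to inline packet data.
 */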

/**
 * Inline TSO headers into WQE.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
static int
inline_tso(struct mlx5_txq_data *txq, struct rte_mbuf *buf,
	   uint32_t *length,
	   uintptr_t *addr,
	   uint16_t *pkt_inline_sz,
	   uint8_t **raw,
	   uint16_t *max_wqe,
	   uint16_t *tso_segsz,
	   uint16_t *tso_header_sz)
{
	uintptr_t end = (uintptr_t)(((uintptr_t)txq->wqes) +
				    (1 << txq->wqe_n) * MLX5_WQE_SIZE);
	unsigned int copy_b;
	uint8_t vlan_sz = (buf->ol_flags & PKT_TX_VLAN_PKT) ? 4 : 0;
	const uint8_t tunneled = txq->tunnel_en && (buf->ol_flags &
						    PKT_TX_TUNNEL_MASK);
	uint16_t n_wqe;

	*tso_segsz = buf->tso_segsz;
	*tso_header_sz = buf->l2_len + vlan_sz + buf->l3_len + buf->l4_len;
	if (unlikely(*tso_segsz == 0 || *tso_header_sz == 0)) {
		txq->stats.oerrors++;
		return -EINVAL;
	}
	if (tunneled)
		*tso_header_sz += buf->outer_l2_len + buf->outer_l3_len;
	/* First seg must contain all TSO headers. */
	if (unlikely(*tso_header_sz > MLX5_MAX_TSO_HEADER) ||
		     *tso_header_sz > DATA_LEN(buf)) {
		txq->stats.oerrors++;
		return -EINVAL;
	}
	copy_b = *tso_header_sz - *pkt_inline_sz;
	if (!copy_b || ((end - (uintptr_t)*raw) < copy_b))
		return -EAGAIN;
	n_wqe = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;
	if (unlikely(*max_wqe < n_wqe))
		return -EINVAL;
	*max_wqe -= n_wqe;
	rte_memcpy((void *)*raw, (void *)*addr, copy_b);
	*length -= copy_b;
	*addr += copy_b;
	copy_b = MLX5_WQE_DS(copy_b) * MLX5_WQE_DWORD_SIZE;
	*pkt_inline_sz += copy_b;
	*raw += copy_b;
	return 0;
}

/**
 * DPDK callback to check the status of a tx descriptor.
 *
 * @param tx_queue
 *   The tx queue.
 * @param[in] offset
 *   The index of the descriptor in the ring.
 *
 * @return
 *   The status of the tx descriptor.
 */
int
mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
{
	struct mlx5_txq_data *txq = tx_queue;
	uint16_t used;

	mlx5_tx_complete(txq);
	used = txq->elts_head - txq->elts_tail;
	if (offset < used)
		return RTE_ETH_TX_DESC_FULL;
	return RTE_ETH_TX_DESC_DONE;
}

/**
 * Internal function to compute the number of used descriptors in an RX queue.
 *
 * @param rxq
 *   The Rx queue.
 *
 * @return
 *   The number of used rx descriptors.
 */
static uint32_t
rx_queue_count(struct mlx5_rxq_data *rxq)
{
	struct rxq_zip *zip = &rxq->zip;
	volatile struct mlx5_cqe *cqe;
	const unsigned int cqe_n = (1 << rxq->cqe_n);
	const unsigned int cqe_cnt = cqe_n - 1;
	unsigned int cq_ci;
	unsigned int used;

	/* if we are processing a compressed cqe */
	if (zip->ai) {
		used = zip->cqe_cnt - zip->ca;
		cq_ci = zip->cq_ci;
	} else {
		used = 0;
		cq_ci = rxq->cq_ci;
	}
	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) {
		int8_t op_own;
		unsigned int n;

		op_own = cqe->op_own;
		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
			n = rte_be_to_cpu_32(cqe->byte_cnt);
		else
			n = 1;
		cq_ci += n;
		used += n;
		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	}
	used = RTE_MIN(used, (1U << rxq->elts_n) - 1);
	return used;
}

/**
 * DPDK callback to check the status of a rx descriptor.
 *
 * @param rx_queue
 *   The Rx queue.
 * @param[in] offset
 *   The index of the descriptor in the ring.
 *
 * @return
 *   The status of the rx descriptor.
 */
int
mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
{
	struct mlx5_rxq_data *rxq = rx_queue;
	struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);

	if (dev->rx_pkt_burst != mlx5_rx_burst) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	if (offset >= (1 << rxq->elts_n)) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (offset < rx_queue_count(rxq))
		return RTE_ETH_RX_DESC_DONE;
	return RTE_ETH_RX_DESC_AVAIL;
}

/**
 * DPDK callback to get the number of used descriptors in a RX queue.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   The Rx queue.
 *
 * @return
 *   The number of used rx descriptors.
 *   -EINVAL if the queue is invalid
 */
uint32_t
mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq;

	if (dev->rx_pkt_burst != mlx5_rx_burst) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	rxq = (*priv->rxqs)[rx_queue_id];
	if (!rxq) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return rx_queue_count(rxq);
}
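
/*
 * Editorial note: the Rx helpers above are reached through the generic
 * ethdev API, e.g. (port_id/queue_id/offset being the application's values):
 *
 *	int st = rte_eth_rx_descriptor_status(port_id, queue_id, offset);
 *	int used = rte_eth_rx_queue_count(port_id, queue_id);
 *
 * They are only implemented for the plain mlx5_rx_burst() receive function;
 * any other rx_pkt_burst makes them fail with ENOTSUP, as coded above.
 */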

#define MLX5_SYSTEM_LOG_DIR "/var/log"
/**
 * Dump debug information to log file.
 *
 * @param fname
 *   The file name.
 * @param hex_title
 *   If not NULL this string is printed as a header to the output
 *   and the output will be in hexadecimal view.
 * @param buf
 *   This is the buffer address to print out.
 * @param hex_len
 *   The number of bytes to dump out.
 */
void
mlx5_dump_debug_information(const char *fname, const char *hex_title,
			    const void *buf, unsigned int hex_len)
{
	FILE *fd;

	MKSTR(path, "%s/%s", MLX5_SYSTEM_LOG_DIR, fname);
	fd = fopen(path, "a+");
	if (!fd) {
		DRV_LOG(WARNING, "cannot open %s for debug dump\n",
			path);
		MKSTR(path2, "./%s", fname);
		fd = fopen(path2, "a+");
		if (!fd) {
			DRV_LOG(ERR, "cannot open %s for debug dump\n",
				path2);
			return;
		}
		DRV_LOG(INFO, "New debug dump in file %s\n", path2);
	} else {
		DRV_LOG(INFO, "New debug dump in file %s\n", path);
	}
	if (hex_title)
		rte_hexdump(fd, hex_title, buf, hex_len);
	else
		fprintf(fd, "%s", (const char *)buf);
	fprintf(fd, "\n\n\n");
	fclose(fd);
}

/**
 * Move QP from error state to running state and initialize indexes.
 *
 * @param txq_ctrl
 *   Pointer to TX queue control structure.
 *
 * @return
 *   0 on success, else -1.
 */
static int
tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_mp_arg_queue_state_modify sm = {
		.is_wq = 0,
		.queue_id = txq_ctrl->txq.idx,
	};

	if (mlx5_queue_state_modify(ETH_DEV(txq_ctrl->priv), &sm))
		return -1;
	txq_ctrl->txq.wqe_ci = 0;
	txq_ctrl->txq.wqe_pi = 0;
	txq_ctrl->txq.elts_comp = 0;
	return 0;
}

/* Return 1 if the error CQE is signed; otherwise, sign it and return 0. */
static int
check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
{
	static const uint8_t magic[] = "seen";
	int ret = 1;
	unsigned int i;

	for (i = 0; i < sizeof(magic); ++i)
		if (!ret || err_cqe->rsvd1[i] != magic[i]) {
			ret = 0;
			err_cqe->rsvd1[i] = magic[i];
		}
	return ret;
}

/**
 * Handle error CQE.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param error_cqe
 *   Pointer to the error CQE.
 *
 * @return
 *   The last Tx buffer element to free.
 */
uint16_t
mlx5_tx_error_cqe_handle(struct mlx5_txq_data *txq,
			 volatile struct mlx5_err_cqe *err_cqe)
{
	if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) {
		const uint16_t wqe_m = ((1 << txq->wqe_n) - 1);
		struct mlx5_txq_ctrl *txq_ctrl =
				container_of(txq, struct mlx5_txq_ctrl, txq);
		uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter);
		int seen = check_err_cqe_seen(err_cqe);

		if (!seen && txq_ctrl->dump_file_n <
		    txq_ctrl->priv->config.max_dump_files_num) {
			MKSTR(err_str, "Unexpected CQE error syndrome "
			      "0x%02x CQN = %u SQN = %u wqe_counter = %u "
			      "wq_ci = %u cq_ci = %u", err_cqe->syndrome,
			      txq_ctrl->cqn, txq->qp_num_8s >> 8,
			      rte_be_to_cpu_16(err_cqe->wqe_counter),
			      txq->wqe_ci, txq->cq_ci);
			MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u",
			      PORT_ID(txq_ctrl->priv), txq->idx,
			      txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc());
			mlx5_dump_debug_information(name, NULL, err_str, 0);
			mlx5_dump_debug_information(name, "MLX5 Error CQ:",
						    (const void *)((uintptr_t)
							&(*txq->cqes)[0]),
						    sizeof(*err_cqe) *
						    (1 << txq->cqe_n));
			mlx5_dump_debug_information(name, "MLX5 Error SQ:",
						    (const void *)((uintptr_t)
							tx_mlx5_wqe(txq, 0)),
						    MLX5_WQE_SIZE *
						    (1 << txq->wqe_n));
			txq_ctrl->dump_file_n++;
		}
		if (!seen)
			/*
			 * Count errors in WQEs units.
			 * Later it can be improved to count error packets,
			 * for example, by SQ parsing to find how many packets
			 * should be counted for each WQE.
			 */
			txq->stats.oerrors += ((txq->wqe_ci & wqe_m) -
					       new_wqe_pi) & wqe_m;
		if (tx_recover_qp(txq_ctrl) == 0) {
			txq->cq_ci++;
			/* Release all the remaining buffers. */
			return txq->elts_head;
		}
		/* Recovering failed - try again later on the same WQE. */
	} else {
		txq->cq_ci++;
	}
	/* Do not release buffers. */
	return txq->elts_tail;
}

/**
 * DPDK callback for TX.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	uint16_t elts_head = txq->elts_head;
	const uint16_t elts_n = 1 << txq->elts_n;
	const uint16_t elts_m = elts_n - 1;
	unsigned int i = 0;
	unsigned int j = 0;
	unsigned int k = 0;
	uint16_t max_elts;
	uint16_t max_wqe;
	unsigned int comp;
	volatile struct mlx5_wqe_ctrl *last_wqe = NULL;
	unsigned int segs_n = 0;
	const unsigned int max_inline = txq->max_inline;
	uint64_t addr_64;

	if (unlikely(!pkts_n))
		return 0;
	/* Prefetch first packet cacheline. */
	rte_prefetch0(*pkts);
	/* Start processing. */
	mlx5_tx_complete(txq);
	max_elts = (elts_n - (elts_head - txq->elts_tail));
	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
	if (unlikely(!max_wqe))
		return 0;
	do {
		struct rte_mbuf *buf = *pkts; /* First_seg. */
		uint8_t *raw;
		volatile struct mlx5_wqe_v *wqe = NULL;
		volatile rte_v128u32_t *dseg = NULL;
		uint32_t length;
		unsigned int ds = 0;
		unsigned int sg = 0; /* counter of additional segs attached. */
		uintptr_t addr;
		uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE + 2;
		uint16_t tso_header_sz = 0;
		uint16_t ehdr;
		uint8_t cs_flags;
		uint8_t tso = txq->tso_en && (buf->ol_flags & PKT_TX_TCP_SEG);
		uint32_t swp_offsets = 0;
		uint8_t swp_types = 0;
		rte_be32_t metadata;
		uint16_t tso_segsz = 0;
#ifdef MLX5_PMD_SOFT_COUNTERS
		uint32_t total_length = 0;
#endif
		int ret;

		segs_n = buf->nb_segs;
		/*
		 * Make sure there is enough room to store this packet and
		 * that one ring entry remains unused.
		 */
		assert(segs_n);
		if (max_elts < segs_n)
			break;
		max_elts -= segs_n;
		sg = --segs_n;
		if (unlikely(--max_wqe == 0))
			break;
		wqe = (volatile struct mlx5_wqe_v *)
			tx_mlx5_wqe(txq, txq->wqe_ci);
		rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
		if (pkts_n - i > 1)
			rte_prefetch0(*(pkts + 1));
		addr = rte_pktmbuf_mtod(buf, uintptr_t);
		length = DATA_LEN(buf);
		ehdr = (((uint8_t *)addr)[1] << 8) |
		       ((uint8_t *)addr)[0];
#ifdef MLX5_PMD_SOFT_COUNTERS
		total_length = length;
#endif
		if (length < (MLX5_WQE_DWORD_SIZE + 2)) {
			txq->stats.oerrors++;
			break;
		}
		/* Update element. */
		(*txq->elts)[elts_head & elts_m] = buf;
		/* Prefetch next buffer data. */
		if (pkts_n - i > 1)
			rte_prefetch0(
			    rte_pktmbuf_mtod(*(pkts + 1), volatile void *));
		cs_flags = txq_ol_cksum_to_cs(buf);
		txq_mbuf_to_swp(txq, buf, (uint8_t *)&swp_offsets, &swp_types);
		raw = ((uint8_t *)(uintptr_t)wqe) + 2 * MLX5_WQE_DWORD_SIZE;
		/* Copy metadata from mbuf if valid */
		metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
							     0;
		/* Replace the Ethernet type by the VLAN if necessary. */
		if (buf->ol_flags & PKT_TX_VLAN_PKT) {
			uint32_t vlan = rte_cpu_to_be_32(0x81000000 |
							 buf->vlan_tci);
			unsigned int len = 2 * RTE_ETHER_ADDR_LEN - 2;

			addr += 2;
			length -= 2;
			/* Copy Destination and source mac address. */
			memcpy((uint8_t *)raw, ((uint8_t *)addr), len);
			/* Copy VLAN. */
			memcpy((uint8_t *)raw + len, &vlan, sizeof(vlan));
			/* Copy missing two bytes to end the DSeg. */
			memcpy((uint8_t *)raw + len + sizeof(vlan),
			       ((uint8_t *)addr) + len, 2);
			addr += len + 2;
			length -= (len + 2);
		} else {
			memcpy((uint8_t *)raw, ((uint8_t *)addr) + 2,
			       MLX5_WQE_DWORD_SIZE);
			length -= pkt_inline_sz;
			addr += pkt_inline_sz;
		}
		raw += MLX5_WQE_DWORD_SIZE;
		if (tso) {
			ret = inline_tso(txq, buf, &length,
					 &addr, &pkt_inline_sz,
					 &raw, &max_wqe,
					 &tso_segsz, &tso_header_sz);
			if (ret == -EINVAL) {
				break;
			} else if (ret == -EAGAIN) {
				/* NOP WQE. */
				wqe->ctrl = (rte_v128u32_t){
					rte_cpu_to_be_32(txq->wqe_ci << 8),
					rte_cpu_to_be_32(txq->qp_num_8s | 1),
					rte_cpu_to_be_32
						(MLX5_COMP_ONLY_FIRST_ERR <<
						 MLX5_COMP_MODE_OFFSET),
					0,
				};
				ds = 1;
#ifdef MLX5_PMD_SOFT_COUNTERS
				total_length = 0;
#endif
				k++;
				goto next_wqe;
			}
		}
		/* Inline if enough room. */
		if (max_inline || tso) {
			uint32_t inl = 0;
			uintptr_t end = (uintptr_t)
				(((uintptr_t)txq->wqes) +
				 (1 << txq->wqe_n) * MLX5_WQE_SIZE);
			unsigned int inline_room = max_inline *
						   RTE_CACHE_LINE_SIZE -
						   (pkt_inline_sz - 2) -
						   !!tso * sizeof(inl);
			uintptr_t addr_end;
			unsigned int copy_b;

pkt_inline:
			addr_end = RTE_ALIGN_FLOOR(addr + inline_room,
						   RTE_CACHE_LINE_SIZE);
			copy_b = (addr_end > addr) ?
				 RTE_MIN((addr_end - addr), length) : 0;
			if (copy_b && ((end - (uintptr_t)raw) >
				       (copy_b + sizeof(inl)))) {
				/*
				 * One Dseg remains in the current WQE. To
				 * keep the computation positive, it is
				 * removed after the bytes to Dseg conversion.
				 */
				uint16_t n = (MLX5_WQE_DS(copy_b) - 1 + 3) / 4;

				if (unlikely(max_wqe < n))
					break;
				max_wqe -= n;
				if (tso) {
					assert(inl == 0);
					inl = rte_cpu_to_be_32(copy_b |
							       MLX5_INLINE_SEG);
					rte_memcpy((void *)raw,
						   (void *)&inl, sizeof(inl));
					raw += sizeof(inl);
					pkt_inline_sz += sizeof(inl);
				}
				rte_memcpy((void *)raw, (void *)addr, copy_b);
				addr += copy_b;
				length -= copy_b;
				pkt_inline_sz += copy_b;
			}
			/*
			 * 2 DWORDs consumed by the WQE header + ETH segment +
			 * the size of the inline part of the packet.
			 */
			ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
			if (length > 0) {
				if (ds % (MLX5_WQE_SIZE /
					  MLX5_WQE_DWORD_SIZE) == 0) {
					if (unlikely(--max_wqe == 0))
						break;
					dseg = (volatile rte_v128u32_t *)
					       tx_mlx5_wqe(txq, txq->wqe_ci +
							   ds / 4);
				} else {
					dseg = (volatile rte_v128u32_t *)
						((uintptr_t)wqe +
						 (ds * MLX5_WQE_DWORD_SIZE));
				}
				goto use_dseg;
			} else if (!segs_n) {
				goto next_pkt;
			} else {
				/*
				 * Further inline the next segment only for
				 * non-TSO packets.
				 */
				if (!tso) {
					raw += copy_b;
					inline_room -= copy_b;
				} else {
					inline_room = 0;
				}
				/* Move to the next segment. */
				--segs_n;
				buf = buf->next;
				assert(buf);
				addr = rte_pktmbuf_mtod(buf, uintptr_t);
				length = DATA_LEN(buf);
#ifdef MLX5_PMD_SOFT_COUNTERS
				total_length += length;
#endif
				(*txq->elts)[++elts_head & elts_m] = buf;
				goto pkt_inline;
			}
		} else {
			/*
			 * No inline has been done in the packet, only the
			 * Ethernet Header has been stored.
			 */
			dseg = (volatile rte_v128u32_t *)
				((uintptr_t)wqe + (3 * MLX5_WQE_DWORD_SIZE));
			ds = 3;
use_dseg:
			/* Add the remaining packet as a simple ds. */
			addr_64 = rte_cpu_to_be_64(addr);
			*dseg = (rte_v128u32_t){
				rte_cpu_to_be_32(length),
				mlx5_tx_mb2mr(txq, buf),
				addr_64,
				addr_64 >> 32,
			};
			++ds;
			if (!segs_n)
				goto next_pkt;
		}
next_seg:
		assert(buf);
		assert(ds);
		assert(wqe);
		/*
		 * Spill on next WQE when the current one does not have
		 * enough room left. Size of WQE must be a multiple
		 * of data segment size.
		 */
		assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE));
		if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE))) {
			if (unlikely(--max_wqe == 0))
				break;
			dseg = (volatile rte_v128u32_t *)
			       tx_mlx5_wqe(txq, txq->wqe_ci + ds / 4);
			rte_prefetch0(tx_mlx5_wqe(txq,
						  txq->wqe_ci + ds / 4 + 1));
		} else {
			++dseg;
		}
		++ds;
		buf = buf->next;
		assert(buf);
		length = DATA_LEN(buf);
#ifdef MLX5_PMD_SOFT_COUNTERS
		total_length += length;
#endif
		/* Store segment information. */
		addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
		*dseg = (rte_v128u32_t){
			rte_cpu_to_be_32(length),
			mlx5_tx_mb2mr(txq, buf),
			addr_64,
			addr_64 >> 32,
		};
		(*txq->elts)[++elts_head & elts_m] = buf;
		if (--segs_n)
			goto next_seg;
next_pkt:
		if (ds > MLX5_DSEG_MAX) {
			txq->stats.oerrors++;
			break;
		}
		++elts_head;
		++pkts;
		++i;
		j += sg;
		/* Initialize known and common part of the WQE structure. */
		if (tso) {
			wqe->ctrl = (rte_v128u32_t){
				rte_cpu_to_be_32((txq->wqe_ci << 8) |
						 MLX5_OPCODE_TSO),
				rte_cpu_to_be_32(txq->qp_num_8s | ds),
				rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
						 MLX5_COMP_MODE_OFFSET),
				0,
			};
			wqe->eseg = (rte_v128u32_t){
				swp_offsets,
				cs_flags | (swp_types << 8) |
				(rte_cpu_to_be_16(tso_segsz) << 16),
				metadata,
				(ehdr << 16) | rte_cpu_to_be_16(tso_header_sz),
			};
		} else {
			wqe->ctrl = (rte_v128u32_t){
				rte_cpu_to_be_32((txq->wqe_ci << 8) |
						 MLX5_OPCODE_SEND),
				rte_cpu_to_be_32(txq->qp_num_8s | ds),
				rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
						 MLX5_COMP_MODE_OFFSET),
				0,
			};
			wqe->eseg = (rte_v128u32_t){
				swp_offsets,
				cs_flags | (swp_types << 8),
				metadata,
				(ehdr << 16) | rte_cpu_to_be_16(pkt_inline_sz),
			};
		}
next_wqe:
		txq->wqe_ci += (ds + 3) / 4;
		/* Save the last successful WQE for completion request */
		last_wqe = (volatile struct mlx5_wqe_ctrl *)wqe;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += total_length;
#endif
	} while (i < pkts_n);
	/* Take a shortcut if nothing must be sent. */
	if (unlikely((i + k) == 0))
		return 0;
	txq->elts_head += (i + j);
	/* Check whether completion threshold has been reached. */
	comp = txq->elts_comp + i + j + k;
	if (comp >= MLX5_TX_COMP_THRESH) {
		/* A CQE slot must always be available. */
		assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
		/* Request completion on last WQE. */
		last_wqe->ctrl2 = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
						   MLX5_COMP_MODE_OFFSET);
		/* Save elts_head in unused "immediate" field of WQE. */
		last_wqe->ctrl3 = txq->elts_head;
		txq->elts_comp = 0;
	} else {
		txq->elts_comp = comp;
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	/* Ring QP doorbell. */
	mlx5_tx_dbrec(txq, (volatile struct mlx5_wqe *)last_wqe);
	return i;
}
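
/*
 * Editorial note on the accounting in mlx5_tx_burst() above: "ds" counts
 * 16-byte units (MLX5_WQE_DWORD_SIZE) and a WQEBB holds MLX5_WQE_SIZE /
 * MLX5_WQE_DWORD_SIZE = 4 of them, hence the ring index advances by
 * (ds + 3) / 4 WQEBBs per packet; e.g. a packet consuming ds = 6 units takes
 * two WQEBBs. Completions are likewise batched: a CQE is requested only once
 * txq->elts_comp accumulates MLX5_TX_COMP_THRESH entries, and the doorbell
 * is rung once per burst rather than per packet.
 */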

/**
 * Open a MPW session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 * @param length
 *   Packet length.
 */
static inline void
mlx5_mpw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, uint32_t length)
{
	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
	volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
		(volatile struct mlx5_wqe_data_seg (*)[])
			tx_mlx5_wqe(txq, idx + 1);

	mpw->state = MLX5_MPW_STATE_OPENED;
	mpw->pkts_n = 0;
	mpw->len = length;
	mpw->total_len = 0;
	mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
	mpw->wqe->eseg.mss = rte_cpu_to_be_16(length);
	mpw->wqe->eseg.inline_hdr_sz = 0;
	mpw->wqe->eseg.rsvd0 = 0;
	mpw->wqe->eseg.rsvd1 = 0;
	mpw->wqe->eseg.flow_table_metadata = 0;
	mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
					     (txq->wqe_ci << 8) |
					     MLX5_OPCODE_TSO);
	mpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
					     MLX5_COMP_MODE_OFFSET);
	mpw->wqe->ctrl[3] = 0;
	mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *)
		(((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
	mpw->data.dseg[1] = (volatile struct mlx5_wqe_data_seg *)
		(((uintptr_t)mpw->wqe) + (3 * MLX5_WQE_DWORD_SIZE));
	mpw->data.dseg[2] = &(*dseg)[0];
	mpw->data.dseg[3] = &(*dseg)[1];
	mpw->data.dseg[4] = &(*dseg)[2];
}

/**
 * Close a MPW session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 */
static inline void
mlx5_mpw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
{
	unsigned int num = mpw->pkts_n;

	/*
	 * Store size in multiple of 16 bytes. Control and Ethernet segments
	 * count as 2.
	 */
	mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s | (2 + num));
	mpw->state = MLX5_MPW_STATE_CLOSED;
	if (num < 3)
		++txq->wqe_ci;
	else
		txq->wqe_ci += 2;
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
}
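
/*
 * Editorial note: as set up in mlx5_mpw_new() above, a legacy MPW session
 * keeps two data segments in the title WQEBB (right after the control and
 * Ethernet segments) and three more in the following WQEBB. mlx5_mpw_close()
 * therefore advances wqe_ci by one WQEBB when at most two packets were
 * attached and by two WQEBBs otherwise.
 */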

/**
 * DPDK callback for TX with MPW support.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	uint16_t elts_head = txq->elts_head;
	const uint16_t elts_n = 1 << txq->elts_n;
	const uint16_t elts_m = elts_n - 1;
	unsigned int i = 0;
	unsigned int j = 0;
	uint16_t max_elts;
	uint16_t max_wqe;
	unsigned int comp;
	struct mlx5_mpw mpw = {
		.state = MLX5_MPW_STATE_CLOSED,
	};

	if (unlikely(!pkts_n))
		return 0;
	/* Prefetch first packet cacheline. */
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
	/* Start processing. */
	mlx5_tx_complete(txq);
	max_elts = (elts_n - (elts_head - txq->elts_tail));
	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
	if (unlikely(!max_wqe))
		return 0;
	do {
		struct rte_mbuf *buf = *(pkts++);
		uint32_t length;
		unsigned int segs_n = buf->nb_segs;
		uint32_t cs_flags;
		rte_be32_t metadata;

		/*
		 * Make sure there is enough room to store this packet and
		 * that one ring entry remains unused.
		 */
		assert(segs_n);
		if (max_elts < segs_n)
			break;
		/* Do not bother with large packets MPW cannot handle. */
		if (segs_n > MLX5_MPW_DSEG_MAX) {
			txq->stats.oerrors++;
			break;
		}
		max_elts -= segs_n;
		--pkts_n;
		cs_flags = txq_ol_cksum_to_cs(buf);
		/* Copy metadata from mbuf if valid */
		metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
							     0;
		/* Retrieve packet information. */
		length = PKT_LEN(buf);
		assert(length);
		/* Start new session if packet differs. */
		if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
		    ((mpw.len != length) ||
		     (segs_n != 1) ||
		     (mpw.wqe->eseg.flow_table_metadata != metadata) ||
		     (mpw.wqe->eseg.cs_flags != cs_flags)))
			mlx5_mpw_close(txq, &mpw);
		if (mpw.state == MLX5_MPW_STATE_CLOSED) {
			/*
			 * Multi-Packet WQE consumes at most two WQE.
			 * mlx5_mpw_new() expects to be able to use such
			 * resources.
			 */
			if (unlikely(max_wqe < 2))
				break;
			max_wqe -= 2;
			mlx5_mpw_new(txq, &mpw, length);
			mpw.wqe->eseg.cs_flags = cs_flags;
			mpw.wqe->eseg.flow_table_metadata = metadata;
		}
		/* Multi-segment packets must be alone in their MPW. */
		assert((segs_n == 1) || (mpw.pkts_n == 0));
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
		length = 0;
#endif
		do {
			volatile struct mlx5_wqe_data_seg *dseg;
			uintptr_t addr;

			assert(buf);
			(*txq->elts)[elts_head++ & elts_m] = buf;
			dseg = mpw.data.dseg[mpw.pkts_n];
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			*dseg = (struct mlx5_wqe_data_seg){
				.byte_count = rte_cpu_to_be_32(DATA_LEN(buf)),
				.lkey = mlx5_tx_mb2mr(txq, buf),
				.addr = rte_cpu_to_be_64(addr),
			};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
			length += DATA_LEN(buf);
#endif
			buf = buf->next;
			++mpw.pkts_n;
			++j;
		} while (--segs_n);
		assert(length == mpw.len);
		if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
			mlx5_mpw_close(txq, &mpw);
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += length;
#endif
		++i;
	} while (pkts_n);
	/* Take a shortcut if nothing must be sent. */
	if (unlikely(i == 0))
		return 0;
	/* Check whether completion threshold has been reached. */
	/* "j" includes both packets and segments. */
	comp = txq->elts_comp + j;
	if (comp >= MLX5_TX_COMP_THRESH) {
		volatile struct mlx5_wqe *wqe = mpw.wqe;

		/* A CQE slot must always be available. */
		assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
		/* Request completion on last WQE. */
		wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
						MLX5_COMP_MODE_OFFSET);
		/* Save elts_head in unused "immediate" field of WQE. */
		wqe->ctrl[3] = elts_head;
		txq->elts_comp = 0;
	} else {
		txq->elts_comp = comp;
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	/* Ring QP doorbell. */
	if (mpw.state == MLX5_MPW_STATE_OPENED)
		mlx5_mpw_close(txq, &mpw);
	mlx5_tx_dbrec(txq, mpw.wqe);
	txq->elts_head = elts_head;
	return i;
}

/**
 * Open a MPW inline session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 * @param length
 *   Packet length.
 */
static inline void
mlx5_mpw_inline_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw,
		    uint32_t length)
{
	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);
	struct mlx5_wqe_inl_small *inl;

	mpw->state = MLX5_MPW_INL_STATE_OPENED;
	mpw->pkts_n = 0;
	mpw->len = length;
	mpw->total_len = 0;
	mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
	mpw->wqe->ctrl[0] = rte_cpu_to_be_32((MLX5_OPC_MOD_MPW << 24) |
					     (txq->wqe_ci << 8) |
					     MLX5_OPCODE_TSO);
	mpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
					     MLX5_COMP_MODE_OFFSET);
	mpw->wqe->ctrl[3] = 0;
	mpw->wqe->eseg.mss = rte_cpu_to_be_16(length);
	mpw->wqe->eseg.inline_hdr_sz = 0;
	mpw->wqe->eseg.cs_flags = 0;
	mpw->wqe->eseg.rsvd0 = 0;
	mpw->wqe->eseg.rsvd1 = 0;
	mpw->wqe->eseg.flow_table_metadata = 0;
	inl = (struct mlx5_wqe_inl_small *)
		(((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
	mpw->data.raw = (uint8_t *)&inl->raw;
}

/**
 * Close a MPW inline session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 */
static inline void
mlx5_mpw_inline_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
{
	unsigned int size;
	struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *)
		(((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));

	size = MLX5_WQE_SIZE - MLX5_MWQE64_INL_DATA + mpw->total_len;
	/*
	 * Store size in multiple of 16 bytes. Control and Ethernet segments
	 * count as 2.
	 */
	mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s |
					     MLX5_WQE_DS(size));
	mpw->state = MLX5_MPW_STATE_CLOSED;
	inl->byte_cnt = rte_cpu_to_be_32(mpw->total_len | MLX5_INLINE_SEG);
	txq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
}
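
/*
 * Editorial note: for the inline variant the size accounted in
 * mlx5_mpw_inline_close() covers the control and Ethernet segments plus the
 * inline blob and its byte_cnt header; it is stored in ctrl[1] in 16-byte
 * units via MLX5_WQE_DS(), and wqe_ci then advances by the number of WQEBBs
 * (MLX5_WQE_SIZE bytes each) that this size spans, rounded up.
 */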

/**
 * DPDK callback for TX with MPW inline support.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
			 uint16_t pkts_n)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	uint16_t elts_head = txq->elts_head;
	const uint16_t elts_n = 1 << txq->elts_n;
	const uint16_t elts_m = elts_n - 1;
	unsigned int i = 0;
	unsigned int j = 0;
	uint16_t max_elts;
	uint16_t max_wqe;
	unsigned int comp;
	unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE;
	struct mlx5_mpw mpw = {
		.state = MLX5_MPW_STATE_CLOSED,
	};
	/*
	 * Compute the maximum number of WQE which can be consumed by inline
	 * code.
	 * - 2 DSEG for:
	 *   - 1 control segment,
	 *   - 1 Ethernet segment,
	 * - N Dseg from the inline request.
	 */
	const unsigned int wqe_inl_n =
		((2 * MLX5_WQE_DWORD_SIZE +
		  txq->max_inline * RTE_CACHE_LINE_SIZE) +
		 RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE;

	if (unlikely(!pkts_n))
		return 0;
	/* Prefetch first packet cacheline. */
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
	rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
	/* Start processing. */
	mlx5_tx_complete(txq);
	max_elts = (elts_n - (elts_head - txq->elts_tail));
	do {
		struct rte_mbuf *buf = *(pkts++);
		uintptr_t addr;
		uint32_t length;
		unsigned int segs_n = buf->nb_segs;
		uint8_t cs_flags;
		rte_be32_t metadata;

		/*
		 * Make sure there is enough room to store this packet and
		 * that one ring entry remains unused.
		 */
		assert(segs_n);
		if (max_elts < segs_n)
			break;
		/* Do not bother with large packets MPW cannot handle. */
		if (segs_n > MLX5_MPW_DSEG_MAX) {
			txq->stats.oerrors++;
			break;
		}
		max_elts -= segs_n;
		--pkts_n;
		/*
		 * Compute max_wqe in case less WQE were consumed in previous
		 * iteration.
		 */
		max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
		cs_flags = txq_ol_cksum_to_cs(buf);
		/* Copy metadata from mbuf if valid */
		metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
							     0;
		/* Retrieve packet information. */
		length = PKT_LEN(buf);
		/* Start new session if packet differs. */
		if (mpw.state == MLX5_MPW_STATE_OPENED) {
			if ((mpw.len != length) ||
			    (segs_n != 1) ||
			    (mpw.wqe->eseg.flow_table_metadata != metadata) ||
			    (mpw.wqe->eseg.cs_flags != cs_flags))
				mlx5_mpw_close(txq, &mpw);
		} else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
			if ((mpw.len != length) ||
			    (segs_n != 1) ||
			    (length > inline_room) ||
			    (mpw.wqe->eseg.flow_table_metadata != metadata) ||
			    (mpw.wqe->eseg.cs_flags != cs_flags)) {
				mlx5_mpw_inline_close(txq, &mpw);
				inline_room =
					txq->max_inline * RTE_CACHE_LINE_SIZE;
			}
		}
		if (mpw.state == MLX5_MPW_STATE_CLOSED) {
			if ((segs_n != 1) ||
			    (length > inline_room)) {
				/*
				 * Multi-Packet WQE consumes at most two WQE.
				 * mlx5_mpw_new() expects to be able to use
				 * such resources.
				 */
				if (unlikely(max_wqe < 2))
					break;
				max_wqe -= 2;
				mlx5_mpw_new(txq, &mpw, length);
				mpw.wqe->eseg.cs_flags = cs_flags;
				mpw.wqe->eseg.flow_table_metadata = metadata;
			} else {
				if (unlikely(max_wqe < wqe_inl_n))
					break;
				max_wqe -= wqe_inl_n;
				mlx5_mpw_inline_new(txq, &mpw, length);
				mpw.wqe->eseg.cs_flags = cs_flags;
				mpw.wqe->eseg.flow_table_metadata = metadata;
			}
		}
		/* Multi-segment packets must be alone in their MPW. */
		assert((segs_n == 1) || (mpw.pkts_n == 0));
		if (mpw.state == MLX5_MPW_STATE_OPENED) {
			assert(inline_room ==
			       txq->max_inline * RTE_CACHE_LINE_SIZE);
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
			length = 0;
#endif
			do {
				volatile struct mlx5_wqe_data_seg *dseg;

				assert(buf);
				(*txq->elts)[elts_head++ & elts_m] = buf;
				dseg = mpw.data.dseg[mpw.pkts_n];
				addr = rte_pktmbuf_mtod(buf, uintptr_t);
				*dseg = (struct mlx5_wqe_data_seg){
					.byte_count =
					       rte_cpu_to_be_32(DATA_LEN(buf)),
					.lkey = mlx5_tx_mb2mr(txq, buf),
					.addr = rte_cpu_to_be_64(addr),
				};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
				length += DATA_LEN(buf);
#endif
				buf = buf->next;
				++mpw.pkts_n;
				++j;
			} while (--segs_n);
			assert(length == mpw.len);
			if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
				mlx5_mpw_close(txq, &mpw);
		} else {
			unsigned int max;

			assert(mpw.state == MLX5_MPW_INL_STATE_OPENED);
			assert(length <= inline_room);
			assert(length == DATA_LEN(buf));
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			(*txq->elts)[elts_head++ & elts_m] = buf;
			/* Maximum number of bytes before wrapping. */
			max = ((((uintptr_t)(txq->wqes)) +
				(1 << txq->wqe_n) *
				MLX5_WQE_SIZE) -
			       (uintptr_t)mpw.data.raw);
			if (length > max) {
				rte_memcpy((void *)(uintptr_t)mpw.data.raw,
					   (void *)addr,
					   max);
				mpw.data.raw = (volatile void *)txq->wqes;
				rte_memcpy((void *)(uintptr_t)mpw.data.raw,
					   (void *)(addr + max),
					   length - max);
				mpw.data.raw += length - max;
			} else {
				rte_memcpy((void *)(uintptr_t)mpw.data.raw,
					   (void *)addr,
					   length);

				if (length == max)
					mpw.data.raw =
						(volatile void *)txq->wqes;
				else
					mpw.data.raw += length;
			}
			++mpw.pkts_n;
			mpw.total_len += length;
			++j;
			if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) {
				mlx5_mpw_inline_close(txq, &mpw);
				inline_room =
					txq->max_inline * RTE_CACHE_LINE_SIZE;
			} else {
				inline_room -= length;
			}
		}
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += length;
#endif
		++i;
	} while (pkts_n);
	/* Take a shortcut if nothing must be sent. */
	if (unlikely(i == 0))
		return 0;
	/* Check whether completion threshold has been reached. */
	/* "j" includes both packets and segments. */
	comp = txq->elts_comp + j;
	if (comp >= MLX5_TX_COMP_THRESH) {
		volatile struct mlx5_wqe *wqe = mpw.wqe;

		/* A CQE slot must always be available. */
		assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
		/* Request completion on last WQE. */
		wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
						MLX5_COMP_MODE_OFFSET);
		/* Save elts_head in unused "immediate" field of WQE. */
		wqe->ctrl[3] = elts_head;
		txq->elts_comp = 0;
	} else {
		txq->elts_comp = comp;
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	/* Ring QP doorbell. */
	if (mpw.state == MLX5_MPW_INL_STATE_OPENED)
		mlx5_mpw_inline_close(txq, &mpw);
	else if (mpw.state == MLX5_MPW_STATE_OPENED)
		mlx5_mpw_close(txq, &mpw);
	mlx5_tx_dbrec(txq, mpw.wqe);
	txq->elts_head = elts_head;
	return i;
}

/**
 * Open an Enhanced MPW session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 * @param padding
 *   Non-zero to pad the first two DWORDs with a zero-length inline header.
 */
static inline void
mlx5_empw_new(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw, int padding)
{
	uint16_t idx = txq->wqe_ci & ((1 << txq->wqe_n) - 1);

	mpw->state = MLX5_MPW_ENHANCED_STATE_OPENED;
	mpw->pkts_n = 0;
	mpw->total_len = sizeof(struct mlx5_wqe);
	mpw->wqe = (volatile struct mlx5_wqe *)tx_mlx5_wqe(txq, idx);
	mpw->wqe->ctrl[0] =
		rte_cpu_to_be_32((MLX5_OPC_MOD_ENHANCED_MPSW << 24) |
				 (txq->wqe_ci << 8) |
				 MLX5_OPCODE_ENHANCED_MPSW);
	mpw->wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ONLY_FIRST_ERR <<
					     MLX5_COMP_MODE_OFFSET);
	mpw->wqe->ctrl[3] = 0;
	memset((void *)(uintptr_t)&mpw->wqe->eseg, 0, MLX5_WQE_DWORD_SIZE);
	if (unlikely(padding)) {
		uintptr_t addr = (uintptr_t)(mpw->wqe + 1);

		/* Pad the first 2 DWORDs with zero-length inline header. */
		*(volatile uint32_t *)addr = rte_cpu_to_be_32(MLX5_INLINE_SEG);
		*(volatile uint32_t *)(addr + MLX5_WQE_DWORD_SIZE) =
			rte_cpu_to_be_32(MLX5_INLINE_SEG);
		mpw->total_len += 2 * MLX5_WQE_DWORD_SIZE;
		/* Start from the next WQEBB. */
		mpw->data.raw = (volatile void *)(tx_mlx5_wqe(txq, idx + 1));
	} else {
		mpw->data.raw = (volatile void *)(mpw->wqe + 1);
	}
}

/**
 * Close an Enhanced MPW session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 *
 * @return
 *   Number of consumed WQEs.
 */
static inline uint16_t
mlx5_empw_close(struct mlx5_txq_data *txq, struct mlx5_mpw *mpw)
{
	uint16_t ret;

	/* Store size in multiple of 16 bytes. Control and Ethernet segments
	 * count as 2.
	 */
	mpw->wqe->ctrl[1] = rte_cpu_to_be_32(txq->qp_num_8s |
					     MLX5_WQE_DS(mpw->total_len));
	mpw->state = MLX5_MPW_STATE_CLOSED;
	ret = (mpw->total_len + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
	txq->wqe_ci += ret;
	return ret;
}

/**
 * TX with Enhanced MPW support.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
static inline uint16_t
txq_burst_empw(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
	       uint16_t pkts_n)
{
	uint16_t elts_head = txq->elts_head;
	const uint16_t elts_n = 1 << txq->elts_n;
	const uint16_t elts_m = elts_n - 1;
	unsigned int i = 0;
	unsigned int j = 0;
	uint16_t max_elts;
	uint16_t max_wqe;
	unsigned int max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;
	unsigned int mpw_room = 0;
	unsigned int inl_pad = 0;
	uint32_t inl_hdr;
	uint64_t addr_64;
	struct mlx5_mpw mpw = {
		.state = MLX5_MPW_STATE_CLOSED,
	};

	if (unlikely(!pkts_n))
		return 0;
	/* Start processing. */
	mlx5_tx_complete(txq);
	max_elts = (elts_n - (elts_head - txq->elts_tail));
	max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
	if (unlikely(!max_wqe))
		return 0;
	do {
		struct rte_mbuf *buf = *(pkts++);
		uintptr_t addr;
		unsigned int do_inline = 0; /* Whether inline is possible. */
		uint32_t length;
		uint8_t cs_flags;
		rte_be32_t metadata;

		/* Multi-segmented packet is handled in slow-path outside. */
		assert(NB_SEGS(buf) == 1);
		/* Make sure there is enough room to store this packet. */
		if (max_elts - j == 0)
			break;
		cs_flags = txq_ol_cksum_to_cs(buf);
		/* Copy metadata from mbuf if valid */
		metadata = buf->ol_flags & PKT_TX_METADATA ? buf->tx_metadata :
							     0;
		/* Retrieve packet information. */
		length = PKT_LEN(buf);
		/* Start new session if:
		 * - multi-segment packet
		 * - no space left even for a dseg
		 * - next packet can be inlined with a new WQE
		 * - cs_flag differs
		 */
		if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED) {
			if ((inl_pad + sizeof(struct mlx5_wqe_data_seg) >
			     mpw_room) ||
			    (length <= txq->inline_max_packet_sz &&
			     inl_pad + sizeof(inl_hdr) + length >
			     mpw_room) ||
			    (mpw.wqe->eseg.flow_table_metadata != metadata) ||
			    (mpw.wqe->eseg.cs_flags != cs_flags))
				max_wqe -= mlx5_empw_close(txq, &mpw);
		}
		if (unlikely(mpw.state == MLX5_MPW_STATE_CLOSED)) {
			/* In Enhanced MPW, inline as much as the budget
			 * allows. The remaining space is to be filled with
			 * dsegs. If the title WQEBB isn't padded, it will have
			 * 2 dsegs there.
			 */
			mpw_room = RTE_MIN(MLX5_WQE_SIZE_MAX,
					   (max_inline ? max_inline :
					    pkts_n * MLX5_WQE_DWORD_SIZE) +
					   MLX5_WQE_SIZE);
			if (unlikely(max_wqe * MLX5_WQE_SIZE < mpw_room))
				break;
			/* Don't pad the title WQEBB to not waste WQ. */
			mlx5_empw_new(txq, &mpw, 0);
			mpw_room -= mpw.total_len;
			inl_pad = 0;
			do_inline = length <= txq->inline_max_packet_sz &&
				    sizeof(inl_hdr) + length <= mpw_room &&
				    !txq->mpw_hdr_dseg;
			mpw.wqe->eseg.cs_flags = cs_flags;
			mpw.wqe->eseg.flow_table_metadata = metadata;
		} else {
			/* Evaluate whether the next packet can be inlined.
			 * Inlining is possible when:
			 * - length is less than configured value
			 * - length fits for remaining space
			 * - not required to fill the title WQEBB with dsegs
			 */
			do_inline =
				length <= txq->inline_max_packet_sz &&
				inl_pad + sizeof(inl_hdr) + length <=
				 mpw_room &&
				(!txq->mpw_hdr_dseg ||
				 mpw.total_len >= MLX5_WQE_SIZE);
		}
		if (max_inline && do_inline) {
			/* Inline packet into WQE. */
			unsigned int max;

			assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED);
			assert(length == DATA_LEN(buf));
			inl_hdr = rte_cpu_to_be_32(length | MLX5_INLINE_SEG);
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			mpw.data.raw = (volatile void *)
				((uintptr_t)mpw.data.raw + inl_pad);
			max = tx_mlx5_wq_tailroom(txq,
					(void *)(uintptr_t)mpw.data.raw);
			/* Copy inline header. */
			mpw.data.raw = (volatile void *)
				mlx5_copy_to_wq(
					  (void *)(uintptr_t)mpw.data.raw,
					  &inl_hdr,
					  sizeof(inl_hdr),
					  (void *)(uintptr_t)txq->wqes,
					  max);
			max = tx_mlx5_wq_tailroom(txq,
					(void *)(uintptr_t)mpw.data.raw);
			/* Copy packet data. */
			mpw.data.raw = (volatile void *)
				mlx5_copy_to_wq(
					  (void *)(uintptr_t)mpw.data.raw,
					  (void *)addr,
					  length,
					  (void *)(uintptr_t)txq->wqes,
					  max);
			++mpw.pkts_n;
			mpw.total_len += (inl_pad + sizeof(inl_hdr) + length);
			/* No need to get completion as the entire packet is
			 * copied to WQ. Free the buf right away.
			 */
			rte_pktmbuf_free_seg(buf);
			mpw_room -= (inl_pad + sizeof(inl_hdr) + length);
			/* Add pad in the next packet if any. */
			inl_pad = (((uintptr_t)mpw.data.raw +
					(MLX5_WQE_DWORD_SIZE - 1)) &
					~(MLX5_WQE_DWORD_SIZE - 1)) -
				  (uintptr_t)mpw.data.raw;
		} else {
			/* No inline. Load a dseg of packet pointer. */
			volatile rte_v128u32_t *dseg;

			assert(mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED);
			assert((inl_pad + sizeof(*dseg)) <= mpw_room);
			assert(length == DATA_LEN(buf));
			if (!tx_mlx5_wq_tailroom(txq,
					(void *)((uintptr_t)mpw.data.raw
						+ inl_pad)))
				dseg = (volatile void *)txq->wqes;
			else
				dseg = (volatile void *)
					((uintptr_t)mpw.data.raw +
					 inl_pad);
			(*txq->elts)[elts_head++ & elts_m] = buf;
			addr_64 = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf,
								    uintptr_t));
			*dseg = (rte_v128u32_t) {
				rte_cpu_to_be_32(length),
				mlx5_tx_mb2mr(txq, buf),
				addr_64,
				addr_64 >> 32,
			};
			mpw.data.raw = (volatile void *)(dseg + 1);
			mpw.total_len += (inl_pad + sizeof(*dseg));
			++j;
			++mpw.pkts_n;
			mpw_room -= (inl_pad + sizeof(*dseg));
			inl_pad = 0;
		}
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += length;
#endif
		++i;
	} while (i < pkts_n);
	/* Take a shortcut if nothing must be sent. */
	if (unlikely(i == 0))
		return 0;
	/* Check whether completion threshold has been reached. */
	if (txq->elts_comp + j >= MLX5_TX_COMP_THRESH ||
	    (uint16_t)(txq->wqe_ci - txq->mpw_comp) >=
	     (1 << txq->wqe_n) / MLX5_TX_COMP_THRESH_INLINE_DIV) {
		volatile struct mlx5_wqe *wqe = mpw.wqe;

		/* A CQE slot must always be available. */
		assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
		/* Request completion on last WQE. */
		wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
						MLX5_COMP_MODE_OFFSET);
		/* Save elts_head in unused "immediate" field of WQE. */
		wqe->ctrl[3] = elts_head;
		txq->elts_comp = 0;
		txq->mpw_comp = txq->wqe_ci;
	} else {
		txq->elts_comp += j;
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	if (mpw.state == MLX5_MPW_ENHANCED_STATE_OPENED)
		mlx5_empw_close(txq, &mpw);
	/* Ring QP doorbell. */
	mlx5_tx_dbrec(txq, mpw.wqe);
	txq->elts_head = elts_head;
	return i;
}

/**
 * DPDK callback for TX with Enhanced MPW support.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
	uint16_t nb_tx = 0;

	while (pkts_n > nb_tx) {
		uint16_t n;
		uint16_t ret;

		n = txq_count_contig_multi_seg(&pkts[nb_tx], pkts_n - nb_tx);
		if (n) {
			ret = mlx5_tx_burst(dpdk_txq, &pkts[nb_tx], n);
			if (!ret)
				break;
			nb_tx += ret;
		}
		n = txq_count_contig_single_seg(&pkts[nb_tx], pkts_n - nb_tx);
		if (n) {
			ret = txq_burst_empw(txq, &pkts[nb_tx], n);
			if (!ret)
				break;
			nb_tx += ret;
		}
	}
	return nb_tx;
}
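
/*
 * Editorial note: mlx5_tx_burst_empw() above splits each burst into runs of
 * multi-segment mbufs (handed to the generic mlx5_tx_burst()) and runs of
 * single-segment mbufs (handed to txq_burst_empw()). For example, a burst of
 * {multi, multi, single, single, multi} is transmitted as three sub-bursts:
 * two multi-segment packets, then two single-segment packets, then the last
 * multi-segment one, stopping early if any sub-burst returns 0.
 */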
1931 * @param[in] cqe 1932 * Pointer to CQE. 1933 * 1934 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 1935 * 1936 * @return 1937 * Packet type for struct rte_mbuf. 1938 */ 1939 static inline uint32_t 1940 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe) 1941 { 1942 uint8_t idx; 1943 uint8_t pinfo = cqe->pkt_info; 1944 uint16_t ptype = cqe->hdr_type_etc; 1945 1946 /* 1947 * The index to the array should have: 1948 * bit[1:0] = l3_hdr_type 1949 * bit[4:2] = l4_hdr_type 1950 * bit[5] = ip_frag 1951 * bit[6] = tunneled 1952 * bit[7] = outer_l3_type 1953 */ 1954 idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10); 1955 return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6)); 1956 } 1957 1958 /** 1959 * Initialize Rx WQ and indexes. 1960 * 1961 * @param[in] rxq 1962 * Pointer to RX queue structure. 1963 */ 1964 void 1965 mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) 1966 { 1967 const unsigned int wqe_n = 1 << rxq->elts_n; 1968 unsigned int i; 1969 1970 for (i = 0; (i != wqe_n); ++i) { 1971 volatile struct mlx5_wqe_data_seg *scat; 1972 uintptr_t addr; 1973 uint32_t byte_count; 1974 1975 if (mlx5_rxq_mprq_enabled(rxq)) { 1976 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i]; 1977 1978 scat = &((volatile struct mlx5_wqe_mprq *) 1979 rxq->wqes)[i].dseg; 1980 addr = (uintptr_t)mlx5_mprq_buf_addr(buf); 1981 byte_count = (1 << rxq->strd_sz_n) * 1982 (1 << rxq->strd_num_n); 1983 } else { 1984 struct rte_mbuf *buf = (*rxq->elts)[i]; 1985 1986 scat = &((volatile struct mlx5_wqe_data_seg *) 1987 rxq->wqes)[i]; 1988 addr = rte_pktmbuf_mtod(buf, uintptr_t); 1989 byte_count = DATA_LEN(buf); 1990 } 1991 /* scat->addr must be able to store a pointer. */ 1992 assert(sizeof(scat->addr) >= sizeof(uintptr_t)); 1993 *scat = (struct mlx5_wqe_data_seg){ 1994 .addr = rte_cpu_to_be_64(addr), 1995 .byte_count = rte_cpu_to_be_32(byte_count), 1996 .lkey = mlx5_rx_addr2mr(rxq, addr), 1997 }; 1998 } 1999 rxq->consumed_strd = 0; 2000 rxq->decompressed = 0; 2001 rxq->rq_pi = 0; 2002 rxq->zip = (struct rxq_zip){ 2003 .ai = 0, 2004 }; 2005 /* Update doorbell counter. */ 2006 rxq->rq_ci = wqe_n >> rxq->sges_n; 2007 rte_cio_wmb(); 2008 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 2009 } 2010 2011 /** 2012 * Modify a Verbs queue state. 2013 * This must be called from the primary process. 2014 * 2015 * @param dev 2016 * Pointer to Ethernet device. 2017 * @param sm 2018 * State modify request parameters. 2019 * 2020 * @return 2021 * 0 in case of success else non-zero value and rte_errno is set. 
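 *
 * A minimal illustrative call, following the pattern used by the Rx error
 * recovery path below (queue index 0 and the log text are arbitrary for the
 * example):
 *
 *	struct mlx5_mp_arg_queue_state_modify sm = {
 *		.is_wq = 1,
 *		.queue_id = 0,
 *		.state = IBV_WQS_RESET,
 *	};
 *
 *	if (mlx5_queue_state_modify_primary(dev, &sm))
 *		DRV_LOG(ERR, "cannot reset Rx WQ: %s", strerror(rte_errno));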
2022 */ 2023 int 2024 mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, 2025 const struct mlx5_mp_arg_queue_state_modify *sm) 2026 { 2027 int ret; 2028 struct mlx5_priv *priv = dev->data->dev_private; 2029 2030 if (sm->is_wq) { 2031 struct ibv_wq_attr mod = { 2032 .attr_mask = IBV_WQ_ATTR_STATE, 2033 .wq_state = sm->state, 2034 }; 2035 struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id]; 2036 struct mlx5_rxq_ctrl *rxq_ctrl = 2037 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 2038 2039 ret = mlx5_glue->modify_wq(rxq_ctrl->ibv->wq, &mod); 2040 if (ret) { 2041 DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s\n", 2042 sm->state, strerror(errno)); 2043 rte_errno = errno; 2044 return ret; 2045 } 2046 } else { 2047 struct mlx5_txq_data *txq = (*priv->txqs)[sm->queue_id]; 2048 struct mlx5_txq_ctrl *txq_ctrl = 2049 container_of(txq, struct mlx5_txq_ctrl, txq); 2050 struct ibv_qp_attr mod = { 2051 .qp_state = IBV_QPS_RESET, 2052 .port_num = (uint8_t)priv->ibv_port, 2053 }; 2054 struct ibv_qp *qp = txq_ctrl->ibv->qp; 2055 2056 ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); 2057 if (ret) { 2058 DRV_LOG(ERR, "Cannot change the Tx QP state to RESET " 2059 "%s\n", strerror(errno)); 2060 rte_errno = errno; 2061 return ret; 2062 } 2063 mod.qp_state = IBV_QPS_INIT; 2064 ret = mlx5_glue->modify_qp(qp, &mod, 2065 (IBV_QP_STATE | IBV_QP_PORT)); 2066 if (ret) { 2067 DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s\n", 2068 strerror(errno)); 2069 rte_errno = errno; 2070 return ret; 2071 } 2072 mod.qp_state = IBV_QPS_RTR; 2073 ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); 2074 if (ret) { 2075 DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s\n", 2076 strerror(errno)); 2077 rte_errno = errno; 2078 return ret; 2079 } 2080 mod.qp_state = IBV_QPS_RTS; 2081 ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); 2082 if (ret) { 2083 DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s\n", 2084 strerror(errno)); 2085 rte_errno = errno; 2086 return ret; 2087 } 2088 } 2089 return 0; 2090 } 2091 2092 /** 2093 * Modify a Verbs queue state. 2094 * 2095 * @param dev 2096 * Pointer to Ethernet device. 2097 * @param sm 2098 * State modify request parameters. 2099 * 2100 * @return 2101 * 0 in case of success else non-zero value. 2102 */ 2103 static int 2104 mlx5_queue_state_modify(struct rte_eth_dev *dev, 2105 struct mlx5_mp_arg_queue_state_modify *sm) 2106 { 2107 int ret = 0; 2108 2109 switch (rte_eal_process_type()) { 2110 case RTE_PROC_PRIMARY: 2111 ret = mlx5_queue_state_modify_primary(dev, sm); 2112 break; 2113 case RTE_PROC_SECONDARY: 2114 ret = mlx5_mp_req_queue_state_modify(dev, sm); 2115 break; 2116 default: 2117 break; 2118 } 2119 return ret; 2120 } 2121 2122 /** 2123 * Handle a Rx error. 2124 * The function inserts the RQ state to reset when the first error CQE is 2125 * shown, then drains the CQ by the caller function loop. When the CQ is empty, 2126 * it moves the RQ state to ready and initializes the RQ. 2127 * Next CQE identification and error counting are in the caller responsibility. 2128 * 2129 * @param[in] rxq 2130 * Pointer to RX queue structure. 2131 * @param[in] mbuf_prepare 2132 * Whether to prepare mbufs for the RQ. 2133 * 2134 * @return 2135 * -1 in case of recovery error, otherwise the CQE status. 
2136 */ 2137 int 2138 mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare) 2139 { 2140 const uint16_t cqe_n = 1 << rxq->cqe_n; 2141 const uint16_t cqe_mask = cqe_n - 1; 2142 const unsigned int wqe_n = 1 << rxq->elts_n; 2143 struct mlx5_rxq_ctrl *rxq_ctrl = 2144 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 2145 union { 2146 volatile struct mlx5_cqe *cqe; 2147 volatile struct mlx5_err_cqe *err_cqe; 2148 } u = { 2149 .cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask], 2150 }; 2151 struct mlx5_mp_arg_queue_state_modify sm; 2152 int ret; 2153 2154 switch (rxq->err_state) { 2155 case MLX5_RXQ_ERR_STATE_NO_ERROR: 2156 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET; 2157 /* Fall-through */ 2158 case MLX5_RXQ_ERR_STATE_NEED_RESET: 2159 sm.is_wq = 1; 2160 sm.queue_id = rxq->idx; 2161 sm.state = IBV_WQS_RESET; 2162 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm)) 2163 return -1; 2164 if (rxq_ctrl->dump_file_n < 2165 rxq_ctrl->priv->config.max_dump_files_num) { 2166 MKSTR(err_str, "Unexpected CQE error syndrome " 2167 "0x%02x CQN = %u RQN = %u wqe_counter = %u" 2168 " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome, 2169 rxq->cqn, rxq_ctrl->wqn, 2170 rte_be_to_cpu_16(u.err_cqe->wqe_counter), 2171 rxq->rq_ci << rxq->sges_n, rxq->cq_ci); 2172 MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u", 2173 rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc()); 2174 mlx5_dump_debug_information(name, NULL, err_str, 0); 2175 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 2176 (const void *)((uintptr_t) 2177 rxq->cqes), 2178 sizeof(*u.cqe) * cqe_n); 2179 mlx5_dump_debug_information(name, "MLX5 Error RQ:", 2180 (const void *)((uintptr_t) 2181 rxq->wqes), 2182 16 * wqe_n); 2183 rxq_ctrl->dump_file_n++; 2184 } 2185 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY; 2186 /* Fall-through */ 2187 case MLX5_RXQ_ERR_STATE_NEED_READY: 2188 ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci); 2189 if (ret == MLX5_CQE_STATUS_HW_OWN) { 2190 rte_cio_wmb(); 2191 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 2192 rte_cio_wmb(); 2193 /* 2194 * The RQ consumer index must be zeroed while moving 2195 * from RESET state to RDY state. 2196 */ 2197 *rxq->rq_db = rte_cpu_to_be_32(0); 2198 rte_cio_wmb(); 2199 sm.is_wq = 1; 2200 sm.queue_id = rxq->idx; 2201 sm.state = IBV_WQS_RDY; 2202 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), 2203 &sm)) 2204 return -1; 2205 if (mbuf_prepare) { 2206 const uint16_t q_mask = wqe_n - 1; 2207 uint16_t elt_idx; 2208 struct rte_mbuf **elt; 2209 int i; 2210 unsigned int n = wqe_n - (rxq->rq_ci - 2211 rxq->rq_pi); 2212 2213 for (i = 0; i < (int)n; ++i) { 2214 elt_idx = (rxq->rq_ci + i) & q_mask; 2215 elt = &(*rxq->elts)[elt_idx]; 2216 *elt = rte_mbuf_raw_alloc(rxq->mp); 2217 if (!*elt) { 2218 for (i--; i >= 0; --i) { 2219 elt_idx = (rxq->rq_ci + 2220 i) & q_mask; 2221 elt = &(*rxq->elts) 2222 [elt_idx]; 2223 rte_pktmbuf_free_seg 2224 (*elt); 2225 } 2226 return -1; 2227 } 2228 } 2229 } 2230 mlx5_rxq_initialize(rxq); 2231 rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; 2232 } 2233 return ret; 2234 default: 2235 return -1; 2236 } 2237 } 2238 2239 /** 2240 * Get size of the next packet for a given CQE. For compressed CQEs, the 2241 * consumer index is updated only once all packets of the current one have 2242 * been processed. 2243 * 2244 * @param rxq 2245 * Pointer to RX queue. 2246 * @param cqe 2247 * CQE to process. 2248 * @param[out] mcqe 2249 * Store pointer to mini-CQE if compressed. Otherwise, the pointer is not 2250 * written. 
2251 * 2252 * @return 2253 * 0 in case of empty CQE, otherwise the packet size in bytes. 2254 */ 2255 static inline int 2256 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 2257 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe) 2258 { 2259 struct rxq_zip *zip = &rxq->zip; 2260 uint16_t cqe_n = cqe_cnt + 1; 2261 int len; 2262 uint16_t idx, end; 2263 2264 do { 2265 len = 0; 2266 /* Process compressed data in the CQE and mini arrays. */ 2267 if (zip->ai) { 2268 volatile struct mlx5_mini_cqe8 (*mc)[8] = 2269 (volatile struct mlx5_mini_cqe8 (*)[8]) 2270 (uintptr_t)(&(*rxq->cqes)[zip->ca & 2271 cqe_cnt].pkt_info); 2272 2273 len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt); 2274 *mcqe = &(*mc)[zip->ai & 7]; 2275 if ((++zip->ai & 7) == 0) { 2276 /* Invalidate consumed CQEs */ 2277 idx = zip->ca; 2278 end = zip->na; 2279 while (idx != end) { 2280 (*rxq->cqes)[idx & cqe_cnt].op_own = 2281 MLX5_CQE_INVALIDATE; 2282 ++idx; 2283 } 2284 /* 2285 * Increment consumer index to skip the number 2286 * of CQEs consumed. Hardware leaves holes in 2287 * the CQ ring for software use. 2288 */ 2289 zip->ca = zip->na; 2290 zip->na += 8; 2291 } 2292 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { 2293 /* Invalidate the rest */ 2294 idx = zip->ca; 2295 end = zip->cq_ci; 2296 2297 while (idx != end) { 2298 (*rxq->cqes)[idx & cqe_cnt].op_own = 2299 MLX5_CQE_INVALIDATE; 2300 ++idx; 2301 } 2302 rxq->cq_ci = zip->cq_ci; 2303 zip->ai = 0; 2304 } 2305 /* 2306 * No compressed data, get next CQE and verify if it is 2307 * compressed. 2308 */ 2309 } else { 2310 int ret; 2311 int8_t op_own; 2312 2313 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 2314 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 2315 if (unlikely(ret == MLX5_CQE_STATUS_ERR || 2316 rxq->err_state)) { 2317 ret = mlx5_rx_err_handle(rxq, 0); 2318 if (ret == MLX5_CQE_STATUS_HW_OWN || 2319 ret == -1) 2320 return 0; 2321 } else { 2322 return 0; 2323 } 2324 } 2325 ++rxq->cq_ci; 2326 op_own = cqe->op_own; 2327 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { 2328 volatile struct mlx5_mini_cqe8 (*mc)[8] = 2329 (volatile struct mlx5_mini_cqe8 (*)[8]) 2330 (uintptr_t)(&(*rxq->cqes) 2331 [rxq->cq_ci & 2332 cqe_cnt].pkt_info); 2333 2334 /* Fix endianness. */ 2335 zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt); 2336 /* 2337 * Current mini array position is the one 2338 * returned by check_cqe64(). 2339 * 2340 * If completion comprises several mini arrays, 2341 * as a special case the second one is located 2342 * 7 CQEs after the initial CQE instead of 8 2343 * for subsequent ones. 2344 */ 2345 zip->ca = rxq->cq_ci; 2346 zip->na = zip->ca + 7; 2347 /* Compute the next non compressed CQE. */ 2348 --rxq->cq_ci; 2349 zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; 2350 /* Get packet size to return. */ 2351 len = rte_be_to_cpu_32((*mc)[0].byte_cnt); 2352 *mcqe = &(*mc)[0]; 2353 zip->ai = 1; 2354 /* Prefetch all to be invalidated */ 2355 idx = zip->ca; 2356 end = zip->cq_ci; 2357 while (idx != end) { 2358 rte_prefetch0(&(*rxq->cqes)[(idx) & 2359 cqe_cnt]); 2360 ++idx; 2361 } 2362 } else { 2363 len = rte_be_to_cpu_32(cqe->byte_cnt); 2364 } 2365 } 2366 if (unlikely(rxq->err_state)) { 2367 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 2368 ++rxq->stats.idropped; 2369 } else { 2370 return len; 2371 } 2372 } while (1); 2373 } 2374 2375 /** 2376 * Translate RX completion flags to offload flags. 2377 * 2378 * @param[in] cqe 2379 * Pointer to CQE. 2380 * 2381 * @return 2382 * Offload flags (ol_flags) for struct rte_mbuf. 
2383 */ 2384 static inline uint32_t 2385 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe) 2386 { 2387 uint32_t ol_flags = 0; 2388 uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc); 2389 2390 ol_flags = 2391 TRANSPOSE(flags, 2392 MLX5_CQE_RX_L3_HDR_VALID, 2393 PKT_RX_IP_CKSUM_GOOD) | 2394 TRANSPOSE(flags, 2395 MLX5_CQE_RX_L4_HDR_VALID, 2396 PKT_RX_L4_CKSUM_GOOD); 2397 return ol_flags; 2398 } 2399 2400 /** 2401 * Fill in mbuf fields from RX completion flags. 2402 * Note that pkt->ol_flags should be initialized outside of this function. 2403 * 2404 * @param rxq 2405 * Pointer to RX queue. 2406 * @param pkt 2407 * mbuf to fill. 2408 * @param cqe 2409 * CQE to process. 2410 * @param rss_hash_res 2411 * Packet RSS Hash result. 2412 */ 2413 static inline void 2414 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 2415 volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res) 2416 { 2417 /* Update packet information. */ 2418 pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe); 2419 if (rss_hash_res && rxq->rss_hash) { 2420 pkt->hash.rss = rss_hash_res; 2421 pkt->ol_flags |= PKT_RX_RSS_HASH; 2422 } 2423 if (rxq->mark && MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) { 2424 pkt->ol_flags |= PKT_RX_FDIR; 2425 if (cqe->sop_drop_qpn != 2426 rte_cpu_to_be_32(MLX5_FLOW_MARK_DEFAULT)) { 2427 uint32_t mark = cqe->sop_drop_qpn; 2428 2429 pkt->ol_flags |= PKT_RX_FDIR_ID; 2430 pkt->hash.fdir.hi = mlx5_flow_mark_get(mark); 2431 } 2432 } 2433 if (rxq->csum) 2434 pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); 2435 if (rxq->vlan_strip && 2436 (cqe->hdr_type_etc & rte_cpu_to_be_16(MLX5_CQE_VLAN_STRIPPED))) { 2437 pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; 2438 pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info); 2439 } 2440 if (rxq->hw_timestamp) { 2441 pkt->timestamp = rte_be_to_cpu_64(cqe->timestamp); 2442 pkt->ol_flags |= PKT_RX_TIMESTAMP; 2443 } 2444 } 2445 2446 /** 2447 * DPDK callback for RX. 2448 * 2449 * @param dpdk_rxq 2450 * Generic pointer to RX queue structure. 2451 * @param[out] pkts 2452 * Array to store received packets. 2453 * @param pkts_n 2454 * Maximum number of packets in array. 2455 * 2456 * @return 2457 * Number of packets successfully received (<= pkts_n). 2458 */ 2459 uint16_t 2460 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 2461 { 2462 struct mlx5_rxq_data *rxq = dpdk_rxq; 2463 const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; 2464 const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1; 2465 const unsigned int sges_n = rxq->sges_n; 2466 struct rte_mbuf *pkt = NULL; 2467 struct rte_mbuf *seg = NULL; 2468 volatile struct mlx5_cqe *cqe = 2469 &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 2470 unsigned int i = 0; 2471 unsigned int rq_ci = rxq->rq_ci << sges_n; 2472 int len = 0; /* keep its value across iterations. */ 2473 2474 while (pkts_n) { 2475 unsigned int idx = rq_ci & wqe_cnt; 2476 volatile struct mlx5_wqe_data_seg *wqe = 2477 &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx]; 2478 struct rte_mbuf *rep = (*rxq->elts)[idx]; 2479 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 2480 uint32_t rss_hash_res; 2481 2482 if (pkt) 2483 NEXT(seg) = rep; 2484 seg = rep; 2485 rte_prefetch0(seg); 2486 rte_prefetch0(cqe); 2487 rte_prefetch0(wqe); 2488 rep = rte_mbuf_raw_alloc(rxq->mp); 2489 if (unlikely(rep == NULL)) { 2490 ++rxq->stats.rx_nombuf; 2491 if (!pkt) { 2492 /* 2493 * no buffers before we even started, 2494 * bail out silently. 
2495 */ 2496 break; 2497 } 2498 while (pkt != seg) { 2499 assert(pkt != (*rxq->elts)[idx]); 2500 rep = NEXT(pkt); 2501 NEXT(pkt) = NULL; 2502 NB_SEGS(pkt) = 1; 2503 rte_mbuf_raw_free(pkt); 2504 pkt = rep; 2505 } 2506 break; 2507 } 2508 if (!pkt) { 2509 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 2510 len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe); 2511 if (!len) { 2512 rte_mbuf_raw_free(rep); 2513 break; 2514 } 2515 pkt = seg; 2516 assert(len >= (rxq->crc_present << 2)); 2517 pkt->ol_flags = 0; 2518 /* If compressed, take hash result from mini-CQE. */ 2519 rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ? 2520 cqe->rx_hash_res : 2521 mcqe->rx_hash_result); 2522 rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res); 2523 if (rxq->crc_present) 2524 len -= RTE_ETHER_CRC_LEN; 2525 PKT_LEN(pkt) = len; 2526 } 2527 DATA_LEN(rep) = DATA_LEN(seg); 2528 PKT_LEN(rep) = PKT_LEN(seg); 2529 SET_DATA_OFF(rep, DATA_OFF(seg)); 2530 PORT(rep) = PORT(seg); 2531 (*rxq->elts)[idx] = rep; 2532 /* 2533 * Fill NIC descriptor with the new buffer. The lkey and size 2534 * of the buffers are already known, only the buffer address 2535 * changes. 2536 */ 2537 wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t)); 2538 /* If there's only one MR, no need to replace LKey in WQE. */ 2539 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) 2540 wqe->lkey = mlx5_rx_mb2mr(rxq, rep); 2541 if (len > DATA_LEN(seg)) { 2542 len -= DATA_LEN(seg); 2543 ++NB_SEGS(pkt); 2544 ++rq_ci; 2545 continue; 2546 } 2547 DATA_LEN(seg) = len; 2548 #ifdef MLX5_PMD_SOFT_COUNTERS 2549 /* Increment bytes counter. */ 2550 rxq->stats.ibytes += PKT_LEN(pkt); 2551 #endif 2552 /* Return packet. */ 2553 *(pkts++) = pkt; 2554 pkt = NULL; 2555 --pkts_n; 2556 ++i; 2557 /* Align consumer index to the next stride. */ 2558 rq_ci >>= sges_n; 2559 ++rq_ci; 2560 rq_ci <<= sges_n; 2561 } 2562 if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci))) 2563 return 0; 2564 /* Update the consumer index. */ 2565 rxq->rq_ci = rq_ci >> sges_n; 2566 rte_cio_wmb(); 2567 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 2568 rte_cio_wmb(); 2569 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 2570 #ifdef MLX5_PMD_SOFT_COUNTERS 2571 /* Increment packets counter. */ 2572 rxq->stats.ipackets += i; 2573 #endif 2574 return i; 2575 } 2576 2577 void 2578 mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque) 2579 { 2580 struct mlx5_mprq_buf *buf = opaque; 2581 2582 if (rte_atomic16_read(&buf->refcnt) == 1) { 2583 rte_mempool_put(buf->mp, buf); 2584 } else if (rte_atomic16_add_return(&buf->refcnt, -1) == 0) { 2585 rte_atomic16_set(&buf->refcnt, 1); 2586 rte_mempool_put(buf->mp, buf); 2587 } 2588 } 2589 2590 void 2591 mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf) 2592 { 2593 mlx5_mprq_buf_free_cb(NULL, buf); 2594 } 2595 2596 static inline void 2597 mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx) 2598 { 2599 struct mlx5_mprq_buf *rep = rxq->mprq_repl; 2600 volatile struct mlx5_wqe_data_seg *wqe = 2601 &((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg; 2602 void *addr; 2603 2604 assert(rep != NULL); 2605 /* Replace MPRQ buf. */ 2606 (*rxq->mprq_bufs)[rq_idx] = rep; 2607 /* Replace WQE. */ 2608 addr = mlx5_mprq_buf_addr(rep); 2609 wqe->addr = rte_cpu_to_be_64((uintptr_t)addr); 2610 /* If there's only one MR, no need to replace LKey in WQE. */ 2611 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) 2612 wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr); 2613 /* Stash a mbuf for next replacement. 
*/ 2614 if (likely(!rte_mempool_get(rxq->mprq_mp, (void **)&rep))) 2615 rxq->mprq_repl = rep; 2616 else 2617 rxq->mprq_repl = NULL; 2618 } 2619 2620 /** 2621 * DPDK callback for RX with Multi-Packet RQ support. 2622 * 2623 * @param dpdk_rxq 2624 * Generic pointer to RX queue structure. 2625 * @param[out] pkts 2626 * Array to store received packets. 2627 * @param pkts_n 2628 * Maximum number of packets in array. 2629 * 2630 * @return 2631 * Number of packets successfully received (<= pkts_n). 2632 */ 2633 uint16_t 2634 mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 2635 { 2636 struct mlx5_rxq_data *rxq = dpdk_rxq; 2637 const unsigned int strd_n = 1 << rxq->strd_num_n; 2638 const unsigned int strd_sz = 1 << rxq->strd_sz_n; 2639 const unsigned int strd_shift = 2640 MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en; 2641 const unsigned int cq_mask = (1 << rxq->cqe_n) - 1; 2642 const unsigned int wq_mask = (1 << rxq->elts_n) - 1; 2643 volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 2644 unsigned int i = 0; 2645 uint32_t rq_ci = rxq->rq_ci; 2646 uint16_t consumed_strd = rxq->consumed_strd; 2647 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 2648 2649 while (i < pkts_n) { 2650 struct rte_mbuf *pkt; 2651 void *addr; 2652 int ret; 2653 unsigned int len; 2654 uint16_t strd_cnt; 2655 uint16_t strd_idx; 2656 uint32_t offset; 2657 uint32_t byte_cnt; 2658 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 2659 uint32_t rss_hash_res = 0; 2660 2661 if (consumed_strd == strd_n) { 2662 /* Replace WQE only if the buffer is still in use. */ 2663 if (rte_atomic16_read(&buf->refcnt) > 1) { 2664 mprq_buf_replace(rxq, rq_ci & wq_mask); 2665 /* Release the old buffer. */ 2666 mlx5_mprq_buf_free(buf); 2667 } else if (unlikely(rxq->mprq_repl == NULL)) { 2668 struct mlx5_mprq_buf *rep; 2669 2670 /* 2671 * Currently, the MPRQ mempool is out of buffer 2672 * and doing memcpy regardless of the size of Rx 2673 * packet. Retry allocation to get back to 2674 * normal. 2675 */ 2676 if (!rte_mempool_get(rxq->mprq_mp, 2677 (void **)&rep)) 2678 rxq->mprq_repl = rep; 2679 } 2680 /* Advance to the next WQE. */ 2681 consumed_strd = 0; 2682 ++rq_ci; 2683 buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 2684 } 2685 cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 2686 ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe); 2687 if (!ret) 2688 break; 2689 byte_cnt = ret; 2690 strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >> 2691 MLX5_MPRQ_STRIDE_NUM_SHIFT; 2692 assert(strd_cnt); 2693 consumed_strd += strd_cnt; 2694 if (byte_cnt & MLX5_MPRQ_FILLER_MASK) 2695 continue; 2696 if (mcqe == NULL) { 2697 rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res); 2698 strd_idx = rte_be_to_cpu_16(cqe->wqe_counter); 2699 } else { 2700 /* mini-CQE for MPRQ doesn't have hash result. */ 2701 strd_idx = rte_be_to_cpu_16(mcqe->stride_idx); 2702 } 2703 assert(strd_idx < strd_n); 2704 assert(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & wq_mask)); 2705 /* 2706 * Currently configured to receive a packet per a stride. But if 2707 * MTU is adjusted through kernel interface, device could 2708 * consume multiple strides without raising an error. In this 2709 * case, the packet should be dropped because it is bigger than 2710 * the max_rx_pkt_len. 
2711 */ 2712 if (unlikely(strd_cnt > 1)) { 2713 ++rxq->stats.idropped; 2714 continue; 2715 } 2716 pkt = rte_pktmbuf_alloc(rxq->mp); 2717 if (unlikely(pkt == NULL)) { 2718 ++rxq->stats.rx_nombuf; 2719 break; 2720 } 2721 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 2722 assert((int)len >= (rxq->crc_present << 2)); 2723 if (rxq->crc_present) 2724 len -= RTE_ETHER_CRC_LEN; 2725 offset = strd_idx * strd_sz + strd_shift; 2726 addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf), offset); 2727 /* Initialize the offload flag. */ 2728 pkt->ol_flags = 0; 2729 /* 2730 * Memcpy packets to the target mbuf if: 2731 * - The size of packet is smaller than mprq_max_memcpy_len. 2732 * - Out of buffer in the Mempool for Multi-Packet RQ. 2733 */ 2734 if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl == NULL) { 2735 /* 2736 * When memcpy'ing packet due to out-of-buffer, the 2737 * packet must be smaller than the target mbuf. 2738 */ 2739 if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) { 2740 rte_pktmbuf_free_seg(pkt); 2741 ++rxq->stats.idropped; 2742 continue; 2743 } 2744 rte_memcpy(rte_pktmbuf_mtod(pkt, void *), addr, len); 2745 } else { 2746 rte_iova_t buf_iova; 2747 struct rte_mbuf_ext_shared_info *shinfo; 2748 uint16_t buf_len = strd_cnt * strd_sz; 2749 2750 /* Increment the refcnt of the whole chunk. */ 2751 rte_atomic16_add_return(&buf->refcnt, 1); 2752 assert((uint16_t)rte_atomic16_read(&buf->refcnt) <= 2753 strd_n + 1); 2754 addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM); 2755 /* 2756 * MLX5 device doesn't use iova but it is necessary in a 2757 * case where the Rx packet is transmitted via a 2758 * different PMD. 2759 */ 2760 buf_iova = rte_mempool_virt2iova(buf) + 2761 RTE_PTR_DIFF(addr, buf); 2762 shinfo = rte_pktmbuf_ext_shinfo_init_helper(addr, 2763 &buf_len, mlx5_mprq_buf_free_cb, buf); 2764 /* 2765 * EXT_ATTACHED_MBUF will be set to pkt->ol_flags when 2766 * attaching the stride to mbuf and more offload flags 2767 * will be added below by calling rxq_cq_to_mbuf(). 2768 * Other fields will be overwritten. 2769 */ 2770 rte_pktmbuf_attach_extbuf(pkt, addr, buf_iova, buf_len, 2771 shinfo); 2772 rte_pktmbuf_reset_headroom(pkt); 2773 assert(pkt->ol_flags == EXT_ATTACHED_MBUF); 2774 /* 2775 * Prevent potential overflow due to MTU change through 2776 * kernel interface. 2777 */ 2778 if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) { 2779 rte_pktmbuf_free_seg(pkt); 2780 ++rxq->stats.idropped; 2781 continue; 2782 } 2783 } 2784 rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res); 2785 PKT_LEN(pkt) = len; 2786 DATA_LEN(pkt) = len; 2787 PORT(pkt) = rxq->port_id; 2788 #ifdef MLX5_PMD_SOFT_COUNTERS 2789 /* Increment bytes counter. */ 2790 rxq->stats.ibytes += PKT_LEN(pkt); 2791 #endif 2792 /* Return packet. */ 2793 *(pkts++) = pkt; 2794 ++i; 2795 } 2796 /* Update the consumer indexes. */ 2797 rxq->consumed_strd = consumed_strd; 2798 rte_cio_wmb(); 2799 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 2800 if (rq_ci != rxq->rq_ci) { 2801 rxq->rq_ci = rq_ci; 2802 rte_cio_wmb(); 2803 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 2804 } 2805 #ifdef MLX5_PMD_SOFT_COUNTERS 2806 /* Increment packets counter. */ 2807 rxq->stats.ipackets += i; 2808 #endif 2809 return i; 2810 } 2811 2812 /** 2813 * Dummy DPDK callback for TX. 2814 * 2815 * This function is used to temporarily replace the real callback during 2816 * unsafe control operations on the queue, or in case of error. 2817 * 2818 * @param dpdk_txq 2819 * Generic pointer to TX queue structure. 2820 * @param[in] pkts 2821 * Packets to transmit. 
 * @param pkts_n
2823  *   Number of packets in array.
2824  *
2825  * @return
2826  *   Number of packets successfully transmitted (<= pkts_n).
2827  */
2828 uint16_t
2829 removed_tx_burst(void *dpdk_txq __rte_unused,
2830 		 struct rte_mbuf **pkts __rte_unused,
2831 		 uint16_t pkts_n __rte_unused)
2832 {
2833 	rte_mb();
2834 	return 0;
2835 }
2836 
2837 /**
2838  * Dummy DPDK callback for RX.
2839  *
2840  * This function is used to temporarily replace the real callback during
2841  * unsafe control operations on the queue, or in case of error.
2842  *
2843  * @param dpdk_rxq
2844  *   Generic pointer to RX queue structure.
2845  * @param[out] pkts
2846  *   Array to store received packets.
2847  * @param pkts_n
2848  *   Maximum number of packets in array.
2849  *
2850  * @return
2851  *   Number of packets successfully received (<= pkts_n).
2852  */
2853 uint16_t
2854 removed_rx_burst(void *dpdk_rxq __rte_unused,
2855 		 struct rte_mbuf **pkts __rte_unused,
2856 		 uint16_t pkts_n __rte_unused)
2857 {
2858 	rte_mb();
2859 	return 0;
2860 }
2861 
2862 /*
2863  * Vectorized Rx/Tx routines are not compiled in when the required vector
2864  * instructions are not supported on the target architecture. The following
2865  * null stubs are needed for linkage when the vectorized implementations
2866  * (e.g. mlx5_rxtx_vec_sse.c for x86) are not built.
2867  */
2868 
2869 __rte_weak uint16_t
2870 mlx5_tx_burst_raw_vec(void *dpdk_txq __rte_unused,
2871 		      struct rte_mbuf **pkts __rte_unused,
2872 		      uint16_t pkts_n __rte_unused)
2873 {
2874 	return 0;
2875 }
2876 
2877 __rte_weak uint16_t
2878 mlx5_tx_burst_vec(void *dpdk_txq __rte_unused,
2879 		  struct rte_mbuf **pkts __rte_unused,
2880 		  uint16_t pkts_n __rte_unused)
2881 {
2882 	return 0;
2883 }
2884 
2885 __rte_weak uint16_t
2886 mlx5_rx_burst_vec(void *dpdk_rxq __rte_unused,
2887 		  struct rte_mbuf **pkts __rte_unused,
2888 		  uint16_t pkts_n __rte_unused)
2889 {
2890 	return 0;
2891 }
2892 
2893 __rte_weak int
2894 mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
2895 {
2896 	return -ENOTSUP;
2897 }
2898 
2899 __rte_weak int
2900 mlx5_check_vec_tx_support(struct rte_eth_dev *dev __rte_unused)
2901 {
2902 	return -ENOTSUP;
2903 }
2904 
2905 __rte_weak int
2906 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
2907 {
2908 	return -ENOTSUP;
2909 }
2910 
2911 __rte_weak int
2912 mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)
2913 {
2914 	return -ENOTSUP;
2915 }
2916 
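/*
 * Illustrative usage sketch, excluded from normal builds: applications reach
 * the burst routines above through rte_eth_rx_burst()/rte_eth_tx_burst(),
 * which typically dispatch to mlx5_rx_burst(), mlx5_rx_burst_mprq() or a
 * vectorized variant, and to one of the mlx5_tx_burst*() routines. The guard
 * macro, function name, burst size and port/queue ids below are arbitrary
 * assumptions made for this example only.
 */
#ifdef MLX5_RXTX_USAGE_EXAMPLE
#include <rte_ethdev.h>

#define MLX5_EXAMPLE_BURST 32

static void
mlx5_example_echo_loop(uint16_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *bufs[MLX5_EXAMPLE_BURST];

	for (;;) {
		/* Receive a burst through the selected Rx callback. */
		uint16_t nb_rx = rte_eth_rx_burst(port_id, queue_id, bufs,
						  MLX5_EXAMPLE_BURST);
		/* Echo it back through the selected Tx callback. */
		uint16_t nb_tx = rte_eth_tx_burst(port_id, queue_id, bufs,
						  nb_rx);

		/* Free whatever could not be transmitted. */
		while (nb_tx < nb_rx)
			rte_pktmbuf_free(bufs[nb_tx++]);
	}
}
#endif /* MLX5_RXTX_USAGE_EXAMPLE */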