/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015-2019 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_cycles.h>
#include <rte_flow.h>

#include <mlx5_prm.h>
#include <mlx5_common.h>

#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
	MLX5_TXCMP_CODE_EXIT = 0,
	MLX5_TXCMP_CODE_ERROR,
	MLX5_TXCMP_CODE_SINGLE,
	MLX5_TXCMP_CODE_MULTI,
	MLX5_TXCMP_CODE_TSO,
	MLX5_TXCMP_CODE_EMPW,
};

/*
 * These defines are used to configure the Tx burst routine option set
 * supported at compile time. Options that are not specified are optimized
 * out, because the corresponding 'if' conditions can be evaluated at
 * compile time. Offloads with a bigger runtime check overhead (requiring
 * more CPU cycles to skip) should have a bigger index - this is needed to
 * select the best matching routine when there is no exact match and some
 * offloads are not actually requested.
 */
#define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets. */
#define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported. */
#define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads. */
#define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Checksums offloaded. */
#define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported. */
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported. */
#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported. */
#define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp. */

/* The most common offloads groups.
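 * MLX5_TXOFF_CONFIG_FULL below combines all of the generic offload bits
 * above; the EMPW, MPW and TXPP bits are not part of this group.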
 */
#define MLX5_TXOFF_CONFIG_NONE 0
#define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \
				MLX5_TXOFF_CONFIG_TSO | \
				MLX5_TXOFF_CONFIG_SWP | \
				MLX5_TXOFF_CONFIG_CSUM | \
				MLX5_TXOFF_CONFIG_INLINE | \
				MLX5_TXOFF_CONFIG_VLAN | \
				MLX5_TXOFF_CONFIG_METADATA)

#define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask)

#define MLX5_TXOFF_DECL(func, olx) \
static uint16_t mlx5_tx_burst_##func(void *txq, \
				     struct rte_mbuf **pkts, \
				     uint16_t pkts_n) \
{ \
	return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \
				  pkts, pkts_n, (olx)); \
}

#define MLX5_TXOFF_INFO(func, olx) {mlx5_tx_burst_##func, olx},

static __rte_always_inline uint32_t
rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		   volatile struct mlx5_mini_cqe8 *mcqe);

static __rte_always_inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe);

static __rte_always_inline uint32_t
rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);

static __rte_always_inline void
rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
	       volatile struct mlx5_cqe *cqe,
	       volatile struct mlx5_mini_cqe8 *mcqe);

static int
mlx5_queue_state_modify(struct rte_eth_dev *dev,
			struct mlx5_mp_arg_queue_state_modify *sm);

static inline void
mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp,
			volatile struct mlx5_cqe *__rte_restrict cqe,
			uint32_t phcsum, uint8_t l4_type);

static inline void
mlx5_lro_update_hdr(uint8_t *__rte_restrict padd,
		    volatile struct mlx5_cqe *__rte_restrict cqe,
		    volatile struct mlx5_mini_cqe8 *mcqe,
		    struct mlx5_rxq_data *rxq, uint32_t len);

uint32_t mlx5_ptype_table[] __rte_cache_aligned = {
	[0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */
};

uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned;
uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned;

uint64_t rte_net_mlx5_dynf_inline_mask;
#define PKT_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask

/**
 * Build a table to translate Rx completion flags to packet type.
 *
 * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
 */
void
mlx5_set_ptype_table(void)
{
	unsigned int i;
	uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table;

	/* Last entry must not be overwritten, reserved for errored packet.
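	 * (mlx5_ptype_table[0xff] is pre-set to RTE_PTYPE_ALL_MASK at its
	 * definition above, hence the "- 1" bound in the loop below.)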
	 */
	for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i)
		(*p)[i] = RTE_PTYPE_UNKNOWN;
	/*
	 * The index to the array should have:
	 * bit[1:0] = l3_hdr_type
	 * bit[4:2] = l4_hdr_type
	 * bit[5] = ip_frag
	 * bit[6] = tunneled
	 * bit[7] = outer_l3_type
	 */
	/* L2 */
	(*p)[0x00] = RTE_PTYPE_L2_ETHER;
	/* L3 */
	(*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	(*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	/* Fragmented */
	(*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	(*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	/* TCP */
	(*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	/* UDP */
	(*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	(*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	/* Repeat with outer_l3_type being set. Just in case. */
	(*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	(*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	(*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	(*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	(*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	(*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	/* Tunneled - L3 */
	(*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
	(*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	(*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	(*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
	(*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	(*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	/* Tunneled - Fragmented */
	(*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	(*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	(*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	(*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	/* Tunneled - TCP */
	(*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	/* Tunneled - UDP */
	(*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
	(*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
	(*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
	(*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
}

/**
 * Build a table to translate packet to checksum type of Verbs.
 */
void
mlx5_set_cksum_table(void)
{
	unsigned int i;
	uint8_t v;

	/*
	 * The index should have:
	 * bit[0] = PKT_TX_TCP_SEG
	 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM
	 * bit[4] = PKT_TX_IP_CKSUM
	 * bit[8] = PKT_TX_OUTER_IP_CKSUM
	 * bit[9] = tunnel
	 */
	for (i = 0; i < RTE_DIM(mlx5_cksum_table); ++i) {
		v = 0;
		if (i & (1 << 9)) {
			/* Tunneled packet. */
			if (i & (1 << 8)) /* Outer IP. */
				v |= MLX5_ETH_WQE_L3_CSUM;
			if (i & (1 << 4)) /* Inner IP. */
				v |= MLX5_ETH_WQE_L3_INNER_CSUM;
			if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */
				v |= MLX5_ETH_WQE_L4_INNER_CSUM;
		} else {
			/* No tunnel.
			 */
			if (i & (1 << 4)) /* IP. */
				v |= MLX5_ETH_WQE_L3_CSUM;
			if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */
				v |= MLX5_ETH_WQE_L4_CSUM;
		}
		mlx5_cksum_table[i] = v;
	}
}

/**
 * Build a table to translate packet type of mbuf to SWP type of Verbs.
 */
void
mlx5_set_swp_types_table(void)
{
	unsigned int i;
	uint8_t v;

	/*
	 * The index should have:
	 * bit[0:1] = PKT_TX_L4_MASK
	 * bit[4] = PKT_TX_IPV6
	 * bit[8] = PKT_TX_OUTER_IPV6
	 * bit[9] = PKT_TX_OUTER_UDP
	 */
	for (i = 0; i < RTE_DIM(mlx5_swp_types_table); ++i) {
		v = 0;
		if (i & (1 << 8))
			v |= MLX5_ETH_WQE_L3_OUTER_IPV6;
		if (i & (1 << 9))
			v |= MLX5_ETH_WQE_L4_OUTER_UDP;
		if (i & (1 << 4))
			v |= MLX5_ETH_WQE_L3_INNER_IPV6;
		if ((i & 3) == (PKT_TX_UDP_CKSUM >> 52))
			v |= MLX5_ETH_WQE_L4_INNER_UDP;
		mlx5_swp_types_table[i] = v;
	}
}

/**
 * Set Software Parser flags and offsets in Ethernet Segment of WQE.
 * Flags must be preliminarily initialized to zero.
 *
 * @param loc
 *   Pointer to burst routine local context.
 * @param swp_flags
 *   Pointer to store Software Parser flags.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Software Parser offsets packed in dword.
 *   Software Parser flags are set by pointer.
 */
static __rte_always_inline uint32_t
txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc,
		uint8_t *swp_flags,
		unsigned int olx)
{
	uint64_t ol, tunnel;
	unsigned int idx, off;
	uint32_t set;

	if (!MLX5_TXOFF_CONFIG(SWP))
		return 0;
	ol = loc->mbuf->ol_flags;
	tunnel = ol & PKT_TX_TUNNEL_MASK;
	/*
	 * Check whether Software Parser is required.
	 * Only customized tunnels may ask for.
	 */
	if (likely(tunnel != PKT_TX_TUNNEL_UDP && tunnel != PKT_TX_TUNNEL_IP))
		return 0;
	/*
	 * The index should have:
	 * bit[0:1] = PKT_TX_L4_MASK
	 * bit[4] = PKT_TX_IPV6
	 * bit[8] = PKT_TX_OUTER_IPV6
	 * bit[9] = PKT_TX_OUTER_UDP
	 */
	idx = (ol & (PKT_TX_L4_MASK | PKT_TX_IPV6 | PKT_TX_OUTER_IPV6)) >> 52;
	idx |= (tunnel == PKT_TX_TUNNEL_UDP) ? (1 << 9) : 0;
	*swp_flags = mlx5_swp_types_table[idx];
	/*
	 * Set offsets for SW parser. Since ConnectX-5, SW parser just
	 * complements HW parser. SW parser starts to engage only if HW parser
	 * can't reach a header. For the older devices, HW parser will not kick
	 * in if any of SWP offsets is set. Therefore, all of the L3 offsets
	 * should be set regardless of HW offload.
	 */
	off = loc->mbuf->outer_l2_len;
	if (MLX5_TXOFF_CONFIG(VLAN) && ol & PKT_TX_VLAN_PKT)
		off += sizeof(struct rte_vlan_hdr);
	set = (off >> 1) << 8; /* Outer L3 offset. */
	off += loc->mbuf->outer_l3_len;
	if (tunnel == PKT_TX_TUNNEL_UDP)
		set |= off >> 1; /* Outer L4 offset. */
	if (ol & (PKT_TX_IPV4 | PKT_TX_IPV6)) { /* Inner IP. */
		const uint64_t csum = ol & PKT_TX_L4_MASK;

		off += loc->mbuf->l2_len;
		set |= (off >> 1) << 24; /* Inner L3 offset. */
		if (csum == PKT_TX_TCP_CKSUM ||
		    csum == PKT_TX_UDP_CKSUM ||
		    (MLX5_TXOFF_CONFIG(TSO) && ol & PKT_TX_TCP_SEG)) {
			off += loc->mbuf->l3_len;
			set |= (off >> 1) << 16; /* Inner L4 offset. */
		}
	}
	set = rte_cpu_to_le_32(set);
	return set;
}

/**
 * Convert the Checksum offloads to Verbs.
 *
 * @param buf
 *   Pointer to the mbuf.
 *
 * @return
 *   Converted checksum flags.
 */
static __rte_always_inline uint8_t
txq_ol_cksum_to_cs(struct rte_mbuf *buf)
{
	uint32_t idx;
	uint8_t is_tunnel = !!(buf->ol_flags & PKT_TX_TUNNEL_MASK);
	const uint64_t ol_flags_mask = PKT_TX_TCP_SEG | PKT_TX_L4_MASK |
				       PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM;

	/*
	 * The index should have:
	 * bit[0] = PKT_TX_TCP_SEG
	 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM
	 * bit[4] = PKT_TX_IP_CKSUM
	 * bit[8] = PKT_TX_OUTER_IP_CKSUM
	 * bit[9] = tunnel
	 */
	idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9);
	return mlx5_cksum_table[idx];
}

/**
 * Internal function to compute the number of used descriptors in an Rx queue.
 *
 * @param rxq
 *   The Rx queue.
 *
 * @return
 *   The number of used Rx descriptors.
 */
static uint32_t
rx_queue_count(struct mlx5_rxq_data *rxq)
{
	struct rxq_zip *zip = &rxq->zip;
	volatile struct mlx5_cqe *cqe;
	unsigned int cq_ci = rxq->cq_ci;
	const unsigned int cqe_n = (1 << rxq->cqe_n);
	const unsigned int cqe_cnt = cqe_n - 1;
	unsigned int used = 0;

	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) {
		int8_t op_own;
		unsigned int n;

		op_own = cqe->op_own;
		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
			if (unlikely(zip->ai))
				n = zip->cqe_cnt - zip->ai;
			else
				n = rte_be_to_cpu_32(cqe->byte_cnt);
		else
			n = 1;
		cq_ci += n;
		used += n;
		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	}
	used = RTE_MIN(used, cqe_n);
	return used;
}

/**
 * DPDK callback to check the status of an Rx descriptor.
 *
 * @param rx_queue
 *   The Rx queue.
 * @param[in] offset
 *   The index of the descriptor in the ring.
 *
 * @return
 *   The status of the Rx descriptor.
 */
int
mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
{
	struct mlx5_rxq_data *rxq = rx_queue;
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);

	if (dev->rx_pkt_burst == NULL ||
	    dev->rx_pkt_burst == removed_rx_burst) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	if (offset >= (1 << rxq->cqe_n)) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (offset < rx_queue_count(rxq))
		return RTE_ETH_RX_DESC_DONE;
	return RTE_ETH_RX_DESC_AVAIL;
}

/**
 * DPDK callback to get the Rx queue information.
 *
 * @param dev
 *   Pointer to the device structure.
 * @param rx_queue_id
 *   Rx queue identifier.
 * @param qinfo
 *   Pointer to the Rx queue information structure.
 *
 * @return
 *   None.
 */
void
mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		  struct rte_eth_rxq_info *qinfo)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

	if (!rxq)
		return;
	qinfo->mp = mlx5_rxq_mprq_enabled(rxq) ?
		    rxq->mprq_mp : rxq->mp;
	qinfo->conf.rx_thresh.pthresh = 0;
	qinfo->conf.rx_thresh.hthresh = 0;
	qinfo->conf.rx_thresh.wthresh = 0;
	qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh;
	qinfo->conf.rx_drop_en = 1;
	qinfo->conf.rx_deferred_start = rxq_ctrl ?
					0 : 1;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
	qinfo->scattered_rx = dev->data->scattered_rx;
	qinfo->nb_desc = mlx5_rxq_mprq_enabled(rxq) ?
		(1 << rxq->elts_n) * (1 << rxq->strd_num_n) :
		(1 << rxq->elts_n);
}

/**
 * DPDK callback to get the Rx packet burst mode information.
 *
 * @param dev
 *   Pointer to the device structure.
 * @param rx_queue_id
 *   Rx queue identifier.
 * @param mode
 *   Pointer to the burst mode information.
 *
 * @return
 *   0 on success, -EINVAL on failure.
 */
int
mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
		       uint16_t rx_queue_id __rte_unused,
		       struct rte_eth_burst_mode *mode)
{
	eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq;

	rxq = (*priv->rxqs)[rx_queue_id];
	if (!rxq) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (pkt_burst == mlx5_rx_burst) {
		snprintf(mode->info, sizeof(mode->info), "%s", "Scalar");
	} else if (pkt_burst == mlx5_rx_burst_mprq) {
		snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ");
	} else if (pkt_burst == mlx5_rx_burst_vec) {
#if defined RTE_ARCH_X86_64
		snprintf(mode->info, sizeof(mode->info), "%s", "Vector SSE");
#elif defined RTE_ARCH_ARM64
		snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon");
#elif defined RTE_ARCH_PPC_64
		snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec");
#else
		return -EINVAL;
#endif
	} else if (pkt_burst == mlx5_rx_burst_mprq_vec) {
#if defined RTE_ARCH_X86_64
		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector SSE");
#elif defined RTE_ARCH_ARM64
		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon");
#elif defined RTE_ARCH_PPC_64
		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec");
#else
		return -EINVAL;
#endif
	} else {
		return -EINVAL;
	}
	return 0;
}

/**
 * DPDK callback to get the number of used descriptors in an Rx queue.
 *
 * @param dev
 *   Pointer to the device structure.
 * @param rx_queue_id
 *   The Rx queue.
 *
 * @return
 *   The number of used Rx descriptors.
 *   -EINVAL if the queue is invalid.
 */
uint32_t
mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq;

	if (dev->rx_pkt_burst == NULL ||
	    dev->rx_pkt_burst == removed_rx_burst) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	rxq = (*priv->rxqs)[rx_queue_id];
	if (!rxq) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return rx_queue_count(rxq);
}

#define MLX5_SYSTEM_LOG_DIR "/var/log"
/**
 * Dump debug information to log file.
 *
 * @param fname
 *   The file name.
 * @param hex_title
 *   If not NULL this string is printed as a header to the output
 *   and the output will be in hexadecimal view.
 * @param buf
 *   The buffer address to print out.
 * @param hex_len
 *   The number of bytes to dump out.
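 *
 * @note The dump file is opened in append mode, so successive dumps
 *   accumulate; if the system log directory cannot be opened, the dump
 *   falls back to a file in the current working directory.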
 */
void
mlx5_dump_debug_information(const char *fname, const char *hex_title,
			    const void *buf, unsigned int hex_len)
{
	FILE *fd;

	MKSTR(path, "%s/%s", MLX5_SYSTEM_LOG_DIR, fname);
	fd = fopen(path, "a+");
	if (!fd) {
		DRV_LOG(WARNING, "cannot open %s for debug dump", path);
		MKSTR(path2, "./%s", fname);
		fd = fopen(path2, "a+");
		if (!fd) {
			DRV_LOG(ERR, "cannot open %s for debug dump", path2);
			return;
		}
		DRV_LOG(INFO, "New debug dump in file %s", path2);
	} else {
		DRV_LOG(INFO, "New debug dump in file %s", path);
	}
	if (hex_title)
		rte_hexdump(fd, hex_title, buf, hex_len);
	else
		fprintf(fd, "%s", (const char *)buf);
	fprintf(fd, "\n\n\n");
	fclose(fd);
}

/**
 * Move QP from error state to running state and initialize indexes.
 *
 * @param txq_ctrl
 *   Pointer to TX queue control structure.
 *
 * @return
 *   0 on success, else -1.
 */
static int
tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_mp_arg_queue_state_modify sm = {
		.is_wq = 0,
		.queue_id = txq_ctrl->txq.idx,
	};

	if (mlx5_queue_state_modify(ETH_DEV(txq_ctrl->priv), &sm))
		return -1;
	txq_ctrl->txq.wqe_ci = 0;
	txq_ctrl->txq.wqe_pi = 0;
	txq_ctrl->txq.elts_comp = 0;
	return 0;
}

/*
 * Return 1 if the error CQE has already been signed (seen),
 * otherwise sign it and return 0.
 */
static int
check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
{
	static const uint8_t magic[] = "seen";
	int ret = 1;
	unsigned int i;

	for (i = 0; i < sizeof(magic); ++i)
		if (!ret || err_cqe->rsvd1[i] != magic[i]) {
			ret = 0;
			err_cqe->rsvd1[i] = magic[i];
		}
	return ret;
}

/**
 * Handle error CQE.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param error_cqe
 *   Pointer to the error CQE.
 *
 * @return
 *   A negative value if queue recovery failed, otherwise 0 when
 *   the error completion entry has been handled successfully.
 */
static int
mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
			 volatile struct mlx5_err_cqe *err_cqe)
{
	if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) {
		const uint16_t wqe_m = ((1 << txq->wqe_n) - 1);
		struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);
		uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter);
		int seen = check_err_cqe_seen(err_cqe);

		if (!seen && txq_ctrl->dump_file_n <
		    txq_ctrl->priv->config.max_dump_files_num) {
			MKSTR(err_str, "Unexpected CQE error syndrome "
			      "0x%02x CQN = %u SQN = %u wqe_counter = %u "
			      "wq_ci = %u cq_ci = %u", err_cqe->syndrome,
			      txq->cqe_s, txq->qp_num_8s >> 8,
			      rte_be_to_cpu_16(err_cqe->wqe_counter),
			      txq->wqe_ci, txq->cq_ci);
			MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u",
			      PORT_ID(txq_ctrl->priv), txq->idx,
			      txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc());
			mlx5_dump_debug_information(name, NULL, err_str, 0);
			mlx5_dump_debug_information(name, "MLX5 Error CQ:",
						    (const void *)((uintptr_t)
						    txq->cqes),
						    sizeof(*err_cqe) *
						    (1 << txq->cqe_n));
			mlx5_dump_debug_information(name, "MLX5 Error SQ:",
						    (const void *)((uintptr_t)
						    txq->wqes),
						    MLX5_WQE_SIZE *
						    (1 << txq->wqe_n));
			txq_ctrl->dump_file_n++;
		}
		if (!seen)
			/*
			 * Count errors in WQEs units.
			 * Later it can be improved to count error packets,
			 * for example, by SQ parsing to find how many packets
			 * should be counted for each WQE.
			 */
			txq->stats.oerrors += ((txq->wqe_ci & wqe_m) -
						new_wqe_pi) & wqe_m;
		if (tx_recover_qp(txq_ctrl)) {
			/* Recovering failed - retry later on the same WQE. */
			return -1;
		}
		/* Release all the remaining buffers. */
		txq_free_elts(txq_ctrl);
	}
	return 0;
}

/**
 * Translate Rx completion flags to packet type.
 *
 * @param[in] rxq
 *   Pointer to RX queue structure.
 * @param[in] cqe
 *   Pointer to CQE.
 *
 * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
 *
 * @return
 *   Packet type for struct rte_mbuf.
 */
static inline uint32_t
rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		   volatile struct mlx5_mini_cqe8 *mcqe)
{
	uint8_t idx;
	uint8_t ptype;
	uint8_t pinfo = (cqe->pkt_info & 0x3) << 6;

	/* Get l3/l4 header from mini-CQE in case of L3/L4 format. */
	if (mcqe == NULL ||
	    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
		ptype = (cqe->hdr_type_etc & 0xfc00) >> 10;
	else
		ptype = mcqe->hdr_type >> 2;
	/*
	 * The index to the array should have:
	 * bit[1:0] = l3_hdr_type
	 * bit[4:2] = l4_hdr_type
	 * bit[5] = ip_frag
	 * bit[6] = tunneled
	 * bit[7] = outer_l3_type
	 */
	idx = pinfo | ptype;
	return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6));
}

/**
 * Initialize Rx WQ and indexes.
 *
 * @param[in] rxq
 *   Pointer to RX queue structure.
 */
void
mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
{
	const unsigned int wqe_n = 1 << rxq->elts_n;
	unsigned int i;

	for (i = 0; (i != wqe_n); ++i) {
		volatile struct mlx5_wqe_data_seg *scat;
		uintptr_t addr;
		uint32_t byte_count;

		if (mlx5_rxq_mprq_enabled(rxq)) {
			struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i];

			scat = &((volatile struct mlx5_wqe_mprq *)
				 rxq->wqes)[i].dseg;
			addr = (uintptr_t)mlx5_mprq_buf_addr(buf,
							1 << rxq->strd_num_n);
			byte_count = (1 << rxq->strd_sz_n) *
				     (1 << rxq->strd_num_n);
		} else {
			struct rte_mbuf *buf = (*rxq->elts)[i];

			scat = &((volatile struct mlx5_wqe_data_seg *)
				 rxq->wqes)[i];
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			byte_count = DATA_LEN(buf);
		}
		/* scat->addr must be able to store a pointer. */
		MLX5_ASSERT(sizeof(scat->addr) >= sizeof(uintptr_t));
		*scat = (struct mlx5_wqe_data_seg){
			.addr = rte_cpu_to_be_64(addr),
			.byte_count = rte_cpu_to_be_32(byte_count),
			.lkey = mlx5_rx_addr2mr(rxq, addr),
		};
	}
	rxq->consumed_strd = 0;
	rxq->decompressed = 0;
	rxq->rq_pi = 0;
	rxq->zip = (struct rxq_zip){
		.ai = 0,
	};
	rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ?
		(wqe_n >> rxq->sges_n) * (1 << rxq->strd_num_n) : 0;
	/* Update doorbell counter. */
	rxq->rq_ci = wqe_n >> rxq->sges_n;
	rte_io_wmb();
	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
}

/**
 * Modify a Verbs/DevX queue state.
 * This must be called from the primary process.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param sm
 *   State modify request parameters.
 *
 * @return
 *   0 on success, otherwise a non-zero value and rte_errno is set.
 */
int
mlx5_queue_state_modify_primary(struct rte_eth_dev *dev,
				const struct mlx5_mp_arg_queue_state_modify *sm)
{
	int ret;
	struct mlx5_priv *priv = dev->data->dev_private;

	if (sm->is_wq) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq, struct mlx5_rxq_ctrl, rxq);

		ret = priv->obj_ops.rxq_obj_modify(rxq_ctrl->obj, sm->state);
		if (ret) {
			DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s",
				sm->state, strerror(errno));
			rte_errno = errno;
			return ret;
		}
	} else {
		struct mlx5_txq_data *txq = (*priv->txqs)[sm->queue_id];
		struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);

		ret = priv->obj_ops.txq_obj_modify(txq_ctrl->obj,
						   MLX5_TXQ_MOD_ERR2RDY,
						   (uint8_t)priv->dev_port);
		if (ret)
			return ret;
	}
	return 0;
}

/**
 * Modify a Verbs queue state.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param sm
 *   State modify request parameters.
 *
 * @return
 *   0 on success, otherwise a non-zero value.
 */
static int
mlx5_queue_state_modify(struct rte_eth_dev *dev,
			struct mlx5_mp_arg_queue_state_modify *sm)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int ret = 0;

	switch (rte_eal_process_type()) {
	case RTE_PROC_PRIMARY:
		ret = mlx5_queue_state_modify_primary(dev, sm);
		break;
	case RTE_PROC_SECONDARY:
		ret = mlx5_mp_req_queue_state_modify(&priv->mp_id, sm);
		break;
	default:
		break;
	}
	return ret;
}

/**
 * Handle an Rx error.
 * The function moves the RQ to the RESET state when the first error CQE is
 * seen, then the caller's loop drains the CQ. Once the CQ is empty, the RQ
 * is moved back to the READY state and re-initialized.
 * Identifying the next CQE and counting the errors are the caller's
 * responsibility.
 *
 * @param[in] rxq
 *   Pointer to RX queue structure.
 * @param[in] vec
 *   1 when called from vectorized Rx burst, need to prepare mbufs for the RQ.
 *   0 when called from non-vectorized Rx burst.
 *
 * @return
 *   -1 in case of recovery error, otherwise the CQE status.
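 *
 * @note While moving from the RESET to the READY state the RQ consumer
 *   index is zeroed and, for the vectorized paths, the elts array is
 *   refilled before the queue is re-initialized by mlx5_rxq_initialize().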
 */
int
mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
{
	const uint16_t cqe_n = 1 << rxq->cqe_n;
	const uint16_t cqe_mask = cqe_n - 1;
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint16_t strd_n = 1 << rxq->strd_num_n;
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	union {
		volatile struct mlx5_cqe *cqe;
		volatile struct mlx5_err_cqe *err_cqe;
	} u = {
		.cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask],
	};
	struct mlx5_mp_arg_queue_state_modify sm;
	int ret;

	switch (rxq->err_state) {
	case MLX5_RXQ_ERR_STATE_NO_ERROR:
		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
		/* Fall-through */
	case MLX5_RXQ_ERR_STATE_NEED_RESET:
		sm.is_wq = 1;
		sm.queue_id = rxq->idx;
		sm.state = IBV_WQS_RESET;
		if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm))
			return -1;
		if (rxq_ctrl->dump_file_n <
		    rxq_ctrl->priv->config.max_dump_files_num) {
			MKSTR(err_str, "Unexpected CQE error syndrome "
			      "0x%02x CQN = %u RQN = %u wqe_counter = %u"
			      " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome,
			      rxq->cqn, rxq_ctrl->wqn,
			      rte_be_to_cpu_16(u.err_cqe->wqe_counter),
			      rxq->rq_ci << rxq->sges_n, rxq->cq_ci);
			MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u",
			      rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc());
			mlx5_dump_debug_information(name, NULL, err_str, 0);
			mlx5_dump_debug_information(name, "MLX5 Error CQ:",
						    (const void *)((uintptr_t)
								   rxq->cqes),
						    sizeof(*u.cqe) * cqe_n);
			mlx5_dump_debug_information(name, "MLX5 Error RQ:",
						    (const void *)((uintptr_t)
								   rxq->wqes),
						    16 * wqe_n);
			rxq_ctrl->dump_file_n++;
		}
		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY;
		/* Fall-through */
	case MLX5_RXQ_ERR_STATE_NEED_READY:
		ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci);
		if (ret == MLX5_CQE_STATUS_HW_OWN) {
			rte_io_wmb();
			*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
			rte_io_wmb();
			/*
			 * The RQ consumer index must be zeroed while moving
			 * from RESET state to RDY state.
			 */
			*rxq->rq_db = rte_cpu_to_be_32(0);
			rte_io_wmb();
			sm.is_wq = 1;
			sm.queue_id = rxq->idx;
			sm.state = IBV_WQS_RDY;
			if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv),
						    &sm))
				return -1;
			if (vec) {
				const uint32_t elts_n =
					mlx5_rxq_mprq_enabled(rxq) ?
					wqe_n * strd_n : wqe_n;
				const uint32_t e_mask = elts_n - 1;
				uint32_t elts_ci =
					mlx5_rxq_mprq_enabled(rxq) ?
					rxq->elts_ci : rxq->rq_ci;
				uint32_t elt_idx;
				struct rte_mbuf **elt;
				int i;
				unsigned int n = elts_n - (elts_ci -
							   rxq->rq_pi);

				for (i = 0; i < (int)n; ++i) {
					elt_idx = (elts_ci + i) & e_mask;
					elt = &(*rxq->elts)[elt_idx];
					*elt = rte_mbuf_raw_alloc(rxq->mp);
					if (!*elt) {
						for (i--; i >= 0; --i) {
							elt_idx = (elts_ci +
								   i) & e_mask;
							elt = &(*rxq->elts)
								[elt_idx];
							rte_pktmbuf_free_seg
								(*elt);
						}
						return -1;
					}
				}
				for (i = 0; i < (int)elts_n; ++i) {
					elt = &(*rxq->elts)[i];
					DATA_LEN(*elt) =
						(uint16_t)((*elt)->buf_len -
						rte_pktmbuf_headroom(*elt));
				}
				/* Padding with a fake mbuf for vec Rx.
				 */
				for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
					(*rxq->elts)[elts_n + i] =
								&rxq->fake_mbuf;
			}
			mlx5_rxq_initialize(rxq);
			rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
		}
		return ret;
	default:
		return -1;
	}
}

/**
 * Get size of the next packet for a given CQE. For compressed CQEs, the
 * consumer index is updated only once all packets of the current compressed
 * CQE have been processed.
 *
 * @param rxq
 *   Pointer to RX queue.
 * @param cqe
 *   CQE to process.
 * @param[out] mcqe
 *   Store pointer to mini-CQE if compressed. Otherwise, the pointer is not
 *   written.
 *
 * @return
 *   0 in case of empty CQE, otherwise the packet size in bytes.
 */
static inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe)
{
	struct rxq_zip *zip = &rxq->zip;
	uint16_t cqe_n = cqe_cnt + 1;
	int len;
	uint16_t idx, end;

	do {
		len = 0;
		/* Process compressed data in the CQE and mini arrays. */
		if (zip->ai) {
			volatile struct mlx5_mini_cqe8 (*mc)[8] =
				(volatile struct mlx5_mini_cqe8 (*)[8])
				(uintptr_t)(&(*rxq->cqes)[zip->ca &
							  cqe_cnt].pkt_info);
			len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt &
					       rxq->byte_mask);
			*mcqe = &(*mc)[zip->ai & 7];
			if ((++zip->ai & 7) == 0) {
				/* Invalidate consumed CQEs */
				idx = zip->ca;
				end = zip->na;
				while (idx != end) {
					(*rxq->cqes)[idx & cqe_cnt].op_own =
						MLX5_CQE_INVALIDATE;
					++idx;
				}
				/*
				 * Increment consumer index to skip the number
				 * of CQEs consumed. Hardware leaves holes in
				 * the CQ ring for software use.
				 */
				zip->ca = zip->na;
				zip->na += 8;
			}
			if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
				/* Invalidate the rest */
				idx = zip->ca;
				end = zip->cq_ci;

				while (idx != end) {
					(*rxq->cqes)[idx & cqe_cnt].op_own =
						MLX5_CQE_INVALIDATE;
					++idx;
				}
				rxq->cq_ci = zip->cq_ci;
				zip->ai = 0;
			}
		/*
		 * No compressed data, get next CQE and verify if it is
		 * compressed.
		 */
		} else {
			int ret;
			int8_t op_own;
			uint32_t cq_ci;

			ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
			if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
				if (unlikely(ret == MLX5_CQE_STATUS_ERR ||
					     rxq->err_state)) {
					ret = mlx5_rx_err_handle(rxq, 0);
					if (ret == MLX5_CQE_STATUS_HW_OWN ||
					    ret == -1)
						return 0;
				} else {
					return 0;
				}
			}
			/*
			 * Introduce the local variable to have queue cq_ci
			 * index in queue structure always consistent with
			 * actual CQE boundary (not pointing to the middle
			 * of compressed CQE session).
			 */
			cq_ci = rxq->cq_ci + 1;
			op_own = cqe->op_own;
			if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
				volatile struct mlx5_mini_cqe8 (*mc)[8] =
					(volatile struct mlx5_mini_cqe8 (*)[8])
					(uintptr_t)(&(*rxq->cqes)
						[cq_ci & cqe_cnt].pkt_info);

				/* Fix endianness. */
				zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt);
				/*
				 * Current mini array position is the one
				 * returned by check_cqe().
				 *
				 * If completion comprises several mini arrays,
				 * as a special case the second one is located
				 * 7 CQEs after the initial CQE instead of 8
				 * for subsequent ones.
				 */
				zip->ca = cq_ci;
				zip->na = zip->ca + 7;
				/* Compute the next non compressed CQE. */
				zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
				/* Get packet size to return. */
				len = rte_be_to_cpu_32((*mc)[0].byte_cnt &
						       rxq->byte_mask);
				*mcqe = &(*mc)[0];
				zip->ai = 1;
				/* Prefetch all to be invalidated */
				idx = zip->ca;
				end = zip->cq_ci;
				while (idx != end) {
					rte_prefetch0(&(*rxq->cqes)[(idx) &
								    cqe_cnt]);
					++idx;
				}
			} else {
				rxq->cq_ci = cq_ci;
				len = rte_be_to_cpu_32(cqe->byte_cnt);
			}
		}
		if (unlikely(rxq->err_state)) {
			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
			++rxq->stats.idropped;
		} else {
			return len;
		}
	} while (1);
}

/**
 * Translate Rx completion flags to offload flags.
 *
 * @param[in] cqe
 *   Pointer to CQE.
 *
 * @return
 *   Offload flags (ol_flags) for struct rte_mbuf.
 */
static inline uint32_t
rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe)
{
	uint32_t ol_flags = 0;
	uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc);

	ol_flags =
		TRANSPOSE(flags,
			  MLX5_CQE_RX_L3_HDR_VALID,
			  PKT_RX_IP_CKSUM_GOOD) |
		TRANSPOSE(flags,
			  MLX5_CQE_RX_L4_HDR_VALID,
			  PKT_RX_L4_CKSUM_GOOD);
	return ol_flags;
}

/**
 * Fill in mbuf fields from Rx completion flags.
 * Note that pkt->ol_flags should be initialized outside of this function.
 *
 * @param rxq
 *   Pointer to RX queue.
 * @param pkt
 *   mbuf to fill.
 * @param cqe
 *   CQE to process.
 * @param mcqe
 *   Pointer to the mini-CQE, or NULL if the CQE is not compressed.
 */
static inline void
rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
	       volatile struct mlx5_cqe *cqe,
	       volatile struct mlx5_mini_cqe8 *mcqe)
{
	/* Update packet information. */
	pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe, mcqe);

	if (rxq->rss_hash) {
		uint32_t rss_hash_res = 0;

		/* If compressed, take hash result from mini-CQE. */
		if (mcqe == NULL ||
		    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH)
			rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res);
		else
			rss_hash_res = rte_be_to_cpu_32(mcqe->rx_hash_result);
		if (rss_hash_res) {
			pkt->hash.rss = rss_hash_res;
			pkt->ol_flags |= PKT_RX_RSS_HASH;
		}
	}
	if (rxq->mark) {
		uint32_t mark = 0;

		/* If compressed, take flow tag from mini-CQE.
		 */
		if (mcqe == NULL ||
		    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_FTAG_STRIDX)
			mark = cqe->sop_drop_qpn;
		else
			mark = ((mcqe->byte_cnt_flow & 0xff) << 8) |
				(mcqe->flow_tag_high << 16);
		if (MLX5_FLOW_MARK_IS_VALID(mark)) {
			pkt->ol_flags |= PKT_RX_FDIR;
			if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
				pkt->ol_flags |= PKT_RX_FDIR_ID;
				pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
			}
		}
	}
	if (rxq->dynf_meta && cqe->flow_table_metadata) {
		pkt->ol_flags |= rxq->flow_meta_mask;
		*RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, uint32_t *) =
			cqe->flow_table_metadata;
	}
	if (rxq->csum)
		pkt->ol_flags |= rxq_cq_to_ol_flags(cqe);
	if (rxq->vlan_strip) {
		bool vlan_strip;

		if (mcqe == NULL ||
		    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
			vlan_strip = cqe->hdr_type_etc &
				     RTE_BE16(MLX5_CQE_VLAN_STRIPPED);
		else
			vlan_strip = mcqe->hdr_type &
				     RTE_BE16(MLX5_CQE_VLAN_STRIPPED);
		if (vlan_strip) {
			pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
			pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info);
		}
	}
	if (rxq->hw_timestamp) {
		uint64_t ts = rte_be_to_cpu_64(cqe->timestamp);

		if (rxq->rt_timestamp)
			ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts);
		mlx5_timestamp_set(pkt, rxq->timestamp_offset, ts);
		pkt->ol_flags |= rxq->timestamp_rx_flag;
	}
}

/**
 * DPDK callback for RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
	const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
	const unsigned int sges_n = rxq->sges_n;
	struct rte_mbuf *pkt = NULL;
	struct rte_mbuf *seg = NULL;
	volatile struct mlx5_cqe *cqe =
		&(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
	unsigned int i = 0;
	unsigned int rq_ci = rxq->rq_ci << sges_n;
	int len = 0; /* keep its value across iterations. */

	while (pkts_n) {
		unsigned int idx = rq_ci & wqe_cnt;
		volatile struct mlx5_wqe_data_seg *wqe =
			&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
		struct rte_mbuf *rep = (*rxq->elts)[idx];
		volatile struct mlx5_mini_cqe8 *mcqe = NULL;

		if (pkt)
			NEXT(seg) = rep;
		seg = rep;
		rte_prefetch0(seg);
		rte_prefetch0(cqe);
		rte_prefetch0(wqe);
		/* Allocate the buf from the same pool. */
		rep = rte_mbuf_raw_alloc(seg->pool);
		if (unlikely(rep == NULL)) {
			++rxq->stats.rx_nombuf;
			if (!pkt) {
				/*
				 * no buffers before we even started,
				 * bail out silently.
				 */
				break;
			}
			while (pkt != seg) {
				MLX5_ASSERT(pkt != (*rxq->elts)[idx]);
				rep = NEXT(pkt);
				NEXT(pkt) = NULL;
				NB_SEGS(pkt) = 1;
				rte_mbuf_raw_free(pkt);
				pkt = rep;
			}
			break;
		}
		if (!pkt) {
			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
			len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe);
			if (!len) {
				rte_mbuf_raw_free(rep);
				break;
			}
			pkt = seg;
			MLX5_ASSERT(len >= (rxq->crc_present << 2));
			pkt->ol_flags &= EXT_ATTACHED_MBUF;
			rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe);
			if (rxq->crc_present)
				len -= RTE_ETHER_CRC_LEN;
			PKT_LEN(pkt) = len;
			if (cqe->lro_num_seg > 1) {
				mlx5_lro_update_hdr
					(rte_pktmbuf_mtod(pkt, uint8_t *), cqe,
					 mcqe, rxq, len);
				pkt->ol_flags |= PKT_RX_LRO;
				pkt->tso_segsz = len / cqe->lro_num_seg;
			}
		}
		DATA_LEN(rep) = DATA_LEN(seg);
		PKT_LEN(rep) = PKT_LEN(seg);
		SET_DATA_OFF(rep, DATA_OFF(seg));
		PORT(rep) = PORT(seg);
		(*rxq->elts)[idx] = rep;
		/*
		 * Fill NIC descriptor with the new buffer. The lkey and size
		 * of the buffers are already known, only the buffer address
		 * changes.
		 */
		wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
		/* If there's only one MR, no need to replace LKey in WQE. */
		if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
			wqe->lkey = mlx5_rx_mb2mr(rxq, rep);
		if (len > DATA_LEN(seg)) {
			len -= DATA_LEN(seg);
			++NB_SEGS(pkt);
			++rq_ci;
			continue;
		}
		DATA_LEN(seg) = len;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment bytes counter. */
		rxq->stats.ibytes += PKT_LEN(pkt);
#endif
		/* Return packet. */
		*(pkts++) = pkt;
		pkt = NULL;
		--pkts_n;
		++i;
		/* Align consumer index to the next stride. */
		rq_ci >>= sges_n;
		++rq_ci;
		rq_ci <<= sges_n;
	}
	if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci)))
		return 0;
	/* Update the consumer index. */
	rxq->rq_ci = rq_ci >> sges_n;
	rte_io_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	rte_io_wmb();
	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment packets counter. */
	rxq->stats.ipackets += i;
#endif
	return i;
}

/**
 * Update LRO packet TCP header.
 * The HW LRO feature doesn't update the TCP header after coalescing the
 * TCP segments but supplies information in CQE to fill it by SW.
 *
 * @param tcp
 *   Pointer to the TCP header.
 * @param cqe
 *   Pointer to the completion entry.
 * @param phcsum
 *   The L3 pseudo-header checksum.
 * @param l4_type
 *   The L4 header type extracted from the CQE.
 */
static inline void
mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp,
			volatile struct mlx5_cqe *__rte_restrict cqe,
			uint32_t phcsum, uint8_t l4_type)
{
	/*
	 * The HW calculates only the TCP payload checksum, need to complete
	 * the TCP header checksum and the L3 pseudo-header checksum.
	 */
	uint32_t csum = phcsum + cqe->csum;

	if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK ||
	    l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) {
		tcp->tcp_flags |= RTE_TCP_ACK_FLAG;
		tcp->recv_ack = cqe->lro_ack_seq_num;
		tcp->rx_win = cqe->lro_tcp_win;
	}
	if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK)
		tcp->tcp_flags |= RTE_TCP_PSH_FLAG;
	tcp->cksum = 0;
	csum += rte_raw_cksum(tcp, (tcp->data_off >> 4) * 4);
	csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff);
	csum = (~csum) & 0xffff;
	if (csum == 0)
		csum = 0xffff;
	tcp->cksum = csum;
}

/**
 * Update LRO packet headers.
 * The HW LRO feature doesn't update the L3/TCP headers after coalescing the
 * TCP segments but supplies information in CQE to fill it by SW.
 *
 * @param padd
 *   The packet address.
 * @param cqe
 *   Pointer to the completion entry.
 * @param mcqe
 *   Pointer to the mini-CQE, or NULL if the CQE is not compressed.
 * @param rxq
 *   Pointer to the Rx queue.
 * @param len
 *   The packet length.
 */
static inline void
mlx5_lro_update_hdr(uint8_t *__rte_restrict padd,
		    volatile struct mlx5_cqe *__rte_restrict cqe,
		    volatile struct mlx5_mini_cqe8 *mcqe,
		    struct mlx5_rxq_data *rxq, uint32_t len)
{
	union {
		struct rte_ether_hdr *eth;
		struct rte_vlan_hdr *vlan;
		struct rte_ipv4_hdr *ipv4;
		struct rte_ipv6_hdr *ipv6;
		struct rte_tcp_hdr *tcp;
		uint8_t *hdr;
	} h = {
		.hdr = padd,
	};
	uint16_t proto = h.eth->ether_type;
	uint32_t phcsum;
	uint8_t l4_type;

	h.eth++;
	while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) ||
	       proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) {
		proto = h.vlan->eth_proto;
		h.vlan++;
	}
	if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) {
		h.ipv4->time_to_live = cqe->lro_min_ttl;
		h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd));
		h.ipv4->hdr_checksum = 0;
		h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4);
		phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0);
		h.ipv4++;
	} else {
		h.ipv6->hop_limits = cqe->lro_min_ttl;
		h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) -
						       sizeof(*h.ipv6));
		phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0);
		h.ipv6++;
	}
	if (mcqe == NULL ||
	    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
		l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) &
			   MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT;
	else
		l4_type = (rte_be_to_cpu_16(mcqe->hdr_type) &
			   MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT;
	mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum, l4_type);
}

void
mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque)
{
	struct mlx5_mprq_buf *buf = opaque;

	if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) == 1) {
		rte_mempool_put(buf->mp, buf);
	} else if (unlikely(__atomic_sub_fetch(&buf->refcnt, 1,
					       __ATOMIC_RELAXED) == 0)) {
		__atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED);
		rte_mempool_put(buf->mp, buf);
	}
}

void
mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf)
{
	mlx5_mprq_buf_free_cb(NULL, buf);
}

/**
 * DPDK callback for RX with Multi-Packet RQ support.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	const uint32_t strd_n = 1 << rxq->strd_num_n;
	const uint32_t strd_sz = 1 << rxq->strd_sz_n;
	const uint32_t cq_mask = (1 << rxq->cqe_n) - 1;
	const uint32_t wq_mask = (1 << rxq->elts_n) - 1;
	volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
	unsigned int i = 0;
	uint32_t rq_ci = rxq->rq_ci;
	uint16_t consumed_strd = rxq->consumed_strd;
	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];

	while (i < pkts_n) {
		struct rte_mbuf *pkt;
		int ret;
		uint32_t len;
		uint16_t strd_cnt;
		uint16_t strd_idx;
		uint32_t byte_cnt;
		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
		enum mlx5_rqx_code rxq_code;

		if (consumed_strd == strd_n) {
			/* Replace WQE if the buffer is still in use. */
			mprq_buf_replace(rxq, rq_ci & wq_mask);
			/* Advance to the next WQE. */
			consumed_strd = 0;
			++rq_ci;
			buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
		}
		cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
		ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);
		if (!ret)
			break;
		byte_cnt = ret;
		len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
		MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
		if (rxq->crc_present)
			len -= RTE_ETHER_CRC_LEN;
		if (mcqe &&
		    rxq->mcqe_format == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX)
			strd_cnt = (len / strd_sz) + !!(len % strd_sz);
		else
			strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
				   MLX5_MPRQ_STRIDE_NUM_SHIFT;
		MLX5_ASSERT(strd_cnt);
		consumed_strd += strd_cnt;
		if (byte_cnt & MLX5_MPRQ_FILLER_MASK)
			continue;
		strd_idx = rte_be_to_cpu_16(mcqe == NULL ?
					    cqe->wqe_counter :
					    mcqe->stride_idx);
		MLX5_ASSERT(strd_idx < strd_n);
		MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) &
			      wq_mask));
		pkt = rte_pktmbuf_alloc(rxq->mp);
		if (unlikely(pkt == NULL)) {
			++rxq->stats.rx_nombuf;
			break;
		}
		len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
		MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
		if (rxq->crc_present)
			len -= RTE_ETHER_CRC_LEN;
		rxq_code = mprq_buf_to_pkt(rxq, pkt, len, buf,
					   strd_idx, strd_cnt);
		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
			rte_pktmbuf_free_seg(pkt);
			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
				++rxq->stats.idropped;
				continue;
			}
			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
				++rxq->stats.rx_nombuf;
				break;
			}
		}
		rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe);
		if (cqe->lro_num_seg > 1) {
			mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt, uint8_t *),
					    cqe, mcqe, rxq, len);
			pkt->ol_flags |= PKT_RX_LRO;
			pkt->tso_segsz = len / cqe->lro_num_seg;
		}
		PKT_LEN(pkt) = len;
		PORT(pkt) = rxq->port_id;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment bytes counter. */
		rxq->stats.ibytes += PKT_LEN(pkt);
#endif
		/* Return packet. */
		*(pkts++) = pkt;
		++i;
	}
	/* Update the consumer indexes. */
	rxq->consumed_strd = consumed_strd;
	rte_io_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	if (rq_ci != rxq->rq_ci) {
		rxq->rq_ci = rq_ci;
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment packets counter.
	 */
	rxq->stats.ipackets += i;
#endif
	return i;
}

/**
 * Dummy DPDK callback for TX.
 *
 * This function is used to temporarily replace the real callback during
 * unsafe control operations on the queue, or in case of error.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
removed_tx_burst(void *dpdk_txq __rte_unused,
		 struct rte_mbuf **pkts __rte_unused,
		 uint16_t pkts_n __rte_unused)
{
	rte_mb();
	return 0;
}

/**
 * Dummy DPDK callback for RX.
 *
 * This function is used to temporarily replace the real callback during
 * unsafe control operations on the queue, or in case of error.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
removed_rx_burst(void *dpdk_rxq __rte_unused,
		 struct rte_mbuf **pkts __rte_unused,
		 uint16_t pkts_n __rte_unused)
{
	rte_mb();
	return 0;
}

/*
 * Vectorized Rx/Tx routines are not compiled in when the required vector
 * instructions are not supported on a target architecture. The following
 * null stubs are needed for linkage when those are not included from
 * outside of this file (e.g. mlx5_rxtx_vec_sse.c for x86).
 */

__rte_weak uint16_t
mlx5_rx_burst_vec(void *dpdk_rxq __rte_unused,
		  struct rte_mbuf **pkts __rte_unused,
		  uint16_t pkts_n __rte_unused)
{
	return 0;
}

__rte_weak uint16_t
mlx5_rx_burst_mprq_vec(void *dpdk_rxq __rte_unused,
		       struct rte_mbuf **pkts __rte_unused,
		       uint16_t pkts_n __rte_unused)
{
	return 0;
}

__rte_weak int
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
{
	return -ENOTSUP;
}

__rte_weak int
mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)
{
	return -ENOTSUP;
}

/**
 * Free the mbufs from the linear array of pointers.
 *
 * @param pkts
 *   Pointer to array of packets to be freed.
 * @param pkts_n
 *   Number of packets to be freed.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_free_mbuf(struct rte_mbuf **__rte_restrict pkts,
		  unsigned int pkts_n,
		  unsigned int olx __rte_unused)
{
	struct rte_mempool *pool = NULL;
	struct rte_mbuf **p_free = NULL;
	struct rte_mbuf *mbuf;
	unsigned int n_free = 0;

	/*
	 * The implemented algorithm eliminates
	 * copying pointers to temporary array
	 * for rte_mempool_put_bulk() calls.
	 */
	MLX5_ASSERT(pkts);
	MLX5_ASSERT(pkts_n);
	for (;;) {
		for (;;) {
			/*
			 * Decrement mbuf reference counter, detach
			 * indirect and external buffers if needed.
1865 */ 1866 mbuf = rte_pktmbuf_prefree_seg(*pkts); 1867 if (likely(mbuf != NULL)) { 1868 MLX5_ASSERT(mbuf == *pkts); 1869 if (likely(n_free != 0)) { 1870 if (unlikely(pool != mbuf->pool)) 1871 /* From different pool. */ 1872 break; 1873 } else { 1874 /* Start new scan array. */ 1875 pool = mbuf->pool; 1876 p_free = pkts; 1877 } 1878 ++n_free; 1879 ++pkts; 1880 --pkts_n; 1881 if (unlikely(pkts_n == 0)) { 1882 mbuf = NULL; 1883 break; 1884 } 1885 } else { 1886 /* 1887 * This happens if mbuf is still referenced. 1888 * We can't put it back to the pool, skip. 1889 */ 1890 ++pkts; 1891 --pkts_n; 1892 if (unlikely(n_free != 0)) 1893 /* There is some array to free.*/ 1894 break; 1895 if (unlikely(pkts_n == 0)) 1896 /* Last mbuf, nothing to free. */ 1897 return; 1898 } 1899 } 1900 for (;;) { 1901 /* 1902 * This loop is implemented to avoid multiple 1903 * inlining of rte_mempool_put_bulk(). 1904 */ 1905 MLX5_ASSERT(pool); 1906 MLX5_ASSERT(p_free); 1907 MLX5_ASSERT(n_free); 1908 /* 1909 * Free the array of pre-freed mbufs 1910 * belonging to the same memory pool. 1911 */ 1912 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 1913 if (unlikely(mbuf != NULL)) { 1914 /* There is the request to start new scan. */ 1915 pool = mbuf->pool; 1916 p_free = pkts++; 1917 n_free = 1; 1918 --pkts_n; 1919 if (likely(pkts_n != 0)) 1920 break; 1921 /* 1922 * This is the last mbuf to be freed. 1923 * Do one more loop iteration to complete. 1924 * This is rare case of the last unique mbuf. 1925 */ 1926 mbuf = NULL; 1927 continue; 1928 } 1929 if (likely(pkts_n == 0)) 1930 return; 1931 n_free = 0; 1932 break; 1933 } 1934 } 1935 } 1936 1937 /** 1938 * Free the mbuf from the elts ring buffer till new tail. 1939 * 1940 * @param txq 1941 * Pointer to Tx queue structure. 1942 * @param tail 1943 * Index in elts to free up to, becomes new elts tail. 1944 * @param olx 1945 * Configured Tx offloads mask. It is fully defined at 1946 * compile time and may be used for optimization. 1947 */ 1948 static __rte_always_inline void 1949 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 1950 uint16_t tail, 1951 unsigned int olx __rte_unused) 1952 { 1953 uint16_t n_elts = tail - txq->elts_tail; 1954 1955 MLX5_ASSERT(n_elts); 1956 MLX5_ASSERT(n_elts <= txq->elts_s); 1957 /* 1958 * Implement a loop to support ring buffer wraparound 1959 * with single inlining of mlx5_tx_free_mbuf(). 1960 */ 1961 do { 1962 unsigned int part; 1963 1964 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 1965 part = RTE_MIN(part, n_elts); 1966 MLX5_ASSERT(part); 1967 MLX5_ASSERT(part <= txq->elts_s); 1968 mlx5_tx_free_mbuf(&txq->elts[txq->elts_tail & txq->elts_m], 1969 part, olx); 1970 txq->elts_tail += part; 1971 n_elts -= part; 1972 } while (n_elts); 1973 } 1974 1975 /** 1976 * Store the mbuf being sent into elts ring buffer. 1977 * On Tx completion these mbufs will be freed. 1978 * 1979 * @param txq 1980 * Pointer to Tx queue structure. 1981 * @param pkts 1982 * Pointer to array of packets to be stored. 1983 * @param pkts_n 1984 * Number of packets to be stored. 1985 * @param olx 1986 * Configured Tx offloads mask. It is fully defined at 1987 * compile time and may be used for optimization. 
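 *
 * The copy is done in at most two parts to handle the elts ring buffer
 * wraparound. For illustration (hypothetical values): with elts_s = 256,
 * elts_head = 250 and pkts_n = 10, the first 6 pointers land in slots
 * 250..255 and the remaining 4 wrap around to slots 0..3.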
1988 */ 1989 static __rte_always_inline void 1990 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 1991 struct rte_mbuf **__rte_restrict pkts, 1992 unsigned int pkts_n, 1993 unsigned int olx __rte_unused) 1994 { 1995 unsigned int part; 1996 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 1997 1998 MLX5_ASSERT(pkts); 1999 MLX5_ASSERT(pkts_n); 2000 part = txq->elts_s - (txq->elts_head & txq->elts_m); 2001 MLX5_ASSERT(part); 2002 MLX5_ASSERT(part <= txq->elts_s); 2003 /* This code is a good candidate for vectorizing with SIMD. */ 2004 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 2005 (void *)pkts, 2006 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 2007 txq->elts_head += pkts_n; 2008 if (unlikely(part < pkts_n)) 2009 /* The copy is wrapping around the elts array. */ 2010 rte_memcpy((void *)elts, (void *)(pkts + part), 2011 (pkts_n - part) * sizeof(struct rte_mbuf *)); 2012 } 2013 2014 /** 2015 * Update the completion queue consumer index via doorbell 2016 * and flush the completed data buffers. 2017 * 2018 * @param txq 2019 * Pointer to TX queue structure. 2020 * @param last_cqe 2021 * Valid CQE pointer, if not NULL update txq->wqe_pi and flush the buffers. 2022 * @param olx 2023 * Configured Tx offloads mask. It is fully defined at 2024 * compile time and may be used for optimization. 2025 */ 2026 static __rte_always_inline void 2027 mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq, 2028 volatile struct mlx5_cqe *last_cqe, 2029 unsigned int olx __rte_unused) 2030 { 2031 if (likely(last_cqe != NULL)) { 2032 uint16_t tail; 2033 2034 txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter); 2035 tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m]; 2036 if (likely(tail != txq->elts_tail)) { 2037 mlx5_tx_free_elts(txq, tail, olx); 2038 MLX5_ASSERT(tail == txq->elts_tail); 2039 } 2040 } 2041 } 2042 2043 /** 2044 * Manage TX completions. This routine checks the CQ for 2045 * newly arrived CQEs, deduces the last completed WQE in the SQ, 2046 * updates the SQ producing index and frees all completed mbufs. 2047 * 2048 * @param txq 2049 * Pointer to TX queue structure. 2050 * @param olx 2051 * Configured Tx offloads mask. It is fully defined at 2052 * compile time and may be used for optimization. 2053 * 2054 * NOTE: not inlined intentionally, it makes the tx_burst 2055 * routine smaller, simpler and faster - from experiments. 2056 */ 2057 static void 2058 mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, 2059 unsigned int olx __rte_unused) 2060 { 2061 unsigned int count = MLX5_TX_COMP_MAX_CQE; 2062 volatile struct mlx5_cqe *last_cqe = NULL; 2063 bool ring_doorbell = false; 2064 int ret; 2065 2066 static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value"); 2067 static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be negative value"); 2068 do { 2069 volatile struct mlx5_cqe *cqe; 2070 2071 cqe = &txq->cqes[txq->cq_ci & txq->cqe_m]; 2072 ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci); 2073 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 2074 if (likely(ret != MLX5_CQE_STATUS_ERR)) { 2075 /* No new CQEs in completion queue. */ 2076 MLX5_ASSERT(ret == MLX5_CQE_STATUS_HW_OWN); 2077 break; 2078 } 2079 /* 2080 * Some error occurred, try to restart. 2081 * There is no barrier after the WQE related Doorbell 2082 * write, make sure all writes are completed 2083 * here, before we might perform SQ reset.
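* The rte_wmb() below provides this ordering guarantee.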
2084 */ 2085 rte_wmb(); 2086 ret = mlx5_tx_error_cqe_handle 2087 (txq, (volatile struct mlx5_err_cqe *)cqe); 2088 if (unlikely(ret < 0)) { 2089 /* 2090 * Some error occurred on queue error 2091 * handling, we do not advance the index 2092 * here, allowing to retry on next call. 2093 */ 2094 return; 2095 } 2096 /* 2097 * We are going to fetch all entries with 2098 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status. 2099 * The send queue is supposed to be empty. 2100 */ 2101 ring_doorbell = true; 2102 ++txq->cq_ci; 2103 txq->cq_pi = txq->cq_ci; 2104 last_cqe = NULL; 2105 continue; 2106 } 2107 /* Normal transmit completion. */ 2108 MLX5_ASSERT(txq->cq_ci != txq->cq_pi); 2109 MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) == 2110 cqe->wqe_counter); 2111 ring_doorbell = true; 2112 ++txq->cq_ci; 2113 last_cqe = cqe; 2114 /* 2115 * We have to restrict the amount of processed CQEs 2116 * in one tx_burst routine call. The CQ may be large 2117 * and many CQEs may be updated by the NIC in one 2118 * transaction. Buffers freeing is time consuming, 2119 * multiple iterations may introduce significant 2120 * latency. 2121 */ 2122 if (likely(--count == 0)) 2123 break; 2124 } while (true); 2125 if (likely(ring_doorbell)) { 2126 /* Ring doorbell to notify hardware. */ 2127 rte_compiler_barrier(); 2128 *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); 2129 mlx5_tx_comp_flush(txq, last_cqe, olx); 2130 } 2131 } 2132 2133 /** 2134 * Check if the completion request flag should be set in the last WQE. 2135 * Both pushed mbufs and WQEs are monitored and the completion request 2136 * flag is set if any of thresholds is reached. 2137 * 2138 * @param txq 2139 * Pointer to TX queue structure. 2140 * @param loc 2141 * Pointer to burst routine local context. 2142 * @param olx 2143 * Configured Tx offloads mask. It is fully defined at 2144 * compile time and may be used for optimization. 2145 */ 2146 static __rte_always_inline void 2147 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 2148 struct mlx5_txq_local *__rte_restrict loc, 2149 unsigned int olx) 2150 { 2151 uint16_t head = txq->elts_head; 2152 unsigned int part; 2153 2154 part = MLX5_TXOFF_CONFIG(INLINE) ? 2155 0 : loc->pkts_sent - loc->pkts_copy; 2156 head += part; 2157 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 2158 (MLX5_TXOFF_CONFIG(INLINE) && 2159 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 2160 volatile struct mlx5_wqe *last = loc->wqe_last; 2161 2162 MLX5_ASSERT(last); 2163 txq->elts_comp = head; 2164 if (MLX5_TXOFF_CONFIG(INLINE)) 2165 txq->wqe_comp = txq->wqe_ci; 2166 /* Request unconditional completion on last WQE. */ 2167 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 2168 MLX5_COMP_MODE_OFFSET); 2169 /* Save elts_head in dedicated free on completion queue. */ 2170 #ifdef RTE_LIBRTE_MLX5_DEBUG 2171 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 2172 (last->cseg.opcode >> 8) << 16; 2173 #else 2174 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 2175 #endif 2176 /* A CQE slot must always be available. */ 2177 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 2178 } 2179 } 2180 2181 /** 2182 * DPDK callback to check the status of a tx descriptor. 2183 * 2184 * @param tx_queue 2185 * The tx queue. 2186 * @param[in] offset 2187 * The index of the descriptor in the ring. 2188 * 2189 * @return 2190 * The status of the tx descriptor. 
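 *   RTE_ETH_TX_DESC_FULL - the descriptor is still in use,
 *   RTE_ETH_TX_DESC_DONE - the descriptor has completed and may be reused.
 *
 * A minimal usage sketch through the generic ethdev API, which dispatches
 * to this callback; port_id, queue_id and offset are application-chosen
 * placeholder values:
 *
 *   if (rte_eth_tx_descriptor_status(port_id, queue_id, offset) ==
 *       RTE_ETH_TX_DESC_DONE)
 *       printf("Tx slot %u has completed\n", offset);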
2191 */ 2192 int 2193 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) 2194 { 2195 struct mlx5_txq_data *__rte_restrict txq = tx_queue; 2196 uint16_t used; 2197 2198 mlx5_tx_handle_completion(txq, 0); 2199 used = txq->elts_head - txq->elts_tail; 2200 if (offset < used) 2201 return RTE_ETH_TX_DESC_FULL; 2202 return RTE_ETH_TX_DESC_DONE; 2203 } 2204 2205 /** 2206 * Build the Control Segment with specified opcode: 2207 * - MLX5_OPCODE_SEND 2208 * - MLX5_OPCODE_ENHANCED_MPSW 2209 * - MLX5_OPCODE_TSO 2210 * 2211 * @param txq 2212 * Pointer to TX queue structure. 2213 * @param loc 2214 * Pointer to burst routine local context. 2215 * @param wqe 2216 * Pointer to WQE to fill with built Control Segment. 2217 * @param ds 2218 * Supposed length of WQE in segments. 2219 * @param opcode 2220 * SQ WQE opcode to put into Control Segment. 2221 * @param olx 2222 * Configured Tx offloads mask. It is fully defined at 2223 * compile time and may be used for optimization. 2224 */ 2225 static __rte_always_inline void 2226 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 2227 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2228 struct mlx5_wqe *__rte_restrict wqe, 2229 unsigned int ds, 2230 unsigned int opcode, 2231 unsigned int olx __rte_unused) 2232 { 2233 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 2234 2235 /* For legacy MPW replace the EMPW by TSO with modifier. */ 2236 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 2237 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 2238 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 2239 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2240 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 2241 MLX5_COMP_MODE_OFFSET); 2242 cs->misc = RTE_BE32(0); 2243 } 2244 2245 /** 2246 * Build the Synchronize Queue Segment with specified completion index. 2247 * 2248 * @param txq 2249 * Pointer to TX queue structure. 2250 * @param loc 2251 * Pointer to burst routine local context. 2252 * @param wqe 2253 * Pointer to WQE to fill with built Control Segment. 2254 * @param wci 2255 * Completion index in Clock Queue to wait. 2256 * @param olx 2257 * Configured Tx offloads mask. It is fully defined at 2258 * compile time and may be used for optimization. 2259 */ 2260 static __rte_always_inline void 2261 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 2262 struct mlx5_txq_local *restrict loc __rte_unused, 2263 struct mlx5_wqe *restrict wqe, 2264 unsigned int wci, 2265 unsigned int olx __rte_unused) 2266 { 2267 struct mlx5_wqe_qseg *qs; 2268 2269 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 2270 qs->max_index = rte_cpu_to_be_32(wci); 2271 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq->id); 2272 qs->reserved0 = RTE_BE32(0); 2273 qs->reserved1 = RTE_BE32(0); 2274 } 2275 2276 /** 2277 * Build the Ethernet Segment without inlined data. 2278 * Supports Software Parser, Checksums and VLAN 2279 * insertion Tx offload features. 2280 * 2281 * @param txq 2282 * Pointer to TX queue structure. 2283 * @param loc 2284 * Pointer to burst routine local context. 2285 * @param wqe 2286 * Pointer to WQE to fill with built Ethernet Segment. 2287 * @param olx 2288 * Configured Tx offloads mask. It is fully defined at 2289 * compile time and may be used for optimization. 
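 *
 * No packet data is inlined by this routine - only the checksum flags,
 * Software Parser offsets, flow metadata and the optional VLAN insertion
 * request are written into the Ethernet Segment.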
2290 */ 2291 static __rte_always_inline void 2292 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 2293 struct mlx5_txq_local *__rte_restrict loc, 2294 struct mlx5_wqe *__rte_restrict wqe, 2295 unsigned int olx) 2296 { 2297 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2298 uint32_t csum; 2299 2300 /* 2301 * Calculate and set check sum flags first, dword field 2302 * in segment may be shared with Software Parser flags. 2303 */ 2304 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2305 es->flags = rte_cpu_to_le_32(csum); 2306 /* 2307 * Calculate and set Software Parser offsets and flags. 2308 * These flags a set for custom UDP and IP tunnel packets. 2309 */ 2310 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2311 /* Fill metadata field if needed. */ 2312 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2313 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2314 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2315 /* Engage VLAN tag insertion feature if requested. */ 2316 if (MLX5_TXOFF_CONFIG(VLAN) && 2317 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 2318 /* 2319 * We should get here only if device support 2320 * this feature correctly. 2321 */ 2322 MLX5_ASSERT(txq->vlan_en); 2323 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 2324 loc->mbuf->vlan_tci); 2325 } else { 2326 es->inline_hdr = RTE_BE32(0); 2327 } 2328 } 2329 2330 /** 2331 * Build the Ethernet Segment with minimal inlined data 2332 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 2333 * used to fill the gap in single WQEBB WQEs. 2334 * Supports Software Parser, Checksums and VLAN 2335 * insertion Tx offload features. 2336 * 2337 * @param txq 2338 * Pointer to TX queue structure. 2339 * @param loc 2340 * Pointer to burst routine local context. 2341 * @param wqe 2342 * Pointer to WQE to fill with built Ethernet Segment. 2343 * @param vlan 2344 * Length of VLAN tag insertion if any. 2345 * @param olx 2346 * Configured Tx offloads mask. It is fully defined at 2347 * compile time and may be used for optimization. 2348 */ 2349 static __rte_always_inline void 2350 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 2351 struct mlx5_txq_local *__rte_restrict loc, 2352 struct mlx5_wqe *__rte_restrict wqe, 2353 unsigned int vlan, 2354 unsigned int olx) 2355 { 2356 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2357 uint32_t csum; 2358 uint8_t *psrc, *pdst; 2359 2360 /* 2361 * Calculate and set check sum flags first, dword field 2362 * in segment may be shared with Software Parser flags. 2363 */ 2364 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2365 es->flags = rte_cpu_to_le_32(csum); 2366 /* 2367 * Calculate and set Software Parser offsets and flags. 2368 * These flags a set for custom UDP and IP tunnel packets. 2369 */ 2370 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2371 /* Fill metadata field if needed. */ 2372 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2373 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 
2374 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2375 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2376 (sizeof(uint16_t) + 2377 sizeof(rte_v128u32_t)), 2378 "invalid Ethernet Segment data size"); 2379 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2380 (sizeof(uint16_t) + 2381 sizeof(struct rte_vlan_hdr) + 2382 2 * RTE_ETHER_ADDR_LEN), 2383 "invalid Ethernet Segment data size"); 2384 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2385 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 2386 es->inline_data = *(unaligned_uint16_t *)psrc; 2387 psrc += sizeof(uint16_t); 2388 pdst = (uint8_t *)(es + 1); 2389 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2390 /* Implement VLAN tag insertion as part inline data. */ 2391 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2392 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2393 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2394 /* Insert VLAN ethertype + VLAN tag. */ 2395 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2396 ((RTE_ETHER_TYPE_VLAN << 16) | 2397 loc->mbuf->vlan_tci); 2398 pdst += sizeof(struct rte_vlan_hdr); 2399 /* Copy the rest two bytes from packet data. */ 2400 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2401 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2402 } else { 2403 /* Fill the gap in the title WQEBB with inline data. */ 2404 rte_mov16(pdst, psrc); 2405 } 2406 } 2407 2408 /** 2409 * Build the Ethernet Segment with entire packet 2410 * data inlining. Checks the boundary of WQEBB and 2411 * ring buffer wrapping, supports Software Parser, 2412 * Checksums and VLAN insertion Tx offload features. 2413 * 2414 * @param txq 2415 * Pointer to TX queue structure. 2416 * @param loc 2417 * Pointer to burst routine local context. 2418 * @param wqe 2419 * Pointer to WQE to fill with built Ethernet Segment. 2420 * @param vlan 2421 * Length of VLAN tag insertion if any. 2422 * @param inlen 2423 * Length of data to inline (VLAN included, if any). 2424 * @param tso 2425 * TSO flag, set mss field from the packet. 2426 * @param olx 2427 * Configured Tx offloads mask. It is fully defined at 2428 * compile time and may be used for optimization. 2429 * 2430 * @return 2431 * Pointer to the next Data Segment (aligned and wrapped around). 2432 */ 2433 static __rte_always_inline struct mlx5_wqe_dseg * 2434 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 2435 struct mlx5_txq_local *__rte_restrict loc, 2436 struct mlx5_wqe *__rte_restrict wqe, 2437 unsigned int vlan, 2438 unsigned int inlen, 2439 unsigned int tso, 2440 unsigned int olx) 2441 { 2442 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2443 uint32_t csum; 2444 uint8_t *psrc, *pdst; 2445 unsigned int part; 2446 2447 /* 2448 * Calculate and set check sum flags first, dword field 2449 * in segment may be shared with Software Parser flags. 2450 */ 2451 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2452 if (tso) { 2453 csum <<= 24; 2454 csum |= loc->mbuf->tso_segsz; 2455 es->flags = rte_cpu_to_be_32(csum); 2456 } else { 2457 es->flags = rte_cpu_to_le_32(csum); 2458 } 2459 /* 2460 * Calculate and set Software Parser offsets and flags. 2461 * These flags a set for custom UDP and IP tunnel packets. 2462 */ 2463 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2464 /* Fill metadata field if needed. */ 2465 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2466 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 
2467 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2468 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2469 (sizeof(uint16_t) + 2470 sizeof(rte_v128u32_t)), 2471 "invalid Ethernet Segment data size"); 2472 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2473 (sizeof(uint16_t) + 2474 sizeof(struct rte_vlan_hdr) + 2475 2 * RTE_ETHER_ADDR_LEN), 2476 "invalid Ethernet Segment data size"); 2477 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2478 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 2479 es->inline_data = *(unaligned_uint16_t *)psrc; 2480 psrc += sizeof(uint16_t); 2481 pdst = (uint8_t *)(es + 1); 2482 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2483 /* Implement VLAN tag insertion as part of the inline data. */ 2484 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2485 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2486 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2487 /* Insert VLAN ethertype + VLAN tag. */ 2488 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2489 ((RTE_ETHER_TYPE_VLAN << 16) | 2490 loc->mbuf->vlan_tci); 2491 pdst += sizeof(struct rte_vlan_hdr); 2492 /* Copy the remaining two bytes from the packet data. */ 2493 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2494 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2495 psrc += sizeof(uint16_t); 2496 } else { 2497 /* Fill the gap in the title WQEBB with inline data. */ 2498 rte_mov16(pdst, psrc); 2499 psrc += sizeof(rte_v128u32_t); 2500 } 2501 pdst = (uint8_t *)(es + 2); 2502 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2503 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 2504 inlen -= MLX5_ESEG_MIN_INLINE_SIZE; 2505 if (!inlen) { 2506 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 2507 return (struct mlx5_wqe_dseg *)pdst; 2508 } 2509 /* 2510 * The WQEBB space availability is checked by the caller. 2511 * Here we should be aware of WQE ring buffer wraparound only. 2512 */ 2513 part = (uint8_t *)txq->wqes_end - pdst; 2514 part = RTE_MIN(part, inlen); 2515 do { 2516 rte_memcpy(pdst, psrc, part); 2517 inlen -= part; 2518 if (likely(!inlen)) { 2519 /* 2520 * If the return value is not used by the caller, 2521 * the code below will be optimized out. 2522 */ 2523 pdst += part; 2524 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2525 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 2526 pdst = (uint8_t *)txq->wqes; 2527 return (struct mlx5_wqe_dseg *)pdst; 2528 } 2529 pdst = (uint8_t *)txq->wqes; 2530 psrc += part; 2531 part = inlen; 2532 } while (true); 2533 } 2534 2535 /** 2536 * Copy data from a chain of mbufs to the specified linear buffer. 2537 * If the data from some mbuf is copied completely, 2538 * this mbuf is freed. The local 2539 * structure is used to keep the byte stream state. 2540 * 2541 * @param pdst 2542 * Pointer to the destination linear buffer. 2543 * @param loc 2544 * Pointer to burst routine local context. 2545 * @param len 2546 * Length of data to be copied. 2547 * @param must 2548 * Length of data to be copied regardless of the no-inline hint. 2549 * @param olx 2550 * Configured Tx offloads mask. It is fully defined at 2551 * compile time and may be used for optimization. 2552 * 2553 * @return 2554 * Number of actually copied data bytes. This is always greater than or 2555 * equal to the must parameter and might be less than len if the no-inline 2556 * hint flag is encountered.
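* The no-inline hint is the PKT_TX_DYNF_NOINLINE dynamic flag set on the mbuf by the application.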
2557 */ 2558 static __rte_always_inline unsigned int 2559 mlx5_tx_mseg_memcpy(uint8_t *pdst, 2560 struct mlx5_txq_local *__rte_restrict loc, 2561 unsigned int len, 2562 unsigned int must, 2563 unsigned int olx __rte_unused) 2564 { 2565 struct rte_mbuf *mbuf; 2566 unsigned int part, dlen, copy = 0; 2567 uint8_t *psrc; 2568 2569 MLX5_ASSERT(len); 2570 MLX5_ASSERT(must <= len); 2571 do { 2572 /* Allow zero length packets, must check first. */ 2573 dlen = rte_pktmbuf_data_len(loc->mbuf); 2574 if (dlen <= loc->mbuf_off) { 2575 /* Exhausted packet, just free. */ 2576 mbuf = loc->mbuf; 2577 loc->mbuf = mbuf->next; 2578 rte_pktmbuf_free_seg(mbuf); 2579 loc->mbuf_off = 0; 2580 MLX5_ASSERT(loc->mbuf_nseg > 1); 2581 MLX5_ASSERT(loc->mbuf); 2582 --loc->mbuf_nseg; 2583 if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 2584 unsigned int diff; 2585 2586 if (copy >= must) { 2587 /* 2588 * We already copied the minimal 2589 * requested amount of data. 2590 */ 2591 return copy; 2592 } 2593 diff = must - copy; 2594 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 2595 /* 2596 * Copy only the minimal required 2597 * part of the data buffer. 2598 */ 2599 len = diff; 2600 } 2601 } 2602 continue; 2603 } 2604 dlen -= loc->mbuf_off; 2605 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 2606 loc->mbuf_off); 2607 part = RTE_MIN(len, dlen); 2608 rte_memcpy(pdst, psrc, part); 2609 copy += part; 2610 loc->mbuf_off += part; 2611 len -= part; 2612 if (!len) { 2613 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 2614 loc->mbuf_off = 0; 2615 /* Exhausted packet, just free. */ 2616 mbuf = loc->mbuf; 2617 loc->mbuf = mbuf->next; 2618 rte_pktmbuf_free_seg(mbuf); 2619 loc->mbuf_off = 0; 2620 MLX5_ASSERT(loc->mbuf_nseg >= 1); 2621 --loc->mbuf_nseg; 2622 } 2623 return copy; 2624 } 2625 pdst += part; 2626 } while (true); 2627 } 2628 2629 /** 2630 * Build the Ethernet Segment with inlined data from 2631 * multi-segment packet. Checks the boundary of WQEBB 2632 * and ring buffer wrapping, supports Software Parser, 2633 * Checksums and VLAN insertion Tx offload features. 2634 * 2635 * @param txq 2636 * Pointer to TX queue structure. 2637 * @param loc 2638 * Pointer to burst routine local context. 2639 * @param wqe 2640 * Pointer to WQE to fill with built Ethernet Segment. 2641 * @param vlan 2642 * Length of VLAN tag insertion if any. 2643 * @param inlen 2644 * Length of data to inline (VLAN included, if any). 2645 * @param tso 2646 * TSO flag, set mss field from the packet. 2647 * @param olx 2648 * Configured Tx offloads mask. It is fully defined at 2649 * compile time and may be used for optimization. 2650 * 2651 * @return 2652 * Pointer to the next Data Segment (aligned and 2653 * possible NOT wrapped around - caller should do 2654 * wrapping check on its own). 2655 */ 2656 static __rte_always_inline struct mlx5_wqe_dseg * 2657 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 2658 struct mlx5_txq_local *__rte_restrict loc, 2659 struct mlx5_wqe *__rte_restrict wqe, 2660 unsigned int vlan, 2661 unsigned int inlen, 2662 unsigned int tso, 2663 unsigned int olx) 2664 { 2665 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2666 uint32_t csum; 2667 uint8_t *pdst; 2668 unsigned int part, tlen = 0; 2669 2670 /* 2671 * Calculate and set check sum flags first, uint32_t field 2672 * in segment may be shared with Software Parser flags. 2673 */ 2674 csum = MLX5_TXOFF_CONFIG(CSUM) ? 
txq_ol_cksum_to_cs(loc->mbuf) : 0; 2675 if (tso) { 2676 csum <<= 24; 2677 csum |= loc->mbuf->tso_segsz; 2678 es->flags = rte_cpu_to_be_32(csum); 2679 } else { 2680 es->flags = rte_cpu_to_le_32(csum); 2681 } 2682 /* 2683 * Calculate and set Software Parser offsets and flags. 2684 * These flags a set for custom UDP and IP tunnel packets. 2685 */ 2686 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2687 /* Fill metadata field if needed. */ 2688 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2689 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2690 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2691 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2692 (sizeof(uint16_t) + 2693 sizeof(rte_v128u32_t)), 2694 "invalid Ethernet Segment data size"); 2695 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2696 (sizeof(uint16_t) + 2697 sizeof(struct rte_vlan_hdr) + 2698 2 * RTE_ETHER_ADDR_LEN), 2699 "invalid Ethernet Segment data size"); 2700 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2701 pdst = (uint8_t *)&es->inline_data; 2702 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2703 /* Implement VLAN tag insertion as part inline data. */ 2704 mlx5_tx_mseg_memcpy(pdst, loc, 2705 2 * RTE_ETHER_ADDR_LEN, 2706 2 * RTE_ETHER_ADDR_LEN, olx); 2707 pdst += 2 * RTE_ETHER_ADDR_LEN; 2708 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2709 ((RTE_ETHER_TYPE_VLAN << 16) | 2710 loc->mbuf->vlan_tci); 2711 pdst += sizeof(struct rte_vlan_hdr); 2712 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 2713 } 2714 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 2715 /* 2716 * The WQEBB space availability is checked by caller. 2717 * Here we should be aware of WQE ring buffer wraparound only. 2718 */ 2719 part = (uint8_t *)txq->wqes_end - pdst; 2720 part = RTE_MIN(part, inlen - tlen); 2721 MLX5_ASSERT(part); 2722 do { 2723 unsigned int copy; 2724 2725 /* 2726 * Copying may be interrupted inside the routine 2727 * if run into no inline hint flag. 2728 */ 2729 copy = tlen >= txq->inlen_mode ? 0 : (txq->inlen_mode - tlen); 2730 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 2731 tlen += copy; 2732 if (likely(inlen <= tlen) || copy < part) { 2733 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 2734 pdst += copy; 2735 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2736 return (struct mlx5_wqe_dseg *)pdst; 2737 } 2738 pdst = (uint8_t *)txq->wqes; 2739 part = inlen - tlen; 2740 } while (true); 2741 } 2742 2743 /** 2744 * Build the Data Segment of pointer type. 2745 * 2746 * @param txq 2747 * Pointer to TX queue structure. 2748 * @param loc 2749 * Pointer to burst routine local context. 2750 * @param dseg 2751 * Pointer to WQE to fill with built Data Segment. 2752 * @param buf 2753 * Data buffer to point. 2754 * @param len 2755 * Data buffer length. 2756 * @param olx 2757 * Configured Tx offloads mask. It is fully defined at 2758 * compile time and may be used for optimization. 2759 */ 2760 static __rte_always_inline void 2761 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 2762 struct mlx5_txq_local *__rte_restrict loc, 2763 struct mlx5_wqe_dseg *__rte_restrict dseg, 2764 uint8_t *buf, 2765 unsigned int len, 2766 unsigned int olx __rte_unused) 2767 2768 { 2769 MLX5_ASSERT(len); 2770 dseg->bcount = rte_cpu_to_be_32(len); 2771 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2772 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2773 } 2774 2775 /** 2776 * Build the Data Segment of pointer type or inline 2777 * if data length is less than buffer in minimal 2778 * Data Segment size. 
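* The inline variant is chosen when len does not exceed MLX5_DSEG_MIN_INLINE_SIZE bytes, otherwise a pointer type Data Segment is built.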
2779 * 2780 * @param txq 2781 * Pointer to TX queue structure. 2782 * @param loc 2783 * Pointer to burst routine local context. 2784 * @param dseg 2785 * Pointer to WQE to fill with built Data Segment. 2786 * @param buf 2787 * Data buffer to point. 2788 * @param len 2789 * Data buffer length. 2790 * @param olx 2791 * Configured Tx offloads mask. It is fully defined at 2792 * compile time and may be used for optimization. 2793 */ 2794 static __rte_always_inline void 2795 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 2796 struct mlx5_txq_local *__rte_restrict loc, 2797 struct mlx5_wqe_dseg *__rte_restrict dseg, 2798 uint8_t *buf, 2799 unsigned int len, 2800 unsigned int olx __rte_unused) 2801 2802 { 2803 uintptr_t dst, src; 2804 2805 MLX5_ASSERT(len); 2806 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 2807 dseg->bcount = rte_cpu_to_be_32(len); 2808 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2809 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2810 2811 return; 2812 } 2813 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2814 /* Unrolled implementation of generic rte_memcpy. */ 2815 dst = (uintptr_t)&dseg->inline_data[0]; 2816 src = (uintptr_t)buf; 2817 if (len & 0x08) { 2818 #ifdef RTE_ARCH_STRICT_ALIGN 2819 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 2820 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2821 dst += sizeof(uint32_t); 2822 src += sizeof(uint32_t); 2823 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2824 dst += sizeof(uint32_t); 2825 src += sizeof(uint32_t); 2826 #else 2827 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 2828 dst += sizeof(uint64_t); 2829 src += sizeof(uint64_t); 2830 #endif 2831 } 2832 if (len & 0x04) { 2833 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2834 dst += sizeof(uint32_t); 2835 src += sizeof(uint32_t); 2836 } 2837 if (len & 0x02) { 2838 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 2839 dst += sizeof(uint16_t); 2840 src += sizeof(uint16_t); 2841 } 2842 if (len & 0x01) 2843 *(uint8_t *)dst = *(uint8_t *)src; 2844 } 2845 2846 /** 2847 * Build the Data Segment of inlined data from single 2848 * segment packet, no VLAN insertion. 2849 * 2850 * @param txq 2851 * Pointer to TX queue structure. 2852 * @param loc 2853 * Pointer to burst routine local context. 2854 * @param dseg 2855 * Pointer to WQE to fill with built Data Segment. 2856 * @param buf 2857 * Data buffer to point. 2858 * @param len 2859 * Data buffer length. 2860 * @param olx 2861 * Configured Tx offloads mask. It is fully defined at 2862 * compile time and may be used for optimization. 2863 * 2864 * @return 2865 * Pointer to the next Data Segment after inlined data. 2866 * Ring buffer wraparound check is needed. We do not 2867 * do it here because it may not be needed for the 2868 * last packet in the eMPW session. 2869 */ 2870 static __rte_always_inline struct mlx5_wqe_dseg * 2871 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 2872 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2873 struct mlx5_wqe_dseg *__rte_restrict dseg, 2874 uint8_t *buf, 2875 unsigned int len, 2876 unsigned int olx __rte_unused) 2877 { 2878 unsigned int part; 2879 uint8_t *pdst; 2880 2881 if (!MLX5_TXOFF_CONFIG(MPW)) { 2882 /* Store the descriptor byte counter for eMPW sessions. */ 2883 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2884 pdst = &dseg->inline_data[0]; 2885 } else { 2886 /* The entire legacy MPW session counter is stored on close. 
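No per-descriptor byte count is written here - the inlined data is packed back to back and the total length is accounted when the legacy MPW session is closed.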
*/ 2887 pdst = (uint8_t *)dseg; 2888 } 2889 /* 2890 * The WQEBB space availability is checked by caller. 2891 * Here we should be aware of WQE ring buffer wraparound only. 2892 */ 2893 part = (uint8_t *)txq->wqes_end - pdst; 2894 part = RTE_MIN(part, len); 2895 do { 2896 rte_memcpy(pdst, buf, part); 2897 len -= part; 2898 if (likely(!len)) { 2899 pdst += part; 2900 if (!MLX5_TXOFF_CONFIG(MPW)) 2901 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2902 /* Note: no final wraparound check here. */ 2903 return (struct mlx5_wqe_dseg *)pdst; 2904 } 2905 pdst = (uint8_t *)txq->wqes; 2906 buf += part; 2907 part = len; 2908 } while (true); 2909 } 2910 2911 /** 2912 * Build the Data Segment of inlined data from single 2913 * segment packet with VLAN insertion. 2914 * 2915 * @param txq 2916 * Pointer to TX queue structure. 2917 * @param loc 2918 * Pointer to burst routine local context. 2919 * @param dseg 2920 * Pointer to the dseg fill with built Data Segment. 2921 * @param buf 2922 * Data buffer to point. 2923 * @param len 2924 * Data buffer length. 2925 * @param olx 2926 * Configured Tx offloads mask. It is fully defined at 2927 * compile time and may be used for optimization. 2928 * 2929 * @return 2930 * Pointer to the next Data Segment after inlined data. 2931 * Ring buffer wraparound check is needed. 2932 */ 2933 static __rte_always_inline struct mlx5_wqe_dseg * 2934 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 2935 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2936 struct mlx5_wqe_dseg *__rte_restrict dseg, 2937 uint8_t *buf, 2938 unsigned int len, 2939 unsigned int olx __rte_unused) 2940 2941 { 2942 unsigned int part; 2943 uint8_t *pdst; 2944 2945 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 2946 static_assert(MLX5_DSEG_MIN_INLINE_SIZE == 2947 (2 * RTE_ETHER_ADDR_LEN), 2948 "invalid Data Segment data size"); 2949 if (!MLX5_TXOFF_CONFIG(MPW)) { 2950 /* Store the descriptor byte counter for eMPW sessions. */ 2951 dseg->bcount = rte_cpu_to_be_32 2952 ((len + sizeof(struct rte_vlan_hdr)) | 2953 MLX5_ETH_WQE_DATA_INLINE); 2954 pdst = &dseg->inline_data[0]; 2955 } else { 2956 /* The entire legacy MPW session counter is stored on close. */ 2957 pdst = (uint8_t *)dseg; 2958 } 2959 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 2960 buf += MLX5_DSEG_MIN_INLINE_SIZE; 2961 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 2962 len -= MLX5_DSEG_MIN_INLINE_SIZE; 2963 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ 2964 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 2965 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 2966 pdst = (uint8_t *)txq->wqes; 2967 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 2968 loc->mbuf->vlan_tci); 2969 pdst += sizeof(struct rte_vlan_hdr); 2970 /* 2971 * The WQEBB space availability is checked by caller. 2972 * Here we should be aware of WQE ring buffer wraparound only. 2973 */ 2974 part = (uint8_t *)txq->wqes_end - pdst; 2975 part = RTE_MIN(part, len); 2976 do { 2977 rte_memcpy(pdst, buf, part); 2978 len -= part; 2979 if (likely(!len)) { 2980 pdst += part; 2981 if (!MLX5_TXOFF_CONFIG(MPW)) 2982 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2983 /* Note: no final wraparound check here. */ 2984 return (struct mlx5_wqe_dseg *)pdst; 2985 } 2986 pdst = (uint8_t *)txq->wqes; 2987 buf += part; 2988 part = len; 2989 } while (true); 2990 } 2991 2992 /** 2993 * Build the Ethernet Segment with optionally inlined data with 2994 * VLAN insertion and following Data Segments (if any) from 2995 * multi-segment packet. 
Used by ordinary send and TSO. 2996 * 2997 * @param txq 2998 * Pointer to TX queue structure. 2999 * @param loc 3000 * Pointer to burst routine local context. 3001 * @param wqe 3002 * Pointer to WQE to fill with built Ethernet/Data Segments. 3003 * @param vlan 3004 * Length of VLAN header to insert, 0 means no VLAN insertion. 3005 * @param inlen 3006 * Data length to inline. For TSO this parameter specifies 3007 * exact value, for ordinary send routine can be aligned by 3008 * caller to provide better WQE space saving and data buffer 3009 * start address alignment. This length includes VLAN header 3010 * being inserted. 3011 * @param tso 3012 * Zero means ordinary send, inlined data can be extended, 3013 * otherwise this is TSO, inlined data length is fixed. 3014 * @param olx 3015 * Configured Tx offloads mask. It is fully defined at 3016 * compile time and may be used for optimization. 3017 * 3018 * @return 3019 * Actual size of built WQE in segments. 3020 */ 3021 static __rte_always_inline unsigned int 3022 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 3023 struct mlx5_txq_local *__rte_restrict loc, 3024 struct mlx5_wqe *__rte_restrict wqe, 3025 unsigned int vlan, 3026 unsigned int inlen, 3027 unsigned int tso, 3028 unsigned int olx __rte_unused) 3029 { 3030 struct mlx5_wqe_dseg *__rte_restrict dseg; 3031 unsigned int ds; 3032 3033 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 3034 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 3035 loc->mbuf_off = 0; 3036 3037 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 3038 if (!loc->mbuf_nseg) 3039 goto dseg_done; 3040 /* 3041 * There are still some mbuf remaining, not inlined. 3042 * The first mbuf may be partially inlined and we 3043 * must process the possible non-zero data offset. 3044 */ 3045 if (loc->mbuf_off) { 3046 unsigned int dlen; 3047 uint8_t *dptr; 3048 3049 /* 3050 * Exhausted packets must be dropped before. 3051 * Non-zero offset means there are some data 3052 * remained in the packet. 3053 */ 3054 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 3055 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 3056 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 3057 loc->mbuf_off); 3058 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 3059 /* 3060 * Build the pointer/minimal data Data Segment. 3061 * Do ring buffer wrapping check in advance. 3062 */ 3063 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3064 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3065 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 3066 /* Store the mbuf to be freed on completion. */ 3067 MLX5_ASSERT(loc->elts_free); 3068 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3069 --loc->elts_free; 3070 ++dseg; 3071 if (--loc->mbuf_nseg == 0) 3072 goto dseg_done; 3073 loc->mbuf = loc->mbuf->next; 3074 loc->mbuf_off = 0; 3075 } 3076 do { 3077 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3078 struct rte_mbuf *mbuf; 3079 3080 /* Zero length segment found, just skip. 
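The empty mbuf is freed immediately and consumes no Data Segment.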
*/ 3081 mbuf = loc->mbuf; 3082 loc->mbuf = loc->mbuf->next; 3083 rte_pktmbuf_free_seg(mbuf); 3084 if (--loc->mbuf_nseg == 0) 3085 break; 3086 } else { 3087 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3088 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3089 mlx5_tx_dseg_iptr 3090 (txq, loc, dseg, 3091 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3092 rte_pktmbuf_data_len(loc->mbuf), olx); 3093 MLX5_ASSERT(loc->elts_free); 3094 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3095 --loc->elts_free; 3096 ++dseg; 3097 if (--loc->mbuf_nseg == 0) 3098 break; 3099 loc->mbuf = loc->mbuf->next; 3100 } 3101 } while (true); 3102 3103 dseg_done: 3104 /* Calculate actual segments used from the dseg pointer. */ 3105 if ((uintptr_t)wqe < (uintptr_t)dseg) 3106 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 3107 else 3108 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 3109 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 3110 return ds; 3111 } 3112 3113 /** 3114 * The routine checks timestamp flag in the current packet, 3115 * and push WAIT WQE into the queue if scheduling is required. 3116 * 3117 * @param txq 3118 * Pointer to TX queue structure. 3119 * @param loc 3120 * Pointer to burst routine local context. 3121 * @param olx 3122 * Configured Tx offloads mask. It is fully defined at 3123 * compile time and may be used for optimization. 3124 * 3125 * @return 3126 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3127 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 3128 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 3129 * Local context variables partially updated. 3130 */ 3131 static __rte_always_inline enum mlx5_txcmp_code 3132 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 3133 struct mlx5_txq_local *restrict loc, 3134 unsigned int olx) 3135 { 3136 if (MLX5_TXOFF_CONFIG(TXPP) && 3137 loc->mbuf->ol_flags & txq->ts_mask) { 3138 struct mlx5_wqe *wqe; 3139 uint64_t ts; 3140 int32_t wci; 3141 3142 /* 3143 * Estimate the required space quickly and roughly. 3144 * We would like to ensure the packet can be pushed 3145 * to the queue and we won't get the orphan WAIT WQE. 3146 */ 3147 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 3148 loc->elts_free < NB_SEGS(loc->mbuf)) 3149 return MLX5_TXCMP_CODE_EXIT; 3150 /* Convert the timestamp into completion to wait. */ 3151 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 3152 wci = mlx5_txpp_convert_tx_ts(txq->sh, ts); 3153 if (unlikely(wci < 0)) 3154 return MLX5_TXCMP_CODE_SINGLE; 3155 /* Build the WAIT WQE with specified completion. */ 3156 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3157 mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx); 3158 mlx5_tx_wseg_init(txq, loc, wqe, wci, olx); 3159 ++txq->wqe_ci; 3160 --loc->wqe_free; 3161 return MLX5_TXCMP_CODE_MULTI; 3162 } 3163 return MLX5_TXCMP_CODE_SINGLE; 3164 } 3165 3166 /** 3167 * Tx one packet function for multi-segment TSO. Supports all 3168 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 3169 * sends one packet per WQE. 3170 * 3171 * This routine is responsible for storing processed mbuf 3172 * into elts ring buffer and update elts_head. 3173 * 3174 * @param txq 3175 * Pointer to TX queue structure. 3176 * @param loc 3177 * Pointer to burst routine local context. 3178 * @param olx 3179 * Configured Tx offloads mask. It is fully defined at 3180 * compile time and may be used for optimization. 3181 * 3182 * @return 3183 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 
3184 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3185 * Local context variables partially updated. 3186 */ 3187 static __rte_always_inline enum mlx5_txcmp_code 3188 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 3189 struct mlx5_txq_local *__rte_restrict loc, 3190 unsigned int olx) 3191 { 3192 struct mlx5_wqe *__rte_restrict wqe; 3193 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 3194 3195 if (MLX5_TXOFF_CONFIG(TXPP)) { 3196 enum mlx5_txcmp_code wret; 3197 3198 /* Generate WAIT for scheduling if requested. */ 3199 wret = mlx5_tx_schedule_send(txq, loc, olx); 3200 if (wret == MLX5_TXCMP_CODE_EXIT) 3201 return MLX5_TXCMP_CODE_EXIT; 3202 if (wret == MLX5_TXCMP_CODE_ERROR) 3203 return MLX5_TXCMP_CODE_ERROR; 3204 } 3205 /* 3206 * Calculate data length to be inlined to estimate 3207 * the required space in WQE ring buffer. 3208 */ 3209 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3210 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3211 vlan = sizeof(struct rte_vlan_hdr); 3212 inlen = loc->mbuf->l2_len + vlan + 3213 loc->mbuf->l3_len + loc->mbuf->l4_len; 3214 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 3215 return MLX5_TXCMP_CODE_ERROR; 3216 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3217 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 3218 /* Packet must contain all TSO headers. */ 3219 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 3220 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3221 inlen > (dlen + vlan))) 3222 return MLX5_TXCMP_CODE_ERROR; 3223 MLX5_ASSERT(inlen >= txq->inlen_mode); 3224 /* 3225 * Check whether there are enough free WQEBBs: 3226 * - Control Segment 3227 * - Ethernet Segment 3228 * - First Segment of inlined Ethernet data 3229 * - ... data continued ... 3230 * - Data Segments of pointer/min inline type 3231 */ 3232 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3233 MLX5_ESEG_MIN_INLINE_SIZE + 3234 MLX5_WSEG_SIZE + 3235 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3236 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3237 return MLX5_TXCMP_CODE_EXIT; 3238 /* Check for maximal WQE size. */ 3239 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3240 return MLX5_TXCMP_CODE_ERROR; 3241 #ifdef MLX5_PMD_SOFT_COUNTERS 3242 /* Update sent data bytes/packets counters. */ 3243 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 3244 loc->mbuf->tso_segsz; 3245 /* 3246 * One will be added for mbuf itself 3247 * at the end of the mlx5_tx_burst from 3248 * loc->pkts_sent field. 3249 */ 3250 --ntcp; 3251 txq->stats.opackets += ntcp; 3252 txq->stats.obytes += dlen + vlan + ntcp * inlen; 3253 #endif 3254 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3255 loc->wqe_last = wqe; 3256 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 3257 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 3258 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3259 txq->wqe_ci += (ds + 3) / 4; 3260 loc->wqe_free -= (ds + 3) / 4; 3261 return MLX5_TXCMP_CODE_MULTI; 3262 } 3263 3264 /** 3265 * Tx one packet function for multi-segment SEND. Supports all 3266 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3267 * sends one packet per WQE, without any data inlining in 3268 * Ethernet Segment. 3269 * 3270 * This routine is responsible for storing processed mbuf 3271 * into elts ring buffer and update elts_head. 3272 * 3273 * @param txq 3274 * Pointer to TX queue structure. 3275 * @param loc 3276 * Pointer to burst routine local context. 3277 * @param olx 3278 * Configured Tx offloads mask. 
It is fully defined at 3279 * compile time and may be used for optimization. 3280 * 3281 * @return 3282 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3283 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3284 * Local context variables partially updated. 3285 */ 3286 static __rte_always_inline enum mlx5_txcmp_code 3287 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 3288 struct mlx5_txq_local *__rte_restrict loc, 3289 unsigned int olx) 3290 { 3291 struct mlx5_wqe_dseg *__rte_restrict dseg; 3292 struct mlx5_wqe *__rte_restrict wqe; 3293 unsigned int ds, nseg; 3294 3295 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3296 if (MLX5_TXOFF_CONFIG(TXPP)) { 3297 enum mlx5_txcmp_code wret; 3298 3299 /* Generate WAIT for scheduling if requested. */ 3300 wret = mlx5_tx_schedule_send(txq, loc, olx); 3301 if (wret == MLX5_TXCMP_CODE_EXIT) 3302 return MLX5_TXCMP_CODE_EXIT; 3303 if (wret == MLX5_TXCMP_CODE_ERROR) 3304 return MLX5_TXCMP_CODE_ERROR; 3305 } 3306 /* 3307 * No inline at all, it means the CPU cycles saving 3308 * is prioritized at configuration, we should not 3309 * copy any packet data to WQE. 3310 */ 3311 nseg = NB_SEGS(loc->mbuf); 3312 ds = 2 + nseg; 3313 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3314 return MLX5_TXCMP_CODE_EXIT; 3315 /* Check for maximal WQE size. */ 3316 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3317 return MLX5_TXCMP_CODE_ERROR; 3318 /* 3319 * Some Tx offloads may cause an error if 3320 * packet is not long enough, check against 3321 * assumed minimal length. 3322 */ 3323 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 3324 return MLX5_TXCMP_CODE_ERROR; 3325 #ifdef MLX5_PMD_SOFT_COUNTERS 3326 /* Update sent data bytes counter. */ 3327 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 3328 if (MLX5_TXOFF_CONFIG(VLAN) && 3329 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3330 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 3331 #endif 3332 /* 3333 * SEND WQE, one WQEBB: 3334 * - Control Segment, SEND opcode 3335 * - Ethernet Segment, optional VLAN, no inline 3336 * - Data Segments, pointer only type 3337 */ 3338 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3339 loc->wqe_last = wqe; 3340 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 3341 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3342 dseg = &wqe->dseg[0]; 3343 do { 3344 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3345 struct rte_mbuf *mbuf; 3346 3347 /* 3348 * Zero length segment found, have to 3349 * correct total size of WQE in segments. 3350 * It is supposed to be rare occasion, so 3351 * in normal case (no zero length segments) 3352 * we avoid extra writing to the Control 3353 * Segment. 3354 */ 3355 --ds; 3356 wqe->cseg.sq_ds -= RTE_BE32(1); 3357 mbuf = loc->mbuf; 3358 loc->mbuf = mbuf->next; 3359 rte_pktmbuf_free_seg(mbuf); 3360 if (--nseg == 0) 3361 break; 3362 } else { 3363 mlx5_tx_dseg_ptr 3364 (txq, loc, dseg, 3365 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3366 rte_pktmbuf_data_len(loc->mbuf), olx); 3367 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3368 --loc->elts_free; 3369 if (--nseg == 0) 3370 break; 3371 ++dseg; 3372 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3373 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3374 loc->mbuf = loc->mbuf->next; 3375 } 3376 } while (true); 3377 txq->wqe_ci += (ds + 3) / 4; 3378 loc->wqe_free -= (ds + 3) / 4; 3379 return MLX5_TXCMP_CODE_MULTI; 3380 } 3381 3382 /** 3383 * Tx one packet function for multi-segment SEND. 
Supports all 3384 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3385 * sends one packet per WQE, with data inlining in 3386 * Ethernet Segment and minimal Data Segments. 3387 * 3388 * This routine is responsible for storing processed mbuf 3389 * into elts ring buffer and update elts_head. 3390 * 3391 * @param txq 3392 * Pointer to TX queue structure. 3393 * @param loc 3394 * Pointer to burst routine local context. 3395 * @param olx 3396 * Configured Tx offloads mask. It is fully defined at 3397 * compile time and may be used for optimization. 3398 * 3399 * @return 3400 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3401 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3402 * Local context variables partially updated. 3403 */ 3404 static __rte_always_inline enum mlx5_txcmp_code 3405 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 3406 struct mlx5_txq_local *__rte_restrict loc, 3407 unsigned int olx) 3408 { 3409 struct mlx5_wqe *__rte_restrict wqe; 3410 unsigned int ds, inlen, dlen, vlan = 0; 3411 3412 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3413 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3414 if (MLX5_TXOFF_CONFIG(TXPP)) { 3415 enum mlx5_txcmp_code wret; 3416 3417 /* Generate WAIT for scheduling if requested. */ 3418 wret = mlx5_tx_schedule_send(txq, loc, olx); 3419 if (wret == MLX5_TXCMP_CODE_EXIT) 3420 return MLX5_TXCMP_CODE_EXIT; 3421 if (wret == MLX5_TXCMP_CODE_ERROR) 3422 return MLX5_TXCMP_CODE_ERROR; 3423 } 3424 /* 3425 * First calculate data length to be inlined 3426 * to estimate the required space for WQE. 3427 */ 3428 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3429 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3430 vlan = sizeof(struct rte_vlan_hdr); 3431 inlen = dlen + vlan; 3432 /* Check against minimal length. */ 3433 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3434 return MLX5_TXCMP_CODE_ERROR; 3435 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 3436 if (inlen > txq->inlen_send || 3437 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 3438 struct rte_mbuf *mbuf; 3439 unsigned int nxlen; 3440 uintptr_t start; 3441 3442 /* 3443 * Packet length exceeds the allowed inline 3444 * data length, check whether the minimal 3445 * inlining is required. 3446 */ 3447 if (txq->inlen_mode) { 3448 MLX5_ASSERT(txq->inlen_mode >= 3449 MLX5_ESEG_MIN_INLINE_SIZE); 3450 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 3451 inlen = txq->inlen_mode; 3452 } else { 3453 if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE || 3454 !vlan || txq->vlan_en) { 3455 /* 3456 * VLAN insertion will be done inside by HW. 3457 * It is not utmost effective - VLAN flag is 3458 * checked twice, but we should proceed the 3459 * inlining length correctly and take into 3460 * account the VLAN header being inserted. 3461 */ 3462 return mlx5_tx_packet_multi_send 3463 (txq, loc, olx); 3464 } 3465 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 3466 } 3467 /* 3468 * Now we know the minimal amount of data is requested 3469 * to inline. Check whether we should inline the buffers 3470 * from the chain beginning to eliminate some mbufs. 3471 */ 3472 mbuf = loc->mbuf; 3473 nxlen = rte_pktmbuf_data_len(mbuf); 3474 if (unlikely(nxlen <= txq->inlen_send)) { 3475 /* We can inline first mbuf at least. */ 3476 if (nxlen < inlen) { 3477 unsigned int smlen; 3478 3479 /* Scan mbufs till inlen filled. 
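Accumulate the data lengths of the following mbufs until the requested inline amount is covered.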
*/ 3480 do { 3481 smlen = nxlen; 3482 mbuf = NEXT(mbuf); 3483 MLX5_ASSERT(mbuf); 3484 nxlen = rte_pktmbuf_data_len(mbuf); 3485 nxlen += smlen; 3486 } while (unlikely(nxlen < inlen)); 3487 if (unlikely(nxlen > txq->inlen_send)) { 3488 /* We cannot inline entire mbuf. */ 3489 smlen = inlen - smlen; 3490 start = rte_pktmbuf_mtod_offset 3491 (mbuf, uintptr_t, smlen); 3492 goto do_align; 3493 } 3494 } 3495 do { 3496 inlen = nxlen; 3497 mbuf = NEXT(mbuf); 3498 /* There should be not end of packet. */ 3499 MLX5_ASSERT(mbuf); 3500 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 3501 } while (unlikely(nxlen < txq->inlen_send)); 3502 } 3503 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 3504 /* 3505 * Check whether we can do inline to align start 3506 * address of data buffer to cacheline. 3507 */ 3508 do_align: 3509 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 3510 if (unlikely(start)) { 3511 start += inlen; 3512 if (start <= txq->inlen_send) 3513 inlen = start; 3514 } 3515 } 3516 /* 3517 * Check whether there are enough free WQEBBs: 3518 * - Control Segment 3519 * - Ethernet Segment 3520 * - First Segment of inlined Ethernet data 3521 * - ... data continued ... 3522 * - Data Segments of pointer/min inline type 3523 * 3524 * Estimate the number of Data Segments conservatively, 3525 * supposing no any mbufs is being freed during inlining. 3526 */ 3527 MLX5_ASSERT(inlen <= txq->inlen_send); 3528 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3529 MLX5_ESEG_MIN_INLINE_SIZE + 3530 MLX5_WSEG_SIZE + 3531 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3532 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3533 return MLX5_TXCMP_CODE_EXIT; 3534 /* Check for maximal WQE size. */ 3535 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3536 return MLX5_TXCMP_CODE_ERROR; 3537 #ifdef MLX5_PMD_SOFT_COUNTERS 3538 /* Update sent data bytes/packets counters. */ 3539 txq->stats.obytes += dlen + vlan; 3540 #endif 3541 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3542 loc->wqe_last = wqe; 3543 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 3544 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 3545 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3546 txq->wqe_ci += (ds + 3) / 4; 3547 loc->wqe_free -= (ds + 3) / 4; 3548 return MLX5_TXCMP_CODE_MULTI; 3549 } 3550 3551 /** 3552 * Tx burst function for multi-segment packets. Supports all 3553 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 3554 * sends one packet per WQE. Function stops sending if it 3555 * encounters the single-segment packet. 3556 * 3557 * This routine is responsible for storing processed mbuf 3558 * into elts ring buffer and update elts_head. 3559 * 3560 * @param txq 3561 * Pointer to TX queue structure. 3562 * @param[in] pkts 3563 * Packets to transmit. 3564 * @param pkts_n 3565 * Number of packets in array. 3566 * @param loc 3567 * Pointer to burst routine local context. 3568 * @param olx 3569 * Configured Tx offloads mask. It is fully defined at 3570 * compile time and may be used for optimization. 3571 * 3572 * @return 3573 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3574 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3575 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3576 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 3577 * Local context variables updated. 
3578 */ 3579 static __rte_always_inline enum mlx5_txcmp_code 3580 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 3581 struct rte_mbuf **__rte_restrict pkts, 3582 unsigned int pkts_n, 3583 struct mlx5_txq_local *__rte_restrict loc, 3584 unsigned int olx) 3585 { 3586 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3587 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3588 pkts += loc->pkts_sent + 1; 3589 pkts_n -= loc->pkts_sent; 3590 for (;;) { 3591 enum mlx5_txcmp_code ret; 3592 3593 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3594 /* 3595 * Estimate the number of free elts quickly but 3596 * conservatively. Some segment may be fully inlined 3597 * and freed, ignore this here - precise estimation 3598 * is costly. 3599 */ 3600 if (loc->elts_free < NB_SEGS(loc->mbuf)) 3601 return MLX5_TXCMP_CODE_EXIT; 3602 if (MLX5_TXOFF_CONFIG(TSO) && 3603 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) { 3604 /* Proceed with multi-segment TSO. */ 3605 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 3606 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 3607 /* Proceed with multi-segment SEND with inlining. */ 3608 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 3609 } else { 3610 /* Proceed with multi-segment SEND w/o inlining. */ 3611 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 3612 } 3613 if (ret == MLX5_TXCMP_CODE_EXIT) 3614 return MLX5_TXCMP_CODE_EXIT; 3615 if (ret == MLX5_TXCMP_CODE_ERROR) 3616 return MLX5_TXCMP_CODE_ERROR; 3617 /* WQE is built, go to the next packet. */ 3618 ++loc->pkts_sent; 3619 --pkts_n; 3620 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3621 return MLX5_TXCMP_CODE_EXIT; 3622 loc->mbuf = *pkts++; 3623 if (pkts_n > 1) 3624 rte_prefetch0(*pkts); 3625 if (likely(NB_SEGS(loc->mbuf) > 1)) 3626 continue; 3627 /* Here ends the series of multi-segment packets. */ 3628 if (MLX5_TXOFF_CONFIG(TSO) && 3629 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3630 return MLX5_TXCMP_CODE_TSO; 3631 return MLX5_TXCMP_CODE_SINGLE; 3632 } 3633 MLX5_ASSERT(false); 3634 } 3635 3636 /** 3637 * Tx burst function for single-segment packets with TSO. 3638 * Supports all types of Tx offloads, except multi-packets. 3639 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 3640 * Function stops sending if it encounters the multi-segment 3641 * packet or packet without TSO requested. 3642 * 3643 * The routine is responsible for storing processed mbuf 3644 * into elts ring buffer and update elts_head if inline 3645 * offloads is requested due to possible early freeing 3646 * of the inlined mbufs (can not store pkts array in elts 3647 * as a batch). 3648 * 3649 * @param txq 3650 * Pointer to TX queue structure. 3651 * @param[in] pkts 3652 * Packets to transmit. 3653 * @param pkts_n 3654 * Number of packets in array. 3655 * @param loc 3656 * Pointer to burst routine local context. 3657 * @param olx 3658 * Configured Tx offloads mask. It is fully defined at 3659 * compile time and may be used for optimization. 3660 * 3661 * @return 3662 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3663 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3664 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3665 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3666 * Local context variables updated. 
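 *
 * A worked example of the per-packet size math below (illustrative
 * numbers only): for a single-segment TSO packet with l2_len = 14,
 * l3_len = 20, l4_len = 20, no VLAN and no tunnel, the inlined header
 * is hlen = 54 bytes; with tso_segsz = 1460 and dlen = 8000 the
 * hardware emits ntcp = ceil((8000 - 54) / 1460) = 6 TCP segments,
 * and the soft byte counter is credited with dlen plus one replicated
 * header per additional segment (8000 + 5 * 54 = 8270 bytes), matching
 * the bytes actually sent on the wire.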
3667 */ 3668 static __rte_always_inline enum mlx5_txcmp_code 3669 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 3670 struct rte_mbuf **__rte_restrict pkts, 3671 unsigned int pkts_n, 3672 struct mlx5_txq_local *__rte_restrict loc, 3673 unsigned int olx) 3674 { 3675 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3676 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3677 pkts += loc->pkts_sent + 1; 3678 pkts_n -= loc->pkts_sent; 3679 for (;;) { 3680 struct mlx5_wqe_dseg *__rte_restrict dseg; 3681 struct mlx5_wqe *__rte_restrict wqe; 3682 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 3683 uint8_t *dptr; 3684 3685 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3686 if (MLX5_TXOFF_CONFIG(TXPP)) { 3687 enum mlx5_txcmp_code wret; 3688 3689 /* Generate WAIT for scheduling if requested. */ 3690 wret = mlx5_tx_schedule_send(txq, loc, olx); 3691 if (wret == MLX5_TXCMP_CODE_EXIT) 3692 return MLX5_TXCMP_CODE_EXIT; 3693 if (wret == MLX5_TXCMP_CODE_ERROR) 3694 return MLX5_TXCMP_CODE_ERROR; 3695 } 3696 dlen = rte_pktmbuf_data_len(loc->mbuf); 3697 if (MLX5_TXOFF_CONFIG(VLAN) && 3698 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 3699 vlan = sizeof(struct rte_vlan_hdr); 3700 } 3701 /* 3702 * First calculate the WQE size to check 3703 * whether we have enough space in ring buffer. 3704 */ 3705 hlen = loc->mbuf->l2_len + vlan + 3706 loc->mbuf->l3_len + loc->mbuf->l4_len; 3707 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 3708 return MLX5_TXCMP_CODE_ERROR; 3709 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3710 hlen += loc->mbuf->outer_l2_len + 3711 loc->mbuf->outer_l3_len; 3712 /* Segment must contain all TSO headers. */ 3713 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 3714 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3715 hlen > (dlen + vlan))) 3716 return MLX5_TXCMP_CODE_ERROR; 3717 /* 3718 * Check whether there are enough free WQEBBs: 3719 * - Control Segment 3720 * - Ethernet Segment 3721 * - First Segment of inlined Ethernet data 3722 * - ... data continued ... 3723 * - Finishing Data Segment of pointer type 3724 */ 3725 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 3726 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3727 if (loc->wqe_free < ((ds + 3) / 4)) 3728 return MLX5_TXCMP_CODE_EXIT; 3729 #ifdef MLX5_PMD_SOFT_COUNTERS 3730 /* Update sent data bytes/packets counters. */ 3731 ntcp = (dlen + vlan - hlen + 3732 loc->mbuf->tso_segsz - 1) / 3733 loc->mbuf->tso_segsz; 3734 /* 3735 * One will be added for mbuf itself at the end 3736 * of the mlx5_tx_burst from loc->pkts_sent field. 3737 */ 3738 --ntcp; 3739 txq->stats.opackets += ntcp; 3740 txq->stats.obytes += dlen + vlan + ntcp * hlen; 3741 #endif 3742 /* 3743 * Build the TSO WQE: 3744 * - Control Segment 3745 * - Ethernet Segment with hlen bytes inlined 3746 * - Data Segment of pointer type 3747 */ 3748 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3749 loc->wqe_last = wqe; 3750 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3751 MLX5_OPCODE_TSO, olx); 3752 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 3753 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 3754 dlen -= hlen - vlan; 3755 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 3756 /* 3757 * WQE is built, update the loop parameters 3758 * and go to the next packet. 
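 * For instance (illustrative numbers, assuming 16-byte WQE segments
 * and the 18-byte minimal Ethernet Segment inline size implied by the
 * static_assert in the SEND path below): with hlen = 54 the WQE needs
 * ds = 4 + ceil((54 - 18) / 16) = 7 segments, i.e. (7 + 3) / 4 = 2
 * WQEBBs, which is exactly what wqe_ci and wqe_free are adjusted by.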
3759 */ 3760 txq->wqe_ci += (ds + 3) / 4; 3761 loc->wqe_free -= (ds + 3) / 4; 3762 if (MLX5_TXOFF_CONFIG(INLINE)) 3763 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3764 --loc->elts_free; 3765 ++loc->pkts_sent; 3766 --pkts_n; 3767 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3768 return MLX5_TXCMP_CODE_EXIT; 3769 loc->mbuf = *pkts++; 3770 if (pkts_n > 1) 3771 rte_prefetch0(*pkts); 3772 if (MLX5_TXOFF_CONFIG(MULTI) && 3773 unlikely(NB_SEGS(loc->mbuf) > 1)) 3774 return MLX5_TXCMP_CODE_MULTI; 3775 if (likely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) 3776 return MLX5_TXCMP_CODE_SINGLE; 3777 /* Continue with the next TSO packet. */ 3778 } 3779 MLX5_ASSERT(false); 3780 } 3781 3782 /** 3783 * Analyze the packet and select the best method to send. 3784 * 3785 * @param txq 3786 * Pointer to TX queue structure. 3787 * @param loc 3788 * Pointer to burst routine local context. 3789 * @param olx 3790 * Configured Tx offloads mask. It is fully defined at 3791 * compile time and may be used for optimization. 3792 * @param newp 3793 * The predefined flag whether do complete check for 3794 * multi-segment packets and TSO. 3795 * 3796 * @return 3797 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3798 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 3799 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 3800 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 3801 */ 3802 static __rte_always_inline enum mlx5_txcmp_code 3803 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 3804 struct mlx5_txq_local *__rte_restrict loc, 3805 unsigned int olx, 3806 bool newp) 3807 { 3808 /* Check for multi-segment packet. */ 3809 if (newp && 3810 MLX5_TXOFF_CONFIG(MULTI) && 3811 unlikely(NB_SEGS(loc->mbuf) > 1)) 3812 return MLX5_TXCMP_CODE_MULTI; 3813 /* Check for TSO packet. */ 3814 if (newp && 3815 MLX5_TXOFF_CONFIG(TSO) && 3816 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3817 return MLX5_TXCMP_CODE_TSO; 3818 /* Check if eMPW is enabled at all. */ 3819 if (!MLX5_TXOFF_CONFIG(EMPW)) 3820 return MLX5_TXCMP_CODE_SINGLE; 3821 /* Check if eMPW can be engaged. */ 3822 if (MLX5_TXOFF_CONFIG(VLAN) && 3823 unlikely(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) && 3824 (!MLX5_TXOFF_CONFIG(INLINE) || 3825 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 3826 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 3827 /* 3828 * eMPW does not support VLAN insertion offload, 3829 * we have to inline the entire packet but 3830 * packet is too long for inlining. 3831 */ 3832 return MLX5_TXCMP_CODE_SINGLE; 3833 } 3834 return MLX5_TXCMP_CODE_EMPW; 3835 } 3836 3837 /** 3838 * Check the next packet attributes to match with the eMPW batch ones. 3839 * In addition, for legacy MPW the packet length is checked either. 3840 * 3841 * @param txq 3842 * Pointer to TX queue structure. 3843 * @param es 3844 * Pointer to Ethernet Segment of eMPW batch. 3845 * @param loc 3846 * Pointer to burst routine local context. 3847 * @param dlen 3848 * Length of previous packet in MPW descriptor. 3849 * @param olx 3850 * Configured Tx offloads mask. It is fully defined at 3851 * compile time and may be used for optimization. 3852 * 3853 * @return 3854 * true - packet match with eMPW batch attributes. 3855 * false - no match, eMPW should be restarted. 
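 *
 * A simplified usage sketch, mirroring the eMPW loops below
 * (illustrative only):
 *
 *   if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) {
 *           mlx5_tx_sdone_empw(txq, loc, part - loop, slen, olx);
 *           restart with a fresh eMPW session (goto next_empw);
 *   }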
*/
static __rte_always_inline bool
mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq,
		   struct mlx5_wqe_eseg *__rte_restrict es,
		   struct mlx5_txq_local *__rte_restrict loc,
		   uint32_t dlen,
		   unsigned int olx)
{
	uint8_t swp_flags = 0;

	/* Compare the checksum flags, if any. */
	if (MLX5_TXOFF_CONFIG(CSUM) &&
	    txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags)
		return false;
	/* Compare the Software Parser offsets and flags. */
	if (MLX5_TXOFF_CONFIG(SWP) &&
	    (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) ||
	     es->swp_flags != swp_flags))
		return false;
	/* Fill metadata field if needed. */
	if (MLX5_TXOFF_CONFIG(METADATA) &&
	    es->metadata != (loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ?
			     *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0))
		return false;
	/* Legacy MPW can only send packets with the same length. */
	if (MLX5_TXOFF_CONFIG(MPW) &&
	    dlen != rte_pktmbuf_data_len(loc->mbuf))
		return false;
	/* There must be no VLAN packets in eMPW loop. */
	if (MLX5_TXOFF_CONFIG(VLAN))
		MLX5_ASSERT(!(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT));
	/* Check if the scheduling is requested. */
	if (MLX5_TXOFF_CONFIG(TXPP) &&
	    loc->mbuf->ol_flags & txq->ts_mask)
		return false;
	return true;
}

/**
 * Update send loop variables and WQE for eMPW loop
 * without data inlining. The number of Data Segments is
 * equal to the number of sent packets.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param ds
 *   Number of packets (one Data Segment is built per packet).
 * @param slen
 *   Accumulated statistics, number of bytes sent.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq,
		   struct mlx5_txq_local *__rte_restrict loc,
		   unsigned int ds,
		   unsigned int slen,
		   unsigned int olx __rte_unused)
{
	MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE));
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Update sent data bytes counter. */
	txq->stats.obytes += slen;
#else
	(void)slen;
#endif
	loc->elts_free -= ds;
	loc->pkts_sent += ds;
	ds += 2;
	loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
	txq->wqe_ci += (ds + 3) / 4;
	loc->wqe_free -= (ds + 3) / 4;
}

/**
 * Update send loop variables and WQE for eMPW loop
 * with data inlining. Takes the total size of the descriptors
 * and data pushed to the WQE.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param len
 *   Total size of descriptors/data in bytes.
 * @param slen
 *   Accumulated statistics, number of data bytes sent.
 * @param wqem
 *   The base WQE for the eMPW/MPW descriptor.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
3959 */ 3960 static __rte_always_inline void 3961 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 3962 struct mlx5_txq_local *__rte_restrict loc, 3963 unsigned int len, 3964 unsigned int slen, 3965 struct mlx5_wqe *__rte_restrict wqem, 3966 unsigned int olx __rte_unused) 3967 { 3968 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 3969 3970 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3971 #ifdef MLX5_PMD_SOFT_COUNTERS 3972 /* Update sent data bytes counter. */ 3973 txq->stats.obytes += slen; 3974 #else 3975 (void)slen; 3976 #endif 3977 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 3978 /* 3979 * If the legacy MPW session contains the inline packets 3980 * we should set the only inline data segment length 3981 * and align the total length to the segment size. 3982 */ 3983 MLX5_ASSERT(len > sizeof(dseg->bcount)); 3984 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 3985 MLX5_ETH_WQE_DATA_INLINE); 3986 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 3987 } else { 3988 /* 3989 * The session is not legacy MPW or contains the 3990 * data buffer pointer segments. 3991 */ 3992 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 3993 len = len / MLX5_WSEG_SIZE + 2; 3994 } 3995 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 3996 txq->wqe_ci += (len + 3) / 4; 3997 loc->wqe_free -= (len + 3) / 4; 3998 loc->wqe_last = wqem; 3999 } 4000 4001 /** 4002 * The set of Tx burst functions for single-segment packets 4003 * without TSO and with Multi-Packet Writing feature support. 4004 * Supports all types of Tx offloads, except multi-packets 4005 * and TSO. 4006 * 4007 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends 4008 * as many packet per WQE as it can. If eMPW is not configured 4009 * or packet can not be sent with eMPW (VLAN insertion) the 4010 * ordinary SEND opcode is used and only one packet placed 4011 * in WQE. 4012 * 4013 * Functions stop sending if it encounters the multi-segment 4014 * packet or packet with TSO requested. 4015 * 4016 * The routines are responsible for storing processed mbuf 4017 * into elts ring buffer and update elts_head if inlining 4018 * offload is requested. Otherwise the copying mbufs to elts 4019 * can be postponed and completed at the end of burst routine. 4020 * 4021 * @param txq 4022 * Pointer to TX queue structure. 4023 * @param[in] pkts 4024 * Packets to transmit. 4025 * @param pkts_n 4026 * Number of packets in array. 4027 * @param loc 4028 * Pointer to burst routine local context. 4029 * @param olx 4030 * Configured Tx offloads mask. It is fully defined at 4031 * compile time and may be used for optimization. 4032 * 4033 * @return 4034 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 4035 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 4036 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 4037 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 4038 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 4039 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 4040 * 4041 * Local context variables updated. 4042 * 4043 * 4044 * The routine sends packets with MLX5_OPCODE_EMPW 4045 * without inlining, this is dedicated optimized branch. 4046 * No VLAN insertion is supported. 
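 *
 * Sketch of the WQE layout built by the no-inline branch (assuming
 * 64-byte WQEBBs split into four 16-byte segments):
 *
 *   WQEBB 0: | Control Seg. | Ethernet Seg. | Data Seg. 0 | Data Seg. 1 |
 *   WQEBB 1: | Data Seg. 2  | Data Seg. 3   | Data Seg. 4 | Data Seg. 5 |
 *   ...
 *
 * with one pointer-type Data Segment per packet, so a batch of "part"
 * packets occupies (2 + part + 3) / 4 WQEBBs, as accounted at the end
 * of the loop.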
4047 */ 4048 static __rte_always_inline enum mlx5_txcmp_code 4049 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 4050 struct rte_mbuf **__rte_restrict pkts, 4051 unsigned int pkts_n, 4052 struct mlx5_txq_local *__rte_restrict loc, 4053 unsigned int olx) 4054 { 4055 /* 4056 * Subroutine is the part of mlx5_tx_burst_single() 4057 * and sends single-segment packet with eMPW opcode 4058 * without data inlining. 4059 */ 4060 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4061 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 4062 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4063 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4064 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 4065 pkts += loc->pkts_sent + 1; 4066 pkts_n -= loc->pkts_sent; 4067 for (;;) { 4068 struct mlx5_wqe_dseg *__rte_restrict dseg; 4069 struct mlx5_wqe_eseg *__rte_restrict eseg; 4070 enum mlx5_txcmp_code ret; 4071 unsigned int part, loop; 4072 unsigned int slen = 0; 4073 4074 next_empw: 4075 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4076 if (MLX5_TXOFF_CONFIG(TXPP)) { 4077 enum mlx5_txcmp_code wret; 4078 4079 /* Generate WAIT for scheduling if requested. */ 4080 wret = mlx5_tx_schedule_send(txq, loc, olx); 4081 if (wret == MLX5_TXCMP_CODE_EXIT) 4082 return MLX5_TXCMP_CODE_EXIT; 4083 if (wret == MLX5_TXCMP_CODE_ERROR) 4084 return MLX5_TXCMP_CODE_ERROR; 4085 } 4086 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4087 MLX5_MPW_MAX_PACKETS : 4088 MLX5_EMPW_MAX_PACKETS); 4089 if (unlikely(loc->elts_free < part)) { 4090 /* We have no enough elts to save all mbufs. */ 4091 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 4092 return MLX5_TXCMP_CODE_EXIT; 4093 /* But we still able to send at least minimal eMPW. */ 4094 part = loc->elts_free; 4095 } 4096 /* Check whether we have enough WQEs */ 4097 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 4098 if (unlikely(loc->wqe_free < 4099 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4100 return MLX5_TXCMP_CODE_EXIT; 4101 part = (loc->wqe_free * 4) - 2; 4102 } 4103 if (likely(part > 1)) 4104 rte_prefetch0(*pkts); 4105 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4106 /* 4107 * Build eMPW title WQEBB: 4108 * - Control Segment, eMPW opcode 4109 * - Ethernet Segment, no inline 4110 */ 4111 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 4112 MLX5_OPCODE_ENHANCED_MPSW, olx); 4113 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 4114 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4115 eseg = &loc->wqe_last->eseg; 4116 dseg = &loc->wqe_last->dseg[0]; 4117 loop = part; 4118 /* Store the packet length for legacy MPW. */ 4119 if (MLX5_TXOFF_CONFIG(MPW)) 4120 eseg->mss = rte_cpu_to_be_16 4121 (rte_pktmbuf_data_len(loc->mbuf)); 4122 for (;;) { 4123 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4124 #ifdef MLX5_PMD_SOFT_COUNTERS 4125 /* Update sent data bytes counter. */ 4126 slen += dlen; 4127 #endif 4128 mlx5_tx_dseg_ptr 4129 (txq, loc, dseg, 4130 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4131 dlen, olx); 4132 if (unlikely(--loop == 0)) 4133 break; 4134 loc->mbuf = *pkts++; 4135 if (likely(loop > 1)) 4136 rte_prefetch0(*pkts); 4137 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4138 /* 4139 * Unroll the completion code to avoid 4140 * returning variable value - it results in 4141 * unoptimized sequent checking in caller. 
4142 */ 4143 if (ret == MLX5_TXCMP_CODE_MULTI) { 4144 part -= loop; 4145 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4146 if (unlikely(!loc->elts_free || 4147 !loc->wqe_free)) 4148 return MLX5_TXCMP_CODE_EXIT; 4149 return MLX5_TXCMP_CODE_MULTI; 4150 } 4151 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4152 if (ret == MLX5_TXCMP_CODE_TSO) { 4153 part -= loop; 4154 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4155 if (unlikely(!loc->elts_free || 4156 !loc->wqe_free)) 4157 return MLX5_TXCMP_CODE_EXIT; 4158 return MLX5_TXCMP_CODE_TSO; 4159 } 4160 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4161 part -= loop; 4162 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4163 if (unlikely(!loc->elts_free || 4164 !loc->wqe_free)) 4165 return MLX5_TXCMP_CODE_EXIT; 4166 return MLX5_TXCMP_CODE_SINGLE; 4167 } 4168 if (ret != MLX5_TXCMP_CODE_EMPW) { 4169 MLX5_ASSERT(false); 4170 part -= loop; 4171 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4172 return MLX5_TXCMP_CODE_ERROR; 4173 } 4174 /* 4175 * Check whether packet parameters coincide 4176 * within assumed eMPW batch: 4177 * - check sum settings 4178 * - metadata value 4179 * - software parser settings 4180 * - packets length (legacy MPW only) 4181 * - scheduling is not required 4182 */ 4183 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 4184 MLX5_ASSERT(loop); 4185 part -= loop; 4186 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4187 if (unlikely(!loc->elts_free || 4188 !loc->wqe_free)) 4189 return MLX5_TXCMP_CODE_EXIT; 4190 pkts_n -= part; 4191 goto next_empw; 4192 } 4193 /* Packet attributes match, continue the same eMPW. */ 4194 ++dseg; 4195 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4196 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4197 } 4198 /* eMPW is built successfully, update loop parameters. */ 4199 MLX5_ASSERT(!loop); 4200 MLX5_ASSERT(pkts_n >= part); 4201 #ifdef MLX5_PMD_SOFT_COUNTERS 4202 /* Update sent data bytes counter. */ 4203 txq->stats.obytes += slen; 4204 #endif 4205 loc->elts_free -= part; 4206 loc->pkts_sent += part; 4207 txq->wqe_ci += (2 + part + 3) / 4; 4208 loc->wqe_free -= (2 + part + 3) / 4; 4209 pkts_n -= part; 4210 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4211 return MLX5_TXCMP_CODE_EXIT; 4212 loc->mbuf = *pkts++; 4213 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4214 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 4215 return ret; 4216 /* Continue sending eMPW batches. */ 4217 } 4218 MLX5_ASSERT(false); 4219 } 4220 4221 /** 4222 * The routine sends packets with MLX5_OPCODE_EMPW 4223 * with inlining, optionally supports VLAN insertion. 4224 */ 4225 static __rte_always_inline enum mlx5_txcmp_code 4226 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 4227 struct rte_mbuf **__rte_restrict pkts, 4228 unsigned int pkts_n, 4229 struct mlx5_txq_local *__rte_restrict loc, 4230 unsigned int olx) 4231 { 4232 /* 4233 * Subroutine is the part of mlx5_tx_burst_single() 4234 * and sends single-segment packet with eMPW opcode 4235 * with data inlining. 
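 *
 * The loop below works against a byte budget, restating the code that
 * follows in symbolic form:
 *
 *   room = min(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, wqe_free)
 *          * MLX5_WQE_SIZE - MLX5_WQE_CSEG_SIZE - MLX5_WQE_ESEG_SIZE
 *
 * An inlined packet consumes its data length plus the bcount field
 * (rounded up to whole 16-byte WQE segments for enhanced MPW), a
 * non-inlined packet consumes one pointer Data Segment, and the
 * session is closed once this budget or the packet limit (nlim) is
 * exhausted, or the packet attributes stop matching.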
4236 */ 4237 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4238 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 4239 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4240 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4241 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 4242 pkts += loc->pkts_sent + 1; 4243 pkts_n -= loc->pkts_sent; 4244 for (;;) { 4245 struct mlx5_wqe_dseg *__rte_restrict dseg; 4246 struct mlx5_wqe *__rte_restrict wqem; 4247 enum mlx5_txcmp_code ret; 4248 unsigned int room, part, nlim; 4249 unsigned int slen = 0; 4250 4251 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4252 if (MLX5_TXOFF_CONFIG(TXPP)) { 4253 enum mlx5_txcmp_code wret; 4254 4255 /* Generate WAIT for scheduling if requested. */ 4256 wret = mlx5_tx_schedule_send(txq, loc, olx); 4257 if (wret == MLX5_TXCMP_CODE_EXIT) 4258 return MLX5_TXCMP_CODE_EXIT; 4259 if (wret == MLX5_TXCMP_CODE_ERROR) 4260 return MLX5_TXCMP_CODE_ERROR; 4261 } 4262 /* 4263 * Limits the amount of packets in one WQE 4264 * to improve CQE latency generation. 4265 */ 4266 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4267 MLX5_MPW_INLINE_MAX_PACKETS : 4268 MLX5_EMPW_MAX_PACKETS); 4269 /* Check whether we have minimal amount WQEs */ 4270 if (unlikely(loc->wqe_free < 4271 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4272 return MLX5_TXCMP_CODE_EXIT; 4273 if (likely(pkts_n > 1)) 4274 rte_prefetch0(*pkts); 4275 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4276 /* 4277 * Build eMPW title WQEBB: 4278 * - Control Segment, eMPW opcode, zero DS 4279 * - Ethernet Segment, no inline 4280 */ 4281 mlx5_tx_cseg_init(txq, loc, wqem, 0, 4282 MLX5_OPCODE_ENHANCED_MPSW, olx); 4283 mlx5_tx_eseg_none(txq, loc, wqem, 4284 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4285 dseg = &wqem->dseg[0]; 4286 /* Store the packet length for legacy MPW. */ 4287 if (MLX5_TXOFF_CONFIG(MPW)) 4288 wqem->eseg.mss = rte_cpu_to_be_16 4289 (rte_pktmbuf_data_len(loc->mbuf)); 4290 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 4291 loc->wqe_free) * MLX5_WQE_SIZE - 4292 MLX5_WQE_CSEG_SIZE - 4293 MLX5_WQE_ESEG_SIZE; 4294 /* Limit the room for legacy MPW sessions for performance. */ 4295 if (MLX5_TXOFF_CONFIG(MPW)) 4296 room = RTE_MIN(room, 4297 RTE_MAX(txq->inlen_empw + 4298 sizeof(dseg->bcount) + 4299 (MLX5_TXOFF_CONFIG(VLAN) ? 4300 sizeof(struct rte_vlan_hdr) : 0), 4301 MLX5_MPW_INLINE_MAX_PACKETS * 4302 MLX5_WQE_DSEG_SIZE)); 4303 /* Build WQE till we have space, packets and resources. */ 4304 part = room; 4305 for (;;) { 4306 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4307 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 4308 unsigned int tlen; 4309 4310 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 4311 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 4312 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 4313 /* 4314 * Some Tx offloads may cause an error if 4315 * packet is not long enough, check against 4316 * assumed minimal length. 4317 */ 4318 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 4319 part -= room; 4320 if (unlikely(!part)) 4321 return MLX5_TXCMP_CODE_ERROR; 4322 /* 4323 * We have some successfully built 4324 * packet Data Segments to send. 4325 */ 4326 mlx5_tx_idone_empw(txq, loc, part, 4327 slen, wqem, olx); 4328 return MLX5_TXCMP_CODE_ERROR; 4329 } 4330 /* Inline or not inline - that's the Question. */ 4331 if (dlen > txq->inlen_empw || 4332 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) 4333 goto pointer_empw; 4334 if (MLX5_TXOFF_CONFIG(MPW)) { 4335 if (dlen > txq->inlen_send) 4336 goto pointer_empw; 4337 tlen = dlen; 4338 if (part == room) { 4339 /* Open new inline MPW session. 
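 * The zero bcount written below acts as a placeholder marking an
 * open legacy MPW inline session; the real byte count, together with
 * the MLX5_ETH_WQE_DATA_INLINE flag, is patched in later by
 * mlx5_tx_idone_empw() when the session is closed.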
*/ 4340 tlen += sizeof(dseg->bcount); 4341 dseg->bcount = RTE_BE32(0); 4342 dseg = RTE_PTR_ADD 4343 (dseg, sizeof(dseg->bcount)); 4344 } else { 4345 /* 4346 * No pointer and inline descriptor 4347 * intermix for legacy MPW sessions. 4348 */ 4349 if (wqem->dseg[0].bcount) 4350 break; 4351 } 4352 } else { 4353 tlen = sizeof(dseg->bcount) + dlen; 4354 } 4355 /* Inline entire packet, optional VLAN insertion. */ 4356 if (MLX5_TXOFF_CONFIG(VLAN) && 4357 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4358 /* 4359 * The packet length must be checked in 4360 * mlx5_tx_able_to_empw() and packet 4361 * fits into inline length guaranteed. 4362 */ 4363 MLX5_ASSERT((dlen + 4364 sizeof(struct rte_vlan_hdr)) <= 4365 txq->inlen_empw); 4366 tlen += sizeof(struct rte_vlan_hdr); 4367 if (room < tlen) 4368 break; 4369 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 4370 dptr, dlen, olx); 4371 #ifdef MLX5_PMD_SOFT_COUNTERS 4372 /* Update sent data bytes counter. */ 4373 slen += sizeof(struct rte_vlan_hdr); 4374 #endif 4375 } else { 4376 if (room < tlen) 4377 break; 4378 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 4379 dptr, dlen, olx); 4380 } 4381 if (!MLX5_TXOFF_CONFIG(MPW)) 4382 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 4383 MLX5_ASSERT(room >= tlen); 4384 room -= tlen; 4385 /* 4386 * Packet data are completely inlined, 4387 * free the packet immediately. 4388 */ 4389 rte_pktmbuf_free_seg(loc->mbuf); 4390 goto next_mbuf; 4391 pointer_empw: 4392 /* 4393 * No pointer and inline descriptor 4394 * intermix for legacy MPW sessions. 4395 */ 4396 if (MLX5_TXOFF_CONFIG(MPW) && 4397 part != room && 4398 wqem->dseg[0].bcount == RTE_BE32(0)) 4399 break; 4400 /* 4401 * Not inlinable VLAN packets are 4402 * proceeded outside of this routine. 4403 */ 4404 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 4405 if (MLX5_TXOFF_CONFIG(VLAN)) 4406 MLX5_ASSERT(!(loc->mbuf->ol_flags & 4407 PKT_TX_VLAN_PKT)); 4408 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 4409 /* We have to store mbuf in elts.*/ 4410 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 4411 room -= MLX5_WQE_DSEG_SIZE; 4412 /* Ring buffer wraparound is checked at the loop end.*/ 4413 ++dseg; 4414 next_mbuf: 4415 #ifdef MLX5_PMD_SOFT_COUNTERS 4416 /* Update sent data bytes counter. */ 4417 slen += dlen; 4418 #endif 4419 loc->pkts_sent++; 4420 loc->elts_free--; 4421 pkts_n--; 4422 if (unlikely(!pkts_n || !loc->elts_free)) { 4423 /* 4424 * We have no resources/packets to 4425 * continue build descriptors. 4426 */ 4427 part -= room; 4428 mlx5_tx_idone_empw(txq, loc, part, 4429 slen, wqem, olx); 4430 return MLX5_TXCMP_CODE_EXIT; 4431 } 4432 loc->mbuf = *pkts++; 4433 if (likely(pkts_n > 1)) 4434 rte_prefetch0(*pkts); 4435 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4436 /* 4437 * Unroll the completion code to avoid 4438 * returning variable value - it results in 4439 * unoptimized sequent checking in caller. 
4440 */ 4441 if (ret == MLX5_TXCMP_CODE_MULTI) { 4442 part -= room; 4443 mlx5_tx_idone_empw(txq, loc, part, 4444 slen, wqem, olx); 4445 if (unlikely(!loc->elts_free || 4446 !loc->wqe_free)) 4447 return MLX5_TXCMP_CODE_EXIT; 4448 return MLX5_TXCMP_CODE_MULTI; 4449 } 4450 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4451 if (ret == MLX5_TXCMP_CODE_TSO) { 4452 part -= room; 4453 mlx5_tx_idone_empw(txq, loc, part, 4454 slen, wqem, olx); 4455 if (unlikely(!loc->elts_free || 4456 !loc->wqe_free)) 4457 return MLX5_TXCMP_CODE_EXIT; 4458 return MLX5_TXCMP_CODE_TSO; 4459 } 4460 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4461 part -= room; 4462 mlx5_tx_idone_empw(txq, loc, part, 4463 slen, wqem, olx); 4464 if (unlikely(!loc->elts_free || 4465 !loc->wqe_free)) 4466 return MLX5_TXCMP_CODE_EXIT; 4467 return MLX5_TXCMP_CODE_SINGLE; 4468 } 4469 if (ret != MLX5_TXCMP_CODE_EMPW) { 4470 MLX5_ASSERT(false); 4471 part -= room; 4472 mlx5_tx_idone_empw(txq, loc, part, 4473 slen, wqem, olx); 4474 return MLX5_TXCMP_CODE_ERROR; 4475 } 4476 /* Check if we have minimal room left. */ 4477 nlim--; 4478 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 4479 break; 4480 /* 4481 * Check whether packet parameters coincide 4482 * within assumed eMPW batch: 4483 * - check sum settings 4484 * - metadata value 4485 * - software parser settings 4486 * - packets length (legacy MPW only) 4487 * - scheduling is not required 4488 */ 4489 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 4490 loc, dlen, olx)) 4491 break; 4492 /* Packet attributes match, continue the same eMPW. */ 4493 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4494 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4495 } 4496 /* 4497 * We get here to close an existing eMPW 4498 * session and start the new one. 4499 */ 4500 MLX5_ASSERT(pkts_n); 4501 part -= room; 4502 if (unlikely(!part)) 4503 return MLX5_TXCMP_CODE_EXIT; 4504 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 4505 if (unlikely(!loc->elts_free || 4506 !loc->wqe_free)) 4507 return MLX5_TXCMP_CODE_EXIT; 4508 /* Continue the loop with new eMPW session. */ 4509 } 4510 MLX5_ASSERT(false); 4511 } 4512 4513 /** 4514 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 4515 * Data inlining and VLAN insertion are supported. 4516 */ 4517 static __rte_always_inline enum mlx5_txcmp_code 4518 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 4519 struct rte_mbuf **__rte_restrict pkts, 4520 unsigned int pkts_n, 4521 struct mlx5_txq_local *__rte_restrict loc, 4522 unsigned int olx) 4523 { 4524 /* 4525 * Subroutine is the part of mlx5_tx_burst_single() 4526 * and sends single-segment packet with SEND opcode. 4527 */ 4528 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4529 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4530 pkts += loc->pkts_sent + 1; 4531 pkts_n -= loc->pkts_sent; 4532 for (;;) { 4533 struct mlx5_wqe *__rte_restrict wqe; 4534 enum mlx5_txcmp_code ret; 4535 4536 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4537 if (MLX5_TXOFF_CONFIG(TXPP)) { 4538 enum mlx5_txcmp_code wret; 4539 4540 /* Generate WAIT for scheduling if requested. 
*/ 4541 wret = mlx5_tx_schedule_send(txq, loc, olx); 4542 if (wret == MLX5_TXCMP_CODE_EXIT) 4543 return MLX5_TXCMP_CODE_EXIT; 4544 if (wret == MLX5_TXCMP_CODE_ERROR) 4545 return MLX5_TXCMP_CODE_ERROR; 4546 } 4547 if (MLX5_TXOFF_CONFIG(INLINE)) { 4548 unsigned int inlen, vlan = 0; 4549 4550 inlen = rte_pktmbuf_data_len(loc->mbuf); 4551 if (MLX5_TXOFF_CONFIG(VLAN) && 4552 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4553 vlan = sizeof(struct rte_vlan_hdr); 4554 inlen += vlan; 4555 static_assert((sizeof(struct rte_vlan_hdr) + 4556 sizeof(struct rte_ether_hdr)) == 4557 MLX5_ESEG_MIN_INLINE_SIZE, 4558 "invalid min inline data size"); 4559 } 4560 /* 4561 * If inlining is enabled at configuration time 4562 * the limit must be not less than minimal size. 4563 * Otherwise we would do extra check for data 4564 * size to avoid crashes due to length overflow. 4565 */ 4566 MLX5_ASSERT(txq->inlen_send >= 4567 MLX5_ESEG_MIN_INLINE_SIZE); 4568 if (inlen <= txq->inlen_send) { 4569 unsigned int seg_n, wqe_n; 4570 4571 rte_prefetch0(rte_pktmbuf_mtod 4572 (loc->mbuf, uint8_t *)); 4573 /* Check against minimal length. */ 4574 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 4575 return MLX5_TXCMP_CODE_ERROR; 4576 if (loc->mbuf->ol_flags & 4577 PKT_TX_DYNF_NOINLINE) { 4578 /* 4579 * The hint flag not to inline packet 4580 * data is set. Check whether we can 4581 * follow the hint. 4582 */ 4583 if ((!MLX5_TXOFF_CONFIG(EMPW) && 4584 txq->inlen_mode) || 4585 (MLX5_TXOFF_CONFIG(MPW) && 4586 txq->inlen_mode)) { 4587 if (inlen <= txq->inlen_send) 4588 goto single_inline; 4589 /* 4590 * The hardware requires the 4591 * minimal inline data header. 4592 */ 4593 goto single_min_inline; 4594 } 4595 if (MLX5_TXOFF_CONFIG(VLAN) && 4596 vlan && !txq->vlan_en) { 4597 /* 4598 * We must insert VLAN tag 4599 * by software means. 4600 */ 4601 goto single_part_inline; 4602 } 4603 goto single_no_inline; 4604 } 4605 single_inline: 4606 /* 4607 * Completely inlined packet data WQE: 4608 * - Control Segment, SEND opcode 4609 * - Ethernet Segment, no VLAN insertion 4610 * - Data inlined, VLAN optionally inserted 4611 * - Alignment to MLX5_WSEG_SIZE 4612 * Have to estimate amount of WQEBBs 4613 */ 4614 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 4615 MLX5_ESEG_MIN_INLINE_SIZE + 4616 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4617 /* Check if there are enough WQEBBs. */ 4618 wqe_n = (seg_n + 3) / 4; 4619 if (wqe_n > loc->wqe_free) 4620 return MLX5_TXCMP_CODE_EXIT; 4621 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4622 loc->wqe_last = wqe; 4623 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 4624 MLX5_OPCODE_SEND, olx); 4625 mlx5_tx_eseg_data(txq, loc, wqe, 4626 vlan, inlen, 0, olx); 4627 txq->wqe_ci += wqe_n; 4628 loc->wqe_free -= wqe_n; 4629 /* 4630 * Packet data are completely inlined, 4631 * free the packet immediately. 4632 */ 4633 rte_pktmbuf_free_seg(loc->mbuf); 4634 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 4635 MLX5_TXOFF_CONFIG(MPW)) && 4636 txq->inlen_mode) { 4637 /* 4638 * If minimal inlining is requested the eMPW 4639 * feature should be disabled due to data is 4640 * inlined into Ethernet Segment, which can 4641 * not contain inlined data for eMPW due to 4642 * segment shared for all packets. 4643 */ 4644 struct mlx5_wqe_dseg *__rte_restrict dseg; 4645 unsigned int ds; 4646 uint8_t *dptr; 4647 4648 /* 4649 * The inline-mode settings require 4650 * to inline the specified amount of 4651 * data bytes to the Ethernet Segment. 4652 * We should check the free space in 4653 * WQE ring buffer to inline partially. 
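 * A worked example of the size check that follows (illustrative
 * numbers, assuming 16-byte WQE segments and the 18-byte minimal
 * Ethernet Segment inline size from the static_assert above): with
 * txq->inlen_mode = 64 the WQE needs
 * ds = (16 + 16 + 16 + 64 - 18 + 16 + 15) / 16 = 7 segments,
 * i.e. (7 + 3) / 4 = 2 WQEBBs, and the routine returns
 * MLX5_TXCMP_CODE_EXIT when wqe_free cannot cover that.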
4654 */ 4655 single_min_inline: 4656 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 4657 MLX5_ASSERT(inlen > txq->inlen_mode); 4658 MLX5_ASSERT(txq->inlen_mode >= 4659 MLX5_ESEG_MIN_INLINE_SIZE); 4660 /* 4661 * Check whether there are enough free WQEBBs: 4662 * - Control Segment 4663 * - Ethernet Segment 4664 * - First Segment of inlined Ethernet data 4665 * - ... data continued ... 4666 * - Finishing Data Segment of pointer type 4667 */ 4668 ds = (MLX5_WQE_CSEG_SIZE + 4669 MLX5_WQE_ESEG_SIZE + 4670 MLX5_WQE_DSEG_SIZE + 4671 txq->inlen_mode - 4672 MLX5_ESEG_MIN_INLINE_SIZE + 4673 MLX5_WQE_DSEG_SIZE + 4674 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4675 if (loc->wqe_free < ((ds + 3) / 4)) 4676 return MLX5_TXCMP_CODE_EXIT; 4677 /* 4678 * Build the ordinary SEND WQE: 4679 * - Control Segment 4680 * - Ethernet Segment, inline inlen_mode bytes 4681 * - Data Segment of pointer type 4682 */ 4683 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4684 loc->wqe_last = wqe; 4685 mlx5_tx_cseg_init(txq, loc, wqe, ds, 4686 MLX5_OPCODE_SEND, olx); 4687 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 4688 txq->inlen_mode, 4689 0, olx); 4690 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4691 txq->inlen_mode - vlan; 4692 inlen -= txq->inlen_mode; 4693 mlx5_tx_dseg_ptr(txq, loc, dseg, 4694 dptr, inlen, olx); 4695 /* 4696 * WQE is built, update the loop parameters 4697 * and got to the next packet. 4698 */ 4699 txq->wqe_ci += (ds + 3) / 4; 4700 loc->wqe_free -= (ds + 3) / 4; 4701 /* We have to store mbuf in elts.*/ 4702 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4703 txq->elts[txq->elts_head++ & txq->elts_m] = 4704 loc->mbuf; 4705 --loc->elts_free; 4706 } else { 4707 uint8_t *dptr; 4708 unsigned int dlen; 4709 4710 /* 4711 * Partially inlined packet data WQE, we have 4712 * some space in title WQEBB, we can fill it 4713 * with some packet data. It takes one WQEBB, 4714 * it is available, no extra space check: 4715 * - Control Segment, SEND opcode 4716 * - Ethernet Segment, no VLAN insertion 4717 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 4718 * - Data Segment, pointer type 4719 * 4720 * We also get here if VLAN insertion is not 4721 * supported by HW, the inline is enabled. 4722 */ 4723 single_part_inline: 4724 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4725 loc->wqe_last = wqe; 4726 mlx5_tx_cseg_init(txq, loc, wqe, 4, 4727 MLX5_OPCODE_SEND, olx); 4728 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 4729 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4730 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 4731 /* 4732 * The length check is performed above, by 4733 * comparing with txq->inlen_send. We should 4734 * not get overflow here. 4735 */ 4736 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 4737 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 4738 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 4739 dptr, dlen, olx); 4740 ++txq->wqe_ci; 4741 --loc->wqe_free; 4742 /* We have to store mbuf in elts.*/ 4743 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4744 txq->elts[txq->elts_head++ & txq->elts_m] = 4745 loc->mbuf; 4746 --loc->elts_free; 4747 } 4748 #ifdef MLX5_PMD_SOFT_COUNTERS 4749 /* Update sent data bytes counter. */ 4750 txq->stats.obytes += vlan + 4751 rte_pktmbuf_data_len(loc->mbuf); 4752 #endif 4753 } else { 4754 /* 4755 * No inline at all, it means the CPU cycles saving 4756 * is prioritized at configuration, we should not 4757 * copy any packet data to WQE. 
4758 * 4759 * SEND WQE, one WQEBB: 4760 * - Control Segment, SEND opcode 4761 * - Ethernet Segment, optional VLAN, no inline 4762 * - Data Segment, pointer type 4763 */ 4764 single_no_inline: 4765 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4766 loc->wqe_last = wqe; 4767 mlx5_tx_cseg_init(txq, loc, wqe, 3, 4768 MLX5_OPCODE_SEND, olx); 4769 mlx5_tx_eseg_none(txq, loc, wqe, olx); 4770 mlx5_tx_dseg_ptr 4771 (txq, loc, &wqe->dseg[0], 4772 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4773 rte_pktmbuf_data_len(loc->mbuf), olx); 4774 ++txq->wqe_ci; 4775 --loc->wqe_free; 4776 /* 4777 * We should not store mbuf pointer in elts 4778 * if no inlining is configured, this is done 4779 * by calling routine in a batch copy. 4780 */ 4781 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4782 --loc->elts_free; 4783 #ifdef MLX5_PMD_SOFT_COUNTERS 4784 /* Update sent data bytes counter. */ 4785 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 4786 if (MLX5_TXOFF_CONFIG(VLAN) && 4787 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 4788 txq->stats.obytes += 4789 sizeof(struct rte_vlan_hdr); 4790 #endif 4791 } 4792 ++loc->pkts_sent; 4793 --pkts_n; 4794 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4795 return MLX5_TXCMP_CODE_EXIT; 4796 loc->mbuf = *pkts++; 4797 if (pkts_n > 1) 4798 rte_prefetch0(*pkts); 4799 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4800 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 4801 return ret; 4802 } 4803 MLX5_ASSERT(false); 4804 } 4805 4806 static __rte_always_inline enum mlx5_txcmp_code 4807 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 4808 struct rte_mbuf **__rte_restrict pkts, 4809 unsigned int pkts_n, 4810 struct mlx5_txq_local *__rte_restrict loc, 4811 unsigned int olx) 4812 { 4813 enum mlx5_txcmp_code ret; 4814 4815 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 4816 if (ret == MLX5_TXCMP_CODE_SINGLE) 4817 goto ordinary_send; 4818 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 4819 for (;;) { 4820 /* Optimize for inline/no inline eMPW send. */ 4821 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 4822 mlx5_tx_burst_empw_inline 4823 (txq, pkts, pkts_n, loc, olx) : 4824 mlx5_tx_burst_empw_simple 4825 (txq, pkts, pkts_n, loc, olx); 4826 if (ret != MLX5_TXCMP_CODE_SINGLE) 4827 return ret; 4828 /* The resources to send one packet should remain. */ 4829 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4830 ordinary_send: 4831 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 4832 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 4833 if (ret != MLX5_TXCMP_CODE_EMPW) 4834 return ret; 4835 /* The resources to send one packet should remain. */ 4836 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4837 } 4838 } 4839 4840 /** 4841 * DPDK Tx callback template. This is configured template 4842 * used to generate routines optimized for specified offload setup. 4843 * One of this generated functions is chosen at SQ configuration 4844 * time. 4845 * 4846 * @param txq 4847 * Generic pointer to TX queue structure. 4848 * @param[in] pkts 4849 * Packets to transmit. 4850 * @param pkts_n 4851 * Number of packets in array. 4852 * @param olx 4853 * Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx 4854 * values. Should be static to take compile time static configuration 4855 * advantages. 4856 * 4857 * @return 4858 * Number of packets successfully transmitted (<= pkts_n). 
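 *
 * Simplified outline of the template (illustrative, error handling
 * and counters omitted):
 *
 *   send_loop:
 *           mlx5_tx_handle_completion(txq, olx);
 *           compute loc.elts_free and loc.wqe_free;
 *           for each packet:
 *                   dispatch to the multi-segment, TSO or
 *                   single-segment sub-routine depending on
 *                   NB_SEGS() and PKT_TX_TCP_SEG;
 *           mlx5_tx_request_completion(txq, &loc, olx);
 *           mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, ...);
 *           copy mbufs not yet stored to elts;
 *           if (pkts_n > loc.pkts_sent)
 *                   goto send_loop;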
4859 */ 4860 static __rte_always_inline uint16_t 4861 mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, 4862 struct rte_mbuf **__rte_restrict pkts, 4863 uint16_t pkts_n, 4864 unsigned int olx) 4865 { 4866 struct mlx5_txq_local loc; 4867 enum mlx5_txcmp_code ret; 4868 unsigned int part; 4869 4870 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4871 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4872 if (unlikely(!pkts_n)) 4873 return 0; 4874 loc.pkts_sent = 0; 4875 loc.pkts_copy = 0; 4876 loc.wqe_last = NULL; 4877 4878 send_loop: 4879 loc.pkts_loop = loc.pkts_sent; 4880 /* 4881 * Check if there are some CQEs, if any: 4882 * - process an encountered errors 4883 * - process the completed WQEs 4884 * - free related mbufs 4885 * - doorbell the NIC about processed CQEs 4886 */ 4887 rte_prefetch0(*(pkts + loc.pkts_sent)); 4888 mlx5_tx_handle_completion(txq, olx); 4889 /* 4890 * Calculate the number of available resources - elts and WQEs. 4891 * There are two possible different scenarios: 4892 * - no data inlining into WQEs, one WQEBB may contains up to 4893 * four packets, in this case elts become scarce resource 4894 * - data inlining into WQEs, one packet may require multiple 4895 * WQEBBs, the WQEs become the limiting factor. 4896 */ 4897 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4898 loc.elts_free = txq->elts_s - 4899 (uint16_t)(txq->elts_head - txq->elts_tail); 4900 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4901 loc.wqe_free = txq->wqe_s - 4902 (uint16_t)(txq->wqe_ci - txq->wqe_pi); 4903 if (unlikely(!loc.elts_free || !loc.wqe_free)) 4904 goto burst_exit; 4905 for (;;) { 4906 /* 4907 * Fetch the packet from array. Usually this is 4908 * the first packet in series of multi/single 4909 * segment packets. 4910 */ 4911 loc.mbuf = *(pkts + loc.pkts_sent); 4912 /* Dedicated branch for multi-segment packets. */ 4913 if (MLX5_TXOFF_CONFIG(MULTI) && 4914 unlikely(NB_SEGS(loc.mbuf) > 1)) { 4915 /* 4916 * Multi-segment packet encountered. 4917 * Hardware is able to process it only 4918 * with SEND/TSO opcodes, one packet 4919 * per WQE, do it in dedicated routine. 4920 */ 4921 enter_send_multi: 4922 MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy); 4923 part = loc.pkts_sent - loc.pkts_copy; 4924 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 4925 /* 4926 * There are some single-segment mbufs not 4927 * stored in elts. The mbufs must be in the 4928 * same order as WQEs, so we must copy the 4929 * mbufs to elts here, before the coming 4930 * multi-segment packet mbufs is appended. 4931 */ 4932 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, 4933 part, olx); 4934 loc.pkts_copy = loc.pkts_sent; 4935 } 4936 MLX5_ASSERT(pkts_n > loc.pkts_sent); 4937 ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx); 4938 if (!MLX5_TXOFF_CONFIG(INLINE)) 4939 loc.pkts_copy = loc.pkts_sent; 4940 /* 4941 * These returned code checks are supposed 4942 * to be optimized out due to routine inlining. 4943 */ 4944 if (ret == MLX5_TXCMP_CODE_EXIT) { 4945 /* 4946 * The routine returns this code when 4947 * all packets are sent or there is no 4948 * enough resources to complete request. 4949 */ 4950 break; 4951 } 4952 if (ret == MLX5_TXCMP_CODE_ERROR) { 4953 /* 4954 * The routine returns this code when 4955 * some error in the incoming packets 4956 * format occurred. 
4957 */ 4958 txq->stats.oerrors++; 4959 break; 4960 } 4961 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4962 /* 4963 * The single-segment packet was encountered 4964 * in the array, try to send it with the 4965 * best optimized way, possible engaging eMPW. 4966 */ 4967 goto enter_send_single; 4968 } 4969 if (MLX5_TXOFF_CONFIG(TSO) && 4970 ret == MLX5_TXCMP_CODE_TSO) { 4971 /* 4972 * The single-segment TSO packet was 4973 * encountered in the array. 4974 */ 4975 goto enter_send_tso; 4976 } 4977 /* We must not get here. Something is going wrong. */ 4978 MLX5_ASSERT(false); 4979 txq->stats.oerrors++; 4980 break; 4981 } 4982 /* Dedicated branch for single-segment TSO packets. */ 4983 if (MLX5_TXOFF_CONFIG(TSO) && 4984 unlikely(loc.mbuf->ol_flags & PKT_TX_TCP_SEG)) { 4985 /* 4986 * TSO might require special way for inlining 4987 * (dedicated parameters) and is sent with 4988 * MLX5_OPCODE_TSO opcode only, provide this 4989 * in dedicated branch. 4990 */ 4991 enter_send_tso: 4992 MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1); 4993 MLX5_ASSERT(pkts_n > loc.pkts_sent); 4994 ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx); 4995 /* 4996 * These returned code checks are supposed 4997 * to be optimized out due to routine inlining. 4998 */ 4999 if (ret == MLX5_TXCMP_CODE_EXIT) 5000 break; 5001 if (ret == MLX5_TXCMP_CODE_ERROR) { 5002 txq->stats.oerrors++; 5003 break; 5004 } 5005 if (ret == MLX5_TXCMP_CODE_SINGLE) 5006 goto enter_send_single; 5007 if (MLX5_TXOFF_CONFIG(MULTI) && 5008 ret == MLX5_TXCMP_CODE_MULTI) { 5009 /* 5010 * The multi-segment packet was 5011 * encountered in the array. 5012 */ 5013 goto enter_send_multi; 5014 } 5015 /* We must not get here. Something is going wrong. */ 5016 MLX5_ASSERT(false); 5017 txq->stats.oerrors++; 5018 break; 5019 } 5020 /* 5021 * The dedicated branch for the single-segment packets 5022 * without TSO. Often these ones can be sent using 5023 * MLX5_OPCODE_EMPW with multiple packets in one WQE. 5024 * The routine builds the WQEs till it encounters 5025 * the TSO or multi-segment packet (in case if these 5026 * offloads are requested at SQ configuration time). 5027 */ 5028 enter_send_single: 5029 MLX5_ASSERT(pkts_n > loc.pkts_sent); 5030 ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx); 5031 /* 5032 * These returned code checks are supposed 5033 * to be optimized out due to routine inlining. 5034 */ 5035 if (ret == MLX5_TXCMP_CODE_EXIT) 5036 break; 5037 if (ret == MLX5_TXCMP_CODE_ERROR) { 5038 txq->stats.oerrors++; 5039 break; 5040 } 5041 if (MLX5_TXOFF_CONFIG(MULTI) && 5042 ret == MLX5_TXCMP_CODE_MULTI) { 5043 /* 5044 * The multi-segment packet was 5045 * encountered in the array. 5046 */ 5047 goto enter_send_multi; 5048 } 5049 if (MLX5_TXOFF_CONFIG(TSO) && 5050 ret == MLX5_TXCMP_CODE_TSO) { 5051 /* 5052 * The single-segment TSO packet was 5053 * encountered in the array. 5054 */ 5055 goto enter_send_tso; 5056 } 5057 /* We must not get here. Something is going wrong. */ 5058 MLX5_ASSERT(false); 5059 txq->stats.oerrors++; 5060 break; 5061 } 5062 /* 5063 * Main Tx loop is completed, do the rest: 5064 * - set completion request if thresholds are reached 5065 * - doorbell the hardware 5066 * - copy the rest of mbufs to elts (if any) 5067 */ 5068 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) || 5069 loc.pkts_sent >= loc.pkts_copy); 5070 /* Take a shortcut if nothing is sent. */ 5071 if (unlikely(loc.pkts_sent == loc.pkts_loop)) 5072 goto burst_exit; 5073 /* Request CQE generation if limits are reached. 
*/
	mlx5_tx_request_completion(txq, &loc, olx);
	/*
	 * Ring the QP doorbell immediately after WQE building completion
	 * to improve latencies. The purely software-related data
	 * treatment can be completed after the doorbell. Tx CQEs for
	 * this SQ are processed in this thread only by polling.
	 *
	 * The rdma core library can map the doorbell register in two
	 * ways, depending on the environment variable "MLX5_SHUT_UP_BF":
	 *
	 * - as regular cached memory, when the variable is either
	 *   missing or set to zero. This type of mapping may cause
	 *   significant doorbell register write latency and requires an
	 *   explicit memory write barrier to mitigate this issue and
	 *   prevent write combining.
	 *
	 * - as non-cached memory, when the variable is present and set
	 *   to a non-zero value. This type of mapping may cause a
	 *   performance impact under heavy load, but the explicit write
	 *   memory barrier is not required and this may improve core
	 *   performance.
	 *
	 * - the legacy behaviour (prior to the 19.08 release) was to use
	 *   a heuristic to decide whether the write memory barrier
	 *   should be performed. This behavior is supported by
	 *   specifying tx_db_nc=2: the write barrier is skipped if the
	 *   application provides the full recommended burst of packets,
	 *   assuming the next packets are coming and the write barrier
	 *   will be issued on the next burst (after descriptor writing,
	 *   at least).
	 */
	mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc &&
			(!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
	/* Not all of the mbufs may be stored into elts yet. */
	part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
	if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
		/*
		 * There are some single-segment mbufs not stored in elts.
		 * This can happen only if the last packet was
		 * single-segment. The copying is gathered into one place
		 * because it is a good opportunity to optimize it with
		 * SIMD. Unfortunately, if inlining is enabled, gaps in
		 * the pointer array may appear due to early freeing of
		 * the inlined mbufs.
		 */
		mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx);
		loc.pkts_copy = loc.pkts_sent;
	}
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (pkts_n > loc.pkts_sent) {
		/*
		 * If the burst size is large there might not be enough
		 * CQEs fetched from the completion queue and not enough
		 * resources freed to send all the packets.
		 */
		goto send_loop;
	}
burst_exit:
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += loc.pkts_sent;
#endif
	return loc.pkts_sent;
}

/* Generate routines with Enhanced Multi-Packet Write support.
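 * Each declaration below generates one specialization of the Tx burst
 * template above for a fixed offload mask, so the MLX5_TXOFF_CONFIG()
 * checks are resolved at compile time. A rough sketch of a table-driven
 * selection over the txoff_func[] array declared further below
 * (hypothetical helper for illustration; the real selection logic
 * lives elsewhere in the PMD and is more elaborate):
 *
 *   static eth_tx_burst_t
 *   select_tx_burst(unsigned int olx)
 *   {
 *           unsigned int i;
 *
 *           for (i = 0; i != RTE_DIM(txoff_func); ++i)
 *                   if ((txoff_func[i].olx & olx) == olx)
 *                           return txoff_func[i].func;
 *           return NULL;
 *   }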
*/ 5141 MLX5_TXOFF_DECL(full_empw, 5142 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW) 5143 5144 MLX5_TXOFF_DECL(none_empw, 5145 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 5146 5147 MLX5_TXOFF_DECL(md_empw, 5148 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5149 5150 MLX5_TXOFF_DECL(mt_empw, 5151 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5152 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5153 5154 MLX5_TXOFF_DECL(mtsc_empw, 5155 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5156 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5157 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5158 5159 MLX5_TXOFF_DECL(mti_empw, 5160 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5161 MLX5_TXOFF_CONFIG_INLINE | 5162 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5163 5164 MLX5_TXOFF_DECL(mtv_empw, 5165 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5166 MLX5_TXOFF_CONFIG_VLAN | 5167 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5168 5169 MLX5_TXOFF_DECL(mtiv_empw, 5170 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5171 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5172 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5173 5174 MLX5_TXOFF_DECL(sc_empw, 5175 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5176 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5177 5178 MLX5_TXOFF_DECL(sci_empw, 5179 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5180 MLX5_TXOFF_CONFIG_INLINE | 5181 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5182 5183 MLX5_TXOFF_DECL(scv_empw, 5184 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5185 MLX5_TXOFF_CONFIG_VLAN | 5186 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5187 5188 MLX5_TXOFF_DECL(sciv_empw, 5189 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5190 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5191 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5192 5193 MLX5_TXOFF_DECL(i_empw, 5194 MLX5_TXOFF_CONFIG_INLINE | 5195 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5196 5197 MLX5_TXOFF_DECL(v_empw, 5198 MLX5_TXOFF_CONFIG_VLAN | 5199 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5200 5201 MLX5_TXOFF_DECL(iv_empw, 5202 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5203 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5204 5205 /* Generate routines without Enhanced Multi-Packet Write support. 
*/ 5206 MLX5_TXOFF_DECL(full, 5207 MLX5_TXOFF_CONFIG_FULL) 5208 5209 MLX5_TXOFF_DECL(none, 5210 MLX5_TXOFF_CONFIG_NONE) 5211 5212 MLX5_TXOFF_DECL(md, 5213 MLX5_TXOFF_CONFIG_METADATA) 5214 5215 MLX5_TXOFF_DECL(mt, 5216 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5217 MLX5_TXOFF_CONFIG_METADATA) 5218 5219 MLX5_TXOFF_DECL(mtsc, 5220 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5221 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5222 MLX5_TXOFF_CONFIG_METADATA) 5223 5224 MLX5_TXOFF_DECL(mti, 5225 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5226 MLX5_TXOFF_CONFIG_INLINE | 5227 MLX5_TXOFF_CONFIG_METADATA) 5228 5229 5230 MLX5_TXOFF_DECL(mtv, 5231 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5232 MLX5_TXOFF_CONFIG_VLAN | 5233 MLX5_TXOFF_CONFIG_METADATA) 5234 5235 5236 MLX5_TXOFF_DECL(mtiv, 5237 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5238 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5239 MLX5_TXOFF_CONFIG_METADATA) 5240 5241 MLX5_TXOFF_DECL(sc, 5242 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5243 MLX5_TXOFF_CONFIG_METADATA) 5244 5245 MLX5_TXOFF_DECL(sci, 5246 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5247 MLX5_TXOFF_CONFIG_INLINE | 5248 MLX5_TXOFF_CONFIG_METADATA) 5249 5250 5251 MLX5_TXOFF_DECL(scv, 5252 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5253 MLX5_TXOFF_CONFIG_VLAN | 5254 MLX5_TXOFF_CONFIG_METADATA) 5255 5256 5257 MLX5_TXOFF_DECL(sciv, 5258 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5259 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5260 MLX5_TXOFF_CONFIG_METADATA) 5261 5262 MLX5_TXOFF_DECL(i, 5263 MLX5_TXOFF_CONFIG_INLINE | 5264 MLX5_TXOFF_CONFIG_METADATA) 5265 5266 MLX5_TXOFF_DECL(v, 5267 MLX5_TXOFF_CONFIG_VLAN | 5268 MLX5_TXOFF_CONFIG_METADATA) 5269 5270 MLX5_TXOFF_DECL(iv, 5271 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5272 MLX5_TXOFF_CONFIG_METADATA) 5273 5274 /* Generate routines with timestamp scheduling. */ 5275 MLX5_TXOFF_DECL(full_ts_nompw, 5276 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP) 5277 5278 MLX5_TXOFF_DECL(full_ts_nompwi, 5279 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5280 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5281 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5282 MLX5_TXOFF_CONFIG_TXPP) 5283 5284 MLX5_TXOFF_DECL(full_ts, 5285 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP | 5286 MLX5_TXOFF_CONFIG_EMPW) 5287 5288 MLX5_TXOFF_DECL(full_ts_noi, 5289 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5290 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5291 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5292 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5293 5294 MLX5_TXOFF_DECL(none_ts, 5295 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP | 5296 MLX5_TXOFF_CONFIG_EMPW) 5297 5298 MLX5_TXOFF_DECL(mdi_ts, 5299 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5300 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5301 5302 MLX5_TXOFF_DECL(mti_ts, 5303 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5304 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5305 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5306 5307 MLX5_TXOFF_DECL(mtiv_ts, 5308 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5309 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5310 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP | 5311 MLX5_TXOFF_CONFIG_EMPW) 5312 5313 /* 5314 * Generate routines with Legacy Multi-Packet Write support. 
/*
 * Generate routines with Legacy Multi-Packet Write support.
 * This mode is supported by ConnectX-4 Lx only and imposes
 * offload limitations; the following are not supported:
 * - ACL/Flows (metadata becomes meaningless)
 * - WQE Inline headers
 * - SRIOV (E-Switch offloads)
 * - VLAN insertion
 * - tunnel encapsulation/decapsulation
 * - TSO
 */
MLX5_TXOFF_DECL(none_mpw,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_DECL(mci_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_DECL(mc_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_DECL(i_mpw,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)
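/*
 * Note: the Legacy MPW variants above are only reachable when the
 * device is configured with config->mps == MLX5_MPW and none of the
 * TSO, SWP, VLAN or METADATA offloads are requested; see
 * mlx5_select_tx_function() below, which enables them roughly as:
 *
 *	if (config->mps == MLX5_MPW &&
 *	    !(olx & (MLX5_TXOFF_CONFIG_TSO | MLX5_TXOFF_CONFIG_SWP |
 *		     MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA)))
 *		olx |= MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW;
 */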
/*
 * Array of declared and compiled Tx burst functions and the
 * corresponding supported offload sets. The array is used to select
 * the Tx burst function for the offload set requested at Tx queue
 * configuration time.
 */
const struct {
	eth_tx_burst_t func;
	unsigned int olx;
} txoff_func[] = {
MLX5_TXOFF_INFO(full_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(none_empw,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(md_empw,
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mt_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtsc_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mti_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtv_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtiv_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sc_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sci_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(scv_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sciv_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(i_empw,
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(v_empw,
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(iv_empw,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full_ts_nompw,
		MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP)

MLX5_TXOFF_INFO(full_ts_nompwi,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP)

MLX5_TXOFF_INFO(full_ts,
		MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full_ts_noi,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(none_ts,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mdi_ts,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mti_ts,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtiv_ts,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(none,
		MLX5_TXOFF_CONFIG_NONE)

MLX5_TXOFF_INFO(md,
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mt,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtsc,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mti,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtv,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtiv,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sc,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sci,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(scv,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sciv,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(i,
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(v,
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(iv,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(none_mpw,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(mci_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(mc_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(i_mpw,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)
};
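/*
 * For example, a requested offload set of
 * MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW matches the
 * md_empw entry exactly and that routine is selected; when no exact
 * match exists, mlx5_select_tx_function() below falls back to the
 * matching entry with the fewest not-requested offloads enabled.
 */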
/**
 * Configure the Tx function to use. The routine checks the configured
 * Tx offloads for the device and selects the appropriate Tx burst
 * routine. Multiple Tx burst routines are compiled from the same
 * template, each optimized for a dedicated Tx offload set.
 *
 * @param dev
 *   Pointer to the Ethernet device structure.
 *
 * @return
 *   Pointer to selected Tx burst function.
 */
eth_tx_burst_t
mlx5_select_tx_function(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
	unsigned int diff = 0, olx = 0, i, m;

	static_assert(MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE <=
		      MLX5_DSEG_MAX, "invalid WQE max size");
	static_assert(MLX5_WQE_CSEG_SIZE == MLX5_WSEG_SIZE,
		      "invalid WQE Control Segment size");
	static_assert(MLX5_WQE_ESEG_SIZE == MLX5_WSEG_SIZE,
		      "invalid WQE Ethernet Segment size");
	static_assert(MLX5_WQE_DSEG_SIZE == MLX5_WSEG_SIZE,
		      "invalid WQE Data Segment size");
	static_assert(MLX5_WQE_SIZE == 4 * MLX5_WSEG_SIZE,
		      "invalid WQE size");
	MLX5_ASSERT(priv);
	if (tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) {
		/* We should support Multi-Segment Packets. */
		olx |= MLX5_TXOFF_CONFIG_MULTI;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO |
			   DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
			   DEV_TX_OFFLOAD_GRE_TNL_TSO |
			   DEV_TX_OFFLOAD_IP_TNL_TSO |
			   DEV_TX_OFFLOAD_UDP_TNL_TSO)) {
		/* We should support TCP Send Offload. */
		olx |= MLX5_TXOFF_CONFIG_TSO;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO |
			   DEV_TX_OFFLOAD_UDP_TNL_TSO |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
		/* We should support Software Parser for Tunnels. */
		olx |= MLX5_TXOFF_CONFIG_SWP;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
			   DEV_TX_OFFLOAD_UDP_CKSUM |
			   DEV_TX_OFFLOAD_TCP_CKSUM |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
		/* We should support IP/TCP/UDP Checksums. */
		olx |= MLX5_TXOFF_CONFIG_CSUM;
	}
	if (tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT) {
		/* We should support VLAN insertion. */
		olx |= MLX5_TXOFF_CONFIG_VLAN;
	}
	if (tx_offloads & DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP &&
	    rte_mbuf_dynflag_lookup
			(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL) >= 0 &&
	    rte_mbuf_dynfield_lookup
			(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL) >= 0) {
		/* Offload configured, dynamic entities registered. */
		olx |= MLX5_TXOFF_CONFIG_TXPP;
	}
	if (priv->txqs_n && (*priv->txqs)[0]) {
		struct mlx5_txq_data *txd = (*priv->txqs)[0];

		if (txd->inlen_send) {
			/*
			 * Check the data inline requirements. Data inline
			 * is enabled on a per-device basis, so checking
			 * the first Tx queue only is sufficient.
			 *
			 * If the device does not support VLAN insertion
			 * in the WQE and some queues are requested to
			 * perform VLAN insertion offload, then inline
			 * must be enabled.
			 */
			olx |= MLX5_TXOFF_CONFIG_INLINE;
		}
	}
	if (config->mps == MLX5_MPW_ENHANCED &&
	    config->txq_inline_min <= 0) {
		/*
		 * The NIC supports Enhanced Multi-Packet Write
		 * and does not require minimal inline data.
		 */
		olx |= MLX5_TXOFF_CONFIG_EMPW;
	}
	if (rte_flow_dynf_metadata_avail()) {
		/* We should support Flow metadata. */
		olx |= MLX5_TXOFF_CONFIG_METADATA;
	}
	if (config->mps == MLX5_MPW) {
		/*
		 * The NIC supports Legacy Multi-Packet Write.
		 * The MLX5_TXOFF_CONFIG_MPW controls the
		 * descriptor building method in combination
		 * with MLX5_TXOFF_CONFIG_EMPW.
		 */
		if (!(olx & (MLX5_TXOFF_CONFIG_TSO |
			     MLX5_TXOFF_CONFIG_SWP |
			     MLX5_TXOFF_CONFIG_VLAN |
			     MLX5_TXOFF_CONFIG_METADATA)))
			olx |= MLX5_TXOFF_CONFIG_EMPW |
			       MLX5_TXOFF_CONFIG_MPW;
	}
	/*
	 * Scan the routines table to find the minimal
	 * satisfying routine with the requested offloads.
	 */
	m = RTE_DIM(txoff_func);
	for (i = 0; i < RTE_DIM(txoff_func); i++) {
		unsigned int tmp;

		tmp = txoff_func[i].olx;
		if (tmp == olx) {
			/* Meets requested offloads exactly. */
			m = i;
			break;
		}
		if ((tmp & olx) != olx) {
			/* Does not meet requested offloads at all. */
			continue;
		}
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_MPW)
			/* Do not enable legacy MPW if not configured. */
			continue;
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_EMPW)
			/* Do not enable eMPW if not configured. */
			continue;
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_INLINE)
			/* Do not enable inlining if not configured. */
			continue;
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_TXPP)
			/* Do not enable scheduling if not configured. */
			continue;
		/*
		 * Some routine meets the requirements.
		 * Check whether it has the minimal amount
		 * of not requested offloads.
		 */
		tmp = __builtin_popcountl(tmp & ~olx);
		if (m >= RTE_DIM(txoff_func) || tmp < diff) {
			/* First or better match, save and continue. */
			m = i;
			diff = tmp;
			continue;
		}
		if (tmp == diff) {
			tmp = txoff_func[i].olx ^ txoff_func[m].olx;
			if (__builtin_ffsl(txoff_func[i].olx & ~tmp) <
			    __builtin_ffsl(txoff_func[m].olx & ~tmp)) {
				/* Prefer the lighter not requested offload. */
				m = i;
			}
		}
	}
	if (m >= RTE_DIM(txoff_func)) {
		DRV_LOG(DEBUG, "port %u has no selected Tx function"
			       " for requested offloads %04X",
			dev->data->port_id, olx);
		return NULL;
	}
	DRV_LOG(DEBUG, "port %u has selected Tx function"
		       " supporting offloads %04X/%04X",
		dev->data->port_id, olx, txoff_func[m].olx);
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MULTI)
		DRV_LOG(DEBUG, "\tMULTI (multi segment)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TSO)
		DRV_LOG(DEBUG, "\tTSO (TCP send offload)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_SWP)
		DRV_LOG(DEBUG, "\tSWP (software parser)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_CSUM)
		DRV_LOG(DEBUG, "\tCSUM (checksum offload)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_INLINE)
		DRV_LOG(DEBUG, "\tINLIN (inline data)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_VLAN)
		DRV_LOG(DEBUG, "\tVLANI (VLAN insertion)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_METADATA)
		DRV_LOG(DEBUG, "\tMETAD (tx Flow metadata)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TXPP)
		DRV_LOG(DEBUG, "\tTXPP (tx Scheduling)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_EMPW) {
		if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MPW)
			DRV_LOG(DEBUG, "\tMPW (Legacy MPW)");
		else
			DRV_LOG(DEBUG, "\tEMPW (Enhanced MPW)");
	}
	return txoff_func[m].func;
}
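/*
 * A minimal usage sketch, illustrative only and not part of the driver:
 * the device start path is expected to install the routine returned by
 * mlx5_select_tx_function() as the Tx burst callback, roughly as below
 * (the error handling shown is hypothetical):
 *
 *	eth_tx_burst_t burst = mlx5_select_tx_function(dev);
 *
 *	if (burst == NULL)
 *		return -ENOTSUP;
 *	dev->tx_pkt_burst = burst;
 */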
/**
 * DPDK callback to get the Tx queue information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param tx_queue_id
 *   Tx queue identifier.
 *
 * @param qinfo
 *   Pointer to the Tx queue information structure.
 *
 * @return
 *   None.
 */
void
mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		  struct rte_eth_txq_info *qinfo)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[tx_queue_id];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);

	if (!txq)
		return;
	qinfo->nb_desc = txq->elts_s;
	qinfo->conf.tx_thresh.pthresh = 0;
	qinfo->conf.tx_thresh.hthresh = 0;
	qinfo->conf.tx_thresh.wthresh = 0;
	qinfo->conf.tx_rs_thresh = 0;
	qinfo->conf.tx_free_thresh = 0;
	qinfo->conf.tx_deferred_start = txq_ctrl ? 0 : 1;
	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
}
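/*
 * Applications reach this callback through the ethdev layer. A minimal
 * sketch, illustrative only (port_id is assumed to be a valid mlx5 port
 * and queue 0 to be configured):
 *
 *	struct rte_eth_txq_info qinfo;
 *
 *	if (rte_eth_tx_queue_info_get(port_id, 0, &qinfo) == 0)
 *		printf("Tx queue 0: %u descriptors\n", qinfo.nb_desc);
 */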
5810 " + SWP" : "", 5811 (olx & MLX5_TXOFF_CONFIG_CSUM) ? 5812 " + CSUM" : "", 5813 (olx & MLX5_TXOFF_CONFIG_INLINE) ? 5814 " + INLINE" : "", 5815 (olx & MLX5_TXOFF_CONFIG_VLAN) ? 5816 " + VLAN" : "", 5817 (olx & MLX5_TXOFF_CONFIG_METADATA) ? 5818 " + METADATA" : "", 5819 (olx & MLX5_TXOFF_CONFIG_TXPP) ? 5820 " + TXPP" : ""); 5821 return 0; 5822 } 5823 } 5824 return -EINVAL; 5825 } 5826