/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015-2019 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_cycles.h>
#include <rte_flow.h>

#include <mlx5_prm.h>
#include <mlx5_common.h>

#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
	MLX5_TXCMP_CODE_EXIT = 0,
	MLX5_TXCMP_CODE_ERROR,
	MLX5_TXCMP_CODE_SINGLE,
	MLX5_TXCMP_CODE_MULTI,
	MLX5_TXCMP_CODE_TSO,
	MLX5_TXCMP_CODE_EMPW,
};

/*
 * These defines are used to configure the set of Tx burst routine options
 * supported at compile time. Options that are not specified are optimized
 * out, because the corresponding if conditions can be evaluated at compile
 * time. Offloads with a larger runtime check overhead (i.e. requiring more
 * CPU cycles to skip) should have a larger index - this is needed to select
 * the best matching routine when there is no exact match and some offloads
 * are not actually requested.
 */
#define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets. */
#define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported. */
#define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads. */
#define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Checksums offloaded. */
#define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported. */
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported. */
#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported. */
#define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp. */

/* The most common offloads groups. */
#define MLX5_TXOFF_CONFIG_NONE 0
#define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \
				MLX5_TXOFF_CONFIG_TSO | \
				MLX5_TXOFF_CONFIG_SWP | \
				MLX5_TXOFF_CONFIG_CSUM | \
				MLX5_TXOFF_CONFIG_INLINE | \
				MLX5_TXOFF_CONFIG_VLAN | \
				MLX5_TXOFF_CONFIG_METADATA)

#define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask)

#define MLX5_TXOFF_DECL(func, olx) \
static uint16_t mlx5_tx_burst_##func(void *txq, \
				     struct rte_mbuf **pkts, \
				     uint16_t pkts_n) \
{ \
	return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \
				  pkts, pkts_n, (olx)); \
}

#define MLX5_TXOFF_INFO(func, olx) {mlx5_tx_burst_##func, olx},
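/*
 * Usage sketch (illustrative only, not compiled): a Tx burst variant is
 * expected to be generated from an offload mask that is fully known at
 * compile time, so that every MLX5_TXOFF_CONFIG() test folds into a
 * constant and the unused branches are optimized away. The variant name
 * "full_empw" and the table name "txoff_func" below are hypothetical and
 * only demonstrate the macro pattern:
 *
 *	MLX5_TXOFF_DECL(full_empw,
 *			MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW)
 *
 *	static const struct {
 *		eth_tx_burst_t func;
 *		unsigned int olx;
 *	} txoff_func[] = {
 *		MLX5_TXOFF_INFO(full_empw,
 *				MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW)
 *	};
 */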
static __rte_always_inline uint32_t
rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		   volatile struct mlx5_mini_cqe8 *mcqe);

static __rte_always_inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe);

static __rte_always_inline uint32_t
rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);

static __rte_always_inline void
rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
	       volatile struct mlx5_cqe *cqe,
	       volatile struct mlx5_mini_cqe8 *mcqe);

static int
mlx5_queue_state_modify(struct rte_eth_dev *dev,
			struct mlx5_mp_arg_queue_state_modify *sm);

static inline void
mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp,
			volatile struct mlx5_cqe *__rte_restrict cqe,
			uint32_t phcsum, uint8_t l4_type);

static inline void
mlx5_lro_update_hdr(uint8_t *__rte_restrict padd,
		    volatile struct mlx5_cqe *__rte_restrict cqe,
		    volatile struct mlx5_mini_cqe8 *mcqe,
		    struct mlx5_rxq_data *rxq, uint32_t len);

uint32_t mlx5_ptype_table[] __rte_cache_aligned = {
	[0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */
};

uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned;
uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned;

uint64_t rte_net_mlx5_dynf_inline_mask;
#define PKT_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask

/**
 * Build a table to translate Rx completion flags to packet type.
 *
 * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
 */
void
mlx5_set_ptype_table(void)
{
	unsigned int i;
	uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table;

	/* Last entry must not be overwritten, reserved for errored packet.
*/ 135 for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i) 136 (*p)[i] = RTE_PTYPE_UNKNOWN; 137 /* 138 * The index to the array should have: 139 * bit[1:0] = l3_hdr_type 140 * bit[4:2] = l4_hdr_type 141 * bit[5] = ip_frag 142 * bit[6] = tunneled 143 * bit[7] = outer_l3_type 144 */ 145 /* L2 */ 146 (*p)[0x00] = RTE_PTYPE_L2_ETHER; 147 /* L3 */ 148 (*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 149 RTE_PTYPE_L4_NONFRAG; 150 (*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 151 RTE_PTYPE_L4_NONFRAG; 152 /* Fragmented */ 153 (*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 154 RTE_PTYPE_L4_FRAG; 155 (*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 156 RTE_PTYPE_L4_FRAG; 157 /* TCP */ 158 (*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 159 RTE_PTYPE_L4_TCP; 160 (*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 161 RTE_PTYPE_L4_TCP; 162 (*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 163 RTE_PTYPE_L4_TCP; 164 (*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 165 RTE_PTYPE_L4_TCP; 166 (*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 167 RTE_PTYPE_L4_TCP; 168 (*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 169 RTE_PTYPE_L4_TCP; 170 /* UDP */ 171 (*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 172 RTE_PTYPE_L4_UDP; 173 (*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 174 RTE_PTYPE_L4_UDP; 175 /* Repeat with outer_l3_type being set. Just in case. */ 176 (*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 177 RTE_PTYPE_L4_NONFRAG; 178 (*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 179 RTE_PTYPE_L4_NONFRAG; 180 (*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 181 RTE_PTYPE_L4_FRAG; 182 (*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 183 RTE_PTYPE_L4_FRAG; 184 (*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 185 RTE_PTYPE_L4_TCP; 186 (*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 187 RTE_PTYPE_L4_TCP; 188 (*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 189 RTE_PTYPE_L4_TCP; 190 (*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 191 RTE_PTYPE_L4_TCP; 192 (*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 193 RTE_PTYPE_L4_TCP; 194 (*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 195 RTE_PTYPE_L4_TCP; 196 (*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 197 RTE_PTYPE_L4_UDP; 198 (*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 199 RTE_PTYPE_L4_UDP; 200 /* Tunneled - L3 */ 201 (*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 202 (*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 203 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 204 RTE_PTYPE_INNER_L4_NONFRAG; 205 (*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 206 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 207 RTE_PTYPE_INNER_L4_NONFRAG; 208 (*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 209 (*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 210 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 211 RTE_PTYPE_INNER_L4_NONFRAG; 212 (*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 213 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 214 RTE_PTYPE_INNER_L4_NONFRAG; 215 /* Tunneled - Fragmented */ 216 (*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 217 
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 218 RTE_PTYPE_INNER_L4_FRAG; 219 (*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 220 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 221 RTE_PTYPE_INNER_L4_FRAG; 222 (*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 223 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 224 RTE_PTYPE_INNER_L4_FRAG; 225 (*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 226 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 227 RTE_PTYPE_INNER_L4_FRAG; 228 /* Tunneled - TCP */ 229 (*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 230 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 231 RTE_PTYPE_INNER_L4_TCP; 232 (*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 233 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 234 RTE_PTYPE_INNER_L4_TCP; 235 (*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 236 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 237 RTE_PTYPE_INNER_L4_TCP; 238 (*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 239 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 240 RTE_PTYPE_INNER_L4_TCP; 241 (*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 242 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 243 RTE_PTYPE_INNER_L4_TCP; 244 (*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 245 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 246 RTE_PTYPE_INNER_L4_TCP; 247 (*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 248 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 249 RTE_PTYPE_INNER_L4_TCP; 250 (*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 251 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 252 RTE_PTYPE_INNER_L4_TCP; 253 (*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 254 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 255 RTE_PTYPE_INNER_L4_TCP; 256 (*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 257 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 258 RTE_PTYPE_INNER_L4_TCP; 259 (*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 260 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 261 RTE_PTYPE_INNER_L4_TCP; 262 (*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 263 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 264 RTE_PTYPE_INNER_L4_TCP; 265 /* Tunneled - UDP */ 266 (*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 267 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 268 RTE_PTYPE_INNER_L4_UDP; 269 (*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 270 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 271 RTE_PTYPE_INNER_L4_UDP; 272 (*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 273 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 274 RTE_PTYPE_INNER_L4_UDP; 275 (*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 276 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 277 RTE_PTYPE_INNER_L4_UDP; 278 } 279 280 /** 281 * Build a table to translate packet to checksum type of Verbs. 282 */ 283 void 284 mlx5_set_cksum_table(void) 285 { 286 unsigned int i; 287 uint8_t v; 288 289 /* 290 * The index should have: 291 * bit[0] = PKT_TX_TCP_SEG 292 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 293 * bit[4] = PKT_TX_IP_CKSUM 294 * bit[8] = PKT_TX_OUTER_IP_CKSUM 295 * bit[9] = tunnel 296 */ 297 for (i = 0; i < RTE_DIM(mlx5_cksum_table); ++i) { 298 v = 0; 299 if (i & (1 << 9)) { 300 /* Tunneled packet. */ 301 if (i & (1 << 8)) /* Outer IP. */ 302 v |= MLX5_ETH_WQE_L3_CSUM; 303 if (i & (1 << 4)) /* Inner IP. */ 304 v |= MLX5_ETH_WQE_L3_INNER_CSUM; 305 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 306 v |= MLX5_ETH_WQE_L4_INNER_CSUM; 307 } else { 308 /* No tunnel. 
*/ 309 if (i & (1 << 4)) /* IP. */ 310 v |= MLX5_ETH_WQE_L3_CSUM; 311 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 312 v |= MLX5_ETH_WQE_L4_CSUM; 313 } 314 mlx5_cksum_table[i] = v; 315 } 316 } 317 318 /** 319 * Build a table to translate packet type of mbuf to SWP type of Verbs. 320 */ 321 void 322 mlx5_set_swp_types_table(void) 323 { 324 unsigned int i; 325 uint8_t v; 326 327 /* 328 * The index should have: 329 * bit[0:1] = PKT_TX_L4_MASK 330 * bit[4] = PKT_TX_IPV6 331 * bit[8] = PKT_TX_OUTER_IPV6 332 * bit[9] = PKT_TX_OUTER_UDP 333 */ 334 for (i = 0; i < RTE_DIM(mlx5_swp_types_table); ++i) { 335 v = 0; 336 if (i & (1 << 8)) 337 v |= MLX5_ETH_WQE_L3_OUTER_IPV6; 338 if (i & (1 << 9)) 339 v |= MLX5_ETH_WQE_L4_OUTER_UDP; 340 if (i & (1 << 4)) 341 v |= MLX5_ETH_WQE_L3_INNER_IPV6; 342 if ((i & 3) == (PKT_TX_UDP_CKSUM >> 52)) 343 v |= MLX5_ETH_WQE_L4_INNER_UDP; 344 mlx5_swp_types_table[i] = v; 345 } 346 } 347 348 /** 349 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 350 * Flags must be preliminary initialized to zero. 351 * 352 * @param loc 353 * Pointer to burst routine local context. 354 * @param swp_flags 355 * Pointer to store Software Parser flags 356 * @param olx 357 * Configured Tx offloads mask. It is fully defined at 358 * compile time and may be used for optimization. 359 * 360 * @return 361 * Software Parser offsets packed in dword. 362 * Software Parser flags are set by pointer. 363 */ 364 static __rte_always_inline uint32_t 365 txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, 366 uint8_t *swp_flags, 367 unsigned int olx) 368 { 369 uint64_t ol, tunnel; 370 unsigned int idx, off; 371 uint32_t set; 372 373 if (!MLX5_TXOFF_CONFIG(SWP)) 374 return 0; 375 ol = loc->mbuf->ol_flags; 376 tunnel = ol & PKT_TX_TUNNEL_MASK; 377 /* 378 * Check whether Software Parser is required. 379 * Only customized tunnels may ask for. 380 */ 381 if (likely(tunnel != PKT_TX_TUNNEL_UDP && tunnel != PKT_TX_TUNNEL_IP)) 382 return 0; 383 /* 384 * The index should have: 385 * bit[0:1] = PKT_TX_L4_MASK 386 * bit[4] = PKT_TX_IPV6 387 * bit[8] = PKT_TX_OUTER_IPV6 388 * bit[9] = PKT_TX_OUTER_UDP 389 */ 390 idx = (ol & (PKT_TX_L4_MASK | PKT_TX_IPV6 | PKT_TX_OUTER_IPV6)) >> 52; 391 idx |= (tunnel == PKT_TX_TUNNEL_UDP) ? (1 << 9) : 0; 392 *swp_flags = mlx5_swp_types_table[idx]; 393 /* 394 * Set offsets for SW parser. Since ConnectX-5, SW parser just 395 * complements HW parser. SW parser starts to engage only if HW parser 396 * can't reach a header. For the older devices, HW parser will not kick 397 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 398 * should be set regardless of HW offload. 399 */ 400 off = loc->mbuf->outer_l2_len; 401 if (MLX5_TXOFF_CONFIG(VLAN) && ol & PKT_TX_VLAN_PKT) 402 off += sizeof(struct rte_vlan_hdr); 403 set = (off >> 1) << 8; /* Outer L3 offset. */ 404 off += loc->mbuf->outer_l3_len; 405 if (tunnel == PKT_TX_TUNNEL_UDP) 406 set |= off >> 1; /* Outer L4 offset. */ 407 if (ol & (PKT_TX_IPV4 | PKT_TX_IPV6)) { /* Inner IP. */ 408 const uint64_t csum = ol & PKT_TX_L4_MASK; 409 off += loc->mbuf->l2_len; 410 set |= (off >> 1) << 24; /* Inner L3 offset. */ 411 if (csum == PKT_TX_TCP_CKSUM || 412 csum == PKT_TX_UDP_CKSUM || 413 (MLX5_TXOFF_CONFIG(TSO) && ol & PKT_TX_TCP_SEG)) { 414 off += loc->mbuf->l3_len; 415 set |= (off >> 1) << 16; /* Inner L4 offset. */ 416 } 417 } 418 set = rte_cpu_to_le_32(set); 419 return set; 420 } 421 422 /** 423 * Convert the Checksum offloads to Verbs. 424 * 425 * @param buf 426 * Pointer to the mbuf. 
 *
 * @return
 *   Converted checksum flags.
 */
static __rte_always_inline uint8_t
txq_ol_cksum_to_cs(struct rte_mbuf *buf)
{
	uint32_t idx;
	uint8_t is_tunnel = !!(buf->ol_flags & PKT_TX_TUNNEL_MASK);
	const uint64_t ol_flags_mask = PKT_TX_TCP_SEG | PKT_TX_L4_MASK |
				       PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM;

	/*
	 * The index should have:
	 * bit[0] = PKT_TX_TCP_SEG
	 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM
	 * bit[4] = PKT_TX_IP_CKSUM
	 * bit[8] = PKT_TX_OUTER_IP_CKSUM
	 * bit[9] = tunnel
	 */
	idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9);
	return mlx5_cksum_table[idx];
}
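/*
 * Worked example (illustrative only): a plain, non-tunneled TCP packet
 * requesting PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM (mbuf ol_flags bits 54
 * and 52) yields
 *
 *	idx = (((1ULL << 54) | (1ULL << 52)) >> 50) | (0 << 9) = 0x14,
 *
 * and mlx5_set_cksum_table() has filled mlx5_cksum_table[0x14] with
 * MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM, so both the IP and the
 * TCP checksum offloads are requested from the NIC in the WQE.
 */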
/**
 * Internal function to compute the number of used descriptors in a Rx queue.
 *
 * @param rxq
 *   The Rx queue.
 *
 * @return
 *   The number of used Rx descriptors.
 */
static uint32_t
rx_queue_count(struct mlx5_rxq_data *rxq)
{
	struct rxq_zip *zip = &rxq->zip;
	volatile struct mlx5_cqe *cqe;
	unsigned int cq_ci = rxq->cq_ci;
	const unsigned int cqe_n = (1 << rxq->cqe_n);
	const unsigned int cqe_cnt = cqe_n - 1;
	unsigned int used = 0;

	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) {
		int8_t op_own;
		unsigned int n;

		op_own = cqe->op_own;
		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
			if (unlikely(zip->ai))
				n = zip->cqe_cnt - zip->ai;
			else
				n = rte_be_to_cpu_32(cqe->byte_cnt);
		} else {
			n = 1;
		}
		cq_ci += n;
		used += n;
		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	}
	used = RTE_MIN(used, cqe_n);
	return used;
}

/**
 * DPDK callback to check the status of a Rx descriptor.
 *
 * @param rx_queue
 *   The Rx queue.
 * @param[in] offset
 *   The index of the descriptor in the ring.
 *
 * @return
 *   The status of the Rx descriptor.
 */
int
mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
{
	struct mlx5_rxq_data *rxq = rx_queue;
	struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);

	if (dev->rx_pkt_burst == NULL ||
	    dev->rx_pkt_burst == removed_rx_burst) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	if (offset >= (1 << rxq->cqe_n)) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (offset < rx_queue_count(rxq))
		return RTE_ETH_RX_DESC_DONE;
	return RTE_ETH_RX_DESC_AVAIL;
}

/**
 * DPDK callback to get the Rx queue information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   Rx queue identifier.
 *
 * @param qinfo
 *   Pointer to the Rx queue information structure.
 *
 * @return
 *   None.
 */

void
mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		  struct rte_eth_rxq_info *qinfo)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

	if (!rxq)
		return;
	qinfo->mp = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
					rxq->mprq_mp : rxq->mp;
	qinfo->conf.rx_thresh.pthresh = 0;
	qinfo->conf.rx_thresh.hthresh = 0;
	qinfo->conf.rx_thresh.wthresh = 0;
	qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh;
	qinfo->conf.rx_drop_en = 1;
	qinfo->conf.rx_deferred_start = rxq_ctrl ? 0 : 1;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
	qinfo->scattered_rx = dev->data->scattered_rx;
	qinfo->nb_desc = 1 << rxq->elts_n;
}

/**
 * DPDK callback to get the Rx packet burst mode information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   Rx queue identifier.
 *
 * @param mode
 *   Pointer to the burst mode information.
 *
 * @return
 *   0 on success, -EINVAL on failure.
 */

int
mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
		       uint16_t rx_queue_id __rte_unused,
		       struct rte_eth_burst_mode *mode)
{
	eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq;

	rxq = (*priv->rxqs)[rx_queue_id];
	if (!rxq) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (pkt_burst == mlx5_rx_burst) {
		snprintf(mode->info, sizeof(mode->info), "%s", "Scalar");
	} else if (pkt_burst == mlx5_rx_burst_mprq) {
		snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ");
	} else if (pkt_burst == mlx5_rx_burst_vec) {
#if defined RTE_ARCH_X86_64
		snprintf(mode->info, sizeof(mode->info), "%s", "Vector SSE");
#elif defined RTE_ARCH_ARM64
		snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon");
#elif defined RTE_ARCH_PPC_64
		snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec");
#else
		return -EINVAL;
#endif
	} else if (pkt_burst == mlx5_rx_burst_mprq_vec) {
#if defined RTE_ARCH_X86_64
		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector SSE");
#elif defined RTE_ARCH_ARM64
		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon");
#elif defined RTE_ARCH_PPC_64
		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec");
#else
		return -EINVAL;
#endif
	} else {
		return -EINVAL;
	}
	return 0;
}

/**
 * DPDK callback to get the number of used descriptors in a Rx queue.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   The Rx queue.
 *
 * @return
 *   The number of used Rx descriptors.
 *   -EINVAL if the queue is invalid
 */
uint32_t
mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq;

	if (dev->rx_pkt_burst == NULL ||
	    dev->rx_pkt_burst == removed_rx_burst) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	rxq = (*priv->rxqs)[rx_queue_id];
	if (!rxq) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return rx_queue_count(rxq);
}
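/*
 * Application-side usage sketch (illustrative only, not part of the PMD):
 * the Rx introspection callbacks above are reached through the generic
 * ethdev API. The port_id/queue_id values are hypothetical placeholders:
 *
 *	int used = rte_eth_rx_queue_count(port_id, queue_id);
 *
 *	if (used > 0 &&
 *	    rte_eth_rx_descriptor_status(port_id, queue_id, 0) ==
 *	    RTE_ETH_RX_DESC_DONE)
 *		drain_queue(port_id, queue_id); // hypothetical app helper
 *
 * RTE_ETH_RX_DESC_DONE means the descriptor at the given offset has been
 * filled by the NIC and is waiting to be processed by the application.
 */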
#define MLX5_SYSTEM_LOG_DIR "/var/log"
/**
 * Dump debug information to a log file.
 *
 * @param fname
 *   The file name.
 * @param hex_title
 *   If not NULL this string is printed as a header to the output
 *   and the output will be in hexadecimal view.
 * @param buf
 *   This is the buffer address to print out.
 * @param hex_len
 *   The number of bytes to dump out.
 */
void
mlx5_dump_debug_information(const char *fname, const char *hex_title,
			    const void *buf, unsigned int hex_len)
{
	FILE *fd;

	MKSTR(path, "%s/%s", MLX5_SYSTEM_LOG_DIR, fname);
	fd = fopen(path, "a+");
	if (!fd) {
		DRV_LOG(WARNING, "cannot open %s for debug dump", path);
		MKSTR(path2, "./%s", fname);
		fd = fopen(path2, "a+");
		if (!fd) {
			DRV_LOG(ERR, "cannot open %s for debug dump", path2);
			return;
		}
		DRV_LOG(INFO, "New debug dump in file %s", path2);
	} else {
		DRV_LOG(INFO, "New debug dump in file %s", path);
	}
	if (hex_title)
		rte_hexdump(fd, hex_title, buf, hex_len);
	else
		fprintf(fd, "%s", (const char *)buf);
	fprintf(fd, "\n\n\n");
	fclose(fd);
}

/**
 * Move QP from error state to running state and initialize indexes.
 *
 * @param txq_ctrl
 *   Pointer to TX queue control structure.
 *
 * @return
 *   0 on success, else -1.
 */
static int
tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_mp_arg_queue_state_modify sm = {
			.is_wq = 0,
			.queue_id = txq_ctrl->txq.idx,
	};

	if (mlx5_queue_state_modify(ETH_DEV(txq_ctrl->priv), &sm))
		return -1;
	txq_ctrl->txq.wqe_ci = 0;
	txq_ctrl->txq.wqe_pi = 0;
	txq_ctrl->txq.elts_comp = 0;
	return 0;
}

/*
 * Return 1 if the error CQE has already been marked as seen,
 * otherwise mark it and return 0.
 */
static int
check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
{
	static const uint8_t magic[] = "seen";
	int ret = 1;
	unsigned int i;

	for (i = 0; i < sizeof(magic); ++i)
		if (!ret || err_cqe->rsvd1[i] != magic[i]) {
			ret = 0;
			err_cqe->rsvd1[i] = magic[i];
		}
	return ret;
}

/**
 * Handle an error CQE.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param err_cqe
 *   Pointer to the error CQE.
 *
 * @return
 *   Negative value if queue recovery failed, otherwise 0
 *   when the error completion entry is handled successfully.
 */
static int
mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq,
			 volatile struct mlx5_err_cqe *err_cqe)
{
	if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) {
		const uint16_t wqe_m = ((1 << txq->wqe_n) - 1);
		struct mlx5_txq_ctrl *txq_ctrl =
				container_of(txq, struct mlx5_txq_ctrl, txq);
		uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter);
		int seen = check_err_cqe_seen(err_cqe);

		if (!seen && txq_ctrl->dump_file_n <
		    txq_ctrl->priv->config.max_dump_files_num) {
			MKSTR(err_str, "Unexpected CQE error syndrome "
			      "0x%02x CQN = %u SQN = %u wqe_counter = %u "
			      "wq_ci = %u cq_ci = %u", err_cqe->syndrome,
			      txq->cqe_s, txq->qp_num_8s >> 8,
			      rte_be_to_cpu_16(err_cqe->wqe_counter),
			      txq->wqe_ci, txq->cq_ci);
			MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u",
			      PORT_ID(txq_ctrl->priv), txq->idx,
			      txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc());
			mlx5_dump_debug_information(name, NULL, err_str, 0);
			mlx5_dump_debug_information(name, "MLX5 Error CQ:",
						    (const void *)((uintptr_t)
						    txq->cqes),
						    sizeof(*err_cqe) *
						    (1 << txq->cqe_n));
			mlx5_dump_debug_information(name, "MLX5 Error SQ:",
						    (const void *)((uintptr_t)
						    txq->wqes),
						    MLX5_WQE_SIZE *
						    (1 << txq->wqe_n));
			txq_ctrl->dump_file_n++;
		}
		if (!seen)
			/*
			 * Count errors in units of WQEs.
789 * Later it can be improved to count error packets, 790 * for example, by SQ parsing to find how much packets 791 * should be counted for each WQE. 792 */ 793 txq->stats.oerrors += ((txq->wqe_ci & wqe_m) - 794 new_wqe_pi) & wqe_m; 795 if (tx_recover_qp(txq_ctrl)) { 796 /* Recovering failed - retry later on the same WQE. */ 797 return -1; 798 } 799 /* Release all the remaining buffers. */ 800 txq_free_elts(txq_ctrl); 801 } 802 return 0; 803 } 804 805 /** 806 * Translate RX completion flags to packet type. 807 * 808 * @param[in] rxq 809 * Pointer to RX queue structure. 810 * @param[in] cqe 811 * Pointer to CQE. 812 * 813 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 814 * 815 * @return 816 * Packet type for struct rte_mbuf. 817 */ 818 static inline uint32_t 819 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 820 volatile struct mlx5_mini_cqe8 *mcqe) 821 { 822 uint8_t idx; 823 uint8_t ptype; 824 uint8_t pinfo = (cqe->pkt_info & 0x3) << 6; 825 826 /* Get l3/l4 header from mini-CQE in case L3/L4 format*/ 827 if (mcqe == NULL || 828 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 829 ptype = (cqe->hdr_type_etc & 0xfc00) >> 10; 830 else 831 ptype = mcqe->hdr_type >> 2; 832 /* 833 * The index to the array should have: 834 * bit[1:0] = l3_hdr_type 835 * bit[4:2] = l4_hdr_type 836 * bit[5] = ip_frag 837 * bit[6] = tunneled 838 * bit[7] = outer_l3_type 839 */ 840 idx = pinfo | ptype; 841 return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6)); 842 } 843 844 /** 845 * Initialize Rx WQ and indexes. 846 * 847 * @param[in] rxq 848 * Pointer to RX queue structure. 849 */ 850 void 851 mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) 852 { 853 const unsigned int wqe_n = 1 << rxq->elts_n; 854 unsigned int i; 855 856 for (i = 0; (i != wqe_n); ++i) { 857 volatile struct mlx5_wqe_data_seg *scat; 858 uintptr_t addr; 859 uint32_t byte_count; 860 861 if (mlx5_rxq_mprq_enabled(rxq)) { 862 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i]; 863 864 scat = &((volatile struct mlx5_wqe_mprq *) 865 rxq->wqes)[i].dseg; 866 addr = (uintptr_t)mlx5_mprq_buf_addr(buf, 867 1 << rxq->strd_num_n); 868 byte_count = (1 << rxq->strd_sz_n) * 869 (1 << rxq->strd_num_n); 870 } else { 871 struct rte_mbuf *buf = (*rxq->elts)[i]; 872 873 scat = &((volatile struct mlx5_wqe_data_seg *) 874 rxq->wqes)[i]; 875 addr = rte_pktmbuf_mtod(buf, uintptr_t); 876 byte_count = DATA_LEN(buf); 877 } 878 /* scat->addr must be able to store a pointer. */ 879 MLX5_ASSERT(sizeof(scat->addr) >= sizeof(uintptr_t)); 880 *scat = (struct mlx5_wqe_data_seg){ 881 .addr = rte_cpu_to_be_64(addr), 882 .byte_count = rte_cpu_to_be_32(byte_count), 883 .lkey = mlx5_rx_addr2mr(rxq, addr), 884 }; 885 } 886 rxq->consumed_strd = 0; 887 rxq->decompressed = 0; 888 rxq->rq_pi = 0; 889 rxq->zip = (struct rxq_zip){ 890 .ai = 0, 891 }; 892 rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ? 893 (wqe_n >> rxq->sges_n) * (1 << rxq->strd_num_n) : 0; 894 /* Update doorbell counter. */ 895 rxq->rq_ci = wqe_n >> rxq->sges_n; 896 rte_io_wmb(); 897 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 898 } 899 900 /** 901 * Modify a Verbs/DevX queue state. 902 * This must be called from the primary process. 903 * 904 * @param dev 905 * Pointer to Ethernet device. 906 * @param sm 907 * State modify request parameters. 908 * 909 * @return 910 * 0 in case of success else non-zero value and rte_errno is set. 
911 */ 912 int 913 mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, 914 const struct mlx5_mp_arg_queue_state_modify *sm) 915 { 916 int ret; 917 struct mlx5_priv *priv = dev->data->dev_private; 918 919 if (sm->is_wq) { 920 struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id]; 921 struct mlx5_rxq_ctrl *rxq_ctrl = 922 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 923 924 ret = priv->obj_ops.rxq_obj_modify(rxq_ctrl->obj, sm->state); 925 if (ret) { 926 DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s", 927 sm->state, strerror(errno)); 928 rte_errno = errno; 929 return ret; 930 } 931 } else { 932 struct mlx5_txq_data *txq = (*priv->txqs)[sm->queue_id]; 933 struct mlx5_txq_ctrl *txq_ctrl = 934 container_of(txq, struct mlx5_txq_ctrl, txq); 935 936 ret = priv->obj_ops.txq_obj_modify(txq_ctrl->obj, 937 MLX5_TXQ_MOD_ERR2RDY, 938 (uint8_t)priv->dev_port); 939 if (ret) 940 return ret; 941 } 942 return 0; 943 } 944 945 /** 946 * Modify a Verbs queue state. 947 * 948 * @param dev 949 * Pointer to Ethernet device. 950 * @param sm 951 * State modify request parameters. 952 * 953 * @return 954 * 0 in case of success else non-zero value. 955 */ 956 static int 957 mlx5_queue_state_modify(struct rte_eth_dev *dev, 958 struct mlx5_mp_arg_queue_state_modify *sm) 959 { 960 struct mlx5_priv *priv = dev->data->dev_private; 961 int ret = 0; 962 963 switch (rte_eal_process_type()) { 964 case RTE_PROC_PRIMARY: 965 ret = mlx5_queue_state_modify_primary(dev, sm); 966 break; 967 case RTE_PROC_SECONDARY: 968 ret = mlx5_mp_req_queue_state_modify(&priv->mp_id, sm); 969 break; 970 default: 971 break; 972 } 973 return ret; 974 } 975 976 /** 977 * Handle a Rx error. 978 * The function inserts the RQ state to reset when the first error CQE is 979 * shown, then drains the CQ by the caller function loop. When the CQ is empty, 980 * it moves the RQ state to ready and initializes the RQ. 981 * Next CQE identification and error counting are in the caller responsibility. 982 * 983 * @param[in] rxq 984 * Pointer to RX queue structure. 985 * @param[in] vec 986 * 1 when called from vectorized Rx burst, need to prepare mbufs for the RQ. 987 * 0 when called from non-vectorized Rx burst. 988 * 989 * @return 990 * -1 in case of recovery error, otherwise the CQE status. 
991 */ 992 int 993 mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) 994 { 995 const uint16_t cqe_n = 1 << rxq->cqe_n; 996 const uint16_t cqe_mask = cqe_n - 1; 997 const uint16_t wqe_n = 1 << rxq->elts_n; 998 const uint16_t strd_n = 1 << rxq->strd_num_n; 999 struct mlx5_rxq_ctrl *rxq_ctrl = 1000 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 1001 union { 1002 volatile struct mlx5_cqe *cqe; 1003 volatile struct mlx5_err_cqe *err_cqe; 1004 } u = { 1005 .cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask], 1006 }; 1007 struct mlx5_mp_arg_queue_state_modify sm; 1008 int ret; 1009 1010 switch (rxq->err_state) { 1011 case MLX5_RXQ_ERR_STATE_NO_ERROR: 1012 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET; 1013 /* Fall-through */ 1014 case MLX5_RXQ_ERR_STATE_NEED_RESET: 1015 sm.is_wq = 1; 1016 sm.queue_id = rxq->idx; 1017 sm.state = IBV_WQS_RESET; 1018 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm)) 1019 return -1; 1020 if (rxq_ctrl->dump_file_n < 1021 rxq_ctrl->priv->config.max_dump_files_num) { 1022 MKSTR(err_str, "Unexpected CQE error syndrome " 1023 "0x%02x CQN = %u RQN = %u wqe_counter = %u" 1024 " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome, 1025 rxq->cqn, rxq_ctrl->wqn, 1026 rte_be_to_cpu_16(u.err_cqe->wqe_counter), 1027 rxq->rq_ci << rxq->sges_n, rxq->cq_ci); 1028 MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u", 1029 rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc()); 1030 mlx5_dump_debug_information(name, NULL, err_str, 0); 1031 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 1032 (const void *)((uintptr_t) 1033 rxq->cqes), 1034 sizeof(*u.cqe) * cqe_n); 1035 mlx5_dump_debug_information(name, "MLX5 Error RQ:", 1036 (const void *)((uintptr_t) 1037 rxq->wqes), 1038 16 * wqe_n); 1039 rxq_ctrl->dump_file_n++; 1040 } 1041 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY; 1042 /* Fall-through */ 1043 case MLX5_RXQ_ERR_STATE_NEED_READY: 1044 ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci); 1045 if (ret == MLX5_CQE_STATUS_HW_OWN) { 1046 rte_io_wmb(); 1047 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1048 rte_io_wmb(); 1049 /* 1050 * The RQ consumer index must be zeroed while moving 1051 * from RESET state to RDY state. 1052 */ 1053 *rxq->rq_db = rte_cpu_to_be_32(0); 1054 rte_io_wmb(); 1055 sm.is_wq = 1; 1056 sm.queue_id = rxq->idx; 1057 sm.state = IBV_WQS_RDY; 1058 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), 1059 &sm)) 1060 return -1; 1061 if (vec) { 1062 const uint32_t elts_n = 1063 mlx5_rxq_mprq_enabled(rxq) ? 1064 wqe_n * strd_n : wqe_n; 1065 const uint32_t e_mask = elts_n - 1; 1066 uint32_t elts_ci = 1067 mlx5_rxq_mprq_enabled(rxq) ? 1068 rxq->elts_ci : rxq->rq_ci; 1069 uint32_t elt_idx; 1070 struct rte_mbuf **elt; 1071 int i; 1072 unsigned int n = elts_n - (elts_ci - 1073 rxq->rq_pi); 1074 1075 for (i = 0; i < (int)n; ++i) { 1076 elt_idx = (elts_ci + i) & e_mask; 1077 elt = &(*rxq->elts)[elt_idx]; 1078 *elt = rte_mbuf_raw_alloc(rxq->mp); 1079 if (!*elt) { 1080 for (i--; i >= 0; --i) { 1081 elt_idx = (elts_ci + 1082 i) & elts_n; 1083 elt = &(*rxq->elts) 1084 [elt_idx]; 1085 rte_pktmbuf_free_seg 1086 (*elt); 1087 } 1088 return -1; 1089 } 1090 } 1091 for (i = 0; i < (int)elts_n; ++i) { 1092 elt = &(*rxq->elts)[i]; 1093 DATA_LEN(*elt) = 1094 (uint16_t)((*elt)->buf_len - 1095 rte_pktmbuf_headroom(*elt)); 1096 } 1097 /* Padding with a fake mbuf for vec Rx. 
*/ 1098 for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i) 1099 (*rxq->elts)[elts_n + i] = 1100 &rxq->fake_mbuf; 1101 } 1102 mlx5_rxq_initialize(rxq); 1103 rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; 1104 } 1105 return ret; 1106 default: 1107 return -1; 1108 } 1109 } 1110 1111 /** 1112 * Get size of the next packet for a given CQE. For compressed CQEs, the 1113 * consumer index is updated only once all packets of the current one have 1114 * been processed. 1115 * 1116 * @param rxq 1117 * Pointer to RX queue. 1118 * @param cqe 1119 * CQE to process. 1120 * @param[out] mcqe 1121 * Store pointer to mini-CQE if compressed. Otherwise, the pointer is not 1122 * written. 1123 * 1124 * @return 1125 * 0 in case of empty CQE, otherwise the packet size in bytes. 1126 */ 1127 static inline int 1128 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 1129 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe) 1130 { 1131 struct rxq_zip *zip = &rxq->zip; 1132 uint16_t cqe_n = cqe_cnt + 1; 1133 int len; 1134 uint16_t idx, end; 1135 1136 do { 1137 len = 0; 1138 /* Process compressed data in the CQE and mini arrays. */ 1139 if (zip->ai) { 1140 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1141 (volatile struct mlx5_mini_cqe8 (*)[8]) 1142 (uintptr_t)(&(*rxq->cqes)[zip->ca & 1143 cqe_cnt].pkt_info); 1144 len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt & 1145 rxq->byte_mask); 1146 *mcqe = &(*mc)[zip->ai & 7]; 1147 if ((++zip->ai & 7) == 0) { 1148 /* Invalidate consumed CQEs */ 1149 idx = zip->ca; 1150 end = zip->na; 1151 while (idx != end) { 1152 (*rxq->cqes)[idx & cqe_cnt].op_own = 1153 MLX5_CQE_INVALIDATE; 1154 ++idx; 1155 } 1156 /* 1157 * Increment consumer index to skip the number 1158 * of CQEs consumed. Hardware leaves holes in 1159 * the CQ ring for software use. 1160 */ 1161 zip->ca = zip->na; 1162 zip->na += 8; 1163 } 1164 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { 1165 /* Invalidate the rest */ 1166 idx = zip->ca; 1167 end = zip->cq_ci; 1168 1169 while (idx != end) { 1170 (*rxq->cqes)[idx & cqe_cnt].op_own = 1171 MLX5_CQE_INVALIDATE; 1172 ++idx; 1173 } 1174 rxq->cq_ci = zip->cq_ci; 1175 zip->ai = 0; 1176 } 1177 /* 1178 * No compressed data, get next CQE and verify if it is 1179 * compressed. 1180 */ 1181 } else { 1182 int ret; 1183 int8_t op_own; 1184 1185 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 1186 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 1187 if (unlikely(ret == MLX5_CQE_STATUS_ERR || 1188 rxq->err_state)) { 1189 ret = mlx5_rx_err_handle(rxq, 0); 1190 if (ret == MLX5_CQE_STATUS_HW_OWN || 1191 ret == -1) 1192 return 0; 1193 } else { 1194 return 0; 1195 } 1196 } 1197 ++rxq->cq_ci; 1198 op_own = cqe->op_own; 1199 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { 1200 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1201 (volatile struct mlx5_mini_cqe8 (*)[8]) 1202 (uintptr_t)(&(*rxq->cqes) 1203 [rxq->cq_ci & 1204 cqe_cnt].pkt_info); 1205 1206 /* Fix endianness. */ 1207 zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt); 1208 /* 1209 * Current mini array position is the one 1210 * returned by check_cqe64(). 1211 * 1212 * If completion comprises several mini arrays, 1213 * as a special case the second one is located 1214 * 7 CQEs after the initial CQE instead of 8 1215 * for subsequent ones. 1216 */ 1217 zip->ca = rxq->cq_ci; 1218 zip->na = zip->ca + 7; 1219 /* Compute the next non compressed CQE. */ 1220 --rxq->cq_ci; 1221 zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; 1222 /* Get packet size to return. 
*/ 1223 len = rte_be_to_cpu_32((*mc)[0].byte_cnt & 1224 rxq->byte_mask); 1225 *mcqe = &(*mc)[0]; 1226 zip->ai = 1; 1227 /* Prefetch all to be invalidated */ 1228 idx = zip->ca; 1229 end = zip->cq_ci; 1230 while (idx != end) { 1231 rte_prefetch0(&(*rxq->cqes)[(idx) & 1232 cqe_cnt]); 1233 ++idx; 1234 } 1235 } else { 1236 len = rte_be_to_cpu_32(cqe->byte_cnt); 1237 } 1238 } 1239 if (unlikely(rxq->err_state)) { 1240 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1241 ++rxq->stats.idropped; 1242 } else { 1243 return len; 1244 } 1245 } while (1); 1246 } 1247 1248 /** 1249 * Translate RX completion flags to offload flags. 1250 * 1251 * @param[in] cqe 1252 * Pointer to CQE. 1253 * 1254 * @return 1255 * Offload flags (ol_flags) for struct rte_mbuf. 1256 */ 1257 static inline uint32_t 1258 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe) 1259 { 1260 uint32_t ol_flags = 0; 1261 uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc); 1262 1263 ol_flags = 1264 TRANSPOSE(flags, 1265 MLX5_CQE_RX_L3_HDR_VALID, 1266 PKT_RX_IP_CKSUM_GOOD) | 1267 TRANSPOSE(flags, 1268 MLX5_CQE_RX_L4_HDR_VALID, 1269 PKT_RX_L4_CKSUM_GOOD); 1270 return ol_flags; 1271 } 1272 1273 /** 1274 * Fill in mbuf fields from RX completion flags. 1275 * Note that pkt->ol_flags should be initialized outside of this function. 1276 * 1277 * @param rxq 1278 * Pointer to RX queue. 1279 * @param pkt 1280 * mbuf to fill. 1281 * @param cqe 1282 * CQE to process. 1283 * @param rss_hash_res 1284 * Packet RSS Hash result. 1285 */ 1286 static inline void 1287 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 1288 volatile struct mlx5_cqe *cqe, 1289 volatile struct mlx5_mini_cqe8 *mcqe) 1290 { 1291 /* Update packet information. */ 1292 pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe, mcqe); 1293 1294 if (rxq->rss_hash) { 1295 uint32_t rss_hash_res = 0; 1296 1297 /* If compressed, take hash result from mini-CQE. */ 1298 if (mcqe == NULL || 1299 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH) 1300 rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res); 1301 else 1302 rss_hash_res = rte_be_to_cpu_32(mcqe->rx_hash_result); 1303 if (rss_hash_res) { 1304 pkt->hash.rss = rss_hash_res; 1305 pkt->ol_flags |= PKT_RX_RSS_HASH; 1306 } 1307 } 1308 if (rxq->mark) { 1309 uint32_t mark = 0; 1310 1311 /* If compressed, take flow tag from mini-CQE. 
*/ 1312 if (mcqe == NULL || 1313 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) 1314 mark = cqe->sop_drop_qpn; 1315 else 1316 mark = ((mcqe->byte_cnt_flow & 0xff) << 8) | 1317 (mcqe->flow_tag_high << 16); 1318 if (MLX5_FLOW_MARK_IS_VALID(mark)) { 1319 pkt->ol_flags |= PKT_RX_FDIR; 1320 if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) { 1321 pkt->ol_flags |= PKT_RX_FDIR_ID; 1322 pkt->hash.fdir.hi = mlx5_flow_mark_get(mark); 1323 } 1324 } 1325 } 1326 if (rxq->dynf_meta && cqe->flow_table_metadata) { 1327 pkt->ol_flags |= rxq->flow_meta_mask; 1328 *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, uint32_t *) = 1329 cqe->flow_table_metadata; 1330 } 1331 if (rxq->csum) 1332 pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); 1333 if (rxq->vlan_strip) { 1334 bool vlan_strip; 1335 1336 if (mcqe == NULL || 1337 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 1338 vlan_strip = cqe->hdr_type_etc & 1339 RTE_BE16(MLX5_CQE_VLAN_STRIPPED); 1340 else 1341 vlan_strip = mcqe->hdr_type & 1342 RTE_BE16(MLX5_CQE_VLAN_STRIPPED); 1343 if (vlan_strip) { 1344 pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; 1345 pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info); 1346 } 1347 } 1348 if (rxq->hw_timestamp) { 1349 uint64_t ts = rte_be_to_cpu_64(cqe->timestamp); 1350 1351 if (rxq->rt_timestamp) 1352 ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts); 1353 mlx5_timestamp_set(pkt, rxq->timestamp_offset, ts); 1354 pkt->ol_flags |= rxq->timestamp_rx_flag; 1355 } 1356 } 1357 1358 /** 1359 * DPDK callback for RX. 1360 * 1361 * @param dpdk_rxq 1362 * Generic pointer to RX queue structure. 1363 * @param[out] pkts 1364 * Array to store received packets. 1365 * @param pkts_n 1366 * Maximum number of packets in array. 1367 * 1368 * @return 1369 * Number of packets successfully received (<= pkts_n). 1370 */ 1371 uint16_t 1372 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1373 { 1374 struct mlx5_rxq_data *rxq = dpdk_rxq; 1375 const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; 1376 const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1; 1377 const unsigned int sges_n = rxq->sges_n; 1378 struct rte_mbuf *pkt = NULL; 1379 struct rte_mbuf *seg = NULL; 1380 volatile struct mlx5_cqe *cqe = 1381 &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1382 unsigned int i = 0; 1383 unsigned int rq_ci = rxq->rq_ci << sges_n; 1384 int len = 0; /* keep its value across iterations. */ 1385 1386 while (pkts_n) { 1387 unsigned int idx = rq_ci & wqe_cnt; 1388 volatile struct mlx5_wqe_data_seg *wqe = 1389 &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx]; 1390 struct rte_mbuf *rep = (*rxq->elts)[idx]; 1391 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1392 1393 if (pkt) 1394 NEXT(seg) = rep; 1395 seg = rep; 1396 rte_prefetch0(seg); 1397 rte_prefetch0(cqe); 1398 rte_prefetch0(wqe); 1399 /* Allocate the buf from the same pool. */ 1400 rep = rte_mbuf_raw_alloc(seg->pool); 1401 if (unlikely(rep == NULL)) { 1402 ++rxq->stats.rx_nombuf; 1403 if (!pkt) { 1404 /* 1405 * no buffers before we even started, 1406 * bail out silently. 
1407 */ 1408 break; 1409 } 1410 while (pkt != seg) { 1411 MLX5_ASSERT(pkt != (*rxq->elts)[idx]); 1412 rep = NEXT(pkt); 1413 NEXT(pkt) = NULL; 1414 NB_SEGS(pkt) = 1; 1415 rte_mbuf_raw_free(pkt); 1416 pkt = rep; 1417 } 1418 break; 1419 } 1420 if (!pkt) { 1421 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1422 len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe); 1423 if (!len) { 1424 rte_mbuf_raw_free(rep); 1425 break; 1426 } 1427 pkt = seg; 1428 MLX5_ASSERT(len >= (rxq->crc_present << 2)); 1429 pkt->ol_flags &= EXT_ATTACHED_MBUF; 1430 rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe); 1431 if (rxq->crc_present) 1432 len -= RTE_ETHER_CRC_LEN; 1433 PKT_LEN(pkt) = len; 1434 if (cqe->lro_num_seg > 1) { 1435 mlx5_lro_update_hdr 1436 (rte_pktmbuf_mtod(pkt, uint8_t *), cqe, 1437 mcqe, rxq, len); 1438 pkt->ol_flags |= PKT_RX_LRO; 1439 pkt->tso_segsz = len / cqe->lro_num_seg; 1440 } 1441 } 1442 DATA_LEN(rep) = DATA_LEN(seg); 1443 PKT_LEN(rep) = PKT_LEN(seg); 1444 SET_DATA_OFF(rep, DATA_OFF(seg)); 1445 PORT(rep) = PORT(seg); 1446 (*rxq->elts)[idx] = rep; 1447 /* 1448 * Fill NIC descriptor with the new buffer. The lkey and size 1449 * of the buffers are already known, only the buffer address 1450 * changes. 1451 */ 1452 wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t)); 1453 /* If there's only one MR, no need to replace LKey in WQE. */ 1454 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) 1455 wqe->lkey = mlx5_rx_mb2mr(rxq, rep); 1456 if (len > DATA_LEN(seg)) { 1457 len -= DATA_LEN(seg); 1458 ++NB_SEGS(pkt); 1459 ++rq_ci; 1460 continue; 1461 } 1462 DATA_LEN(seg) = len; 1463 #ifdef MLX5_PMD_SOFT_COUNTERS 1464 /* Increment bytes counter. */ 1465 rxq->stats.ibytes += PKT_LEN(pkt); 1466 #endif 1467 /* Return packet. */ 1468 *(pkts++) = pkt; 1469 pkt = NULL; 1470 --pkts_n; 1471 ++i; 1472 /* Align consumer index to the next stride. */ 1473 rq_ci >>= sges_n; 1474 ++rq_ci; 1475 rq_ci <<= sges_n; 1476 } 1477 if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci))) 1478 return 0; 1479 /* Update the consumer index. */ 1480 rxq->rq_ci = rq_ci >> sges_n; 1481 rte_io_wmb(); 1482 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1483 rte_io_wmb(); 1484 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1485 #ifdef MLX5_PMD_SOFT_COUNTERS 1486 /* Increment packets counter. */ 1487 rxq->stats.ipackets += i; 1488 #endif 1489 return i; 1490 } 1491 1492 /** 1493 * Update LRO packet TCP header. 1494 * The HW LRO feature doesn't update the TCP header after coalescing the 1495 * TCP segments but supplies information in CQE to fill it by SW. 1496 * 1497 * @param tcp 1498 * Pointer to the TCP header. 1499 * @param cqe 1500 * Pointer to the completion entry.. 1501 * @param phcsum 1502 * The L3 pseudo-header checksum. 1503 */ 1504 static inline void 1505 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 1506 volatile struct mlx5_cqe *__rte_restrict cqe, 1507 uint32_t phcsum, uint8_t l4_type) 1508 { 1509 /* 1510 * The HW calculates only the TCP payload checksum, need to complete 1511 * the TCP header checksum and the L3 pseudo-header checksum. 
1512 */ 1513 uint32_t csum = phcsum + cqe->csum; 1514 1515 if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK || 1516 l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) { 1517 tcp->tcp_flags |= RTE_TCP_ACK_FLAG; 1518 tcp->recv_ack = cqe->lro_ack_seq_num; 1519 tcp->rx_win = cqe->lro_tcp_win; 1520 } 1521 if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK) 1522 tcp->tcp_flags |= RTE_TCP_PSH_FLAG; 1523 tcp->cksum = 0; 1524 csum += rte_raw_cksum(tcp, (tcp->data_off >> 4) * 4); 1525 csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff); 1526 csum = (~csum) & 0xffff; 1527 if (csum == 0) 1528 csum = 0xffff; 1529 tcp->cksum = csum; 1530 } 1531 1532 /** 1533 * Update LRO packet headers. 1534 * The HW LRO feature doesn't update the L3/TCP headers after coalescing the 1535 * TCP segments but supply information in CQE to fill it by SW. 1536 * 1537 * @param padd 1538 * The packet address. 1539 * @param cqe 1540 * Pointer to the completion entry.. 1541 * @param len 1542 * The packet length. 1543 */ 1544 static inline void 1545 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 1546 volatile struct mlx5_cqe *__rte_restrict cqe, 1547 volatile struct mlx5_mini_cqe8 *mcqe, 1548 struct mlx5_rxq_data *rxq, uint32_t len) 1549 { 1550 union { 1551 struct rte_ether_hdr *eth; 1552 struct rte_vlan_hdr *vlan; 1553 struct rte_ipv4_hdr *ipv4; 1554 struct rte_ipv6_hdr *ipv6; 1555 struct rte_tcp_hdr *tcp; 1556 uint8_t *hdr; 1557 } h = { 1558 .hdr = padd, 1559 }; 1560 uint16_t proto = h.eth->ether_type; 1561 uint32_t phcsum; 1562 uint8_t l4_type; 1563 1564 h.eth++; 1565 while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) || 1566 proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) { 1567 proto = h.vlan->eth_proto; 1568 h.vlan++; 1569 } 1570 if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) { 1571 h.ipv4->time_to_live = cqe->lro_min_ttl; 1572 h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd)); 1573 h.ipv4->hdr_checksum = 0; 1574 h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4); 1575 phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0); 1576 h.ipv4++; 1577 } else { 1578 h.ipv6->hop_limits = cqe->lro_min_ttl; 1579 h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) - 1580 sizeof(*h.ipv6)); 1581 phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0); 1582 h.ipv6++; 1583 } 1584 if (mcqe == NULL || 1585 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 1586 l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) & 1587 MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; 1588 else 1589 l4_type = (rte_be_to_cpu_16(mcqe->hdr_type) & 1590 MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; 1591 mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum, l4_type); 1592 } 1593 1594 void 1595 mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque) 1596 { 1597 struct mlx5_mprq_buf *buf = opaque; 1598 1599 if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) == 1) { 1600 rte_mempool_put(buf->mp, buf); 1601 } else if (unlikely(__atomic_sub_fetch(&buf->refcnt, 1, 1602 __ATOMIC_RELAXED) == 0)) { 1603 __atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED); 1604 rte_mempool_put(buf->mp, buf); 1605 } 1606 } 1607 1608 void 1609 mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf) 1610 { 1611 mlx5_mprq_buf_free_cb(NULL, buf); 1612 } 1613 1614 /** 1615 * DPDK callback for RX with Multi-Packet RQ support. 1616 * 1617 * @param dpdk_rxq 1618 * Generic pointer to RX queue structure. 1619 * @param[out] pkts 1620 * Array to store received packets. 1621 * @param pkts_n 1622 * Maximum number of packets in array. 1623 * 1624 * @return 1625 * Number of packets successfully received (<= pkts_n). 
1626 */ 1627 uint16_t 1628 mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1629 { 1630 struct mlx5_rxq_data *rxq = dpdk_rxq; 1631 const uint32_t strd_n = 1 << rxq->strd_num_n; 1632 const uint32_t strd_sz = 1 << rxq->strd_sz_n; 1633 const uint32_t cq_mask = (1 << rxq->cqe_n) - 1; 1634 const uint32_t wq_mask = (1 << rxq->elts_n) - 1; 1635 volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1636 unsigned int i = 0; 1637 uint32_t rq_ci = rxq->rq_ci; 1638 uint16_t consumed_strd = rxq->consumed_strd; 1639 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1640 1641 while (i < pkts_n) { 1642 struct rte_mbuf *pkt; 1643 int ret; 1644 uint32_t len; 1645 uint16_t strd_cnt; 1646 uint16_t strd_idx; 1647 uint32_t byte_cnt; 1648 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1649 enum mlx5_rqx_code rxq_code; 1650 1651 if (consumed_strd == strd_n) { 1652 /* Replace WQE if the buffer is still in use. */ 1653 mprq_buf_replace(rxq, rq_ci & wq_mask); 1654 /* Advance to the next WQE. */ 1655 consumed_strd = 0; 1656 ++rq_ci; 1657 buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1658 } 1659 cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1660 ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe); 1661 if (!ret) 1662 break; 1663 byte_cnt = ret; 1664 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 1665 MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); 1666 if (rxq->crc_present) 1667 len -= RTE_ETHER_CRC_LEN; 1668 if (mcqe && 1669 rxq->mcqe_format == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) 1670 strd_cnt = (len / strd_sz) + !!(len % strd_sz); 1671 else 1672 strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >> 1673 MLX5_MPRQ_STRIDE_NUM_SHIFT; 1674 MLX5_ASSERT(strd_cnt); 1675 consumed_strd += strd_cnt; 1676 if (byte_cnt & MLX5_MPRQ_FILLER_MASK) 1677 continue; 1678 strd_idx = rte_be_to_cpu_16(mcqe == NULL ? 1679 cqe->wqe_counter : 1680 mcqe->stride_idx); 1681 MLX5_ASSERT(strd_idx < strd_n); 1682 MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & 1683 wq_mask)); 1684 pkt = rte_pktmbuf_alloc(rxq->mp); 1685 if (unlikely(pkt == NULL)) { 1686 ++rxq->stats.rx_nombuf; 1687 break; 1688 } 1689 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 1690 MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); 1691 if (rxq->crc_present) 1692 len -= RTE_ETHER_CRC_LEN; 1693 rxq_code = mprq_buf_to_pkt(rxq, pkt, len, buf, 1694 strd_idx, strd_cnt); 1695 if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) { 1696 rte_pktmbuf_free_seg(pkt); 1697 if (rxq_code == MLX5_RXQ_CODE_DROPPED) { 1698 ++rxq->stats.idropped; 1699 continue; 1700 } 1701 if (rxq_code == MLX5_RXQ_CODE_NOMBUF) { 1702 ++rxq->stats.rx_nombuf; 1703 break; 1704 } 1705 } 1706 rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe); 1707 if (cqe->lro_num_seg > 1) { 1708 mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt, uint8_t *), 1709 cqe, mcqe, rxq, len); 1710 pkt->ol_flags |= PKT_RX_LRO; 1711 pkt->tso_segsz = len / cqe->lro_num_seg; 1712 } 1713 PKT_LEN(pkt) = len; 1714 PORT(pkt) = rxq->port_id; 1715 #ifdef MLX5_PMD_SOFT_COUNTERS 1716 /* Increment bytes counter. */ 1717 rxq->stats.ibytes += PKT_LEN(pkt); 1718 #endif 1719 /* Return packet. */ 1720 *(pkts++) = pkt; 1721 ++i; 1722 } 1723 /* Update the consumer indexes. */ 1724 rxq->consumed_strd = consumed_strd; 1725 rte_io_wmb(); 1726 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1727 if (rq_ci != rxq->rq_ci) { 1728 rxq->rq_ci = rq_ci; 1729 rte_io_wmb(); 1730 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1731 } 1732 #ifdef MLX5_PMD_SOFT_COUNTERS 1733 /* Increment packets counter. 
*/ 1734 rxq->stats.ipackets += i; 1735 #endif 1736 return i; 1737 } 1738 1739 /** 1740 * Dummy DPDK callback for TX. 1741 * 1742 * This function is used to temporarily replace the real callback during 1743 * unsafe control operations on the queue, or in case of error. 1744 * 1745 * @param dpdk_txq 1746 * Generic pointer to TX queue structure. 1747 * @param[in] pkts 1748 * Packets to transmit. 1749 * @param pkts_n 1750 * Number of packets in array. 1751 * 1752 * @return 1753 * Number of packets successfully transmitted (<= pkts_n). 1754 */ 1755 uint16_t 1756 removed_tx_burst(void *dpdk_txq __rte_unused, 1757 struct rte_mbuf **pkts __rte_unused, 1758 uint16_t pkts_n __rte_unused) 1759 { 1760 rte_mb(); 1761 return 0; 1762 } 1763 1764 /** 1765 * Dummy DPDK callback for RX. 1766 * 1767 * This function is used to temporarily replace the real callback during 1768 * unsafe control operations on the queue, or in case of error. 1769 * 1770 * @param dpdk_rxq 1771 * Generic pointer to RX queue structure. 1772 * @param[out] pkts 1773 * Array to store received packets. 1774 * @param pkts_n 1775 * Maximum number of packets in array. 1776 * 1777 * @return 1778 * Number of packets successfully received (<= pkts_n). 1779 */ 1780 uint16_t 1781 removed_rx_burst(void *dpdk_txq __rte_unused, 1782 struct rte_mbuf **pkts __rte_unused, 1783 uint16_t pkts_n __rte_unused) 1784 { 1785 rte_mb(); 1786 return 0; 1787 } 1788 1789 /* 1790 * Vectorized Rx/Tx routines are not compiled in when required vector 1791 * instructions are not supported on a target architecture. The following null 1792 * stubs are needed for linkage when those are not included outside of this file 1793 * (e.g. mlx5_rxtx_vec_sse.c for x86). 1794 */ 1795 1796 __rte_weak uint16_t 1797 mlx5_rx_burst_vec(void *dpdk_txq __rte_unused, 1798 struct rte_mbuf **pkts __rte_unused, 1799 uint16_t pkts_n __rte_unused) 1800 { 1801 return 0; 1802 } 1803 1804 __rte_weak uint16_t 1805 mlx5_rx_burst_mprq_vec(void *dpdk_txq __rte_unused, 1806 struct rte_mbuf **pkts __rte_unused, 1807 uint16_t pkts_n __rte_unused) 1808 { 1809 return 0; 1810 } 1811 1812 __rte_weak int 1813 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused) 1814 { 1815 return -ENOTSUP; 1816 } 1817 1818 __rte_weak int 1819 mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused) 1820 { 1821 return -ENOTSUP; 1822 } 1823 1824 /** 1825 * Free the mbufs from the linear array of pointers. 1826 * 1827 * @param pkts 1828 * Pointer to array of packets to be free. 1829 * @param pkts_n 1830 * Number of packets to be freed. 1831 * @param olx 1832 * Configured Tx offloads mask. It is fully defined at 1833 * compile time and may be used for optimization. 1834 */ 1835 static __rte_always_inline void 1836 mlx5_tx_free_mbuf(struct rte_mbuf **__rte_restrict pkts, 1837 unsigned int pkts_n, 1838 unsigned int olx __rte_unused) 1839 { 1840 struct rte_mempool *pool = NULL; 1841 struct rte_mbuf **p_free = NULL; 1842 struct rte_mbuf *mbuf; 1843 unsigned int n_free = 0; 1844 1845 /* 1846 * The implemented algorithm eliminates 1847 * copying pointers to temporary array 1848 * for rte_mempool_put_bulk() calls. 1849 */ 1850 MLX5_ASSERT(pkts); 1851 MLX5_ASSERT(pkts_n); 1852 for (;;) { 1853 for (;;) { 1854 /* 1855 * Decrement mbuf reference counter, detach 1856 * indirect and external buffers if needed. 
1857 */ 1858 mbuf = rte_pktmbuf_prefree_seg(*pkts); 1859 if (likely(mbuf != NULL)) { 1860 MLX5_ASSERT(mbuf == *pkts); 1861 if (likely(n_free != 0)) { 1862 if (unlikely(pool != mbuf->pool)) 1863 /* From different pool. */ 1864 break; 1865 } else { 1866 /* Start new scan array. */ 1867 pool = mbuf->pool; 1868 p_free = pkts; 1869 } 1870 ++n_free; 1871 ++pkts; 1872 --pkts_n; 1873 if (unlikely(pkts_n == 0)) { 1874 mbuf = NULL; 1875 break; 1876 } 1877 } else { 1878 /* 1879 * This happens if mbuf is still referenced. 1880 * We can't put it back to the pool, skip. 1881 */ 1882 ++pkts; 1883 --pkts_n; 1884 if (unlikely(n_free != 0)) 1885 /* There is some array to free.*/ 1886 break; 1887 if (unlikely(pkts_n == 0)) 1888 /* Last mbuf, nothing to free. */ 1889 return; 1890 } 1891 } 1892 for (;;) { 1893 /* 1894 * This loop is implemented to avoid multiple 1895 * inlining of rte_mempool_put_bulk(). 1896 */ 1897 MLX5_ASSERT(pool); 1898 MLX5_ASSERT(p_free); 1899 MLX5_ASSERT(n_free); 1900 /* 1901 * Free the array of pre-freed mbufs 1902 * belonging to the same memory pool. 1903 */ 1904 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 1905 if (unlikely(mbuf != NULL)) { 1906 /* There is the request to start new scan. */ 1907 pool = mbuf->pool; 1908 p_free = pkts++; 1909 n_free = 1; 1910 --pkts_n; 1911 if (likely(pkts_n != 0)) 1912 break; 1913 /* 1914 * This is the last mbuf to be freed. 1915 * Do one more loop iteration to complete. 1916 * This is rare case of the last unique mbuf. 1917 */ 1918 mbuf = NULL; 1919 continue; 1920 } 1921 if (likely(pkts_n == 0)) 1922 return; 1923 n_free = 0; 1924 break; 1925 } 1926 } 1927 } 1928 1929 /** 1930 * Free the mbuf from the elts ring buffer till new tail. 1931 * 1932 * @param txq 1933 * Pointer to Tx queue structure. 1934 * @param tail 1935 * Index in elts to free up to, becomes new elts tail. 1936 * @param olx 1937 * Configured Tx offloads mask. It is fully defined at 1938 * compile time and may be used for optimization. 1939 */ 1940 static __rte_always_inline void 1941 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 1942 uint16_t tail, 1943 unsigned int olx __rte_unused) 1944 { 1945 uint16_t n_elts = tail - txq->elts_tail; 1946 1947 MLX5_ASSERT(n_elts); 1948 MLX5_ASSERT(n_elts <= txq->elts_s); 1949 /* 1950 * Implement a loop to support ring buffer wraparound 1951 * with single inlining of mlx5_tx_free_mbuf(). 1952 */ 1953 do { 1954 unsigned int part; 1955 1956 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 1957 part = RTE_MIN(part, n_elts); 1958 MLX5_ASSERT(part); 1959 MLX5_ASSERT(part <= txq->elts_s); 1960 mlx5_tx_free_mbuf(&txq->elts[txq->elts_tail & txq->elts_m], 1961 part, olx); 1962 txq->elts_tail += part; 1963 n_elts -= part; 1964 } while (n_elts); 1965 } 1966 1967 /** 1968 * Store the mbuf being sent into elts ring buffer. 1969 * On Tx completion these mbufs will be freed. 1970 * 1971 * @param txq 1972 * Pointer to Tx queue structure. 1973 * @param pkts 1974 * Pointer to array of packets to be stored. 1975 * @param pkts_n 1976 * Number of packets to be stored. 1977 * @param olx 1978 * Configured Tx offloads mask. It is fully defined at 1979 * compile time and may be used for optimization. 
 */
static __rte_always_inline void
mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq,
		  struct rte_mbuf **__rte_restrict pkts,
		  unsigned int pkts_n,
		  unsigned int olx __rte_unused)
{
	unsigned int part;
	struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts;

	MLX5_ASSERT(pkts);
	MLX5_ASSERT(pkts_n);
	part = txq->elts_s - (txq->elts_head & txq->elts_m);
	MLX5_ASSERT(part);
	MLX5_ASSERT(part <= txq->elts_s);
	/* This code is a good candidate for vectorizing with SIMD. */
	rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)),
		   (void *)pkts,
		   RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *));
	txq->elts_head += pkts_n;
	if (unlikely(part < pkts_n))
		/* The copy is wrapping around the elts array. */
		rte_memcpy((void *)elts, (void *)(pkts + part),
			   (pkts_n - part) * sizeof(struct rte_mbuf *));
}

/**
 * Update completion queue consuming index via doorbell
 * and flush the completed data buffers.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param last_cqe
 *   Pointer to the last valid CQE. If not NULL, update txq->wqe_pi
 *   and flush the completed data buffers.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq,
		   volatile struct mlx5_cqe *last_cqe,
		   unsigned int olx __rte_unused)
{
	if (likely(last_cqe != NULL)) {
		uint16_t tail;

		txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
		tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m];
		if (likely(tail != txq->elts_tail)) {
			mlx5_tx_free_elts(txq, tail, olx);
			MLX5_ASSERT(tail == txq->elts_tail);
		}
	}
}

/**
 * Manage TX completions. This routine checks the CQ for
 * arrived CQEs, deduces the last accomplished WQE in SQ,
 * updates SQ producing index and frees all completed mbufs.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * NOTE: not inlined intentionally, it makes tx_burst
 * routine smaller, simpler and faster - from experiments.
 */
static void
mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
			  unsigned int olx __rte_unused)
{
	unsigned int count = MLX5_TX_COMP_MAX_CQE;
	volatile struct mlx5_cqe *last_cqe = NULL;
	bool ring_doorbell = false;
	int ret;

	static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value");
	static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be negative value");
	do {
		volatile struct mlx5_cqe *cqe;

		cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
		ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
			if (likely(ret != MLX5_CQE_STATUS_ERR)) {
				/* No new CQEs in completion queue. */
				MLX5_ASSERT(ret == MLX5_CQE_STATUS_HW_OWN);
				break;
			}
			/*
			 * Some error occurred, try to restart.
			 * We have no barrier after the WQE related doorbell
			 * is written, make sure all writes are completed
			 * here before we might perform an SQ reset.
2076 */ 2077 rte_wmb(); 2078 ret = mlx5_tx_error_cqe_handle 2079 (txq, (volatile struct mlx5_err_cqe *)cqe); 2080 if (unlikely(ret < 0)) { 2081 /* 2082 * Some error occurred on queue error 2083 * handling, we do not advance the index 2084 * here, allowing to retry on next call. 2085 */ 2086 return; 2087 } 2088 /* 2089 * We are going to fetch all entries with 2090 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status. 2091 * The send queue is supposed to be empty. 2092 */ 2093 ring_doorbell = true; 2094 ++txq->cq_ci; 2095 txq->cq_pi = txq->cq_ci; 2096 last_cqe = NULL; 2097 continue; 2098 } 2099 /* Normal transmit completion. */ 2100 MLX5_ASSERT(txq->cq_ci != txq->cq_pi); 2101 MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) == 2102 cqe->wqe_counter); 2103 ring_doorbell = true; 2104 ++txq->cq_ci; 2105 last_cqe = cqe; 2106 /* 2107 * We have to restrict the amount of processed CQEs 2108 * in one tx_burst routine call. The CQ may be large 2109 * and many CQEs may be updated by the NIC in one 2110 * transaction. Buffers freeing is time consuming, 2111 * multiple iterations may introduce significant 2112 * latency. 2113 */ 2114 if (likely(--count == 0)) 2115 break; 2116 } while (true); 2117 if (likely(ring_doorbell)) { 2118 /* Ring doorbell to notify hardware. */ 2119 rte_compiler_barrier(); 2120 *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); 2121 mlx5_tx_comp_flush(txq, last_cqe, olx); 2122 } 2123 } 2124 2125 /** 2126 * Check if the completion request flag should be set in the last WQE. 2127 * Both pushed mbufs and WQEs are monitored and the completion request 2128 * flag is set if any of thresholds is reached. 2129 * 2130 * @param txq 2131 * Pointer to TX queue structure. 2132 * @param loc 2133 * Pointer to burst routine local context. 2134 * @param olx 2135 * Configured Tx offloads mask. It is fully defined at 2136 * compile time and may be used for optimization. 2137 */ 2138 static __rte_always_inline void 2139 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 2140 struct mlx5_txq_local *__rte_restrict loc, 2141 unsigned int olx) 2142 { 2143 uint16_t head = txq->elts_head; 2144 unsigned int part; 2145 2146 part = MLX5_TXOFF_CONFIG(INLINE) ? 2147 0 : loc->pkts_sent - loc->pkts_copy; 2148 head += part; 2149 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 2150 (MLX5_TXOFF_CONFIG(INLINE) && 2151 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 2152 volatile struct mlx5_wqe *last = loc->wqe_last; 2153 2154 MLX5_ASSERT(last); 2155 txq->elts_comp = head; 2156 if (MLX5_TXOFF_CONFIG(INLINE)) 2157 txq->wqe_comp = txq->wqe_ci; 2158 /* Request unconditional completion on last WQE. */ 2159 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 2160 MLX5_COMP_MODE_OFFSET); 2161 /* Save elts_head in dedicated free on completion queue. */ 2162 #ifdef RTE_LIBRTE_MLX5_DEBUG 2163 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 2164 (last->cseg.opcode >> 8) << 16; 2165 #else 2166 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 2167 #endif 2168 /* A CQE slot must always be available. */ 2169 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 2170 } 2171 } 2172 2173 /** 2174 * DPDK callback to check the status of a tx descriptor. 2175 * 2176 * @param tx_queue 2177 * The tx queue. 2178 * @param[in] offset 2179 * The index of the descriptor in the ring. 2180 * 2181 * @return 2182 * The status of the tx descriptor. 
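 *
 * @note
 *   Pending completions are processed first, so the reported status
 *   reflects only the descriptors still in flight after
 *   mlx5_tx_handle_completion() has released everything completed.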
2183 */ 2184 int 2185 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) 2186 { 2187 struct mlx5_txq_data *__rte_restrict txq = tx_queue; 2188 uint16_t used; 2189 2190 mlx5_tx_handle_completion(txq, 0); 2191 used = txq->elts_head - txq->elts_tail; 2192 if (offset < used) 2193 return RTE_ETH_TX_DESC_FULL; 2194 return RTE_ETH_TX_DESC_DONE; 2195 } 2196 2197 /** 2198 * Build the Control Segment with specified opcode: 2199 * - MLX5_OPCODE_SEND 2200 * - MLX5_OPCODE_ENHANCED_MPSW 2201 * - MLX5_OPCODE_TSO 2202 * 2203 * @param txq 2204 * Pointer to TX queue structure. 2205 * @param loc 2206 * Pointer to burst routine local context. 2207 * @param wqe 2208 * Pointer to WQE to fill with built Control Segment. 2209 * @param ds 2210 * Supposed length of WQE in segments. 2211 * @param opcode 2212 * SQ WQE opcode to put into Control Segment. 2213 * @param olx 2214 * Configured Tx offloads mask. It is fully defined at 2215 * compile time and may be used for optimization. 2216 */ 2217 static __rte_always_inline void 2218 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 2219 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2220 struct mlx5_wqe *__rte_restrict wqe, 2221 unsigned int ds, 2222 unsigned int opcode, 2223 unsigned int olx __rte_unused) 2224 { 2225 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 2226 2227 /* For legacy MPW replace the EMPW by TSO with modifier. */ 2228 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 2229 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 2230 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 2231 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2232 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 2233 MLX5_COMP_MODE_OFFSET); 2234 cs->misc = RTE_BE32(0); 2235 } 2236 2237 /** 2238 * Build the Synchronize Queue Segment with specified completion index. 2239 * 2240 * @param txq 2241 * Pointer to TX queue structure. 2242 * @param loc 2243 * Pointer to burst routine local context. 2244 * @param wqe 2245 * Pointer to WQE to fill with built Control Segment. 2246 * @param wci 2247 * Completion index in Clock Queue to wait. 2248 * @param olx 2249 * Configured Tx offloads mask. It is fully defined at 2250 * compile time and may be used for optimization. 2251 */ 2252 static __rte_always_inline void 2253 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 2254 struct mlx5_txq_local *restrict loc __rte_unused, 2255 struct mlx5_wqe *restrict wqe, 2256 unsigned int wci, 2257 unsigned int olx __rte_unused) 2258 { 2259 struct mlx5_wqe_qseg *qs; 2260 2261 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 2262 qs->max_index = rte_cpu_to_be_32(wci); 2263 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq->id); 2264 qs->reserved0 = RTE_BE32(0); 2265 qs->reserved1 = RTE_BE32(0); 2266 } 2267 2268 /** 2269 * Build the Ethernet Segment without inlined data. 2270 * Supports Software Parser, Checksums and VLAN 2271 * insertion Tx offload features. 2272 * 2273 * @param txq 2274 * Pointer to TX queue structure. 2275 * @param loc 2276 * Pointer to burst routine local context. 2277 * @param wqe 2278 * Pointer to WQE to fill with built Ethernet Segment. 2279 * @param olx 2280 * Configured Tx offloads mask. It is fully defined at 2281 * compile time and may be used for optimization. 
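 *
 * @note
 *   No packet data is inlined by this routine; the payload is expected
 *   to be attached by the caller with pointer type Data Segments.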
2282 */ 2283 static __rte_always_inline void 2284 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 2285 struct mlx5_txq_local *__rte_restrict loc, 2286 struct mlx5_wqe *__rte_restrict wqe, 2287 unsigned int olx) 2288 { 2289 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2290 uint32_t csum; 2291 2292 /* 2293 * Calculate and set check sum flags first, dword field 2294 * in segment may be shared with Software Parser flags. 2295 */ 2296 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2297 es->flags = rte_cpu_to_le_32(csum); 2298 /* 2299 * Calculate and set Software Parser offsets and flags. 2300 * These flags a set for custom UDP and IP tunnel packets. 2301 */ 2302 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2303 /* Fill metadata field if needed. */ 2304 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2305 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2306 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2307 /* Engage VLAN tag insertion feature if requested. */ 2308 if (MLX5_TXOFF_CONFIG(VLAN) && 2309 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 2310 /* 2311 * We should get here only if device support 2312 * this feature correctly. 2313 */ 2314 MLX5_ASSERT(txq->vlan_en); 2315 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 2316 loc->mbuf->vlan_tci); 2317 } else { 2318 es->inline_hdr = RTE_BE32(0); 2319 } 2320 } 2321 2322 /** 2323 * Build the Ethernet Segment with minimal inlined data 2324 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 2325 * used to fill the gap in single WQEBB WQEs. 2326 * Supports Software Parser, Checksums and VLAN 2327 * insertion Tx offload features. 2328 * 2329 * @param txq 2330 * Pointer to TX queue structure. 2331 * @param loc 2332 * Pointer to burst routine local context. 2333 * @param wqe 2334 * Pointer to WQE to fill with built Ethernet Segment. 2335 * @param vlan 2336 * Length of VLAN tag insertion if any. 2337 * @param olx 2338 * Configured Tx offloads mask. It is fully defined at 2339 * compile time and may be used for optimization. 2340 */ 2341 static __rte_always_inline void 2342 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 2343 struct mlx5_txq_local *__rte_restrict loc, 2344 struct mlx5_wqe *__rte_restrict wqe, 2345 unsigned int vlan, 2346 unsigned int olx) 2347 { 2348 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2349 uint32_t csum; 2350 uint8_t *psrc, *pdst; 2351 2352 /* 2353 * Calculate and set check sum flags first, dword field 2354 * in segment may be shared with Software Parser flags. 2355 */ 2356 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2357 es->flags = rte_cpu_to_le_32(csum); 2358 /* 2359 * Calculate and set Software Parser offsets and flags. 2360 * These flags a set for custom UDP and IP tunnel packets. 2361 */ 2362 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2363 /* Fill metadata field if needed. */ 2364 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2365 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 
2366 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2367 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2368 (sizeof(uint16_t) + 2369 sizeof(rte_v128u32_t)), 2370 "invalid Ethernet Segment data size"); 2371 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2372 (sizeof(uint16_t) + 2373 sizeof(struct rte_vlan_hdr) + 2374 2 * RTE_ETHER_ADDR_LEN), 2375 "invalid Ethernet Segment data size"); 2376 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2377 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 2378 es->inline_data = *(unaligned_uint16_t *)psrc; 2379 psrc += sizeof(uint16_t); 2380 pdst = (uint8_t *)(es + 1); 2381 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2382 /* Implement VLAN tag insertion as part inline data. */ 2383 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2384 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2385 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2386 /* Insert VLAN ethertype + VLAN tag. */ 2387 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2388 ((RTE_ETHER_TYPE_VLAN << 16) | 2389 loc->mbuf->vlan_tci); 2390 pdst += sizeof(struct rte_vlan_hdr); 2391 /* Copy the rest two bytes from packet data. */ 2392 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2393 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2394 } else { 2395 /* Fill the gap in the title WQEBB with inline data. */ 2396 rte_mov16(pdst, psrc); 2397 } 2398 } 2399 2400 /** 2401 * Build the Ethernet Segment with entire packet 2402 * data inlining. Checks the boundary of WQEBB and 2403 * ring buffer wrapping, supports Software Parser, 2404 * Checksums and VLAN insertion Tx offload features. 2405 * 2406 * @param txq 2407 * Pointer to TX queue structure. 2408 * @param loc 2409 * Pointer to burst routine local context. 2410 * @param wqe 2411 * Pointer to WQE to fill with built Ethernet Segment. 2412 * @param vlan 2413 * Length of VLAN tag insertion if any. 2414 * @param inlen 2415 * Length of data to inline (VLAN included, if any). 2416 * @param tso 2417 * TSO flag, set mss field from the packet. 2418 * @param olx 2419 * Configured Tx offloads mask. It is fully defined at 2420 * compile time and may be used for optimization. 2421 * 2422 * @return 2423 * Pointer to the next Data Segment (aligned and wrapped around). 2424 */ 2425 static __rte_always_inline struct mlx5_wqe_dseg * 2426 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 2427 struct mlx5_txq_local *__rte_restrict loc, 2428 struct mlx5_wqe *__rte_restrict wqe, 2429 unsigned int vlan, 2430 unsigned int inlen, 2431 unsigned int tso, 2432 unsigned int olx) 2433 { 2434 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2435 uint32_t csum; 2436 uint8_t *psrc, *pdst; 2437 unsigned int part; 2438 2439 /* 2440 * Calculate and set check sum flags first, dword field 2441 * in segment may be shared with Software Parser flags. 2442 */ 2443 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2444 if (tso) { 2445 csum <<= 24; 2446 csum |= loc->mbuf->tso_segsz; 2447 es->flags = rte_cpu_to_be_32(csum); 2448 } else { 2449 es->flags = rte_cpu_to_le_32(csum); 2450 } 2451 /* 2452 * Calculate and set Software Parser offsets and flags. 2453 * These flags a set for custom UDP and IP tunnel packets. 2454 */ 2455 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2456 /* Fill metadata field if needed. */ 2457 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2458 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 
		       *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0;
	static_assert(MLX5_ESEG_MIN_INLINE_SIZE ==
				(sizeof(uint16_t) +
				 sizeof(rte_v128u32_t)),
		      "invalid Ethernet Segment data size");
	static_assert(MLX5_ESEG_MIN_INLINE_SIZE ==
				(sizeof(uint16_t) +
				 sizeof(struct rte_vlan_hdr) +
				 2 * RTE_ETHER_ADDR_LEN),
		      "invalid Ethernet Segment data size");
	psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *);
	es->inline_hdr_sz = rte_cpu_to_be_16(inlen);
	es->inline_data = *(unaligned_uint16_t *)psrc;
	psrc += sizeof(uint16_t);
	pdst = (uint8_t *)(es + 1);
	if (MLX5_TXOFF_CONFIG(VLAN) && vlan) {
		/* Implement VLAN tag insertion as part of inline data. */
		memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t));
		pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		/* Insert VLAN ethertype + VLAN tag. */
		*(unaligned_uint32_t *)pdst = rte_cpu_to_be_32
						((RTE_ETHER_TYPE_VLAN << 16) |
						 loc->mbuf->vlan_tci);
		pdst += sizeof(struct rte_vlan_hdr);
		/* Copy the remaining two bytes from the packet data. */
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t)));
		*(uint16_t *)pdst = *(unaligned_uint16_t *)psrc;
		psrc += sizeof(uint16_t);
	} else {
		/* Fill the gap in the title WQEBB with inline data. */
		rte_mov16(pdst, psrc);
		psrc += sizeof(rte_v128u32_t);
	}
	pdst = (uint8_t *)(es + 2);
	MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE);
	MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end);
	inlen -= MLX5_ESEG_MIN_INLINE_SIZE;
	if (!inlen) {
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE));
		return (struct mlx5_wqe_dseg *)pdst;
	}
	/*
	 * The WQEBB space availability is checked by the caller.
	 * Here we should be aware of WQE ring buffer wraparound only.
	 */
	part = (uint8_t *)txq->wqes_end - pdst;
	part = RTE_MIN(part, inlen);
	do {
		rte_memcpy(pdst, psrc, part);
		inlen -= part;
		if (likely(!inlen)) {
			/*
			 * If return value is not used by the caller
			 * the code below will be optimized out.
			 */
			pdst += part;
			pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
			if (unlikely(pdst >= (uint8_t *)txq->wqes_end))
				pdst = (uint8_t *)txq->wqes;
			return (struct mlx5_wqe_dseg *)pdst;
		}
		pdst = (uint8_t *)txq->wqes;
		psrc += part;
		part = inlen;
	} while (true);
}

/**
 * Copy data from a chain of mbufs to the specified linear buffer.
 * If the data from some mbuf is copied completely, this mbuf is freed.
 * The local structure is used to keep the byte stream state.
 *
 * @param pdst
 *   Pointer to the destination linear buffer.
 * @param loc
 *   Pointer to burst routine local context.
 * @param len
 *   Length of data to be copied.
 * @param must
 *   Length of data to be copied ignoring the no-inline hint.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Number of actually copied data bytes. This is always greater than or
 *   equal to the must parameter and might be less than len if the
 *   no-inline hint flag is encountered.
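 *
 * @note
 *   A rough illustration (numbers are examples only): with len == 128,
 *   must == 18 and a segment carrying the no-inline hint reached after
 *   32 bytes have been copied, the routine stops and returns 32 -
 *   greater than must but less than len.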
2549 */ 2550 static __rte_always_inline unsigned int 2551 mlx5_tx_mseg_memcpy(uint8_t *pdst, 2552 struct mlx5_txq_local *__rte_restrict loc, 2553 unsigned int len, 2554 unsigned int must, 2555 unsigned int olx __rte_unused) 2556 { 2557 struct rte_mbuf *mbuf; 2558 unsigned int part, dlen, copy = 0; 2559 uint8_t *psrc; 2560 2561 MLX5_ASSERT(len); 2562 MLX5_ASSERT(must <= len); 2563 do { 2564 /* Allow zero length packets, must check first. */ 2565 dlen = rte_pktmbuf_data_len(loc->mbuf); 2566 if (dlen <= loc->mbuf_off) { 2567 /* Exhausted packet, just free. */ 2568 mbuf = loc->mbuf; 2569 loc->mbuf = mbuf->next; 2570 rte_pktmbuf_free_seg(mbuf); 2571 loc->mbuf_off = 0; 2572 MLX5_ASSERT(loc->mbuf_nseg > 1); 2573 MLX5_ASSERT(loc->mbuf); 2574 --loc->mbuf_nseg; 2575 if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 2576 unsigned int diff; 2577 2578 if (copy >= must) { 2579 /* 2580 * We already copied the minimal 2581 * requested amount of data. 2582 */ 2583 return copy; 2584 } 2585 diff = must - copy; 2586 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 2587 /* 2588 * Copy only the minimal required 2589 * part of the data buffer. 2590 */ 2591 len = diff; 2592 } 2593 } 2594 continue; 2595 } 2596 dlen -= loc->mbuf_off; 2597 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 2598 loc->mbuf_off); 2599 part = RTE_MIN(len, dlen); 2600 rte_memcpy(pdst, psrc, part); 2601 copy += part; 2602 loc->mbuf_off += part; 2603 len -= part; 2604 if (!len) { 2605 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 2606 loc->mbuf_off = 0; 2607 /* Exhausted packet, just free. */ 2608 mbuf = loc->mbuf; 2609 loc->mbuf = mbuf->next; 2610 rte_pktmbuf_free_seg(mbuf); 2611 loc->mbuf_off = 0; 2612 MLX5_ASSERT(loc->mbuf_nseg >= 1); 2613 --loc->mbuf_nseg; 2614 } 2615 return copy; 2616 } 2617 pdst += part; 2618 } while (true); 2619 } 2620 2621 /** 2622 * Build the Ethernet Segment with inlined data from 2623 * multi-segment packet. Checks the boundary of WQEBB 2624 * and ring buffer wrapping, supports Software Parser, 2625 * Checksums and VLAN insertion Tx offload features. 2626 * 2627 * @param txq 2628 * Pointer to TX queue structure. 2629 * @param loc 2630 * Pointer to burst routine local context. 2631 * @param wqe 2632 * Pointer to WQE to fill with built Ethernet Segment. 2633 * @param vlan 2634 * Length of VLAN tag insertion if any. 2635 * @param inlen 2636 * Length of data to inline (VLAN included, if any). 2637 * @param tso 2638 * TSO flag, set mss field from the packet. 2639 * @param olx 2640 * Configured Tx offloads mask. It is fully defined at 2641 * compile time and may be used for optimization. 2642 * 2643 * @return 2644 * Pointer to the next Data Segment (aligned and 2645 * possible NOT wrapped around - caller should do 2646 * wrapping check on its own). 2647 */ 2648 static __rte_always_inline struct mlx5_wqe_dseg * 2649 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 2650 struct mlx5_txq_local *__rte_restrict loc, 2651 struct mlx5_wqe *__rte_restrict wqe, 2652 unsigned int vlan, 2653 unsigned int inlen, 2654 unsigned int tso, 2655 unsigned int olx) 2656 { 2657 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2658 uint32_t csum; 2659 uint8_t *pdst; 2660 unsigned int part, tlen = 0; 2661 2662 /* 2663 * Calculate and set check sum flags first, uint32_t field 2664 * in segment may be shared with Software Parser flags. 2665 */ 2666 csum = MLX5_TXOFF_CONFIG(CSUM) ? 
txq_ol_cksum_to_cs(loc->mbuf) : 0; 2667 if (tso) { 2668 csum <<= 24; 2669 csum |= loc->mbuf->tso_segsz; 2670 es->flags = rte_cpu_to_be_32(csum); 2671 } else { 2672 es->flags = rte_cpu_to_le_32(csum); 2673 } 2674 /* 2675 * Calculate and set Software Parser offsets and flags. 2676 * These flags a set for custom UDP and IP tunnel packets. 2677 */ 2678 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2679 /* Fill metadata field if needed. */ 2680 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2681 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2682 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2683 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2684 (sizeof(uint16_t) + 2685 sizeof(rte_v128u32_t)), 2686 "invalid Ethernet Segment data size"); 2687 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2688 (sizeof(uint16_t) + 2689 sizeof(struct rte_vlan_hdr) + 2690 2 * RTE_ETHER_ADDR_LEN), 2691 "invalid Ethernet Segment data size"); 2692 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2693 pdst = (uint8_t *)&es->inline_data; 2694 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2695 /* Implement VLAN tag insertion as part inline data. */ 2696 mlx5_tx_mseg_memcpy(pdst, loc, 2697 2 * RTE_ETHER_ADDR_LEN, 2698 2 * RTE_ETHER_ADDR_LEN, olx); 2699 pdst += 2 * RTE_ETHER_ADDR_LEN; 2700 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2701 ((RTE_ETHER_TYPE_VLAN << 16) | 2702 loc->mbuf->vlan_tci); 2703 pdst += sizeof(struct rte_vlan_hdr); 2704 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 2705 } 2706 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 2707 /* 2708 * The WQEBB space availability is checked by caller. 2709 * Here we should be aware of WQE ring buffer wraparound only. 2710 */ 2711 part = (uint8_t *)txq->wqes_end - pdst; 2712 part = RTE_MIN(part, inlen - tlen); 2713 MLX5_ASSERT(part); 2714 do { 2715 unsigned int copy; 2716 2717 /* 2718 * Copying may be interrupted inside the routine 2719 * if run into no inline hint flag. 2720 */ 2721 copy = tlen >= txq->inlen_mode ? 0 : (txq->inlen_mode - tlen); 2722 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 2723 tlen += copy; 2724 if (likely(inlen <= tlen) || copy < part) { 2725 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 2726 pdst += copy; 2727 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2728 return (struct mlx5_wqe_dseg *)pdst; 2729 } 2730 pdst = (uint8_t *)txq->wqes; 2731 part = inlen - tlen; 2732 } while (true); 2733 } 2734 2735 /** 2736 * Build the Data Segment of pointer type. 2737 * 2738 * @param txq 2739 * Pointer to TX queue structure. 2740 * @param loc 2741 * Pointer to burst routine local context. 2742 * @param dseg 2743 * Pointer to WQE to fill with built Data Segment. 2744 * @param buf 2745 * Data buffer to point. 2746 * @param len 2747 * Data buffer length. 2748 * @param olx 2749 * Configured Tx offloads mask. It is fully defined at 2750 * compile time and may be used for optimization. 2751 */ 2752 static __rte_always_inline void 2753 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 2754 struct mlx5_txq_local *__rte_restrict loc, 2755 struct mlx5_wqe_dseg *__rte_restrict dseg, 2756 uint8_t *buf, 2757 unsigned int len, 2758 unsigned int olx __rte_unused) 2759 2760 { 2761 MLX5_ASSERT(len); 2762 dseg->bcount = rte_cpu_to_be_32(len); 2763 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2764 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2765 } 2766 2767 /** 2768 * Build the Data Segment of pointer type or inline 2769 * if data length is less than buffer in minimal 2770 * Data Segment size. 
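 * The inline variant copies at most MLX5_DSEG_MIN_INLINE_SIZE bytes
 * straight into the Data Segment with a small unrolled copy.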
2771 * 2772 * @param txq 2773 * Pointer to TX queue structure. 2774 * @param loc 2775 * Pointer to burst routine local context. 2776 * @param dseg 2777 * Pointer to WQE to fill with built Data Segment. 2778 * @param buf 2779 * Data buffer to point. 2780 * @param len 2781 * Data buffer length. 2782 * @param olx 2783 * Configured Tx offloads mask. It is fully defined at 2784 * compile time and may be used for optimization. 2785 */ 2786 static __rte_always_inline void 2787 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 2788 struct mlx5_txq_local *__rte_restrict loc, 2789 struct mlx5_wqe_dseg *__rte_restrict dseg, 2790 uint8_t *buf, 2791 unsigned int len, 2792 unsigned int olx __rte_unused) 2793 2794 { 2795 uintptr_t dst, src; 2796 2797 MLX5_ASSERT(len); 2798 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 2799 dseg->bcount = rte_cpu_to_be_32(len); 2800 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2801 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2802 2803 return; 2804 } 2805 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2806 /* Unrolled implementation of generic rte_memcpy. */ 2807 dst = (uintptr_t)&dseg->inline_data[0]; 2808 src = (uintptr_t)buf; 2809 if (len & 0x08) { 2810 #ifdef RTE_ARCH_STRICT_ALIGN 2811 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 2812 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2813 dst += sizeof(uint32_t); 2814 src += sizeof(uint32_t); 2815 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2816 dst += sizeof(uint32_t); 2817 src += sizeof(uint32_t); 2818 #else 2819 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 2820 dst += sizeof(uint64_t); 2821 src += sizeof(uint64_t); 2822 #endif 2823 } 2824 if (len & 0x04) { 2825 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2826 dst += sizeof(uint32_t); 2827 src += sizeof(uint32_t); 2828 } 2829 if (len & 0x02) { 2830 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 2831 dst += sizeof(uint16_t); 2832 src += sizeof(uint16_t); 2833 } 2834 if (len & 0x01) 2835 *(uint8_t *)dst = *(uint8_t *)src; 2836 } 2837 2838 /** 2839 * Build the Data Segment of inlined data from single 2840 * segment packet, no VLAN insertion. 2841 * 2842 * @param txq 2843 * Pointer to TX queue structure. 2844 * @param loc 2845 * Pointer to burst routine local context. 2846 * @param dseg 2847 * Pointer to WQE to fill with built Data Segment. 2848 * @param buf 2849 * Data buffer to point. 2850 * @param len 2851 * Data buffer length. 2852 * @param olx 2853 * Configured Tx offloads mask. It is fully defined at 2854 * compile time and may be used for optimization. 2855 * 2856 * @return 2857 * Pointer to the next Data Segment after inlined data. 2858 * Ring buffer wraparound check is needed. We do not 2859 * do it here because it may not be needed for the 2860 * last packet in the eMPW session. 2861 */ 2862 static __rte_always_inline struct mlx5_wqe_dseg * 2863 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 2864 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2865 struct mlx5_wqe_dseg *__rte_restrict dseg, 2866 uint8_t *buf, 2867 unsigned int len, 2868 unsigned int olx __rte_unused) 2869 { 2870 unsigned int part; 2871 uint8_t *pdst; 2872 2873 if (!MLX5_TXOFF_CONFIG(MPW)) { 2874 /* Store the descriptor byte counter for eMPW sessions. */ 2875 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2876 pdst = &dseg->inline_data[0]; 2877 } else { 2878 /* The entire legacy MPW session counter is stored on close. 
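		 * For legacy MPW no per-packet byte count is written here,
		 * the data is copied starting right at the Data Segment
		 * base address.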
*/ 2879 pdst = (uint8_t *)dseg; 2880 } 2881 /* 2882 * The WQEBB space availability is checked by caller. 2883 * Here we should be aware of WQE ring buffer wraparound only. 2884 */ 2885 part = (uint8_t *)txq->wqes_end - pdst; 2886 part = RTE_MIN(part, len); 2887 do { 2888 rte_memcpy(pdst, buf, part); 2889 len -= part; 2890 if (likely(!len)) { 2891 pdst += part; 2892 if (!MLX5_TXOFF_CONFIG(MPW)) 2893 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2894 /* Note: no final wraparound check here. */ 2895 return (struct mlx5_wqe_dseg *)pdst; 2896 } 2897 pdst = (uint8_t *)txq->wqes; 2898 buf += part; 2899 part = len; 2900 } while (true); 2901 } 2902 2903 /** 2904 * Build the Data Segment of inlined data from single 2905 * segment packet with VLAN insertion. 2906 * 2907 * @param txq 2908 * Pointer to TX queue structure. 2909 * @param loc 2910 * Pointer to burst routine local context. 2911 * @param dseg 2912 * Pointer to the dseg fill with built Data Segment. 2913 * @param buf 2914 * Data buffer to point. 2915 * @param len 2916 * Data buffer length. 2917 * @param olx 2918 * Configured Tx offloads mask. It is fully defined at 2919 * compile time and may be used for optimization. 2920 * 2921 * @return 2922 * Pointer to the next Data Segment after inlined data. 2923 * Ring buffer wraparound check is needed. 2924 */ 2925 static __rte_always_inline struct mlx5_wqe_dseg * 2926 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 2927 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2928 struct mlx5_wqe_dseg *__rte_restrict dseg, 2929 uint8_t *buf, 2930 unsigned int len, 2931 unsigned int olx __rte_unused) 2932 2933 { 2934 unsigned int part; 2935 uint8_t *pdst; 2936 2937 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 2938 static_assert(MLX5_DSEG_MIN_INLINE_SIZE == 2939 (2 * RTE_ETHER_ADDR_LEN), 2940 "invalid Data Segment data size"); 2941 if (!MLX5_TXOFF_CONFIG(MPW)) { 2942 /* Store the descriptor byte counter for eMPW sessions. */ 2943 dseg->bcount = rte_cpu_to_be_32 2944 ((len + sizeof(struct rte_vlan_hdr)) | 2945 MLX5_ETH_WQE_DATA_INLINE); 2946 pdst = &dseg->inline_data[0]; 2947 } else { 2948 /* The entire legacy MPW session counter is stored on close. */ 2949 pdst = (uint8_t *)dseg; 2950 } 2951 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 2952 buf += MLX5_DSEG_MIN_INLINE_SIZE; 2953 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 2954 len -= MLX5_DSEG_MIN_INLINE_SIZE; 2955 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ 2956 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 2957 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 2958 pdst = (uint8_t *)txq->wqes; 2959 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 2960 loc->mbuf->vlan_tci); 2961 pdst += sizeof(struct rte_vlan_hdr); 2962 /* 2963 * The WQEBB space availability is checked by caller. 2964 * Here we should be aware of WQE ring buffer wraparound only. 2965 */ 2966 part = (uint8_t *)txq->wqes_end - pdst; 2967 part = RTE_MIN(part, len); 2968 do { 2969 rte_memcpy(pdst, buf, part); 2970 len -= part; 2971 if (likely(!len)) { 2972 pdst += part; 2973 if (!MLX5_TXOFF_CONFIG(MPW)) 2974 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2975 /* Note: no final wraparound check here. */ 2976 return (struct mlx5_wqe_dseg *)pdst; 2977 } 2978 pdst = (uint8_t *)txq->wqes; 2979 buf += part; 2980 part = len; 2981 } while (true); 2982 } 2983 2984 /** 2985 * Build the Ethernet Segment with optionally inlined data with 2986 * VLAN insertion and following Data Segments (if any) from 2987 * multi-segment packet. 
Used by ordinary send and TSO. 2988 * 2989 * @param txq 2990 * Pointer to TX queue structure. 2991 * @param loc 2992 * Pointer to burst routine local context. 2993 * @param wqe 2994 * Pointer to WQE to fill with built Ethernet/Data Segments. 2995 * @param vlan 2996 * Length of VLAN header to insert, 0 means no VLAN insertion. 2997 * @param inlen 2998 * Data length to inline. For TSO this parameter specifies 2999 * exact value, for ordinary send routine can be aligned by 3000 * caller to provide better WQE space saving and data buffer 3001 * start address alignment. This length includes VLAN header 3002 * being inserted. 3003 * @param tso 3004 * Zero means ordinary send, inlined data can be extended, 3005 * otherwise this is TSO, inlined data length is fixed. 3006 * @param olx 3007 * Configured Tx offloads mask. It is fully defined at 3008 * compile time and may be used for optimization. 3009 * 3010 * @return 3011 * Actual size of built WQE in segments. 3012 */ 3013 static __rte_always_inline unsigned int 3014 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 3015 struct mlx5_txq_local *__rte_restrict loc, 3016 struct mlx5_wqe *__rte_restrict wqe, 3017 unsigned int vlan, 3018 unsigned int inlen, 3019 unsigned int tso, 3020 unsigned int olx __rte_unused) 3021 { 3022 struct mlx5_wqe_dseg *__rte_restrict dseg; 3023 unsigned int ds; 3024 3025 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 3026 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 3027 loc->mbuf_off = 0; 3028 3029 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 3030 if (!loc->mbuf_nseg) 3031 goto dseg_done; 3032 /* 3033 * There are still some mbuf remaining, not inlined. 3034 * The first mbuf may be partially inlined and we 3035 * must process the possible non-zero data offset. 3036 */ 3037 if (loc->mbuf_off) { 3038 unsigned int dlen; 3039 uint8_t *dptr; 3040 3041 /* 3042 * Exhausted packets must be dropped before. 3043 * Non-zero offset means there are some data 3044 * remained in the packet. 3045 */ 3046 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 3047 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 3048 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 3049 loc->mbuf_off); 3050 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 3051 /* 3052 * Build the pointer/minimal data Data Segment. 3053 * Do ring buffer wrapping check in advance. 3054 */ 3055 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3056 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3057 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 3058 /* Store the mbuf to be freed on completion. */ 3059 MLX5_ASSERT(loc->elts_free); 3060 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3061 --loc->elts_free; 3062 ++dseg; 3063 if (--loc->mbuf_nseg == 0) 3064 goto dseg_done; 3065 loc->mbuf = loc->mbuf->next; 3066 loc->mbuf_off = 0; 3067 } 3068 do { 3069 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3070 struct rte_mbuf *mbuf; 3071 3072 /* Zero length segment found, just skip. 
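			 * The empty mbuf is freed immediately and does not
			 * produce a Data Segment.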
*/ 3073 mbuf = loc->mbuf; 3074 loc->mbuf = loc->mbuf->next; 3075 rte_pktmbuf_free_seg(mbuf); 3076 if (--loc->mbuf_nseg == 0) 3077 break; 3078 } else { 3079 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3080 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3081 mlx5_tx_dseg_iptr 3082 (txq, loc, dseg, 3083 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3084 rte_pktmbuf_data_len(loc->mbuf), olx); 3085 MLX5_ASSERT(loc->elts_free); 3086 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3087 --loc->elts_free; 3088 ++dseg; 3089 if (--loc->mbuf_nseg == 0) 3090 break; 3091 loc->mbuf = loc->mbuf->next; 3092 } 3093 } while (true); 3094 3095 dseg_done: 3096 /* Calculate actual segments used from the dseg pointer. */ 3097 if ((uintptr_t)wqe < (uintptr_t)dseg) 3098 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 3099 else 3100 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 3101 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 3102 return ds; 3103 } 3104 3105 /** 3106 * The routine checks timestamp flag in the current packet, 3107 * and push WAIT WQE into the queue if scheduling is required. 3108 * 3109 * @param txq 3110 * Pointer to TX queue structure. 3111 * @param loc 3112 * Pointer to burst routine local context. 3113 * @param olx 3114 * Configured Tx offloads mask. It is fully defined at 3115 * compile time and may be used for optimization. 3116 * 3117 * @return 3118 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3119 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 3120 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 3121 * Local context variables partially updated. 3122 */ 3123 static __rte_always_inline enum mlx5_txcmp_code 3124 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 3125 struct mlx5_txq_local *restrict loc, 3126 unsigned int olx) 3127 { 3128 if (MLX5_TXOFF_CONFIG(TXPP) && 3129 loc->mbuf->ol_flags & txq->ts_mask) { 3130 struct mlx5_wqe *wqe; 3131 uint64_t ts; 3132 int32_t wci; 3133 3134 /* 3135 * Estimate the required space quickly and roughly. 3136 * We would like to ensure the packet can be pushed 3137 * to the queue and we won't get the orphan WAIT WQE. 3138 */ 3139 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 3140 loc->elts_free < NB_SEGS(loc->mbuf)) 3141 return MLX5_TXCMP_CODE_EXIT; 3142 /* Convert the timestamp into completion to wait. */ 3143 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 3144 wci = mlx5_txpp_convert_tx_ts(txq->sh, ts); 3145 if (unlikely(wci < 0)) 3146 return MLX5_TXCMP_CODE_SINGLE; 3147 /* Build the WAIT WQE with specified completion. */ 3148 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3149 mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx); 3150 mlx5_tx_wseg_init(txq, loc, wqe, wci, olx); 3151 ++txq->wqe_ci; 3152 --loc->wqe_free; 3153 return MLX5_TXCMP_CODE_MULTI; 3154 } 3155 return MLX5_TXCMP_CODE_SINGLE; 3156 } 3157 3158 /** 3159 * Tx one packet function for multi-segment TSO. Supports all 3160 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 3161 * sends one packet per WQE. 3162 * 3163 * This routine is responsible for storing processed mbuf 3164 * into elts ring buffer and update elts_head. 3165 * 3166 * @param txq 3167 * Pointer to TX queue structure. 3168 * @param loc 3169 * Pointer to burst routine local context. 3170 * @param olx 3171 * Configured Tx offloads mask. It is fully defined at 3172 * compile time and may be used for optimization. 3173 * 3174 * @return 3175 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 
3176 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3177 * Local context variables partially updated. 3178 */ 3179 static __rte_always_inline enum mlx5_txcmp_code 3180 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 3181 struct mlx5_txq_local *__rte_restrict loc, 3182 unsigned int olx) 3183 { 3184 struct mlx5_wqe *__rte_restrict wqe; 3185 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 3186 3187 if (MLX5_TXOFF_CONFIG(TXPP)) { 3188 enum mlx5_txcmp_code wret; 3189 3190 /* Generate WAIT for scheduling if requested. */ 3191 wret = mlx5_tx_schedule_send(txq, loc, olx); 3192 if (wret == MLX5_TXCMP_CODE_EXIT) 3193 return MLX5_TXCMP_CODE_EXIT; 3194 if (wret == MLX5_TXCMP_CODE_ERROR) 3195 return MLX5_TXCMP_CODE_ERROR; 3196 } 3197 /* 3198 * Calculate data length to be inlined to estimate 3199 * the required space in WQE ring buffer. 3200 */ 3201 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3202 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3203 vlan = sizeof(struct rte_vlan_hdr); 3204 inlen = loc->mbuf->l2_len + vlan + 3205 loc->mbuf->l3_len + loc->mbuf->l4_len; 3206 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 3207 return MLX5_TXCMP_CODE_ERROR; 3208 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3209 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 3210 /* Packet must contain all TSO headers. */ 3211 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 3212 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3213 inlen > (dlen + vlan))) 3214 return MLX5_TXCMP_CODE_ERROR; 3215 MLX5_ASSERT(inlen >= txq->inlen_mode); 3216 /* 3217 * Check whether there are enough free WQEBBs: 3218 * - Control Segment 3219 * - Ethernet Segment 3220 * - First Segment of inlined Ethernet data 3221 * - ... data continued ... 3222 * - Data Segments of pointer/min inline type 3223 */ 3224 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3225 MLX5_ESEG_MIN_INLINE_SIZE + 3226 MLX5_WSEG_SIZE + 3227 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3228 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3229 return MLX5_TXCMP_CODE_EXIT; 3230 /* Check for maximal WQE size. */ 3231 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3232 return MLX5_TXCMP_CODE_ERROR; 3233 #ifdef MLX5_PMD_SOFT_COUNTERS 3234 /* Update sent data bytes/packets counters. */ 3235 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 3236 loc->mbuf->tso_segsz; 3237 /* 3238 * One will be added for mbuf itself 3239 * at the end of the mlx5_tx_burst from 3240 * loc->pkts_sent field. 3241 */ 3242 --ntcp; 3243 txq->stats.opackets += ntcp; 3244 txq->stats.obytes += dlen + vlan + ntcp * inlen; 3245 #endif 3246 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3247 loc->wqe_last = wqe; 3248 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 3249 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 3250 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3251 txq->wqe_ci += (ds + 3) / 4; 3252 loc->wqe_free -= (ds + 3) / 4; 3253 return MLX5_TXCMP_CODE_MULTI; 3254 } 3255 3256 /** 3257 * Tx one packet function for multi-segment SEND. Supports all 3258 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3259 * sends one packet per WQE, without any data inlining in 3260 * Ethernet Segment. 3261 * 3262 * This routine is responsible for storing processed mbuf 3263 * into elts ring buffer and update elts_head. 3264 * 3265 * @param txq 3266 * Pointer to TX queue structure. 3267 * @param loc 3268 * Pointer to burst routine local context. 3269 * @param olx 3270 * Configured Tx offloads mask. 
It is fully defined at 3271 * compile time and may be used for optimization. 3272 * 3273 * @return 3274 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3275 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3276 * Local context variables partially updated. 3277 */ 3278 static __rte_always_inline enum mlx5_txcmp_code 3279 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 3280 struct mlx5_txq_local *__rte_restrict loc, 3281 unsigned int olx) 3282 { 3283 struct mlx5_wqe_dseg *__rte_restrict dseg; 3284 struct mlx5_wqe *__rte_restrict wqe; 3285 unsigned int ds, nseg; 3286 3287 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3288 if (MLX5_TXOFF_CONFIG(TXPP)) { 3289 enum mlx5_txcmp_code wret; 3290 3291 /* Generate WAIT for scheduling if requested. */ 3292 wret = mlx5_tx_schedule_send(txq, loc, olx); 3293 if (wret == MLX5_TXCMP_CODE_EXIT) 3294 return MLX5_TXCMP_CODE_EXIT; 3295 if (wret == MLX5_TXCMP_CODE_ERROR) 3296 return MLX5_TXCMP_CODE_ERROR; 3297 } 3298 /* 3299 * No inline at all, it means the CPU cycles saving 3300 * is prioritized at configuration, we should not 3301 * copy any packet data to WQE. 3302 */ 3303 nseg = NB_SEGS(loc->mbuf); 3304 ds = 2 + nseg; 3305 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3306 return MLX5_TXCMP_CODE_EXIT; 3307 /* Check for maximal WQE size. */ 3308 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3309 return MLX5_TXCMP_CODE_ERROR; 3310 /* 3311 * Some Tx offloads may cause an error if 3312 * packet is not long enough, check against 3313 * assumed minimal length. 3314 */ 3315 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 3316 return MLX5_TXCMP_CODE_ERROR; 3317 #ifdef MLX5_PMD_SOFT_COUNTERS 3318 /* Update sent data bytes counter. */ 3319 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 3320 if (MLX5_TXOFF_CONFIG(VLAN) && 3321 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3322 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 3323 #endif 3324 /* 3325 * SEND WQE, one WQEBB: 3326 * - Control Segment, SEND opcode 3327 * - Ethernet Segment, optional VLAN, no inline 3328 * - Data Segments, pointer only type 3329 */ 3330 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3331 loc->wqe_last = wqe; 3332 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 3333 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3334 dseg = &wqe->dseg[0]; 3335 do { 3336 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3337 struct rte_mbuf *mbuf; 3338 3339 /* 3340 * Zero length segment found, have to 3341 * correct total size of WQE in segments. 3342 * It is supposed to be rare occasion, so 3343 * in normal case (no zero length segments) 3344 * we avoid extra writing to the Control 3345 * Segment. 3346 */ 3347 --ds; 3348 wqe->cseg.sq_ds -= RTE_BE32(1); 3349 mbuf = loc->mbuf; 3350 loc->mbuf = mbuf->next; 3351 rte_pktmbuf_free_seg(mbuf); 3352 if (--nseg == 0) 3353 break; 3354 } else { 3355 mlx5_tx_dseg_ptr 3356 (txq, loc, dseg, 3357 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3358 rte_pktmbuf_data_len(loc->mbuf), olx); 3359 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3360 --loc->elts_free; 3361 if (--nseg == 0) 3362 break; 3363 ++dseg; 3364 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3365 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3366 loc->mbuf = loc->mbuf->next; 3367 } 3368 } while (true); 3369 txq->wqe_ci += (ds + 3) / 4; 3370 loc->wqe_free -= (ds + 3) / 4; 3371 return MLX5_TXCMP_CODE_MULTI; 3372 } 3373 3374 /** 3375 * Tx one packet function for multi-segment SEND. 
Supports all 3376 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3377 * sends one packet per WQE, with data inlining in 3378 * Ethernet Segment and minimal Data Segments. 3379 * 3380 * This routine is responsible for storing processed mbuf 3381 * into elts ring buffer and update elts_head. 3382 * 3383 * @param txq 3384 * Pointer to TX queue structure. 3385 * @param loc 3386 * Pointer to burst routine local context. 3387 * @param olx 3388 * Configured Tx offloads mask. It is fully defined at 3389 * compile time and may be used for optimization. 3390 * 3391 * @return 3392 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3393 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3394 * Local context variables partially updated. 3395 */ 3396 static __rte_always_inline enum mlx5_txcmp_code 3397 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 3398 struct mlx5_txq_local *__rte_restrict loc, 3399 unsigned int olx) 3400 { 3401 struct mlx5_wqe *__rte_restrict wqe; 3402 unsigned int ds, inlen, dlen, vlan = 0; 3403 3404 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3405 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3406 if (MLX5_TXOFF_CONFIG(TXPP)) { 3407 enum mlx5_txcmp_code wret; 3408 3409 /* Generate WAIT for scheduling if requested. */ 3410 wret = mlx5_tx_schedule_send(txq, loc, olx); 3411 if (wret == MLX5_TXCMP_CODE_EXIT) 3412 return MLX5_TXCMP_CODE_EXIT; 3413 if (wret == MLX5_TXCMP_CODE_ERROR) 3414 return MLX5_TXCMP_CODE_ERROR; 3415 } 3416 /* 3417 * First calculate data length to be inlined 3418 * to estimate the required space for WQE. 3419 */ 3420 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3421 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3422 vlan = sizeof(struct rte_vlan_hdr); 3423 inlen = dlen + vlan; 3424 /* Check against minimal length. */ 3425 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3426 return MLX5_TXCMP_CODE_ERROR; 3427 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 3428 if (inlen > txq->inlen_send || 3429 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 3430 struct rte_mbuf *mbuf; 3431 unsigned int nxlen; 3432 uintptr_t start; 3433 3434 /* 3435 * Packet length exceeds the allowed inline 3436 * data length, check whether the minimal 3437 * inlining is required. 3438 */ 3439 if (txq->inlen_mode) { 3440 MLX5_ASSERT(txq->inlen_mode >= 3441 MLX5_ESEG_MIN_INLINE_SIZE); 3442 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 3443 inlen = txq->inlen_mode; 3444 } else { 3445 if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE || 3446 !vlan || txq->vlan_en) { 3447 /* 3448 * VLAN insertion will be done inside by HW. 3449 * It is not utmost effective - VLAN flag is 3450 * checked twice, but we should proceed the 3451 * inlining length correctly and take into 3452 * account the VLAN header being inserted. 3453 */ 3454 return mlx5_tx_packet_multi_send 3455 (txq, loc, olx); 3456 } 3457 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 3458 } 3459 /* 3460 * Now we know the minimal amount of data is requested 3461 * to inline. Check whether we should inline the buffers 3462 * from the chain beginning to eliminate some mbufs. 3463 */ 3464 mbuf = loc->mbuf; 3465 nxlen = rte_pktmbuf_data_len(mbuf); 3466 if (unlikely(nxlen <= txq->inlen_send)) { 3467 /* We can inline first mbuf at least. */ 3468 if (nxlen < inlen) { 3469 unsigned int smlen; 3470 3471 /* Scan mbufs till inlen filled. 
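				 * Accumulate segment lengths until at least
				 * inlen bytes are covered; smlen keeps the
				 * previous total so the offset into a
				 * partially inlined segment can be computed.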
*/ 3472 do { 3473 smlen = nxlen; 3474 mbuf = NEXT(mbuf); 3475 MLX5_ASSERT(mbuf); 3476 nxlen = rte_pktmbuf_data_len(mbuf); 3477 nxlen += smlen; 3478 } while (unlikely(nxlen < inlen)); 3479 if (unlikely(nxlen > txq->inlen_send)) { 3480 /* We cannot inline entire mbuf. */ 3481 smlen = inlen - smlen; 3482 start = rte_pktmbuf_mtod_offset 3483 (mbuf, uintptr_t, smlen); 3484 goto do_align; 3485 } 3486 } 3487 do { 3488 inlen = nxlen; 3489 mbuf = NEXT(mbuf); 3490 /* There should be not end of packet. */ 3491 MLX5_ASSERT(mbuf); 3492 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 3493 } while (unlikely(nxlen < txq->inlen_send)); 3494 } 3495 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 3496 /* 3497 * Check whether we can do inline to align start 3498 * address of data buffer to cacheline. 3499 */ 3500 do_align: 3501 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 3502 if (unlikely(start)) { 3503 start += inlen; 3504 if (start <= txq->inlen_send) 3505 inlen = start; 3506 } 3507 } 3508 /* 3509 * Check whether there are enough free WQEBBs: 3510 * - Control Segment 3511 * - Ethernet Segment 3512 * - First Segment of inlined Ethernet data 3513 * - ... data continued ... 3514 * - Data Segments of pointer/min inline type 3515 * 3516 * Estimate the number of Data Segments conservatively, 3517 * supposing no any mbufs is being freed during inlining. 3518 */ 3519 MLX5_ASSERT(inlen <= txq->inlen_send); 3520 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3521 MLX5_ESEG_MIN_INLINE_SIZE + 3522 MLX5_WSEG_SIZE + 3523 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3524 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3525 return MLX5_TXCMP_CODE_EXIT; 3526 /* Check for maximal WQE size. */ 3527 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3528 return MLX5_TXCMP_CODE_ERROR; 3529 #ifdef MLX5_PMD_SOFT_COUNTERS 3530 /* Update sent data bytes/packets counters. */ 3531 txq->stats.obytes += dlen + vlan; 3532 #endif 3533 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3534 loc->wqe_last = wqe; 3535 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 3536 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 3537 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3538 txq->wqe_ci += (ds + 3) / 4; 3539 loc->wqe_free -= (ds + 3) / 4; 3540 return MLX5_TXCMP_CODE_MULTI; 3541 } 3542 3543 /** 3544 * Tx burst function for multi-segment packets. Supports all 3545 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 3546 * sends one packet per WQE. Function stops sending if it 3547 * encounters the single-segment packet. 3548 * 3549 * This routine is responsible for storing processed mbuf 3550 * into elts ring buffer and update elts_head. 3551 * 3552 * @param txq 3553 * Pointer to TX queue structure. 3554 * @param[in] pkts 3555 * Packets to transmit. 3556 * @param pkts_n 3557 * Number of packets in array. 3558 * @param loc 3559 * Pointer to burst routine local context. 3560 * @param olx 3561 * Configured Tx offloads mask. It is fully defined at 3562 * compile time and may be used for optimization. 3563 * 3564 * @return 3565 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3566 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3567 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3568 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 3569 * Local context variables updated. 
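 *
 * @note
 *   On entry loc->mbuf must already reference the first multi-segment
 *   packet to process; the pkts array pointer is advanced past it
 *   inside the routine.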
3570 */ 3571 static __rte_always_inline enum mlx5_txcmp_code 3572 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 3573 struct rte_mbuf **__rte_restrict pkts, 3574 unsigned int pkts_n, 3575 struct mlx5_txq_local *__rte_restrict loc, 3576 unsigned int olx) 3577 { 3578 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3579 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3580 pkts += loc->pkts_sent + 1; 3581 pkts_n -= loc->pkts_sent; 3582 for (;;) { 3583 enum mlx5_txcmp_code ret; 3584 3585 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3586 /* 3587 * Estimate the number of free elts quickly but 3588 * conservatively. Some segment may be fully inlined 3589 * and freed, ignore this here - precise estimation 3590 * is costly. 3591 */ 3592 if (loc->elts_free < NB_SEGS(loc->mbuf)) 3593 return MLX5_TXCMP_CODE_EXIT; 3594 if (MLX5_TXOFF_CONFIG(TSO) && 3595 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) { 3596 /* Proceed with multi-segment TSO. */ 3597 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 3598 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 3599 /* Proceed with multi-segment SEND with inlining. */ 3600 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 3601 } else { 3602 /* Proceed with multi-segment SEND w/o inlining. */ 3603 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 3604 } 3605 if (ret == MLX5_TXCMP_CODE_EXIT) 3606 return MLX5_TXCMP_CODE_EXIT; 3607 if (ret == MLX5_TXCMP_CODE_ERROR) 3608 return MLX5_TXCMP_CODE_ERROR; 3609 /* WQE is built, go to the next packet. */ 3610 ++loc->pkts_sent; 3611 --pkts_n; 3612 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3613 return MLX5_TXCMP_CODE_EXIT; 3614 loc->mbuf = *pkts++; 3615 if (pkts_n > 1) 3616 rte_prefetch0(*pkts); 3617 if (likely(NB_SEGS(loc->mbuf) > 1)) 3618 continue; 3619 /* Here ends the series of multi-segment packets. */ 3620 if (MLX5_TXOFF_CONFIG(TSO) && 3621 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3622 return MLX5_TXCMP_CODE_TSO; 3623 return MLX5_TXCMP_CODE_SINGLE; 3624 } 3625 MLX5_ASSERT(false); 3626 } 3627 3628 /** 3629 * Tx burst function for single-segment packets with TSO. 3630 * Supports all types of Tx offloads, except multi-packets. 3631 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 3632 * Function stops sending if it encounters the multi-segment 3633 * packet or packet without TSO requested. 3634 * 3635 * The routine is responsible for storing processed mbuf 3636 * into elts ring buffer and update elts_head if inline 3637 * offloads is requested due to possible early freeing 3638 * of the inlined mbufs (can not store pkts array in elts 3639 * as a batch). 3640 * 3641 * @param txq 3642 * Pointer to TX queue structure. 3643 * @param[in] pkts 3644 * Packets to transmit. 3645 * @param pkts_n 3646 * Number of packets in array. 3647 * @param loc 3648 * Pointer to burst routine local context. 3649 * @param olx 3650 * Configured Tx offloads mask. It is fully defined at 3651 * compile time and may be used for optimization. 3652 * 3653 * @return 3654 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3655 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3656 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3657 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3658 * Local context variables updated. 
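 *
 * @note
 *   The WQE size is accounted in 16-byte segments (ds) and converted
 *   to WQEBBs as (ds + 3) / 4 when wqe_ci and wqe_free are updated,
 *   since one WQEBB holds four such segments.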
3659 */ 3660 static __rte_always_inline enum mlx5_txcmp_code 3661 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 3662 struct rte_mbuf **__rte_restrict pkts, 3663 unsigned int pkts_n, 3664 struct mlx5_txq_local *__rte_restrict loc, 3665 unsigned int olx) 3666 { 3667 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3668 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3669 pkts += loc->pkts_sent + 1; 3670 pkts_n -= loc->pkts_sent; 3671 for (;;) { 3672 struct mlx5_wqe_dseg *__rte_restrict dseg; 3673 struct mlx5_wqe *__rte_restrict wqe; 3674 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 3675 uint8_t *dptr; 3676 3677 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3678 if (MLX5_TXOFF_CONFIG(TXPP)) { 3679 enum mlx5_txcmp_code wret; 3680 3681 /* Generate WAIT for scheduling if requested. */ 3682 wret = mlx5_tx_schedule_send(txq, loc, olx); 3683 if (wret == MLX5_TXCMP_CODE_EXIT) 3684 return MLX5_TXCMP_CODE_EXIT; 3685 if (wret == MLX5_TXCMP_CODE_ERROR) 3686 return MLX5_TXCMP_CODE_ERROR; 3687 } 3688 dlen = rte_pktmbuf_data_len(loc->mbuf); 3689 if (MLX5_TXOFF_CONFIG(VLAN) && 3690 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 3691 vlan = sizeof(struct rte_vlan_hdr); 3692 } 3693 /* 3694 * First calculate the WQE size to check 3695 * whether we have enough space in ring buffer. 3696 */ 3697 hlen = loc->mbuf->l2_len + vlan + 3698 loc->mbuf->l3_len + loc->mbuf->l4_len; 3699 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 3700 return MLX5_TXCMP_CODE_ERROR; 3701 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3702 hlen += loc->mbuf->outer_l2_len + 3703 loc->mbuf->outer_l3_len; 3704 /* Segment must contain all TSO headers. */ 3705 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 3706 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3707 hlen > (dlen + vlan))) 3708 return MLX5_TXCMP_CODE_ERROR; 3709 /* 3710 * Check whether there are enough free WQEBBs: 3711 * - Control Segment 3712 * - Ethernet Segment 3713 * - First Segment of inlined Ethernet data 3714 * - ... data continued ... 3715 * - Finishing Data Segment of pointer type 3716 */ 3717 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 3718 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3719 if (loc->wqe_free < ((ds + 3) / 4)) 3720 return MLX5_TXCMP_CODE_EXIT; 3721 #ifdef MLX5_PMD_SOFT_COUNTERS 3722 /* Update sent data bytes/packets counters. */ 3723 ntcp = (dlen + vlan - hlen + 3724 loc->mbuf->tso_segsz - 1) / 3725 loc->mbuf->tso_segsz; 3726 /* 3727 * One will be added for mbuf itself at the end 3728 * of the mlx5_tx_burst from loc->pkts_sent field. 3729 */ 3730 --ntcp; 3731 txq->stats.opackets += ntcp; 3732 txq->stats.obytes += dlen + vlan + ntcp * hlen; 3733 #endif 3734 /* 3735 * Build the TSO WQE: 3736 * - Control Segment 3737 * - Ethernet Segment with hlen bytes inlined 3738 * - Data Segment of pointer type 3739 */ 3740 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3741 loc->wqe_last = wqe; 3742 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3743 MLX5_OPCODE_TSO, olx); 3744 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 3745 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 3746 dlen -= hlen - vlan; 3747 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 3748 /* 3749 * WQE is built, update the loop parameters 3750 * and go to the next packet. 
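 * Each WQEBB holds four segments (MLX5_WQE_SIZE == 4 * MLX5_WSEG_SIZE),
 * so the WQE occupies (ds + 3) / 4 WQEBBs, e.g. ds == 6 rounds up to
 * two WQEBBs.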
3751 */ 3752 txq->wqe_ci += (ds + 3) / 4; 3753 loc->wqe_free -= (ds + 3) / 4; 3754 if (MLX5_TXOFF_CONFIG(INLINE)) 3755 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3756 --loc->elts_free; 3757 ++loc->pkts_sent; 3758 --pkts_n; 3759 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3760 return MLX5_TXCMP_CODE_EXIT; 3761 loc->mbuf = *pkts++; 3762 if (pkts_n > 1) 3763 rte_prefetch0(*pkts); 3764 if (MLX5_TXOFF_CONFIG(MULTI) && 3765 unlikely(NB_SEGS(loc->mbuf) > 1)) 3766 return MLX5_TXCMP_CODE_MULTI; 3767 if (likely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) 3768 return MLX5_TXCMP_CODE_SINGLE; 3769 /* Continue with the next TSO packet. */ 3770 } 3771 MLX5_ASSERT(false); 3772 } 3773 3774 /** 3775 * Analyze the packet and select the best method to send. 3776 * 3777 * @param txq 3778 * Pointer to TX queue structure. 3779 * @param loc 3780 * Pointer to burst routine local context. 3781 * @param olx 3782 * Configured Tx offloads mask. It is fully defined at 3783 * compile time and may be used for optimization. 3784 * @param newp 3785 * The predefined flag whether do complete check for 3786 * multi-segment packets and TSO. 3787 * 3788 * @return 3789 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3790 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 3791 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 3792 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 3793 */ 3794 static __rte_always_inline enum mlx5_txcmp_code 3795 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 3796 struct mlx5_txq_local *__rte_restrict loc, 3797 unsigned int olx, 3798 bool newp) 3799 { 3800 /* Check for multi-segment packet. */ 3801 if (newp && 3802 MLX5_TXOFF_CONFIG(MULTI) && 3803 unlikely(NB_SEGS(loc->mbuf) > 1)) 3804 return MLX5_TXCMP_CODE_MULTI; 3805 /* Check for TSO packet. */ 3806 if (newp && 3807 MLX5_TXOFF_CONFIG(TSO) && 3808 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3809 return MLX5_TXCMP_CODE_TSO; 3810 /* Check if eMPW is enabled at all. */ 3811 if (!MLX5_TXOFF_CONFIG(EMPW)) 3812 return MLX5_TXCMP_CODE_SINGLE; 3813 /* Check if eMPW can be engaged. */ 3814 if (MLX5_TXOFF_CONFIG(VLAN) && 3815 unlikely(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) && 3816 (!MLX5_TXOFF_CONFIG(INLINE) || 3817 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 3818 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 3819 /* 3820 * eMPW does not support VLAN insertion offload, 3821 * we have to inline the entire packet but 3822 * packet is too long for inlining. 3823 */ 3824 return MLX5_TXCMP_CODE_SINGLE; 3825 } 3826 return MLX5_TXCMP_CODE_EMPW; 3827 } 3828 3829 /** 3830 * Check the next packet attributes to match with the eMPW batch ones. 3831 * In addition, for legacy MPW the packet length is checked either. 3832 * 3833 * @param txq 3834 * Pointer to TX queue structure. 3835 * @param es 3836 * Pointer to Ethernet Segment of eMPW batch. 3837 * @param loc 3838 * Pointer to burst routine local context. 3839 * @param dlen 3840 * Length of previous packet in MPW descriptor. 3841 * @param olx 3842 * Configured Tx offloads mask. It is fully defined at 3843 * compile time and may be used for optimization. 3844 * 3845 * @return 3846 * true - packet match with eMPW batch attributes. 3847 * false - no match, eMPW should be restarted. 
*/ 3849 static __rte_always_inline bool 3850 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, 3851 struct mlx5_wqe_eseg *__rte_restrict es, 3852 struct mlx5_txq_local *__rte_restrict loc, 3853 uint32_t dlen, 3854 unsigned int olx) 3855 { 3856 uint8_t swp_flags = 0; 3857 3858 /* Compare the checksum flags, if any. */ 3859 if (MLX5_TXOFF_CONFIG(CSUM) && 3860 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags) 3861 return false; 3862 /* Compare the Software Parser offsets and flags. */ 3863 if (MLX5_TXOFF_CONFIG(SWP) && 3864 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) || 3865 es->swp_flags != swp_flags)) 3866 return false; 3867 /* Fill metadata field if needed. */ 3868 if (MLX5_TXOFF_CONFIG(METADATA) && 3869 es->metadata != (loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 3870 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0)) 3871 return false; 3872 /* Legacy MPW can send packets with the same length only. */ 3873 if (MLX5_TXOFF_CONFIG(MPW) && 3874 dlen != rte_pktmbuf_data_len(loc->mbuf)) 3875 return false; 3876 /* There must be no VLAN packets in eMPW loop. */ 3877 if (MLX5_TXOFF_CONFIG(VLAN)) 3878 MLX5_ASSERT(!(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT)); 3879 /* Check if the scheduling is requested. */ 3880 if (MLX5_TXOFF_CONFIG(TXPP) && 3881 loc->mbuf->ol_flags & txq->ts_mask) 3882 return false; 3883 return true; 3884 } 3885 3886 /* 3887 * Update send loop variables and WQE for eMPW loop 3888 * without data inlining. Number of Data Segments is 3889 * equal to the number of sent packets. 3890 * 3891 * @param txq 3892 * Pointer to TX queue structure. 3893 * @param loc 3894 * Pointer to burst routine local context. 3895 * @param ds 3896 * Number of packets/Data Segments (one Data Segment per packet). 3897 * @param slen 3898 * Accumulated statistics, bytes sent. 3899 * @param olx 3900 * Configured Tx offloads mask. It is fully defined at 3901 * compile time and may be used for optimization. 3906 */ 3907 static __rte_always_inline void 3908 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, 3909 struct mlx5_txq_local *__rte_restrict loc, 3910 unsigned int ds, 3911 unsigned int slen, 3912 unsigned int olx __rte_unused) 3913 { 3914 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 3915 #ifdef MLX5_PMD_SOFT_COUNTERS 3916 /* Update sent data bytes counter. */ 3917 txq->stats.obytes += slen; 3918 #else 3919 (void)slen; 3920 #endif 3921 loc->elts_free -= ds; 3922 loc->pkts_sent += ds; 3923 ds += 2; 3924 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3925 txq->wqe_ci += (ds + 3) / 4; 3926 loc->wqe_free -= (ds + 3) / 4; 3927 } 3928 3929 /* 3930 * Update send loop variables and WQE for eMPW loop 3931 * with data inlining. Takes the total size of the descriptors 3932 * and data pushed to the WQE. 3933 * 3934 * @param txq 3935 * Pointer to TX queue structure. 3936 * @param loc 3937 * Pointer to burst routine local context. 3938 * @param len 3939 * Total size of descriptor/data in bytes. 3940 * @param slen 3941 * Accumulated statistics, data bytes sent. 3942 * @param wqem 3943 * The base WQE for the eMPW/MPW descriptor. 3944 * @param olx 3945 * Configured Tx offloads mask. It is fully defined at 3946 * compile time and may be used for optimization.
3951 */ 3952 static __rte_always_inline void 3953 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 3954 struct mlx5_txq_local *__rte_restrict loc, 3955 unsigned int len, 3956 unsigned int slen, 3957 struct mlx5_wqe *__rte_restrict wqem, 3958 unsigned int olx __rte_unused) 3959 { 3960 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 3961 3962 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3963 #ifdef MLX5_PMD_SOFT_COUNTERS 3964 /* Update sent data bytes counter. */ 3965 txq->stats.obytes += slen; 3966 #else 3967 (void)slen; 3968 #endif 3969 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 3970 /* 3971 * If the legacy MPW session contains the inline packets 3972 * we should set the only inline data segment length 3973 * and align the total length to the segment size. 3974 */ 3975 MLX5_ASSERT(len > sizeof(dseg->bcount)); 3976 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 3977 MLX5_ETH_WQE_DATA_INLINE); 3978 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 3979 } else { 3980 /* 3981 * The session is not legacy MPW or contains the 3982 * data buffer pointer segments. 3983 */ 3984 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 3985 len = len / MLX5_WSEG_SIZE + 2; 3986 } 3987 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 3988 txq->wqe_ci += (len + 3) / 4; 3989 loc->wqe_free -= (len + 3) / 4; 3990 loc->wqe_last = wqem; 3991 } 3992 3993 /** 3994 * The set of Tx burst functions for single-segment packets 3995 * without TSO and with Multi-Packet Writing feature support. 3996 * Supports all types of Tx offloads, except multi-packets 3997 * and TSO. 3998 * 3999 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends 4000 * as many packet per WQE as it can. If eMPW is not configured 4001 * or packet can not be sent with eMPW (VLAN insertion) the 4002 * ordinary SEND opcode is used and only one packet placed 4003 * in WQE. 4004 * 4005 * Functions stop sending if it encounters the multi-segment 4006 * packet or packet with TSO requested. 4007 * 4008 * The routines are responsible for storing processed mbuf 4009 * into elts ring buffer and update elts_head if inlining 4010 * offload is requested. Otherwise the copying mbufs to elts 4011 * can be postponed and completed at the end of burst routine. 4012 * 4013 * @param txq 4014 * Pointer to TX queue structure. 4015 * @param[in] pkts 4016 * Packets to transmit. 4017 * @param pkts_n 4018 * Number of packets in array. 4019 * @param loc 4020 * Pointer to burst routine local context. 4021 * @param olx 4022 * Configured Tx offloads mask. It is fully defined at 4023 * compile time and may be used for optimization. 4024 * 4025 * @return 4026 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 4027 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 4028 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 4029 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 4030 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 4031 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 4032 * 4033 * Local context variables updated. 4034 * 4035 * 4036 * The routine sends packets with MLX5_OPCODE_EMPW 4037 * without inlining, this is dedicated optimized branch. 4038 * No VLAN insertion is supported. 
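 * For example, a non-inline eMPW batch of N packets is one WQE built from
 * a Control Segment, an Ethernet Segment and N pointer Data Segments; its
 * Control Segment DS count is completed as N + 2 by mlx5_tx_sdone_empw().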
4039 */ 4040 static __rte_always_inline enum mlx5_txcmp_code 4041 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 4042 struct rte_mbuf **__rte_restrict pkts, 4043 unsigned int pkts_n, 4044 struct mlx5_txq_local *__rte_restrict loc, 4045 unsigned int olx) 4046 { 4047 /* 4048 * Subroutine is the part of mlx5_tx_burst_single() 4049 * and sends single-segment packet with eMPW opcode 4050 * without data inlining. 4051 */ 4052 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4053 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 4054 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4055 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4056 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 4057 pkts += loc->pkts_sent + 1; 4058 pkts_n -= loc->pkts_sent; 4059 for (;;) { 4060 struct mlx5_wqe_dseg *__rte_restrict dseg; 4061 struct mlx5_wqe_eseg *__rte_restrict eseg; 4062 enum mlx5_txcmp_code ret; 4063 unsigned int part, loop; 4064 unsigned int slen = 0; 4065 4066 next_empw: 4067 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4068 if (MLX5_TXOFF_CONFIG(TXPP)) { 4069 enum mlx5_txcmp_code wret; 4070 4071 /* Generate WAIT for scheduling if requested. */ 4072 wret = mlx5_tx_schedule_send(txq, loc, olx); 4073 if (wret == MLX5_TXCMP_CODE_EXIT) 4074 return MLX5_TXCMP_CODE_EXIT; 4075 if (wret == MLX5_TXCMP_CODE_ERROR) 4076 return MLX5_TXCMP_CODE_ERROR; 4077 } 4078 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4079 MLX5_MPW_MAX_PACKETS : 4080 MLX5_EMPW_MAX_PACKETS); 4081 if (unlikely(loc->elts_free < part)) { 4082 /* We have no enough elts to save all mbufs. */ 4083 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 4084 return MLX5_TXCMP_CODE_EXIT; 4085 /* But we still able to send at least minimal eMPW. */ 4086 part = loc->elts_free; 4087 } 4088 /* Check whether we have enough WQEs */ 4089 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 4090 if (unlikely(loc->wqe_free < 4091 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4092 return MLX5_TXCMP_CODE_EXIT; 4093 part = (loc->wqe_free * 4) - 2; 4094 } 4095 if (likely(part > 1)) 4096 rte_prefetch0(*pkts); 4097 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4098 /* 4099 * Build eMPW title WQEBB: 4100 * - Control Segment, eMPW opcode 4101 * - Ethernet Segment, no inline 4102 */ 4103 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 4104 MLX5_OPCODE_ENHANCED_MPSW, olx); 4105 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 4106 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4107 eseg = &loc->wqe_last->eseg; 4108 dseg = &loc->wqe_last->dseg[0]; 4109 loop = part; 4110 /* Store the packet length for legacy MPW. */ 4111 if (MLX5_TXOFF_CONFIG(MPW)) 4112 eseg->mss = rte_cpu_to_be_16 4113 (rte_pktmbuf_data_len(loc->mbuf)); 4114 for (;;) { 4115 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4116 #ifdef MLX5_PMD_SOFT_COUNTERS 4117 /* Update sent data bytes counter. */ 4118 slen += dlen; 4119 #endif 4120 mlx5_tx_dseg_ptr 4121 (txq, loc, dseg, 4122 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4123 dlen, olx); 4124 if (unlikely(--loop == 0)) 4125 break; 4126 loc->mbuf = *pkts++; 4127 if (likely(loop > 1)) 4128 rte_prefetch0(*pkts); 4129 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4130 /* 4131 * Unroll the completion code to avoid 4132 * returning variable value - it results in 4133 * unoptimized sequent checking in caller. 
4134 */ 4135 if (ret == MLX5_TXCMP_CODE_MULTI) { 4136 part -= loop; 4137 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4138 if (unlikely(!loc->elts_free || 4139 !loc->wqe_free)) 4140 return MLX5_TXCMP_CODE_EXIT; 4141 return MLX5_TXCMP_CODE_MULTI; 4142 } 4143 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4144 if (ret == MLX5_TXCMP_CODE_TSO) { 4145 part -= loop; 4146 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4147 if (unlikely(!loc->elts_free || 4148 !loc->wqe_free)) 4149 return MLX5_TXCMP_CODE_EXIT; 4150 return MLX5_TXCMP_CODE_TSO; 4151 } 4152 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4153 part -= loop; 4154 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4155 if (unlikely(!loc->elts_free || 4156 !loc->wqe_free)) 4157 return MLX5_TXCMP_CODE_EXIT; 4158 return MLX5_TXCMP_CODE_SINGLE; 4159 } 4160 if (ret != MLX5_TXCMP_CODE_EMPW) { 4161 MLX5_ASSERT(false); 4162 part -= loop; 4163 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4164 return MLX5_TXCMP_CODE_ERROR; 4165 } 4166 /* 4167 * Check whether packet parameters coincide 4168 * within assumed eMPW batch: 4169 * - check sum settings 4170 * - metadata value 4171 * - software parser settings 4172 * - packets length (legacy MPW only) 4173 * - scheduling is not required 4174 */ 4175 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 4176 MLX5_ASSERT(loop); 4177 part -= loop; 4178 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4179 if (unlikely(!loc->elts_free || 4180 !loc->wqe_free)) 4181 return MLX5_TXCMP_CODE_EXIT; 4182 pkts_n -= part; 4183 goto next_empw; 4184 } 4185 /* Packet attributes match, continue the same eMPW. */ 4186 ++dseg; 4187 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4188 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4189 } 4190 /* eMPW is built successfully, update loop parameters. */ 4191 MLX5_ASSERT(!loop); 4192 MLX5_ASSERT(pkts_n >= part); 4193 #ifdef MLX5_PMD_SOFT_COUNTERS 4194 /* Update sent data bytes counter. */ 4195 txq->stats.obytes += slen; 4196 #endif 4197 loc->elts_free -= part; 4198 loc->pkts_sent += part; 4199 txq->wqe_ci += (2 + part + 3) / 4; 4200 loc->wqe_free -= (2 + part + 3) / 4; 4201 pkts_n -= part; 4202 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4203 return MLX5_TXCMP_CODE_EXIT; 4204 loc->mbuf = *pkts++; 4205 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4206 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 4207 return ret; 4208 /* Continue sending eMPW batches. */ 4209 } 4210 MLX5_ASSERT(false); 4211 } 4212 4213 /** 4214 * The routine sends packets with MLX5_OPCODE_EMPW 4215 * with inlining, optionally supports VLAN insertion. 4216 */ 4217 static __rte_always_inline enum mlx5_txcmp_code 4218 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 4219 struct rte_mbuf **__rte_restrict pkts, 4220 unsigned int pkts_n, 4221 struct mlx5_txq_local *__rte_restrict loc, 4222 unsigned int olx) 4223 { 4224 /* 4225 * Subroutine is the part of mlx5_tx_burst_single() 4226 * and sends single-segment packet with eMPW opcode 4227 * with data inlining. 
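 * The free Data Segment space is tracked in bytes ('room'): each inlined
 * packet consumes its descriptor/data length and each pointer descriptor
 * consumes MLX5_WQE_DSEG_SIZE. 'part' ends up holding the bytes actually
 * used and is passed to mlx5_tx_idone_empw() to close the session.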
4228 */ 4229 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4230 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 4231 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4232 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4233 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 4234 pkts += loc->pkts_sent + 1; 4235 pkts_n -= loc->pkts_sent; 4236 for (;;) { 4237 struct mlx5_wqe_dseg *__rte_restrict dseg; 4238 struct mlx5_wqe *__rte_restrict wqem; 4239 enum mlx5_txcmp_code ret; 4240 unsigned int room, part, nlim; 4241 unsigned int slen = 0; 4242 4243 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4244 if (MLX5_TXOFF_CONFIG(TXPP)) { 4245 enum mlx5_txcmp_code wret; 4246 4247 /* Generate WAIT for scheduling if requested. */ 4248 wret = mlx5_tx_schedule_send(txq, loc, olx); 4249 if (wret == MLX5_TXCMP_CODE_EXIT) 4250 return MLX5_TXCMP_CODE_EXIT; 4251 if (wret == MLX5_TXCMP_CODE_ERROR) 4252 return MLX5_TXCMP_CODE_ERROR; 4253 } 4254 /* 4255 * Limits the amount of packets in one WQE 4256 * to improve CQE latency generation. 4257 */ 4258 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4259 MLX5_MPW_INLINE_MAX_PACKETS : 4260 MLX5_EMPW_MAX_PACKETS); 4261 /* Check whether we have minimal amount WQEs */ 4262 if (unlikely(loc->wqe_free < 4263 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4264 return MLX5_TXCMP_CODE_EXIT; 4265 if (likely(pkts_n > 1)) 4266 rte_prefetch0(*pkts); 4267 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4268 /* 4269 * Build eMPW title WQEBB: 4270 * - Control Segment, eMPW opcode, zero DS 4271 * - Ethernet Segment, no inline 4272 */ 4273 mlx5_tx_cseg_init(txq, loc, wqem, 0, 4274 MLX5_OPCODE_ENHANCED_MPSW, olx); 4275 mlx5_tx_eseg_none(txq, loc, wqem, 4276 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4277 dseg = &wqem->dseg[0]; 4278 /* Store the packet length for legacy MPW. */ 4279 if (MLX5_TXOFF_CONFIG(MPW)) 4280 wqem->eseg.mss = rte_cpu_to_be_16 4281 (rte_pktmbuf_data_len(loc->mbuf)); 4282 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 4283 loc->wqe_free) * MLX5_WQE_SIZE - 4284 MLX5_WQE_CSEG_SIZE - 4285 MLX5_WQE_ESEG_SIZE; 4286 /* Limit the room for legacy MPW sessions for performance. */ 4287 if (MLX5_TXOFF_CONFIG(MPW)) 4288 room = RTE_MIN(room, 4289 RTE_MAX(txq->inlen_empw + 4290 sizeof(dseg->bcount) + 4291 (MLX5_TXOFF_CONFIG(VLAN) ? 4292 sizeof(struct rte_vlan_hdr) : 0), 4293 MLX5_MPW_INLINE_MAX_PACKETS * 4294 MLX5_WQE_DSEG_SIZE)); 4295 /* Build WQE till we have space, packets and resources. */ 4296 part = room; 4297 for (;;) { 4298 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4299 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 4300 unsigned int tlen; 4301 4302 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 4303 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 4304 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 4305 /* 4306 * Some Tx offloads may cause an error if 4307 * packet is not long enough, check against 4308 * assumed minimal length. 4309 */ 4310 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 4311 part -= room; 4312 if (unlikely(!part)) 4313 return MLX5_TXCMP_CODE_ERROR; 4314 /* 4315 * We have some successfully built 4316 * packet Data Segments to send. 4317 */ 4318 mlx5_tx_idone_empw(txq, loc, part, 4319 slen, wqem, olx); 4320 return MLX5_TXCMP_CODE_ERROR; 4321 } 4322 /* Inline or not inline - that's the Question. */ 4323 if (dlen > txq->inlen_empw || 4324 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) 4325 goto pointer_empw; 4326 if (MLX5_TXOFF_CONFIG(MPW)) { 4327 if (dlen > txq->inlen_send) 4328 goto pointer_empw; 4329 tlen = dlen; 4330 if (part == room) { 4331 /* Open new inline MPW session. 
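 * The first Data Segment of the session starts with a bcount header:
 * it is zeroed here and set to the total inline length when the session
 * is closed in mlx5_tx_idone_empw().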
*/ 4332 tlen += sizeof(dseg->bcount); 4333 dseg->bcount = RTE_BE32(0); 4334 dseg = RTE_PTR_ADD 4335 (dseg, sizeof(dseg->bcount)); 4336 } else { 4337 /* 4338 * Pointer and inline descriptors must not be 4339 * intermixed within a legacy MPW session. 4340 */ 4341 if (wqem->dseg[0].bcount) 4342 break; 4343 } 4344 } else { 4345 tlen = sizeof(dseg->bcount) + dlen; 4346 } 4347 /* Inline entire packet, optional VLAN insertion. */ 4348 if (MLX5_TXOFF_CONFIG(VLAN) && 4349 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4350 /* 4351 * The packet length was already checked in 4352 * mlx5_tx_able_to_empw(), so the packet is 4353 * guaranteed to fit into the inline length. 4354 */ 4355 MLX5_ASSERT((dlen + 4356 sizeof(struct rte_vlan_hdr)) <= 4357 txq->inlen_empw); 4358 tlen += sizeof(struct rte_vlan_hdr); 4359 if (room < tlen) 4360 break; 4361 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 4362 dptr, dlen, olx); 4363 #ifdef MLX5_PMD_SOFT_COUNTERS 4364 /* Update sent data bytes counter. */ 4365 slen += sizeof(struct rte_vlan_hdr); 4366 #endif 4367 } else { 4368 if (room < tlen) 4369 break; 4370 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 4371 dptr, dlen, olx); 4372 } 4373 if (!MLX5_TXOFF_CONFIG(MPW)) 4374 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 4375 MLX5_ASSERT(room >= tlen); 4376 room -= tlen; 4377 /* 4378 * Packet data are completely inlined, 4379 * free the packet immediately. 4380 */ 4381 rte_pktmbuf_free_seg(loc->mbuf); 4382 goto next_mbuf; 4383 pointer_empw: 4384 /* 4385 * Pointer and inline descriptors must not be 4386 * intermixed within a legacy MPW session. 4387 */ 4388 if (MLX5_TXOFF_CONFIG(MPW) && 4389 part != room && 4390 wqem->dseg[0].bcount == RTE_BE32(0)) 4391 break; 4392 /* 4393 * Non-inlinable VLAN packets are 4394 * processed outside of this routine. 4395 */ 4396 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 4397 if (MLX5_TXOFF_CONFIG(VLAN)) 4398 MLX5_ASSERT(!(loc->mbuf->ol_flags & 4399 PKT_TX_VLAN_PKT)); 4400 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 4401 /* We have to store mbuf in elts.*/ 4402 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 4403 room -= MLX5_WQE_DSEG_SIZE; 4404 /* Ring buffer wraparound is checked at the loop end.*/ 4405 ++dseg; 4406 next_mbuf: 4407 #ifdef MLX5_PMD_SOFT_COUNTERS 4408 /* Update sent data bytes counter. */ 4409 slen += dlen; 4410 #endif 4411 loc->pkts_sent++; 4412 loc->elts_free--; 4413 pkts_n--; 4414 if (unlikely(!pkts_n || !loc->elts_free)) { 4415 /* 4416 * We have no resources/packets to 4417 * continue building descriptors. 4418 */ 4419 part -= room; 4420 mlx5_tx_idone_empw(txq, loc, part, 4421 slen, wqem, olx); 4422 return MLX5_TXCMP_CODE_EXIT; 4423 } 4424 loc->mbuf = *pkts++; 4425 if (likely(pkts_n > 1)) 4426 rte_prefetch0(*pkts); 4427 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4428 /* 4429 * Unroll the completion code to avoid 4430 * returning a variable value - it results in 4431 * unoptimized sequential checking in the caller.
4432 */ 4433 if (ret == MLX5_TXCMP_CODE_MULTI) { 4434 part -= room; 4435 mlx5_tx_idone_empw(txq, loc, part, 4436 slen, wqem, olx); 4437 if (unlikely(!loc->elts_free || 4438 !loc->wqe_free)) 4439 return MLX5_TXCMP_CODE_EXIT; 4440 return MLX5_TXCMP_CODE_MULTI; 4441 } 4442 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4443 if (ret == MLX5_TXCMP_CODE_TSO) { 4444 part -= room; 4445 mlx5_tx_idone_empw(txq, loc, part, 4446 slen, wqem, olx); 4447 if (unlikely(!loc->elts_free || 4448 !loc->wqe_free)) 4449 return MLX5_TXCMP_CODE_EXIT; 4450 return MLX5_TXCMP_CODE_TSO; 4451 } 4452 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4453 part -= room; 4454 mlx5_tx_idone_empw(txq, loc, part, 4455 slen, wqem, olx); 4456 if (unlikely(!loc->elts_free || 4457 !loc->wqe_free)) 4458 return MLX5_TXCMP_CODE_EXIT; 4459 return MLX5_TXCMP_CODE_SINGLE; 4460 } 4461 if (ret != MLX5_TXCMP_CODE_EMPW) { 4462 MLX5_ASSERT(false); 4463 part -= room; 4464 mlx5_tx_idone_empw(txq, loc, part, 4465 slen, wqem, olx); 4466 return MLX5_TXCMP_CODE_ERROR; 4467 } 4468 /* Check if we have minimal room left. */ 4469 nlim--; 4470 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 4471 break; 4472 /* 4473 * Check whether packet parameters coincide 4474 * within assumed eMPW batch: 4475 * - check sum settings 4476 * - metadata value 4477 * - software parser settings 4478 * - packets length (legacy MPW only) 4479 * - scheduling is not required 4480 */ 4481 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 4482 loc, dlen, olx)) 4483 break; 4484 /* Packet attributes match, continue the same eMPW. */ 4485 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4486 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4487 } 4488 /* 4489 * We get here to close an existing eMPW 4490 * session and start the new one. 4491 */ 4492 MLX5_ASSERT(pkts_n); 4493 part -= room; 4494 if (unlikely(!part)) 4495 return MLX5_TXCMP_CODE_EXIT; 4496 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 4497 if (unlikely(!loc->elts_free || 4498 !loc->wqe_free)) 4499 return MLX5_TXCMP_CODE_EXIT; 4500 /* Continue the loop with new eMPW session. */ 4501 } 4502 MLX5_ASSERT(false); 4503 } 4504 4505 /** 4506 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 4507 * Data inlining and VLAN insertion are supported. 4508 */ 4509 static __rte_always_inline enum mlx5_txcmp_code 4510 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 4511 struct rte_mbuf **__rte_restrict pkts, 4512 unsigned int pkts_n, 4513 struct mlx5_txq_local *__rte_restrict loc, 4514 unsigned int olx) 4515 { 4516 /* 4517 * Subroutine is the part of mlx5_tx_burst_single() 4518 * and sends single-segment packet with SEND opcode. 4519 */ 4520 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4521 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4522 pkts += loc->pkts_sent + 1; 4523 pkts_n -= loc->pkts_sent; 4524 for (;;) { 4525 struct mlx5_wqe *__rte_restrict wqe; 4526 enum mlx5_txcmp_code ret; 4527 4528 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4529 if (MLX5_TXOFF_CONFIG(TXPP)) { 4530 enum mlx5_txcmp_code wret; 4531 4532 /* Generate WAIT for scheduling if requested. 
*/ 4533 wret = mlx5_tx_schedule_send(txq, loc, olx); 4534 if (wret == MLX5_TXCMP_CODE_EXIT) 4535 return MLX5_TXCMP_CODE_EXIT; 4536 if (wret == MLX5_TXCMP_CODE_ERROR) 4537 return MLX5_TXCMP_CODE_ERROR; 4538 } 4539 if (MLX5_TXOFF_CONFIG(INLINE)) { 4540 unsigned int inlen, vlan = 0; 4541 4542 inlen = rte_pktmbuf_data_len(loc->mbuf); 4543 if (MLX5_TXOFF_CONFIG(VLAN) && 4544 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4545 vlan = sizeof(struct rte_vlan_hdr); 4546 inlen += vlan; 4547 static_assert((sizeof(struct rte_vlan_hdr) + 4548 sizeof(struct rte_ether_hdr)) == 4549 MLX5_ESEG_MIN_INLINE_SIZE, 4550 "invalid min inline data size"); 4551 } 4552 /* 4553 * If inlining is enabled at configuration time 4554 * the limit must be not less than minimal size. 4555 * Otherwise we would do extra check for data 4556 * size to avoid crashes due to length overflow. 4557 */ 4558 MLX5_ASSERT(txq->inlen_send >= 4559 MLX5_ESEG_MIN_INLINE_SIZE); 4560 if (inlen <= txq->inlen_send) { 4561 unsigned int seg_n, wqe_n; 4562 4563 rte_prefetch0(rte_pktmbuf_mtod 4564 (loc->mbuf, uint8_t *)); 4565 /* Check against minimal length. */ 4566 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 4567 return MLX5_TXCMP_CODE_ERROR; 4568 if (loc->mbuf->ol_flags & 4569 PKT_TX_DYNF_NOINLINE) { 4570 /* 4571 * The hint flag not to inline packet 4572 * data is set. Check whether we can 4573 * follow the hint. 4574 */ 4575 if ((!MLX5_TXOFF_CONFIG(EMPW) && 4576 txq->inlen_mode) || 4577 (MLX5_TXOFF_CONFIG(MPW) && 4578 txq->inlen_mode)) { 4579 if (inlen <= txq->inlen_send) 4580 goto single_inline; 4581 /* 4582 * The hardware requires the 4583 * minimal inline data header. 4584 */ 4585 goto single_min_inline; 4586 } 4587 if (MLX5_TXOFF_CONFIG(VLAN) && 4588 vlan && !txq->vlan_en) { 4589 /* 4590 * We must insert VLAN tag 4591 * by software means. 4592 */ 4593 goto single_part_inline; 4594 } 4595 goto single_no_inline; 4596 } 4597 single_inline: 4598 /* 4599 * Completely inlined packet data WQE: 4600 * - Control Segment, SEND opcode 4601 * - Ethernet Segment, no VLAN insertion 4602 * - Data inlined, VLAN optionally inserted 4603 * - Alignment to MLX5_WSEG_SIZE 4604 * Have to estimate amount of WQEBBs 4605 */ 4606 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 4607 MLX5_ESEG_MIN_INLINE_SIZE + 4608 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4609 /* Check if there are enough WQEBBs. */ 4610 wqe_n = (seg_n + 3) / 4; 4611 if (wqe_n > loc->wqe_free) 4612 return MLX5_TXCMP_CODE_EXIT; 4613 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4614 loc->wqe_last = wqe; 4615 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 4616 MLX5_OPCODE_SEND, olx); 4617 mlx5_tx_eseg_data(txq, loc, wqe, 4618 vlan, inlen, 0, olx); 4619 txq->wqe_ci += wqe_n; 4620 loc->wqe_free -= wqe_n; 4621 /* 4622 * Packet data are completely inlined, 4623 * free the packet immediately. 4624 */ 4625 rte_pktmbuf_free_seg(loc->mbuf); 4626 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 4627 MLX5_TXOFF_CONFIG(MPW)) && 4628 txq->inlen_mode) { 4629 /* 4630 * If minimal inlining is requested the eMPW 4631 * feature should be disabled due to data is 4632 * inlined into Ethernet Segment, which can 4633 * not contain inlined data for eMPW due to 4634 * segment shared for all packets. 4635 */ 4636 struct mlx5_wqe_dseg *__rte_restrict dseg; 4637 unsigned int ds; 4638 uint8_t *dptr; 4639 4640 /* 4641 * The inline-mode settings require 4642 * to inline the specified amount of 4643 * data bytes to the Ethernet Segment. 4644 * We should check the free space in 4645 * WQE ring buffer to inline partially. 
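 * For example (illustrative numbers): with inlen_mode of 128 bytes, a
 * 1000-byte packet is built as an Ethernet Segment carrying the first
 * 128 bytes of the packet followed by a pointer Data Segment referencing
 * the remaining 872 bytes.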
4646 */ 4647 single_min_inline: 4648 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 4649 MLX5_ASSERT(inlen > txq->inlen_mode); 4650 MLX5_ASSERT(txq->inlen_mode >= 4651 MLX5_ESEG_MIN_INLINE_SIZE); 4652 /* 4653 * Check whether there are enough free WQEBBs: 4654 * - Control Segment 4655 * - Ethernet Segment 4656 * - First Segment of inlined Ethernet data 4657 * - ... data continued ... 4658 * - Finishing Data Segment of pointer type 4659 */ 4660 ds = (MLX5_WQE_CSEG_SIZE + 4661 MLX5_WQE_ESEG_SIZE + 4662 MLX5_WQE_DSEG_SIZE + 4663 txq->inlen_mode - 4664 MLX5_ESEG_MIN_INLINE_SIZE + 4665 MLX5_WQE_DSEG_SIZE + 4666 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4667 if (loc->wqe_free < ((ds + 3) / 4)) 4668 return MLX5_TXCMP_CODE_EXIT; 4669 /* 4670 * Build the ordinary SEND WQE: 4671 * - Control Segment 4672 * - Ethernet Segment, inline inlen_mode bytes 4673 * - Data Segment of pointer type 4674 */ 4675 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4676 loc->wqe_last = wqe; 4677 mlx5_tx_cseg_init(txq, loc, wqe, ds, 4678 MLX5_OPCODE_SEND, olx); 4679 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 4680 txq->inlen_mode, 4681 0, olx); 4682 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4683 txq->inlen_mode - vlan; 4684 inlen -= txq->inlen_mode; 4685 mlx5_tx_dseg_ptr(txq, loc, dseg, 4686 dptr, inlen, olx); 4687 /* 4688 * WQE is built, update the loop parameters 4689 * and got to the next packet. 4690 */ 4691 txq->wqe_ci += (ds + 3) / 4; 4692 loc->wqe_free -= (ds + 3) / 4; 4693 /* We have to store mbuf in elts.*/ 4694 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4695 txq->elts[txq->elts_head++ & txq->elts_m] = 4696 loc->mbuf; 4697 --loc->elts_free; 4698 } else { 4699 uint8_t *dptr; 4700 unsigned int dlen; 4701 4702 /* 4703 * Partially inlined packet data WQE, we have 4704 * some space in title WQEBB, we can fill it 4705 * with some packet data. It takes one WQEBB, 4706 * it is available, no extra space check: 4707 * - Control Segment, SEND opcode 4708 * - Ethernet Segment, no VLAN insertion 4709 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 4710 * - Data Segment, pointer type 4711 * 4712 * We also get here if VLAN insertion is not 4713 * supported by HW, the inline is enabled. 4714 */ 4715 single_part_inline: 4716 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4717 loc->wqe_last = wqe; 4718 mlx5_tx_cseg_init(txq, loc, wqe, 4, 4719 MLX5_OPCODE_SEND, olx); 4720 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 4721 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4722 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 4723 /* 4724 * The length check is performed above, by 4725 * comparing with txq->inlen_send. We should 4726 * not get overflow here. 4727 */ 4728 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 4729 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 4730 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 4731 dptr, dlen, olx); 4732 ++txq->wqe_ci; 4733 --loc->wqe_free; 4734 /* We have to store mbuf in elts.*/ 4735 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4736 txq->elts[txq->elts_head++ & txq->elts_m] = 4737 loc->mbuf; 4738 --loc->elts_free; 4739 } 4740 #ifdef MLX5_PMD_SOFT_COUNTERS 4741 /* Update sent data bytes counter. */ 4742 txq->stats.obytes += vlan + 4743 rte_pktmbuf_data_len(loc->mbuf); 4744 #endif 4745 } else { 4746 /* 4747 * No inline at all, it means the CPU cycles saving 4748 * is prioritized at configuration, we should not 4749 * copy any packet data to WQE. 
4750 * 4751 * SEND WQE, one WQEBB: 4752 * - Control Segment, SEND opcode 4753 * - Ethernet Segment, optional VLAN, no inline 4754 * - Data Segment, pointer type 4755 */ 4756 single_no_inline: 4757 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4758 loc->wqe_last = wqe; 4759 mlx5_tx_cseg_init(txq, loc, wqe, 3, 4760 MLX5_OPCODE_SEND, olx); 4761 mlx5_tx_eseg_none(txq, loc, wqe, olx); 4762 mlx5_tx_dseg_ptr 4763 (txq, loc, &wqe->dseg[0], 4764 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4765 rte_pktmbuf_data_len(loc->mbuf), olx); 4766 ++txq->wqe_ci; 4767 --loc->wqe_free; 4768 /* 4769 * We should not store mbuf pointer in elts 4770 * if no inlining is configured, this is done 4771 * by calling routine in a batch copy. 4772 */ 4773 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4774 --loc->elts_free; 4775 #ifdef MLX5_PMD_SOFT_COUNTERS 4776 /* Update sent data bytes counter. */ 4777 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 4778 if (MLX5_TXOFF_CONFIG(VLAN) && 4779 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 4780 txq->stats.obytes += 4781 sizeof(struct rte_vlan_hdr); 4782 #endif 4783 } 4784 ++loc->pkts_sent; 4785 --pkts_n; 4786 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4787 return MLX5_TXCMP_CODE_EXIT; 4788 loc->mbuf = *pkts++; 4789 if (pkts_n > 1) 4790 rte_prefetch0(*pkts); 4791 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4792 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 4793 return ret; 4794 } 4795 MLX5_ASSERT(false); 4796 } 4797 4798 static __rte_always_inline enum mlx5_txcmp_code 4799 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 4800 struct rte_mbuf **__rte_restrict pkts, 4801 unsigned int pkts_n, 4802 struct mlx5_txq_local *__rte_restrict loc, 4803 unsigned int olx) 4804 { 4805 enum mlx5_txcmp_code ret; 4806 4807 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 4808 if (ret == MLX5_TXCMP_CODE_SINGLE) 4809 goto ordinary_send; 4810 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 4811 for (;;) { 4812 /* Optimize for inline/no inline eMPW send. */ 4813 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 4814 mlx5_tx_burst_empw_inline 4815 (txq, pkts, pkts_n, loc, olx) : 4816 mlx5_tx_burst_empw_simple 4817 (txq, pkts, pkts_n, loc, olx); 4818 if (ret != MLX5_TXCMP_CODE_SINGLE) 4819 return ret; 4820 /* The resources to send one packet should remain. */ 4821 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4822 ordinary_send: 4823 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 4824 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 4825 if (ret != MLX5_TXCMP_CODE_EMPW) 4826 return ret; 4827 /* The resources to send one packet should remain. */ 4828 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4829 } 4830 } 4831 4832 /** 4833 * DPDK Tx callback template. This is configured template 4834 * used to generate routines optimized for specified offload setup. 4835 * One of this generated functions is chosen at SQ configuration 4836 * time. 4837 * 4838 * @param txq 4839 * Generic pointer to TX queue structure. 4840 * @param[in] pkts 4841 * Packets to transmit. 4842 * @param pkts_n 4843 * Number of packets in array. 4844 * @param olx 4845 * Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx 4846 * values. Should be static to take compile time static configuration 4847 * advantages. 4848 * 4849 * @return 4850 * Number of packets successfully transmitted (<= pkts_n). 
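 *
 * @note For example, MLX5_TXOFF_DECL(full_empw, MLX5_TXOFF_CONFIG_FULL |
 * MLX5_TXOFF_CONFIG_EMPW) below instantiates this template with that
 * constant offload mask, so the checks for offloads outside the mask can
 * be optimized out at compile time.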
4851 */ 4852 static __rte_always_inline uint16_t 4853 mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, 4854 struct rte_mbuf **__rte_restrict pkts, 4855 uint16_t pkts_n, 4856 unsigned int olx) 4857 { 4858 struct mlx5_txq_local loc; 4859 enum mlx5_txcmp_code ret; 4860 unsigned int part; 4861 4862 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4863 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4864 if (unlikely(!pkts_n)) 4865 return 0; 4866 loc.pkts_sent = 0; 4867 loc.pkts_copy = 0; 4868 loc.wqe_last = NULL; 4869 4870 send_loop: 4871 loc.pkts_loop = loc.pkts_sent; 4872 /* 4873 * Check if there are some CQEs, if any: 4874 * - process encountered errors 4875 * - process the completed WQEs 4876 * - free related mbufs 4877 * - doorbell the NIC about processed CQEs 4878 */ 4879 rte_prefetch0(*(pkts + loc.pkts_sent)); 4880 mlx5_tx_handle_completion(txq, olx); 4881 /* 4882 * Calculate the number of available resources - elts and WQEs. 4883 * There are two possible different scenarios: 4884 * - no data inlining into WQEs, one WQEBB may contain up to 4885 * four packets, in this case elts become the scarce resource 4886 * - data inlining into WQEs, one packet may require multiple 4887 * WQEBBs, so the WQEs become the limiting factor. 4888 */ 4889 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4890 loc.elts_free = txq->elts_s - 4891 (uint16_t)(txq->elts_head - txq->elts_tail); 4892 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4893 loc.wqe_free = txq->wqe_s - 4894 (uint16_t)(txq->wqe_ci - txq->wqe_pi); 4895 if (unlikely(!loc.elts_free || !loc.wqe_free)) 4896 goto burst_exit; 4897 for (;;) { 4898 /* 4899 * Fetch the packet from the array. Usually this is 4900 * the first packet in a series of multi/single- 4901 * segment packets. 4902 */ 4903 loc.mbuf = *(pkts + loc.pkts_sent); 4904 /* Dedicated branch for multi-segment packets. */ 4905 if (MLX5_TXOFF_CONFIG(MULTI) && 4906 unlikely(NB_SEGS(loc.mbuf) > 1)) { 4907 /* 4908 * Multi-segment packet encountered. 4909 * Hardware is able to process it only 4910 * with SEND/TSO opcodes, one packet 4911 * per WQE, do it in the dedicated routine. 4912 */ 4913 enter_send_multi: 4914 MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy); 4915 part = loc.pkts_sent - loc.pkts_copy; 4916 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 4917 /* 4918 * There are some single-segment mbufs not 4919 * stored in elts. The mbufs must be in the 4920 * same order as WQEs, so we must copy the 4921 * mbufs to elts here, before the mbufs of the 4922 * coming multi-segment packet are appended. 4923 */ 4924 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, 4925 part, olx); 4926 loc.pkts_copy = loc.pkts_sent; 4927 } 4928 MLX5_ASSERT(pkts_n > loc.pkts_sent); 4929 ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx); 4930 if (!MLX5_TXOFF_CONFIG(INLINE)) 4931 loc.pkts_copy = loc.pkts_sent; 4932 /* 4933 * These return code checks are supposed 4934 * to be optimized out due to routine inlining. 4935 */ 4936 if (ret == MLX5_TXCMP_CODE_EXIT) { 4937 /* 4938 * The routine returns this code when 4939 * all packets are sent or there are not 4940 * enough resources to complete the request. 4941 */ 4942 break; 4943 } 4944 if (ret == MLX5_TXCMP_CODE_ERROR) { 4945 /* 4946 * The routine returns this code when 4947 * some error in the incoming packet 4948 * format occurred.
4949 */ 4950 txq->stats.oerrors++; 4951 break; 4952 } 4953 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4954 /* 4955 * The single-segment packet was encountered 4956 * in the array, try to send it with the 4957 * best optimized way, possible engaging eMPW. 4958 */ 4959 goto enter_send_single; 4960 } 4961 if (MLX5_TXOFF_CONFIG(TSO) && 4962 ret == MLX5_TXCMP_CODE_TSO) { 4963 /* 4964 * The single-segment TSO packet was 4965 * encountered in the array. 4966 */ 4967 goto enter_send_tso; 4968 } 4969 /* We must not get here. Something is going wrong. */ 4970 MLX5_ASSERT(false); 4971 txq->stats.oerrors++; 4972 break; 4973 } 4974 /* Dedicated branch for single-segment TSO packets. */ 4975 if (MLX5_TXOFF_CONFIG(TSO) && 4976 unlikely(loc.mbuf->ol_flags & PKT_TX_TCP_SEG)) { 4977 /* 4978 * TSO might require special way for inlining 4979 * (dedicated parameters) and is sent with 4980 * MLX5_OPCODE_TSO opcode only, provide this 4981 * in dedicated branch. 4982 */ 4983 enter_send_tso: 4984 MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1); 4985 MLX5_ASSERT(pkts_n > loc.pkts_sent); 4986 ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx); 4987 /* 4988 * These returned code checks are supposed 4989 * to be optimized out due to routine inlining. 4990 */ 4991 if (ret == MLX5_TXCMP_CODE_EXIT) 4992 break; 4993 if (ret == MLX5_TXCMP_CODE_ERROR) { 4994 txq->stats.oerrors++; 4995 break; 4996 } 4997 if (ret == MLX5_TXCMP_CODE_SINGLE) 4998 goto enter_send_single; 4999 if (MLX5_TXOFF_CONFIG(MULTI) && 5000 ret == MLX5_TXCMP_CODE_MULTI) { 5001 /* 5002 * The multi-segment packet was 5003 * encountered in the array. 5004 */ 5005 goto enter_send_multi; 5006 } 5007 /* We must not get here. Something is going wrong. */ 5008 MLX5_ASSERT(false); 5009 txq->stats.oerrors++; 5010 break; 5011 } 5012 /* 5013 * The dedicated branch for the single-segment packets 5014 * without TSO. Often these ones can be sent using 5015 * MLX5_OPCODE_EMPW with multiple packets in one WQE. 5016 * The routine builds the WQEs till it encounters 5017 * the TSO or multi-segment packet (in case if these 5018 * offloads are requested at SQ configuration time). 5019 */ 5020 enter_send_single: 5021 MLX5_ASSERT(pkts_n > loc.pkts_sent); 5022 ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx); 5023 /* 5024 * These returned code checks are supposed 5025 * to be optimized out due to routine inlining. 5026 */ 5027 if (ret == MLX5_TXCMP_CODE_EXIT) 5028 break; 5029 if (ret == MLX5_TXCMP_CODE_ERROR) { 5030 txq->stats.oerrors++; 5031 break; 5032 } 5033 if (MLX5_TXOFF_CONFIG(MULTI) && 5034 ret == MLX5_TXCMP_CODE_MULTI) { 5035 /* 5036 * The multi-segment packet was 5037 * encountered in the array. 5038 */ 5039 goto enter_send_multi; 5040 } 5041 if (MLX5_TXOFF_CONFIG(TSO) && 5042 ret == MLX5_TXCMP_CODE_TSO) { 5043 /* 5044 * The single-segment TSO packet was 5045 * encountered in the array. 5046 */ 5047 goto enter_send_tso; 5048 } 5049 /* We must not get here. Something is going wrong. */ 5050 MLX5_ASSERT(false); 5051 txq->stats.oerrors++; 5052 break; 5053 } 5054 /* 5055 * Main Tx loop is completed, do the rest: 5056 * - set completion request if thresholds are reached 5057 * - doorbell the hardware 5058 * - copy the rest of mbufs to elts (if any) 5059 */ 5060 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) || 5061 loc.pkts_sent >= loc.pkts_copy); 5062 /* Take a shortcut if nothing is sent. */ 5063 if (unlikely(loc.pkts_sent == loc.pkts_loop)) 5064 goto burst_exit; 5065 /* Request CQE generation if limits are reached. 
*/ 5066 mlx5_tx_request_completion(txq, &loc, olx); 5067 /* 5068 * Ring QP doorbell immediately after WQE building completion 5069 * to improve latency. The purely software-related data treatment 5070 * can be completed after the doorbell. Tx CQEs for this SQ are 5071 * processed in this thread only by polling. 5072 * 5073 * The rdma core library can map the doorbell register in two ways, 5074 * depending on the environment variable "MLX5_SHUT_UP_BF": 5075 * 5076 * - as regular cached memory, the variable is either missing or 5077 * set to zero. This type of mapping may cause significant 5078 * doorbell register write latency and requires an explicit 5079 * memory write barrier to mitigate this issue and prevent 5080 * write combining. 5081 * 5082 * - as non-cached memory, the variable is present and set to 5083 * a non-zero value. This type of mapping may impact performance 5084 * under heavy load conditions but the explicit write 5085 * memory barrier is not required and it may improve core 5086 * performance. 5087 * 5088 * - the legacy behaviour (prior to the 19.08 release) was to use some 5089 * heuristics to decide whether the write memory barrier should 5090 * be performed. This behaviour is selected by specifying 5091 * tx_db_nc=2; the write barrier is skipped if the application 5092 * provides the full recommended burst of packets, assuming 5093 * the next packets are coming and the write barrier 5094 * will be issued on the next burst (after descriptor writing, 5095 * at least). 5096 */ 5097 mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc && 5098 (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST)); 5099 /* Not all of the mbufs may be stored into elts yet. */ 5100 part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy; 5101 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 5102 /* 5103 * There are some single-segment mbufs not stored in elts. 5104 * This can happen only if the last packet was single-segment. 5105 * The copying is gathered into one place because it is 5106 * a good opportunity to optimize it with SIMD. 5107 * Unfortunately, if inlining is enabled, gaps in the 5108 * pointer array may happen due to early freeing of the 5109 * inlined mbufs. 5110 */ 5111 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx); 5112 loc.pkts_copy = loc.pkts_sent; 5113 } 5114 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 5115 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 5116 if (pkts_n > loc.pkts_sent) { 5117 /* 5118 * If the burst size is large there might be not enough CQEs 5119 * fetched from the completion queue and not enough resources 5120 * freed to send all the packets. 5121 */ 5122 goto send_loop; 5123 } 5124 burst_exit: 5125 #ifdef MLX5_PMD_SOFT_COUNTERS 5126 /* Increment sent packets counter. */ 5127 txq->stats.opackets += loc.pkts_sent; 5128 #endif 5129 return loc.pkts_sent; 5130 } 5131 5132 /* Generate routines with Enhanced Multi-Packet Write support.
*/ 5133 MLX5_TXOFF_DECL(full_empw, 5134 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW) 5135 5136 MLX5_TXOFF_DECL(none_empw, 5137 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 5138 5139 MLX5_TXOFF_DECL(md_empw, 5140 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5141 5142 MLX5_TXOFF_DECL(mt_empw, 5143 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5144 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5145 5146 MLX5_TXOFF_DECL(mtsc_empw, 5147 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5148 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5149 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5150 5151 MLX5_TXOFF_DECL(mti_empw, 5152 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5153 MLX5_TXOFF_CONFIG_INLINE | 5154 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5155 5156 MLX5_TXOFF_DECL(mtv_empw, 5157 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5158 MLX5_TXOFF_CONFIG_VLAN | 5159 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5160 5161 MLX5_TXOFF_DECL(mtiv_empw, 5162 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5163 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5164 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5165 5166 MLX5_TXOFF_DECL(sc_empw, 5167 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5168 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5169 5170 MLX5_TXOFF_DECL(sci_empw, 5171 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5172 MLX5_TXOFF_CONFIG_INLINE | 5173 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5174 5175 MLX5_TXOFF_DECL(scv_empw, 5176 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5177 MLX5_TXOFF_CONFIG_VLAN | 5178 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5179 5180 MLX5_TXOFF_DECL(sciv_empw, 5181 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5182 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5183 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5184 5185 MLX5_TXOFF_DECL(i_empw, 5186 MLX5_TXOFF_CONFIG_INLINE | 5187 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5188 5189 MLX5_TXOFF_DECL(v_empw, 5190 MLX5_TXOFF_CONFIG_VLAN | 5191 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5192 5193 MLX5_TXOFF_DECL(iv_empw, 5194 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5195 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5196 5197 /* Generate routines without Enhanced Multi-Packet Write support. 
*/ 5198 MLX5_TXOFF_DECL(full, 5199 MLX5_TXOFF_CONFIG_FULL) 5200 5201 MLX5_TXOFF_DECL(none, 5202 MLX5_TXOFF_CONFIG_NONE) 5203 5204 MLX5_TXOFF_DECL(md, 5205 MLX5_TXOFF_CONFIG_METADATA) 5206 5207 MLX5_TXOFF_DECL(mt, 5208 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5209 MLX5_TXOFF_CONFIG_METADATA) 5210 5211 MLX5_TXOFF_DECL(mtsc, 5212 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5213 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5214 MLX5_TXOFF_CONFIG_METADATA) 5215 5216 MLX5_TXOFF_DECL(mti, 5217 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5218 MLX5_TXOFF_CONFIG_INLINE | 5219 MLX5_TXOFF_CONFIG_METADATA) 5220 5221 5222 MLX5_TXOFF_DECL(mtv, 5223 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5224 MLX5_TXOFF_CONFIG_VLAN | 5225 MLX5_TXOFF_CONFIG_METADATA) 5226 5227 5228 MLX5_TXOFF_DECL(mtiv, 5229 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5230 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5231 MLX5_TXOFF_CONFIG_METADATA) 5232 5233 MLX5_TXOFF_DECL(sc, 5234 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5235 MLX5_TXOFF_CONFIG_METADATA) 5236 5237 MLX5_TXOFF_DECL(sci, 5238 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5239 MLX5_TXOFF_CONFIG_INLINE | 5240 MLX5_TXOFF_CONFIG_METADATA) 5241 5242 5243 MLX5_TXOFF_DECL(scv, 5244 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5245 MLX5_TXOFF_CONFIG_VLAN | 5246 MLX5_TXOFF_CONFIG_METADATA) 5247 5248 5249 MLX5_TXOFF_DECL(sciv, 5250 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5251 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5252 MLX5_TXOFF_CONFIG_METADATA) 5253 5254 MLX5_TXOFF_DECL(i, 5255 MLX5_TXOFF_CONFIG_INLINE | 5256 MLX5_TXOFF_CONFIG_METADATA) 5257 5258 MLX5_TXOFF_DECL(v, 5259 MLX5_TXOFF_CONFIG_VLAN | 5260 MLX5_TXOFF_CONFIG_METADATA) 5261 5262 MLX5_TXOFF_DECL(iv, 5263 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5264 MLX5_TXOFF_CONFIG_METADATA) 5265 5266 /* Generate routines with timestamp scheduling. */ 5267 MLX5_TXOFF_DECL(full_ts_nompw, 5268 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP) 5269 5270 MLX5_TXOFF_DECL(full_ts_nompwi, 5271 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5272 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5273 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5274 MLX5_TXOFF_CONFIG_TXPP) 5275 5276 MLX5_TXOFF_DECL(full_ts, 5277 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP | 5278 MLX5_TXOFF_CONFIG_EMPW) 5279 5280 MLX5_TXOFF_DECL(full_ts_noi, 5281 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5282 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5283 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5284 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5285 5286 MLX5_TXOFF_DECL(none_ts, 5287 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP | 5288 MLX5_TXOFF_CONFIG_EMPW) 5289 5290 MLX5_TXOFF_DECL(mdi_ts, 5291 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5292 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5293 5294 MLX5_TXOFF_DECL(mti_ts, 5295 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5296 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5297 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5298 5299 MLX5_TXOFF_DECL(mtiv_ts, 5300 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5301 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5302 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP | 5303 MLX5_TXOFF_CONFIG_EMPW) 5304 5305 /* 5306 * Generate routines with Legacy Multi-Packet Write support. 
5307 * This mode is supported by ConnectX-4 Lx only and imposes 5308 * offload limitations, not supported: 5309 * - ACL/Flows (metadata are becoming meaningless) 5310 * - WQE Inline headers 5311 * - SRIOV (E-Switch offloads) 5312 * - VLAN insertion 5313 * - tunnel encapsulation/decapsulation 5314 * - TSO 5315 */ 5316 MLX5_TXOFF_DECL(none_mpw, 5317 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW | 5318 MLX5_TXOFF_CONFIG_MPW) 5319 5320 MLX5_TXOFF_DECL(mci_mpw, 5321 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 5322 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 5323 MLX5_TXOFF_CONFIG_MPW) 5324 5325 MLX5_TXOFF_DECL(mc_mpw, 5326 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 5327 MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW) 5328 5329 MLX5_TXOFF_DECL(i_mpw, 5330 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 5331 MLX5_TXOFF_CONFIG_MPW) 5332 5333 /* 5334 * Array of declared and compiled Tx burst function and corresponding 5335 * supported offloads set. The array is used to select the Tx burst 5336 * function for specified offloads set at Tx queue configuration time. 5337 */ 5338 const struct { 5339 eth_tx_burst_t func; 5340 unsigned int olx; 5341 } txoff_func[] = { 5342 MLX5_TXOFF_INFO(full_empw, 5343 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5344 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5345 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5346 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5347 5348 MLX5_TXOFF_INFO(none_empw, 5349 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 5350 5351 MLX5_TXOFF_INFO(md_empw, 5352 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5353 5354 MLX5_TXOFF_INFO(mt_empw, 5355 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5356 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5357 5358 MLX5_TXOFF_INFO(mtsc_empw, 5359 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5360 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5361 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5362 5363 MLX5_TXOFF_INFO(mti_empw, 5364 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5365 MLX5_TXOFF_CONFIG_INLINE | 5366 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5367 5368 MLX5_TXOFF_INFO(mtv_empw, 5369 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5370 MLX5_TXOFF_CONFIG_VLAN | 5371 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5372 5373 MLX5_TXOFF_INFO(mtiv_empw, 5374 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5375 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5376 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5377 5378 MLX5_TXOFF_INFO(sc_empw, 5379 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5380 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5381 5382 MLX5_TXOFF_INFO(sci_empw, 5383 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5384 MLX5_TXOFF_CONFIG_INLINE | 5385 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5386 5387 MLX5_TXOFF_INFO(scv_empw, 5388 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5389 MLX5_TXOFF_CONFIG_VLAN | 5390 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5391 5392 MLX5_TXOFF_INFO(sciv_empw, 5393 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5394 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5395 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5396 5397 MLX5_TXOFF_INFO(i_empw, 5398 MLX5_TXOFF_CONFIG_INLINE | 5399 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5400 5401 MLX5_TXOFF_INFO(v_empw, 5402 MLX5_TXOFF_CONFIG_VLAN | 5403 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5404 5405 MLX5_TXOFF_INFO(iv_empw, 

/*
 * Array of declared and compiled Tx burst functions and the
 * corresponding supported offloads sets. The array is used to select
 * the Tx burst function for the offloads set specified at Tx queue
 * configuration time.
 */
const struct {
	eth_tx_burst_t func;
	unsigned int olx;
} txoff_func[] = {
MLX5_TXOFF_INFO(full_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(none_empw,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(md_empw,
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mt_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtsc_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mti_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtv_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtiv_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sc_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sci_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(scv_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sciv_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(i_empw,
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(v_empw,
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(iv_empw,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full_ts_nompw,
		MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP)

MLX5_TXOFF_INFO(full_ts_nompwi,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP)

MLX5_TXOFF_INFO(full_ts,
		MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full_ts_noi,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(none_ts,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mdi_ts,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mti_ts,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtiv_ts,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(none,
		MLX5_TXOFF_CONFIG_NONE)

MLX5_TXOFF_INFO(md,
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mt,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtsc,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mti,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtv,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtiv,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sc,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sci,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(scv,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sciv,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(i,
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(v,
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(iv,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(none_mpw,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(mci_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(mc_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(i_mpw,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)
};
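
/*
 * Illustrative note (hypothetical example): if the offload set resolved
 * at configuration time is MLX5_TXOFF_CONFIG_METADATA only, the "md"
 * entry above matches it exactly and mlx5_tx_burst_md() is selected;
 * with eMPW also enabled, the "md_empw" entry is taken instead. The
 * actual matching rules are implemented in mlx5_select_tx_function()
 * below.
 */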

/**
 * Configure the Tx function to use. The routine checks the configured
 * Tx offloads for the device and selects the appropriate Tx burst
 * routine. There are multiple Tx burst routines compiled from the same
 * template, each optimized for a dedicated set of Tx offloads.
 *
 * @param dev
 *   Pointer to the Ethernet device structure.
 *
 * @return
 *   Pointer to the selected Tx burst function.
 */
eth_tx_burst_t
mlx5_select_tx_function(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
	unsigned int diff = 0, olx = 0, i, m;

	static_assert(MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE <=
		      MLX5_DSEG_MAX, "invalid WQE max size");
	static_assert(MLX5_WQE_CSEG_SIZE == MLX5_WSEG_SIZE,
		      "invalid WQE Control Segment size");
	static_assert(MLX5_WQE_ESEG_SIZE == MLX5_WSEG_SIZE,
		      "invalid WQE Ethernet Segment size");
	static_assert(MLX5_WQE_DSEG_SIZE == MLX5_WSEG_SIZE,
		      "invalid WQE Data Segment size");
	static_assert(MLX5_WQE_SIZE == 4 * MLX5_WSEG_SIZE,
		      "invalid WQE size");
	MLX5_ASSERT(priv);
	if (tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) {
		/* We should support Multi-Segment Packets. */
		olx |= MLX5_TXOFF_CONFIG_MULTI;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO |
			   DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
			   DEV_TX_OFFLOAD_GRE_TNL_TSO |
			   DEV_TX_OFFLOAD_IP_TNL_TSO |
			   DEV_TX_OFFLOAD_UDP_TNL_TSO)) {
		/* We should support TCP Send Offload. */
		olx |= MLX5_TXOFF_CONFIG_TSO;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO |
			   DEV_TX_OFFLOAD_UDP_TNL_TSO |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
		/* We should support Software Parser for Tunnels. */
		olx |= MLX5_TXOFF_CONFIG_SWP;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
			   DEV_TX_OFFLOAD_UDP_CKSUM |
			   DEV_TX_OFFLOAD_TCP_CKSUM |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
		/* We should support IP/TCP/UDP Checksums. */
		olx |= MLX5_TXOFF_CONFIG_CSUM;
	}
	if (tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT) {
		/* We should support VLAN insertion. */
		olx |= MLX5_TXOFF_CONFIG_VLAN;
	}
	if (tx_offloads & DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP &&
	    rte_mbuf_dynflag_lookup
			(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL) >= 0 &&
	    rte_mbuf_dynfield_lookup
			(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL) >= 0) {
		/* Offload configured, dynamic entities registered. */
		olx |= MLX5_TXOFF_CONFIG_TXPP;
	}
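	/*
	 * Illustrative note (hypothetical values): with
	 * DEV_TX_OFFLOAD_TCP_CKSUM | DEV_TX_OFFLOAD_VLAN_INSERT configured,
	 * the checks above yield olx = MLX5_TXOFF_CONFIG_CSUM |
	 * MLX5_TXOFF_CONFIG_VLAN; INLINE, eMPW/MPW and METADATA may still
	 * be added by the device-level checks below.
	 */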
	if (priv->txqs_n && (*priv->txqs)[0]) {
		struct mlx5_txq_data *txd = (*priv->txqs)[0];

		if (txd->inlen_send) {
			/*
			 * Check the data inline requirements. Data inline
			 * is enabled on a per-device basis, so checking
			 * the first Tx queue only is sufficient.
			 *
			 * If the device does not support VLAN insertion
			 * in the WQE and some queues are requested to
			 * perform VLAN insertion offload, then inlining
			 * must be enabled.
			 */
			olx |= MLX5_TXOFF_CONFIG_INLINE;
		}
	}
	if (config->mps == MLX5_MPW_ENHANCED &&
	    config->txq_inline_min <= 0) {
		/*
		 * The NIC supports Enhanced Multi-Packet Write
		 * and does not require minimal inline data.
		 */
		olx |= MLX5_TXOFF_CONFIG_EMPW;
	}
	if (rte_flow_dynf_metadata_avail()) {
		/* We should support Flow metadata. */
		olx |= MLX5_TXOFF_CONFIG_METADATA;
	}
	if (config->mps == MLX5_MPW) {
		/*
		 * The NIC supports Legacy Multi-Packet Write.
		 * The MLX5_TXOFF_CONFIG_MPW controls the
		 * descriptor building method in combination
		 * with MLX5_TXOFF_CONFIG_EMPW.
		 */
		if (!(olx & (MLX5_TXOFF_CONFIG_TSO |
			     MLX5_TXOFF_CONFIG_SWP |
			     MLX5_TXOFF_CONFIG_VLAN |
			     MLX5_TXOFF_CONFIG_METADATA)))
			olx |= MLX5_TXOFF_CONFIG_EMPW |
			       MLX5_TXOFF_CONFIG_MPW;
	}
	/*
	 * Scan the routines table to find the routine that satisfies
	 * the requested offloads with the minimal set of extra ones.
	 */
	m = RTE_DIM(txoff_func);
	for (i = 0; i < RTE_DIM(txoff_func); i++) {
		unsigned int tmp;

		tmp = txoff_func[i].olx;
		if (tmp == olx) {
			/* Meets requested offloads exactly. */
			m = i;
			break;
		}
		if ((tmp & olx) != olx) {
			/* Does not meet requested offloads at all. */
			continue;
		}
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_MPW)
			/* Do not enable legacy MPW if not configured. */
			continue;
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_EMPW)
			/* Do not enable eMPW if not configured. */
			continue;
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_INLINE)
			/* Do not enable inlining if not configured. */
			continue;
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_TXPP)
			/* Do not enable scheduling if not configured. */
			continue;
		/*
		 * Some routine meets the requirements.
		 * Check whether it has the minimal amount
		 * of not-requested offloads.
		 */
		tmp = __builtin_popcountl(tmp & ~olx);
		if (m >= RTE_DIM(txoff_func) || tmp < diff) {
			/* First or better match, save and continue. */
			m = i;
			diff = tmp;
			continue;
		}
		if (tmp == diff) {
			tmp = txoff_func[i].olx ^ txoff_func[m].olx;
			if (__builtin_ffsl(txoff_func[i].olx & tmp) <
			    __builtin_ffsl(txoff_func[m].olx & tmp)) {
				/* Lighter not-requested offload wins. */
				m = i;
			}
		}
	}
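	/*
	 * Illustrative note (hypothetical walk-through, not part of the
	 * selection logic): with olx = CSUM | METADATA and no eMPW/MPW,
	 * inlining or scheduling configured, there is no exact table
	 * match; among the remaining candidates are, e.g., "sc"
	 * (SWP | CSUM | METADATA, one extra offload) and "scv"
	 * (SWP | CSUM | VLAN | METADATA, two extra offloads), so "sc"
	 * is chosen as the routine with the fewest not-requested
	 * offloads.
	 */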
	if (m >= RTE_DIM(txoff_func)) {
		DRV_LOG(DEBUG, "port %u has no selected Tx function"
				" for requested offloads %04X",
				dev->data->port_id, olx);
		return NULL;
	}
	DRV_LOG(DEBUG, "port %u has selected Tx function"
			" supporting offloads %04X/%04X",
			dev->data->port_id, olx, txoff_func[m].olx);
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MULTI)
		DRV_LOG(DEBUG, "\tMULTI (multi segment)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TSO)
		DRV_LOG(DEBUG, "\tTSO   (TCP send offload)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_SWP)
		DRV_LOG(DEBUG, "\tSWP   (software parser)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_CSUM)
		DRV_LOG(DEBUG, "\tCSUM  (checksum offload)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_INLINE)
		DRV_LOG(DEBUG, "\tINLIN (inline data)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_VLAN)
		DRV_LOG(DEBUG, "\tVLANI (VLAN insertion)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_METADATA)
		DRV_LOG(DEBUG, "\tMETAD (tx Flow metadata)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TXPP)
		DRV_LOG(DEBUG, "\tTXPP  (tx Scheduling)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_EMPW) {
		if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MPW)
			DRV_LOG(DEBUG, "\tMPW   (Legacy MPW)");
		else
			DRV_LOG(DEBUG, "\tEMPW  (Enhanced MPW)");
	}
	return txoff_func[m].func;
}
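
/*
 * Usage sketch (illustrative only, not a new code path): the PMD is
 * expected to install the selected routine at device start, roughly as
 * follows; a NULL return means the requested offloads combination is
 * not supported:
 *
 *	eth_tx_burst_t burst = mlx5_select_tx_function(dev);
 *
 *	if (burst == NULL)
 *		return -ENOTSUP;	// hypothetical error handling
 *	dev->tx_pkt_burst = burst;
 */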
5802 " + SWP" : "", 5803 (olx & MLX5_TXOFF_CONFIG_CSUM) ? 5804 " + CSUM" : "", 5805 (olx & MLX5_TXOFF_CONFIG_INLINE) ? 5806 " + INLINE" : "", 5807 (olx & MLX5_TXOFF_CONFIG_VLAN) ? 5808 " + VLAN" : "", 5809 (olx & MLX5_TXOFF_CONFIG_METADATA) ? 5810 " + METADATA" : "", 5811 (olx & MLX5_TXOFF_CONFIG_TXPP) ? 5812 " + TXPP" : ""); 5813 return 0; 5814 } 5815 } 5816 return -EINVAL; 5817 } 5818