1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015-2019 Mellanox Technologies, Ltd 4 */ 5 6 #include <stdint.h> 7 #include <string.h> 8 #include <stdlib.h> 9 10 #include <rte_mbuf.h> 11 #include <rte_mempool.h> 12 #include <rte_prefetch.h> 13 #include <rte_common.h> 14 #include <rte_branch_prediction.h> 15 #include <rte_ether.h> 16 #include <rte_cycles.h> 17 #include <rte_flow.h> 18 19 #include <mlx5_prm.h> 20 #include <mlx5_common.h> 21 22 #include "mlx5_autoconf.h" 23 #include "mlx5_defs.h" 24 #include "mlx5.h" 25 #include "mlx5_mr.h" 26 #include "mlx5_utils.h" 27 #include "mlx5_rxtx.h" 28 29 /* TX burst subroutines return codes. */ 30 enum mlx5_txcmp_code { 31 MLX5_TXCMP_CODE_EXIT = 0, 32 MLX5_TXCMP_CODE_ERROR, 33 MLX5_TXCMP_CODE_SINGLE, 34 MLX5_TXCMP_CODE_MULTI, 35 MLX5_TXCMP_CODE_TSO, 36 MLX5_TXCMP_CODE_EMPW, 37 }; 38 39 /* 40 * These defines are used to configure Tx burst routine option set 41 * supported at compile time. The not specified options are optimized out 42 * out due to if conditions can be explicitly calculated at compile time. 43 * The offloads with bigger runtime check (require more CPU cycles to 44 * skip) overhead should have the bigger index - this is needed to 45 * select the better matching routine function if no exact match and 46 * some offloads are not actually requested. 47 */ 48 #define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets.*/ 49 #define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported.*/ 50 #define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads.*/ 51 #define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Check Sums offloaded. */ 52 #define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */ 53 #define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported.*/ 54 #define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */ 55 #define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported.*/ 56 #define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported.*/ 57 #define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp.*/ 58 59 /* The most common offloads groups. 
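 * For example, a burst routine built with olx == MLX5_TXOFF_CONFIG_FULL
 * keeps all the offload branches except the MPW/eMPW and scheduling ones,
 * while one built with MLX5_TXOFF_CONFIG_NONE has every
 * MLX5_TXOFF_CONFIG(xxx) check folded to constant zero and optimized away.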
*/ 60 #define MLX5_TXOFF_CONFIG_NONE 0 61 #define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \ 62 MLX5_TXOFF_CONFIG_TSO | \ 63 MLX5_TXOFF_CONFIG_SWP | \ 64 MLX5_TXOFF_CONFIG_CSUM | \ 65 MLX5_TXOFF_CONFIG_INLINE | \ 66 MLX5_TXOFF_CONFIG_VLAN | \ 67 MLX5_TXOFF_CONFIG_METADATA) 68 69 #define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask) 70 71 #define MLX5_TXOFF_DECL(func, olx) \ 72 static uint16_t mlx5_tx_burst_##func(void *txq, \ 73 struct rte_mbuf **pkts, \ 74 uint16_t pkts_n) \ 75 { \ 76 return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \ 77 pkts, pkts_n, (olx)); \ 78 } 79 80 #define MLX5_TXOFF_INFO(func, olx) {mlx5_tx_burst_##func, olx}, 81 82 /* static asserts */ 83 static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value"); 84 static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be negative value"); 85 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 86 (sizeof(uint16_t) + 87 sizeof(rte_v128u32_t)), 88 "invalid Ethernet Segment data size"); 89 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 90 (sizeof(uint16_t) + 91 sizeof(struct rte_vlan_hdr) + 92 2 * RTE_ETHER_ADDR_LEN), 93 "invalid Ethernet Segment data size"); 94 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 95 (sizeof(uint16_t) + 96 sizeof(rte_v128u32_t)), 97 "invalid Ethernet Segment data size"); 98 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 99 (sizeof(uint16_t) + 100 sizeof(struct rte_vlan_hdr) + 101 2 * RTE_ETHER_ADDR_LEN), 102 "invalid Ethernet Segment data size"); 103 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 104 (sizeof(uint16_t) + 105 sizeof(rte_v128u32_t)), 106 "invalid Ethernet Segment data size"); 107 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 108 (sizeof(uint16_t) + 109 sizeof(struct rte_vlan_hdr) + 110 2 * RTE_ETHER_ADDR_LEN), 111 "invalid Ethernet Segment data size"); 112 static_assert(MLX5_DSEG_MIN_INLINE_SIZE == 113 (2 * RTE_ETHER_ADDR_LEN), 114 "invalid Data Segment data size"); 115 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 116 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 117 static_assert((sizeof(struct rte_vlan_hdr) + 118 sizeof(struct rte_ether_hdr)) == 119 MLX5_ESEG_MIN_INLINE_SIZE, 120 "invalid min inline data size"); 121 static_assert(MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE <= 122 MLX5_DSEG_MAX, "invalid WQE max size"); 123 static_assert(MLX5_WQE_CSEG_SIZE == MLX5_WSEG_SIZE, 124 "invalid WQE Control Segment size"); 125 static_assert(MLX5_WQE_ESEG_SIZE == MLX5_WSEG_SIZE, 126 "invalid WQE Ethernet Segment size"); 127 static_assert(MLX5_WQE_DSEG_SIZE == MLX5_WSEG_SIZE, 128 "invalid WQE Data Segment size"); 129 static_assert(MLX5_WQE_SIZE == 4 * MLX5_WSEG_SIZE, 130 "invalid WQE size"); 131 132 static __rte_always_inline uint32_t 133 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 134 volatile struct mlx5_mini_cqe8 *mcqe); 135 136 static __rte_always_inline int 137 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 138 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe); 139 140 static __rte_always_inline uint32_t 141 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe); 142 143 static __rte_always_inline void 144 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 145 volatile struct mlx5_cqe *cqe, 146 volatile struct mlx5_mini_cqe8 *mcqe); 147 148 static int 149 mlx5_queue_state_modify(struct rte_eth_dev *dev, 150 struct mlx5_mp_arg_queue_state_modify *sm); 151 152 static inline void 153 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 154 volatile struct mlx5_cqe *__rte_restrict cqe, 
155 uint32_t phcsum, uint8_t l4_type); 156 157 static inline void 158 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 159 volatile struct mlx5_cqe *__rte_restrict cqe, 160 volatile struct mlx5_mini_cqe8 *mcqe, 161 struct mlx5_rxq_data *rxq, uint32_t len); 162 163 uint32_t mlx5_ptype_table[] __rte_cache_aligned = { 164 [0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */ 165 }; 166 167 uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned; 168 uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned; 169 170 uint64_t rte_net_mlx5_dynf_inline_mask; 171 #define PKT_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask 172 173 /** 174 * Build a table to translate Rx completion flags to packet type. 175 * 176 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 177 */ 178 void 179 mlx5_set_ptype_table(void) 180 { 181 unsigned int i; 182 uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table; 183 184 /* Last entry must not be overwritten, reserved for errored packet. */ 185 for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i) 186 (*p)[i] = RTE_PTYPE_UNKNOWN; 187 /* 188 * The index to the array should have: 189 * bit[1:0] = l3_hdr_type 190 * bit[4:2] = l4_hdr_type 191 * bit[5] = ip_frag 192 * bit[6] = tunneled 193 * bit[7] = outer_l3_type 194 */ 195 /* L2 */ 196 (*p)[0x00] = RTE_PTYPE_L2_ETHER; 197 /* L3 */ 198 (*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 199 RTE_PTYPE_L4_NONFRAG; 200 (*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 201 RTE_PTYPE_L4_NONFRAG; 202 /* Fragmented */ 203 (*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 204 RTE_PTYPE_L4_FRAG; 205 (*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 206 RTE_PTYPE_L4_FRAG; 207 /* TCP */ 208 (*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 209 RTE_PTYPE_L4_TCP; 210 (*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 211 RTE_PTYPE_L4_TCP; 212 (*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 213 RTE_PTYPE_L4_TCP; 214 (*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 215 RTE_PTYPE_L4_TCP; 216 (*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 217 RTE_PTYPE_L4_TCP; 218 (*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 219 RTE_PTYPE_L4_TCP; 220 /* UDP */ 221 (*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 222 RTE_PTYPE_L4_UDP; 223 (*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 224 RTE_PTYPE_L4_UDP; 225 /* Repeat with outer_l3_type being set. Just in case. 
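	 * E.g. index 0x81 is 0x01 with bit[7] (outer_l3_type) set and
	 * resolves to the same IPv6/NONFRAG packet type as 0x01 does.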
*/ 226 (*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 227 RTE_PTYPE_L4_NONFRAG; 228 (*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 229 RTE_PTYPE_L4_NONFRAG; 230 (*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 231 RTE_PTYPE_L4_FRAG; 232 (*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 233 RTE_PTYPE_L4_FRAG; 234 (*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 235 RTE_PTYPE_L4_TCP; 236 (*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 237 RTE_PTYPE_L4_TCP; 238 (*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 239 RTE_PTYPE_L4_TCP; 240 (*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 241 RTE_PTYPE_L4_TCP; 242 (*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 243 RTE_PTYPE_L4_TCP; 244 (*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 245 RTE_PTYPE_L4_TCP; 246 (*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 247 RTE_PTYPE_L4_UDP; 248 (*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 249 RTE_PTYPE_L4_UDP; 250 /* Tunneled - L3 */ 251 (*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 252 (*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 253 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 254 RTE_PTYPE_INNER_L4_NONFRAG; 255 (*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 256 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 257 RTE_PTYPE_INNER_L4_NONFRAG; 258 (*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 259 (*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 260 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 261 RTE_PTYPE_INNER_L4_NONFRAG; 262 (*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 263 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 264 RTE_PTYPE_INNER_L4_NONFRAG; 265 /* Tunneled - Fragmented */ 266 (*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 267 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 268 RTE_PTYPE_INNER_L4_FRAG; 269 (*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 270 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 271 RTE_PTYPE_INNER_L4_FRAG; 272 (*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 273 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 274 RTE_PTYPE_INNER_L4_FRAG; 275 (*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 276 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 277 RTE_PTYPE_INNER_L4_FRAG; 278 /* Tunneled - TCP */ 279 (*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 280 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 281 RTE_PTYPE_INNER_L4_TCP; 282 (*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 283 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 284 RTE_PTYPE_INNER_L4_TCP; 285 (*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 286 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 287 RTE_PTYPE_INNER_L4_TCP; 288 (*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 289 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 290 RTE_PTYPE_INNER_L4_TCP; 291 (*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 292 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 293 RTE_PTYPE_INNER_L4_TCP; 294 (*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 295 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 296 RTE_PTYPE_INNER_L4_TCP; 297 (*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 298 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 299 RTE_PTYPE_INNER_L4_TCP; 300 (*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 301 
RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 302 RTE_PTYPE_INNER_L4_TCP; 303 (*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 304 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 305 RTE_PTYPE_INNER_L4_TCP; 306 (*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 307 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 308 RTE_PTYPE_INNER_L4_TCP; 309 (*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 310 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 311 RTE_PTYPE_INNER_L4_TCP; 312 (*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 313 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 314 RTE_PTYPE_INNER_L4_TCP; 315 /* Tunneled - UDP */ 316 (*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 317 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 318 RTE_PTYPE_INNER_L4_UDP; 319 (*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 320 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 321 RTE_PTYPE_INNER_L4_UDP; 322 (*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 323 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 324 RTE_PTYPE_INNER_L4_UDP; 325 (*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 326 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 327 RTE_PTYPE_INNER_L4_UDP; 328 } 329 330 /** 331 * Build a table to translate packet to checksum type of Verbs. 332 */ 333 void 334 mlx5_set_cksum_table(void) 335 { 336 unsigned int i; 337 uint8_t v; 338 339 /* 340 * The index should have: 341 * bit[0] = PKT_TX_TCP_SEG 342 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 343 * bit[4] = PKT_TX_IP_CKSUM 344 * bit[8] = PKT_TX_OUTER_IP_CKSUM 345 * bit[9] = tunnel 346 */ 347 for (i = 0; i < RTE_DIM(mlx5_cksum_table); ++i) { 348 v = 0; 349 if (i & (1 << 9)) { 350 /* Tunneled packet. */ 351 if (i & (1 << 8)) /* Outer IP. */ 352 v |= MLX5_ETH_WQE_L3_CSUM; 353 if (i & (1 << 4)) /* Inner IP. */ 354 v |= MLX5_ETH_WQE_L3_INNER_CSUM; 355 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 356 v |= MLX5_ETH_WQE_L4_INNER_CSUM; 357 } else { 358 /* No tunnel. */ 359 if (i & (1 << 4)) /* IP. */ 360 v |= MLX5_ETH_WQE_L3_CSUM; 361 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 362 v |= MLX5_ETH_WQE_L4_CSUM; 363 } 364 mlx5_cksum_table[i] = v; 365 } 366 } 367 368 /** 369 * Build a table to translate packet type of mbuf to SWP type of Verbs. 370 */ 371 void 372 mlx5_set_swp_types_table(void) 373 { 374 unsigned int i; 375 uint8_t v; 376 377 /* 378 * The index should have: 379 * bit[0:1] = PKT_TX_L4_MASK 380 * bit[4] = PKT_TX_IPV6 381 * bit[8] = PKT_TX_OUTER_IPV6 382 * bit[9] = PKT_TX_OUTER_UDP 383 */ 384 for (i = 0; i < RTE_DIM(mlx5_swp_types_table); ++i) { 385 v = 0; 386 if (i & (1 << 8)) 387 v |= MLX5_ETH_WQE_L3_OUTER_IPV6; 388 if (i & (1 << 9)) 389 v |= MLX5_ETH_WQE_L4_OUTER_UDP; 390 if (i & (1 << 4)) 391 v |= MLX5_ETH_WQE_L3_INNER_IPV6; 392 if ((i & 3) == (PKT_TX_UDP_CKSUM >> 52)) 393 v |= MLX5_ETH_WQE_L4_INNER_UDP; 394 mlx5_swp_types_table[i] = v; 395 } 396 } 397 398 /** 399 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 400 * Flags must be preliminary initialized to zero. 401 * 402 * @param loc 403 * Pointer to burst routine local context. 404 * @param swp_flags 405 * Pointer to store Software Parser flags 406 * @param olx 407 * Configured Tx offloads mask. It is fully defined at 408 * compile time and may be used for optimization. 409 * 410 * @return 411 * Software Parser offsets packed in dword. 412 * Software Parser flags are set by pointer. 
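 *   Layout of the returned little-endian dword (offsets are in 2-byte
 *   units, one per byte): bits[7:0] outer L4, bits[15:8] outer L3,
 *   bits[23:16] inner L4, bits[31:24] inner L3.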
413 */ 414 static __rte_always_inline uint32_t 415 txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, 416 uint8_t *swp_flags, 417 unsigned int olx) 418 { 419 uint64_t ol, tunnel; 420 unsigned int idx, off; 421 uint32_t set; 422 423 if (!MLX5_TXOFF_CONFIG(SWP)) 424 return 0; 425 ol = loc->mbuf->ol_flags; 426 tunnel = ol & PKT_TX_TUNNEL_MASK; 427 /* 428 * Check whether Software Parser is required. 429 * Only customized tunnels may ask for. 430 */ 431 if (likely(tunnel != PKT_TX_TUNNEL_UDP && tunnel != PKT_TX_TUNNEL_IP)) 432 return 0; 433 /* 434 * The index should have: 435 * bit[0:1] = PKT_TX_L4_MASK 436 * bit[4] = PKT_TX_IPV6 437 * bit[8] = PKT_TX_OUTER_IPV6 438 * bit[9] = PKT_TX_OUTER_UDP 439 */ 440 idx = (ol & (PKT_TX_L4_MASK | PKT_TX_IPV6 | PKT_TX_OUTER_IPV6)) >> 52; 441 idx |= (tunnel == PKT_TX_TUNNEL_UDP) ? (1 << 9) : 0; 442 *swp_flags = mlx5_swp_types_table[idx]; 443 /* 444 * Set offsets for SW parser. Since ConnectX-5, SW parser just 445 * complements HW parser. SW parser starts to engage only if HW parser 446 * can't reach a header. For the older devices, HW parser will not kick 447 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 448 * should be set regardless of HW offload. 449 */ 450 off = loc->mbuf->outer_l2_len; 451 if (MLX5_TXOFF_CONFIG(VLAN) && ol & PKT_TX_VLAN_PKT) 452 off += sizeof(struct rte_vlan_hdr); 453 set = (off >> 1) << 8; /* Outer L3 offset. */ 454 off += loc->mbuf->outer_l3_len; 455 if (tunnel == PKT_TX_TUNNEL_UDP) 456 set |= off >> 1; /* Outer L4 offset. */ 457 if (ol & (PKT_TX_IPV4 | PKT_TX_IPV6)) { /* Inner IP. */ 458 const uint64_t csum = ol & PKT_TX_L4_MASK; 459 off += loc->mbuf->l2_len; 460 set |= (off >> 1) << 24; /* Inner L3 offset. */ 461 if (csum == PKT_TX_TCP_CKSUM || 462 csum == PKT_TX_UDP_CKSUM || 463 (MLX5_TXOFF_CONFIG(TSO) && ol & PKT_TX_TCP_SEG)) { 464 off += loc->mbuf->l3_len; 465 set |= (off >> 1) << 16; /* Inner L4 offset. */ 466 } 467 } 468 set = rte_cpu_to_le_32(set); 469 return set; 470 } 471 472 /** 473 * Convert the Checksum offloads to Verbs. 474 * 475 * @param buf 476 * Pointer to the mbuf. 477 * 478 * @return 479 * Converted checksum flags. 480 */ 481 static __rte_always_inline uint8_t 482 txq_ol_cksum_to_cs(struct rte_mbuf *buf) 483 { 484 uint32_t idx; 485 uint8_t is_tunnel = !!(buf->ol_flags & PKT_TX_TUNNEL_MASK); 486 const uint64_t ol_flags_mask = PKT_TX_TCP_SEG | PKT_TX_L4_MASK | 487 PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM; 488 489 /* 490 * The index should have: 491 * bit[0] = PKT_TX_TCP_SEG 492 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 493 * bit[4] = PKT_TX_IP_CKSUM 494 * bit[8] = PKT_TX_OUTER_IP_CKSUM 495 * bit[9] = tunnel 496 */ 497 idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9); 498 return mlx5_cksum_table[idx]; 499 } 500 501 /** 502 * Internal function to compute the number of used descriptors in an RX queue 503 * 504 * @param rxq 505 * The Rx queue. 506 * 507 * @return 508 * The number of used rx descriptor. 
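 *   The CQ is walked from the current consumer index; a compressed CQE
 *   contributes its whole session size (byte_cnt) at once and the result
 *   is capped at the total number of descriptors in the queue.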
 */
static uint32_t
rx_queue_count(struct mlx5_rxq_data *rxq)
{
	struct rxq_zip *zip = &rxq->zip;
	volatile struct mlx5_cqe *cqe;
	const unsigned int cqe_n = (1 << rxq->cqe_n);
	const unsigned int sges_n = (1 << rxq->sges_n);
	const unsigned int elts_n = (1 << rxq->elts_n);
	const unsigned int strd_n = (1 << rxq->strd_num_n);
	const unsigned int cqe_cnt = cqe_n - 1;
	unsigned int cq_ci, used;

	/* If we are processing a compressed CQE. */
	if (zip->ai) {
		used = zip->cqe_cnt - zip->ai;
		cq_ci = zip->cq_ci;
	} else {
		used = 0;
		cq_ci = rxq->cq_ci;
	}
	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) {
		int8_t op_own;
		unsigned int n;

		op_own = cqe->op_own;
		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
			n = rte_be_to_cpu_32(cqe->byte_cnt);
		else
			n = 1;
		cq_ci += n;
		used += n;
		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	}
	used = RTE_MIN(used * sges_n, elts_n * strd_n);
	return used;
}

/**
 * DPDK callback to check the status of a Rx descriptor.
 *
 * @param rx_queue
 *   The Rx queue.
 * @param[in] offset
 *   The index of the descriptor in the ring.
 *
 * @return
 *   The status of the Rx descriptor.
 */
int
mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
{
	struct mlx5_rxq_data *rxq = rx_queue;
	struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);

	if (dev->rx_pkt_burst == NULL ||
	    dev->rx_pkt_burst == removed_rx_burst) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	if (offset >= (1 << rxq->cqe_n)) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (offset < rx_queue_count(rxq))
		return RTE_ETH_RX_DESC_DONE;
	return RTE_ETH_RX_DESC_AVAIL;
}

/**
 * DPDK callback to get the Rx queue information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   Rx queue identifier.
 *
 * @param qinfo
 *   Pointer to the Rx queue information structure.
 *
 * @return
 *   None.
 */

void
mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		  struct rte_eth_rxq_info *qinfo)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

	if (!rxq)
		return;
	qinfo->mp = mlx5_rxq_mprq_enabled(rxq) ?
					rxq->mprq_mp : rxq->mp;
	qinfo->conf.rx_thresh.pthresh = 0;
	qinfo->conf.rx_thresh.hthresh = 0;
	qinfo->conf.rx_thresh.wthresh = 0;
	qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh;
	qinfo->conf.rx_drop_en = 1;
	qinfo->conf.rx_deferred_start = rxq_ctrl ? 0 : 1;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
	qinfo->scattered_rx = dev->data->scattered_rx;
	qinfo->nb_desc = mlx5_rxq_mprq_enabled(rxq) ?
		(1 << rxq->elts_n) * (1 << rxq->strd_num_n) :
		(1 << rxq->elts_n);
}

/**
 * DPDK callback to get the Rx packet burst mode information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   Rx queue identifier.
 *
 * @param mode
 *   Pointer to the burst mode information.
 *
 * @return
 *   0 on success, -EINVAL on failure.
637 */ 638 639 int 640 mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, 641 uint16_t rx_queue_id __rte_unused, 642 struct rte_eth_burst_mode *mode) 643 { 644 eth_rx_burst_t pkt_burst = dev->rx_pkt_burst; 645 struct mlx5_priv *priv = dev->data->dev_private; 646 struct mlx5_rxq_data *rxq; 647 648 rxq = (*priv->rxqs)[rx_queue_id]; 649 if (!rxq) { 650 rte_errno = EINVAL; 651 return -rte_errno; 652 } 653 if (pkt_burst == mlx5_rx_burst) { 654 snprintf(mode->info, sizeof(mode->info), "%s", "Scalar"); 655 } else if (pkt_burst == mlx5_rx_burst_mprq) { 656 snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ"); 657 } else if (pkt_burst == mlx5_rx_burst_vec) { 658 #if defined RTE_ARCH_X86_64 659 snprintf(mode->info, sizeof(mode->info), "%s", "Vector SSE"); 660 #elif defined RTE_ARCH_ARM64 661 snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon"); 662 #elif defined RTE_ARCH_PPC_64 663 snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec"); 664 #else 665 return -EINVAL; 666 #endif 667 } else if (pkt_burst == mlx5_rx_burst_mprq_vec) { 668 #if defined RTE_ARCH_X86_64 669 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector SSE"); 670 #elif defined RTE_ARCH_ARM64 671 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon"); 672 #elif defined RTE_ARCH_PPC_64 673 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec"); 674 #else 675 return -EINVAL; 676 #endif 677 } else { 678 return -EINVAL; 679 } 680 return 0; 681 } 682 683 /** 684 * DPDK callback to get the number of used descriptors in a RX queue 685 * 686 * @param dev 687 * Pointer to the device structure. 688 * 689 * @param rx_queue_id 690 * The Rx queue. 691 * 692 * @return 693 * The number of used rx descriptor. 694 * -EINVAL if the queue is invalid 695 */ 696 uint32_t 697 mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id) 698 { 699 struct mlx5_priv *priv = dev->data->dev_private; 700 struct mlx5_rxq_data *rxq; 701 702 if (dev->rx_pkt_burst == NULL || 703 dev->rx_pkt_burst == removed_rx_burst) { 704 rte_errno = ENOTSUP; 705 return -rte_errno; 706 } 707 rxq = (*priv->rxqs)[rx_queue_id]; 708 if (!rxq) { 709 rte_errno = EINVAL; 710 return -rte_errno; 711 } 712 return rx_queue_count(rxq); 713 } 714 715 #define MLX5_SYSTEM_LOG_DIR "/var/log" 716 /** 717 * Dump debug information to log file. 718 * 719 * @param fname 720 * The file name. 721 * @param hex_title 722 * If not NULL this string is printed as a header to the output 723 * and the output will be in hexadecimal view. 724 * @param buf 725 * This is the buffer address to print out. 726 * @param len 727 * The number of bytes to dump out. 728 */ 729 void 730 mlx5_dump_debug_information(const char *fname, const char *hex_title, 731 const void *buf, unsigned int hex_len) 732 { 733 FILE *fd; 734 735 MKSTR(path, "%s/%s", MLX5_SYSTEM_LOG_DIR, fname); 736 fd = fopen(path, "a+"); 737 if (!fd) { 738 DRV_LOG(WARNING, "cannot open %s for debug dump", path); 739 MKSTR(path2, "./%s", fname); 740 fd = fopen(path2, "a+"); 741 if (!fd) { 742 DRV_LOG(ERR, "cannot open %s for debug dump", path2); 743 return; 744 } 745 DRV_LOG(INFO, "New debug dump in file %s", path2); 746 } else { 747 DRV_LOG(INFO, "New debug dump in file %s", path); 748 } 749 if (hex_title) 750 rte_hexdump(fd, hex_title, buf, hex_len); 751 else 752 fprintf(fd, "%s", (const char *)buf); 753 fprintf(fd, "\n\n\n"); 754 fclose(fd); 755 } 756 757 /** 758 * Move QP from error state to running state and initialize indexes. 
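 * The state change is requested via mlx5_queue_state_modify() (performed in
 * the primary process) and, on success, the SQ producer/consumer indexes and
 * the completion counter are reset to zero.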
759 * 760 * @param txq_ctrl 761 * Pointer to TX queue control structure. 762 * 763 * @return 764 * 0 on success, else -1. 765 */ 766 static int 767 tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl) 768 { 769 struct mlx5_mp_arg_queue_state_modify sm = { 770 .is_wq = 0, 771 .queue_id = txq_ctrl->txq.idx, 772 }; 773 774 if (mlx5_queue_state_modify(ETH_DEV(txq_ctrl->priv), &sm)) 775 return -1; 776 txq_ctrl->txq.wqe_ci = 0; 777 txq_ctrl->txq.wqe_pi = 0; 778 txq_ctrl->txq.elts_comp = 0; 779 return 0; 780 } 781 782 /* Return 1 if the error CQE is signed otherwise, sign it and return 0. */ 783 static int 784 check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe) 785 { 786 static const uint8_t magic[] = "seen"; 787 int ret = 1; 788 unsigned int i; 789 790 for (i = 0; i < sizeof(magic); ++i) 791 if (!ret || err_cqe->rsvd1[i] != magic[i]) { 792 ret = 0; 793 err_cqe->rsvd1[i] = magic[i]; 794 } 795 return ret; 796 } 797 798 /** 799 * Handle error CQE. 800 * 801 * @param txq 802 * Pointer to TX queue structure. 803 * @param error_cqe 804 * Pointer to the error CQE. 805 * 806 * @return 807 * Negative value if queue recovery failed, otherwise 808 * the error completion entry is handled successfully. 809 */ 810 static int 811 mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq, 812 volatile struct mlx5_err_cqe *err_cqe) 813 { 814 if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) { 815 const uint16_t wqe_m = ((1 << txq->wqe_n) - 1); 816 struct mlx5_txq_ctrl *txq_ctrl = 817 container_of(txq, struct mlx5_txq_ctrl, txq); 818 uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter); 819 int seen = check_err_cqe_seen(err_cqe); 820 821 if (!seen && txq_ctrl->dump_file_n < 822 txq_ctrl->priv->config.max_dump_files_num) { 823 MKSTR(err_str, "Unexpected CQE error syndrome " 824 "0x%02x CQN = %u SQN = %u wqe_counter = %u " 825 "wq_ci = %u cq_ci = %u", err_cqe->syndrome, 826 txq->cqe_s, txq->qp_num_8s >> 8, 827 rte_be_to_cpu_16(err_cqe->wqe_counter), 828 txq->wqe_ci, txq->cq_ci); 829 MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u", 830 PORT_ID(txq_ctrl->priv), txq->idx, 831 txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc()); 832 mlx5_dump_debug_information(name, NULL, err_str, 0); 833 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 834 (const void *)((uintptr_t) 835 txq->cqes), 836 sizeof(*err_cqe) * 837 (1 << txq->cqe_n)); 838 mlx5_dump_debug_information(name, "MLX5 Error SQ:", 839 (const void *)((uintptr_t) 840 txq->wqes), 841 MLX5_WQE_SIZE * 842 (1 << txq->wqe_n)); 843 txq_ctrl->dump_file_n++; 844 } 845 if (!seen) 846 /* 847 * Count errors in WQEs units. 848 * Later it can be improved to count error packets, 849 * for example, by SQ parsing to find how much packets 850 * should be counted for each WQE. 851 */ 852 txq->stats.oerrors += ((txq->wqe_ci & wqe_m) - 853 new_wqe_pi) & wqe_m; 854 if (tx_recover_qp(txq_ctrl)) { 855 /* Recovering failed - retry later on the same WQE. */ 856 return -1; 857 } 858 /* Release all the remaining buffers. */ 859 txq_free_elts(txq_ctrl); 860 } 861 return 0; 862 } 863 864 /** 865 * Translate RX completion flags to packet type. 866 * 867 * @param[in] rxq 868 * Pointer to RX queue structure. 869 * @param[in] cqe 870 * Pointer to CQE. 871 * 872 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 873 * 874 * @return 875 * Packet type for struct rte_mbuf. 
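 *   For example, a plain (non-tunneled, non-fragmented) IPv4/TCP completion
 *   yields index 0x06 and resolves to RTE_PTYPE_L2_ETHER |
 *   RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP.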
876 */ 877 static inline uint32_t 878 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 879 volatile struct mlx5_mini_cqe8 *mcqe) 880 { 881 uint8_t idx; 882 uint8_t ptype; 883 uint8_t pinfo = (cqe->pkt_info & 0x3) << 6; 884 885 /* Get l3/l4 header from mini-CQE in case L3/L4 format*/ 886 if (mcqe == NULL || 887 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 888 ptype = (cqe->hdr_type_etc & 0xfc00) >> 10; 889 else 890 ptype = mcqe->hdr_type >> 2; 891 /* 892 * The index to the array should have: 893 * bit[1:0] = l3_hdr_type 894 * bit[4:2] = l4_hdr_type 895 * bit[5] = ip_frag 896 * bit[6] = tunneled 897 * bit[7] = outer_l3_type 898 */ 899 idx = pinfo | ptype; 900 return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6)); 901 } 902 903 /** 904 * Initialize Rx WQ and indexes. 905 * 906 * @param[in] rxq 907 * Pointer to RX queue structure. 908 */ 909 void 910 mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) 911 { 912 const unsigned int wqe_n = 1 << rxq->elts_n; 913 unsigned int i; 914 915 for (i = 0; (i != wqe_n); ++i) { 916 volatile struct mlx5_wqe_data_seg *scat; 917 uintptr_t addr; 918 uint32_t byte_count; 919 920 if (mlx5_rxq_mprq_enabled(rxq)) { 921 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i]; 922 923 scat = &((volatile struct mlx5_wqe_mprq *) 924 rxq->wqes)[i].dseg; 925 addr = (uintptr_t)mlx5_mprq_buf_addr(buf, 926 1 << rxq->strd_num_n); 927 byte_count = (1 << rxq->strd_sz_n) * 928 (1 << rxq->strd_num_n); 929 } else { 930 struct rte_mbuf *buf = (*rxq->elts)[i]; 931 932 scat = &((volatile struct mlx5_wqe_data_seg *) 933 rxq->wqes)[i]; 934 addr = rte_pktmbuf_mtod(buf, uintptr_t); 935 byte_count = DATA_LEN(buf); 936 } 937 /* scat->addr must be able to store a pointer. */ 938 MLX5_ASSERT(sizeof(scat->addr) >= sizeof(uintptr_t)); 939 *scat = (struct mlx5_wqe_data_seg){ 940 .addr = rte_cpu_to_be_64(addr), 941 .byte_count = rte_cpu_to_be_32(byte_count), 942 .lkey = mlx5_rx_addr2mr(rxq, addr), 943 }; 944 } 945 rxq->consumed_strd = 0; 946 rxq->decompressed = 0; 947 rxq->rq_pi = 0; 948 rxq->zip = (struct rxq_zip){ 949 .ai = 0, 950 }; 951 rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ? 952 (wqe_n >> rxq->sges_n) * (1 << rxq->strd_num_n) : 0; 953 /* Update doorbell counter. */ 954 rxq->rq_ci = wqe_n >> rxq->sges_n; 955 rte_io_wmb(); 956 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 957 } 958 959 /** 960 * Modify a Verbs/DevX queue state. 961 * This must be called from the primary process. 962 * 963 * @param dev 964 * Pointer to Ethernet device. 965 * @param sm 966 * State modify request parameters. 967 * 968 * @return 969 * 0 in case of success else non-zero value and rte_errno is set. 
970 */ 971 int 972 mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, 973 const struct mlx5_mp_arg_queue_state_modify *sm) 974 { 975 int ret; 976 struct mlx5_priv *priv = dev->data->dev_private; 977 978 if (sm->is_wq) { 979 struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id]; 980 struct mlx5_rxq_ctrl *rxq_ctrl = 981 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 982 983 ret = priv->obj_ops.rxq_obj_modify(rxq_ctrl->obj, sm->state); 984 if (ret) { 985 DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s", 986 sm->state, strerror(errno)); 987 rte_errno = errno; 988 return ret; 989 } 990 } else { 991 struct mlx5_txq_data *txq = (*priv->txqs)[sm->queue_id]; 992 struct mlx5_txq_ctrl *txq_ctrl = 993 container_of(txq, struct mlx5_txq_ctrl, txq); 994 995 ret = priv->obj_ops.txq_obj_modify(txq_ctrl->obj, 996 MLX5_TXQ_MOD_ERR2RDY, 997 (uint8_t)priv->dev_port); 998 if (ret) 999 return ret; 1000 } 1001 return 0; 1002 } 1003 1004 /** 1005 * Modify a Verbs queue state. 1006 * 1007 * @param dev 1008 * Pointer to Ethernet device. 1009 * @param sm 1010 * State modify request parameters. 1011 * 1012 * @return 1013 * 0 in case of success else non-zero value. 1014 */ 1015 static int 1016 mlx5_queue_state_modify(struct rte_eth_dev *dev, 1017 struct mlx5_mp_arg_queue_state_modify *sm) 1018 { 1019 struct mlx5_priv *priv = dev->data->dev_private; 1020 int ret = 0; 1021 1022 switch (rte_eal_process_type()) { 1023 case RTE_PROC_PRIMARY: 1024 ret = mlx5_queue_state_modify_primary(dev, sm); 1025 break; 1026 case RTE_PROC_SECONDARY: 1027 ret = mlx5_mp_req_queue_state_modify(&priv->mp_id, sm); 1028 break; 1029 default: 1030 break; 1031 } 1032 return ret; 1033 } 1034 1035 /** 1036 * Handle a Rx error. 1037 * The function inserts the RQ state to reset when the first error CQE is 1038 * shown, then drains the CQ by the caller function loop. When the CQ is empty, 1039 * it moves the RQ state to ready and initializes the RQ. 1040 * Next CQE identification and error counting are in the caller responsibility. 1041 * 1042 * @param[in] rxq 1043 * Pointer to RX queue structure. 1044 * @param[in] vec 1045 * 1 when called from vectorized Rx burst, need to prepare mbufs for the RQ. 1046 * 0 when called from non-vectorized Rx burst. 1047 * 1048 * @return 1049 * -1 in case of recovery error, otherwise the CQE status. 
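 *   The recovery sequence below is, roughly: move the WQ to RESET, dump the
 *   CQ/RQ once for debugging, wait until HW releases the erroneous CQE,
 *   zero the RQ doorbell, move the WQ back to RDY, refill the elements for
 *   the vectorized path and re-initialize the RQ.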
1050 */ 1051 int 1052 mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) 1053 { 1054 const uint16_t cqe_n = 1 << rxq->cqe_n; 1055 const uint16_t cqe_mask = cqe_n - 1; 1056 const uint16_t wqe_n = 1 << rxq->elts_n; 1057 const uint16_t strd_n = 1 << rxq->strd_num_n; 1058 struct mlx5_rxq_ctrl *rxq_ctrl = 1059 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 1060 union { 1061 volatile struct mlx5_cqe *cqe; 1062 volatile struct mlx5_err_cqe *err_cqe; 1063 } u = { 1064 .cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask], 1065 }; 1066 struct mlx5_mp_arg_queue_state_modify sm; 1067 int ret; 1068 1069 switch (rxq->err_state) { 1070 case MLX5_RXQ_ERR_STATE_NO_ERROR: 1071 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET; 1072 /* Fall-through */ 1073 case MLX5_RXQ_ERR_STATE_NEED_RESET: 1074 sm.is_wq = 1; 1075 sm.queue_id = rxq->idx; 1076 sm.state = IBV_WQS_RESET; 1077 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm)) 1078 return -1; 1079 if (rxq_ctrl->dump_file_n < 1080 rxq_ctrl->priv->config.max_dump_files_num) { 1081 MKSTR(err_str, "Unexpected CQE error syndrome " 1082 "0x%02x CQN = %u RQN = %u wqe_counter = %u" 1083 " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome, 1084 rxq->cqn, rxq_ctrl->wqn, 1085 rte_be_to_cpu_16(u.err_cqe->wqe_counter), 1086 rxq->rq_ci << rxq->sges_n, rxq->cq_ci); 1087 MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u", 1088 rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc()); 1089 mlx5_dump_debug_information(name, NULL, err_str, 0); 1090 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 1091 (const void *)((uintptr_t) 1092 rxq->cqes), 1093 sizeof(*u.cqe) * cqe_n); 1094 mlx5_dump_debug_information(name, "MLX5 Error RQ:", 1095 (const void *)((uintptr_t) 1096 rxq->wqes), 1097 16 * wqe_n); 1098 rxq_ctrl->dump_file_n++; 1099 } 1100 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY; 1101 /* Fall-through */ 1102 case MLX5_RXQ_ERR_STATE_NEED_READY: 1103 ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci); 1104 if (ret == MLX5_CQE_STATUS_HW_OWN) { 1105 rte_io_wmb(); 1106 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1107 rte_io_wmb(); 1108 /* 1109 * The RQ consumer index must be zeroed while moving 1110 * from RESET state to RDY state. 1111 */ 1112 *rxq->rq_db = rte_cpu_to_be_32(0); 1113 rte_io_wmb(); 1114 sm.is_wq = 1; 1115 sm.queue_id = rxq->idx; 1116 sm.state = IBV_WQS_RDY; 1117 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), 1118 &sm)) 1119 return -1; 1120 if (vec) { 1121 const uint32_t elts_n = 1122 mlx5_rxq_mprq_enabled(rxq) ? 1123 wqe_n * strd_n : wqe_n; 1124 const uint32_t e_mask = elts_n - 1; 1125 uint32_t elts_ci = 1126 mlx5_rxq_mprq_enabled(rxq) ? 1127 rxq->elts_ci : rxq->rq_ci; 1128 uint32_t elt_idx; 1129 struct rte_mbuf **elt; 1130 int i; 1131 unsigned int n = elts_n - (elts_ci - 1132 rxq->rq_pi); 1133 1134 for (i = 0; i < (int)n; ++i) { 1135 elt_idx = (elts_ci + i) & e_mask; 1136 elt = &(*rxq->elts)[elt_idx]; 1137 *elt = rte_mbuf_raw_alloc(rxq->mp); 1138 if (!*elt) { 1139 for (i--; i >= 0; --i) { 1140 elt_idx = (elts_ci + 1141 i) & elts_n; 1142 elt = &(*rxq->elts) 1143 [elt_idx]; 1144 rte_pktmbuf_free_seg 1145 (*elt); 1146 } 1147 return -1; 1148 } 1149 } 1150 for (i = 0; i < (int)elts_n; ++i) { 1151 elt = &(*rxq->elts)[i]; 1152 DATA_LEN(*elt) = 1153 (uint16_t)((*elt)->buf_len - 1154 rte_pktmbuf_headroom(*elt)); 1155 } 1156 /* Padding with a fake mbuf for vec Rx. 
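				 * The vectorized burst may speculatively
				 * read up to MLX5_VPMD_DESCS_PER_LOOP
				 * entries past the ring end; pointing them
				 * at &rxq->fake_mbuf keeps those loads
				 * harmless.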
*/ 1157 for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i) 1158 (*rxq->elts)[elts_n + i] = 1159 &rxq->fake_mbuf; 1160 } 1161 mlx5_rxq_initialize(rxq); 1162 rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; 1163 } 1164 return ret; 1165 default: 1166 return -1; 1167 } 1168 } 1169 1170 /** 1171 * Get size of the next packet for a given CQE. For compressed CQEs, the 1172 * consumer index is updated only once all packets of the current one have 1173 * been processed. 1174 * 1175 * @param rxq 1176 * Pointer to RX queue. 1177 * @param cqe 1178 * CQE to process. 1179 * @param[out] mcqe 1180 * Store pointer to mini-CQE if compressed. Otherwise, the pointer is not 1181 * written. 1182 * 1183 * @return 1184 * 0 in case of empty CQE, otherwise the packet size in bytes. 1185 */ 1186 static inline int 1187 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 1188 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe) 1189 { 1190 struct rxq_zip *zip = &rxq->zip; 1191 uint16_t cqe_n = cqe_cnt + 1; 1192 int len; 1193 uint16_t idx, end; 1194 1195 do { 1196 len = 0; 1197 /* Process compressed data in the CQE and mini arrays. */ 1198 if (zip->ai) { 1199 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1200 (volatile struct mlx5_mini_cqe8 (*)[8]) 1201 (uintptr_t)(&(*rxq->cqes)[zip->ca & 1202 cqe_cnt].pkt_info); 1203 len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt & 1204 rxq->byte_mask); 1205 *mcqe = &(*mc)[zip->ai & 7]; 1206 if ((++zip->ai & 7) == 0) { 1207 /* Invalidate consumed CQEs */ 1208 idx = zip->ca; 1209 end = zip->na; 1210 while (idx != end) { 1211 (*rxq->cqes)[idx & cqe_cnt].op_own = 1212 MLX5_CQE_INVALIDATE; 1213 ++idx; 1214 } 1215 /* 1216 * Increment consumer index to skip the number 1217 * of CQEs consumed. Hardware leaves holes in 1218 * the CQ ring for software use. 1219 */ 1220 zip->ca = zip->na; 1221 zip->na += 8; 1222 } 1223 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { 1224 /* Invalidate the rest */ 1225 idx = zip->ca; 1226 end = zip->cq_ci; 1227 1228 while (idx != end) { 1229 (*rxq->cqes)[idx & cqe_cnt].op_own = 1230 MLX5_CQE_INVALIDATE; 1231 ++idx; 1232 } 1233 rxq->cq_ci = zip->cq_ci; 1234 zip->ai = 0; 1235 } 1236 /* 1237 * No compressed data, get next CQE and verify if it is 1238 * compressed. 1239 */ 1240 } else { 1241 int ret; 1242 int8_t op_own; 1243 uint32_t cq_ci; 1244 1245 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 1246 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 1247 if (unlikely(ret == MLX5_CQE_STATUS_ERR || 1248 rxq->err_state)) { 1249 ret = mlx5_rx_err_handle(rxq, 0); 1250 if (ret == MLX5_CQE_STATUS_HW_OWN || 1251 ret == -1) 1252 return 0; 1253 } else { 1254 return 0; 1255 } 1256 } 1257 /* 1258 * Introduce the local variable to have queue cq_ci 1259 * index in queue structure always consistent with 1260 * actual CQE boundary (not pointing to the middle 1261 * of compressed CQE session). 1262 */ 1263 cq_ci = rxq->cq_ci + 1; 1264 op_own = cqe->op_own; 1265 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { 1266 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1267 (volatile struct mlx5_mini_cqe8 (*)[8]) 1268 (uintptr_t)(&(*rxq->cqes) 1269 [cq_ci & cqe_cnt].pkt_info); 1270 1271 /* Fix endianness. */ 1272 zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt); 1273 /* 1274 * Current mini array position is the one 1275 * returned by check_cqe64(). 1276 * 1277 * If completion comprises several mini arrays, 1278 * as a special case the second one is located 1279 * 7 CQEs after the initial CQE instead of 8 1280 * for subsequent ones. 
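				 * Hence zip->na is set to zip->ca + 7 below
				 * for the first mini array, while later
				 * invalidation windows advance in steps of 8.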
1281 */ 1282 zip->ca = cq_ci; 1283 zip->na = zip->ca + 7; 1284 /* Compute the next non compressed CQE. */ 1285 zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; 1286 /* Get packet size to return. */ 1287 len = rte_be_to_cpu_32((*mc)[0].byte_cnt & 1288 rxq->byte_mask); 1289 *mcqe = &(*mc)[0]; 1290 zip->ai = 1; 1291 /* Prefetch all to be invalidated */ 1292 idx = zip->ca; 1293 end = zip->cq_ci; 1294 while (idx != end) { 1295 rte_prefetch0(&(*rxq->cqes)[(idx) & 1296 cqe_cnt]); 1297 ++idx; 1298 } 1299 } else { 1300 rxq->cq_ci = cq_ci; 1301 len = rte_be_to_cpu_32(cqe->byte_cnt); 1302 } 1303 } 1304 if (unlikely(rxq->err_state)) { 1305 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1306 ++rxq->stats.idropped; 1307 } else { 1308 return len; 1309 } 1310 } while (1); 1311 } 1312 1313 /** 1314 * Translate RX completion flags to offload flags. 1315 * 1316 * @param[in] cqe 1317 * Pointer to CQE. 1318 * 1319 * @return 1320 * Offload flags (ol_flags) for struct rte_mbuf. 1321 */ 1322 static inline uint32_t 1323 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe) 1324 { 1325 uint32_t ol_flags = 0; 1326 uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc); 1327 1328 ol_flags = 1329 TRANSPOSE(flags, 1330 MLX5_CQE_RX_L3_HDR_VALID, 1331 PKT_RX_IP_CKSUM_GOOD) | 1332 TRANSPOSE(flags, 1333 MLX5_CQE_RX_L4_HDR_VALID, 1334 PKT_RX_L4_CKSUM_GOOD); 1335 return ol_flags; 1336 } 1337 1338 /** 1339 * Fill in mbuf fields from RX completion flags. 1340 * Note that pkt->ol_flags should be initialized outside of this function. 1341 * 1342 * @param rxq 1343 * Pointer to RX queue. 1344 * @param pkt 1345 * mbuf to fill. 1346 * @param cqe 1347 * CQE to process. 1348 * @param rss_hash_res 1349 * Packet RSS Hash result. 1350 */ 1351 static inline void 1352 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 1353 volatile struct mlx5_cqe *cqe, 1354 volatile struct mlx5_mini_cqe8 *mcqe) 1355 { 1356 /* Update packet information. */ 1357 pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe, mcqe); 1358 1359 if (rxq->rss_hash) { 1360 uint32_t rss_hash_res = 0; 1361 1362 /* If compressed, take hash result from mini-CQE. */ 1363 if (mcqe == NULL || 1364 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH) 1365 rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res); 1366 else 1367 rss_hash_res = rte_be_to_cpu_32(mcqe->rx_hash_result); 1368 if (rss_hash_res) { 1369 pkt->hash.rss = rss_hash_res; 1370 pkt->ol_flags |= PKT_RX_RSS_HASH; 1371 } 1372 } 1373 if (rxq->mark) { 1374 uint32_t mark = 0; 1375 1376 /* If compressed, take flow tag from mini-CQE. 
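		 * The compressed entry carries the tag split across
		 * byte_cnt_flow and flow_tag_high, so it is recombined below
		 * before the validity check.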
*/ 1377 if (mcqe == NULL || 1378 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) 1379 mark = cqe->sop_drop_qpn; 1380 else 1381 mark = ((mcqe->byte_cnt_flow & 0xff) << 8) | 1382 (mcqe->flow_tag_high << 16); 1383 if (MLX5_FLOW_MARK_IS_VALID(mark)) { 1384 pkt->ol_flags |= PKT_RX_FDIR; 1385 if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) { 1386 pkt->ol_flags |= PKT_RX_FDIR_ID; 1387 pkt->hash.fdir.hi = mlx5_flow_mark_get(mark); 1388 } 1389 } 1390 } 1391 if (rxq->dynf_meta && cqe->flow_table_metadata) { 1392 pkt->ol_flags |= rxq->flow_meta_mask; 1393 *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, uint32_t *) = 1394 cqe->flow_table_metadata; 1395 } 1396 if (rxq->csum) 1397 pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); 1398 if (rxq->vlan_strip) { 1399 bool vlan_strip; 1400 1401 if (mcqe == NULL || 1402 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 1403 vlan_strip = cqe->hdr_type_etc & 1404 RTE_BE16(MLX5_CQE_VLAN_STRIPPED); 1405 else 1406 vlan_strip = mcqe->hdr_type & 1407 RTE_BE16(MLX5_CQE_VLAN_STRIPPED); 1408 if (vlan_strip) { 1409 pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; 1410 pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info); 1411 } 1412 } 1413 if (rxq->hw_timestamp) { 1414 uint64_t ts = rte_be_to_cpu_64(cqe->timestamp); 1415 1416 if (rxq->rt_timestamp) 1417 ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts); 1418 mlx5_timestamp_set(pkt, rxq->timestamp_offset, ts); 1419 pkt->ol_flags |= rxq->timestamp_rx_flag; 1420 } 1421 } 1422 1423 /** 1424 * DPDK callback for RX. 1425 * 1426 * @param dpdk_rxq 1427 * Generic pointer to RX queue structure. 1428 * @param[out] pkts 1429 * Array to store received packets. 1430 * @param pkts_n 1431 * Maximum number of packets in array. 1432 * 1433 * @return 1434 * Number of packets successfully received (<= pkts_n). 1435 */ 1436 uint16_t 1437 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1438 { 1439 struct mlx5_rxq_data *rxq = dpdk_rxq; 1440 const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; 1441 const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1; 1442 const unsigned int sges_n = rxq->sges_n; 1443 struct rte_mbuf *pkt = NULL; 1444 struct rte_mbuf *seg = NULL; 1445 volatile struct mlx5_cqe *cqe = 1446 &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1447 unsigned int i = 0; 1448 unsigned int rq_ci = rxq->rq_ci << sges_n; 1449 int len = 0; /* keep its value across iterations. */ 1450 1451 while (pkts_n) { 1452 unsigned int idx = rq_ci & wqe_cnt; 1453 volatile struct mlx5_wqe_data_seg *wqe = 1454 &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx]; 1455 struct rte_mbuf *rep = (*rxq->elts)[idx]; 1456 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1457 1458 if (pkt) 1459 NEXT(seg) = rep; 1460 seg = rep; 1461 rte_prefetch0(seg); 1462 rte_prefetch0(cqe); 1463 rte_prefetch0(wqe); 1464 /* Allocate the buf from the same pool. */ 1465 rep = rte_mbuf_raw_alloc(seg->pool); 1466 if (unlikely(rep == NULL)) { 1467 ++rxq->stats.rx_nombuf; 1468 if (!pkt) { 1469 /* 1470 * no buffers before we even started, 1471 * bail out silently. 
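				 * Otherwise the partially built segment
				 * chain is released below and the burst
				 * stops; packets already stored in pkts[]
				 * are still returned to the caller.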
1472 */ 1473 break; 1474 } 1475 while (pkt != seg) { 1476 MLX5_ASSERT(pkt != (*rxq->elts)[idx]); 1477 rep = NEXT(pkt); 1478 NEXT(pkt) = NULL; 1479 NB_SEGS(pkt) = 1; 1480 rte_mbuf_raw_free(pkt); 1481 pkt = rep; 1482 } 1483 break; 1484 } 1485 if (!pkt) { 1486 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1487 len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe); 1488 if (!len) { 1489 rte_mbuf_raw_free(rep); 1490 break; 1491 } 1492 pkt = seg; 1493 MLX5_ASSERT(len >= (rxq->crc_present << 2)); 1494 pkt->ol_flags &= EXT_ATTACHED_MBUF; 1495 rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe); 1496 if (rxq->crc_present) 1497 len -= RTE_ETHER_CRC_LEN; 1498 PKT_LEN(pkt) = len; 1499 if (cqe->lro_num_seg > 1) { 1500 mlx5_lro_update_hdr 1501 (rte_pktmbuf_mtod(pkt, uint8_t *), cqe, 1502 mcqe, rxq, len); 1503 pkt->ol_flags |= PKT_RX_LRO; 1504 pkt->tso_segsz = len / cqe->lro_num_seg; 1505 } 1506 } 1507 DATA_LEN(rep) = DATA_LEN(seg); 1508 PKT_LEN(rep) = PKT_LEN(seg); 1509 SET_DATA_OFF(rep, DATA_OFF(seg)); 1510 PORT(rep) = PORT(seg); 1511 (*rxq->elts)[idx] = rep; 1512 /* 1513 * Fill NIC descriptor with the new buffer. The lkey and size 1514 * of the buffers are already known, only the buffer address 1515 * changes. 1516 */ 1517 wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t)); 1518 /* If there's only one MR, no need to replace LKey in WQE. */ 1519 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) 1520 wqe->lkey = mlx5_rx_mb2mr(rxq, rep); 1521 if (len > DATA_LEN(seg)) { 1522 len -= DATA_LEN(seg); 1523 ++NB_SEGS(pkt); 1524 ++rq_ci; 1525 continue; 1526 } 1527 DATA_LEN(seg) = len; 1528 #ifdef MLX5_PMD_SOFT_COUNTERS 1529 /* Increment bytes counter. */ 1530 rxq->stats.ibytes += PKT_LEN(pkt); 1531 #endif 1532 /* Return packet. */ 1533 *(pkts++) = pkt; 1534 pkt = NULL; 1535 --pkts_n; 1536 ++i; 1537 /* Align consumer index to the next stride. */ 1538 rq_ci >>= sges_n; 1539 ++rq_ci; 1540 rq_ci <<= sges_n; 1541 } 1542 if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci))) 1543 return 0; 1544 /* Update the consumer index. */ 1545 rxq->rq_ci = rq_ci >> sges_n; 1546 rte_io_wmb(); 1547 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1548 rte_io_wmb(); 1549 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1550 #ifdef MLX5_PMD_SOFT_COUNTERS 1551 /* Increment packets counter. */ 1552 rxq->stats.ipackets += i; 1553 #endif 1554 return i; 1555 } 1556 1557 /** 1558 * Update LRO packet TCP header. 1559 * The HW LRO feature doesn't update the TCP header after coalescing the 1560 * TCP segments but supplies information in CQE to fill it by SW. 1561 * 1562 * @param tcp 1563 * Pointer to the TCP header. 1564 * @param cqe 1565 * Pointer to the completion entry.. 1566 * @param phcsum 1567 * The L3 pseudo-header checksum. 1568 */ 1569 static inline void 1570 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 1571 volatile struct mlx5_cqe *__rte_restrict cqe, 1572 uint32_t phcsum, uint8_t l4_type) 1573 { 1574 /* 1575 * The HW calculates only the TCP payload checksum, need to complete 1576 * the TCP header checksum and the L3 pseudo-header checksum. 
1577 */ 1578 uint32_t csum = phcsum + cqe->csum; 1579 1580 if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK || 1581 l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) { 1582 tcp->tcp_flags |= RTE_TCP_ACK_FLAG; 1583 tcp->recv_ack = cqe->lro_ack_seq_num; 1584 tcp->rx_win = cqe->lro_tcp_win; 1585 } 1586 if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK) 1587 tcp->tcp_flags |= RTE_TCP_PSH_FLAG; 1588 tcp->cksum = 0; 1589 csum += rte_raw_cksum(tcp, (tcp->data_off >> 4) * 4); 1590 csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff); 1591 csum = (~csum) & 0xffff; 1592 if (csum == 0) 1593 csum = 0xffff; 1594 tcp->cksum = csum; 1595 } 1596 1597 /** 1598 * Update LRO packet headers. 1599 * The HW LRO feature doesn't update the L3/TCP headers after coalescing the 1600 * TCP segments but supply information in CQE to fill it by SW. 1601 * 1602 * @param padd 1603 * The packet address. 1604 * @param cqe 1605 * Pointer to the completion entry.. 1606 * @param len 1607 * The packet length. 1608 */ 1609 static inline void 1610 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 1611 volatile struct mlx5_cqe *__rte_restrict cqe, 1612 volatile struct mlx5_mini_cqe8 *mcqe, 1613 struct mlx5_rxq_data *rxq, uint32_t len) 1614 { 1615 union { 1616 struct rte_ether_hdr *eth; 1617 struct rte_vlan_hdr *vlan; 1618 struct rte_ipv4_hdr *ipv4; 1619 struct rte_ipv6_hdr *ipv6; 1620 struct rte_tcp_hdr *tcp; 1621 uint8_t *hdr; 1622 } h = { 1623 .hdr = padd, 1624 }; 1625 uint16_t proto = h.eth->ether_type; 1626 uint32_t phcsum; 1627 uint8_t l4_type; 1628 1629 h.eth++; 1630 while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) || 1631 proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) { 1632 proto = h.vlan->eth_proto; 1633 h.vlan++; 1634 } 1635 if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) { 1636 h.ipv4->time_to_live = cqe->lro_min_ttl; 1637 h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd)); 1638 h.ipv4->hdr_checksum = 0; 1639 h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4); 1640 phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0); 1641 h.ipv4++; 1642 } else { 1643 h.ipv6->hop_limits = cqe->lro_min_ttl; 1644 h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) - 1645 sizeof(*h.ipv6)); 1646 phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0); 1647 h.ipv6++; 1648 } 1649 if (mcqe == NULL || 1650 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 1651 l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) & 1652 MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; 1653 else 1654 l4_type = (rte_be_to_cpu_16(mcqe->hdr_type) & 1655 MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; 1656 mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum, l4_type); 1657 } 1658 1659 void 1660 mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque) 1661 { 1662 struct mlx5_mprq_buf *buf = opaque; 1663 1664 if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) == 1) { 1665 rte_mempool_put(buf->mp, buf); 1666 } else if (unlikely(__atomic_sub_fetch(&buf->refcnt, 1, 1667 __ATOMIC_RELAXED) == 0)) { 1668 __atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED); 1669 rte_mempool_put(buf->mp, buf); 1670 } 1671 } 1672 1673 void 1674 mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf) 1675 { 1676 mlx5_mprq_buf_free_cb(NULL, buf); 1677 } 1678 1679 /** 1680 * DPDK callback for RX with Multi-Packet RQ support. 1681 * 1682 * @param dpdk_rxq 1683 * Generic pointer to RX queue structure. 1684 * @param[out] pkts 1685 * Array to store received packets. 1686 * @param pkts_n 1687 * Maximum number of packets in array. 1688 * 1689 * @return 1690 * Number of packets successfully received (<= pkts_n). 
1691 */ 1692 uint16_t 1693 mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1694 { 1695 struct mlx5_rxq_data *rxq = dpdk_rxq; 1696 const uint32_t strd_n = 1 << rxq->strd_num_n; 1697 const uint32_t strd_sz = 1 << rxq->strd_sz_n; 1698 const uint32_t cq_mask = (1 << rxq->cqe_n) - 1; 1699 const uint32_t wq_mask = (1 << rxq->elts_n) - 1; 1700 volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1701 unsigned int i = 0; 1702 uint32_t rq_ci = rxq->rq_ci; 1703 uint16_t consumed_strd = rxq->consumed_strd; 1704 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1705 1706 while (i < pkts_n) { 1707 struct rte_mbuf *pkt; 1708 int ret; 1709 uint32_t len; 1710 uint16_t strd_cnt; 1711 uint16_t strd_idx; 1712 uint32_t byte_cnt; 1713 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1714 enum mlx5_rqx_code rxq_code; 1715 1716 if (consumed_strd == strd_n) { 1717 /* Replace WQE if the buffer is still in use. */ 1718 mprq_buf_replace(rxq, rq_ci & wq_mask); 1719 /* Advance to the next WQE. */ 1720 consumed_strd = 0; 1721 ++rq_ci; 1722 buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1723 } 1724 cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1725 ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe); 1726 if (!ret) 1727 break; 1728 byte_cnt = ret; 1729 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 1730 MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); 1731 if (rxq->crc_present) 1732 len -= RTE_ETHER_CRC_LEN; 1733 if (mcqe && 1734 rxq->mcqe_format == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) 1735 strd_cnt = (len / strd_sz) + !!(len % strd_sz); 1736 else 1737 strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >> 1738 MLX5_MPRQ_STRIDE_NUM_SHIFT; 1739 MLX5_ASSERT(strd_cnt); 1740 consumed_strd += strd_cnt; 1741 if (byte_cnt & MLX5_MPRQ_FILLER_MASK) 1742 continue; 1743 strd_idx = rte_be_to_cpu_16(mcqe == NULL ? 1744 cqe->wqe_counter : 1745 mcqe->stride_idx); 1746 MLX5_ASSERT(strd_idx < strd_n); 1747 MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & 1748 wq_mask)); 1749 pkt = rte_pktmbuf_alloc(rxq->mp); 1750 if (unlikely(pkt == NULL)) { 1751 ++rxq->stats.rx_nombuf; 1752 break; 1753 } 1754 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 1755 MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); 1756 if (rxq->crc_present) 1757 len -= RTE_ETHER_CRC_LEN; 1758 rxq_code = mprq_buf_to_pkt(rxq, pkt, len, buf, 1759 strd_idx, strd_cnt); 1760 if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) { 1761 rte_pktmbuf_free_seg(pkt); 1762 if (rxq_code == MLX5_RXQ_CODE_DROPPED) { 1763 ++rxq->stats.idropped; 1764 continue; 1765 } 1766 if (rxq_code == MLX5_RXQ_CODE_NOMBUF) { 1767 ++rxq->stats.rx_nombuf; 1768 break; 1769 } 1770 } 1771 rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe); 1772 if (cqe->lro_num_seg > 1) { 1773 mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt, uint8_t *), 1774 cqe, mcqe, rxq, len); 1775 pkt->ol_flags |= PKT_RX_LRO; 1776 pkt->tso_segsz = len / cqe->lro_num_seg; 1777 } 1778 PKT_LEN(pkt) = len; 1779 PORT(pkt) = rxq->port_id; 1780 #ifdef MLX5_PMD_SOFT_COUNTERS 1781 /* Increment bytes counter. */ 1782 rxq->stats.ibytes += PKT_LEN(pkt); 1783 #endif 1784 /* Return packet. */ 1785 *(pkts++) = pkt; 1786 ++i; 1787 } 1788 /* Update the consumer indexes. */ 1789 rxq->consumed_strd = consumed_strd; 1790 rte_io_wmb(); 1791 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1792 if (rq_ci != rxq->rq_ci) { 1793 rxq->rq_ci = rq_ci; 1794 rte_io_wmb(); 1795 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1796 } 1797 #ifdef MLX5_PMD_SOFT_COUNTERS 1798 /* Increment packets counter. 
*/ 1799 rxq->stats.ipackets += i; 1800 #endif 1801 return i; 1802 } 1803 1804 /** 1805 * Dummy DPDK callback for TX. 1806 * 1807 * This function is used to temporarily replace the real callback during 1808 * unsafe control operations on the queue, or in case of error. 1809 * 1810 * @param dpdk_txq 1811 * Generic pointer to TX queue structure. 1812 * @param[in] pkts 1813 * Packets to transmit. 1814 * @param pkts_n 1815 * Number of packets in array. 1816 * 1817 * @return 1818 * Number of packets successfully transmitted (<= pkts_n). 1819 */ 1820 uint16_t 1821 removed_tx_burst(void *dpdk_txq __rte_unused, 1822 struct rte_mbuf **pkts __rte_unused, 1823 uint16_t pkts_n __rte_unused) 1824 { 1825 rte_mb(); 1826 return 0; 1827 } 1828 1829 /** 1830 * Dummy DPDK callback for RX. 1831 * 1832 * This function is used to temporarily replace the real callback during 1833 * unsafe control operations on the queue, or in case of error. 1834 * 1835 * @param dpdk_rxq 1836 * Generic pointer to RX queue structure. 1837 * @param[out] pkts 1838 * Array to store received packets. 1839 * @param pkts_n 1840 * Maximum number of packets in array. 1841 * 1842 * @return 1843 * Number of packets successfully received (<= pkts_n). 1844 */ 1845 uint16_t 1846 removed_rx_burst(void *dpdk_txq __rte_unused, 1847 struct rte_mbuf **pkts __rte_unused, 1848 uint16_t pkts_n __rte_unused) 1849 { 1850 rte_mb(); 1851 return 0; 1852 } 1853 1854 /* 1855 * Vectorized Rx/Tx routines are not compiled in when required vector 1856 * instructions are not supported on a target architecture. The following null 1857 * stubs are needed for linkage when those are not included outside of this file 1858 * (e.g. mlx5_rxtx_vec_sse.c for x86). 1859 */ 1860 1861 __rte_weak uint16_t 1862 mlx5_rx_burst_vec(void *dpdk_txq __rte_unused, 1863 struct rte_mbuf **pkts __rte_unused, 1864 uint16_t pkts_n __rte_unused) 1865 { 1866 return 0; 1867 } 1868 1869 __rte_weak uint16_t 1870 mlx5_rx_burst_mprq_vec(void *dpdk_txq __rte_unused, 1871 struct rte_mbuf **pkts __rte_unused, 1872 uint16_t pkts_n __rte_unused) 1873 { 1874 return 0; 1875 } 1876 1877 __rte_weak int 1878 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused) 1879 { 1880 return -ENOTSUP; 1881 } 1882 1883 __rte_weak int 1884 mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused) 1885 { 1886 return -ENOTSUP; 1887 } 1888 1889 /** 1890 * Free the mbufs from the linear array of pointers. 1891 * 1892 * @param txq 1893 * Pointer to Tx queue structure. 1894 * @param pkts 1895 * Pointer to array of packets to be free. 1896 * @param pkts_n 1897 * Number of packets to be freed. 1898 * @param olx 1899 * Configured Tx offloads mask. It is fully defined at 1900 * compile time and may be used for optimization. 1901 */ 1902 static __rte_always_inline void 1903 mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 1904 struct rte_mbuf **__rte_restrict pkts, 1905 unsigned int pkts_n, 1906 unsigned int olx __rte_unused) 1907 { 1908 struct rte_mempool *pool = NULL; 1909 struct rte_mbuf **p_free = NULL; 1910 struct rte_mbuf *mbuf; 1911 unsigned int n_free = 0; 1912 1913 /* 1914 * The implemented algorithm eliminates 1915 * copying pointers to temporary array 1916 * for rte_mempool_put_bulk() calls. 
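	 * Pointers are scanned in place: a run of consecutive mbufs coming
	 * from the same mempool is handed to rte_mempool_put_bulk() directly;
	 * the run ends when the pool changes or an mbuf is still referenced
	 * after rte_pktmbuf_prefree_seg().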
1917 */ 1918 MLX5_ASSERT(pkts); 1919 MLX5_ASSERT(pkts_n); 1920 /* 1921 * Free mbufs directly to the pool in bulk 1922 * if fast free offload is engaged 1923 */ 1924 if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) { 1925 mbuf = *pkts; 1926 pool = mbuf->pool; 1927 rte_mempool_put_bulk(pool, (void *)pkts, pkts_n); 1928 return; 1929 } 1930 for (;;) { 1931 for (;;) { 1932 /* 1933 * Decrement mbuf reference counter, detach 1934 * indirect and external buffers if needed. 1935 */ 1936 mbuf = rte_pktmbuf_prefree_seg(*pkts); 1937 if (likely(mbuf != NULL)) { 1938 MLX5_ASSERT(mbuf == *pkts); 1939 if (likely(n_free != 0)) { 1940 if (unlikely(pool != mbuf->pool)) 1941 /* From different pool. */ 1942 break; 1943 } else { 1944 /* Start new scan array. */ 1945 pool = mbuf->pool; 1946 p_free = pkts; 1947 } 1948 ++n_free; 1949 ++pkts; 1950 --pkts_n; 1951 if (unlikely(pkts_n == 0)) { 1952 mbuf = NULL; 1953 break; 1954 } 1955 } else { 1956 /* 1957 * This happens if mbuf is still referenced. 1958 * We can't put it back to the pool, skip. 1959 */ 1960 ++pkts; 1961 --pkts_n; 1962 if (unlikely(n_free != 0)) 1963 /* There is some array to free.*/ 1964 break; 1965 if (unlikely(pkts_n == 0)) 1966 /* Last mbuf, nothing to free. */ 1967 return; 1968 } 1969 } 1970 for (;;) { 1971 /* 1972 * This loop is implemented to avoid multiple 1973 * inlining of rte_mempool_put_bulk(). 1974 */ 1975 MLX5_ASSERT(pool); 1976 MLX5_ASSERT(p_free); 1977 MLX5_ASSERT(n_free); 1978 /* 1979 * Free the array of pre-freed mbufs 1980 * belonging to the same memory pool. 1981 */ 1982 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 1983 if (unlikely(mbuf != NULL)) { 1984 /* There is the request to start new scan. */ 1985 pool = mbuf->pool; 1986 p_free = pkts++; 1987 n_free = 1; 1988 --pkts_n; 1989 if (likely(pkts_n != 0)) 1990 break; 1991 /* 1992 * This is the last mbuf to be freed. 1993 * Do one more loop iteration to complete. 1994 * This is rare case of the last unique mbuf. 1995 */ 1996 mbuf = NULL; 1997 continue; 1998 } 1999 if (likely(pkts_n == 0)) 2000 return; 2001 n_free = 0; 2002 break; 2003 } 2004 } 2005 } 2006 /* 2007 * No inline version to free buffers for optimal call 2008 * on the tx_burst completion. 2009 */ 2010 static __rte_noinline void 2011 __mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 2012 struct rte_mbuf **__rte_restrict pkts, 2013 unsigned int pkts_n, 2014 unsigned int olx __rte_unused) 2015 { 2016 mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx); 2017 } 2018 2019 /** 2020 * Free the mbuf from the elts ring buffer till new tail. 2021 * 2022 * @param txq 2023 * Pointer to Tx queue structure. 2024 * @param tail 2025 * Index in elts to free up to, becomes new elts tail. 2026 * @param olx 2027 * Configured Tx offloads mask. It is fully defined at 2028 * compile time and may be used for optimization. 2029 */ 2030 static __rte_always_inline void 2031 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 2032 uint16_t tail, 2033 unsigned int olx __rte_unused) 2034 { 2035 uint16_t n_elts = tail - txq->elts_tail; 2036 2037 MLX5_ASSERT(n_elts); 2038 MLX5_ASSERT(n_elts <= txq->elts_s); 2039 /* 2040 * Implement a loop to support ring buffer wraparound 2041 * with single inlining of mlx5_tx_free_mbuf(). 
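 * At most two iterations are needed: the first frees mbufs up to the end of the elts array, the second (only if the range wraps) frees the remainder from the beginning of the array.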
2042 */ 2043 do { 2044 unsigned int part; 2045 2046 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 2047 part = RTE_MIN(part, n_elts); 2048 MLX5_ASSERT(part); 2049 MLX5_ASSERT(part <= txq->elts_s); 2050 mlx5_tx_free_mbuf(txq, 2051 &txq->elts[txq->elts_tail & txq->elts_m], 2052 part, olx); 2053 txq->elts_tail += part; 2054 n_elts -= part; 2055 } while (n_elts); 2056 } 2057 2058 /** 2059 * Store the mbuf being sent into elts ring buffer. 2060 * On Tx completion these mbufs will be freed. 2061 * 2062 * @param txq 2063 * Pointer to Tx queue structure. 2064 * @param pkts 2065 * Pointer to array of packets to be stored. 2066 * @param pkts_n 2067 * Number of packets to be stored. 2068 * @param olx 2069 * Configured Tx offloads mask. It is fully defined at 2070 * compile time and may be used for optimization. 2071 */ 2072 static __rte_always_inline void 2073 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 2074 struct rte_mbuf **__rte_restrict pkts, 2075 unsigned int pkts_n, 2076 unsigned int olx __rte_unused) 2077 { 2078 unsigned int part; 2079 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 2080 2081 MLX5_ASSERT(pkts); 2082 MLX5_ASSERT(pkts_n); 2083 part = txq->elts_s - (txq->elts_head & txq->elts_m); 2084 MLX5_ASSERT(part); 2085 MLX5_ASSERT(part <= txq->elts_s); 2086 /* This code is a good candidate for vectorizing with SIMD. */ 2087 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 2088 (void *)pkts, 2089 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 2090 txq->elts_head += pkts_n; 2091 if (unlikely(part < pkts_n)) 2092 /* The copy is wrapping around the elts array. */ 2093 rte_memcpy((void *)elts, (void *)(pkts + part), 2094 (pkts_n - part) * sizeof(struct rte_mbuf *)); 2095 } 2096 2097 /** 2098 * Update completion queue consuming index via doorbell 2099 * and flush the completed data buffers. 2100 * 2101 * @param txq 2102 * Pointer to TX queue structure. 2103 * @param valid CQE pointer 2104 * if not NULL update txq->wqe_pi and flush the buffers 2105 * @param olx 2106 * Configured Tx offloads mask. It is fully defined at 2107 * compile time and may be used for optimization. 2108 */ 2109 static __rte_always_inline void 2110 mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq, 2111 volatile struct mlx5_cqe *last_cqe, 2112 unsigned int olx __rte_unused) 2113 { 2114 if (likely(last_cqe != NULL)) { 2115 uint16_t tail; 2116 2117 txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter); 2118 tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m]; 2119 if (likely(tail != txq->elts_tail)) { 2120 mlx5_tx_free_elts(txq, tail, olx); 2121 MLX5_ASSERT(tail == txq->elts_tail); 2122 } 2123 } 2124 } 2125 2126 /** 2127 * Manage TX completions. This routine checks the CQ for 2128 * arrived CQEs, deduces the last accomplished WQE in SQ, 2129 * updates SQ producing index and frees all completed mbufs. 2130 * 2131 * @param txq 2132 * Pointer to TX queue structure. 2133 * @param olx 2134 * Configured Tx offloads mask. It is fully defined at 2135 * compile time and may be used for optimization. 2136 * 2137 * NOTE: not inlined intentionally, it makes tx_burst 2138 * routine smaller, simple and faster - from experiments. 
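 * The routine processes at most MLX5_TX_COMP_MAX_CQE CQEs per call and rings the CQ doorbell once at the end, only if at least one CQE was consumed.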
2139 */ 2140 static void 2141 mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, 2142 unsigned int olx __rte_unused) 2143 { 2144 unsigned int count = MLX5_TX_COMP_MAX_CQE; 2145 volatile struct mlx5_cqe *last_cqe = NULL; 2146 bool ring_doorbell = false; 2147 int ret; 2148 2149 do { 2150 volatile struct mlx5_cqe *cqe; 2151 2152 cqe = &txq->cqes[txq->cq_ci & txq->cqe_m]; 2153 ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci); 2154 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 2155 if (likely(ret != MLX5_CQE_STATUS_ERR)) { 2156 /* No new CQEs in completion queue. */ 2157 MLX5_ASSERT(ret == MLX5_CQE_STATUS_HW_OWN); 2158 break; 2159 } 2160 /* 2161 * Some error occurred, try to restart. 2162 * We have no barrier after WQE related Doorbell 2163 * written, make sure all writes are completed 2164 * here, before we might perform SQ reset. 2165 */ 2166 rte_wmb(); 2167 ret = mlx5_tx_error_cqe_handle 2168 (txq, (volatile struct mlx5_err_cqe *)cqe); 2169 if (unlikely(ret < 0)) { 2170 /* 2171 * Some error occurred on queue error 2172 * handling, we do not advance the index 2173 * here, allowing to retry on next call. 2174 */ 2175 return; 2176 } 2177 /* 2178 * We are going to fetch all entries with 2179 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status. 2180 * The send queue is supposed to be empty. 2181 */ 2182 ring_doorbell = true; 2183 ++txq->cq_ci; 2184 txq->cq_pi = txq->cq_ci; 2185 last_cqe = NULL; 2186 continue; 2187 } 2188 /* Normal transmit completion. */ 2189 MLX5_ASSERT(txq->cq_ci != txq->cq_pi); 2190 #ifdef RTE_LIBRTE_MLX5_DEBUG 2191 MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) == 2192 cqe->wqe_counter); 2193 #endif 2194 ring_doorbell = true; 2195 ++txq->cq_ci; 2196 last_cqe = cqe; 2197 /* 2198 * We have to restrict the amount of processed CQEs 2199 * in one tx_burst routine call. The CQ may be large 2200 * and many CQEs may be updated by the NIC in one 2201 * transaction. Buffers freeing is time consuming, 2202 * multiple iterations may introduce significant 2203 * latency. 2204 */ 2205 if (likely(--count == 0)) 2206 break; 2207 } while (true); 2208 if (likely(ring_doorbell)) { 2209 /* Ring doorbell to notify hardware. */ 2210 rte_compiler_barrier(); 2211 *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); 2212 mlx5_tx_comp_flush(txq, last_cqe, olx); 2213 } 2214 } 2215 2216 /** 2217 * Check if the completion request flag should be set in the last WQE. 2218 * Both pushed mbufs and WQEs are monitored and the completion request 2219 * flag is set if any of thresholds is reached. 2220 * 2221 * @param txq 2222 * Pointer to TX queue structure. 2223 * @param loc 2224 * Pointer to burst routine local context. 2225 * @param olx 2226 * Configured Tx offloads mask. It is fully defined at 2227 * compile time and may be used for optimization. 2228 */ 2229 static __rte_always_inline void 2230 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 2231 struct mlx5_txq_local *__rte_restrict loc, 2232 unsigned int olx) 2233 { 2234 uint16_t head = txq->elts_head; 2235 unsigned int part; 2236 2237 part = MLX5_TXOFF_CONFIG(INLINE) ? 2238 0 : loc->pkts_sent - loc->pkts_copy; 2239 head += part; 2240 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 2241 (MLX5_TXOFF_CONFIG(INLINE) && 2242 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 2243 volatile struct mlx5_wqe *last = loc->wqe_last; 2244 2245 MLX5_ASSERT(last); 2246 txq->elts_comp = head; 2247 if (MLX5_TXOFF_CONFIG(INLINE)) 2248 txq->wqe_comp = txq->wqe_ci; 2249 /* Request unconditional completion on last WQE. 
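 * MLX5_COMP_ALWAYS overrides the MLX5_COMP_ONLY_FIRST_ERR mode set by mlx5_tx_cseg_init(), so the NIC generates a CQE for this WQE unconditionally.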
*/ 2250 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 2251 MLX5_COMP_MODE_OFFSET); 2252 /* Save elts_head in dedicated free on completion queue. */ 2253 #ifdef RTE_LIBRTE_MLX5_DEBUG 2254 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 2255 (last->cseg.opcode >> 8) << 16; 2256 #else 2257 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 2258 #endif 2259 /* A CQE slot must always be available. */ 2260 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 2261 } 2262 } 2263 2264 /** 2265 * DPDK callback to check the status of a tx descriptor. 2266 * 2267 * @param tx_queue 2268 * The tx queue. 2269 * @param[in] offset 2270 * The index of the descriptor in the ring. 2271 * 2272 * @return 2273 * The status of the tx descriptor. 2274 */ 2275 int 2276 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) 2277 { 2278 struct mlx5_txq_data *__rte_restrict txq = tx_queue; 2279 uint16_t used; 2280 2281 mlx5_tx_handle_completion(txq, 0); 2282 used = txq->elts_head - txq->elts_tail; 2283 if (offset < used) 2284 return RTE_ETH_TX_DESC_FULL; 2285 return RTE_ETH_TX_DESC_DONE; 2286 } 2287 2288 /** 2289 * Build the Control Segment with specified opcode: 2290 * - MLX5_OPCODE_SEND 2291 * - MLX5_OPCODE_ENHANCED_MPSW 2292 * - MLX5_OPCODE_TSO 2293 * 2294 * @param txq 2295 * Pointer to TX queue structure. 2296 * @param loc 2297 * Pointer to burst routine local context. 2298 * @param wqe 2299 * Pointer to WQE to fill with built Control Segment. 2300 * @param ds 2301 * Supposed length of WQE in segments. 2302 * @param opcode 2303 * SQ WQE opcode to put into Control Segment. 2304 * @param olx 2305 * Configured Tx offloads mask. It is fully defined at 2306 * compile time and may be used for optimization. 2307 */ 2308 static __rte_always_inline void 2309 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 2310 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2311 struct mlx5_wqe *__rte_restrict wqe, 2312 unsigned int ds, 2313 unsigned int opcode, 2314 unsigned int olx __rte_unused) 2315 { 2316 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 2317 2318 /* For legacy MPW replace the EMPW by TSO with modifier. */ 2319 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 2320 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 2321 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 2322 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2323 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 2324 MLX5_COMP_MODE_OFFSET); 2325 cs->misc = RTE_BE32(0); 2326 } 2327 2328 /** 2329 * Build the Synchronize Queue Segment with specified completion index. 2330 * 2331 * @param txq 2332 * Pointer to TX queue structure. 2333 * @param loc 2334 * Pointer to burst routine local context. 2335 * @param wqe 2336 * Pointer to WQE to fill with built Control Segment. 2337 * @param wci 2338 * Completion index in Clock Queue to wait. 2339 * @param olx 2340 * Configured Tx offloads mask. It is fully defined at 2341 * compile time and may be used for optimization. 
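 * The built Queue Segment makes the send queue wait until the Clock Queue reaches the given completion index; it is used by the WAIT WQE for scheduled (packet pacing) sends.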
*/ 2343 static __rte_always_inline void 2344 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 2345 struct mlx5_txq_local *restrict loc __rte_unused, 2346 struct mlx5_wqe *restrict wqe, 2347 unsigned int wci, 2348 unsigned int olx __rte_unused) 2349 { 2350 struct mlx5_wqe_qseg *qs; 2351 2352 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 2353 qs->max_index = rte_cpu_to_be_32(wci); 2354 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq_obj.cq->id); 2355 qs->reserved0 = RTE_BE32(0); 2356 qs->reserved1 = RTE_BE32(0); 2357 } 2358 2359 /** 2360 * Build the Ethernet Segment without inlined data. 2361 * Supports Software Parser, Checksums and VLAN 2362 * insertion Tx offload features. 2363 * 2364 * @param txq 2365 * Pointer to TX queue structure. 2366 * @param loc 2367 * Pointer to burst routine local context. 2368 * @param wqe 2369 * Pointer to WQE to fill with built Ethernet Segment. 2370 * @param olx 2371 * Configured Tx offloads mask. It is fully defined at 2372 * compile time and may be used for optimization. 2373 */ 2374 static __rte_always_inline void 2375 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 2376 struct mlx5_txq_local *__rte_restrict loc, 2377 struct mlx5_wqe *__rte_restrict wqe, 2378 unsigned int olx) 2379 { 2380 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2381 uint32_t csum; 2382 2383 /* 2384 * Calculate and set check sum flags first, dword field 2385 * in segment may be shared with Software Parser flags. 2386 */ 2387 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2388 es->flags = rte_cpu_to_le_32(csum); 2389 /* 2390 * Calculate and set Software Parser offsets and flags. 2391 * These flags are set for custom UDP and IP tunnel packets. 2392 */ 2393 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2394 /* Fill metadata field if needed. */ 2395 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2396 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2397 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2398 /* Engage VLAN tag insertion feature if requested. */ 2399 if (MLX5_TXOFF_CONFIG(VLAN) && 2400 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 2401 /* 2402 * We should get here only if the device supports 2403 * this feature correctly. 2404 */ 2405 MLX5_ASSERT(txq->vlan_en); 2406 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 2407 loc->mbuf->vlan_tci); 2408 } else { 2409 es->inline_hdr = RTE_BE32(0); 2410 } 2411 } 2412 2413 /** 2414 * Build the Ethernet Segment with minimal inlined data 2415 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 2416 * used to fill the gap in single WQEBB WQEs. 2417 * Supports Software Parser, Checksums and VLAN 2418 * insertion Tx offload features. 2419 * 2420 * @param txq 2421 * Pointer to TX queue structure. 2422 * @param loc 2423 * Pointer to burst routine local context. 2424 * @param wqe 2425 * Pointer to WQE to fill with built Ethernet Segment. 2426 * @param vlan 2427 * Length of VLAN tag insertion if any. 2428 * @param olx 2429 * Configured Tx offloads mask. It is fully defined at 2430 * compile time and may be used for optimization.
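 * Note: MLX5_ESEG_MIN_INLINE_SIZE is 18 bytes - with VLAN insertion this covers both MAC addresses (12 bytes), the 4-byte VLAN header and the original 2-byte ethertype; without VLAN it is the 2-byte inline stub plus 16 bytes of packet data.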
*/ 2432 static __rte_always_inline void 2433 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 2434 struct mlx5_txq_local *__rte_restrict loc, 2435 struct mlx5_wqe *__rte_restrict wqe, 2436 unsigned int vlan, 2437 unsigned int olx) 2438 { 2439 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2440 uint32_t csum; 2441 uint8_t *psrc, *pdst; 2442 2443 /* 2444 * Calculate and set check sum flags first, dword field 2445 * in segment may be shared with Software Parser flags. 2446 */ 2447 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2448 es->flags = rte_cpu_to_le_32(csum); 2449 /* 2450 * Calculate and set Software Parser offsets and flags. 2451 * These flags are set for custom UDP and IP tunnel packets. 2452 */ 2453 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2454 /* Fill metadata field if needed. */ 2455 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2456 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2457 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2458 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2459 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 2460 es->inline_data = *(unaligned_uint16_t *)psrc; 2461 psrc += sizeof(uint16_t); 2462 pdst = (uint8_t *)(es + 1); 2463 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2464 /* Implement VLAN tag insertion as part of the inline data. */ 2465 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2466 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2467 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2468 /* Insert VLAN ethertype + VLAN tag. */ 2469 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2470 ((RTE_ETHER_TYPE_VLAN << 16) | 2471 loc->mbuf->vlan_tci); 2472 pdst += sizeof(struct rte_vlan_hdr); 2473 /* Copy the remaining two bytes from the packet data. */ 2474 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2475 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2476 } else { 2477 /* Fill the gap in the title WQEBB with inline data. */ 2478 rte_mov16(pdst, psrc); 2479 } 2480 } 2481 2482 /** 2483 * Build the Ethernet Segment with entire packet 2484 * data inlining. Checks the boundary of WQEBB and 2485 * ring buffer wrapping, supports Software Parser, 2486 * Checksums and VLAN insertion Tx offload features. 2487 * 2488 * @param txq 2489 * Pointer to TX queue structure. 2490 * @param loc 2491 * Pointer to burst routine local context. 2492 * @param wqe 2493 * Pointer to WQE to fill with built Ethernet Segment. 2494 * @param vlan 2495 * Length of VLAN tag insertion if any. 2496 * @param inlen 2497 * Length of data to inline (VLAN included, if any). 2498 * @param tso 2499 * TSO flag, set mss field from the packet. 2500 * @param olx 2501 * Configured Tx offloads mask. It is fully defined at 2502 * compile time and may be used for optimization. 2503 * 2504 * @return 2505 * Pointer to the next Data Segment (aligned and wrapped around). 2506 */ 2507 static __rte_always_inline struct mlx5_wqe_dseg * 2508 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 2509 struct mlx5_txq_local *__rte_restrict loc, 2510 struct mlx5_wqe *__rte_restrict wqe, 2511 unsigned int vlan, 2512 unsigned int inlen, 2513 unsigned int tso, 2514 unsigned int olx) 2515 { 2516 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2517 uint32_t csum; 2518 uint8_t *psrc, *pdst; 2519 unsigned int part; 2520 2521 /* 2522 * Calculate and set check sum flags first, dword field 2523 * in segment may be shared with Software Parser flags. 2524 */ 2525 csum = MLX5_TXOFF_CONFIG(CSUM) ?
txq_ol_cksum_to_cs(loc->mbuf) : 0; 2526 if (tso) { 2527 csum <<= 24; 2528 csum |= loc->mbuf->tso_segsz; 2529 es->flags = rte_cpu_to_be_32(csum); 2530 } else { 2531 es->flags = rte_cpu_to_le_32(csum); 2532 } 2533 /* 2534 * Calculate and set Software Parser offsets and flags. 2535 * These flags are set for custom UDP and IP tunnel packets. 2536 */ 2537 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2538 /* Fill metadata field if needed. */ 2539 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2540 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2541 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2542 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2543 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 2544 es->inline_data = *(unaligned_uint16_t *)psrc; 2545 psrc += sizeof(uint16_t); 2546 pdst = (uint8_t *)(es + 1); 2547 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2548 /* Implement VLAN tag insertion as part of the inline data. */ 2549 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2550 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2551 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2552 /* Insert VLAN ethertype + VLAN tag. */ 2553 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2554 ((RTE_ETHER_TYPE_VLAN << 16) | 2555 loc->mbuf->vlan_tci); 2556 pdst += sizeof(struct rte_vlan_hdr); 2557 /* Copy the remaining two bytes from the packet data. */ 2558 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2559 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2560 psrc += sizeof(uint16_t); 2561 } else { 2562 /* Fill the gap in the title WQEBB with inline data. */ 2563 rte_mov16(pdst, psrc); 2564 psrc += sizeof(rte_v128u32_t); 2565 } 2566 pdst = (uint8_t *)(es + 2); 2567 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2568 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 2569 inlen -= MLX5_ESEG_MIN_INLINE_SIZE; 2570 if (!inlen) { 2571 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 2572 return (struct mlx5_wqe_dseg *)pdst; 2573 } 2574 /* 2575 * The WQEBB space availability is checked by caller. 2576 * Here we should be aware of WQE ring buffer wraparound only. 2577 */ 2578 part = (uint8_t *)txq->wqes_end - pdst; 2579 part = RTE_MIN(part, inlen); 2580 do { 2581 rte_memcpy(pdst, psrc, part); 2582 inlen -= part; 2583 if (likely(!inlen)) { 2584 /* 2585 * If return value is not used by the caller 2586 * the code below will be optimized out. 2587 */ 2588 pdst += part; 2589 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2590 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 2591 pdst = (uint8_t *)txq->wqes; 2592 return (struct mlx5_wqe_dseg *)pdst; 2593 } 2594 pdst = (uint8_t *)txq->wqes; 2595 psrc += part; 2596 part = inlen; 2597 } while (true); 2598 } 2599 2600 /** 2601 * Copy data from a chain of mbufs to the specified linear buffer. 2602 * If the data from some mbuf is copied completely, this mbuf is 2603 * freed. The local structure is used to keep the 2604 * byte stream state. 2605 * 2606 * @param pdst 2607 * Pointer to the destination linear buffer. 2608 * @param loc 2609 * Pointer to burst routine local context. 2610 * @param len 2611 * Length of data to be copied. 2612 * @param must 2613 * Length of data to be copied ignoring the no-inline hint. 2614 * @param olx 2615 * Configured Tx offloads mask. It is fully defined at 2616 * compile time and may be used for optimization. 2617 * 2618 * @return 2619 * Number of actual copied data bytes.
This is always greater than or 2620 * equal to must parameter and might be lesser than len in no inline 2621 * hint flag is encountered. 2622 */ 2623 static __rte_always_inline unsigned int 2624 mlx5_tx_mseg_memcpy(uint8_t *pdst, 2625 struct mlx5_txq_local *__rte_restrict loc, 2626 unsigned int len, 2627 unsigned int must, 2628 unsigned int olx __rte_unused) 2629 { 2630 struct rte_mbuf *mbuf; 2631 unsigned int part, dlen, copy = 0; 2632 uint8_t *psrc; 2633 2634 MLX5_ASSERT(len); 2635 MLX5_ASSERT(must <= len); 2636 do { 2637 /* Allow zero length packets, must check first. */ 2638 dlen = rte_pktmbuf_data_len(loc->mbuf); 2639 if (dlen <= loc->mbuf_off) { 2640 /* Exhausted packet, just free. */ 2641 mbuf = loc->mbuf; 2642 loc->mbuf = mbuf->next; 2643 rte_pktmbuf_free_seg(mbuf); 2644 loc->mbuf_off = 0; 2645 MLX5_ASSERT(loc->mbuf_nseg > 1); 2646 MLX5_ASSERT(loc->mbuf); 2647 --loc->mbuf_nseg; 2648 if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 2649 unsigned int diff; 2650 2651 if (copy >= must) { 2652 /* 2653 * We already copied the minimal 2654 * requested amount of data. 2655 */ 2656 return copy; 2657 } 2658 diff = must - copy; 2659 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 2660 /* 2661 * Copy only the minimal required 2662 * part of the data buffer. 2663 */ 2664 len = diff; 2665 } 2666 } 2667 continue; 2668 } 2669 dlen -= loc->mbuf_off; 2670 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 2671 loc->mbuf_off); 2672 part = RTE_MIN(len, dlen); 2673 rte_memcpy(pdst, psrc, part); 2674 copy += part; 2675 loc->mbuf_off += part; 2676 len -= part; 2677 if (!len) { 2678 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 2679 loc->mbuf_off = 0; 2680 /* Exhausted packet, just free. */ 2681 mbuf = loc->mbuf; 2682 loc->mbuf = mbuf->next; 2683 rte_pktmbuf_free_seg(mbuf); 2684 loc->mbuf_off = 0; 2685 MLX5_ASSERT(loc->mbuf_nseg >= 1); 2686 --loc->mbuf_nseg; 2687 } 2688 return copy; 2689 } 2690 pdst += part; 2691 } while (true); 2692 } 2693 2694 /** 2695 * Build the Ethernet Segment with inlined data from 2696 * multi-segment packet. Checks the boundary of WQEBB 2697 * and ring buffer wrapping, supports Software Parser, 2698 * Checksums and VLAN insertion Tx offload features. 2699 * 2700 * @param txq 2701 * Pointer to TX queue structure. 2702 * @param loc 2703 * Pointer to burst routine local context. 2704 * @param wqe 2705 * Pointer to WQE to fill with built Ethernet Segment. 2706 * @param vlan 2707 * Length of VLAN tag insertion if any. 2708 * @param inlen 2709 * Length of data to inline (VLAN included, if any). 2710 * @param tso 2711 * TSO flag, set mss field from the packet. 2712 * @param olx 2713 * Configured Tx offloads mask. It is fully defined at 2714 * compile time and may be used for optimization. 2715 * 2716 * @return 2717 * Pointer to the next Data Segment (aligned and 2718 * possible NOT wrapped around - caller should do 2719 * wrapping check on its own). 2720 */ 2721 static __rte_always_inline struct mlx5_wqe_dseg * 2722 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 2723 struct mlx5_txq_local *__rte_restrict loc, 2724 struct mlx5_wqe *__rte_restrict wqe, 2725 unsigned int vlan, 2726 unsigned int inlen, 2727 unsigned int tso, 2728 unsigned int olx) 2729 { 2730 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2731 uint32_t csum; 2732 uint8_t *pdst; 2733 unsigned int part, tlen = 0; 2734 2735 /* 2736 * Calculate and set check sum flags first, uint32_t field 2737 * in segment may be shared with Software Parser flags. 2738 */ 2739 csum = MLX5_TXOFF_CONFIG(CSUM) ? 
txq_ol_cksum_to_cs(loc->mbuf) : 0; 2740 if (tso) { 2741 csum <<= 24; 2742 csum |= loc->mbuf->tso_segsz; 2743 es->flags = rte_cpu_to_be_32(csum); 2744 } else { 2745 es->flags = rte_cpu_to_le_32(csum); 2746 } 2747 /* 2748 * Calculate and set Software Parser offsets and flags. 2749 * These flags a set for custom UDP and IP tunnel packets. 2750 */ 2751 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2752 /* Fill metadata field if needed. */ 2753 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2754 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2755 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2756 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2757 pdst = (uint8_t *)&es->inline_data; 2758 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2759 /* Implement VLAN tag insertion as part inline data. */ 2760 mlx5_tx_mseg_memcpy(pdst, loc, 2761 2 * RTE_ETHER_ADDR_LEN, 2762 2 * RTE_ETHER_ADDR_LEN, olx); 2763 pdst += 2 * RTE_ETHER_ADDR_LEN; 2764 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2765 ((RTE_ETHER_TYPE_VLAN << 16) | 2766 loc->mbuf->vlan_tci); 2767 pdst += sizeof(struct rte_vlan_hdr); 2768 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 2769 } 2770 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 2771 /* 2772 * The WQEBB space availability is checked by caller. 2773 * Here we should be aware of WQE ring buffer wraparound only. 2774 */ 2775 part = (uint8_t *)txq->wqes_end - pdst; 2776 part = RTE_MIN(part, inlen - tlen); 2777 MLX5_ASSERT(part); 2778 do { 2779 unsigned int copy; 2780 2781 /* 2782 * Copying may be interrupted inside the routine 2783 * if run into no inline hint flag. 2784 */ 2785 copy = tlen >= txq->inlen_mode ? 0 : (txq->inlen_mode - tlen); 2786 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 2787 tlen += copy; 2788 if (likely(inlen <= tlen) || copy < part) { 2789 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 2790 pdst += copy; 2791 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2792 return (struct mlx5_wqe_dseg *)pdst; 2793 } 2794 pdst = (uint8_t *)txq->wqes; 2795 part = inlen - tlen; 2796 } while (true); 2797 } 2798 2799 /** 2800 * Build the Data Segment of pointer type. 2801 * 2802 * @param txq 2803 * Pointer to TX queue structure. 2804 * @param loc 2805 * Pointer to burst routine local context. 2806 * @param dseg 2807 * Pointer to WQE to fill with built Data Segment. 2808 * @param buf 2809 * Data buffer to point. 2810 * @param len 2811 * Data buffer length. 2812 * @param olx 2813 * Configured Tx offloads mask. It is fully defined at 2814 * compile time and may be used for optimization. 2815 */ 2816 static __rte_always_inline void 2817 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 2818 struct mlx5_txq_local *__rte_restrict loc, 2819 struct mlx5_wqe_dseg *__rte_restrict dseg, 2820 uint8_t *buf, 2821 unsigned int len, 2822 unsigned int olx __rte_unused) 2823 2824 { 2825 MLX5_ASSERT(len); 2826 dseg->bcount = rte_cpu_to_be_32(len); 2827 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2828 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2829 } 2830 2831 /** 2832 * Build the Data Segment of pointer type or inline 2833 * if data length is less than buffer in minimal 2834 * Data Segment size. 2835 * 2836 * @param txq 2837 * Pointer to TX queue structure. 2838 * @param loc 2839 * Pointer to burst routine local context. 2840 * @param dseg 2841 * Pointer to WQE to fill with built Data Segment. 2842 * @param buf 2843 * Data buffer to point. 2844 * @param len 2845 * Data buffer length. 2846 * @param olx 2847 * Configured Tx offloads mask. 
It is fully defined at 2848 * compile time and may be used for optimization. 2849 */ 2850 static __rte_always_inline void 2851 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 2852 struct mlx5_txq_local *__rte_restrict loc, 2853 struct mlx5_wqe_dseg *__rte_restrict dseg, 2854 uint8_t *buf, 2855 unsigned int len, 2856 unsigned int olx __rte_unused) 2857 2858 { 2859 uintptr_t dst, src; 2860 2861 MLX5_ASSERT(len); 2862 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 2863 dseg->bcount = rte_cpu_to_be_32(len); 2864 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2865 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2866 2867 return; 2868 } 2869 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2870 /* Unrolled implementation of generic rte_memcpy. */ 2871 dst = (uintptr_t)&dseg->inline_data[0]; 2872 src = (uintptr_t)buf; 2873 if (len & 0x08) { 2874 #ifdef RTE_ARCH_STRICT_ALIGN 2875 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 2876 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2877 dst += sizeof(uint32_t); 2878 src += sizeof(uint32_t); 2879 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2880 dst += sizeof(uint32_t); 2881 src += sizeof(uint32_t); 2882 #else 2883 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 2884 dst += sizeof(uint64_t); 2885 src += sizeof(uint64_t); 2886 #endif 2887 } 2888 if (len & 0x04) { 2889 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2890 dst += sizeof(uint32_t); 2891 src += sizeof(uint32_t); 2892 } 2893 if (len & 0x02) { 2894 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 2895 dst += sizeof(uint16_t); 2896 src += sizeof(uint16_t); 2897 } 2898 if (len & 0x01) 2899 *(uint8_t *)dst = *(uint8_t *)src; 2900 } 2901 2902 /** 2903 * Build the Data Segment of inlined data from single 2904 * segment packet, no VLAN insertion. 2905 * 2906 * @param txq 2907 * Pointer to TX queue structure. 2908 * @param loc 2909 * Pointer to burst routine local context. 2910 * @param dseg 2911 * Pointer to WQE to fill with built Data Segment. 2912 * @param buf 2913 * Data buffer to point. 2914 * @param len 2915 * Data buffer length. 2916 * @param olx 2917 * Configured Tx offloads mask. It is fully defined at 2918 * compile time and may be used for optimization. 2919 * 2920 * @return 2921 * Pointer to the next Data Segment after inlined data. 2922 * Ring buffer wraparound check is needed. We do not 2923 * do it here because it may not be needed for the 2924 * last packet in the eMPW session. 2925 */ 2926 static __rte_always_inline struct mlx5_wqe_dseg * 2927 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 2928 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2929 struct mlx5_wqe_dseg *__rte_restrict dseg, 2930 uint8_t *buf, 2931 unsigned int len, 2932 unsigned int olx __rte_unused) 2933 { 2934 unsigned int part; 2935 uint8_t *pdst; 2936 2937 if (!MLX5_TXOFF_CONFIG(MPW)) { 2938 /* Store the descriptor byte counter for eMPW sessions. */ 2939 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2940 pdst = &dseg->inline_data[0]; 2941 } else { 2942 /* The entire legacy MPW session counter is stored on close. */ 2943 pdst = (uint8_t *)dseg; 2944 } 2945 /* 2946 * The WQEBB space availability is checked by caller. 2947 * Here we should be aware of WQE ring buffer wraparound only. 
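 * The copy is split into at most two chunks: up to txq->wqes_end first, then (if the data wraps) the remainder from the start of the ring.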
2948 */ 2949 part = (uint8_t *)txq->wqes_end - pdst; 2950 part = RTE_MIN(part, len); 2951 do { 2952 rte_memcpy(pdst, buf, part); 2953 len -= part; 2954 if (likely(!len)) { 2955 pdst += part; 2956 if (!MLX5_TXOFF_CONFIG(MPW)) 2957 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2958 /* Note: no final wraparound check here. */ 2959 return (struct mlx5_wqe_dseg *)pdst; 2960 } 2961 pdst = (uint8_t *)txq->wqes; 2962 buf += part; 2963 part = len; 2964 } while (true); 2965 } 2966 2967 /** 2968 * Build the Data Segment of inlined data from single 2969 * segment packet with VLAN insertion. 2970 * 2971 * @param txq 2972 * Pointer to TX queue structure. 2973 * @param loc 2974 * Pointer to burst routine local context. 2975 * @param dseg 2976 * Pointer to the dseg fill with built Data Segment. 2977 * @param buf 2978 * Data buffer to point. 2979 * @param len 2980 * Data buffer length. 2981 * @param olx 2982 * Configured Tx offloads mask. It is fully defined at 2983 * compile time and may be used for optimization. 2984 * 2985 * @return 2986 * Pointer to the next Data Segment after inlined data. 2987 * Ring buffer wraparound check is needed. 2988 */ 2989 static __rte_always_inline struct mlx5_wqe_dseg * 2990 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 2991 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2992 struct mlx5_wqe_dseg *__rte_restrict dseg, 2993 uint8_t *buf, 2994 unsigned int len, 2995 unsigned int olx __rte_unused) 2996 2997 { 2998 unsigned int part; 2999 uint8_t *pdst; 3000 3001 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 3002 if (!MLX5_TXOFF_CONFIG(MPW)) { 3003 /* Store the descriptor byte counter for eMPW sessions. */ 3004 dseg->bcount = rte_cpu_to_be_32 3005 ((len + sizeof(struct rte_vlan_hdr)) | 3006 MLX5_ETH_WQE_DATA_INLINE); 3007 pdst = &dseg->inline_data[0]; 3008 } else { 3009 /* The entire legacy MPW session counter is stored on close. */ 3010 pdst = (uint8_t *)dseg; 3011 } 3012 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 3013 buf += MLX5_DSEG_MIN_INLINE_SIZE; 3014 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 3015 len -= MLX5_DSEG_MIN_INLINE_SIZE; 3016 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ 3017 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 3018 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 3019 pdst = (uint8_t *)txq->wqes; 3020 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 3021 loc->mbuf->vlan_tci); 3022 pdst += sizeof(struct rte_vlan_hdr); 3023 /* 3024 * The WQEBB space availability is checked by caller. 3025 * Here we should be aware of WQE ring buffer wraparound only. 3026 */ 3027 part = (uint8_t *)txq->wqes_end - pdst; 3028 part = RTE_MIN(part, len); 3029 do { 3030 rte_memcpy(pdst, buf, part); 3031 len -= part; 3032 if (likely(!len)) { 3033 pdst += part; 3034 if (!MLX5_TXOFF_CONFIG(MPW)) 3035 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 3036 /* Note: no final wraparound check here. */ 3037 return (struct mlx5_wqe_dseg *)pdst; 3038 } 3039 pdst = (uint8_t *)txq->wqes; 3040 buf += part; 3041 part = len; 3042 } while (true); 3043 } 3044 3045 /** 3046 * Build the Ethernet Segment with optionally inlined data with 3047 * VLAN insertion and following Data Segments (if any) from 3048 * multi-segment packet. Used by ordinary send and TSO. 3049 * 3050 * @param txq 3051 * Pointer to TX queue structure. 3052 * @param loc 3053 * Pointer to burst routine local context. 3054 * @param wqe 3055 * Pointer to WQE to fill with built Ethernet/Data Segments. 
3056 * @param vlan 3057 * Length of VLAN header to insert, 0 means no VLAN insertion. 3058 * @param inlen 3059 * Data length to inline. For TSO this parameter specifies 3060 * exact value, for ordinary send routine can be aligned by 3061 * caller to provide better WQE space saving and data buffer 3062 * start address alignment. This length includes VLAN header 3063 * being inserted. 3064 * @param tso 3065 * Zero means ordinary send, inlined data can be extended, 3066 * otherwise this is TSO, inlined data length is fixed. 3067 * @param olx 3068 * Configured Tx offloads mask. It is fully defined at 3069 * compile time and may be used for optimization. 3070 * 3071 * @return 3072 * Actual size of built WQE in segments. 3073 */ 3074 static __rte_always_inline unsigned int 3075 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 3076 struct mlx5_txq_local *__rte_restrict loc, 3077 struct mlx5_wqe *__rte_restrict wqe, 3078 unsigned int vlan, 3079 unsigned int inlen, 3080 unsigned int tso, 3081 unsigned int olx __rte_unused) 3082 { 3083 struct mlx5_wqe_dseg *__rte_restrict dseg; 3084 unsigned int ds; 3085 3086 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 3087 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 3088 loc->mbuf_off = 0; 3089 3090 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 3091 if (!loc->mbuf_nseg) 3092 goto dseg_done; 3093 /* 3094 * There are still some mbuf remaining, not inlined. 3095 * The first mbuf may be partially inlined and we 3096 * must process the possible non-zero data offset. 3097 */ 3098 if (loc->mbuf_off) { 3099 unsigned int dlen; 3100 uint8_t *dptr; 3101 3102 /* 3103 * Exhausted packets must be dropped before. 3104 * Non-zero offset means there are some data 3105 * remained in the packet. 3106 */ 3107 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 3108 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 3109 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 3110 loc->mbuf_off); 3111 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 3112 /* 3113 * Build the pointer/minimal data Data Segment. 3114 * Do ring buffer wrapping check in advance. 3115 */ 3116 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3117 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3118 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 3119 /* Store the mbuf to be freed on completion. */ 3120 MLX5_ASSERT(loc->elts_free); 3121 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3122 --loc->elts_free; 3123 ++dseg; 3124 if (--loc->mbuf_nseg == 0) 3125 goto dseg_done; 3126 loc->mbuf = loc->mbuf->next; 3127 loc->mbuf_off = 0; 3128 } 3129 do { 3130 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3131 struct rte_mbuf *mbuf; 3132 3133 /* Zero length segment found, just skip. */ 3134 mbuf = loc->mbuf; 3135 loc->mbuf = loc->mbuf->next; 3136 rte_pktmbuf_free_seg(mbuf); 3137 if (--loc->mbuf_nseg == 0) 3138 break; 3139 } else { 3140 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3141 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3142 mlx5_tx_dseg_iptr 3143 (txq, loc, dseg, 3144 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3145 rte_pktmbuf_data_len(loc->mbuf), olx); 3146 MLX5_ASSERT(loc->elts_free); 3147 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3148 --loc->elts_free; 3149 ++dseg; 3150 if (--loc->mbuf_nseg == 0) 3151 break; 3152 loc->mbuf = loc->mbuf->next; 3153 } 3154 } while (true); 3155 3156 dseg_done: 3157 /* Calculate actual segments used from the dseg pointer. 
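 * If the dseg pointer has wrapped around the WQE ring (dseg is below wqe), the ring size in bytes (txq->wqe_s * MLX5_WQE_SIZE) is added back before dividing by MLX5_WSEG_SIZE.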
*/ 3158 if ((uintptr_t)wqe < (uintptr_t)dseg) 3159 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 3160 else 3161 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 3162 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 3163 return ds; 3164 } 3165 3166 /** 3167 * The routine checks timestamp flag in the current packet, 3168 * and push WAIT WQE into the queue if scheduling is required. 3169 * 3170 * @param txq 3171 * Pointer to TX queue structure. 3172 * @param loc 3173 * Pointer to burst routine local context. 3174 * @param olx 3175 * Configured Tx offloads mask. It is fully defined at 3176 * compile time and may be used for optimization. 3177 * 3178 * @return 3179 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3180 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 3181 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 3182 * Local context variables partially updated. 3183 */ 3184 static __rte_always_inline enum mlx5_txcmp_code 3185 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 3186 struct mlx5_txq_local *restrict loc, 3187 unsigned int olx) 3188 { 3189 if (MLX5_TXOFF_CONFIG(TXPP) && 3190 loc->mbuf->ol_flags & txq->ts_mask) { 3191 struct mlx5_wqe *wqe; 3192 uint64_t ts; 3193 int32_t wci; 3194 3195 /* 3196 * Estimate the required space quickly and roughly. 3197 * We would like to ensure the packet can be pushed 3198 * to the queue and we won't get the orphan WAIT WQE. 3199 */ 3200 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 3201 loc->elts_free < NB_SEGS(loc->mbuf)) 3202 return MLX5_TXCMP_CODE_EXIT; 3203 /* Convert the timestamp into completion to wait. */ 3204 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 3205 wci = mlx5_txpp_convert_tx_ts(txq->sh, ts); 3206 if (unlikely(wci < 0)) 3207 return MLX5_TXCMP_CODE_SINGLE; 3208 /* Build the WAIT WQE with specified completion. */ 3209 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3210 mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx); 3211 mlx5_tx_wseg_init(txq, loc, wqe, wci, olx); 3212 ++txq->wqe_ci; 3213 --loc->wqe_free; 3214 return MLX5_TXCMP_CODE_MULTI; 3215 } 3216 return MLX5_TXCMP_CODE_SINGLE; 3217 } 3218 3219 /** 3220 * Tx one packet function for multi-segment TSO. Supports all 3221 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 3222 * sends one packet per WQE. 3223 * 3224 * This routine is responsible for storing processed mbuf 3225 * into elts ring buffer and update elts_head. 3226 * 3227 * @param txq 3228 * Pointer to TX queue structure. 3229 * @param loc 3230 * Pointer to burst routine local context. 3231 * @param olx 3232 * Configured Tx offloads mask. It is fully defined at 3233 * compile time and may be used for optimization. 3234 * 3235 * @return 3236 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3237 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3238 * Local context variables partially updated. 3239 */ 3240 static __rte_always_inline enum mlx5_txcmp_code 3241 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 3242 struct mlx5_txq_local *__rte_restrict loc, 3243 unsigned int olx) 3244 { 3245 struct mlx5_wqe *__rte_restrict wqe; 3246 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 3247 3248 if (MLX5_TXOFF_CONFIG(TXPP)) { 3249 enum mlx5_txcmp_code wret; 3250 3251 /* Generate WAIT for scheduling if requested. 
*/ 3252 wret = mlx5_tx_schedule_send(txq, loc, olx); 3253 if (wret == MLX5_TXCMP_CODE_EXIT) 3254 return MLX5_TXCMP_CODE_EXIT; 3255 if (wret == MLX5_TXCMP_CODE_ERROR) 3256 return MLX5_TXCMP_CODE_ERROR; 3257 } 3258 /* 3259 * Calculate data length to be inlined to estimate 3260 * the required space in WQE ring buffer. 3261 */ 3262 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3263 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3264 vlan = sizeof(struct rte_vlan_hdr); 3265 inlen = loc->mbuf->l2_len + vlan + 3266 loc->mbuf->l3_len + loc->mbuf->l4_len; 3267 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 3268 return MLX5_TXCMP_CODE_ERROR; 3269 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3270 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 3271 /* Packet must contain all TSO headers. */ 3272 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 3273 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3274 inlen > (dlen + vlan))) 3275 return MLX5_TXCMP_CODE_ERROR; 3276 MLX5_ASSERT(inlen >= txq->inlen_mode); 3277 /* 3278 * Check whether there are enough free WQEBBs: 3279 * - Control Segment 3280 * - Ethernet Segment 3281 * - First Segment of inlined Ethernet data 3282 * - ... data continued ... 3283 * - Data Segments of pointer/min inline type 3284 */ 3285 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3286 MLX5_ESEG_MIN_INLINE_SIZE + 3287 MLX5_WSEG_SIZE + 3288 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3289 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3290 return MLX5_TXCMP_CODE_EXIT; 3291 /* Check for maximal WQE size. */ 3292 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3293 return MLX5_TXCMP_CODE_ERROR; 3294 #ifdef MLX5_PMD_SOFT_COUNTERS 3295 /* Update sent data bytes/packets counters. */ 3296 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 3297 loc->mbuf->tso_segsz; 3298 /* 3299 * One will be added for mbuf itself 3300 * at the end of the mlx5_tx_burst from 3301 * loc->pkts_sent field. 3302 */ 3303 --ntcp; 3304 txq->stats.opackets += ntcp; 3305 txq->stats.obytes += dlen + vlan + ntcp * inlen; 3306 #endif 3307 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3308 loc->wqe_last = wqe; 3309 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 3310 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 3311 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3312 txq->wqe_ci += (ds + 3) / 4; 3313 loc->wqe_free -= (ds + 3) / 4; 3314 return MLX5_TXCMP_CODE_MULTI; 3315 } 3316 3317 /** 3318 * Tx one packet function for multi-segment SEND. Supports all 3319 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3320 * sends one packet per WQE, without any data inlining in 3321 * Ethernet Segment. 3322 * 3323 * This routine is responsible for storing processed mbuf 3324 * into elts ring buffer and update elts_head. 3325 * 3326 * @param txq 3327 * Pointer to TX queue structure. 3328 * @param loc 3329 * Pointer to burst routine local context. 3330 * @param olx 3331 * Configured Tx offloads mask. It is fully defined at 3332 * compile time and may be used for optimization. 3333 * 3334 * @return 3335 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3336 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3337 * Local context variables partially updated. 
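 * The WQE takes ds = 2 + NB_SEGS(mbuf) segments (Control + Ethernet + one pointer Data Segment per mbuf segment), i.e. (ds + 3) / 4 WQEBBs - e.g. a 3-segment packet consumes 5 segments and thus 2 WQEBBs.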
3338 */ 3339 static __rte_always_inline enum mlx5_txcmp_code 3340 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 3341 struct mlx5_txq_local *__rte_restrict loc, 3342 unsigned int olx) 3343 { 3344 struct mlx5_wqe_dseg *__rte_restrict dseg; 3345 struct mlx5_wqe *__rte_restrict wqe; 3346 unsigned int ds, nseg; 3347 3348 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3349 if (MLX5_TXOFF_CONFIG(TXPP)) { 3350 enum mlx5_txcmp_code wret; 3351 3352 /* Generate WAIT for scheduling if requested. */ 3353 wret = mlx5_tx_schedule_send(txq, loc, olx); 3354 if (wret == MLX5_TXCMP_CODE_EXIT) 3355 return MLX5_TXCMP_CODE_EXIT; 3356 if (wret == MLX5_TXCMP_CODE_ERROR) 3357 return MLX5_TXCMP_CODE_ERROR; 3358 } 3359 /* 3360 * No inline at all, it means the CPU cycles saving 3361 * is prioritized at configuration, we should not 3362 * copy any packet data to WQE. 3363 */ 3364 nseg = NB_SEGS(loc->mbuf); 3365 ds = 2 + nseg; 3366 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3367 return MLX5_TXCMP_CODE_EXIT; 3368 /* Check for maximal WQE size. */ 3369 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3370 return MLX5_TXCMP_CODE_ERROR; 3371 /* 3372 * Some Tx offloads may cause an error if 3373 * packet is not long enough, check against 3374 * assumed minimal length. 3375 */ 3376 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 3377 return MLX5_TXCMP_CODE_ERROR; 3378 #ifdef MLX5_PMD_SOFT_COUNTERS 3379 /* Update sent data bytes counter. */ 3380 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 3381 if (MLX5_TXOFF_CONFIG(VLAN) && 3382 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3383 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 3384 #endif 3385 /* 3386 * SEND WQE, one WQEBB: 3387 * - Control Segment, SEND opcode 3388 * - Ethernet Segment, optional VLAN, no inline 3389 * - Data Segments, pointer only type 3390 */ 3391 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3392 loc->wqe_last = wqe; 3393 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 3394 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3395 dseg = &wqe->dseg[0]; 3396 do { 3397 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3398 struct rte_mbuf *mbuf; 3399 3400 /* 3401 * Zero length segment found, have to 3402 * correct total size of WQE in segments. 3403 * It is supposed to be rare occasion, so 3404 * in normal case (no zero length segments) 3405 * we avoid extra writing to the Control 3406 * Segment. 3407 */ 3408 --ds; 3409 wqe->cseg.sq_ds -= RTE_BE32(1); 3410 mbuf = loc->mbuf; 3411 loc->mbuf = mbuf->next; 3412 rte_pktmbuf_free_seg(mbuf); 3413 if (--nseg == 0) 3414 break; 3415 } else { 3416 mlx5_tx_dseg_ptr 3417 (txq, loc, dseg, 3418 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3419 rte_pktmbuf_data_len(loc->mbuf), olx); 3420 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3421 --loc->elts_free; 3422 if (--nseg == 0) 3423 break; 3424 ++dseg; 3425 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3426 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3427 loc->mbuf = loc->mbuf->next; 3428 } 3429 } while (true); 3430 txq->wqe_ci += (ds + 3) / 4; 3431 loc->wqe_free -= (ds + 3) / 4; 3432 return MLX5_TXCMP_CODE_MULTI; 3433 } 3434 3435 /** 3436 * Tx one packet function for multi-segment SEND. Supports all 3437 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3438 * sends one packet per WQE, with data inlining in 3439 * Ethernet Segment and minimal Data Segments. 3440 * 3441 * This routine is responsible for storing processed mbuf 3442 * into elts ring buffer and update elts_head. 
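 * The inlined amount is bounded by txq->inlen_send and, if configured, by the mandatory txq->inlen_mode minimum; packets that cannot or should not be inlined are handed over to mlx5_tx_packet_multi_send().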
3443 * 3444 * @param txq 3445 * Pointer to TX queue structure. 3446 * @param loc 3447 * Pointer to burst routine local context. 3448 * @param olx 3449 * Configured Tx offloads mask. It is fully defined at 3450 * compile time and may be used for optimization. 3451 * 3452 * @return 3453 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3454 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3455 * Local context variables partially updated. 3456 */ 3457 static __rte_always_inline enum mlx5_txcmp_code 3458 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 3459 struct mlx5_txq_local *__rte_restrict loc, 3460 unsigned int olx) 3461 { 3462 struct mlx5_wqe *__rte_restrict wqe; 3463 unsigned int ds, inlen, dlen, vlan = 0; 3464 3465 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3466 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3467 if (MLX5_TXOFF_CONFIG(TXPP)) { 3468 enum mlx5_txcmp_code wret; 3469 3470 /* Generate WAIT for scheduling if requested. */ 3471 wret = mlx5_tx_schedule_send(txq, loc, olx); 3472 if (wret == MLX5_TXCMP_CODE_EXIT) 3473 return MLX5_TXCMP_CODE_EXIT; 3474 if (wret == MLX5_TXCMP_CODE_ERROR) 3475 return MLX5_TXCMP_CODE_ERROR; 3476 } 3477 /* 3478 * First calculate data length to be inlined 3479 * to estimate the required space for WQE. 3480 */ 3481 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3482 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3483 vlan = sizeof(struct rte_vlan_hdr); 3484 inlen = dlen + vlan; 3485 /* Check against minimal length. */ 3486 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3487 return MLX5_TXCMP_CODE_ERROR; 3488 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 3489 if (inlen > txq->inlen_send || 3490 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 3491 struct rte_mbuf *mbuf; 3492 unsigned int nxlen; 3493 uintptr_t start; 3494 3495 /* 3496 * Packet length exceeds the allowed inline 3497 * data length, check whether the minimal 3498 * inlining is required. 3499 */ 3500 if (txq->inlen_mode) { 3501 MLX5_ASSERT(txq->inlen_mode >= 3502 MLX5_ESEG_MIN_INLINE_SIZE); 3503 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 3504 inlen = txq->inlen_mode; 3505 } else { 3506 if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE || 3507 !vlan || txq->vlan_en) { 3508 /* 3509 * VLAN insertion will be done inside by HW. 3510 * It is not utmost effective - VLAN flag is 3511 * checked twice, but we should proceed the 3512 * inlining length correctly and take into 3513 * account the VLAN header being inserted. 3514 */ 3515 return mlx5_tx_packet_multi_send 3516 (txq, loc, olx); 3517 } 3518 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 3519 } 3520 /* 3521 * Now we know the minimal amount of data is requested 3522 * to inline. Check whether we should inline the buffers 3523 * from the chain beginning to eliminate some mbufs. 3524 */ 3525 mbuf = loc->mbuf; 3526 nxlen = rte_pktmbuf_data_len(mbuf); 3527 if (unlikely(nxlen <= txq->inlen_send)) { 3528 /* We can inline first mbuf at least. */ 3529 if (nxlen < inlen) { 3530 unsigned int smlen; 3531 3532 /* Scan mbufs till inlen filled. */ 3533 do { 3534 smlen = nxlen; 3535 mbuf = NEXT(mbuf); 3536 MLX5_ASSERT(mbuf); 3537 nxlen = rte_pktmbuf_data_len(mbuf); 3538 nxlen += smlen; 3539 } while (unlikely(nxlen < inlen)); 3540 if (unlikely(nxlen > txq->inlen_send)) { 3541 /* We cannot inline entire mbuf. 
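 * Only the leading part of this mbuf is inlined; the code below then tries to extend the inline length so that the remaining, pointer-referenced data starts on a cacheline boundary.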
*/ 3542 smlen = inlen - smlen; 3543 start = rte_pktmbuf_mtod_offset 3544 (mbuf, uintptr_t, smlen); 3545 goto do_align; 3546 } 3547 } 3548 do { 3549 inlen = nxlen; 3550 mbuf = NEXT(mbuf); 3551 /* This should not be the end of the packet. */ 3552 MLX5_ASSERT(mbuf); 3553 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 3554 } while (unlikely(nxlen < txq->inlen_send)); 3555 } 3556 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 3557 /* 3558 * Check whether we can extend the inlined data to align the 3559 * start address of the remaining data buffer to a cacheline. 3560 */ 3561 do_align: 3562 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 3563 if (unlikely(start)) { 3564 start += inlen; 3565 if (start <= txq->inlen_send) 3566 inlen = start; 3567 } 3568 } 3569 /* 3570 * Check whether there are enough free WQEBBs: 3571 * - Control Segment 3572 * - Ethernet Segment 3573 * - First Segment of inlined Ethernet data 3574 * - ... data continued ... 3575 * - Data Segments of pointer/min inline type 3576 * 3577 * Estimate the number of Data Segments conservatively, 3578 * assuming that no mbufs are freed during inlining. 3579 */ 3580 MLX5_ASSERT(inlen <= txq->inlen_send); 3581 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3582 MLX5_ESEG_MIN_INLINE_SIZE + 3583 MLX5_WSEG_SIZE + 3584 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3585 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3586 return MLX5_TXCMP_CODE_EXIT; 3587 /* Check for maximal WQE size. */ 3588 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3589 return MLX5_TXCMP_CODE_ERROR; 3590 #ifdef MLX5_PMD_SOFT_COUNTERS 3591 /* Update sent data bytes/packets counters. */ 3592 txq->stats.obytes += dlen + vlan; 3593 #endif 3594 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3595 loc->wqe_last = wqe; 3596 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 3597 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 3598 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3599 txq->wqe_ci += (ds + 3) / 4; 3600 loc->wqe_free -= (ds + 3) / 4; 3601 return MLX5_TXCMP_CODE_MULTI; 3602 } 3603 3604 /** 3605 * Tx burst function for multi-segment packets. Supports all 3606 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 3607 * sends one packet per WQE. Function stops sending if it 3608 * encounters a single-segment packet. 3609 * 3610 * This routine is responsible for storing processed mbufs 3611 * into the elts ring buffer and updating elts_head. 3612 * 3613 * @param txq 3614 * Pointer to TX queue structure. 3615 * @param[in] pkts 3616 * Packets to transmit. 3617 * @param pkts_n 3618 * Number of packets in array. 3619 * @param loc 3620 * Pointer to burst routine local context. 3621 * @param olx 3622 * Configured Tx offloads mask. It is fully defined at 3623 * compile time and may be used for optimization. 3624 * 3625 * @return 3626 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3627 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3628 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3629 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 3630 * Local context variables updated.
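 * The SINGLE/TSO return codes tell the caller which single-segment routine should continue processing the current loc->mbuf.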
3631 */ 3632 static __rte_always_inline enum mlx5_txcmp_code 3633 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 3634 struct rte_mbuf **__rte_restrict pkts, 3635 unsigned int pkts_n, 3636 struct mlx5_txq_local *__rte_restrict loc, 3637 unsigned int olx) 3638 { 3639 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3640 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3641 pkts += loc->pkts_sent + 1; 3642 pkts_n -= loc->pkts_sent; 3643 for (;;) { 3644 enum mlx5_txcmp_code ret; 3645 3646 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3647 /* 3648 * Estimate the number of free elts quickly but 3649 * conservatively. Some segment may be fully inlined 3650 * and freed, ignore this here - precise estimation 3651 * is costly. 3652 */ 3653 if (loc->elts_free < NB_SEGS(loc->mbuf)) 3654 return MLX5_TXCMP_CODE_EXIT; 3655 if (MLX5_TXOFF_CONFIG(TSO) && 3656 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) { 3657 /* Proceed with multi-segment TSO. */ 3658 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 3659 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 3660 /* Proceed with multi-segment SEND with inlining. */ 3661 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 3662 } else { 3663 /* Proceed with multi-segment SEND w/o inlining. */ 3664 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 3665 } 3666 if (ret == MLX5_TXCMP_CODE_EXIT) 3667 return MLX5_TXCMP_CODE_EXIT; 3668 if (ret == MLX5_TXCMP_CODE_ERROR) 3669 return MLX5_TXCMP_CODE_ERROR; 3670 /* WQE is built, go to the next packet. */ 3671 ++loc->pkts_sent; 3672 --pkts_n; 3673 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3674 return MLX5_TXCMP_CODE_EXIT; 3675 loc->mbuf = *pkts++; 3676 if (pkts_n > 1) 3677 rte_prefetch0(*pkts); 3678 if (likely(NB_SEGS(loc->mbuf) > 1)) 3679 continue; 3680 /* Here ends the series of multi-segment packets. */ 3681 if (MLX5_TXOFF_CONFIG(TSO) && 3682 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3683 return MLX5_TXCMP_CODE_TSO; 3684 return MLX5_TXCMP_CODE_SINGLE; 3685 } 3686 MLX5_ASSERT(false); 3687 } 3688 3689 /** 3690 * Tx burst function for single-segment packets with TSO. 3691 * Supports all types of Tx offloads, except multi-packets. 3692 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 3693 * Function stops sending if it encounters the multi-segment 3694 * packet or packet without TSO requested. 3695 * 3696 * The routine is responsible for storing processed mbuf 3697 * into elts ring buffer and update elts_head if inline 3698 * offloads is requested due to possible early freeing 3699 * of the inlined mbufs (can not store pkts array in elts 3700 * as a batch). 3701 * 3702 * @param txq 3703 * Pointer to TX queue structure. 3704 * @param[in] pkts 3705 * Packets to transmit. 3706 * @param pkts_n 3707 * Number of packets in array. 3708 * @param loc 3709 * Pointer to burst routine local context. 3710 * @param olx 3711 * Configured Tx offloads mask. It is fully defined at 3712 * compile time and may be used for optimization. 3713 * 3714 * @return 3715 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3716 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3717 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3718 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3719 * Local context variables updated. 
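 *
 * Worked example of the WQE size estimation done below (illustrative
 * only, assuming 16-byte WQE segments and an 18-byte minimal inline
 * Ethernet Segment): for a plain TCPv4 packet the inlined headers are
 * hlen = 14 (L2) + 20 (L3) + 20 (L4) = 54 bytes, so
 * ds = 4 + (54 - 18 + 15) / 16 = 7 segments, which occupies
 * (7 + 3) / 4 = 2 WQEBBs of the Send Queue.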
3720 */ 3721 static __rte_always_inline enum mlx5_txcmp_code 3722 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 3723 struct rte_mbuf **__rte_restrict pkts, 3724 unsigned int pkts_n, 3725 struct mlx5_txq_local *__rte_restrict loc, 3726 unsigned int olx) 3727 { 3728 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3729 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3730 pkts += loc->pkts_sent + 1; 3731 pkts_n -= loc->pkts_sent; 3732 for (;;) { 3733 struct mlx5_wqe_dseg *__rte_restrict dseg; 3734 struct mlx5_wqe *__rte_restrict wqe; 3735 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 3736 uint8_t *dptr; 3737 3738 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3739 if (MLX5_TXOFF_CONFIG(TXPP)) { 3740 enum mlx5_txcmp_code wret; 3741 3742 /* Generate WAIT for scheduling if requested. */ 3743 wret = mlx5_tx_schedule_send(txq, loc, olx); 3744 if (wret == MLX5_TXCMP_CODE_EXIT) 3745 return MLX5_TXCMP_CODE_EXIT; 3746 if (wret == MLX5_TXCMP_CODE_ERROR) 3747 return MLX5_TXCMP_CODE_ERROR; 3748 } 3749 dlen = rte_pktmbuf_data_len(loc->mbuf); 3750 if (MLX5_TXOFF_CONFIG(VLAN) && 3751 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 3752 vlan = sizeof(struct rte_vlan_hdr); 3753 } 3754 /* 3755 * First calculate the WQE size to check 3756 * whether we have enough space in ring buffer. 3757 */ 3758 hlen = loc->mbuf->l2_len + vlan + 3759 loc->mbuf->l3_len + loc->mbuf->l4_len; 3760 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 3761 return MLX5_TXCMP_CODE_ERROR; 3762 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3763 hlen += loc->mbuf->outer_l2_len + 3764 loc->mbuf->outer_l3_len; 3765 /* Segment must contain all TSO headers. */ 3766 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 3767 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3768 hlen > (dlen + vlan))) 3769 return MLX5_TXCMP_CODE_ERROR; 3770 /* 3771 * Check whether there are enough free WQEBBs: 3772 * - Control Segment 3773 * - Ethernet Segment 3774 * - First Segment of inlined Ethernet data 3775 * - ... data continued ... 3776 * - Finishing Data Segment of pointer type 3777 */ 3778 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 3779 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3780 if (loc->wqe_free < ((ds + 3) / 4)) 3781 return MLX5_TXCMP_CODE_EXIT; 3782 #ifdef MLX5_PMD_SOFT_COUNTERS 3783 /* Update sent data bytes/packets counters. */ 3784 ntcp = (dlen + vlan - hlen + 3785 loc->mbuf->tso_segsz - 1) / 3786 loc->mbuf->tso_segsz; 3787 /* 3788 * One will be added for mbuf itself at the end 3789 * of the mlx5_tx_burst from loc->pkts_sent field. 3790 */ 3791 --ntcp; 3792 txq->stats.opackets += ntcp; 3793 txq->stats.obytes += dlen + vlan + ntcp * hlen; 3794 #endif 3795 /* 3796 * Build the TSO WQE: 3797 * - Control Segment 3798 * - Ethernet Segment with hlen bytes inlined 3799 * - Data Segment of pointer type 3800 */ 3801 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3802 loc->wqe_last = wqe; 3803 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3804 MLX5_OPCODE_TSO, olx); 3805 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 3806 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 3807 dlen -= hlen - vlan; 3808 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 3809 /* 3810 * WQE is built, update the loop parameters 3811 * and go to the next packet. 
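 * Note that ds counts 16-byte WQE segments while wqe_ci counts 64-byte
 * WQEBBs (four segments each), hence the round-up by (ds + 3) / 4
 * below; e.g. a 7-segment TSO WQE advances wqe_ci by 2.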
3812 */ 3813 txq->wqe_ci += (ds + 3) / 4; 3814 loc->wqe_free -= (ds + 3) / 4; 3815 if (MLX5_TXOFF_CONFIG(INLINE)) 3816 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3817 --loc->elts_free; 3818 ++loc->pkts_sent; 3819 --pkts_n; 3820 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3821 return MLX5_TXCMP_CODE_EXIT; 3822 loc->mbuf = *pkts++; 3823 if (pkts_n > 1) 3824 rte_prefetch0(*pkts); 3825 if (MLX5_TXOFF_CONFIG(MULTI) && 3826 unlikely(NB_SEGS(loc->mbuf) > 1)) 3827 return MLX5_TXCMP_CODE_MULTI; 3828 if (likely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) 3829 return MLX5_TXCMP_CODE_SINGLE; 3830 /* Continue with the next TSO packet. */ 3831 } 3832 MLX5_ASSERT(false); 3833 } 3834 3835 /** 3836 * Analyze the packet and select the best method to send. 3837 * 3838 * @param txq 3839 * Pointer to TX queue structure. 3840 * @param loc 3841 * Pointer to burst routine local context. 3842 * @param olx 3843 * Configured Tx offloads mask. It is fully defined at 3844 * compile time and may be used for optimization. 3845 * @param newp 3846 * The predefined flag whether do complete check for 3847 * multi-segment packets and TSO. 3848 * 3849 * @return 3850 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3851 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 3852 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 3853 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 3854 */ 3855 static __rte_always_inline enum mlx5_txcmp_code 3856 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 3857 struct mlx5_txq_local *__rte_restrict loc, 3858 unsigned int olx, 3859 bool newp) 3860 { 3861 /* Check for multi-segment packet. */ 3862 if (newp && 3863 MLX5_TXOFF_CONFIG(MULTI) && 3864 unlikely(NB_SEGS(loc->mbuf) > 1)) 3865 return MLX5_TXCMP_CODE_MULTI; 3866 /* Check for TSO packet. */ 3867 if (newp && 3868 MLX5_TXOFF_CONFIG(TSO) && 3869 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3870 return MLX5_TXCMP_CODE_TSO; 3871 /* Check if eMPW is enabled at all. */ 3872 if (!MLX5_TXOFF_CONFIG(EMPW)) 3873 return MLX5_TXCMP_CODE_SINGLE; 3874 /* Check if eMPW can be engaged. */ 3875 if (MLX5_TXOFF_CONFIG(VLAN) && 3876 unlikely(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) && 3877 (!MLX5_TXOFF_CONFIG(INLINE) || 3878 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 3879 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 3880 /* 3881 * eMPW does not support VLAN insertion offload, 3882 * we have to inline the entire packet but 3883 * packet is too long for inlining. 3884 */ 3885 return MLX5_TXCMP_CODE_SINGLE; 3886 } 3887 return MLX5_TXCMP_CODE_EMPW; 3888 } 3889 3890 /** 3891 * Check the next packet attributes to match with the eMPW batch ones. 3892 * In addition, for legacy MPW the packet length is checked either. 3893 * 3894 * @param txq 3895 * Pointer to TX queue structure. 3896 * @param es 3897 * Pointer to Ethernet Segment of eMPW batch. 3898 * @param loc 3899 * Pointer to burst routine local context. 3900 * @param dlen 3901 * Length of previous packet in MPW descriptor. 3902 * @param olx 3903 * Configured Tx offloads mask. It is fully defined at 3904 * compile time and may be used for optimization. 3905 * 3906 * @return 3907 * true - packet match with eMPW batch attributes. 3908 * false - no match, eMPW should be restarted. 
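 *
 * Typical use inside the eMPW builders below (sketch only, the local
 * variables are those of the calling loop):
 *
 * @code
 *   if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) {
 *           // attributes changed - close the current eMPW session
 *           part -= loop;
 *           mlx5_tx_sdone_empw(txq, loc, part, slen, olx);
 *           goto next_empw;
 *   }
 * @endcode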
3909 */ 3910 static __rte_always_inline bool 3911 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, 3912 struct mlx5_wqe_eseg *__rte_restrict es, 3913 struct mlx5_txq_local *__rte_restrict loc, 3914 uint32_t dlen, 3915 unsigned int olx) 3916 { 3917 uint8_t swp_flags = 0; 3918 3919 /* Compare the checksum flags, if any. */ 3920 if (MLX5_TXOFF_CONFIG(CSUM) && 3921 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags) 3922 return false; 3923 /* Compare the Software Parser offsets and flags. */ 3924 if (MLX5_TXOFF_CONFIG(SWP) && 3925 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) || 3926 es->swp_flags != swp_flags)) 3927 return false; 3928 /* Fill metadata field if needed. */ 3929 if (MLX5_TXOFF_CONFIG(METADATA) && 3930 es->metadata != (loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 3931 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0)) 3932 return false; 3933 /* Legacy MPW can send packets with the same lengt only. */ 3934 if (MLX5_TXOFF_CONFIG(MPW) && 3935 dlen != rte_pktmbuf_data_len(loc->mbuf)) 3936 return false; 3937 /* There must be no VLAN packets in eMPW loop. */ 3938 if (MLX5_TXOFF_CONFIG(VLAN)) 3939 MLX5_ASSERT(!(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT)); 3940 /* Check if the scheduling is requested. */ 3941 if (MLX5_TXOFF_CONFIG(TXPP) && 3942 loc->mbuf->ol_flags & txq->ts_mask) 3943 return false; 3944 return true; 3945 } 3946 3947 /* 3948 * Update send loop variables and WQE for eMPW loop 3949 * without data inlining. Number of Data Segments is 3950 * equal to the number of sent packets. 3951 * 3952 * @param txq 3953 * Pointer to TX queue structure. 3954 * @param loc 3955 * Pointer to burst routine local context. 3956 * @param ds 3957 * Number of packets/Data Segments/Packets. 3958 * @param slen 3959 * Accumulated statistics, bytes sent 3960 * @param olx 3961 * Configured Tx offloads mask. It is fully defined at 3962 * compile time and may be used for optimization. 3963 * 3964 * @return 3965 * true - packet match with eMPW batch attributes. 3966 * false - no match, eMPW should be restarted. 3967 */ 3968 static __rte_always_inline void 3969 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, 3970 struct mlx5_txq_local *__rte_restrict loc, 3971 unsigned int ds, 3972 unsigned int slen, 3973 unsigned int olx __rte_unused) 3974 { 3975 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 3976 #ifdef MLX5_PMD_SOFT_COUNTERS 3977 /* Update sent data bytes counter. */ 3978 txq->stats.obytes += slen; 3979 #else 3980 (void)slen; 3981 #endif 3982 loc->elts_free -= ds; 3983 loc->pkts_sent += ds; 3984 ds += 2; 3985 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3986 txq->wqe_ci += (ds + 3) / 4; 3987 loc->wqe_free -= (ds + 3) / 4; 3988 } 3989 3990 /* 3991 * Update send loop variables and WQE for eMPW loop 3992 * with data inlining. Gets the size of pushed descriptors 3993 * and data to the WQE. 3994 * 3995 * @param txq 3996 * Pointer to TX queue structure. 3997 * @param loc 3998 * Pointer to burst routine local context. 3999 * @param len 4000 * Total size of descriptor/data in bytes. 4001 * @param slen 4002 * Accumulated statistics, data bytes sent. 4003 * @param wqem 4004 * The base WQE for the eMPW/MPW descriptor. 4005 * @param olx 4006 * Configured Tx offloads mask. It is fully defined at 4007 * compile time and may be used for optimization. 4008 * 4009 * @return 4010 * true - packet match with eMPW batch attributes. 4011 * false - no match, eMPW should be restarted. 
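 *
 * Worked example (illustrative only, assuming 16-byte WQE segments):
 * if the session accumulated len = 96 bytes of Data Segments and
 * inlined data, the non-legacy branch below sets the DS count to
 * 96 / 16 + 2 = 8 (Control and Ethernet Segments included), and the
 * session occupies (8 + 3) / 4 = 2 WQEBBs.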
4012 */ 4013 static __rte_always_inline void 4014 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 4015 struct mlx5_txq_local *__rte_restrict loc, 4016 unsigned int len, 4017 unsigned int slen, 4018 struct mlx5_wqe *__rte_restrict wqem, 4019 unsigned int olx __rte_unused) 4020 { 4021 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 4022 4023 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4024 #ifdef MLX5_PMD_SOFT_COUNTERS 4025 /* Update sent data bytes counter. */ 4026 txq->stats.obytes += slen; 4027 #else 4028 (void)slen; 4029 #endif 4030 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 4031 /* 4032 * If the legacy MPW session contains the inline packets 4033 * we should set the only inline data segment length 4034 * and align the total length to the segment size. 4035 */ 4036 MLX5_ASSERT(len > sizeof(dseg->bcount)); 4037 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 4038 MLX5_ETH_WQE_DATA_INLINE); 4039 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 4040 } else { 4041 /* 4042 * The session is not legacy MPW or contains the 4043 * data buffer pointer segments. 4044 */ 4045 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 4046 len = len / MLX5_WSEG_SIZE + 2; 4047 } 4048 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 4049 txq->wqe_ci += (len + 3) / 4; 4050 loc->wqe_free -= (len + 3) / 4; 4051 loc->wqe_last = wqem; 4052 } 4053 4054 /** 4055 * The set of Tx burst functions for single-segment packets 4056 * without TSO and with Multi-Packet Writing feature support. 4057 * Supports all types of Tx offloads, except multi-packets 4058 * and TSO. 4059 * 4060 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends 4061 * as many packet per WQE as it can. If eMPW is not configured 4062 * or packet can not be sent with eMPW (VLAN insertion) the 4063 * ordinary SEND opcode is used and only one packet placed 4064 * in WQE. 4065 * 4066 * Functions stop sending if it encounters the multi-segment 4067 * packet or packet with TSO requested. 4068 * 4069 * The routines are responsible for storing processed mbuf 4070 * into elts ring buffer and update elts_head if inlining 4071 * offload is requested. Otherwise the copying mbufs to elts 4072 * can be postponed and completed at the end of burst routine. 4073 * 4074 * @param txq 4075 * Pointer to TX queue structure. 4076 * @param[in] pkts 4077 * Packets to transmit. 4078 * @param pkts_n 4079 * Number of packets in array. 4080 * @param loc 4081 * Pointer to burst routine local context. 4082 * @param olx 4083 * Configured Tx offloads mask. It is fully defined at 4084 * compile time and may be used for optimization. 4085 * 4086 * @return 4087 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 4088 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 4089 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 4090 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 4091 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 4092 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 4093 * 4094 * Local context variables updated. 4095 * 4096 * 4097 * The routine sends packets with MLX5_OPCODE_EMPW 4098 * without inlining, this is dedicated optimized branch. 4099 * No VLAN insertion is supported. 
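 *
 * Worked example of the WQEBB accounting used below (illustrative
 * only, assuming 16-byte WQE segments): a batch of part = 6 packets
 * takes one Control Segment, one Ethernet Segment and six pointer
 * Data Segments, i.e. 2 + 6 = 8 segments, which is
 * (2 + 6 + 3) / 4 = 2 WQEBBs consumed from the Send Queue.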
4100 */ 4101 static __rte_always_inline enum mlx5_txcmp_code 4102 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 4103 struct rte_mbuf **__rte_restrict pkts, 4104 unsigned int pkts_n, 4105 struct mlx5_txq_local *__rte_restrict loc, 4106 unsigned int olx) 4107 { 4108 /* 4109 * Subroutine is the part of mlx5_tx_burst_single() 4110 * and sends single-segment packet with eMPW opcode 4111 * without data inlining. 4112 */ 4113 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4114 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 4115 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4116 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4117 pkts += loc->pkts_sent + 1; 4118 pkts_n -= loc->pkts_sent; 4119 for (;;) { 4120 struct mlx5_wqe_dseg *__rte_restrict dseg; 4121 struct mlx5_wqe_eseg *__rte_restrict eseg; 4122 enum mlx5_txcmp_code ret; 4123 unsigned int part, loop; 4124 unsigned int slen = 0; 4125 4126 next_empw: 4127 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4128 if (MLX5_TXOFF_CONFIG(TXPP)) { 4129 enum mlx5_txcmp_code wret; 4130 4131 /* Generate WAIT for scheduling if requested. */ 4132 wret = mlx5_tx_schedule_send(txq, loc, olx); 4133 if (wret == MLX5_TXCMP_CODE_EXIT) 4134 return MLX5_TXCMP_CODE_EXIT; 4135 if (wret == MLX5_TXCMP_CODE_ERROR) 4136 return MLX5_TXCMP_CODE_ERROR; 4137 } 4138 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4139 MLX5_MPW_MAX_PACKETS : 4140 MLX5_EMPW_MAX_PACKETS); 4141 if (unlikely(loc->elts_free < part)) { 4142 /* We have no enough elts to save all mbufs. */ 4143 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 4144 return MLX5_TXCMP_CODE_EXIT; 4145 /* But we still able to send at least minimal eMPW. */ 4146 part = loc->elts_free; 4147 } 4148 /* Check whether we have enough WQEs */ 4149 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 4150 if (unlikely(loc->wqe_free < 4151 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4152 return MLX5_TXCMP_CODE_EXIT; 4153 part = (loc->wqe_free * 4) - 2; 4154 } 4155 if (likely(part > 1)) 4156 rte_prefetch0(*pkts); 4157 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4158 /* 4159 * Build eMPW title WQEBB: 4160 * - Control Segment, eMPW opcode 4161 * - Ethernet Segment, no inline 4162 */ 4163 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 4164 MLX5_OPCODE_ENHANCED_MPSW, olx); 4165 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 4166 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4167 eseg = &loc->wqe_last->eseg; 4168 dseg = &loc->wqe_last->dseg[0]; 4169 loop = part; 4170 /* Store the packet length for legacy MPW. */ 4171 if (MLX5_TXOFF_CONFIG(MPW)) 4172 eseg->mss = rte_cpu_to_be_16 4173 (rte_pktmbuf_data_len(loc->mbuf)); 4174 for (;;) { 4175 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4176 #ifdef MLX5_PMD_SOFT_COUNTERS 4177 /* Update sent data bytes counter. */ 4178 slen += dlen; 4179 #endif 4180 mlx5_tx_dseg_ptr 4181 (txq, loc, dseg, 4182 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4183 dlen, olx); 4184 if (unlikely(--loop == 0)) 4185 break; 4186 loc->mbuf = *pkts++; 4187 if (likely(loop > 1)) 4188 rte_prefetch0(*pkts); 4189 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4190 /* 4191 * Unroll the completion code to avoid 4192 * returning variable value - it results in 4193 * unoptimized sequent checking in caller. 
4194 */ 4195 if (ret == MLX5_TXCMP_CODE_MULTI) { 4196 part -= loop; 4197 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4198 if (unlikely(!loc->elts_free || 4199 !loc->wqe_free)) 4200 return MLX5_TXCMP_CODE_EXIT; 4201 return MLX5_TXCMP_CODE_MULTI; 4202 } 4203 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4204 if (ret == MLX5_TXCMP_CODE_TSO) { 4205 part -= loop; 4206 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4207 if (unlikely(!loc->elts_free || 4208 !loc->wqe_free)) 4209 return MLX5_TXCMP_CODE_EXIT; 4210 return MLX5_TXCMP_CODE_TSO; 4211 } 4212 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4213 part -= loop; 4214 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4215 if (unlikely(!loc->elts_free || 4216 !loc->wqe_free)) 4217 return MLX5_TXCMP_CODE_EXIT; 4218 return MLX5_TXCMP_CODE_SINGLE; 4219 } 4220 if (ret != MLX5_TXCMP_CODE_EMPW) { 4221 MLX5_ASSERT(false); 4222 part -= loop; 4223 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4224 return MLX5_TXCMP_CODE_ERROR; 4225 } 4226 /* 4227 * Check whether packet parameters coincide 4228 * within assumed eMPW batch: 4229 * - check sum settings 4230 * - metadata value 4231 * - software parser settings 4232 * - packets length (legacy MPW only) 4233 * - scheduling is not required 4234 */ 4235 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 4236 MLX5_ASSERT(loop); 4237 part -= loop; 4238 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4239 if (unlikely(!loc->elts_free || 4240 !loc->wqe_free)) 4241 return MLX5_TXCMP_CODE_EXIT; 4242 pkts_n -= part; 4243 goto next_empw; 4244 } 4245 /* Packet attributes match, continue the same eMPW. */ 4246 ++dseg; 4247 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4248 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4249 } 4250 /* eMPW is built successfully, update loop parameters. */ 4251 MLX5_ASSERT(!loop); 4252 MLX5_ASSERT(pkts_n >= part); 4253 #ifdef MLX5_PMD_SOFT_COUNTERS 4254 /* Update sent data bytes counter. */ 4255 txq->stats.obytes += slen; 4256 #endif 4257 loc->elts_free -= part; 4258 loc->pkts_sent += part; 4259 txq->wqe_ci += (2 + part + 3) / 4; 4260 loc->wqe_free -= (2 + part + 3) / 4; 4261 pkts_n -= part; 4262 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4263 return MLX5_TXCMP_CODE_EXIT; 4264 loc->mbuf = *pkts++; 4265 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4266 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 4267 return ret; 4268 /* Continue sending eMPW batches. */ 4269 } 4270 MLX5_ASSERT(false); 4271 } 4272 4273 /** 4274 * The routine sends packets with MLX5_OPCODE_EMPW 4275 * with inlining, optionally supports VLAN insertion. 4276 */ 4277 static __rte_always_inline enum mlx5_txcmp_code 4278 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 4279 struct rte_mbuf **__rte_restrict pkts, 4280 unsigned int pkts_n, 4281 struct mlx5_txq_local *__rte_restrict loc, 4282 unsigned int olx) 4283 { 4284 /* 4285 * Subroutine is the part of mlx5_tx_burst_single() 4286 * and sends single-segment packet with eMPW opcode 4287 * with data inlining. 
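 *
 * For illustration, the "room" budget computed below is the byte
 * space left for Data Segments and inlined data once the title
 * Control and Ethernet Segments are reserved; e.g. with 16 free
 * WQEBBs (assuming 64-byte WQEBBs, 16-byte Control/Ethernet Segments
 * and the per-WQE size cap not being the limiting factor) it is
 * 16 * 64 - 16 - 16 = 992 bytes.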
4288 */ 4289 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4290 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 4291 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4292 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4293 pkts += loc->pkts_sent + 1; 4294 pkts_n -= loc->pkts_sent; 4295 for (;;) { 4296 struct mlx5_wqe_dseg *__rte_restrict dseg; 4297 struct mlx5_wqe *__rte_restrict wqem; 4298 enum mlx5_txcmp_code ret; 4299 unsigned int room, part, nlim; 4300 unsigned int slen = 0; 4301 4302 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4303 if (MLX5_TXOFF_CONFIG(TXPP)) { 4304 enum mlx5_txcmp_code wret; 4305 4306 /* Generate WAIT for scheduling if requested. */ 4307 wret = mlx5_tx_schedule_send(txq, loc, olx); 4308 if (wret == MLX5_TXCMP_CODE_EXIT) 4309 return MLX5_TXCMP_CODE_EXIT; 4310 if (wret == MLX5_TXCMP_CODE_ERROR) 4311 return MLX5_TXCMP_CODE_ERROR; 4312 } 4313 /* 4314 * Limits the amount of packets in one WQE 4315 * to improve CQE latency generation. 4316 */ 4317 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4318 MLX5_MPW_INLINE_MAX_PACKETS : 4319 MLX5_EMPW_MAX_PACKETS); 4320 /* Check whether we have minimal amount WQEs */ 4321 if (unlikely(loc->wqe_free < 4322 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4323 return MLX5_TXCMP_CODE_EXIT; 4324 if (likely(pkts_n > 1)) 4325 rte_prefetch0(*pkts); 4326 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4327 /* 4328 * Build eMPW title WQEBB: 4329 * - Control Segment, eMPW opcode, zero DS 4330 * - Ethernet Segment, no inline 4331 */ 4332 mlx5_tx_cseg_init(txq, loc, wqem, 0, 4333 MLX5_OPCODE_ENHANCED_MPSW, olx); 4334 mlx5_tx_eseg_none(txq, loc, wqem, 4335 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4336 dseg = &wqem->dseg[0]; 4337 /* Store the packet length for legacy MPW. */ 4338 if (MLX5_TXOFF_CONFIG(MPW)) 4339 wqem->eseg.mss = rte_cpu_to_be_16 4340 (rte_pktmbuf_data_len(loc->mbuf)); 4341 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 4342 loc->wqe_free) * MLX5_WQE_SIZE - 4343 MLX5_WQE_CSEG_SIZE - 4344 MLX5_WQE_ESEG_SIZE; 4345 /* Limit the room for legacy MPW sessions for performance. */ 4346 if (MLX5_TXOFF_CONFIG(MPW)) 4347 room = RTE_MIN(room, 4348 RTE_MAX(txq->inlen_empw + 4349 sizeof(dseg->bcount) + 4350 (MLX5_TXOFF_CONFIG(VLAN) ? 4351 sizeof(struct rte_vlan_hdr) : 0), 4352 MLX5_MPW_INLINE_MAX_PACKETS * 4353 MLX5_WQE_DSEG_SIZE)); 4354 /* Build WQE till we have space, packets and resources. */ 4355 part = room; 4356 for (;;) { 4357 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4358 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 4359 unsigned int tlen; 4360 4361 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 4362 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 4363 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 4364 /* 4365 * Some Tx offloads may cause an error if 4366 * packet is not long enough, check against 4367 * assumed minimal length. 4368 */ 4369 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 4370 part -= room; 4371 if (unlikely(!part)) 4372 return MLX5_TXCMP_CODE_ERROR; 4373 /* 4374 * We have some successfully built 4375 * packet Data Segments to send. 4376 */ 4377 mlx5_tx_idone_empw(txq, loc, part, 4378 slen, wqem, olx); 4379 return MLX5_TXCMP_CODE_ERROR; 4380 } 4381 /* Inline or not inline - that's the Question. */ 4382 if (dlen > txq->inlen_empw || 4383 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) 4384 goto pointer_empw; 4385 if (MLX5_TXOFF_CONFIG(MPW)) { 4386 if (dlen > txq->inlen_send) 4387 goto pointer_empw; 4388 tlen = dlen; 4389 if (part == room) { 4390 /* Open new inline MPW session. 
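 * The 4-byte bcount placeholder reserved here is written when the
 * session is closed by mlx5_tx_idone_empw(), which stores the final
 * inline data length together with the MLX5_ETH_WQE_DATA_INLINE flag.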
*/ 4391 tlen += sizeof(dseg->bcount); 4392 dseg->bcount = RTE_BE32(0); 4393 dseg = RTE_PTR_ADD 4394 (dseg, sizeof(dseg->bcount)); 4395 } else { 4396 /* 4397 * No pointer and inline descriptor 4398 * intermix for legacy MPW sessions. 4399 */ 4400 if (wqem->dseg[0].bcount) 4401 break; 4402 } 4403 } else { 4404 tlen = sizeof(dseg->bcount) + dlen; 4405 } 4406 /* Inline entire packet, optional VLAN insertion. */ 4407 if (MLX5_TXOFF_CONFIG(VLAN) && 4408 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4409 /* 4410 * The packet length must be checked in 4411 * mlx5_tx_able_to_empw() and packet 4412 * fits into inline length guaranteed. 4413 */ 4414 MLX5_ASSERT((dlen + 4415 sizeof(struct rte_vlan_hdr)) <= 4416 txq->inlen_empw); 4417 tlen += sizeof(struct rte_vlan_hdr); 4418 if (room < tlen) 4419 break; 4420 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 4421 dptr, dlen, olx); 4422 #ifdef MLX5_PMD_SOFT_COUNTERS 4423 /* Update sent data bytes counter. */ 4424 slen += sizeof(struct rte_vlan_hdr); 4425 #endif 4426 } else { 4427 if (room < tlen) 4428 break; 4429 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 4430 dptr, dlen, olx); 4431 } 4432 if (!MLX5_TXOFF_CONFIG(MPW)) 4433 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 4434 MLX5_ASSERT(room >= tlen); 4435 room -= tlen; 4436 /* 4437 * Packet data are completely inline, 4438 * we can try to free the packet. 4439 */ 4440 if (likely(loc->pkts_sent == loc->mbuf_free)) { 4441 /* 4442 * All the packets from the burst beginning 4443 * are inline, we can free mbufs directly 4444 * from the origin array on tx_burst exit(). 4445 */ 4446 loc->mbuf_free++; 4447 goto next_mbuf; 4448 } 4449 /* 4450 * In order no to call rte_pktmbuf_free_seg() here, 4451 * in the most inner loop (that might be very 4452 * expensive) we just save the mbuf in elts. 4453 */ 4454 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 4455 loc->elts_free--; 4456 goto next_mbuf; 4457 pointer_empw: 4458 /* 4459 * No pointer and inline descriptor 4460 * intermix for legacy MPW sessions. 4461 */ 4462 if (MLX5_TXOFF_CONFIG(MPW) && 4463 part != room && 4464 wqem->dseg[0].bcount == RTE_BE32(0)) 4465 break; 4466 /* 4467 * Not inlinable VLAN packets are 4468 * proceeded outside of this routine. 4469 */ 4470 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 4471 if (MLX5_TXOFF_CONFIG(VLAN)) 4472 MLX5_ASSERT(!(loc->mbuf->ol_flags & 4473 PKT_TX_VLAN_PKT)); 4474 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 4475 /* We have to store mbuf in elts.*/ 4476 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 4477 loc->elts_free--; 4478 room -= MLX5_WQE_DSEG_SIZE; 4479 /* Ring buffer wraparound is checked at the loop end.*/ 4480 ++dseg; 4481 next_mbuf: 4482 #ifdef MLX5_PMD_SOFT_COUNTERS 4483 /* Update sent data bytes counter. */ 4484 slen += dlen; 4485 #endif 4486 loc->pkts_sent++; 4487 pkts_n--; 4488 if (unlikely(!pkts_n || !loc->elts_free)) { 4489 /* 4490 * We have no resources/packets to 4491 * continue build descriptors. 4492 */ 4493 part -= room; 4494 mlx5_tx_idone_empw(txq, loc, part, 4495 slen, wqem, olx); 4496 return MLX5_TXCMP_CODE_EXIT; 4497 } 4498 loc->mbuf = *pkts++; 4499 if (likely(pkts_n > 1)) 4500 rte_prefetch0(*pkts); 4501 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4502 /* 4503 * Unroll the completion code to avoid 4504 * returning variable value - it results in 4505 * unoptimized sequent checking in caller. 
4506 */ 4507 if (ret == MLX5_TXCMP_CODE_MULTI) { 4508 part -= room; 4509 mlx5_tx_idone_empw(txq, loc, part, 4510 slen, wqem, olx); 4511 if (unlikely(!loc->elts_free || 4512 !loc->wqe_free)) 4513 return MLX5_TXCMP_CODE_EXIT; 4514 return MLX5_TXCMP_CODE_MULTI; 4515 } 4516 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4517 if (ret == MLX5_TXCMP_CODE_TSO) { 4518 part -= room; 4519 mlx5_tx_idone_empw(txq, loc, part, 4520 slen, wqem, olx); 4521 if (unlikely(!loc->elts_free || 4522 !loc->wqe_free)) 4523 return MLX5_TXCMP_CODE_EXIT; 4524 return MLX5_TXCMP_CODE_TSO; 4525 } 4526 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4527 part -= room; 4528 mlx5_tx_idone_empw(txq, loc, part, 4529 slen, wqem, olx); 4530 if (unlikely(!loc->elts_free || 4531 !loc->wqe_free)) 4532 return MLX5_TXCMP_CODE_EXIT; 4533 return MLX5_TXCMP_CODE_SINGLE; 4534 } 4535 if (ret != MLX5_TXCMP_CODE_EMPW) { 4536 MLX5_ASSERT(false); 4537 part -= room; 4538 mlx5_tx_idone_empw(txq, loc, part, 4539 slen, wqem, olx); 4540 return MLX5_TXCMP_CODE_ERROR; 4541 } 4542 /* Check if we have minimal room left. */ 4543 nlim--; 4544 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 4545 break; 4546 /* 4547 * Check whether packet parameters coincide 4548 * within assumed eMPW batch: 4549 * - check sum settings 4550 * - metadata value 4551 * - software parser settings 4552 * - packets length (legacy MPW only) 4553 * - scheduling is not required 4554 */ 4555 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 4556 loc, dlen, olx)) 4557 break; 4558 /* Packet attributes match, continue the same eMPW. */ 4559 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4560 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4561 } 4562 /* 4563 * We get here to close an existing eMPW 4564 * session and start the new one. 4565 */ 4566 MLX5_ASSERT(pkts_n); 4567 part -= room; 4568 if (unlikely(!part)) 4569 return MLX5_TXCMP_CODE_EXIT; 4570 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 4571 if (unlikely(!loc->elts_free || 4572 !loc->wqe_free)) 4573 return MLX5_TXCMP_CODE_EXIT; 4574 /* Continue the loop with new eMPW session. */ 4575 } 4576 MLX5_ASSERT(false); 4577 } 4578 4579 /** 4580 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 4581 * Data inlining and VLAN insertion are supported. 4582 */ 4583 static __rte_always_inline enum mlx5_txcmp_code 4584 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 4585 struct rte_mbuf **__rte_restrict pkts, 4586 unsigned int pkts_n, 4587 struct mlx5_txq_local *__rte_restrict loc, 4588 unsigned int olx) 4589 { 4590 /* 4591 * Subroutine is the part of mlx5_tx_burst_single() 4592 * and sends single-segment packet with SEND opcode. 4593 */ 4594 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4595 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4596 pkts += loc->pkts_sent + 1; 4597 pkts_n -= loc->pkts_sent; 4598 for (;;) { 4599 struct mlx5_wqe *__rte_restrict wqe; 4600 enum mlx5_txcmp_code ret; 4601 4602 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4603 if (MLX5_TXOFF_CONFIG(TXPP)) { 4604 enum mlx5_txcmp_code wret; 4605 4606 /* Generate WAIT for scheduling if requested. 
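 * Packets carrying a send-time timestamp are preceded by a WAIT WQE,
 * which consumes Send Queue space of its own; that is presumably why
 * the helper may return MLX5_TXCMP_CODE_EXIT here when the queue is
 * nearly full, and the checks below simply propagate that result.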
*/ 4607 wret = mlx5_tx_schedule_send(txq, loc, olx); 4608 if (wret == MLX5_TXCMP_CODE_EXIT) 4609 return MLX5_TXCMP_CODE_EXIT; 4610 if (wret == MLX5_TXCMP_CODE_ERROR) 4611 return MLX5_TXCMP_CODE_ERROR; 4612 } 4613 if (MLX5_TXOFF_CONFIG(INLINE)) { 4614 unsigned int inlen, vlan = 0; 4615 4616 inlen = rte_pktmbuf_data_len(loc->mbuf); 4617 if (MLX5_TXOFF_CONFIG(VLAN) && 4618 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4619 vlan = sizeof(struct rte_vlan_hdr); 4620 inlen += vlan; 4621 } 4622 /* 4623 * If inlining is enabled at configuration time 4624 * the limit must be not less than minimal size. 4625 * Otherwise we would do extra check for data 4626 * size to avoid crashes due to length overflow. 4627 */ 4628 MLX5_ASSERT(txq->inlen_send >= 4629 MLX5_ESEG_MIN_INLINE_SIZE); 4630 if (inlen <= txq->inlen_send) { 4631 unsigned int seg_n, wqe_n; 4632 4633 rte_prefetch0(rte_pktmbuf_mtod 4634 (loc->mbuf, uint8_t *)); 4635 /* Check against minimal length. */ 4636 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 4637 return MLX5_TXCMP_CODE_ERROR; 4638 if (loc->mbuf->ol_flags & 4639 PKT_TX_DYNF_NOINLINE) { 4640 /* 4641 * The hint flag not to inline packet 4642 * data is set. Check whether we can 4643 * follow the hint. 4644 */ 4645 if ((!MLX5_TXOFF_CONFIG(EMPW) && 4646 txq->inlen_mode) || 4647 (MLX5_TXOFF_CONFIG(MPW) && 4648 txq->inlen_mode)) { 4649 if (inlen <= txq->inlen_send) 4650 goto single_inline; 4651 /* 4652 * The hardware requires the 4653 * minimal inline data header. 4654 */ 4655 goto single_min_inline; 4656 } 4657 if (MLX5_TXOFF_CONFIG(VLAN) && 4658 vlan && !txq->vlan_en) { 4659 /* 4660 * We must insert VLAN tag 4661 * by software means. 4662 */ 4663 goto single_part_inline; 4664 } 4665 goto single_no_inline; 4666 } 4667 single_inline: 4668 /* 4669 * Completely inlined packet data WQE: 4670 * - Control Segment, SEND opcode 4671 * - Ethernet Segment, no VLAN insertion 4672 * - Data inlined, VLAN optionally inserted 4673 * - Alignment to MLX5_WSEG_SIZE 4674 * Have to estimate amount of WQEBBs 4675 */ 4676 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 4677 MLX5_ESEG_MIN_INLINE_SIZE + 4678 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4679 /* Check if there are enough WQEBBs. */ 4680 wqe_n = (seg_n + 3) / 4; 4681 if (wqe_n > loc->wqe_free) 4682 return MLX5_TXCMP_CODE_EXIT; 4683 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4684 loc->wqe_last = wqe; 4685 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 4686 MLX5_OPCODE_SEND, olx); 4687 mlx5_tx_eseg_data(txq, loc, wqe, 4688 vlan, inlen, 0, olx); 4689 txq->wqe_ci += wqe_n; 4690 loc->wqe_free -= wqe_n; 4691 /* 4692 * Packet data are completely inlined, 4693 * free the packet immediately. 4694 */ 4695 rte_pktmbuf_free_seg(loc->mbuf); 4696 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 4697 MLX5_TXOFF_CONFIG(MPW)) && 4698 txq->inlen_mode) { 4699 /* 4700 * If minimal inlining is requested the eMPW 4701 * feature should be disabled due to data is 4702 * inlined into Ethernet Segment, which can 4703 * not contain inlined data for eMPW due to 4704 * segment shared for all packets. 4705 */ 4706 struct mlx5_wqe_dseg *__rte_restrict dseg; 4707 unsigned int ds; 4708 uint8_t *dptr; 4709 4710 /* 4711 * The inline-mode settings require 4712 * to inline the specified amount of 4713 * data bytes to the Ethernet Segment. 4714 * We should check the free space in 4715 * WQE ring buffer to inline partially. 
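 *
 * Worked example for the Data Segment count computed below
 * (illustrative only, assuming 16-byte WQE segments and an 18-byte
 * minimal inline Ethernet Segment): with inlen_mode = 50 bytes the
 * WQE needs (16 + 16 + 16 + 50 - 18 + 16 + 15) / 16 = 6 segments,
 * i.e. (6 + 3) / 4 = 2 WQEBBs must be free.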
4716 */ 4717 single_min_inline: 4718 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 4719 MLX5_ASSERT(inlen > txq->inlen_mode); 4720 MLX5_ASSERT(txq->inlen_mode >= 4721 MLX5_ESEG_MIN_INLINE_SIZE); 4722 /* 4723 * Check whether there are enough free WQEBBs: 4724 * - Control Segment 4725 * - Ethernet Segment 4726 * - First Segment of inlined Ethernet data 4727 * - ... data continued ... 4728 * - Finishing Data Segment of pointer type 4729 */ 4730 ds = (MLX5_WQE_CSEG_SIZE + 4731 MLX5_WQE_ESEG_SIZE + 4732 MLX5_WQE_DSEG_SIZE + 4733 txq->inlen_mode - 4734 MLX5_ESEG_MIN_INLINE_SIZE + 4735 MLX5_WQE_DSEG_SIZE + 4736 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4737 if (loc->wqe_free < ((ds + 3) / 4)) 4738 return MLX5_TXCMP_CODE_EXIT; 4739 /* 4740 * Build the ordinary SEND WQE: 4741 * - Control Segment 4742 * - Ethernet Segment, inline inlen_mode bytes 4743 * - Data Segment of pointer type 4744 */ 4745 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4746 loc->wqe_last = wqe; 4747 mlx5_tx_cseg_init(txq, loc, wqe, ds, 4748 MLX5_OPCODE_SEND, olx); 4749 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 4750 txq->inlen_mode, 4751 0, olx); 4752 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4753 txq->inlen_mode - vlan; 4754 inlen -= txq->inlen_mode; 4755 mlx5_tx_dseg_ptr(txq, loc, dseg, 4756 dptr, inlen, olx); 4757 /* 4758 * WQE is built, update the loop parameters 4759 * and got to the next packet. 4760 */ 4761 txq->wqe_ci += (ds + 3) / 4; 4762 loc->wqe_free -= (ds + 3) / 4; 4763 /* We have to store mbuf in elts.*/ 4764 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4765 txq->elts[txq->elts_head++ & txq->elts_m] = 4766 loc->mbuf; 4767 --loc->elts_free; 4768 } else { 4769 uint8_t *dptr; 4770 unsigned int dlen; 4771 4772 /* 4773 * Partially inlined packet data WQE, we have 4774 * some space in title WQEBB, we can fill it 4775 * with some packet data. It takes one WQEBB, 4776 * it is available, no extra space check: 4777 * - Control Segment, SEND opcode 4778 * - Ethernet Segment, no VLAN insertion 4779 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 4780 * - Data Segment, pointer type 4781 * 4782 * We also get here if VLAN insertion is not 4783 * supported by HW, the inline is enabled. 4784 */ 4785 single_part_inline: 4786 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4787 loc->wqe_last = wqe; 4788 mlx5_tx_cseg_init(txq, loc, wqe, 4, 4789 MLX5_OPCODE_SEND, olx); 4790 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 4791 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4792 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 4793 /* 4794 * The length check is performed above, by 4795 * comparing with txq->inlen_send. We should 4796 * not get overflow here. 4797 */ 4798 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 4799 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 4800 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 4801 dptr, dlen, olx); 4802 ++txq->wqe_ci; 4803 --loc->wqe_free; 4804 /* We have to store mbuf in elts.*/ 4805 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4806 txq->elts[txq->elts_head++ & txq->elts_m] = 4807 loc->mbuf; 4808 --loc->elts_free; 4809 } 4810 #ifdef MLX5_PMD_SOFT_COUNTERS 4811 /* Update sent data bytes counter. */ 4812 txq->stats.obytes += vlan + 4813 rte_pktmbuf_data_len(loc->mbuf); 4814 #endif 4815 } else { 4816 /* 4817 * No inline at all, it means the CPU cycles saving 4818 * is prioritized at configuration, we should not 4819 * copy any packet data to WQE. 
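 * Only a pointer Data Segment references the mbuf data, so the mbuf
 * must remain allocated until the hardware completion for this WQE
 * has been processed.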
4820 * 4821 * SEND WQE, one WQEBB: 4822 * - Control Segment, SEND opcode 4823 * - Ethernet Segment, optional VLAN, no inline 4824 * - Data Segment, pointer type 4825 */ 4826 single_no_inline: 4827 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4828 loc->wqe_last = wqe; 4829 mlx5_tx_cseg_init(txq, loc, wqe, 3, 4830 MLX5_OPCODE_SEND, olx); 4831 mlx5_tx_eseg_none(txq, loc, wqe, olx); 4832 mlx5_tx_dseg_ptr 4833 (txq, loc, &wqe->dseg[0], 4834 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4835 rte_pktmbuf_data_len(loc->mbuf), olx); 4836 ++txq->wqe_ci; 4837 --loc->wqe_free; 4838 /* 4839 * We should not store mbuf pointer in elts 4840 * if no inlining is configured, this is done 4841 * by calling routine in a batch copy. 4842 */ 4843 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4844 --loc->elts_free; 4845 #ifdef MLX5_PMD_SOFT_COUNTERS 4846 /* Update sent data bytes counter. */ 4847 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 4848 if (MLX5_TXOFF_CONFIG(VLAN) && 4849 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 4850 txq->stats.obytes += 4851 sizeof(struct rte_vlan_hdr); 4852 #endif 4853 } 4854 ++loc->pkts_sent; 4855 --pkts_n; 4856 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4857 return MLX5_TXCMP_CODE_EXIT; 4858 loc->mbuf = *pkts++; 4859 if (pkts_n > 1) 4860 rte_prefetch0(*pkts); 4861 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4862 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 4863 return ret; 4864 } 4865 MLX5_ASSERT(false); 4866 } 4867 4868 static __rte_always_inline enum mlx5_txcmp_code 4869 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 4870 struct rte_mbuf **__rte_restrict pkts, 4871 unsigned int pkts_n, 4872 struct mlx5_txq_local *__rte_restrict loc, 4873 unsigned int olx) 4874 { 4875 enum mlx5_txcmp_code ret; 4876 4877 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 4878 if (ret == MLX5_TXCMP_CODE_SINGLE) 4879 goto ordinary_send; 4880 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 4881 for (;;) { 4882 /* Optimize for inline/no inline eMPW send. */ 4883 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 4884 mlx5_tx_burst_empw_inline 4885 (txq, pkts, pkts_n, loc, olx) : 4886 mlx5_tx_burst_empw_simple 4887 (txq, pkts, pkts_n, loc, olx); 4888 if (ret != MLX5_TXCMP_CODE_SINGLE) 4889 return ret; 4890 /* The resources to send one packet should remain. */ 4891 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4892 ordinary_send: 4893 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 4894 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 4895 if (ret != MLX5_TXCMP_CODE_EMPW) 4896 return ret; 4897 /* The resources to send one packet should remain. */ 4898 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4899 } 4900 } 4901 4902 /** 4903 * DPDK Tx callback template. This is configured template 4904 * used to generate routines optimized for specified offload setup. 4905 * One of this generated functions is chosen at SQ configuration 4906 * time. 4907 * 4908 * @param txq 4909 * Generic pointer to TX queue structure. 4910 * @param[in] pkts 4911 * Packets to transmit. 4912 * @param pkts_n 4913 * Number of packets in array. 4914 * @param olx 4915 * Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx 4916 * values. Should be static to take compile time static configuration 4917 * advantages. 4918 * 4919 * @return 4920 * Number of packets successfully transmitted (<= pkts_n). 
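 *
 * Application-level usage sketch (illustrative only; port_id,
 * queue_id, pkts and nb_pkts are assumptions of the example). The
 * PMD selects one generated variant of this template at Tx queue
 * setup time and ethdev dispatches to it:
 *
 * @code
 *   uint16_t sent = rte_eth_tx_burst(port_id, queue_id, pkts, nb_pkts);
 *   // sent <= nb_pkts; unsent mbufs remain owned by the application
 * @endcode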
4921 */ 4922 static __rte_always_inline uint16_t 4923 mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, 4924 struct rte_mbuf **__rte_restrict pkts, 4925 uint16_t pkts_n, 4926 unsigned int olx) 4927 { 4928 struct mlx5_txq_local loc; 4929 enum mlx5_txcmp_code ret; 4930 unsigned int part; 4931 4932 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4933 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4934 if (unlikely(!pkts_n)) 4935 return 0; 4936 if (MLX5_TXOFF_CONFIG(INLINE)) 4937 loc.mbuf_free = 0; 4938 loc.pkts_sent = 0; 4939 loc.pkts_copy = 0; 4940 loc.wqe_last = NULL; 4941 4942 send_loop: 4943 loc.pkts_loop = loc.pkts_sent; 4944 /* 4945 * Check if there are some CQEs, if any: 4946 * - process an encountered errors 4947 * - process the completed WQEs 4948 * - free related mbufs 4949 * - doorbell the NIC about processed CQEs 4950 */ 4951 rte_prefetch0(*(pkts + loc.pkts_sent)); 4952 mlx5_tx_handle_completion(txq, olx); 4953 /* 4954 * Calculate the number of available resources - elts and WQEs. 4955 * There are two possible different scenarios: 4956 * - no data inlining into WQEs, one WQEBB may contains up to 4957 * four packets, in this case elts become scarce resource 4958 * - data inlining into WQEs, one packet may require multiple 4959 * WQEBBs, the WQEs become the limiting factor. 4960 */ 4961 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4962 loc.elts_free = txq->elts_s - 4963 (uint16_t)(txq->elts_head - txq->elts_tail); 4964 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4965 loc.wqe_free = txq->wqe_s - 4966 (uint16_t)(txq->wqe_ci - txq->wqe_pi); 4967 if (unlikely(!loc.elts_free || !loc.wqe_free)) 4968 goto burst_exit; 4969 for (;;) { 4970 /* 4971 * Fetch the packet from array. Usually this is 4972 * the first packet in series of multi/single 4973 * segment packets. 4974 */ 4975 loc.mbuf = *(pkts + loc.pkts_sent); 4976 /* Dedicated branch for multi-segment packets. */ 4977 if (MLX5_TXOFF_CONFIG(MULTI) && 4978 unlikely(NB_SEGS(loc.mbuf) > 1)) { 4979 /* 4980 * Multi-segment packet encountered. 4981 * Hardware is able to process it only 4982 * with SEND/TSO opcodes, one packet 4983 * per WQE, do it in dedicated routine. 4984 */ 4985 enter_send_multi: 4986 MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy); 4987 part = loc.pkts_sent - loc.pkts_copy; 4988 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 4989 /* 4990 * There are some single-segment mbufs not 4991 * stored in elts. The mbufs must be in the 4992 * same order as WQEs, so we must copy the 4993 * mbufs to elts here, before the coming 4994 * multi-segment packet mbufs is appended. 4995 */ 4996 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, 4997 part, olx); 4998 loc.pkts_copy = loc.pkts_sent; 4999 } 5000 MLX5_ASSERT(pkts_n > loc.pkts_sent); 5001 ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx); 5002 if (!MLX5_TXOFF_CONFIG(INLINE)) 5003 loc.pkts_copy = loc.pkts_sent; 5004 /* 5005 * These returned code checks are supposed 5006 * to be optimized out due to routine inlining. 5007 */ 5008 if (ret == MLX5_TXCMP_CODE_EXIT) { 5009 /* 5010 * The routine returns this code when 5011 * all packets are sent or there is no 5012 * enough resources to complete request. 5013 */ 5014 break; 5015 } 5016 if (ret == MLX5_TXCMP_CODE_ERROR) { 5017 /* 5018 * The routine returns this code when 5019 * some error in the incoming packets 5020 * format occurred. 
5021 */ 5022 txq->stats.oerrors++; 5023 break; 5024 } 5025 if (ret == MLX5_TXCMP_CODE_SINGLE) { 5026 /* 5027 * The single-segment packet was encountered 5028 * in the array, try to send it with the 5029 * best optimized way, possible engaging eMPW. 5030 */ 5031 goto enter_send_single; 5032 } 5033 if (MLX5_TXOFF_CONFIG(TSO) && 5034 ret == MLX5_TXCMP_CODE_TSO) { 5035 /* 5036 * The single-segment TSO packet was 5037 * encountered in the array. 5038 */ 5039 goto enter_send_tso; 5040 } 5041 /* We must not get here. Something is going wrong. */ 5042 MLX5_ASSERT(false); 5043 txq->stats.oerrors++; 5044 break; 5045 } 5046 /* Dedicated branch for single-segment TSO packets. */ 5047 if (MLX5_TXOFF_CONFIG(TSO) && 5048 unlikely(loc.mbuf->ol_flags & PKT_TX_TCP_SEG)) { 5049 /* 5050 * TSO might require special way for inlining 5051 * (dedicated parameters) and is sent with 5052 * MLX5_OPCODE_TSO opcode only, provide this 5053 * in dedicated branch. 5054 */ 5055 enter_send_tso: 5056 MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1); 5057 MLX5_ASSERT(pkts_n > loc.pkts_sent); 5058 ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx); 5059 /* 5060 * These returned code checks are supposed 5061 * to be optimized out due to routine inlining. 5062 */ 5063 if (ret == MLX5_TXCMP_CODE_EXIT) 5064 break; 5065 if (ret == MLX5_TXCMP_CODE_ERROR) { 5066 txq->stats.oerrors++; 5067 break; 5068 } 5069 if (ret == MLX5_TXCMP_CODE_SINGLE) 5070 goto enter_send_single; 5071 if (MLX5_TXOFF_CONFIG(MULTI) && 5072 ret == MLX5_TXCMP_CODE_MULTI) { 5073 /* 5074 * The multi-segment packet was 5075 * encountered in the array. 5076 */ 5077 goto enter_send_multi; 5078 } 5079 /* We must not get here. Something is going wrong. */ 5080 MLX5_ASSERT(false); 5081 txq->stats.oerrors++; 5082 break; 5083 } 5084 /* 5085 * The dedicated branch for the single-segment packets 5086 * without TSO. Often these ones can be sent using 5087 * MLX5_OPCODE_EMPW with multiple packets in one WQE. 5088 * The routine builds the WQEs till it encounters 5089 * the TSO or multi-segment packet (in case if these 5090 * offloads are requested at SQ configuration time). 5091 */ 5092 enter_send_single: 5093 MLX5_ASSERT(pkts_n > loc.pkts_sent); 5094 ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx); 5095 /* 5096 * These returned code checks are supposed 5097 * to be optimized out due to routine inlining. 5098 */ 5099 if (ret == MLX5_TXCMP_CODE_EXIT) 5100 break; 5101 if (ret == MLX5_TXCMP_CODE_ERROR) { 5102 txq->stats.oerrors++; 5103 break; 5104 } 5105 if (MLX5_TXOFF_CONFIG(MULTI) && 5106 ret == MLX5_TXCMP_CODE_MULTI) { 5107 /* 5108 * The multi-segment packet was 5109 * encountered in the array. 5110 */ 5111 goto enter_send_multi; 5112 } 5113 if (MLX5_TXOFF_CONFIG(TSO) && 5114 ret == MLX5_TXCMP_CODE_TSO) { 5115 /* 5116 * The single-segment TSO packet was 5117 * encountered in the array. 5118 */ 5119 goto enter_send_tso; 5120 } 5121 /* We must not get here. Something is going wrong. */ 5122 MLX5_ASSERT(false); 5123 txq->stats.oerrors++; 5124 break; 5125 } 5126 /* 5127 * Main Tx loop is completed, do the rest: 5128 * - set completion request if thresholds are reached 5129 * - doorbell the hardware 5130 * - copy the rest of mbufs to elts (if any) 5131 */ 5132 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) || 5133 loc.pkts_sent >= loc.pkts_copy); 5134 /* Take a shortcut if nothing is sent. */ 5135 if (unlikely(loc.pkts_sent == loc.pkts_loop)) 5136 goto burst_exit; 5137 /* Request CQE generation if limits are reached. 
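 * The completion request is rate limited: a CQE is asked for only
 * after enough WQEBBs/packets have accumulated since the previous
 * request, so completions (and the mbuf releases they trigger) come
 * in batches rather than per packet. This is a behavioural summary,
 * not an additional check.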
*/
5138 mlx5_tx_request_completion(txq, &loc, olx);
5139 /*
5140 * Ring QP doorbell immediately after WQE building completion
5141 * to improve latencies. The pure software related data treatment
5142 * can be completed after doorbell. Tx CQEs for this SQ are
5143 * processed in this thread only by the polling.
5144 *
5145 * The rdma core library can map doorbell register in two ways,
5146 * depending on the environment variable "MLX5_SHUT_UP_BF":
5147 *
5148 * - as regular cached memory, the variable is either missing or
5149 * set to zero. This type of mapping may cause significant
5150 * doorbell register write latency and requires an explicit
5151 * memory write barrier to mitigate this issue and prevent
5152 * write combining.
5153 *
5154 * - as non-cached memory, the variable is present and set to
5155 * a non-zero value. This type of mapping may cause performance
5156 * impact under heavy loading conditions but no explicit write
5157 * memory barrier is required, which may improve core
5158 * performance.
5159 *
5160 * - the legacy behaviour (prior to the 19.08 release) was to use
5161 * some heuristics to decide whether a write memory barrier
5162 * should be performed. This behaviour is selected by specifying
5163 * tx_db_nc=2; the write barrier is skipped if the application
5164 * provides the full recommended burst of packets, on the
5165 * assumption that the next packets are coming and the barrier
5166 * will be issued on the next burst (after descriptor writing,
5167 * at least).
5168 */
5169 mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc &&
5170 (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
5171 /* Not all of the mbufs may be stored into elts yet. */
5172 part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
5173 if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
5174 /*
5175 * There are some single-segment mbufs not stored in elts.
5176 * This can only happen if the last packet was single-segment.
5177 * The copying is gathered into one place because it is
5178 * a good opportunity to optimize it with SIMD.
5179 * Unfortunately, if inlining is enabled, gaps in the
5180 * pointer array may appear due to early freeing of the
5181 * inlined mbufs.
5182 */
5183 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx);
5184 loc.pkts_copy = loc.pkts_sent;
5185 }
5186 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
5187 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
5188 if (pkts_n > loc.pkts_sent) {
5189 /*
5190 * If the burst size is large there might not be enough CQEs
5191 * fetched from the completion queue and not enough resources
5192 * freed to send all the packets.
5193 */
5194 goto send_loop;
5195 }
5196 burst_exit:
5197 #ifdef MLX5_PMD_SOFT_COUNTERS
5198 /* Increment sent packets counter. */
5199 txq->stats.opackets += loc.pkts_sent;
5200 #endif
5201 if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
5202 __mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
5203 return loc.pkts_sent;
5204 }
5205
5206 /* Generate routines with Enhanced Multi-Packet Write support.
*/ 5207 MLX5_TXOFF_DECL(full_empw, 5208 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW) 5209 5210 MLX5_TXOFF_DECL(none_empw, 5211 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 5212 5213 MLX5_TXOFF_DECL(md_empw, 5214 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5215 5216 MLX5_TXOFF_DECL(mt_empw, 5217 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5218 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5219 5220 MLX5_TXOFF_DECL(mtsc_empw, 5221 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5222 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5223 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5224 5225 MLX5_TXOFF_DECL(mti_empw, 5226 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5227 MLX5_TXOFF_CONFIG_INLINE | 5228 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5229 5230 MLX5_TXOFF_DECL(mtv_empw, 5231 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5232 MLX5_TXOFF_CONFIG_VLAN | 5233 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5234 5235 MLX5_TXOFF_DECL(mtiv_empw, 5236 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5237 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5238 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5239 5240 MLX5_TXOFF_DECL(sc_empw, 5241 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5242 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5243 5244 MLX5_TXOFF_DECL(sci_empw, 5245 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5246 MLX5_TXOFF_CONFIG_INLINE | 5247 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5248 5249 MLX5_TXOFF_DECL(scv_empw, 5250 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5251 MLX5_TXOFF_CONFIG_VLAN | 5252 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5253 5254 MLX5_TXOFF_DECL(sciv_empw, 5255 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5256 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5257 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5258 5259 MLX5_TXOFF_DECL(i_empw, 5260 MLX5_TXOFF_CONFIG_INLINE | 5261 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5262 5263 MLX5_TXOFF_DECL(v_empw, 5264 MLX5_TXOFF_CONFIG_VLAN | 5265 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5266 5267 MLX5_TXOFF_DECL(iv_empw, 5268 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5269 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5270 5271 /* Generate routines without Enhanced Multi-Packet Write support. 
*/ 5272 MLX5_TXOFF_DECL(full, 5273 MLX5_TXOFF_CONFIG_FULL) 5274 5275 MLX5_TXOFF_DECL(none, 5276 MLX5_TXOFF_CONFIG_NONE) 5277 5278 MLX5_TXOFF_DECL(md, 5279 MLX5_TXOFF_CONFIG_METADATA) 5280 5281 MLX5_TXOFF_DECL(mt, 5282 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5283 MLX5_TXOFF_CONFIG_METADATA) 5284 5285 MLX5_TXOFF_DECL(mtsc, 5286 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5287 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5288 MLX5_TXOFF_CONFIG_METADATA) 5289 5290 MLX5_TXOFF_DECL(mti, 5291 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5292 MLX5_TXOFF_CONFIG_INLINE | 5293 MLX5_TXOFF_CONFIG_METADATA) 5294 5295 5296 MLX5_TXOFF_DECL(mtv, 5297 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5298 MLX5_TXOFF_CONFIG_VLAN | 5299 MLX5_TXOFF_CONFIG_METADATA) 5300 5301 5302 MLX5_TXOFF_DECL(mtiv, 5303 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5304 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5305 MLX5_TXOFF_CONFIG_METADATA) 5306 5307 MLX5_TXOFF_DECL(sc, 5308 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5309 MLX5_TXOFF_CONFIG_METADATA) 5310 5311 MLX5_TXOFF_DECL(sci, 5312 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5313 MLX5_TXOFF_CONFIG_INLINE | 5314 MLX5_TXOFF_CONFIG_METADATA) 5315 5316 5317 MLX5_TXOFF_DECL(scv, 5318 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5319 MLX5_TXOFF_CONFIG_VLAN | 5320 MLX5_TXOFF_CONFIG_METADATA) 5321 5322 5323 MLX5_TXOFF_DECL(sciv, 5324 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5325 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5326 MLX5_TXOFF_CONFIG_METADATA) 5327 5328 MLX5_TXOFF_DECL(i, 5329 MLX5_TXOFF_CONFIG_INLINE | 5330 MLX5_TXOFF_CONFIG_METADATA) 5331 5332 MLX5_TXOFF_DECL(v, 5333 MLX5_TXOFF_CONFIG_VLAN | 5334 MLX5_TXOFF_CONFIG_METADATA) 5335 5336 MLX5_TXOFF_DECL(iv, 5337 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5338 MLX5_TXOFF_CONFIG_METADATA) 5339 5340 /* Generate routines with timestamp scheduling. */ 5341 MLX5_TXOFF_DECL(full_ts_nompw, 5342 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP) 5343 5344 MLX5_TXOFF_DECL(full_ts_nompwi, 5345 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5346 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5347 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5348 MLX5_TXOFF_CONFIG_TXPP) 5349 5350 MLX5_TXOFF_DECL(full_ts, 5351 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP | 5352 MLX5_TXOFF_CONFIG_EMPW) 5353 5354 MLX5_TXOFF_DECL(full_ts_noi, 5355 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5356 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5357 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5358 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5359 5360 MLX5_TXOFF_DECL(none_ts, 5361 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP | 5362 MLX5_TXOFF_CONFIG_EMPW) 5363 5364 MLX5_TXOFF_DECL(mdi_ts, 5365 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5366 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5367 5368 MLX5_TXOFF_DECL(mti_ts, 5369 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5370 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5371 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5372 5373 MLX5_TXOFF_DECL(mtiv_ts, 5374 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5375 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5376 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP | 5377 MLX5_TXOFF_CONFIG_EMPW) 5378 5379 /* 5380 * Generate routines with Legacy Multi-Packet Write support. 
/*
 * Generate routines with Legacy Multi-Packet Write support.
 * This mode is supported by ConnectX-4 Lx only and imposes
 * offload limitations; the following are not supported:
 * - ACL/Flows (metadata would become meaningless)
 * - WQE Inline headers
 * - SRIOV (E-Switch offloads)
 * - VLAN insertion
 * - tunnel encapsulation/decapsulation
 * - TSO
 */
MLX5_TXOFF_DECL(none_mpw,
        MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW |
        MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_DECL(mci_mpw,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
        MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_DECL(mc_mpw,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_DECL(i_mpw,
        MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
        MLX5_TXOFF_CONFIG_MPW)

/*
 * Array of declared and compiled Tx burst functions and the corresponding
 * supported offload sets. The array is used to select the Tx burst
 * function for the offload set requested at Tx queue configuration time.
 */
const struct {
        eth_tx_burst_t func;
        unsigned int olx;
} txoff_func[] = {
MLX5_TXOFF_INFO(full_empw,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
        MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
        MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(none_empw,
        MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(md_empw,
        MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mt_empw,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
        MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtsc_empw,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
        MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mti_empw,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
        MLX5_TXOFF_CONFIG_INLINE |
        MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtv_empw,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
        MLX5_TXOFF_CONFIG_VLAN |
        MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtiv_empw,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
        MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
        MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sc_empw,
        MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sci_empw,
        MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_INLINE |
        MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(scv_empw,
        MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_VLAN |
        MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sciv_empw,
        MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
        MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(i_empw,
        MLX5_TXOFF_CONFIG_INLINE |
        MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(v_empw,
        MLX5_TXOFF_CONFIG_VLAN |
        MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)
MLX5_TXOFF_INFO(iv_empw,
        MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
        MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full_ts_nompw,
        MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP)

MLX5_TXOFF_INFO(full_ts_nompwi,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
        MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA |
        MLX5_TXOFF_CONFIG_TXPP)

MLX5_TXOFF_INFO(full_ts,
        MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP |
        MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full_ts_noi,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
        MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA |
        MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(none_ts,
        MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP |
        MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mdi_ts,
        MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA |
        MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mti_ts,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
        MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA |
        MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtiv_ts,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
        MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
        MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP |
        MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
        MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
        MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(none,
        MLX5_TXOFF_CONFIG_NONE)

MLX5_TXOFF_INFO(md,
        MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mt,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
        MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtsc,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
        MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mti,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
        MLX5_TXOFF_CONFIG_INLINE |
        MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtv,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
        MLX5_TXOFF_CONFIG_VLAN |
        MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtiv,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
        MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
        MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sc,
        MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sci,
        MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_INLINE |
        MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(scv,
        MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_VLAN |
        MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sciv,
        MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
        MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(i,
        MLX5_TXOFF_CONFIG_INLINE |
        MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(v,
        MLX5_TXOFF_CONFIG_VLAN |
        MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(iv,
        MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
        MLX5_TXOFF_CONFIG_METADATA)
MLX5_TXOFF_INFO(none_mpw,
        MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW |
        MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(mci_mpw,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
        MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(mc_mpw,
        MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
        MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(i_mpw,
        MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
        MLX5_TXOFF_CONFIG_MPW)
};

/**
 * Configure the Tx function to use. The routine checks the configured
 * Tx offloads for the device and selects the appropriate Tx burst
 * routine. There are multiple Tx burst routines compiled from the same
 * template in the most optimal way for the dedicated Tx offloads set.
 *
 * @param dev
 *   Pointer to the Ethernet device structure.
 *
 * @return
 *   Pointer to selected Tx burst function.
 */
eth_tx_burst_t
mlx5_select_tx_function(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_dev_config *config = &priv->config;
        uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
        unsigned int diff = 0, olx = 0, i, m;

        MLX5_ASSERT(priv);
        if (tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) {
                /* We should support Multi-Segment Packets. */
                olx |= MLX5_TXOFF_CONFIG_MULTI;
        }
        if (tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO |
                           DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
                           DEV_TX_OFFLOAD_GRE_TNL_TSO |
                           DEV_TX_OFFLOAD_IP_TNL_TSO |
                           DEV_TX_OFFLOAD_UDP_TNL_TSO)) {
                /* We should support TCP Send Offload. */
                olx |= MLX5_TXOFF_CONFIG_TSO;
        }
        if (tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO |
                           DEV_TX_OFFLOAD_UDP_TNL_TSO |
                           DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
                /* We should support Software Parser for Tunnels. */
                olx |= MLX5_TXOFF_CONFIG_SWP;
        }
        if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
                           DEV_TX_OFFLOAD_UDP_CKSUM |
                           DEV_TX_OFFLOAD_TCP_CKSUM |
                           DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
                /* We should support IP/TCP/UDP Checksums. */
                olx |= MLX5_TXOFF_CONFIG_CSUM;
        }
        if (tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT) {
                /* We should support VLAN insertion. */
                olx |= MLX5_TXOFF_CONFIG_VLAN;
        }
        if (tx_offloads & DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP &&
            rte_mbuf_dynflag_lookup
                        (RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL) >= 0 &&
            rte_mbuf_dynfield_lookup
                        (RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL) >= 0) {
                /* Offload configured, dynamic entities registered. */
                olx |= MLX5_TXOFF_CONFIG_TXPP;
        }
        if (priv->txqs_n && (*priv->txqs)[0]) {
                struct mlx5_txq_data *txd = (*priv->txqs)[0];

                if (txd->inlen_send) {
                        /*
                         * Check the data inline requirements. Data inlining
                         * is enabled on a per-device basis, so checking the
                         * first Tx queue only is sufficient.
                         *
                         * If the device does not support VLAN insertion in
                         * the WQE and some queues are requested to perform
                         * VLAN insertion offload, then inlining must be
                         * enabled.
                         */
                        olx |= MLX5_TXOFF_CONFIG_INLINE;
                }
        }
5686 */ 5687 olx |= MLX5_TXOFF_CONFIG_EMPW; 5688 } 5689 if (rte_flow_dynf_metadata_avail()) { 5690 /* We should support Flow metadata. */ 5691 olx |= MLX5_TXOFF_CONFIG_METADATA; 5692 } 5693 if (config->mps == MLX5_MPW) { 5694 /* 5695 * The NIC supports Legacy Multi-Packet Write. 5696 * The MLX5_TXOFF_CONFIG_MPW controls the 5697 * descriptor building method in combination 5698 * with MLX5_TXOFF_CONFIG_EMPW. 5699 */ 5700 if (!(olx & (MLX5_TXOFF_CONFIG_TSO | 5701 MLX5_TXOFF_CONFIG_SWP | 5702 MLX5_TXOFF_CONFIG_VLAN | 5703 MLX5_TXOFF_CONFIG_METADATA))) 5704 olx |= MLX5_TXOFF_CONFIG_EMPW | 5705 MLX5_TXOFF_CONFIG_MPW; 5706 } 5707 /* 5708 * Scan the routines table to find the minimal 5709 * satisfying routine with requested offloads. 5710 */ 5711 m = RTE_DIM(txoff_func); 5712 for (i = 0; i < RTE_DIM(txoff_func); i++) { 5713 unsigned int tmp; 5714 5715 tmp = txoff_func[i].olx; 5716 if (tmp == olx) { 5717 /* Meets requested offloads exactly.*/ 5718 m = i; 5719 break; 5720 } 5721 if ((tmp & olx) != olx) { 5722 /* Does not meet requested offloads at all. */ 5723 continue; 5724 } 5725 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_MPW) 5726 /* Do not enable legacy MPW if not configured. */ 5727 continue; 5728 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_EMPW) 5729 /* Do not enable eMPW if not configured. */ 5730 continue; 5731 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_INLINE) 5732 /* Do not enable inlining if not configured. */ 5733 continue; 5734 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_TXPP) 5735 /* Do not enable scheduling if not configured. */ 5736 continue; 5737 /* 5738 * Some routine meets the requirements. 5739 * Check whether it has minimal amount 5740 * of not requested offloads. 5741 */ 5742 tmp = __builtin_popcountl(tmp & ~olx); 5743 if (m >= RTE_DIM(txoff_func) || tmp < diff) { 5744 /* First or better match, save and continue. */ 5745 m = i; 5746 diff = tmp; 5747 continue; 5748 } 5749 if (tmp == diff) { 5750 tmp = txoff_func[i].olx ^ txoff_func[m].olx; 5751 if (__builtin_ffsl(txoff_func[i].olx & ~tmp) < 5752 __builtin_ffsl(txoff_func[m].olx & ~tmp)) { 5753 /* Lighter not requested offload. 
        if (m >= RTE_DIM(txoff_func)) {
                DRV_LOG(DEBUG, "port %u has no selected Tx function"
                               " for requested offloads %04X",
                               dev->data->port_id, olx);
                return NULL;
        }
        DRV_LOG(DEBUG, "port %u has selected Tx function"
                       " supporting offloads %04X/%04X",
                       dev->data->port_id, olx, txoff_func[m].olx);
        if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MULTI)
                DRV_LOG(DEBUG, "\tMULTI (multi segment)");
        if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TSO)
                DRV_LOG(DEBUG, "\tTSO (TCP send offload)");
        if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_SWP)
                DRV_LOG(DEBUG, "\tSWP (software parser)");
        if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_CSUM)
                DRV_LOG(DEBUG, "\tCSUM (checksum offload)");
        if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_INLINE)
                DRV_LOG(DEBUG, "\tINLIN (inline data)");
        if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_VLAN)
                DRV_LOG(DEBUG, "\tVLANI (VLAN insertion)");
        if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_METADATA)
                DRV_LOG(DEBUG, "\tMETAD (tx Flow metadata)");
        if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TXPP)
                DRV_LOG(DEBUG, "\tTSTMP (tx Scheduling)");
        if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_EMPW) {
                if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MPW)
                        DRV_LOG(DEBUG, "\tMPW (Legacy MPW)");
                else
                        DRV_LOG(DEBUG, "\tEMPW (Enhanced MPW)");
        }
        return txoff_func[m].func;
}

/**
 * DPDK callback to get the Tx queue information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param tx_queue_id
 *   Tx queue identifier.
 *
 * @param qinfo
 *   Pointer to the Tx queue information structure.
 *
 * @return
 *   None.
 */
void
mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
                  struct rte_eth_txq_info *qinfo)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_txq_data *txq = (*priv->txqs)[tx_queue_id];
        struct mlx5_txq_ctrl *txq_ctrl =
                        container_of(txq, struct mlx5_txq_ctrl, txq);

        if (!txq)
                return;
        qinfo->nb_desc = txq->elts_s;
        qinfo->conf.tx_thresh.pthresh = 0;
        qinfo->conf.tx_thresh.hthresh = 0;
        qinfo->conf.tx_thresh.wthresh = 0;
        qinfo->conf.tx_rs_thresh = 0;
        qinfo->conf.tx_free_thresh = 0;
        qinfo->conf.tx_deferred_start = txq_ctrl ? 0 : 1;
        qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
}
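
/*
 * Illustrative caller-side usage (not part of the datapath): the callback
 * above is reached through the generic ethdev API, for example:
 *
 *	uint16_t port_id = 0;
 *	struct rte_eth_txq_info qinfo;
 *
 *	if (rte_eth_tx_queue_info_get(port_id, 0, &qinfo) == 0)
 *		printf("Tx queue 0: %u descriptors\n",
 *		       (unsigned int)qinfo.nb_desc);
 */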
/**
 * DPDK callback to get the Tx packet burst mode information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param tx_queue_id
 *   Tx queue identifier.
 *
 * @param mode
 *   Pointer to the burst mode information.
 *
 * @return
 *   0 on success, -EINVAL on failure.
 */
int
mlx5_tx_burst_mode_get(struct rte_eth_dev *dev,
                       uint16_t tx_queue_id,
                       struct rte_eth_burst_mode *mode)
{
        eth_tx_burst_t pkt_burst = dev->tx_pkt_burst;
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_txq_data *txq = (*priv->txqs)[tx_queue_id];
        unsigned int i, olx;

        for (i = 0; i < RTE_DIM(txoff_func); i++) {
                if (pkt_burst == txoff_func[i].func) {
                        olx = txoff_func[i].olx;
                        snprintf(mode->info, sizeof(mode->info),
                                 "%s%s%s%s%s%s%s%s%s%s",
                                 (olx & MLX5_TXOFF_CONFIG_EMPW) ?
                                 ((olx & MLX5_TXOFF_CONFIG_MPW) ?
                                 "Legacy MPW" : "Enhanced MPW") : "No MPW",
                                 (olx & MLX5_TXOFF_CONFIG_MULTI) ?
                                 " + MULTI" : "",
                                 (olx & MLX5_TXOFF_CONFIG_TSO) ?
                                 " + TSO" : "",
                                 (olx & MLX5_TXOFF_CONFIG_SWP) ?
                                 " + SWP" : "",
                                 (olx & MLX5_TXOFF_CONFIG_CSUM) ?
                                 " + CSUM" : "",
                                 (olx & MLX5_TXOFF_CONFIG_INLINE) ?
                                 " + INLINE" : "",
                                 (olx & MLX5_TXOFF_CONFIG_VLAN) ?
                                 " + VLAN" : "",
                                 (olx & MLX5_TXOFF_CONFIG_METADATA) ?
                                 " + METADATA" : "",
                                 (olx & MLX5_TXOFF_CONFIG_TXPP) ?
                                 " + TXPP" : "",
                                 (txq && txq->fast_free) ?
                                 " + Fast Free" : "");
                        return 0;
                }
        }
        return -EINVAL;
}
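
/*
 * Illustrative caller-side usage (not part of the datapath): the burst
 * mode string assembled above can be queried through the generic ethdev
 * API, for example:
 *
 *	uint16_t port_id = 0;
 *	struct rte_eth_burst_mode bmode;
 *
 *	if (rte_eth_tx_burst_mode_get(port_id, 0, &bmode) == 0)
 *		printf("Tx burst mode: %s\n", bmode.info);
 */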