/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015-2019 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_cycles.h>
#include <rte_flow.h>

#include <mlx5_prm.h>
#include <mlx5_common.h>

#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
	MLX5_TXCMP_CODE_EXIT = 0,
	MLX5_TXCMP_CODE_ERROR,
	MLX5_TXCMP_CODE_SINGLE,
	MLX5_TXCMP_CODE_MULTI,
	MLX5_TXCMP_CODE_TSO,
	MLX5_TXCMP_CODE_EMPW,
};

/*
 * These defines are used to configure the set of Tx burst routine options
 * supported at compile time. Options that are not specified are optimized
 * out because the corresponding 'if' conditions can be evaluated at compile
 * time. Offloads with a bigger runtime check overhead (more CPU cycles to
 * skip) should have a bigger index - this is needed to select the best
 * matching routine when there is no exact match and some offloads are not
 * actually requested.
 */
#define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets.*/
#define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported.*/
#define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads.*/
#define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Check Sums offloaded. */
#define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported.*/
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported.*/
#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported.*/
#define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp.*/

/* The most common offloads groups.
*/ 60 #define MLX5_TXOFF_CONFIG_NONE 0 61 #define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \ 62 MLX5_TXOFF_CONFIG_TSO | \ 63 MLX5_TXOFF_CONFIG_SWP | \ 64 MLX5_TXOFF_CONFIG_CSUM | \ 65 MLX5_TXOFF_CONFIG_INLINE | \ 66 MLX5_TXOFF_CONFIG_VLAN | \ 67 MLX5_TXOFF_CONFIG_METADATA) 68 69 #define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask) 70 71 #define MLX5_TXOFF_DECL(func, olx) \ 72 static uint16_t mlx5_tx_burst_##func(void *txq, \ 73 struct rte_mbuf **pkts, \ 74 uint16_t pkts_n) \ 75 { \ 76 return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \ 77 pkts, pkts_n, (olx)); \ 78 } 79 80 #define MLX5_TXOFF_INFO(func, olx) {mlx5_tx_burst_##func, olx}, 81 82 /* static asserts */ 83 static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value"); 84 static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be negative value"); 85 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 86 (sizeof(uint16_t) + 87 sizeof(rte_v128u32_t)), 88 "invalid Ethernet Segment data size"); 89 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 90 (sizeof(uint16_t) + 91 sizeof(struct rte_vlan_hdr) + 92 2 * RTE_ETHER_ADDR_LEN), 93 "invalid Ethernet Segment data size"); 94 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 95 (sizeof(uint16_t) + 96 sizeof(rte_v128u32_t)), 97 "invalid Ethernet Segment data size"); 98 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 99 (sizeof(uint16_t) + 100 sizeof(struct rte_vlan_hdr) + 101 2 * RTE_ETHER_ADDR_LEN), 102 "invalid Ethernet Segment data size"); 103 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 104 (sizeof(uint16_t) + 105 sizeof(rte_v128u32_t)), 106 "invalid Ethernet Segment data size"); 107 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 108 (sizeof(uint16_t) + 109 sizeof(struct rte_vlan_hdr) + 110 2 * RTE_ETHER_ADDR_LEN), 111 "invalid Ethernet Segment data size"); 112 static_assert(MLX5_DSEG_MIN_INLINE_SIZE == 113 (2 * RTE_ETHER_ADDR_LEN), 114 "invalid Data Segment data size"); 115 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 116 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 117 static_assert((sizeof(struct rte_vlan_hdr) + 118 sizeof(struct rte_ether_hdr)) == 119 MLX5_ESEG_MIN_INLINE_SIZE, 120 "invalid min inline data size"); 121 static_assert(MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE <= 122 MLX5_DSEG_MAX, "invalid WQE max size"); 123 static_assert(MLX5_WQE_CSEG_SIZE == MLX5_WSEG_SIZE, 124 "invalid WQE Control Segment size"); 125 static_assert(MLX5_WQE_ESEG_SIZE == MLX5_WSEG_SIZE, 126 "invalid WQE Ethernet Segment size"); 127 static_assert(MLX5_WQE_DSEG_SIZE == MLX5_WSEG_SIZE, 128 "invalid WQE Data Segment size"); 129 static_assert(MLX5_WQE_SIZE == 4 * MLX5_WSEG_SIZE, 130 "invalid WQE size"); 131 132 static __rte_always_inline uint32_t 133 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 134 volatile struct mlx5_mini_cqe8 *mcqe); 135 136 static __rte_always_inline int 137 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 138 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe); 139 140 static __rte_always_inline uint32_t 141 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe); 142 143 static __rte_always_inline void 144 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 145 volatile struct mlx5_cqe *cqe, 146 volatile struct mlx5_mini_cqe8 *mcqe); 147 148 static int 149 mlx5_queue_state_modify(struct rte_eth_dev *dev, 150 struct mlx5_mp_arg_queue_state_modify *sm); 151 152 static inline void 153 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 154 volatile struct mlx5_cqe *__rte_restrict cqe, 
155 uint32_t phcsum, uint8_t l4_type); 156 157 static inline void 158 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 159 volatile struct mlx5_cqe *__rte_restrict cqe, 160 volatile struct mlx5_mini_cqe8 *mcqe, 161 struct mlx5_rxq_data *rxq, uint32_t len); 162 163 uint32_t mlx5_ptype_table[] __rte_cache_aligned = { 164 [0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */ 165 }; 166 167 uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned; 168 uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned; 169 170 uint64_t rte_net_mlx5_dynf_inline_mask; 171 #define PKT_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask 172 173 /** 174 * Build a table to translate Rx completion flags to packet type. 175 * 176 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 177 */ 178 void 179 mlx5_set_ptype_table(void) 180 { 181 unsigned int i; 182 uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table; 183 184 /* Last entry must not be overwritten, reserved for errored packet. */ 185 for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i) 186 (*p)[i] = RTE_PTYPE_UNKNOWN; 187 /* 188 * The index to the array should have: 189 * bit[1:0] = l3_hdr_type 190 * bit[4:2] = l4_hdr_type 191 * bit[5] = ip_frag 192 * bit[6] = tunneled 193 * bit[7] = outer_l3_type 194 */ 195 /* L2 */ 196 (*p)[0x00] = RTE_PTYPE_L2_ETHER; 197 /* L3 */ 198 (*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 199 RTE_PTYPE_L4_NONFRAG; 200 (*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 201 RTE_PTYPE_L4_NONFRAG; 202 /* Fragmented */ 203 (*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 204 RTE_PTYPE_L4_FRAG; 205 (*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 206 RTE_PTYPE_L4_FRAG; 207 /* TCP */ 208 (*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 209 RTE_PTYPE_L4_TCP; 210 (*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 211 RTE_PTYPE_L4_TCP; 212 (*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 213 RTE_PTYPE_L4_TCP; 214 (*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 215 RTE_PTYPE_L4_TCP; 216 (*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 217 RTE_PTYPE_L4_TCP; 218 (*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 219 RTE_PTYPE_L4_TCP; 220 /* UDP */ 221 (*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 222 RTE_PTYPE_L4_UDP; 223 (*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 224 RTE_PTYPE_L4_UDP; 225 /* Repeat with outer_l3_type being set. Just in case. 
*/ 226 (*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 227 RTE_PTYPE_L4_NONFRAG; 228 (*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 229 RTE_PTYPE_L4_NONFRAG; 230 (*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 231 RTE_PTYPE_L4_FRAG; 232 (*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 233 RTE_PTYPE_L4_FRAG; 234 (*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 235 RTE_PTYPE_L4_TCP; 236 (*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 237 RTE_PTYPE_L4_TCP; 238 (*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 239 RTE_PTYPE_L4_TCP; 240 (*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 241 RTE_PTYPE_L4_TCP; 242 (*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 243 RTE_PTYPE_L4_TCP; 244 (*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 245 RTE_PTYPE_L4_TCP; 246 (*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 247 RTE_PTYPE_L4_UDP; 248 (*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 249 RTE_PTYPE_L4_UDP; 250 /* Tunneled - L3 */ 251 (*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 252 (*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 253 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 254 RTE_PTYPE_INNER_L4_NONFRAG; 255 (*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 256 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 257 RTE_PTYPE_INNER_L4_NONFRAG; 258 (*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 259 (*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 260 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 261 RTE_PTYPE_INNER_L4_NONFRAG; 262 (*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 263 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 264 RTE_PTYPE_INNER_L4_NONFRAG; 265 /* Tunneled - Fragmented */ 266 (*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 267 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 268 RTE_PTYPE_INNER_L4_FRAG; 269 (*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 270 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 271 RTE_PTYPE_INNER_L4_FRAG; 272 (*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 273 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 274 RTE_PTYPE_INNER_L4_FRAG; 275 (*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 276 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 277 RTE_PTYPE_INNER_L4_FRAG; 278 /* Tunneled - TCP */ 279 (*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 280 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 281 RTE_PTYPE_INNER_L4_TCP; 282 (*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 283 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 284 RTE_PTYPE_INNER_L4_TCP; 285 (*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 286 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 287 RTE_PTYPE_INNER_L4_TCP; 288 (*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 289 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 290 RTE_PTYPE_INNER_L4_TCP; 291 (*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 292 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 293 RTE_PTYPE_INNER_L4_TCP; 294 (*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 295 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 296 RTE_PTYPE_INNER_L4_TCP; 297 (*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 298 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 299 RTE_PTYPE_INNER_L4_TCP; 300 (*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 301 
RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 302 RTE_PTYPE_INNER_L4_TCP; 303 (*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 304 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 305 RTE_PTYPE_INNER_L4_TCP; 306 (*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 307 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 308 RTE_PTYPE_INNER_L4_TCP; 309 (*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 310 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 311 RTE_PTYPE_INNER_L4_TCP; 312 (*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 313 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 314 RTE_PTYPE_INNER_L4_TCP; 315 /* Tunneled - UDP */ 316 (*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 317 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 318 RTE_PTYPE_INNER_L4_UDP; 319 (*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 320 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 321 RTE_PTYPE_INNER_L4_UDP; 322 (*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 323 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 324 RTE_PTYPE_INNER_L4_UDP; 325 (*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 326 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 327 RTE_PTYPE_INNER_L4_UDP; 328 } 329 330 /** 331 * Build a table to translate packet to checksum type of Verbs. 332 */ 333 void 334 mlx5_set_cksum_table(void) 335 { 336 unsigned int i; 337 uint8_t v; 338 339 /* 340 * The index should have: 341 * bit[0] = PKT_TX_TCP_SEG 342 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 343 * bit[4] = PKT_TX_IP_CKSUM 344 * bit[8] = PKT_TX_OUTER_IP_CKSUM 345 * bit[9] = tunnel 346 */ 347 for (i = 0; i < RTE_DIM(mlx5_cksum_table); ++i) { 348 v = 0; 349 if (i & (1 << 9)) { 350 /* Tunneled packet. */ 351 if (i & (1 << 8)) /* Outer IP. */ 352 v |= MLX5_ETH_WQE_L3_CSUM; 353 if (i & (1 << 4)) /* Inner IP. */ 354 v |= MLX5_ETH_WQE_L3_INNER_CSUM; 355 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 356 v |= MLX5_ETH_WQE_L4_INNER_CSUM; 357 } else { 358 /* No tunnel. */ 359 if (i & (1 << 4)) /* IP. */ 360 v |= MLX5_ETH_WQE_L3_CSUM; 361 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 362 v |= MLX5_ETH_WQE_L4_CSUM; 363 } 364 mlx5_cksum_table[i] = v; 365 } 366 } 367 368 /** 369 * Build a table to translate packet type of mbuf to SWP type of Verbs. 370 */ 371 void 372 mlx5_set_swp_types_table(void) 373 { 374 unsigned int i; 375 uint8_t v; 376 377 /* 378 * The index should have: 379 * bit[0:1] = PKT_TX_L4_MASK 380 * bit[4] = PKT_TX_IPV6 381 * bit[8] = PKT_TX_OUTER_IPV6 382 * bit[9] = PKT_TX_OUTER_UDP 383 */ 384 for (i = 0; i < RTE_DIM(mlx5_swp_types_table); ++i) { 385 v = 0; 386 if (i & (1 << 8)) 387 v |= MLX5_ETH_WQE_L3_OUTER_IPV6; 388 if (i & (1 << 9)) 389 v |= MLX5_ETH_WQE_L4_OUTER_UDP; 390 if (i & (1 << 4)) 391 v |= MLX5_ETH_WQE_L3_INNER_IPV6; 392 if ((i & 3) == (PKT_TX_UDP_CKSUM >> 52)) 393 v |= MLX5_ETH_WQE_L4_INNER_UDP; 394 mlx5_swp_types_table[i] = v; 395 } 396 } 397 398 /** 399 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 400 * Flags must be preliminary initialized to zero. 401 * 402 * @param loc 403 * Pointer to burst routine local context. 404 * @param swp_flags 405 * Pointer to store Software Parser flags 406 * @param olx 407 * Configured Tx offloads mask. It is fully defined at 408 * compile time and may be used for optimization. 409 * 410 * @return 411 * Software Parser offsets packed in dword. 412 * Software Parser flags are set by pointer. 
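 *
 *   As derived from the code below, the returned dword packs the half-word
 *   offsets (byte offset / 2), one byte each:
 *     bits  7..0  - outer L4 offset (set for UDP tunnels only)
 *     bits 15..8  - outer L3 offset
 *     bits 23..16 - inner L4 offset (set when L4 checksum or TSO is requested)
 *     bits 31..24 - inner L3 offset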
413 */ 414 static __rte_always_inline uint32_t 415 txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, 416 uint8_t *swp_flags, 417 unsigned int olx) 418 { 419 uint64_t ol, tunnel; 420 unsigned int idx, off; 421 uint32_t set; 422 423 if (!MLX5_TXOFF_CONFIG(SWP)) 424 return 0; 425 ol = loc->mbuf->ol_flags; 426 tunnel = ol & PKT_TX_TUNNEL_MASK; 427 /* 428 * Check whether Software Parser is required. 429 * Only customized tunnels may ask for. 430 */ 431 if (likely(tunnel != PKT_TX_TUNNEL_UDP && tunnel != PKT_TX_TUNNEL_IP)) 432 return 0; 433 /* 434 * The index should have: 435 * bit[0:1] = PKT_TX_L4_MASK 436 * bit[4] = PKT_TX_IPV6 437 * bit[8] = PKT_TX_OUTER_IPV6 438 * bit[9] = PKT_TX_OUTER_UDP 439 */ 440 idx = (ol & (PKT_TX_L4_MASK | PKT_TX_IPV6 | PKT_TX_OUTER_IPV6)) >> 52; 441 idx |= (tunnel == PKT_TX_TUNNEL_UDP) ? (1 << 9) : 0; 442 *swp_flags = mlx5_swp_types_table[idx]; 443 /* 444 * Set offsets for SW parser. Since ConnectX-5, SW parser just 445 * complements HW parser. SW parser starts to engage only if HW parser 446 * can't reach a header. For the older devices, HW parser will not kick 447 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 448 * should be set regardless of HW offload. 449 */ 450 off = loc->mbuf->outer_l2_len; 451 if (MLX5_TXOFF_CONFIG(VLAN) && ol & PKT_TX_VLAN_PKT) 452 off += sizeof(struct rte_vlan_hdr); 453 set = (off >> 1) << 8; /* Outer L3 offset. */ 454 off += loc->mbuf->outer_l3_len; 455 if (tunnel == PKT_TX_TUNNEL_UDP) 456 set |= off >> 1; /* Outer L4 offset. */ 457 if (ol & (PKT_TX_IPV4 | PKT_TX_IPV6)) { /* Inner IP. */ 458 const uint64_t csum = ol & PKT_TX_L4_MASK; 459 off += loc->mbuf->l2_len; 460 set |= (off >> 1) << 24; /* Inner L3 offset. */ 461 if (csum == PKT_TX_TCP_CKSUM || 462 csum == PKT_TX_UDP_CKSUM || 463 (MLX5_TXOFF_CONFIG(TSO) && ol & PKT_TX_TCP_SEG)) { 464 off += loc->mbuf->l3_len; 465 set |= (off >> 1) << 16; /* Inner L4 offset. */ 466 } 467 } 468 set = rte_cpu_to_le_32(set); 469 return set; 470 } 471 472 /** 473 * Convert the Checksum offloads to Verbs. 474 * 475 * @param buf 476 * Pointer to the mbuf. 477 * 478 * @return 479 * Converted checksum flags. 480 */ 481 static __rte_always_inline uint8_t 482 txq_ol_cksum_to_cs(struct rte_mbuf *buf) 483 { 484 uint32_t idx; 485 uint8_t is_tunnel = !!(buf->ol_flags & PKT_TX_TUNNEL_MASK); 486 const uint64_t ol_flags_mask = PKT_TX_TCP_SEG | PKT_TX_L4_MASK | 487 PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM; 488 489 /* 490 * The index should have: 491 * bit[0] = PKT_TX_TCP_SEG 492 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 493 * bit[4] = PKT_TX_IP_CKSUM 494 * bit[8] = PKT_TX_OUTER_IP_CKSUM 495 * bit[9] = tunnel 496 */ 497 idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9); 498 return mlx5_cksum_table[idx]; 499 } 500 501 /** 502 * Internal function to compute the number of used descriptors in an RX queue 503 * 504 * @param rxq 505 * The Rx queue. 506 * 507 * @return 508 * The number of used rx descriptor. 
 */
static uint32_t
rx_queue_count(struct mlx5_rxq_data *rxq)
{
	struct rxq_zip *zip = &rxq->zip;
	volatile struct mlx5_cqe *cqe;
	const unsigned int cqe_n = (1 << rxq->cqe_n);
	const unsigned int sges_n = (1 << rxq->sges_n);
	const unsigned int elts_n = (1 << rxq->elts_n);
	const unsigned int strd_n = (1 << rxq->strd_num_n);
	const unsigned int cqe_cnt = cqe_n - 1;
	unsigned int cq_ci, used;

	/* if we are processing a compressed cqe */
	if (zip->ai) {
		used = zip->cqe_cnt - zip->ai;
		cq_ci = zip->cq_ci;
	} else {
		used = 0;
		cq_ci = rxq->cq_ci;
	}
	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) {
		int8_t op_own;
		unsigned int n;

		op_own = cqe->op_own;
		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
			n = rte_be_to_cpu_32(cqe->byte_cnt);
		else
			n = 1;
		cq_ci += n;
		used += n;
		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	}
	used = RTE_MIN(used * sges_n, elts_n * strd_n);
	return used;
}

/**
 * DPDK callback to check the status of a Rx descriptor.
 *
 * @param rx_queue
 *   The Rx queue.
 * @param[in] offset
 *   The index of the descriptor in the ring.
 *
 * @return
 *   The status of the Rx descriptor.
 */
int
mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
{
	struct mlx5_rxq_data *rxq = rx_queue;
	struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);

	if (dev->rx_pkt_burst == NULL ||
	    dev->rx_pkt_burst == removed_rx_burst) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	if (offset >= (1 << rxq->cqe_n)) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (offset < rx_queue_count(rxq))
		return RTE_ETH_RX_DESC_DONE;
	return RTE_ETH_RX_DESC_AVAIL;
}

/**
 * DPDK callback to get the RX queue information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   Rx queue identifier.
 *
 * @param qinfo
 *   Pointer to the RX queue information structure.
 *
 * @return
 *   None.
 */

void
mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		  struct rte_eth_rxq_info *qinfo)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

	if (!rxq)
		return;
	qinfo->mp = mlx5_rxq_mprq_enabled(rxq) ?
					rxq->mprq_mp : rxq->mp;
	qinfo->conf.rx_thresh.pthresh = 0;
	qinfo->conf.rx_thresh.hthresh = 0;
	qinfo->conf.rx_thresh.wthresh = 0;
	qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh;
	qinfo->conf.rx_drop_en = 1;
	qinfo->conf.rx_deferred_start = rxq_ctrl ? 0 : 1;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
	qinfo->scattered_rx = dev->data->scattered_rx;
	qinfo->nb_desc = mlx5_rxq_mprq_enabled(rxq) ?
		(1 << rxq->elts_n) * (1 << rxq->strd_num_n) :
		(1 << rxq->elts_n);
}

/**
 * DPDK callback to get the RX packet burst mode information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   Rx queue identifier.
 *
 * @param mode
 *   Pointer to the burst mode information.
 *
 * @return
 *   0 on success, -EINVAL on failure.
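 *
 * A minimal usage sketch through the generic ethdev API (the port and queue
 * ids below are illustrative placeholders only):
 *
 * @code
 * struct rte_eth_burst_mode bm;
 *
 * if (rte_eth_rx_burst_mode_get(port_id, 0, &bm) == 0)
 *     printf("Rx burst mode: %s\n", bm.info);
 * @endcode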
637 */ 638 639 int 640 mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, 641 uint16_t rx_queue_id __rte_unused, 642 struct rte_eth_burst_mode *mode) 643 { 644 eth_rx_burst_t pkt_burst = dev->rx_pkt_burst; 645 struct mlx5_priv *priv = dev->data->dev_private; 646 struct mlx5_rxq_data *rxq; 647 648 rxq = (*priv->rxqs)[rx_queue_id]; 649 if (!rxq) { 650 rte_errno = EINVAL; 651 return -rte_errno; 652 } 653 if (pkt_burst == mlx5_rx_burst) { 654 snprintf(mode->info, sizeof(mode->info), "%s", "Scalar"); 655 } else if (pkt_burst == mlx5_rx_burst_mprq) { 656 snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ"); 657 } else if (pkt_burst == mlx5_rx_burst_vec) { 658 #if defined RTE_ARCH_X86_64 659 snprintf(mode->info, sizeof(mode->info), "%s", "Vector SSE"); 660 #elif defined RTE_ARCH_ARM64 661 snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon"); 662 #elif defined RTE_ARCH_PPC_64 663 snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec"); 664 #else 665 return -EINVAL; 666 #endif 667 } else if (pkt_burst == mlx5_rx_burst_mprq_vec) { 668 #if defined RTE_ARCH_X86_64 669 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector SSE"); 670 #elif defined RTE_ARCH_ARM64 671 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon"); 672 #elif defined RTE_ARCH_PPC_64 673 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec"); 674 #else 675 return -EINVAL; 676 #endif 677 } else { 678 return -EINVAL; 679 } 680 return 0; 681 } 682 683 /** 684 * DPDK callback to get the number of used descriptors in a RX queue 685 * 686 * @param dev 687 * Pointer to the device structure. 688 * 689 * @param rx_queue_id 690 * The Rx queue. 691 * 692 * @return 693 * The number of used rx descriptor. 694 * -EINVAL if the queue is invalid 695 */ 696 uint32_t 697 mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id) 698 { 699 struct mlx5_priv *priv = dev->data->dev_private; 700 struct mlx5_rxq_data *rxq; 701 702 if (dev->rx_pkt_burst == NULL || 703 dev->rx_pkt_burst == removed_rx_burst) { 704 rte_errno = ENOTSUP; 705 return -rte_errno; 706 } 707 rxq = (*priv->rxqs)[rx_queue_id]; 708 if (!rxq) { 709 rte_errno = EINVAL; 710 return -rte_errno; 711 } 712 return rx_queue_count(rxq); 713 } 714 715 #define MLX5_SYSTEM_LOG_DIR "/var/log" 716 /** 717 * Dump debug information to log file. 718 * 719 * @param fname 720 * The file name. 721 * @param hex_title 722 * If not NULL this string is printed as a header to the output 723 * and the output will be in hexadecimal view. 724 * @param buf 725 * This is the buffer address to print out. 726 * @param len 727 * The number of bytes to dump out. 728 */ 729 void 730 mlx5_dump_debug_information(const char *fname, const char *hex_title, 731 const void *buf, unsigned int hex_len) 732 { 733 FILE *fd; 734 735 MKSTR(path, "%s/%s", MLX5_SYSTEM_LOG_DIR, fname); 736 fd = fopen(path, "a+"); 737 if (!fd) { 738 DRV_LOG(WARNING, "cannot open %s for debug dump", path); 739 MKSTR(path2, "./%s", fname); 740 fd = fopen(path2, "a+"); 741 if (!fd) { 742 DRV_LOG(ERR, "cannot open %s for debug dump", path2); 743 return; 744 } 745 DRV_LOG(INFO, "New debug dump in file %s", path2); 746 } else { 747 DRV_LOG(INFO, "New debug dump in file %s", path); 748 } 749 if (hex_title) 750 rte_hexdump(fd, hex_title, buf, hex_len); 751 else 752 fprintf(fd, "%s", (const char *)buf); 753 fprintf(fd, "\n\n\n"); 754 fclose(fd); 755 } 756 757 /** 758 * Move QP from error state to running state and initialize indexes. 
759 * 760 * @param txq_ctrl 761 * Pointer to TX queue control structure. 762 * 763 * @return 764 * 0 on success, else -1. 765 */ 766 static int 767 tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl) 768 { 769 struct mlx5_mp_arg_queue_state_modify sm = { 770 .is_wq = 0, 771 .queue_id = txq_ctrl->txq.idx, 772 }; 773 774 if (mlx5_queue_state_modify(ETH_DEV(txq_ctrl->priv), &sm)) 775 return -1; 776 txq_ctrl->txq.wqe_ci = 0; 777 txq_ctrl->txq.wqe_pi = 0; 778 txq_ctrl->txq.elts_comp = 0; 779 return 0; 780 } 781 782 /* Return 1 if the error CQE is signed otherwise, sign it and return 0. */ 783 static int 784 check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe) 785 { 786 static const uint8_t magic[] = "seen"; 787 int ret = 1; 788 unsigned int i; 789 790 for (i = 0; i < sizeof(magic); ++i) 791 if (!ret || err_cqe->rsvd1[i] != magic[i]) { 792 ret = 0; 793 err_cqe->rsvd1[i] = magic[i]; 794 } 795 return ret; 796 } 797 798 /** 799 * Handle error CQE. 800 * 801 * @param txq 802 * Pointer to TX queue structure. 803 * @param error_cqe 804 * Pointer to the error CQE. 805 * 806 * @return 807 * Negative value if queue recovery failed, otherwise 808 * the error completion entry is handled successfully. 809 */ 810 static int 811 mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq, 812 volatile struct mlx5_err_cqe *err_cqe) 813 { 814 if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) { 815 const uint16_t wqe_m = ((1 << txq->wqe_n) - 1); 816 struct mlx5_txq_ctrl *txq_ctrl = 817 container_of(txq, struct mlx5_txq_ctrl, txq); 818 uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter); 819 int seen = check_err_cqe_seen(err_cqe); 820 821 if (!seen && txq_ctrl->dump_file_n < 822 txq_ctrl->priv->config.max_dump_files_num) { 823 MKSTR(err_str, "Unexpected CQE error syndrome " 824 "0x%02x CQN = %u SQN = %u wqe_counter = %u " 825 "wq_ci = %u cq_ci = %u", err_cqe->syndrome, 826 txq->cqe_s, txq->qp_num_8s >> 8, 827 rte_be_to_cpu_16(err_cqe->wqe_counter), 828 txq->wqe_ci, txq->cq_ci); 829 MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u", 830 PORT_ID(txq_ctrl->priv), txq->idx, 831 txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc()); 832 mlx5_dump_debug_information(name, NULL, err_str, 0); 833 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 834 (const void *)((uintptr_t) 835 txq->cqes), 836 sizeof(*err_cqe) * 837 (1 << txq->cqe_n)); 838 mlx5_dump_debug_information(name, "MLX5 Error SQ:", 839 (const void *)((uintptr_t) 840 txq->wqes), 841 MLX5_WQE_SIZE * 842 (1 << txq->wqe_n)); 843 txq_ctrl->dump_file_n++; 844 } 845 if (!seen) 846 /* 847 * Count errors in WQEs units. 848 * Later it can be improved to count error packets, 849 * for example, by SQ parsing to find how much packets 850 * should be counted for each WQE. 851 */ 852 txq->stats.oerrors += ((txq->wqe_ci & wqe_m) - 853 new_wqe_pi) & wqe_m; 854 if (tx_recover_qp(txq_ctrl)) { 855 /* Recovering failed - retry later on the same WQE. */ 856 return -1; 857 } 858 /* Release all the remaining buffers. */ 859 txq_free_elts(txq_ctrl); 860 } 861 return 0; 862 } 863 864 /** 865 * Translate RX completion flags to packet type. 866 * 867 * @param[in] rxq 868 * Pointer to RX queue structure. 869 * @param[in] cqe 870 * Pointer to CQE. 871 * 872 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 873 * 874 * @return 875 * Packet type for struct rte_mbuf. 
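 *
 * For example, with the table built by mlx5_set_ptype_table(), index 0x06
 * (non-tunneled IPv4/TCP) translates to RTE_PTYPE_L2_ETHER |
 * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP; when bit 6 (tunneled)
 * is set, rxq->tunnel is additionally OR-ed into the result.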
876 */ 877 static inline uint32_t 878 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 879 volatile struct mlx5_mini_cqe8 *mcqe) 880 { 881 uint8_t idx; 882 uint8_t ptype; 883 uint8_t pinfo = (cqe->pkt_info & 0x3) << 6; 884 885 /* Get l3/l4 header from mini-CQE in case L3/L4 format*/ 886 if (mcqe == NULL || 887 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 888 ptype = (cqe->hdr_type_etc & 0xfc00) >> 10; 889 else 890 ptype = mcqe->hdr_type >> 2; 891 /* 892 * The index to the array should have: 893 * bit[1:0] = l3_hdr_type 894 * bit[4:2] = l4_hdr_type 895 * bit[5] = ip_frag 896 * bit[6] = tunneled 897 * bit[7] = outer_l3_type 898 */ 899 idx = pinfo | ptype; 900 return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6)); 901 } 902 903 /** 904 * Initialize Rx WQ and indexes. 905 * 906 * @param[in] rxq 907 * Pointer to RX queue structure. 908 */ 909 void 910 mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) 911 { 912 const unsigned int wqe_n = 1 << rxq->elts_n; 913 unsigned int i; 914 915 for (i = 0; (i != wqe_n); ++i) { 916 volatile struct mlx5_wqe_data_seg *scat; 917 uintptr_t addr; 918 uint32_t byte_count; 919 920 if (mlx5_rxq_mprq_enabled(rxq)) { 921 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i]; 922 923 scat = &((volatile struct mlx5_wqe_mprq *) 924 rxq->wqes)[i].dseg; 925 addr = (uintptr_t)mlx5_mprq_buf_addr(buf, 926 1 << rxq->strd_num_n); 927 byte_count = (1 << rxq->strd_sz_n) * 928 (1 << rxq->strd_num_n); 929 } else { 930 struct rte_mbuf *buf = (*rxq->elts)[i]; 931 932 scat = &((volatile struct mlx5_wqe_data_seg *) 933 rxq->wqes)[i]; 934 addr = rte_pktmbuf_mtod(buf, uintptr_t); 935 byte_count = DATA_LEN(buf); 936 } 937 /* scat->addr must be able to store a pointer. */ 938 MLX5_ASSERT(sizeof(scat->addr) >= sizeof(uintptr_t)); 939 *scat = (struct mlx5_wqe_data_seg){ 940 .addr = rte_cpu_to_be_64(addr), 941 .byte_count = rte_cpu_to_be_32(byte_count), 942 .lkey = mlx5_rx_addr2mr(rxq, addr), 943 }; 944 } 945 rxq->consumed_strd = 0; 946 rxq->decompressed = 0; 947 rxq->rq_pi = 0; 948 rxq->zip = (struct rxq_zip){ 949 .ai = 0, 950 }; 951 rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ? 952 (wqe_n >> rxq->sges_n) * (1 << rxq->strd_num_n) : 0; 953 /* Update doorbell counter. */ 954 rxq->rq_ci = wqe_n >> rxq->sges_n; 955 rte_io_wmb(); 956 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 957 } 958 959 /** 960 * Modify a Verbs/DevX queue state. 961 * This must be called from the primary process. 962 * 963 * @param dev 964 * Pointer to Ethernet device. 965 * @param sm 966 * State modify request parameters. 967 * 968 * @return 969 * 0 in case of success else non-zero value and rte_errno is set. 
970 */ 971 int 972 mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, 973 const struct mlx5_mp_arg_queue_state_modify *sm) 974 { 975 int ret; 976 struct mlx5_priv *priv = dev->data->dev_private; 977 978 if (sm->is_wq) { 979 struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id]; 980 struct mlx5_rxq_ctrl *rxq_ctrl = 981 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 982 983 ret = priv->obj_ops.rxq_obj_modify(rxq_ctrl->obj, sm->state); 984 if (ret) { 985 DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s", 986 sm->state, strerror(errno)); 987 rte_errno = errno; 988 return ret; 989 } 990 } else { 991 struct mlx5_txq_data *txq = (*priv->txqs)[sm->queue_id]; 992 struct mlx5_txq_ctrl *txq_ctrl = 993 container_of(txq, struct mlx5_txq_ctrl, txq); 994 995 ret = priv->obj_ops.txq_obj_modify(txq_ctrl->obj, 996 MLX5_TXQ_MOD_ERR2RDY, 997 (uint8_t)priv->dev_port); 998 if (ret) 999 return ret; 1000 } 1001 return 0; 1002 } 1003 1004 /** 1005 * Modify a Verbs queue state. 1006 * 1007 * @param dev 1008 * Pointer to Ethernet device. 1009 * @param sm 1010 * State modify request parameters. 1011 * 1012 * @return 1013 * 0 in case of success else non-zero value. 1014 */ 1015 static int 1016 mlx5_queue_state_modify(struct rte_eth_dev *dev, 1017 struct mlx5_mp_arg_queue_state_modify *sm) 1018 { 1019 struct mlx5_priv *priv = dev->data->dev_private; 1020 int ret = 0; 1021 1022 switch (rte_eal_process_type()) { 1023 case RTE_PROC_PRIMARY: 1024 ret = mlx5_queue_state_modify_primary(dev, sm); 1025 break; 1026 case RTE_PROC_SECONDARY: 1027 ret = mlx5_mp_req_queue_state_modify(&priv->mp_id, sm); 1028 break; 1029 default: 1030 break; 1031 } 1032 return ret; 1033 } 1034 1035 /** 1036 * Handle a Rx error. 1037 * The function inserts the RQ state to reset when the first error CQE is 1038 * shown, then drains the CQ by the caller function loop. When the CQ is empty, 1039 * it moves the RQ state to ready and initializes the RQ. 1040 * Next CQE identification and error counting are in the caller responsibility. 1041 * 1042 * @param[in] rxq 1043 * Pointer to RX queue structure. 1044 * @param[in] vec 1045 * 1 when called from vectorized Rx burst, need to prepare mbufs for the RQ. 1046 * 0 when called from non-vectorized Rx burst. 1047 * 1048 * @return 1049 * -1 in case of recovery error, otherwise the CQE status. 
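 *
 * The recovery is driven by rxq->err_state and, as implemented below,
 * progresses as NO_ERROR -> NEED_RESET (move the RQ to RESET) ->
 * NEED_READY (once the CQ is drained, zero the RQ consumer index and move
 * the RQ back to RDY) -> NO_ERROR.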
1050 */ 1051 int 1052 mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) 1053 { 1054 const uint16_t cqe_n = 1 << rxq->cqe_n; 1055 const uint16_t cqe_mask = cqe_n - 1; 1056 const uint16_t wqe_n = 1 << rxq->elts_n; 1057 const uint16_t strd_n = 1 << rxq->strd_num_n; 1058 struct mlx5_rxq_ctrl *rxq_ctrl = 1059 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 1060 union { 1061 volatile struct mlx5_cqe *cqe; 1062 volatile struct mlx5_err_cqe *err_cqe; 1063 } u = { 1064 .cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask], 1065 }; 1066 struct mlx5_mp_arg_queue_state_modify sm; 1067 int ret; 1068 1069 switch (rxq->err_state) { 1070 case MLX5_RXQ_ERR_STATE_NO_ERROR: 1071 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET; 1072 /* Fall-through */ 1073 case MLX5_RXQ_ERR_STATE_NEED_RESET: 1074 sm.is_wq = 1; 1075 sm.queue_id = rxq->idx; 1076 sm.state = IBV_WQS_RESET; 1077 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm)) 1078 return -1; 1079 if (rxq_ctrl->dump_file_n < 1080 rxq_ctrl->priv->config.max_dump_files_num) { 1081 MKSTR(err_str, "Unexpected CQE error syndrome " 1082 "0x%02x CQN = %u RQN = %u wqe_counter = %u" 1083 " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome, 1084 rxq->cqn, rxq_ctrl->wqn, 1085 rte_be_to_cpu_16(u.err_cqe->wqe_counter), 1086 rxq->rq_ci << rxq->sges_n, rxq->cq_ci); 1087 MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u", 1088 rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc()); 1089 mlx5_dump_debug_information(name, NULL, err_str, 0); 1090 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 1091 (const void *)((uintptr_t) 1092 rxq->cqes), 1093 sizeof(*u.cqe) * cqe_n); 1094 mlx5_dump_debug_information(name, "MLX5 Error RQ:", 1095 (const void *)((uintptr_t) 1096 rxq->wqes), 1097 16 * wqe_n); 1098 rxq_ctrl->dump_file_n++; 1099 } 1100 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY; 1101 /* Fall-through */ 1102 case MLX5_RXQ_ERR_STATE_NEED_READY: 1103 ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci); 1104 if (ret == MLX5_CQE_STATUS_HW_OWN) { 1105 rte_io_wmb(); 1106 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1107 rte_io_wmb(); 1108 /* 1109 * The RQ consumer index must be zeroed while moving 1110 * from RESET state to RDY state. 1111 */ 1112 *rxq->rq_db = rte_cpu_to_be_32(0); 1113 rte_io_wmb(); 1114 sm.is_wq = 1; 1115 sm.queue_id = rxq->idx; 1116 sm.state = IBV_WQS_RDY; 1117 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), 1118 &sm)) 1119 return -1; 1120 if (vec) { 1121 const uint32_t elts_n = 1122 mlx5_rxq_mprq_enabled(rxq) ? 1123 wqe_n * strd_n : wqe_n; 1124 const uint32_t e_mask = elts_n - 1; 1125 uint32_t elts_ci = 1126 mlx5_rxq_mprq_enabled(rxq) ? 1127 rxq->elts_ci : rxq->rq_ci; 1128 uint32_t elt_idx; 1129 struct rte_mbuf **elt; 1130 int i; 1131 unsigned int n = elts_n - (elts_ci - 1132 rxq->rq_pi); 1133 1134 for (i = 0; i < (int)n; ++i) { 1135 elt_idx = (elts_ci + i) & e_mask; 1136 elt = &(*rxq->elts)[elt_idx]; 1137 *elt = rte_mbuf_raw_alloc(rxq->mp); 1138 if (!*elt) { 1139 for (i--; i >= 0; --i) { 1140 elt_idx = (elts_ci + 1141 i) & elts_n; 1142 elt = &(*rxq->elts) 1143 [elt_idx]; 1144 rte_pktmbuf_free_seg 1145 (*elt); 1146 } 1147 return -1; 1148 } 1149 } 1150 for (i = 0; i < (int)elts_n; ++i) { 1151 elt = &(*rxq->elts)[i]; 1152 DATA_LEN(*elt) = 1153 (uint16_t)((*elt)->buf_len - 1154 rte_pktmbuf_headroom(*elt)); 1155 } 1156 /* Padding with a fake mbuf for vec Rx. 
*/ 1157 for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i) 1158 (*rxq->elts)[elts_n + i] = 1159 &rxq->fake_mbuf; 1160 } 1161 mlx5_rxq_initialize(rxq); 1162 rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; 1163 } 1164 return ret; 1165 default: 1166 return -1; 1167 } 1168 } 1169 1170 /** 1171 * Get size of the next packet for a given CQE. For compressed CQEs, the 1172 * consumer index is updated only once all packets of the current one have 1173 * been processed. 1174 * 1175 * @param rxq 1176 * Pointer to RX queue. 1177 * @param cqe 1178 * CQE to process. 1179 * @param[out] mcqe 1180 * Store pointer to mini-CQE if compressed. Otherwise, the pointer is not 1181 * written. 1182 * 1183 * @return 1184 * 0 in case of empty CQE, otherwise the packet size in bytes. 1185 */ 1186 static inline int 1187 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 1188 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe) 1189 { 1190 struct rxq_zip *zip = &rxq->zip; 1191 uint16_t cqe_n = cqe_cnt + 1; 1192 int len; 1193 uint16_t idx, end; 1194 1195 do { 1196 len = 0; 1197 /* Process compressed data in the CQE and mini arrays. */ 1198 if (zip->ai) { 1199 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1200 (volatile struct mlx5_mini_cqe8 (*)[8]) 1201 (uintptr_t)(&(*rxq->cqes)[zip->ca & 1202 cqe_cnt].pkt_info); 1203 len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt & 1204 rxq->byte_mask); 1205 *mcqe = &(*mc)[zip->ai & 7]; 1206 if ((++zip->ai & 7) == 0) { 1207 /* Invalidate consumed CQEs */ 1208 idx = zip->ca; 1209 end = zip->na; 1210 while (idx != end) { 1211 (*rxq->cqes)[idx & cqe_cnt].op_own = 1212 MLX5_CQE_INVALIDATE; 1213 ++idx; 1214 } 1215 /* 1216 * Increment consumer index to skip the number 1217 * of CQEs consumed. Hardware leaves holes in 1218 * the CQ ring for software use. 1219 */ 1220 zip->ca = zip->na; 1221 zip->na += 8; 1222 } 1223 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { 1224 /* Invalidate the rest */ 1225 idx = zip->ca; 1226 end = zip->cq_ci; 1227 1228 while (idx != end) { 1229 (*rxq->cqes)[idx & cqe_cnt].op_own = 1230 MLX5_CQE_INVALIDATE; 1231 ++idx; 1232 } 1233 rxq->cq_ci = zip->cq_ci; 1234 zip->ai = 0; 1235 } 1236 /* 1237 * No compressed data, get next CQE and verify if it is 1238 * compressed. 1239 */ 1240 } else { 1241 int ret; 1242 int8_t op_own; 1243 uint32_t cq_ci; 1244 1245 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 1246 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 1247 if (unlikely(ret == MLX5_CQE_STATUS_ERR || 1248 rxq->err_state)) { 1249 ret = mlx5_rx_err_handle(rxq, 0); 1250 if (ret == MLX5_CQE_STATUS_HW_OWN || 1251 ret == -1) 1252 return 0; 1253 } else { 1254 return 0; 1255 } 1256 } 1257 /* 1258 * Introduce the local variable to have queue cq_ci 1259 * index in queue structure always consistent with 1260 * actual CQE boundary (not pointing to the middle 1261 * of compressed CQE session). 1262 */ 1263 cq_ci = rxq->cq_ci + 1; 1264 op_own = cqe->op_own; 1265 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { 1266 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1267 (volatile struct mlx5_mini_cqe8 (*)[8]) 1268 (uintptr_t)(&(*rxq->cqes) 1269 [cq_ci & cqe_cnt].pkt_info); 1270 1271 /* Fix endianness. */ 1272 zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt); 1273 /* 1274 * Current mini array position is the one 1275 * returned by check_cqe64(). 1276 * 1277 * If completion comprises several mini arrays, 1278 * as a special case the second one is located 1279 * 7 CQEs after the initial CQE instead of 8 1280 * for subsequent ones. 
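				 * For instance, if the compressed session
				 * starts at CQ slot N, the mini arrays are
				 * taken from slots N, N + 7, N + 15, N + 23
				 * and so on (zip->na below starts at ca + 7
				 * and then advances by 8).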
1281 */ 1282 zip->ca = cq_ci; 1283 zip->na = zip->ca + 7; 1284 /* Compute the next non compressed CQE. */ 1285 zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; 1286 /* Get packet size to return. */ 1287 len = rte_be_to_cpu_32((*mc)[0].byte_cnt & 1288 rxq->byte_mask); 1289 *mcqe = &(*mc)[0]; 1290 zip->ai = 1; 1291 /* Prefetch all to be invalidated */ 1292 idx = zip->ca; 1293 end = zip->cq_ci; 1294 while (idx != end) { 1295 rte_prefetch0(&(*rxq->cqes)[(idx) & 1296 cqe_cnt]); 1297 ++idx; 1298 } 1299 } else { 1300 rxq->cq_ci = cq_ci; 1301 len = rte_be_to_cpu_32(cqe->byte_cnt); 1302 } 1303 } 1304 if (unlikely(rxq->err_state)) { 1305 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1306 ++rxq->stats.idropped; 1307 } else { 1308 return len; 1309 } 1310 } while (1); 1311 } 1312 1313 /** 1314 * Translate RX completion flags to offload flags. 1315 * 1316 * @param[in] cqe 1317 * Pointer to CQE. 1318 * 1319 * @return 1320 * Offload flags (ol_flags) for struct rte_mbuf. 1321 */ 1322 static inline uint32_t 1323 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe) 1324 { 1325 uint32_t ol_flags = 0; 1326 uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc); 1327 1328 ol_flags = 1329 TRANSPOSE(flags, 1330 MLX5_CQE_RX_L3_HDR_VALID, 1331 PKT_RX_IP_CKSUM_GOOD) | 1332 TRANSPOSE(flags, 1333 MLX5_CQE_RX_L4_HDR_VALID, 1334 PKT_RX_L4_CKSUM_GOOD); 1335 return ol_flags; 1336 } 1337 1338 /** 1339 * Fill in mbuf fields from RX completion flags. 1340 * Note that pkt->ol_flags should be initialized outside of this function. 1341 * 1342 * @param rxq 1343 * Pointer to RX queue. 1344 * @param pkt 1345 * mbuf to fill. 1346 * @param cqe 1347 * CQE to process. 1348 * @param rss_hash_res 1349 * Packet RSS Hash result. 1350 */ 1351 static inline void 1352 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 1353 volatile struct mlx5_cqe *cqe, 1354 volatile struct mlx5_mini_cqe8 *mcqe) 1355 { 1356 /* Update packet information. */ 1357 pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe, mcqe); 1358 1359 if (rxq->rss_hash) { 1360 uint32_t rss_hash_res = 0; 1361 1362 /* If compressed, take hash result from mini-CQE. */ 1363 if (mcqe == NULL || 1364 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH) 1365 rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res); 1366 else 1367 rss_hash_res = rte_be_to_cpu_32(mcqe->rx_hash_result); 1368 if (rss_hash_res) { 1369 pkt->hash.rss = rss_hash_res; 1370 pkt->ol_flags |= PKT_RX_RSS_HASH; 1371 } 1372 } 1373 if (rxq->mark) { 1374 uint32_t mark = 0; 1375 1376 /* If compressed, take flow tag from mini-CQE. 
*/ 1377 if (mcqe == NULL || 1378 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) 1379 mark = cqe->sop_drop_qpn; 1380 else 1381 mark = ((mcqe->byte_cnt_flow & 0xff) << 8) | 1382 (mcqe->flow_tag_high << 16); 1383 if (MLX5_FLOW_MARK_IS_VALID(mark)) { 1384 pkt->ol_flags |= PKT_RX_FDIR; 1385 if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) { 1386 pkt->ol_flags |= PKT_RX_FDIR_ID; 1387 pkt->hash.fdir.hi = mlx5_flow_mark_get(mark); 1388 } 1389 } 1390 } 1391 if (rxq->dynf_meta) { 1392 uint32_t meta = cqe->flow_table_metadata & 1393 rxq->flow_meta_port_mask; 1394 1395 if (meta) { 1396 pkt->ol_flags |= rxq->flow_meta_mask; 1397 *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, 1398 uint32_t *) = meta; 1399 } 1400 } 1401 if (rxq->csum) 1402 pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); 1403 if (rxq->vlan_strip) { 1404 bool vlan_strip; 1405 1406 if (mcqe == NULL || 1407 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 1408 vlan_strip = cqe->hdr_type_etc & 1409 RTE_BE16(MLX5_CQE_VLAN_STRIPPED); 1410 else 1411 vlan_strip = mcqe->hdr_type & 1412 RTE_BE16(MLX5_CQE_VLAN_STRIPPED); 1413 if (vlan_strip) { 1414 pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; 1415 pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info); 1416 } 1417 } 1418 if (rxq->hw_timestamp) { 1419 uint64_t ts = rte_be_to_cpu_64(cqe->timestamp); 1420 1421 if (rxq->rt_timestamp) 1422 ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts); 1423 mlx5_timestamp_set(pkt, rxq->timestamp_offset, ts); 1424 pkt->ol_flags |= rxq->timestamp_rx_flag; 1425 } 1426 } 1427 1428 /** 1429 * DPDK callback for RX. 1430 * 1431 * @param dpdk_rxq 1432 * Generic pointer to RX queue structure. 1433 * @param[out] pkts 1434 * Array to store received packets. 1435 * @param pkts_n 1436 * Maximum number of packets in array. 1437 * 1438 * @return 1439 * Number of packets successfully received (<= pkts_n). 1440 */ 1441 uint16_t 1442 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1443 { 1444 struct mlx5_rxq_data *rxq = dpdk_rxq; 1445 const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; 1446 const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1; 1447 const unsigned int sges_n = rxq->sges_n; 1448 struct rte_mbuf *pkt = NULL; 1449 struct rte_mbuf *seg = NULL; 1450 volatile struct mlx5_cqe *cqe = 1451 &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1452 unsigned int i = 0; 1453 unsigned int rq_ci = rxq->rq_ci << sges_n; 1454 int len = 0; /* keep its value across iterations. */ 1455 1456 while (pkts_n) { 1457 unsigned int idx = rq_ci & wqe_cnt; 1458 volatile struct mlx5_wqe_data_seg *wqe = 1459 &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx]; 1460 struct rte_mbuf *rep = (*rxq->elts)[idx]; 1461 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1462 1463 if (pkt) 1464 NEXT(seg) = rep; 1465 seg = rep; 1466 rte_prefetch0(seg); 1467 rte_prefetch0(cqe); 1468 rte_prefetch0(wqe); 1469 /* Allocate the buf from the same pool. */ 1470 rep = rte_mbuf_raw_alloc(seg->pool); 1471 if (unlikely(rep == NULL)) { 1472 ++rxq->stats.rx_nombuf; 1473 if (!pkt) { 1474 /* 1475 * no buffers before we even started, 1476 * bail out silently. 
1477 */ 1478 break; 1479 } 1480 while (pkt != seg) { 1481 MLX5_ASSERT(pkt != (*rxq->elts)[idx]); 1482 rep = NEXT(pkt); 1483 NEXT(pkt) = NULL; 1484 NB_SEGS(pkt) = 1; 1485 rte_mbuf_raw_free(pkt); 1486 pkt = rep; 1487 } 1488 rq_ci >>= sges_n; 1489 ++rq_ci; 1490 rq_ci <<= sges_n; 1491 break; 1492 } 1493 if (!pkt) { 1494 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1495 len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe); 1496 if (!len) { 1497 rte_mbuf_raw_free(rep); 1498 break; 1499 } 1500 pkt = seg; 1501 MLX5_ASSERT(len >= (rxq->crc_present << 2)); 1502 pkt->ol_flags &= EXT_ATTACHED_MBUF; 1503 rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe); 1504 if (rxq->crc_present) 1505 len -= RTE_ETHER_CRC_LEN; 1506 PKT_LEN(pkt) = len; 1507 if (cqe->lro_num_seg > 1) { 1508 mlx5_lro_update_hdr 1509 (rte_pktmbuf_mtod(pkt, uint8_t *), cqe, 1510 mcqe, rxq, len); 1511 pkt->ol_flags |= PKT_RX_LRO; 1512 pkt->tso_segsz = len / cqe->lro_num_seg; 1513 } 1514 } 1515 DATA_LEN(rep) = DATA_LEN(seg); 1516 PKT_LEN(rep) = PKT_LEN(seg); 1517 SET_DATA_OFF(rep, DATA_OFF(seg)); 1518 PORT(rep) = PORT(seg); 1519 (*rxq->elts)[idx] = rep; 1520 /* 1521 * Fill NIC descriptor with the new buffer. The lkey and size 1522 * of the buffers are already known, only the buffer address 1523 * changes. 1524 */ 1525 wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t)); 1526 /* If there's only one MR, no need to replace LKey in WQE. */ 1527 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) 1528 wqe->lkey = mlx5_rx_mb2mr(rxq, rep); 1529 if (len > DATA_LEN(seg)) { 1530 len -= DATA_LEN(seg); 1531 ++NB_SEGS(pkt); 1532 ++rq_ci; 1533 continue; 1534 } 1535 DATA_LEN(seg) = len; 1536 #ifdef MLX5_PMD_SOFT_COUNTERS 1537 /* Increment bytes counter. */ 1538 rxq->stats.ibytes += PKT_LEN(pkt); 1539 #endif 1540 /* Return packet. */ 1541 *(pkts++) = pkt; 1542 pkt = NULL; 1543 --pkts_n; 1544 ++i; 1545 /* Align consumer index to the next stride. */ 1546 rq_ci >>= sges_n; 1547 ++rq_ci; 1548 rq_ci <<= sges_n; 1549 } 1550 if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci))) 1551 return 0; 1552 /* Update the consumer index. */ 1553 rxq->rq_ci = rq_ci >> sges_n; 1554 rte_io_wmb(); 1555 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1556 rte_io_wmb(); 1557 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1558 #ifdef MLX5_PMD_SOFT_COUNTERS 1559 /* Increment packets counter. */ 1560 rxq->stats.ipackets += i; 1561 #endif 1562 return i; 1563 } 1564 1565 /** 1566 * Update LRO packet TCP header. 1567 * The HW LRO feature doesn't update the TCP header after coalescing the 1568 * TCP segments but supplies information in CQE to fill it by SW. 1569 * 1570 * @param tcp 1571 * Pointer to the TCP header. 1572 * @param cqe 1573 * Pointer to the completion entry.. 1574 * @param phcsum 1575 * The L3 pseudo-header checksum. 1576 */ 1577 static inline void 1578 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 1579 volatile struct mlx5_cqe *__rte_restrict cqe, 1580 uint32_t phcsum, uint8_t l4_type) 1581 { 1582 /* 1583 * The HW calculates only the TCP payload checksum, need to complete 1584 * the TCP header checksum and the L3 pseudo-header checksum. 
1585 */ 1586 uint32_t csum = phcsum + cqe->csum; 1587 1588 if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK || 1589 l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) { 1590 tcp->tcp_flags |= RTE_TCP_ACK_FLAG; 1591 tcp->recv_ack = cqe->lro_ack_seq_num; 1592 tcp->rx_win = cqe->lro_tcp_win; 1593 } 1594 if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK) 1595 tcp->tcp_flags |= RTE_TCP_PSH_FLAG; 1596 tcp->cksum = 0; 1597 csum += rte_raw_cksum(tcp, (tcp->data_off >> 4) * 4); 1598 csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff); 1599 csum = (~csum) & 0xffff; 1600 if (csum == 0) 1601 csum = 0xffff; 1602 tcp->cksum = csum; 1603 } 1604 1605 /** 1606 * Update LRO packet headers. 1607 * The HW LRO feature doesn't update the L3/TCP headers after coalescing the 1608 * TCP segments but supply information in CQE to fill it by SW. 1609 * 1610 * @param padd 1611 * The packet address. 1612 * @param cqe 1613 * Pointer to the completion entry.. 1614 * @param len 1615 * The packet length. 1616 */ 1617 static inline void 1618 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 1619 volatile struct mlx5_cqe *__rte_restrict cqe, 1620 volatile struct mlx5_mini_cqe8 *mcqe, 1621 struct mlx5_rxq_data *rxq, uint32_t len) 1622 { 1623 union { 1624 struct rte_ether_hdr *eth; 1625 struct rte_vlan_hdr *vlan; 1626 struct rte_ipv4_hdr *ipv4; 1627 struct rte_ipv6_hdr *ipv6; 1628 struct rte_tcp_hdr *tcp; 1629 uint8_t *hdr; 1630 } h = { 1631 .hdr = padd, 1632 }; 1633 uint16_t proto = h.eth->ether_type; 1634 uint32_t phcsum; 1635 uint8_t l4_type; 1636 1637 h.eth++; 1638 while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) || 1639 proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) { 1640 proto = h.vlan->eth_proto; 1641 h.vlan++; 1642 } 1643 if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) { 1644 h.ipv4->time_to_live = cqe->lro_min_ttl; 1645 h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd)); 1646 h.ipv4->hdr_checksum = 0; 1647 h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4); 1648 phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0); 1649 h.ipv4++; 1650 } else { 1651 h.ipv6->hop_limits = cqe->lro_min_ttl; 1652 h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) - 1653 sizeof(*h.ipv6)); 1654 phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0); 1655 h.ipv6++; 1656 } 1657 if (mcqe == NULL || 1658 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 1659 l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) & 1660 MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; 1661 else 1662 l4_type = (rte_be_to_cpu_16(mcqe->hdr_type) & 1663 MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; 1664 mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum, l4_type); 1665 } 1666 1667 void 1668 mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque) 1669 { 1670 struct mlx5_mprq_buf *buf = opaque; 1671 1672 if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) == 1) { 1673 rte_mempool_put(buf->mp, buf); 1674 } else if (unlikely(__atomic_sub_fetch(&buf->refcnt, 1, 1675 __ATOMIC_RELAXED) == 0)) { 1676 __atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED); 1677 rte_mempool_put(buf->mp, buf); 1678 } 1679 } 1680 1681 void 1682 mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf) 1683 { 1684 mlx5_mprq_buf_free_cb(NULL, buf); 1685 } 1686 1687 /** 1688 * DPDK callback for RX with Multi-Packet RQ support. 1689 * 1690 * @param dpdk_rxq 1691 * Generic pointer to RX queue structure. 1692 * @param[out] pkts 1693 * Array to store received packets. 1694 * @param pkts_n 1695 * Maximum number of packets in array. 1696 * 1697 * @return 1698 * Number of packets successfully received (<= pkts_n). 
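 *
 * Applications receive through the generic rte_eth_rx_burst() API; a
 * minimal sketch (port and queue ids are illustrative placeholders):
 *
 * @code
 * struct rte_mbuf *bufs[32];
 * uint16_t nb_rx = rte_eth_rx_burst(port_id, 0, bufs, RTE_DIM(bufs));
 *
 * for (uint16_t k = 0; k < nb_rx; k++)
 *     rte_pktmbuf_free(bufs[k]);
 * @endcode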
1699 */ 1700 uint16_t 1701 mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1702 { 1703 struct mlx5_rxq_data *rxq = dpdk_rxq; 1704 const uint32_t strd_n = 1 << rxq->strd_num_n; 1705 const uint32_t strd_sz = 1 << rxq->strd_sz_n; 1706 const uint32_t cq_mask = (1 << rxq->cqe_n) - 1; 1707 const uint32_t wq_mask = (1 << rxq->elts_n) - 1; 1708 volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1709 unsigned int i = 0; 1710 uint32_t rq_ci = rxq->rq_ci; 1711 uint16_t consumed_strd = rxq->consumed_strd; 1712 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1713 1714 while (i < pkts_n) { 1715 struct rte_mbuf *pkt; 1716 int ret; 1717 uint32_t len; 1718 uint16_t strd_cnt; 1719 uint16_t strd_idx; 1720 uint32_t byte_cnt; 1721 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1722 enum mlx5_rqx_code rxq_code; 1723 1724 if (consumed_strd == strd_n) { 1725 /* Replace WQE if the buffer is still in use. */ 1726 mprq_buf_replace(rxq, rq_ci & wq_mask); 1727 /* Advance to the next WQE. */ 1728 consumed_strd = 0; 1729 ++rq_ci; 1730 buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1731 } 1732 cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1733 ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe); 1734 if (!ret) 1735 break; 1736 byte_cnt = ret; 1737 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 1738 MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); 1739 if (rxq->crc_present) 1740 len -= RTE_ETHER_CRC_LEN; 1741 if (mcqe && 1742 rxq->mcqe_format == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) 1743 strd_cnt = (len / strd_sz) + !!(len % strd_sz); 1744 else 1745 strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >> 1746 MLX5_MPRQ_STRIDE_NUM_SHIFT; 1747 MLX5_ASSERT(strd_cnt); 1748 consumed_strd += strd_cnt; 1749 if (byte_cnt & MLX5_MPRQ_FILLER_MASK) 1750 continue; 1751 strd_idx = rte_be_to_cpu_16(mcqe == NULL ? 1752 cqe->wqe_counter : 1753 mcqe->stride_idx); 1754 MLX5_ASSERT(strd_idx < strd_n); 1755 MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & 1756 wq_mask)); 1757 pkt = rte_pktmbuf_alloc(rxq->mp); 1758 if (unlikely(pkt == NULL)) { 1759 ++rxq->stats.rx_nombuf; 1760 break; 1761 } 1762 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 1763 MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); 1764 if (rxq->crc_present) 1765 len -= RTE_ETHER_CRC_LEN; 1766 rxq_code = mprq_buf_to_pkt(rxq, pkt, len, buf, 1767 strd_idx, strd_cnt); 1768 if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) { 1769 rte_pktmbuf_free_seg(pkt); 1770 if (rxq_code == MLX5_RXQ_CODE_DROPPED) { 1771 ++rxq->stats.idropped; 1772 continue; 1773 } 1774 if (rxq_code == MLX5_RXQ_CODE_NOMBUF) { 1775 ++rxq->stats.rx_nombuf; 1776 break; 1777 } 1778 } 1779 rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe); 1780 if (cqe->lro_num_seg > 1) { 1781 mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt, uint8_t *), 1782 cqe, mcqe, rxq, len); 1783 pkt->ol_flags |= PKT_RX_LRO; 1784 pkt->tso_segsz = len / cqe->lro_num_seg; 1785 } 1786 PKT_LEN(pkt) = len; 1787 PORT(pkt) = rxq->port_id; 1788 #ifdef MLX5_PMD_SOFT_COUNTERS 1789 /* Increment bytes counter. */ 1790 rxq->stats.ibytes += PKT_LEN(pkt); 1791 #endif 1792 /* Return packet. */ 1793 *(pkts++) = pkt; 1794 ++i; 1795 } 1796 /* Update the consumer indexes. */ 1797 rxq->consumed_strd = consumed_strd; 1798 rte_io_wmb(); 1799 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1800 if (rq_ci != rxq->rq_ci) { 1801 rxq->rq_ci = rq_ci; 1802 rte_io_wmb(); 1803 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1804 } 1805 #ifdef MLX5_PMD_SOFT_COUNTERS 1806 /* Increment packets counter. 
*/ 1807 rxq->stats.ipackets += i; 1808 #endif 1809 return i; 1810 } 1811 1812 /** 1813 * Dummy DPDK callback for TX. 1814 * 1815 * This function is used to temporarily replace the real callback during 1816 * unsafe control operations on the queue, or in case of error. 1817 * 1818 * @param dpdk_txq 1819 * Generic pointer to TX queue structure. 1820 * @param[in] pkts 1821 * Packets to transmit. 1822 * @param pkts_n 1823 * Number of packets in array. 1824 * 1825 * @return 1826 * Number of packets successfully transmitted (<= pkts_n). 1827 */ 1828 uint16_t 1829 removed_tx_burst(void *dpdk_txq __rte_unused, 1830 struct rte_mbuf **pkts __rte_unused, 1831 uint16_t pkts_n __rte_unused) 1832 { 1833 rte_mb(); 1834 return 0; 1835 } 1836 1837 /** 1838 * Dummy DPDK callback for RX. 1839 * 1840 * This function is used to temporarily replace the real callback during 1841 * unsafe control operations on the queue, or in case of error. 1842 * 1843 * @param dpdk_rxq 1844 * Generic pointer to RX queue structure. 1845 * @param[out] pkts 1846 * Array to store received packets. 1847 * @param pkts_n 1848 * Maximum number of packets in array. 1849 * 1850 * @return 1851 * Number of packets successfully received (<= pkts_n). 1852 */ 1853 uint16_t 1854 removed_rx_burst(void *dpdk_txq __rte_unused, 1855 struct rte_mbuf **pkts __rte_unused, 1856 uint16_t pkts_n __rte_unused) 1857 { 1858 rte_mb(); 1859 return 0; 1860 } 1861 1862 /* 1863 * Vectorized Rx/Tx routines are not compiled in when required vector 1864 * instructions are not supported on a target architecture. The following null 1865 * stubs are needed for linkage when those are not included outside of this file 1866 * (e.g. mlx5_rxtx_vec_sse.c for x86). 1867 */ 1868 1869 __rte_weak uint16_t 1870 mlx5_rx_burst_vec(void *dpdk_txq __rte_unused, 1871 struct rte_mbuf **pkts __rte_unused, 1872 uint16_t pkts_n __rte_unused) 1873 { 1874 return 0; 1875 } 1876 1877 __rte_weak uint16_t 1878 mlx5_rx_burst_mprq_vec(void *dpdk_txq __rte_unused, 1879 struct rte_mbuf **pkts __rte_unused, 1880 uint16_t pkts_n __rte_unused) 1881 { 1882 return 0; 1883 } 1884 1885 __rte_weak int 1886 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused) 1887 { 1888 return -ENOTSUP; 1889 } 1890 1891 __rte_weak int 1892 mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused) 1893 { 1894 return -ENOTSUP; 1895 } 1896 1897 /** 1898 * Free the mbufs from the linear array of pointers. 1899 * 1900 * @param txq 1901 * Pointer to Tx queue structure. 1902 * @param pkts 1903 * Pointer to array of packets to be free. 1904 * @param pkts_n 1905 * Number of packets to be freed. 1906 * @param olx 1907 * Configured Tx offloads mask. It is fully defined at 1908 * compile time and may be used for optimization. 1909 */ 1910 static __rte_always_inline void 1911 mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 1912 struct rte_mbuf **__rte_restrict pkts, 1913 unsigned int pkts_n, 1914 unsigned int olx __rte_unused) 1915 { 1916 struct rte_mempool *pool = NULL; 1917 struct rte_mbuf **p_free = NULL; 1918 struct rte_mbuf *mbuf; 1919 unsigned int n_free = 0; 1920 1921 /* 1922 * The implemented algorithm eliminates 1923 * copying pointers to temporary array 1924 * for rte_mempool_put_bulk() calls. 
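	 * Instead, consecutive mbufs that share the same mempool are
	 * detected in place and each such run is returned with a single
	 * rte_mempool_put_bulk() call directly on the pkts array.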
1925 */ 1926 MLX5_ASSERT(pkts); 1927 MLX5_ASSERT(pkts_n); 1928 /* 1929 * Free mbufs directly to the pool in bulk 1930 * if fast free offload is engaged 1931 */ 1932 if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) { 1933 mbuf = *pkts; 1934 pool = mbuf->pool; 1935 rte_mempool_put_bulk(pool, (void *)pkts, pkts_n); 1936 return; 1937 } 1938 for (;;) { 1939 for (;;) { 1940 /* 1941 * Decrement mbuf reference counter, detach 1942 * indirect and external buffers if needed. 1943 */ 1944 mbuf = rte_pktmbuf_prefree_seg(*pkts); 1945 if (likely(mbuf != NULL)) { 1946 MLX5_ASSERT(mbuf == *pkts); 1947 if (likely(n_free != 0)) { 1948 if (unlikely(pool != mbuf->pool)) 1949 /* From different pool. */ 1950 break; 1951 } else { 1952 /* Start new scan array. */ 1953 pool = mbuf->pool; 1954 p_free = pkts; 1955 } 1956 ++n_free; 1957 ++pkts; 1958 --pkts_n; 1959 if (unlikely(pkts_n == 0)) { 1960 mbuf = NULL; 1961 break; 1962 } 1963 } else { 1964 /* 1965 * This happens if mbuf is still referenced. 1966 * We can't put it back to the pool, skip. 1967 */ 1968 ++pkts; 1969 --pkts_n; 1970 if (unlikely(n_free != 0)) 1971 /* There is some array to free.*/ 1972 break; 1973 if (unlikely(pkts_n == 0)) 1974 /* Last mbuf, nothing to free. */ 1975 return; 1976 } 1977 } 1978 for (;;) { 1979 /* 1980 * This loop is implemented to avoid multiple 1981 * inlining of rte_mempool_put_bulk(). 1982 */ 1983 MLX5_ASSERT(pool); 1984 MLX5_ASSERT(p_free); 1985 MLX5_ASSERT(n_free); 1986 /* 1987 * Free the array of pre-freed mbufs 1988 * belonging to the same memory pool. 1989 */ 1990 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 1991 if (unlikely(mbuf != NULL)) { 1992 /* There is the request to start new scan. */ 1993 pool = mbuf->pool; 1994 p_free = pkts++; 1995 n_free = 1; 1996 --pkts_n; 1997 if (likely(pkts_n != 0)) 1998 break; 1999 /* 2000 * This is the last mbuf to be freed. 2001 * Do one more loop iteration to complete. 2002 * This is rare case of the last unique mbuf. 2003 */ 2004 mbuf = NULL; 2005 continue; 2006 } 2007 if (likely(pkts_n == 0)) 2008 return; 2009 n_free = 0; 2010 break; 2011 } 2012 } 2013 } 2014 /* 2015 * No inline version to free buffers for optimal call 2016 * on the tx_burst completion. 2017 */ 2018 static __rte_noinline void 2019 __mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 2020 struct rte_mbuf **__rte_restrict pkts, 2021 unsigned int pkts_n, 2022 unsigned int olx __rte_unused) 2023 { 2024 mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx); 2025 } 2026 2027 /** 2028 * Free the mbuf from the elts ring buffer till new tail. 2029 * 2030 * @param txq 2031 * Pointer to Tx queue structure. 2032 * @param tail 2033 * Index in elts to free up to, becomes new elts tail. 2034 * @param olx 2035 * Configured Tx offloads mask. It is fully defined at 2036 * compile time and may be used for optimization. 2037 */ 2038 static __rte_always_inline void 2039 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 2040 uint16_t tail, 2041 unsigned int olx __rte_unused) 2042 { 2043 uint16_t n_elts = tail - txq->elts_tail; 2044 2045 MLX5_ASSERT(n_elts); 2046 MLX5_ASSERT(n_elts <= txq->elts_s); 2047 /* 2048 * Implement a loop to support ring buffer wraparound 2049 * with single inlining of mlx5_tx_free_mbuf(). 
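 * Illustrative example: with elts_s = 256, elts_tail = 250 and
 * n_elts = 10 the first iteration frees the 6 entries up to the end
 * of the ring and the second frees the remaining 4 starting from
 * index 0.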
2050 */ 2051 do { 2052 unsigned int part; 2053 2054 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 2055 part = RTE_MIN(part, n_elts); 2056 MLX5_ASSERT(part); 2057 MLX5_ASSERT(part <= txq->elts_s); 2058 mlx5_tx_free_mbuf(txq, 2059 &txq->elts[txq->elts_tail & txq->elts_m], 2060 part, olx); 2061 txq->elts_tail += part; 2062 n_elts -= part; 2063 } while (n_elts); 2064 } 2065 2066 /** 2067 * Store the mbuf being sent into elts ring buffer. 2068 * On Tx completion these mbufs will be freed. 2069 * 2070 * @param txq 2071 * Pointer to Tx queue structure. 2072 * @param pkts 2073 * Pointer to array of packets to be stored. 2074 * @param pkts_n 2075 * Number of packets to be stored. 2076 * @param olx 2077 * Configured Tx offloads mask. It is fully defined at 2078 * compile time and may be used for optimization. 2079 */ 2080 static __rte_always_inline void 2081 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 2082 struct rte_mbuf **__rte_restrict pkts, 2083 unsigned int pkts_n, 2084 unsigned int olx __rte_unused) 2085 { 2086 unsigned int part; 2087 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 2088 2089 MLX5_ASSERT(pkts); 2090 MLX5_ASSERT(pkts_n); 2091 part = txq->elts_s - (txq->elts_head & txq->elts_m); 2092 MLX5_ASSERT(part); 2093 MLX5_ASSERT(part <= txq->elts_s); 2094 /* This code is a good candidate for vectorizing with SIMD. */ 2095 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 2096 (void *)pkts, 2097 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 2098 txq->elts_head += pkts_n; 2099 if (unlikely(part < pkts_n)) 2100 /* The copy is wrapping around the elts array. */ 2101 rte_memcpy((void *)elts, (void *)(pkts + part), 2102 (pkts_n - part) * sizeof(struct rte_mbuf *)); 2103 } 2104 2105 /** 2106 * Update completion queue consuming index via doorbell 2107 * and flush the completed data buffers. 2108 * 2109 * @param txq 2110 * Pointer to TX queue structure. 2111 * @param valid CQE pointer 2112 * if not NULL update txq->wqe_pi and flush the buffers 2113 * @param olx 2114 * Configured Tx offloads mask. It is fully defined at 2115 * compile time and may be used for optimization. 2116 */ 2117 static __rte_always_inline void 2118 mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq, 2119 volatile struct mlx5_cqe *last_cqe, 2120 unsigned int olx __rte_unused) 2121 { 2122 if (likely(last_cqe != NULL)) { 2123 uint16_t tail; 2124 2125 txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter); 2126 tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m]; 2127 if (likely(tail != txq->elts_tail)) { 2128 mlx5_tx_free_elts(txq, tail, olx); 2129 MLX5_ASSERT(tail == txq->elts_tail); 2130 } 2131 } 2132 } 2133 2134 /** 2135 * Manage TX completions. This routine checks the CQ for 2136 * arrived CQEs, deduces the last accomplished WQE in SQ, 2137 * updates SQ producing index and frees all completed mbufs. 2138 * 2139 * @param txq 2140 * Pointer to TX queue structure. 2141 * @param olx 2142 * Configured Tx offloads mask. It is fully defined at 2143 * compile time and may be used for optimization. 2144 * 2145 * NOTE: not inlined intentionally, it makes tx_burst 2146 * routine smaller, simple and faster - from experiments. 
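 *
 * The number of CQEs processed per call is bounded by
 * MLX5_TX_COMP_MAX_CQE so that freeing the completed buffers does
 * not add unbounded latency to a single tx_burst invocation.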
2147 */ 2148 static void 2149 mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, 2150 unsigned int olx __rte_unused) 2151 { 2152 unsigned int count = MLX5_TX_COMP_MAX_CQE; 2153 volatile struct mlx5_cqe *last_cqe = NULL; 2154 bool ring_doorbell = false; 2155 int ret; 2156 2157 do { 2158 volatile struct mlx5_cqe *cqe; 2159 2160 cqe = &txq->cqes[txq->cq_ci & txq->cqe_m]; 2161 ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci); 2162 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 2163 if (likely(ret != MLX5_CQE_STATUS_ERR)) { 2164 /* No new CQEs in completion queue. */ 2165 MLX5_ASSERT(ret == MLX5_CQE_STATUS_HW_OWN); 2166 break; 2167 } 2168 /* 2169 * Some error occurred, try to restart. 2170 * We have no barrier after WQE related Doorbell 2171 * written, make sure all writes are completed 2172 * here, before we might perform SQ reset. 2173 */ 2174 rte_wmb(); 2175 ret = mlx5_tx_error_cqe_handle 2176 (txq, (volatile struct mlx5_err_cqe *)cqe); 2177 if (unlikely(ret < 0)) { 2178 /* 2179 * Some error occurred on queue error 2180 * handling, we do not advance the index 2181 * here, allowing to retry on next call. 2182 */ 2183 return; 2184 } 2185 /* 2186 * We are going to fetch all entries with 2187 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status. 2188 * The send queue is supposed to be empty. 2189 */ 2190 ring_doorbell = true; 2191 ++txq->cq_ci; 2192 txq->cq_pi = txq->cq_ci; 2193 last_cqe = NULL; 2194 continue; 2195 } 2196 /* Normal transmit completion. */ 2197 MLX5_ASSERT(txq->cq_ci != txq->cq_pi); 2198 #ifdef RTE_LIBRTE_MLX5_DEBUG 2199 MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) == 2200 cqe->wqe_counter); 2201 #endif 2202 ring_doorbell = true; 2203 ++txq->cq_ci; 2204 last_cqe = cqe; 2205 /* 2206 * We have to restrict the amount of processed CQEs 2207 * in one tx_burst routine call. The CQ may be large 2208 * and many CQEs may be updated by the NIC in one 2209 * transaction. Buffers freeing is time consuming, 2210 * multiple iterations may introduce significant 2211 * latency. 2212 */ 2213 if (likely(--count == 0)) 2214 break; 2215 } while (true); 2216 if (likely(ring_doorbell)) { 2217 /* Ring doorbell to notify hardware. */ 2218 rte_compiler_barrier(); 2219 *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); 2220 mlx5_tx_comp_flush(txq, last_cqe, olx); 2221 } 2222 } 2223 2224 /** 2225 * Check if the completion request flag should be set in the last WQE. 2226 * Both pushed mbufs and WQEs are monitored and the completion request 2227 * flag is set if any of thresholds is reached. 2228 * 2229 * @param txq 2230 * Pointer to TX queue structure. 2231 * @param loc 2232 * Pointer to burst routine local context. 2233 * @param olx 2234 * Configured Tx offloads mask. It is fully defined at 2235 * compile time and may be used for optimization. 2236 */ 2237 static __rte_always_inline void 2238 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 2239 struct mlx5_txq_local *__rte_restrict loc, 2240 unsigned int olx) 2241 { 2242 uint16_t head = txq->elts_head; 2243 unsigned int part; 2244 2245 part = MLX5_TXOFF_CONFIG(INLINE) ? 2246 0 : loc->pkts_sent - loc->pkts_copy; 2247 head += part; 2248 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 2249 (MLX5_TXOFF_CONFIG(INLINE) && 2250 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 2251 volatile struct mlx5_wqe *last = loc->wqe_last; 2252 2253 MLX5_ASSERT(last); 2254 txq->elts_comp = head; 2255 if (MLX5_TXOFF_CONFIG(INLINE)) 2256 txq->wqe_comp = txq->wqe_ci; 2257 /* Request unconditional completion on last WQE. 
*/ 2258 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 2259 MLX5_COMP_MODE_OFFSET); 2260 /* Save elts_head in dedicated free on completion queue. */ 2261 #ifdef RTE_LIBRTE_MLX5_DEBUG 2262 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 2263 (last->cseg.opcode >> 8) << 16; 2264 #else 2265 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 2266 #endif 2267 /* A CQE slot must always be available. */ 2268 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 2269 } 2270 } 2271 2272 /** 2273 * DPDK callback to check the status of a tx descriptor. 2274 * 2275 * @param tx_queue 2276 * The tx queue. 2277 * @param[in] offset 2278 * The index of the descriptor in the ring. 2279 * 2280 * @return 2281 * The status of the tx descriptor. 2282 */ 2283 int 2284 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) 2285 { 2286 struct mlx5_txq_data *__rte_restrict txq = tx_queue; 2287 uint16_t used; 2288 2289 mlx5_tx_handle_completion(txq, 0); 2290 used = txq->elts_head - txq->elts_tail; 2291 if (offset < used) 2292 return RTE_ETH_TX_DESC_FULL; 2293 return RTE_ETH_TX_DESC_DONE; 2294 } 2295 2296 /** 2297 * Build the Control Segment with specified opcode: 2298 * - MLX5_OPCODE_SEND 2299 * - MLX5_OPCODE_ENHANCED_MPSW 2300 * - MLX5_OPCODE_TSO 2301 * 2302 * @param txq 2303 * Pointer to TX queue structure. 2304 * @param loc 2305 * Pointer to burst routine local context. 2306 * @param wqe 2307 * Pointer to WQE to fill with built Control Segment. 2308 * @param ds 2309 * Supposed length of WQE in segments. 2310 * @param opcode 2311 * SQ WQE opcode to put into Control Segment. 2312 * @param olx 2313 * Configured Tx offloads mask. It is fully defined at 2314 * compile time and may be used for optimization. 2315 */ 2316 static __rte_always_inline void 2317 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 2318 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2319 struct mlx5_wqe *__rte_restrict wqe, 2320 unsigned int ds, 2321 unsigned int opcode, 2322 unsigned int olx __rte_unused) 2323 { 2324 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 2325 2326 /* For legacy MPW replace the EMPW by TSO with modifier. */ 2327 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 2328 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 2329 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 2330 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2331 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 2332 MLX5_COMP_MODE_OFFSET); 2333 cs->misc = RTE_BE32(0); 2334 } 2335 2336 /** 2337 * Build the Synchronize Queue Segment with specified completion index. 2338 * 2339 * @param txq 2340 * Pointer to TX queue structure. 2341 * @param loc 2342 * Pointer to burst routine local context. 2343 * @param wqe 2344 * Pointer to WQE to fill with built Control Segment. 2345 * @param wci 2346 * Completion index in Clock Queue to wait. 2347 * @param olx 2348 * Configured Tx offloads mask. It is fully defined at 2349 * compile time and may be used for optimization. 
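 *
 * A typical use is building a WAIT WQE for Tx scheduling; the sketch
 * below mirrors mlx5_tx_schedule_send():
 *
 *   wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
 *   mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx);
 *   mlx5_tx_wseg_init(txq, loc, wqe, wci, olx);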
2350 */ 2351 static __rte_always_inline void 2352 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 2353 struct mlx5_txq_local *restrict loc __rte_unused, 2354 struct mlx5_wqe *restrict wqe, 2355 unsigned int wci, 2356 unsigned int olx __rte_unused) 2357 { 2358 struct mlx5_wqe_qseg *qs; 2359 2360 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 2361 qs->max_index = rte_cpu_to_be_32(wci); 2362 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq_obj.cq->id); 2363 qs->reserved0 = RTE_BE32(0); 2364 qs->reserved1 = RTE_BE32(0); 2365 } 2366 2367 /** 2368 * Build the Ethernet Segment without inlined data. 2369 * Supports Software Parser, Checksums and VLAN 2370 * insertion Tx offload features. 2371 * 2372 * @param txq 2373 * Pointer to TX queue structure. 2374 * @param loc 2375 * Pointer to burst routine local context. 2376 * @param wqe 2377 * Pointer to WQE to fill with built Ethernet Segment. 2378 * @param olx 2379 * Configured Tx offloads mask. It is fully defined at 2380 * compile time and may be used for optimization. 2381 */ 2382 static __rte_always_inline void 2383 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 2384 struct mlx5_txq_local *__rte_restrict loc, 2385 struct mlx5_wqe *__rte_restrict wqe, 2386 unsigned int olx) 2387 { 2388 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2389 uint32_t csum; 2390 2391 /* 2392 * Calculate and set check sum flags first, dword field 2393 * in segment may be shared with Software Parser flags. 2394 */ 2395 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2396 es->flags = rte_cpu_to_le_32(csum); 2397 /* 2398 * Calculate and set Software Parser offsets and flags. 2399 * These flags a set for custom UDP and IP tunnel packets. 2400 */ 2401 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2402 /* Fill metadata field if needed. */ 2403 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2404 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2405 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2406 /* Engage VLAN tag insertion feature if requested. */ 2407 if (MLX5_TXOFF_CONFIG(VLAN) && 2408 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 2409 /* 2410 * We should get here only if device support 2411 * this feature correctly. 2412 */ 2413 MLX5_ASSERT(txq->vlan_en); 2414 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 2415 loc->mbuf->vlan_tci); 2416 } else { 2417 es->inline_hdr = RTE_BE32(0); 2418 } 2419 } 2420 2421 /** 2422 * Build the Ethernet Segment with minimal inlined data 2423 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 2424 * used to fill the gap in single WQEBB WQEs. 2425 * Supports Software Parser, Checksums and VLAN 2426 * insertion Tx offload features. 2427 * 2428 * @param txq 2429 * Pointer to TX queue structure. 2430 * @param loc 2431 * Pointer to burst routine local context. 2432 * @param wqe 2433 * Pointer to WQE to fill with built Ethernet Segment. 2434 * @param vlan 2435 * Length of VLAN tag insertion if any. 2436 * @param olx 2437 * Configured Tx offloads mask. It is fully defined at 2438 * compile time and may be used for optimization. 
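 *
 * Exactly MLX5_ESEG_MIN_INLINE_SIZE bytes of the frame are inlined:
 * either the leading packet bytes as-is, or, when vlan is non-zero,
 * the destination/source MAC addresses followed by the constructed
 * VLAN header and the original ethertype.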
2439 */ 2440 static __rte_always_inline void 2441 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 2442 struct mlx5_txq_local *__rte_restrict loc, 2443 struct mlx5_wqe *__rte_restrict wqe, 2444 unsigned int vlan, 2445 unsigned int olx) 2446 { 2447 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2448 uint32_t csum; 2449 uint8_t *psrc, *pdst; 2450 2451 /* 2452 * Calculate and set check sum flags first, dword field 2453 * in segment may be shared with Software Parser flags. 2454 */ 2455 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2456 es->flags = rte_cpu_to_le_32(csum); 2457 /* 2458 * Calculate and set Software Parser offsets and flags. 2459 * These flags a set for custom UDP and IP tunnel packets. 2460 */ 2461 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2462 /* Fill metadata field if needed. */ 2463 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2464 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2465 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2466 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2467 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 2468 es->inline_data = *(unaligned_uint16_t *)psrc; 2469 psrc += sizeof(uint16_t); 2470 pdst = (uint8_t *)(es + 1); 2471 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2472 /* Implement VLAN tag insertion as part inline data. */ 2473 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2474 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2475 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2476 /* Insert VLAN ethertype + VLAN tag. */ 2477 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2478 ((RTE_ETHER_TYPE_VLAN << 16) | 2479 loc->mbuf->vlan_tci); 2480 pdst += sizeof(struct rte_vlan_hdr); 2481 /* Copy the rest two bytes from packet data. */ 2482 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2483 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2484 } else { 2485 /* Fill the gap in the title WQEBB with inline data. */ 2486 rte_mov16(pdst, psrc); 2487 } 2488 } 2489 2490 /** 2491 * Build the Ethernet Segment with entire packet 2492 * data inlining. Checks the boundary of WQEBB and 2493 * ring buffer wrapping, supports Software Parser, 2494 * Checksums and VLAN insertion Tx offload features. 2495 * 2496 * @param txq 2497 * Pointer to TX queue structure. 2498 * @param loc 2499 * Pointer to burst routine local context. 2500 * @param wqe 2501 * Pointer to WQE to fill with built Ethernet Segment. 2502 * @param vlan 2503 * Length of VLAN tag insertion if any. 2504 * @param inlen 2505 * Length of data to inline (VLAN included, if any). 2506 * @param tso 2507 * TSO flag, set mss field from the packet. 2508 * @param olx 2509 * Configured Tx offloads mask. It is fully defined at 2510 * compile time and may be used for optimization. 2511 * 2512 * @return 2513 * Pointer to the next Data Segment (aligned and wrapped around). 2514 */ 2515 static __rte_always_inline struct mlx5_wqe_dseg * 2516 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 2517 struct mlx5_txq_local *__rte_restrict loc, 2518 struct mlx5_wqe *__rte_restrict wqe, 2519 unsigned int vlan, 2520 unsigned int inlen, 2521 unsigned int tso, 2522 unsigned int olx) 2523 { 2524 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2525 uint32_t csum; 2526 uint8_t *psrc, *pdst; 2527 unsigned int part; 2528 2529 /* 2530 * Calculate and set check sum flags first, dword field 2531 * in segment may be shared with Software Parser flags. 2532 */ 2533 csum = MLX5_TXOFF_CONFIG(CSUM) ? 
txq_ol_cksum_to_cs(loc->mbuf) : 0; 2534 if (tso) { 2535 csum <<= 24; 2536 csum |= loc->mbuf->tso_segsz; 2537 es->flags = rte_cpu_to_be_32(csum); 2538 } else { 2539 es->flags = rte_cpu_to_le_32(csum); 2540 } 2541 /* 2542 * Calculate and set Software Parser offsets and flags. 2543 * These flags a set for custom UDP and IP tunnel packets. 2544 */ 2545 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2546 /* Fill metadata field if needed. */ 2547 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2548 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2549 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2550 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2551 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 2552 es->inline_data = *(unaligned_uint16_t *)psrc; 2553 psrc += sizeof(uint16_t); 2554 pdst = (uint8_t *)(es + 1); 2555 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2556 /* Implement VLAN tag insertion as part inline data. */ 2557 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2558 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2559 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2560 /* Insert VLAN ethertype + VLAN tag. */ 2561 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2562 ((RTE_ETHER_TYPE_VLAN << 16) | 2563 loc->mbuf->vlan_tci); 2564 pdst += sizeof(struct rte_vlan_hdr); 2565 /* Copy the rest two bytes from packet data. */ 2566 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2567 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2568 psrc += sizeof(uint16_t); 2569 } else { 2570 /* Fill the gap in the title WQEBB with inline data. */ 2571 rte_mov16(pdst, psrc); 2572 psrc += sizeof(rte_v128u32_t); 2573 } 2574 pdst = (uint8_t *)(es + 2); 2575 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2576 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 2577 inlen -= MLX5_ESEG_MIN_INLINE_SIZE; 2578 if (!inlen) { 2579 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 2580 return (struct mlx5_wqe_dseg *)pdst; 2581 } 2582 /* 2583 * The WQEBB space availability is checked by caller. 2584 * Here we should be aware of WQE ring buffer wraparound only. 2585 */ 2586 part = (uint8_t *)txq->wqes_end - pdst; 2587 part = RTE_MIN(part, inlen); 2588 do { 2589 rte_memcpy(pdst, psrc, part); 2590 inlen -= part; 2591 if (likely(!inlen)) { 2592 /* 2593 * If return value is not used by the caller 2594 * the code below will be optimized out. 2595 */ 2596 pdst += part; 2597 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2598 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 2599 pdst = (uint8_t *)txq->wqes; 2600 return (struct mlx5_wqe_dseg *)pdst; 2601 } 2602 pdst = (uint8_t *)txq->wqes; 2603 psrc += part; 2604 part = inlen; 2605 } while (true); 2606 } 2607 2608 /** 2609 * Copy data from chain of mbuf to the specified linear buffer. 2610 * Checksums and VLAN insertion Tx offload features. If data 2611 * from some mbuf copied completely this mbuf is freed. Local 2612 * structure is used to keep the byte stream state. 2613 * 2614 * @param pdst 2615 * Pointer to the destination linear buffer. 2616 * @param loc 2617 * Pointer to burst routine local context. 2618 * @param len 2619 * Length of data to be copied. 2620 * @param must 2621 * Length of data to be copied ignoring no inline hint. 2622 * @param olx 2623 * Configured Tx offloads mask. It is fully defined at 2624 * compile time and may be used for optimization. 2625 * 2626 * @return 2627 * Number of actual copied data bytes. 
This is always greater than or 2628 * equal to must parameter and might be lesser than len in no inline 2629 * hint flag is encountered. 2630 */ 2631 static __rte_always_inline unsigned int 2632 mlx5_tx_mseg_memcpy(uint8_t *pdst, 2633 struct mlx5_txq_local *__rte_restrict loc, 2634 unsigned int len, 2635 unsigned int must, 2636 unsigned int olx __rte_unused) 2637 { 2638 struct rte_mbuf *mbuf; 2639 unsigned int part, dlen, copy = 0; 2640 uint8_t *psrc; 2641 2642 MLX5_ASSERT(len); 2643 MLX5_ASSERT(must <= len); 2644 do { 2645 /* Allow zero length packets, must check first. */ 2646 dlen = rte_pktmbuf_data_len(loc->mbuf); 2647 if (dlen <= loc->mbuf_off) { 2648 /* Exhausted packet, just free. */ 2649 mbuf = loc->mbuf; 2650 loc->mbuf = mbuf->next; 2651 rte_pktmbuf_free_seg(mbuf); 2652 loc->mbuf_off = 0; 2653 MLX5_ASSERT(loc->mbuf_nseg > 1); 2654 MLX5_ASSERT(loc->mbuf); 2655 --loc->mbuf_nseg; 2656 if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 2657 unsigned int diff; 2658 2659 if (copy >= must) { 2660 /* 2661 * We already copied the minimal 2662 * requested amount of data. 2663 */ 2664 return copy; 2665 } 2666 diff = must - copy; 2667 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 2668 /* 2669 * Copy only the minimal required 2670 * part of the data buffer. 2671 */ 2672 len = diff; 2673 } 2674 } 2675 continue; 2676 } 2677 dlen -= loc->mbuf_off; 2678 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 2679 loc->mbuf_off); 2680 part = RTE_MIN(len, dlen); 2681 rte_memcpy(pdst, psrc, part); 2682 copy += part; 2683 loc->mbuf_off += part; 2684 len -= part; 2685 if (!len) { 2686 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 2687 loc->mbuf_off = 0; 2688 /* Exhausted packet, just free. */ 2689 mbuf = loc->mbuf; 2690 loc->mbuf = mbuf->next; 2691 rte_pktmbuf_free_seg(mbuf); 2692 loc->mbuf_off = 0; 2693 MLX5_ASSERT(loc->mbuf_nseg >= 1); 2694 --loc->mbuf_nseg; 2695 } 2696 return copy; 2697 } 2698 pdst += part; 2699 } while (true); 2700 } 2701 2702 /** 2703 * Build the Ethernet Segment with inlined data from 2704 * multi-segment packet. Checks the boundary of WQEBB 2705 * and ring buffer wrapping, supports Software Parser, 2706 * Checksums and VLAN insertion Tx offload features. 2707 * 2708 * @param txq 2709 * Pointer to TX queue structure. 2710 * @param loc 2711 * Pointer to burst routine local context. 2712 * @param wqe 2713 * Pointer to WQE to fill with built Ethernet Segment. 2714 * @param vlan 2715 * Length of VLAN tag insertion if any. 2716 * @param inlen 2717 * Length of data to inline (VLAN included, if any). 2718 * @param tso 2719 * TSO flag, set mss field from the packet. 2720 * @param olx 2721 * Configured Tx offloads mask. It is fully defined at 2722 * compile time and may be used for optimization. 2723 * 2724 * @return 2725 * Pointer to the next Data Segment (aligned and 2726 * possible NOT wrapped around - caller should do 2727 * wrapping check on its own). 2728 */ 2729 static __rte_always_inline struct mlx5_wqe_dseg * 2730 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 2731 struct mlx5_txq_local *__rte_restrict loc, 2732 struct mlx5_wqe *__rte_restrict wqe, 2733 unsigned int vlan, 2734 unsigned int inlen, 2735 unsigned int tso, 2736 unsigned int olx) 2737 { 2738 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2739 uint32_t csum; 2740 uint8_t *pdst; 2741 unsigned int part, tlen = 0; 2742 2743 /* 2744 * Calculate and set check sum flags first, uint32_t field 2745 * in segment may be shared with Software Parser flags. 2746 */ 2747 csum = MLX5_TXOFF_CONFIG(CSUM) ? 
txq_ol_cksum_to_cs(loc->mbuf) : 0; 2748 if (tso) { 2749 csum <<= 24; 2750 csum |= loc->mbuf->tso_segsz; 2751 es->flags = rte_cpu_to_be_32(csum); 2752 } else { 2753 es->flags = rte_cpu_to_le_32(csum); 2754 } 2755 /* 2756 * Calculate and set Software Parser offsets and flags. 2757 * These flags a set for custom UDP and IP tunnel packets. 2758 */ 2759 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2760 /* Fill metadata field if needed. */ 2761 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2762 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2763 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2764 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2765 pdst = (uint8_t *)&es->inline_data; 2766 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2767 /* Implement VLAN tag insertion as part inline data. */ 2768 mlx5_tx_mseg_memcpy(pdst, loc, 2769 2 * RTE_ETHER_ADDR_LEN, 2770 2 * RTE_ETHER_ADDR_LEN, olx); 2771 pdst += 2 * RTE_ETHER_ADDR_LEN; 2772 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2773 ((RTE_ETHER_TYPE_VLAN << 16) | 2774 loc->mbuf->vlan_tci); 2775 pdst += sizeof(struct rte_vlan_hdr); 2776 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 2777 } 2778 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 2779 /* 2780 * The WQEBB space availability is checked by caller. 2781 * Here we should be aware of WQE ring buffer wraparound only. 2782 */ 2783 part = (uint8_t *)txq->wqes_end - pdst; 2784 part = RTE_MIN(part, inlen - tlen); 2785 MLX5_ASSERT(part); 2786 do { 2787 unsigned int copy; 2788 2789 /* 2790 * Copying may be interrupted inside the routine 2791 * if run into no inline hint flag. 2792 */ 2793 copy = tlen >= txq->inlen_mode ? 0 : (txq->inlen_mode - tlen); 2794 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 2795 tlen += copy; 2796 if (likely(inlen <= tlen) || copy < part) { 2797 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 2798 pdst += copy; 2799 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2800 return (struct mlx5_wqe_dseg *)pdst; 2801 } 2802 pdst = (uint8_t *)txq->wqes; 2803 part = inlen - tlen; 2804 } while (true); 2805 } 2806 2807 /** 2808 * Build the Data Segment of pointer type. 2809 * 2810 * @param txq 2811 * Pointer to TX queue structure. 2812 * @param loc 2813 * Pointer to burst routine local context. 2814 * @param dseg 2815 * Pointer to WQE to fill with built Data Segment. 2816 * @param buf 2817 * Data buffer to point. 2818 * @param len 2819 * Data buffer length. 2820 * @param olx 2821 * Configured Tx offloads mask. It is fully defined at 2822 * compile time and may be used for optimization. 2823 */ 2824 static __rte_always_inline void 2825 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 2826 struct mlx5_txq_local *__rte_restrict loc, 2827 struct mlx5_wqe_dseg *__rte_restrict dseg, 2828 uint8_t *buf, 2829 unsigned int len, 2830 unsigned int olx __rte_unused) 2831 2832 { 2833 MLX5_ASSERT(len); 2834 dseg->bcount = rte_cpu_to_be_32(len); 2835 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2836 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2837 } 2838 2839 /** 2840 * Build the Data Segment of pointer type or inline 2841 * if data length is less than buffer in minimal 2842 * Data Segment size. 2843 * 2844 * @param txq 2845 * Pointer to TX queue structure. 2846 * @param loc 2847 * Pointer to burst routine local context. 2848 * @param dseg 2849 * Pointer to WQE to fill with built Data Segment. 2850 * @param buf 2851 * Data buffer to point. 2852 * @param len 2853 * Data buffer length. 2854 * @param olx 2855 * Configured Tx offloads mask. 
It is fully defined at 2856 * compile time and may be used for optimization. 2857 */ 2858 static __rte_always_inline void 2859 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 2860 struct mlx5_txq_local *__rte_restrict loc, 2861 struct mlx5_wqe_dseg *__rte_restrict dseg, 2862 uint8_t *buf, 2863 unsigned int len, 2864 unsigned int olx __rte_unused) 2865 2866 { 2867 uintptr_t dst, src; 2868 2869 MLX5_ASSERT(len); 2870 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 2871 dseg->bcount = rte_cpu_to_be_32(len); 2872 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2873 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2874 2875 return; 2876 } 2877 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2878 /* Unrolled implementation of generic rte_memcpy. */ 2879 dst = (uintptr_t)&dseg->inline_data[0]; 2880 src = (uintptr_t)buf; 2881 if (len & 0x08) { 2882 #ifdef RTE_ARCH_STRICT_ALIGN 2883 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 2884 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2885 dst += sizeof(uint32_t); 2886 src += sizeof(uint32_t); 2887 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2888 dst += sizeof(uint32_t); 2889 src += sizeof(uint32_t); 2890 #else 2891 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 2892 dst += sizeof(uint64_t); 2893 src += sizeof(uint64_t); 2894 #endif 2895 } 2896 if (len & 0x04) { 2897 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2898 dst += sizeof(uint32_t); 2899 src += sizeof(uint32_t); 2900 } 2901 if (len & 0x02) { 2902 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 2903 dst += sizeof(uint16_t); 2904 src += sizeof(uint16_t); 2905 } 2906 if (len & 0x01) 2907 *(uint8_t *)dst = *(uint8_t *)src; 2908 } 2909 2910 /** 2911 * Build the Data Segment of inlined data from single 2912 * segment packet, no VLAN insertion. 2913 * 2914 * @param txq 2915 * Pointer to TX queue structure. 2916 * @param loc 2917 * Pointer to burst routine local context. 2918 * @param dseg 2919 * Pointer to WQE to fill with built Data Segment. 2920 * @param buf 2921 * Data buffer to point. 2922 * @param len 2923 * Data buffer length. 2924 * @param olx 2925 * Configured Tx offloads mask. It is fully defined at 2926 * compile time and may be used for optimization. 2927 * 2928 * @return 2929 * Pointer to the next Data Segment after inlined data. 2930 * Ring buffer wraparound check is needed. We do not 2931 * do it here because it may not be needed for the 2932 * last packet in the eMPW session. 2933 */ 2934 static __rte_always_inline struct mlx5_wqe_dseg * 2935 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 2936 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2937 struct mlx5_wqe_dseg *__rte_restrict dseg, 2938 uint8_t *buf, 2939 unsigned int len, 2940 unsigned int olx __rte_unused) 2941 { 2942 unsigned int part; 2943 uint8_t *pdst; 2944 2945 if (!MLX5_TXOFF_CONFIG(MPW)) { 2946 /* Store the descriptor byte counter for eMPW sessions. */ 2947 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2948 pdst = &dseg->inline_data[0]; 2949 } else { 2950 /* The entire legacy MPW session counter is stored on close. */ 2951 pdst = (uint8_t *)dseg; 2952 } 2953 /* 2954 * The WQEBB space availability is checked by caller. 2955 * Here we should be aware of WQE ring buffer wraparound only. 
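 * The copy below is therefore split into at most two chunks: up to
 * part bytes till txq->wqes_end, then the remainder from the ring
 * base (txq->wqes). For eMPW the returned pointer is aligned to
 * MLX5_WSEG_SIZE, for legacy MPW it is left unaligned.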
2956 */ 2957 part = (uint8_t *)txq->wqes_end - pdst; 2958 part = RTE_MIN(part, len); 2959 do { 2960 rte_memcpy(pdst, buf, part); 2961 len -= part; 2962 if (likely(!len)) { 2963 pdst += part; 2964 if (!MLX5_TXOFF_CONFIG(MPW)) 2965 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2966 /* Note: no final wraparound check here. */ 2967 return (struct mlx5_wqe_dseg *)pdst; 2968 } 2969 pdst = (uint8_t *)txq->wqes; 2970 buf += part; 2971 part = len; 2972 } while (true); 2973 } 2974 2975 /** 2976 * Build the Data Segment of inlined data from single 2977 * segment packet with VLAN insertion. 2978 * 2979 * @param txq 2980 * Pointer to TX queue structure. 2981 * @param loc 2982 * Pointer to burst routine local context. 2983 * @param dseg 2984 * Pointer to the dseg fill with built Data Segment. 2985 * @param buf 2986 * Data buffer to point. 2987 * @param len 2988 * Data buffer length. 2989 * @param olx 2990 * Configured Tx offloads mask. It is fully defined at 2991 * compile time and may be used for optimization. 2992 * 2993 * @return 2994 * Pointer to the next Data Segment after inlined data. 2995 * Ring buffer wraparound check is needed. 2996 */ 2997 static __rte_always_inline struct mlx5_wqe_dseg * 2998 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 2999 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 3000 struct mlx5_wqe_dseg *__rte_restrict dseg, 3001 uint8_t *buf, 3002 unsigned int len, 3003 unsigned int olx __rte_unused) 3004 3005 { 3006 unsigned int part; 3007 uint8_t *pdst; 3008 3009 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 3010 if (!MLX5_TXOFF_CONFIG(MPW)) { 3011 /* Store the descriptor byte counter for eMPW sessions. */ 3012 dseg->bcount = rte_cpu_to_be_32 3013 ((len + sizeof(struct rte_vlan_hdr)) | 3014 MLX5_ETH_WQE_DATA_INLINE); 3015 pdst = &dseg->inline_data[0]; 3016 } else { 3017 /* The entire legacy MPW session counter is stored on close. */ 3018 pdst = (uint8_t *)dseg; 3019 } 3020 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 3021 buf += MLX5_DSEG_MIN_INLINE_SIZE; 3022 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 3023 len -= MLX5_DSEG_MIN_INLINE_SIZE; 3024 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ 3025 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 3026 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 3027 pdst = (uint8_t *)txq->wqes; 3028 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 3029 loc->mbuf->vlan_tci); 3030 pdst += sizeof(struct rte_vlan_hdr); 3031 /* 3032 * The WQEBB space availability is checked by caller. 3033 * Here we should be aware of WQE ring buffer wraparound only. 3034 */ 3035 part = (uint8_t *)txq->wqes_end - pdst; 3036 part = RTE_MIN(part, len); 3037 do { 3038 rte_memcpy(pdst, buf, part); 3039 len -= part; 3040 if (likely(!len)) { 3041 pdst += part; 3042 if (!MLX5_TXOFF_CONFIG(MPW)) 3043 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 3044 /* Note: no final wraparound check here. */ 3045 return (struct mlx5_wqe_dseg *)pdst; 3046 } 3047 pdst = (uint8_t *)txq->wqes; 3048 buf += part; 3049 part = len; 3050 } while (true); 3051 } 3052 3053 /** 3054 * Build the Ethernet Segment with optionally inlined data with 3055 * VLAN insertion and following Data Segments (if any) from 3056 * multi-segment packet. Used by ordinary send and TSO. 3057 * 3058 * @param txq 3059 * Pointer to TX queue structure. 3060 * @param loc 3061 * Pointer to burst routine local context. 3062 * @param wqe 3063 * Pointer to WQE to fill with built Ethernet/Data Segments. 
3064 * @param vlan 3065 * Length of VLAN header to insert, 0 means no VLAN insertion. 3066 * @param inlen 3067 * Data length to inline. For TSO this parameter specifies 3068 * exact value, for ordinary send routine can be aligned by 3069 * caller to provide better WQE space saving and data buffer 3070 * start address alignment. This length includes VLAN header 3071 * being inserted. 3072 * @param tso 3073 * Zero means ordinary send, inlined data can be extended, 3074 * otherwise this is TSO, inlined data length is fixed. 3075 * @param olx 3076 * Configured Tx offloads mask. It is fully defined at 3077 * compile time and may be used for optimization. 3078 * 3079 * @return 3080 * Actual size of built WQE in segments. 3081 */ 3082 static __rte_always_inline unsigned int 3083 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 3084 struct mlx5_txq_local *__rte_restrict loc, 3085 struct mlx5_wqe *__rte_restrict wqe, 3086 unsigned int vlan, 3087 unsigned int inlen, 3088 unsigned int tso, 3089 unsigned int olx __rte_unused) 3090 { 3091 struct mlx5_wqe_dseg *__rte_restrict dseg; 3092 unsigned int ds; 3093 3094 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 3095 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 3096 loc->mbuf_off = 0; 3097 3098 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 3099 if (!loc->mbuf_nseg) 3100 goto dseg_done; 3101 /* 3102 * There are still some mbuf remaining, not inlined. 3103 * The first mbuf may be partially inlined and we 3104 * must process the possible non-zero data offset. 3105 */ 3106 if (loc->mbuf_off) { 3107 unsigned int dlen; 3108 uint8_t *dptr; 3109 3110 /* 3111 * Exhausted packets must be dropped before. 3112 * Non-zero offset means there are some data 3113 * remained in the packet. 3114 */ 3115 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 3116 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 3117 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 3118 loc->mbuf_off); 3119 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 3120 /* 3121 * Build the pointer/minimal data Data Segment. 3122 * Do ring buffer wrapping check in advance. 3123 */ 3124 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3125 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3126 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 3127 /* Store the mbuf to be freed on completion. */ 3128 MLX5_ASSERT(loc->elts_free); 3129 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3130 --loc->elts_free; 3131 ++dseg; 3132 if (--loc->mbuf_nseg == 0) 3133 goto dseg_done; 3134 loc->mbuf = loc->mbuf->next; 3135 loc->mbuf_off = 0; 3136 } 3137 do { 3138 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3139 struct rte_mbuf *mbuf; 3140 3141 /* Zero length segment found, just skip. */ 3142 mbuf = loc->mbuf; 3143 loc->mbuf = loc->mbuf->next; 3144 rte_pktmbuf_free_seg(mbuf); 3145 if (--loc->mbuf_nseg == 0) 3146 break; 3147 } else { 3148 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3149 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3150 mlx5_tx_dseg_iptr 3151 (txq, loc, dseg, 3152 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3153 rte_pktmbuf_data_len(loc->mbuf), olx); 3154 MLX5_ASSERT(loc->elts_free); 3155 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3156 --loc->elts_free; 3157 ++dseg; 3158 if (--loc->mbuf_nseg == 0) 3159 break; 3160 loc->mbuf = loc->mbuf->next; 3161 } 3162 } while (true); 3163 3164 dseg_done: 3165 /* Calculate actual segments used from the dseg pointer. 
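 * If dseg has wrapped around to the ring start it is numerically
 * below wqe, so the ring size (wqe_s * MLX5_WQE_SIZE) is added back
 * before converting the byte distance into WSEG-sized units.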
*/ 3166 if ((uintptr_t)wqe < (uintptr_t)dseg) 3167 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 3168 else 3169 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 3170 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 3171 return ds; 3172 } 3173 3174 /** 3175 * The routine checks timestamp flag in the current packet, 3176 * and push WAIT WQE into the queue if scheduling is required. 3177 * 3178 * @param txq 3179 * Pointer to TX queue structure. 3180 * @param loc 3181 * Pointer to burst routine local context. 3182 * @param olx 3183 * Configured Tx offloads mask. It is fully defined at 3184 * compile time and may be used for optimization. 3185 * 3186 * @return 3187 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3188 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 3189 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 3190 * Local context variables partially updated. 3191 */ 3192 static __rte_always_inline enum mlx5_txcmp_code 3193 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 3194 struct mlx5_txq_local *restrict loc, 3195 unsigned int olx) 3196 { 3197 if (MLX5_TXOFF_CONFIG(TXPP) && 3198 loc->mbuf->ol_flags & txq->ts_mask) { 3199 struct mlx5_wqe *wqe; 3200 uint64_t ts; 3201 int32_t wci; 3202 3203 /* 3204 * Estimate the required space quickly and roughly. 3205 * We would like to ensure the packet can be pushed 3206 * to the queue and we won't get the orphan WAIT WQE. 3207 */ 3208 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 3209 loc->elts_free < NB_SEGS(loc->mbuf)) 3210 return MLX5_TXCMP_CODE_EXIT; 3211 /* Convert the timestamp into completion to wait. */ 3212 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 3213 wci = mlx5_txpp_convert_tx_ts(txq->sh, ts); 3214 if (unlikely(wci < 0)) 3215 return MLX5_TXCMP_CODE_SINGLE; 3216 /* Build the WAIT WQE with specified completion. */ 3217 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3218 mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx); 3219 mlx5_tx_wseg_init(txq, loc, wqe, wci, olx); 3220 ++txq->wqe_ci; 3221 --loc->wqe_free; 3222 return MLX5_TXCMP_CODE_MULTI; 3223 } 3224 return MLX5_TXCMP_CODE_SINGLE; 3225 } 3226 3227 /** 3228 * Tx one packet function for multi-segment TSO. Supports all 3229 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 3230 * sends one packet per WQE. 3231 * 3232 * This routine is responsible for storing processed mbuf 3233 * into elts ring buffer and update elts_head. 3234 * 3235 * @param txq 3236 * Pointer to TX queue structure. 3237 * @param loc 3238 * Pointer to burst routine local context. 3239 * @param olx 3240 * Configured Tx offloads mask. It is fully defined at 3241 * compile time and may be used for optimization. 3242 * 3243 * @return 3244 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3245 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3246 * Local context variables partially updated. 3247 */ 3248 static __rte_always_inline enum mlx5_txcmp_code 3249 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 3250 struct mlx5_txq_local *__rte_restrict loc, 3251 unsigned int olx) 3252 { 3253 struct mlx5_wqe *__rte_restrict wqe; 3254 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 3255 3256 if (MLX5_TXOFF_CONFIG(TXPP)) { 3257 enum mlx5_txcmp_code wret; 3258 3259 /* Generate WAIT for scheduling if requested. 
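 * Only MLX5_TXCMP_CODE_EXIT and MLX5_TXCMP_CODE_ERROR abort the
 * packet here; any other code returned by mlx5_tx_schedule_send()
 * means the WAIT WQE (if any) was pushed and processing of this
 * packet continues below.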
*/ 3260 wret = mlx5_tx_schedule_send(txq, loc, olx); 3261 if (wret == MLX5_TXCMP_CODE_EXIT) 3262 return MLX5_TXCMP_CODE_EXIT; 3263 if (wret == MLX5_TXCMP_CODE_ERROR) 3264 return MLX5_TXCMP_CODE_ERROR; 3265 } 3266 /* 3267 * Calculate data length to be inlined to estimate 3268 * the required space in WQE ring buffer. 3269 */ 3270 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3271 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3272 vlan = sizeof(struct rte_vlan_hdr); 3273 inlen = loc->mbuf->l2_len + vlan + 3274 loc->mbuf->l3_len + loc->mbuf->l4_len; 3275 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 3276 return MLX5_TXCMP_CODE_ERROR; 3277 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3278 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 3279 /* Packet must contain all TSO headers. */ 3280 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 3281 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3282 inlen > (dlen + vlan))) 3283 return MLX5_TXCMP_CODE_ERROR; 3284 MLX5_ASSERT(inlen >= txq->inlen_mode); 3285 /* 3286 * Check whether there are enough free WQEBBs: 3287 * - Control Segment 3288 * - Ethernet Segment 3289 * - First Segment of inlined Ethernet data 3290 * - ... data continued ... 3291 * - Data Segments of pointer/min inline type 3292 */ 3293 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3294 MLX5_ESEG_MIN_INLINE_SIZE + 3295 MLX5_WSEG_SIZE + 3296 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3297 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3298 return MLX5_TXCMP_CODE_EXIT; 3299 /* Check for maximal WQE size. */ 3300 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3301 return MLX5_TXCMP_CODE_ERROR; 3302 #ifdef MLX5_PMD_SOFT_COUNTERS 3303 /* Update sent data bytes/packets counters. */ 3304 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 3305 loc->mbuf->tso_segsz; 3306 /* 3307 * One will be added for mbuf itself 3308 * at the end of the mlx5_tx_burst from 3309 * loc->pkts_sent field. 3310 */ 3311 --ntcp; 3312 txq->stats.opackets += ntcp; 3313 txq->stats.obytes += dlen + vlan + ntcp * inlen; 3314 #endif 3315 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3316 loc->wqe_last = wqe; 3317 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 3318 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 3319 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3320 txq->wqe_ci += (ds + 3) / 4; 3321 loc->wqe_free -= (ds + 3) / 4; 3322 return MLX5_TXCMP_CODE_MULTI; 3323 } 3324 3325 /** 3326 * Tx one packet function for multi-segment SEND. Supports all 3327 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3328 * sends one packet per WQE, without any data inlining in 3329 * Ethernet Segment. 3330 * 3331 * This routine is responsible for storing processed mbuf 3332 * into elts ring buffer and update elts_head. 3333 * 3334 * @param txq 3335 * Pointer to TX queue structure. 3336 * @param loc 3337 * Pointer to burst routine local context. 3338 * @param olx 3339 * Configured Tx offloads mask. It is fully defined at 3340 * compile time and may be used for optimization. 3341 * 3342 * @return 3343 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3344 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3345 * Local context variables partially updated. 
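 *
 * The resulting WQE consists of a Control Segment, an Ethernet
 * Segment without inlined data (VLAN insertion, if any, is done by
 * the hardware) and one pointer-type Data Segment per non-empty
 * mbuf segment.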
3346 */ 3347 static __rte_always_inline enum mlx5_txcmp_code 3348 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 3349 struct mlx5_txq_local *__rte_restrict loc, 3350 unsigned int olx) 3351 { 3352 struct mlx5_wqe_dseg *__rte_restrict dseg; 3353 struct mlx5_wqe *__rte_restrict wqe; 3354 unsigned int ds, nseg; 3355 3356 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3357 if (MLX5_TXOFF_CONFIG(TXPP)) { 3358 enum mlx5_txcmp_code wret; 3359 3360 /* Generate WAIT for scheduling if requested. */ 3361 wret = mlx5_tx_schedule_send(txq, loc, olx); 3362 if (wret == MLX5_TXCMP_CODE_EXIT) 3363 return MLX5_TXCMP_CODE_EXIT; 3364 if (wret == MLX5_TXCMP_CODE_ERROR) 3365 return MLX5_TXCMP_CODE_ERROR; 3366 } 3367 /* 3368 * No inline at all, it means the CPU cycles saving 3369 * is prioritized at configuration, we should not 3370 * copy any packet data to WQE. 3371 */ 3372 nseg = NB_SEGS(loc->mbuf); 3373 ds = 2 + nseg; 3374 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3375 return MLX5_TXCMP_CODE_EXIT; 3376 /* Check for maximal WQE size. */ 3377 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3378 return MLX5_TXCMP_CODE_ERROR; 3379 /* 3380 * Some Tx offloads may cause an error if 3381 * packet is not long enough, check against 3382 * assumed minimal length. 3383 */ 3384 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 3385 return MLX5_TXCMP_CODE_ERROR; 3386 #ifdef MLX5_PMD_SOFT_COUNTERS 3387 /* Update sent data bytes counter. */ 3388 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 3389 if (MLX5_TXOFF_CONFIG(VLAN) && 3390 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3391 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 3392 #endif 3393 /* 3394 * SEND WQE, one WQEBB: 3395 * - Control Segment, SEND opcode 3396 * - Ethernet Segment, optional VLAN, no inline 3397 * - Data Segments, pointer only type 3398 */ 3399 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3400 loc->wqe_last = wqe; 3401 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 3402 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3403 dseg = &wqe->dseg[0]; 3404 do { 3405 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3406 struct rte_mbuf *mbuf; 3407 3408 /* 3409 * Zero length segment found, have to 3410 * correct total size of WQE in segments. 3411 * It is supposed to be rare occasion, so 3412 * in normal case (no zero length segments) 3413 * we avoid extra writing to the Control 3414 * Segment. 3415 */ 3416 --ds; 3417 wqe->cseg.sq_ds -= RTE_BE32(1); 3418 mbuf = loc->mbuf; 3419 loc->mbuf = mbuf->next; 3420 rte_pktmbuf_free_seg(mbuf); 3421 if (--nseg == 0) 3422 break; 3423 } else { 3424 mlx5_tx_dseg_ptr 3425 (txq, loc, dseg, 3426 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3427 rte_pktmbuf_data_len(loc->mbuf), olx); 3428 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3429 --loc->elts_free; 3430 if (--nseg == 0) 3431 break; 3432 ++dseg; 3433 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3434 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3435 loc->mbuf = loc->mbuf->next; 3436 } 3437 } while (true); 3438 txq->wqe_ci += (ds + 3) / 4; 3439 loc->wqe_free -= (ds + 3) / 4; 3440 return MLX5_TXCMP_CODE_MULTI; 3441 } 3442 3443 /** 3444 * Tx one packet function for multi-segment SEND. Supports all 3445 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3446 * sends one packet per WQE, with data inlining in 3447 * Ethernet Segment and minimal Data Segments. 3448 * 3449 * This routine is responsible for storing processed mbuf 3450 * into elts ring buffer and update elts_head. 
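 * If the packet exceeds the configured inline length or carries the
 * PKT_TX_DYNF_NOINLINE hint, only the minimally required data
 * (possibly rounded up to whole segments or for alignment) is
 * inlined, or the routine falls back to mlx5_tx_packet_multi_send().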
3451 * 3452 * @param txq 3453 * Pointer to TX queue structure. 3454 * @param loc 3455 * Pointer to burst routine local context. 3456 * @param olx 3457 * Configured Tx offloads mask. It is fully defined at 3458 * compile time and may be used for optimization. 3459 * 3460 * @return 3461 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3462 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3463 * Local context variables partially updated. 3464 */ 3465 static __rte_always_inline enum mlx5_txcmp_code 3466 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 3467 struct mlx5_txq_local *__rte_restrict loc, 3468 unsigned int olx) 3469 { 3470 struct mlx5_wqe *__rte_restrict wqe; 3471 unsigned int ds, inlen, dlen, vlan = 0; 3472 3473 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3474 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3475 if (MLX5_TXOFF_CONFIG(TXPP)) { 3476 enum mlx5_txcmp_code wret; 3477 3478 /* Generate WAIT for scheduling if requested. */ 3479 wret = mlx5_tx_schedule_send(txq, loc, olx); 3480 if (wret == MLX5_TXCMP_CODE_EXIT) 3481 return MLX5_TXCMP_CODE_EXIT; 3482 if (wret == MLX5_TXCMP_CODE_ERROR) 3483 return MLX5_TXCMP_CODE_ERROR; 3484 } 3485 /* 3486 * First calculate data length to be inlined 3487 * to estimate the required space for WQE. 3488 */ 3489 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3490 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3491 vlan = sizeof(struct rte_vlan_hdr); 3492 inlen = dlen + vlan; 3493 /* Check against minimal length. */ 3494 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3495 return MLX5_TXCMP_CODE_ERROR; 3496 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 3497 if (inlen > txq->inlen_send || 3498 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 3499 struct rte_mbuf *mbuf; 3500 unsigned int nxlen; 3501 uintptr_t start; 3502 3503 /* 3504 * Packet length exceeds the allowed inline 3505 * data length, check whether the minimal 3506 * inlining is required. 3507 */ 3508 if (txq->inlen_mode) { 3509 MLX5_ASSERT(txq->inlen_mode >= 3510 MLX5_ESEG_MIN_INLINE_SIZE); 3511 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 3512 inlen = txq->inlen_mode; 3513 } else { 3514 if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE || 3515 !vlan || txq->vlan_en) { 3516 /* 3517 * VLAN insertion will be done inside by HW. 3518 * It is not utmost effective - VLAN flag is 3519 * checked twice, but we should proceed the 3520 * inlining length correctly and take into 3521 * account the VLAN header being inserted. 3522 */ 3523 return mlx5_tx_packet_multi_send 3524 (txq, loc, olx); 3525 } 3526 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 3527 } 3528 /* 3529 * Now we know the minimal amount of data is requested 3530 * to inline. Check whether we should inline the buffers 3531 * from the chain beginning to eliminate some mbufs. 3532 */ 3533 mbuf = loc->mbuf; 3534 nxlen = rte_pktmbuf_data_len(mbuf); 3535 if (unlikely(nxlen <= txq->inlen_send)) { 3536 /* We can inline first mbuf at least. */ 3537 if (nxlen < inlen) { 3538 unsigned int smlen; 3539 3540 /* Scan mbufs till inlen filled. */ 3541 do { 3542 smlen = nxlen; 3543 mbuf = NEXT(mbuf); 3544 MLX5_ASSERT(mbuf); 3545 nxlen = rte_pktmbuf_data_len(mbuf); 3546 nxlen += smlen; 3547 } while (unlikely(nxlen < inlen)); 3548 if (unlikely(nxlen > txq->inlen_send)) { 3549 /* We cannot inline entire mbuf. 
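 * Only the leading part of this mbuf is inlined; compute the offset
 * of the first byte left in place and use it for the data buffer
 * alignment check at do_align below.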
*/ 3550 smlen = inlen - smlen; 3551 start = rte_pktmbuf_mtod_offset 3552 (mbuf, uintptr_t, smlen); 3553 goto do_align; 3554 } 3555 } 3556 do { 3557 inlen = nxlen; 3558 mbuf = NEXT(mbuf); 3559 /* There should be not end of packet. */ 3560 MLX5_ASSERT(mbuf); 3561 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 3562 } while (unlikely(nxlen < txq->inlen_send)); 3563 } 3564 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 3565 /* 3566 * Check whether we can do inline to align start 3567 * address of data buffer to cacheline. 3568 */ 3569 do_align: 3570 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 3571 if (unlikely(start)) { 3572 start += inlen; 3573 if (start <= txq->inlen_send) 3574 inlen = start; 3575 } 3576 } 3577 /* 3578 * Check whether there are enough free WQEBBs: 3579 * - Control Segment 3580 * - Ethernet Segment 3581 * - First Segment of inlined Ethernet data 3582 * - ... data continued ... 3583 * - Data Segments of pointer/min inline type 3584 * 3585 * Estimate the number of Data Segments conservatively, 3586 * supposing no any mbufs is being freed during inlining. 3587 */ 3588 MLX5_ASSERT(inlen <= txq->inlen_send); 3589 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3590 MLX5_ESEG_MIN_INLINE_SIZE + 3591 MLX5_WSEG_SIZE + 3592 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3593 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3594 return MLX5_TXCMP_CODE_EXIT; 3595 /* Check for maximal WQE size. */ 3596 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3597 return MLX5_TXCMP_CODE_ERROR; 3598 #ifdef MLX5_PMD_SOFT_COUNTERS 3599 /* Update sent data bytes/packets counters. */ 3600 txq->stats.obytes += dlen + vlan; 3601 #endif 3602 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3603 loc->wqe_last = wqe; 3604 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 3605 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 3606 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3607 txq->wqe_ci += (ds + 3) / 4; 3608 loc->wqe_free -= (ds + 3) / 4; 3609 return MLX5_TXCMP_CODE_MULTI; 3610 } 3611 3612 /** 3613 * Tx burst function for multi-segment packets. Supports all 3614 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 3615 * sends one packet per WQE. Function stops sending if it 3616 * encounters the single-segment packet. 3617 * 3618 * This routine is responsible for storing processed mbuf 3619 * into elts ring buffer and update elts_head. 3620 * 3621 * @param txq 3622 * Pointer to TX queue structure. 3623 * @param[in] pkts 3624 * Packets to transmit. 3625 * @param pkts_n 3626 * Number of packets in array. 3627 * @param loc 3628 * Pointer to burst routine local context. 3629 * @param olx 3630 * Configured Tx offloads mask. It is fully defined at 3631 * compile time and may be used for optimization. 3632 * 3633 * @return 3634 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3635 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3636 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3637 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 3638 * Local context variables updated. 
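 *
 * Each multi-segment packet is dispatched to
 * mlx5_tx_packet_multi_tso(), mlx5_tx_packet_multi_inline() or
 * mlx5_tx_packet_multi_send() depending on the PKT_TX_TCP_SEG flag
 * and the compiled-in TSO/INLINE configuration.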
3639 */ 3640 static __rte_always_inline enum mlx5_txcmp_code 3641 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 3642 struct rte_mbuf **__rte_restrict pkts, 3643 unsigned int pkts_n, 3644 struct mlx5_txq_local *__rte_restrict loc, 3645 unsigned int olx) 3646 { 3647 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3648 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3649 pkts += loc->pkts_sent + 1; 3650 pkts_n -= loc->pkts_sent; 3651 for (;;) { 3652 enum mlx5_txcmp_code ret; 3653 3654 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3655 /* 3656 * Estimate the number of free elts quickly but 3657 * conservatively. Some segment may be fully inlined 3658 * and freed, ignore this here - precise estimation 3659 * is costly. 3660 */ 3661 if (loc->elts_free < NB_SEGS(loc->mbuf)) 3662 return MLX5_TXCMP_CODE_EXIT; 3663 if (MLX5_TXOFF_CONFIG(TSO) && 3664 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) { 3665 /* Proceed with multi-segment TSO. */ 3666 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 3667 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 3668 /* Proceed with multi-segment SEND with inlining. */ 3669 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 3670 } else { 3671 /* Proceed with multi-segment SEND w/o inlining. */ 3672 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 3673 } 3674 if (ret == MLX5_TXCMP_CODE_EXIT) 3675 return MLX5_TXCMP_CODE_EXIT; 3676 if (ret == MLX5_TXCMP_CODE_ERROR) 3677 return MLX5_TXCMP_CODE_ERROR; 3678 /* WQE is built, go to the next packet. */ 3679 ++loc->pkts_sent; 3680 --pkts_n; 3681 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3682 return MLX5_TXCMP_CODE_EXIT; 3683 loc->mbuf = *pkts++; 3684 if (pkts_n > 1) 3685 rte_prefetch0(*pkts); 3686 if (likely(NB_SEGS(loc->mbuf) > 1)) 3687 continue; 3688 /* Here ends the series of multi-segment packets. */ 3689 if (MLX5_TXOFF_CONFIG(TSO) && 3690 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3691 return MLX5_TXCMP_CODE_TSO; 3692 return MLX5_TXCMP_CODE_SINGLE; 3693 } 3694 MLX5_ASSERT(false); 3695 } 3696 3697 /** 3698 * Tx burst function for single-segment packets with TSO. 3699 * Supports all types of Tx offloads, except multi-packets. 3700 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 3701 * Function stops sending if it encounters the multi-segment 3702 * packet or packet without TSO requested. 3703 * 3704 * The routine is responsible for storing processed mbuf 3705 * into elts ring buffer and update elts_head if inline 3706 * offloads is requested due to possible early freeing 3707 * of the inlined mbufs (can not store pkts array in elts 3708 * as a batch). 3709 * 3710 * @param txq 3711 * Pointer to TX queue structure. 3712 * @param[in] pkts 3713 * Packets to transmit. 3714 * @param pkts_n 3715 * Number of packets in array. 3716 * @param loc 3717 * Pointer to burst routine local context. 3718 * @param olx 3719 * Configured Tx offloads mask. It is fully defined at 3720 * compile time and may be used for optimization. 3721 * 3722 * @return 3723 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3724 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3725 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3726 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3727 * Local context variables updated. 
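 *
 * A worked example of the WQE size estimation done below, assuming
 * 16-byte WQE segments (MLX5_WSEG_SIZE) and an 18-byte minimal inline
 * Ethernet header (MLX5_ESEG_MIN_INLINE_SIZE): for a plain TCP packet
 * with l2_len 14, l3_len 20 and l4_len 20 the inlined header is
 * hlen = 54 bytes, so ds = 4 + (54 - 18 + 15) / 16 = 7 segments,
 * which occupies (7 + 3) / 4 = 2 WQEBBs.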
3728 */ 3729 static __rte_always_inline enum mlx5_txcmp_code 3730 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 3731 struct rte_mbuf **__rte_restrict pkts, 3732 unsigned int pkts_n, 3733 struct mlx5_txq_local *__rte_restrict loc, 3734 unsigned int olx) 3735 { 3736 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3737 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3738 pkts += loc->pkts_sent + 1; 3739 pkts_n -= loc->pkts_sent; 3740 for (;;) { 3741 struct mlx5_wqe_dseg *__rte_restrict dseg; 3742 struct mlx5_wqe *__rte_restrict wqe; 3743 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 3744 uint8_t *dptr; 3745 3746 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3747 if (MLX5_TXOFF_CONFIG(TXPP)) { 3748 enum mlx5_txcmp_code wret; 3749 3750 /* Generate WAIT for scheduling if requested. */ 3751 wret = mlx5_tx_schedule_send(txq, loc, olx); 3752 if (wret == MLX5_TXCMP_CODE_EXIT) 3753 return MLX5_TXCMP_CODE_EXIT; 3754 if (wret == MLX5_TXCMP_CODE_ERROR) 3755 return MLX5_TXCMP_CODE_ERROR; 3756 } 3757 dlen = rte_pktmbuf_data_len(loc->mbuf); 3758 if (MLX5_TXOFF_CONFIG(VLAN) && 3759 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 3760 vlan = sizeof(struct rte_vlan_hdr); 3761 } 3762 /* 3763 * First calculate the WQE size to check 3764 * whether we have enough space in ring buffer. 3765 */ 3766 hlen = loc->mbuf->l2_len + vlan + 3767 loc->mbuf->l3_len + loc->mbuf->l4_len; 3768 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 3769 return MLX5_TXCMP_CODE_ERROR; 3770 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3771 hlen += loc->mbuf->outer_l2_len + 3772 loc->mbuf->outer_l3_len; 3773 /* Segment must contain all TSO headers. */ 3774 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 3775 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3776 hlen > (dlen + vlan))) 3777 return MLX5_TXCMP_CODE_ERROR; 3778 /* 3779 * Check whether there are enough free WQEBBs: 3780 * - Control Segment 3781 * - Ethernet Segment 3782 * - First Segment of inlined Ethernet data 3783 * - ... data continued ... 3784 * - Finishing Data Segment of pointer type 3785 */ 3786 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 3787 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3788 if (loc->wqe_free < ((ds + 3) / 4)) 3789 return MLX5_TXCMP_CODE_EXIT; 3790 #ifdef MLX5_PMD_SOFT_COUNTERS 3791 /* Update sent data bytes/packets counters. */ 3792 ntcp = (dlen + vlan - hlen + 3793 loc->mbuf->tso_segsz - 1) / 3794 loc->mbuf->tso_segsz; 3795 /* 3796 * One will be added for mbuf itself at the end 3797 * of the mlx5_tx_burst from loc->pkts_sent field. 3798 */ 3799 --ntcp; 3800 txq->stats.opackets += ntcp; 3801 txq->stats.obytes += dlen + vlan + ntcp * hlen; 3802 #endif 3803 /* 3804 * Build the TSO WQE: 3805 * - Control Segment 3806 * - Ethernet Segment with hlen bytes inlined 3807 * - Data Segment of pointer type 3808 */ 3809 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3810 loc->wqe_last = wqe; 3811 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3812 MLX5_OPCODE_TSO, olx); 3813 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 3814 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 3815 dlen -= hlen - vlan; 3816 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 3817 /* 3818 * WQE is built, update the loop parameters 3819 * and go to the next packet. 
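		 * (E.g. ds == 7 segments from the estimation above advance
		 * wqe_ci by (7 + 3) / 4 == 2 WQEBBs.)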
3820 */ 3821 txq->wqe_ci += (ds + 3) / 4; 3822 loc->wqe_free -= (ds + 3) / 4; 3823 if (MLX5_TXOFF_CONFIG(INLINE)) 3824 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3825 --loc->elts_free; 3826 ++loc->pkts_sent; 3827 --pkts_n; 3828 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3829 return MLX5_TXCMP_CODE_EXIT; 3830 loc->mbuf = *pkts++; 3831 if (pkts_n > 1) 3832 rte_prefetch0(*pkts); 3833 if (MLX5_TXOFF_CONFIG(MULTI) && 3834 unlikely(NB_SEGS(loc->mbuf) > 1)) 3835 return MLX5_TXCMP_CODE_MULTI; 3836 if (likely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) 3837 return MLX5_TXCMP_CODE_SINGLE; 3838 /* Continue with the next TSO packet. */ 3839 } 3840 MLX5_ASSERT(false); 3841 } 3842 3843 /** 3844 * Analyze the packet and select the best method to send. 3845 * 3846 * @param txq 3847 * Pointer to TX queue structure. 3848 * @param loc 3849 * Pointer to burst routine local context. 3850 * @param olx 3851 * Configured Tx offloads mask. It is fully defined at 3852 * compile time and may be used for optimization. 3853 * @param newp 3854 * The predefined flag whether do complete check for 3855 * multi-segment packets and TSO. 3856 * 3857 * @return 3858 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3859 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 3860 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 3861 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 3862 */ 3863 static __rte_always_inline enum mlx5_txcmp_code 3864 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 3865 struct mlx5_txq_local *__rte_restrict loc, 3866 unsigned int olx, 3867 bool newp) 3868 { 3869 /* Check for multi-segment packet. */ 3870 if (newp && 3871 MLX5_TXOFF_CONFIG(MULTI) && 3872 unlikely(NB_SEGS(loc->mbuf) > 1)) 3873 return MLX5_TXCMP_CODE_MULTI; 3874 /* Check for TSO packet. */ 3875 if (newp && 3876 MLX5_TXOFF_CONFIG(TSO) && 3877 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3878 return MLX5_TXCMP_CODE_TSO; 3879 /* Check if eMPW is enabled at all. */ 3880 if (!MLX5_TXOFF_CONFIG(EMPW)) 3881 return MLX5_TXCMP_CODE_SINGLE; 3882 /* Check if eMPW can be engaged. */ 3883 if (MLX5_TXOFF_CONFIG(VLAN) && 3884 unlikely(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) && 3885 (!MLX5_TXOFF_CONFIG(INLINE) || 3886 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 3887 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 3888 /* 3889 * eMPW does not support VLAN insertion offload, 3890 * we have to inline the entire packet but 3891 * packet is too long for inlining. 3892 */ 3893 return MLX5_TXCMP_CODE_SINGLE; 3894 } 3895 return MLX5_TXCMP_CODE_EMPW; 3896 } 3897 3898 /** 3899 * Check the next packet attributes to match with the eMPW batch ones. 3900 * In addition, for legacy MPW the packet length is checked either. 3901 * 3902 * @param txq 3903 * Pointer to TX queue structure. 3904 * @param es 3905 * Pointer to Ethernet Segment of eMPW batch. 3906 * @param loc 3907 * Pointer to burst routine local context. 3908 * @param dlen 3909 * Length of previous packet in MPW descriptor. 3910 * @param olx 3911 * Configured Tx offloads mask. It is fully defined at 3912 * compile time and may be used for optimization. 3913 * 3914 * @return 3915 * true - packet match with eMPW batch attributes. 3916 * false - no match, eMPW should be restarted. 
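 *
 * For example (hypothetical offload flags): two consecutive mbufs both
 * requesting PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM with the same metadata
 * yield identical cs_flags/metadata and keep the current eMPW batch
 * open, while a following mbuf that also requests PKT_TX_OUTER_IP_CKSUM
 * changes cs_flags, so the batch is closed and restarted.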
 */
static __rte_always_inline bool
mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq,
		   struct mlx5_wqe_eseg *__rte_restrict es,
		   struct mlx5_txq_local *__rte_restrict loc,
		   uint32_t dlen,
		   unsigned int olx)
{
	uint8_t swp_flags = 0;

	/* Compare the checksum flags, if any. */
	if (MLX5_TXOFF_CONFIG(CSUM) &&
	    txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags)
		return false;
	/* Compare the Software Parser offsets and flags. */
	if (MLX5_TXOFF_CONFIG(SWP) &&
	    (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) ||
	     es->swp_flags != swp_flags))
		return false;
	/* Fill metadata field if needed. */
	if (MLX5_TXOFF_CONFIG(METADATA) &&
	    es->metadata != (loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ?
			     *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0))
		return false;
	/* Legacy MPW can send packets with the same length only. */
	if (MLX5_TXOFF_CONFIG(MPW) &&
	    dlen != rte_pktmbuf_data_len(loc->mbuf))
		return false;
	/* There must be no VLAN packets in eMPW loop. */
	if (MLX5_TXOFF_CONFIG(VLAN))
		MLX5_ASSERT(!(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT));
	/* Check if the scheduling is requested. */
	if (MLX5_TXOFF_CONFIG(TXPP) &&
	    loc->mbuf->ol_flags & txq->ts_mask)
		return false;
	return true;
}

/**
 * Update send loop variables and WQE for eMPW loop
 * without data inlining. The number of Data Segments is
 * equal to the number of sent packets.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param ds
 *   Number of packets (one Data Segment is built per packet).
 * @param slen
 *   Accumulated statistics, bytes sent.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq,
		   struct mlx5_txq_local *__rte_restrict loc,
		   unsigned int ds,
		   unsigned int slen,
		   unsigned int olx __rte_unused)
{
	MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE));
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Update sent data bytes counter. */
	txq->stats.obytes += slen;
#else
	(void)slen;
#endif
	loc->elts_free -= ds;
	loc->pkts_sent += ds;
	ds += 2;
	loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
	txq->wqe_ci += (ds + 3) / 4;
	loc->wqe_free -= (ds + 3) / 4;
}

/**
 * Update send loop variables and WQE for eMPW loop
 * with data inlining. Takes the total size of the descriptors
 * and data pushed to the WQE.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param len
 *   Total size of descriptor/data in bytes.
 * @param slen
 *   Accumulated statistics, data bytes sent.
 * @param wqem
 *   The base WQE for the eMPW/MPW descriptor.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
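 *
 * A sketch of the accounting with illustrative sizes: a legacy MPW
 * inline session that accumulated len == 4 (bcount placeholder) +
 * 3 * 60 bytes of packet data ends up with
 * bcount == 180 | MLX5_ETH_WQE_DATA_INLINE and
 * len == (184 + 15) / 16 + 2 == 14 segments, i.e. the WQE occupies
 * (14 + 3) / 4 == 4 WQEBBs (assuming 16-byte WQE segments).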
 */
static __rte_always_inline void
mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq,
		   struct mlx5_txq_local *__rte_restrict loc,
		   unsigned int len,
		   unsigned int slen,
		   struct mlx5_wqe *__rte_restrict wqem,
		   unsigned int olx __rte_unused)
{
	struct mlx5_wqe_dseg *dseg = &wqem->dseg[0];

	MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE));
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Update sent data bytes counter. */
	txq->stats.obytes += slen;
#else
	(void)slen;
#endif
	if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) {
		/*
		 * If the legacy MPW session contains inline packets,
		 * set the length of the single inline Data Segment
		 * and align the total length to the segment size.
		 */
		MLX5_ASSERT(len > sizeof(dseg->bcount));
		dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) |
						MLX5_ETH_WQE_DATA_INLINE);
		len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2;
	} else {
		/*
		 * The session is not legacy MPW or contains the
		 * data buffer pointer segments.
		 */
		MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0);
		len = len / MLX5_WSEG_SIZE + 2;
	}
	wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len);
	txq->wqe_ci += (len + 3) / 4;
	loc->wqe_free -= (len + 3) / 4;
	loc->wqe_last = wqem;
}

/**
 * The set of Tx burst functions for single-segment packets
 * without TSO and with Multi-Packet Writing feature support.
 * Supports all types of Tx offloads, except multi-packets
 * and TSO.
 *
 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends
 * as many packets per WQE as it can. If eMPW is not configured
 * or the packet cannot be sent with eMPW (VLAN insertion), the
 * ordinary SEND opcode is used and only one packet is placed
 * in the WQE.
 *
 * The functions stop sending if they encounter a multi-segment
 * packet or a packet with TSO requested.
 *
 * The routines are responsible for storing the processed mbuf
 * into the elts ring buffer and updating elts_head if the inlining
 * offload is requested. Otherwise copying mbufs to elts can be
 * postponed and completed at the end of the burst routine.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 * @param loc
 *   Pointer to burst routine local context.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   MLX5_TXCMP_CODE_EXIT - sending is done or impossible.
 *   MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred.
 *   MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered.
 *   MLX5_TXCMP_CODE_TSO - TSO packet encountered.
 *   MLX5_TXCMP_CODE_SINGLE - used inside functions set.
 *   MLX5_TXCMP_CODE_EMPW - used inside functions set.
 *
 *   Local context variables updated.
 *
 *
 * The routine sends packets with MLX5_OPCODE_EMPW
 * without inlining, this is a dedicated optimized branch.
 * No VLAN insertion is supported.
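 *
 * The WQE layout produced by this branch is one title WQEBB (Control
 * Segment + Ethernet Segment) followed by one 16-byte pointer Data
 * Segment per packet; e.g. a batch of 14 packets takes
 * ds = 2 + 14 = 16 segments, i.e. (16 + 3) / 4 = 4 WQEBBs
 * (assuming 16-byte WQE segments and 64-byte WQEBBs).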
 */
static __rte_always_inline enum mlx5_txcmp_code
mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq,
			  struct rte_mbuf **__rte_restrict pkts,
			  unsigned int pkts_n,
			  struct mlx5_txq_local *__rte_restrict loc,
			  unsigned int olx)
{
	/*
	 * Subroutine is a part of mlx5_tx_burst_single()
	 * and sends single-segment packet with eMPW opcode
	 * without data inlining.
	 */
	MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE));
	MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW));
	MLX5_ASSERT(loc->elts_free && loc->wqe_free);
	MLX5_ASSERT(pkts_n > loc->pkts_sent);
	pkts += loc->pkts_sent + 1;
	pkts_n -= loc->pkts_sent;
	for (;;) {
		struct mlx5_wqe_dseg *__rte_restrict dseg;
		struct mlx5_wqe_eseg *__rte_restrict eseg;
		enum mlx5_txcmp_code ret;
		unsigned int part, loop;
		unsigned int slen = 0;

next_empw:
		MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1);
		if (MLX5_TXOFF_CONFIG(TXPP)) {
			enum mlx5_txcmp_code wret;

			/* Generate WAIT for scheduling if requested. */
			wret = mlx5_tx_schedule_send(txq, loc, olx);
			if (wret == MLX5_TXCMP_CODE_EXIT)
				return MLX5_TXCMP_CODE_EXIT;
			if (wret == MLX5_TXCMP_CODE_ERROR)
				return MLX5_TXCMP_CODE_ERROR;
		}
		part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ?
				       MLX5_MPW_MAX_PACKETS :
				       MLX5_EMPW_MAX_PACKETS);
		if (unlikely(loc->elts_free < part)) {
			/* Not enough elts to store all mbufs. */
			if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS))
				return MLX5_TXCMP_CODE_EXIT;
			/* But we are still able to send at least a minimal eMPW. */
			part = loc->elts_free;
		}
		/* Check whether we have enough WQEs. */
		if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) {
			if (unlikely(loc->wqe_free <
				((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4)))
				return MLX5_TXCMP_CODE_EXIT;
			part = (loc->wqe_free * 4) - 2;
		}
		if (likely(part > 1))
			rte_prefetch0(*pkts);
		loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m);
		/*
		 * Build eMPW title WQEBB:
		 * - Control Segment, eMPW opcode
		 * - Ethernet Segment, no inline
		 */
		mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2,
				  MLX5_OPCODE_ENHANCED_MPSW, olx);
		mlx5_tx_eseg_none(txq, loc, loc->wqe_last,
				  olx & ~MLX5_TXOFF_CONFIG_VLAN);
		eseg = &loc->wqe_last->eseg;
		dseg = &loc->wqe_last->dseg[0];
		loop = part;
		/* Store the packet length for legacy MPW. */
		if (MLX5_TXOFF_CONFIG(MPW))
			eseg->mss = rte_cpu_to_be_16
					(rte_pktmbuf_data_len(loc->mbuf));
		for (;;) {
			uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf);
#ifdef MLX5_PMD_SOFT_COUNTERS
			/* Update sent data bytes counter. */
			slen += dlen;
#endif
			mlx5_tx_dseg_ptr
				(txq, loc, dseg,
				 rte_pktmbuf_mtod(loc->mbuf, uint8_t *),
				 dlen, olx);
			if (unlikely(--loop == 0))
				break;
			loc->mbuf = *pkts++;
			if (likely(loop > 1))
				rte_prefetch0(*pkts);
			ret = mlx5_tx_able_to_empw(txq, loc, olx, true);
			/*
			 * Unroll the completion code to avoid
			 * returning variable value - it results in
			 * unoptimized subsequent checking in the caller.
4202 */ 4203 if (ret == MLX5_TXCMP_CODE_MULTI) { 4204 part -= loop; 4205 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4206 if (unlikely(!loc->elts_free || 4207 !loc->wqe_free)) 4208 return MLX5_TXCMP_CODE_EXIT; 4209 return MLX5_TXCMP_CODE_MULTI; 4210 } 4211 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4212 if (ret == MLX5_TXCMP_CODE_TSO) { 4213 part -= loop; 4214 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4215 if (unlikely(!loc->elts_free || 4216 !loc->wqe_free)) 4217 return MLX5_TXCMP_CODE_EXIT; 4218 return MLX5_TXCMP_CODE_TSO; 4219 } 4220 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4221 part -= loop; 4222 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4223 if (unlikely(!loc->elts_free || 4224 !loc->wqe_free)) 4225 return MLX5_TXCMP_CODE_EXIT; 4226 return MLX5_TXCMP_CODE_SINGLE; 4227 } 4228 if (ret != MLX5_TXCMP_CODE_EMPW) { 4229 MLX5_ASSERT(false); 4230 part -= loop; 4231 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4232 return MLX5_TXCMP_CODE_ERROR; 4233 } 4234 /* 4235 * Check whether packet parameters coincide 4236 * within assumed eMPW batch: 4237 * - check sum settings 4238 * - metadata value 4239 * - software parser settings 4240 * - packets length (legacy MPW only) 4241 * - scheduling is not required 4242 */ 4243 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 4244 MLX5_ASSERT(loop); 4245 part -= loop; 4246 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4247 if (unlikely(!loc->elts_free || 4248 !loc->wqe_free)) 4249 return MLX5_TXCMP_CODE_EXIT; 4250 pkts_n -= part; 4251 goto next_empw; 4252 } 4253 /* Packet attributes match, continue the same eMPW. */ 4254 ++dseg; 4255 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4256 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4257 } 4258 /* eMPW is built successfully, update loop parameters. */ 4259 MLX5_ASSERT(!loop); 4260 MLX5_ASSERT(pkts_n >= part); 4261 #ifdef MLX5_PMD_SOFT_COUNTERS 4262 /* Update sent data bytes counter. */ 4263 txq->stats.obytes += slen; 4264 #endif 4265 loc->elts_free -= part; 4266 loc->pkts_sent += part; 4267 txq->wqe_ci += (2 + part + 3) / 4; 4268 loc->wqe_free -= (2 + part + 3) / 4; 4269 pkts_n -= part; 4270 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4271 return MLX5_TXCMP_CODE_EXIT; 4272 loc->mbuf = *pkts++; 4273 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4274 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 4275 return ret; 4276 /* Continue sending eMPW batches. */ 4277 } 4278 MLX5_ASSERT(false); 4279 } 4280 4281 /** 4282 * The routine sends packets with MLX5_OPCODE_EMPW 4283 * with inlining, optionally supports VLAN insertion. 4284 */ 4285 static __rte_always_inline enum mlx5_txcmp_code 4286 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 4287 struct rte_mbuf **__rte_restrict pkts, 4288 unsigned int pkts_n, 4289 struct mlx5_txq_local *__rte_restrict loc, 4290 unsigned int olx) 4291 { 4292 /* 4293 * Subroutine is the part of mlx5_tx_burst_single() 4294 * and sends single-segment packet with eMPW opcode 4295 * with data inlining. 
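	 * For instance (illustrative numbers): if loc->wqe_free limits the
	 * session to 4 WQEBBs, the 'room' computed below is
	 * 4 * 64 - 16 - 16 = 224 bytes, i.e. up to 14 pointer Data Segments,
	 * or fewer packets when their data is inlined (each inlined packet
	 * is rounded up to 16 bytes), assuming 64-byte WQEBBs and 16-byte
	 * WQE segments.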
4296 */ 4297 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4298 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 4299 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4300 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4301 pkts += loc->pkts_sent + 1; 4302 pkts_n -= loc->pkts_sent; 4303 for (;;) { 4304 struct mlx5_wqe_dseg *__rte_restrict dseg; 4305 struct mlx5_wqe *__rte_restrict wqem; 4306 enum mlx5_txcmp_code ret; 4307 unsigned int room, part, nlim; 4308 unsigned int slen = 0; 4309 4310 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4311 if (MLX5_TXOFF_CONFIG(TXPP)) { 4312 enum mlx5_txcmp_code wret; 4313 4314 /* Generate WAIT for scheduling if requested. */ 4315 wret = mlx5_tx_schedule_send(txq, loc, olx); 4316 if (wret == MLX5_TXCMP_CODE_EXIT) 4317 return MLX5_TXCMP_CODE_EXIT; 4318 if (wret == MLX5_TXCMP_CODE_ERROR) 4319 return MLX5_TXCMP_CODE_ERROR; 4320 } 4321 /* 4322 * Limits the amount of packets in one WQE 4323 * to improve CQE latency generation. 4324 */ 4325 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4326 MLX5_MPW_INLINE_MAX_PACKETS : 4327 MLX5_EMPW_MAX_PACKETS); 4328 /* Check whether we have minimal amount WQEs */ 4329 if (unlikely(loc->wqe_free < 4330 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4331 return MLX5_TXCMP_CODE_EXIT; 4332 if (likely(pkts_n > 1)) 4333 rte_prefetch0(*pkts); 4334 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4335 /* 4336 * Build eMPW title WQEBB: 4337 * - Control Segment, eMPW opcode, zero DS 4338 * - Ethernet Segment, no inline 4339 */ 4340 mlx5_tx_cseg_init(txq, loc, wqem, 0, 4341 MLX5_OPCODE_ENHANCED_MPSW, olx); 4342 mlx5_tx_eseg_none(txq, loc, wqem, 4343 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4344 dseg = &wqem->dseg[0]; 4345 /* Store the packet length for legacy MPW. */ 4346 if (MLX5_TXOFF_CONFIG(MPW)) 4347 wqem->eseg.mss = rte_cpu_to_be_16 4348 (rte_pktmbuf_data_len(loc->mbuf)); 4349 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 4350 loc->wqe_free) * MLX5_WQE_SIZE - 4351 MLX5_WQE_CSEG_SIZE - 4352 MLX5_WQE_ESEG_SIZE; 4353 /* Limit the room for legacy MPW sessions for performance. */ 4354 if (MLX5_TXOFF_CONFIG(MPW)) 4355 room = RTE_MIN(room, 4356 RTE_MAX(txq->inlen_empw + 4357 sizeof(dseg->bcount) + 4358 (MLX5_TXOFF_CONFIG(VLAN) ? 4359 sizeof(struct rte_vlan_hdr) : 0), 4360 MLX5_MPW_INLINE_MAX_PACKETS * 4361 MLX5_WQE_DSEG_SIZE)); 4362 /* Build WQE till we have space, packets and resources. */ 4363 part = room; 4364 for (;;) { 4365 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4366 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 4367 unsigned int tlen; 4368 4369 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 4370 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 4371 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 4372 /* 4373 * Some Tx offloads may cause an error if 4374 * packet is not long enough, check against 4375 * assumed minimal length. 4376 */ 4377 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 4378 part -= room; 4379 if (unlikely(!part)) 4380 return MLX5_TXCMP_CODE_ERROR; 4381 /* 4382 * We have some successfully built 4383 * packet Data Segments to send. 4384 */ 4385 mlx5_tx_idone_empw(txq, loc, part, 4386 slen, wqem, olx); 4387 return MLX5_TXCMP_CODE_ERROR; 4388 } 4389 /* Inline or not inline - that's the Question. */ 4390 if (dlen > txq->inlen_empw || 4391 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) 4392 goto pointer_empw; 4393 if (MLX5_TXOFF_CONFIG(MPW)) { 4394 if (dlen > txq->inlen_send) 4395 goto pointer_empw; 4396 tlen = dlen; 4397 if (part == room) { 4398 /* Open new inline MPW session. 
					 */
					tlen += sizeof(dseg->bcount);
					dseg->bcount = RTE_BE32(0);
					dseg = RTE_PTR_ADD
						(dseg, sizeof(dseg->bcount));
				} else {
					/*
					 * No pointer and inline descriptor
					 * intermix for legacy MPW sessions.
					 */
					if (wqem->dseg[0].bcount)
						break;
				}
			} else {
				tlen = sizeof(dseg->bcount) + dlen;
			}
			/* Inline entire packet, optional VLAN insertion. */
			if (MLX5_TXOFF_CONFIG(VLAN) &&
			    loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) {
				/*
				 * The packet length was checked in
				 * mlx5_tx_able_to_empw(), so the packet is
				 * guaranteed to fit into the inline length.
				 */
				MLX5_ASSERT((dlen +
					     sizeof(struct rte_vlan_hdr)) <=
					    txq->inlen_empw);
				tlen += sizeof(struct rte_vlan_hdr);
				if (room < tlen)
					break;
				dseg = mlx5_tx_dseg_vlan(txq, loc, dseg,
							 dptr, dlen, olx);
#ifdef MLX5_PMD_SOFT_COUNTERS
				/* Update sent data bytes counter. */
				slen += sizeof(struct rte_vlan_hdr);
#endif
			} else {
				if (room < tlen)
					break;
				dseg = mlx5_tx_dseg_empw(txq, loc, dseg,
							 dptr, dlen, olx);
			}
			if (!MLX5_TXOFF_CONFIG(MPW))
				tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE);
			MLX5_ASSERT(room >= tlen);
			room -= tlen;
			/*
			 * Packet data are completely inlined,
			 * we can try to free the packet.
			 */
			if (likely(loc->pkts_sent == loc->mbuf_free)) {
				/*
				 * All the packets from the burst beginning
				 * are inlined, we can free mbufs directly
				 * from the origin array on tx_burst exit().
				 */
				loc->mbuf_free++;
				goto next_mbuf;
			}
			/*
			 * In order not to call rte_pktmbuf_free_seg() here,
			 * in the innermost loop (that might be very
			 * expensive), we just save the mbuf in elts.
			 */
			txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf;
			loc->elts_free--;
			goto next_mbuf;
pointer_empw:
			/*
			 * No pointer and inline descriptor
			 * intermix for legacy MPW sessions.
			 */
			if (MLX5_TXOFF_CONFIG(MPW) &&
			    part != room &&
			    wqem->dseg[0].bcount == RTE_BE32(0))
				break;
			/*
			 * Non-inlinable VLAN packets are
			 * processed outside of this routine.
			 */
			MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE);
			if (MLX5_TXOFF_CONFIG(VLAN))
				MLX5_ASSERT(!(loc->mbuf->ol_flags &
					      PKT_TX_VLAN_PKT));
			mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx);
			/* We have to store mbuf in elts. */
			txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf;
			loc->elts_free--;
			room -= MLX5_WQE_DSEG_SIZE;
			/* Ring buffer wraparound is checked at the loop end. */
			++dseg;
next_mbuf:
#ifdef MLX5_PMD_SOFT_COUNTERS
			/* Update sent data bytes counter. */
			slen += dlen;
#endif
			loc->pkts_sent++;
			pkts_n--;
			if (unlikely(!pkts_n || !loc->elts_free)) {
				/*
				 * We have no resources/packets to
				 * continue building descriptors.
				 */
				part -= room;
				mlx5_tx_idone_empw(txq, loc, part,
						   slen, wqem, olx);
				return MLX5_TXCMP_CODE_EXIT;
			}
			loc->mbuf = *pkts++;
			if (likely(pkts_n > 1))
				rte_prefetch0(*pkts);
			ret = mlx5_tx_able_to_empw(txq, loc, olx, true);
			/*
			 * Unroll the completion code to avoid
			 * returning variable value - it results in
			 * unoptimized subsequent checking in the caller.
4514 */ 4515 if (ret == MLX5_TXCMP_CODE_MULTI) { 4516 part -= room; 4517 mlx5_tx_idone_empw(txq, loc, part, 4518 slen, wqem, olx); 4519 if (unlikely(!loc->elts_free || 4520 !loc->wqe_free)) 4521 return MLX5_TXCMP_CODE_EXIT; 4522 return MLX5_TXCMP_CODE_MULTI; 4523 } 4524 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4525 if (ret == MLX5_TXCMP_CODE_TSO) { 4526 part -= room; 4527 mlx5_tx_idone_empw(txq, loc, part, 4528 slen, wqem, olx); 4529 if (unlikely(!loc->elts_free || 4530 !loc->wqe_free)) 4531 return MLX5_TXCMP_CODE_EXIT; 4532 return MLX5_TXCMP_CODE_TSO; 4533 } 4534 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4535 part -= room; 4536 mlx5_tx_idone_empw(txq, loc, part, 4537 slen, wqem, olx); 4538 if (unlikely(!loc->elts_free || 4539 !loc->wqe_free)) 4540 return MLX5_TXCMP_CODE_EXIT; 4541 return MLX5_TXCMP_CODE_SINGLE; 4542 } 4543 if (ret != MLX5_TXCMP_CODE_EMPW) { 4544 MLX5_ASSERT(false); 4545 part -= room; 4546 mlx5_tx_idone_empw(txq, loc, part, 4547 slen, wqem, olx); 4548 return MLX5_TXCMP_CODE_ERROR; 4549 } 4550 /* Check if we have minimal room left. */ 4551 nlim--; 4552 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 4553 break; 4554 /* 4555 * Check whether packet parameters coincide 4556 * within assumed eMPW batch: 4557 * - check sum settings 4558 * - metadata value 4559 * - software parser settings 4560 * - packets length (legacy MPW only) 4561 * - scheduling is not required 4562 */ 4563 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 4564 loc, dlen, olx)) 4565 break; 4566 /* Packet attributes match, continue the same eMPW. */ 4567 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4568 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4569 } 4570 /* 4571 * We get here to close an existing eMPW 4572 * session and start the new one. 4573 */ 4574 MLX5_ASSERT(pkts_n); 4575 part -= room; 4576 if (unlikely(!part)) 4577 return MLX5_TXCMP_CODE_EXIT; 4578 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 4579 if (unlikely(!loc->elts_free || 4580 !loc->wqe_free)) 4581 return MLX5_TXCMP_CODE_EXIT; 4582 /* Continue the loop with new eMPW session. */ 4583 } 4584 MLX5_ASSERT(false); 4585 } 4586 4587 /** 4588 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 4589 * Data inlining and VLAN insertion are supported. 4590 */ 4591 static __rte_always_inline enum mlx5_txcmp_code 4592 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 4593 struct rte_mbuf **__rte_restrict pkts, 4594 unsigned int pkts_n, 4595 struct mlx5_txq_local *__rte_restrict loc, 4596 unsigned int olx) 4597 { 4598 /* 4599 * Subroutine is the part of mlx5_tx_burst_single() 4600 * and sends single-segment packet with SEND opcode. 4601 */ 4602 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4603 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4604 pkts += loc->pkts_sent + 1; 4605 pkts_n -= loc->pkts_sent; 4606 for (;;) { 4607 struct mlx5_wqe *__rte_restrict wqe; 4608 enum mlx5_txcmp_code ret; 4609 4610 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4611 if (MLX5_TXOFF_CONFIG(TXPP)) { 4612 enum mlx5_txcmp_code wret; 4613 4614 /* Generate WAIT for scheduling if requested. 
*/ 4615 wret = mlx5_tx_schedule_send(txq, loc, olx); 4616 if (wret == MLX5_TXCMP_CODE_EXIT) 4617 return MLX5_TXCMP_CODE_EXIT; 4618 if (wret == MLX5_TXCMP_CODE_ERROR) 4619 return MLX5_TXCMP_CODE_ERROR; 4620 } 4621 if (MLX5_TXOFF_CONFIG(INLINE)) { 4622 unsigned int inlen, vlan = 0; 4623 4624 inlen = rte_pktmbuf_data_len(loc->mbuf); 4625 if (MLX5_TXOFF_CONFIG(VLAN) && 4626 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4627 vlan = sizeof(struct rte_vlan_hdr); 4628 inlen += vlan; 4629 } 4630 /* 4631 * If inlining is enabled at configuration time 4632 * the limit must be not less than minimal size. 4633 * Otherwise we would do extra check for data 4634 * size to avoid crashes due to length overflow. 4635 */ 4636 MLX5_ASSERT(txq->inlen_send >= 4637 MLX5_ESEG_MIN_INLINE_SIZE); 4638 if (inlen <= txq->inlen_send) { 4639 unsigned int seg_n, wqe_n; 4640 4641 rte_prefetch0(rte_pktmbuf_mtod 4642 (loc->mbuf, uint8_t *)); 4643 /* Check against minimal length. */ 4644 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 4645 return MLX5_TXCMP_CODE_ERROR; 4646 if (loc->mbuf->ol_flags & 4647 PKT_TX_DYNF_NOINLINE) { 4648 /* 4649 * The hint flag not to inline packet 4650 * data is set. Check whether we can 4651 * follow the hint. 4652 */ 4653 if ((!MLX5_TXOFF_CONFIG(EMPW) && 4654 txq->inlen_mode) || 4655 (MLX5_TXOFF_CONFIG(MPW) && 4656 txq->inlen_mode)) { 4657 if (inlen <= txq->inlen_send) 4658 goto single_inline; 4659 /* 4660 * The hardware requires the 4661 * minimal inline data header. 4662 */ 4663 goto single_min_inline; 4664 } 4665 if (MLX5_TXOFF_CONFIG(VLAN) && 4666 vlan && !txq->vlan_en) { 4667 /* 4668 * We must insert VLAN tag 4669 * by software means. 4670 */ 4671 goto single_part_inline; 4672 } 4673 goto single_no_inline; 4674 } 4675 single_inline: 4676 /* 4677 * Completely inlined packet data WQE: 4678 * - Control Segment, SEND opcode 4679 * - Ethernet Segment, no VLAN insertion 4680 * - Data inlined, VLAN optionally inserted 4681 * - Alignment to MLX5_WSEG_SIZE 4682 * Have to estimate amount of WQEBBs 4683 */ 4684 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 4685 MLX5_ESEG_MIN_INLINE_SIZE + 4686 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4687 /* Check if there are enough WQEBBs. */ 4688 wqe_n = (seg_n + 3) / 4; 4689 if (wqe_n > loc->wqe_free) 4690 return MLX5_TXCMP_CODE_EXIT; 4691 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4692 loc->wqe_last = wqe; 4693 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 4694 MLX5_OPCODE_SEND, olx); 4695 mlx5_tx_eseg_data(txq, loc, wqe, 4696 vlan, inlen, 0, olx); 4697 txq->wqe_ci += wqe_n; 4698 loc->wqe_free -= wqe_n; 4699 /* 4700 * Packet data are completely inlined, 4701 * free the packet immediately. 4702 */ 4703 rte_pktmbuf_free_seg(loc->mbuf); 4704 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 4705 MLX5_TXOFF_CONFIG(MPW)) && 4706 txq->inlen_mode) { 4707 /* 4708 * If minimal inlining is requested the eMPW 4709 * feature should be disabled due to data is 4710 * inlined into Ethernet Segment, which can 4711 * not contain inlined data for eMPW due to 4712 * segment shared for all packets. 4713 */ 4714 struct mlx5_wqe_dseg *__rte_restrict dseg; 4715 unsigned int ds; 4716 uint8_t *dptr; 4717 4718 /* 4719 * The inline-mode settings require 4720 * to inline the specified amount of 4721 * data bytes to the Ethernet Segment. 4722 * We should check the free space in 4723 * WQE ring buffer to inline partially. 
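				 * E.g. (illustrative value) with
				 * txq->inlen_mode == 64 the WQE built below
				 * takes ds = (16 + 16 + 16 + 64 - 18 + 16 +
				 * 15) / 16 = 7 segments, i.e. 2 WQEBBs,
				 * assuming 16-byte WQE segments and an
				 * 18-byte minimal inline Ethernet header.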
4724 */ 4725 single_min_inline: 4726 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 4727 MLX5_ASSERT(inlen > txq->inlen_mode); 4728 MLX5_ASSERT(txq->inlen_mode >= 4729 MLX5_ESEG_MIN_INLINE_SIZE); 4730 /* 4731 * Check whether there are enough free WQEBBs: 4732 * - Control Segment 4733 * - Ethernet Segment 4734 * - First Segment of inlined Ethernet data 4735 * - ... data continued ... 4736 * - Finishing Data Segment of pointer type 4737 */ 4738 ds = (MLX5_WQE_CSEG_SIZE + 4739 MLX5_WQE_ESEG_SIZE + 4740 MLX5_WQE_DSEG_SIZE + 4741 txq->inlen_mode - 4742 MLX5_ESEG_MIN_INLINE_SIZE + 4743 MLX5_WQE_DSEG_SIZE + 4744 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4745 if (loc->wqe_free < ((ds + 3) / 4)) 4746 return MLX5_TXCMP_CODE_EXIT; 4747 /* 4748 * Build the ordinary SEND WQE: 4749 * - Control Segment 4750 * - Ethernet Segment, inline inlen_mode bytes 4751 * - Data Segment of pointer type 4752 */ 4753 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4754 loc->wqe_last = wqe; 4755 mlx5_tx_cseg_init(txq, loc, wqe, ds, 4756 MLX5_OPCODE_SEND, olx); 4757 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 4758 txq->inlen_mode, 4759 0, olx); 4760 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4761 txq->inlen_mode - vlan; 4762 inlen -= txq->inlen_mode; 4763 mlx5_tx_dseg_ptr(txq, loc, dseg, 4764 dptr, inlen, olx); 4765 /* 4766 * WQE is built, update the loop parameters 4767 * and got to the next packet. 4768 */ 4769 txq->wqe_ci += (ds + 3) / 4; 4770 loc->wqe_free -= (ds + 3) / 4; 4771 /* We have to store mbuf in elts.*/ 4772 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4773 txq->elts[txq->elts_head++ & txq->elts_m] = 4774 loc->mbuf; 4775 --loc->elts_free; 4776 } else { 4777 uint8_t *dptr; 4778 unsigned int dlen; 4779 4780 /* 4781 * Partially inlined packet data WQE, we have 4782 * some space in title WQEBB, we can fill it 4783 * with some packet data. It takes one WQEBB, 4784 * it is available, no extra space check: 4785 * - Control Segment, SEND opcode 4786 * - Ethernet Segment, no VLAN insertion 4787 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 4788 * - Data Segment, pointer type 4789 * 4790 * We also get here if VLAN insertion is not 4791 * supported by HW, the inline is enabled. 4792 */ 4793 single_part_inline: 4794 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4795 loc->wqe_last = wqe; 4796 mlx5_tx_cseg_init(txq, loc, wqe, 4, 4797 MLX5_OPCODE_SEND, olx); 4798 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 4799 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4800 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 4801 /* 4802 * The length check is performed above, by 4803 * comparing with txq->inlen_send. We should 4804 * not get overflow here. 4805 */ 4806 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 4807 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 4808 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 4809 dptr, dlen, olx); 4810 ++txq->wqe_ci; 4811 --loc->wqe_free; 4812 /* We have to store mbuf in elts.*/ 4813 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4814 txq->elts[txq->elts_head++ & txq->elts_m] = 4815 loc->mbuf; 4816 --loc->elts_free; 4817 } 4818 #ifdef MLX5_PMD_SOFT_COUNTERS 4819 /* Update sent data bytes counter. */ 4820 txq->stats.obytes += vlan + 4821 rte_pktmbuf_data_len(loc->mbuf); 4822 #endif 4823 } else { 4824 /* 4825 * No inline at all, it means the CPU cycles saving 4826 * is prioritized at configuration, we should not 4827 * copy any packet data to WQE. 
4828 * 4829 * SEND WQE, one WQEBB: 4830 * - Control Segment, SEND opcode 4831 * - Ethernet Segment, optional VLAN, no inline 4832 * - Data Segment, pointer type 4833 */ 4834 single_no_inline: 4835 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4836 loc->wqe_last = wqe; 4837 mlx5_tx_cseg_init(txq, loc, wqe, 3, 4838 MLX5_OPCODE_SEND, olx); 4839 mlx5_tx_eseg_none(txq, loc, wqe, olx); 4840 mlx5_tx_dseg_ptr 4841 (txq, loc, &wqe->dseg[0], 4842 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4843 rte_pktmbuf_data_len(loc->mbuf), olx); 4844 ++txq->wqe_ci; 4845 --loc->wqe_free; 4846 /* 4847 * We should not store mbuf pointer in elts 4848 * if no inlining is configured, this is done 4849 * by calling routine in a batch copy. 4850 */ 4851 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4852 --loc->elts_free; 4853 #ifdef MLX5_PMD_SOFT_COUNTERS 4854 /* Update sent data bytes counter. */ 4855 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 4856 if (MLX5_TXOFF_CONFIG(VLAN) && 4857 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 4858 txq->stats.obytes += 4859 sizeof(struct rte_vlan_hdr); 4860 #endif 4861 } 4862 ++loc->pkts_sent; 4863 --pkts_n; 4864 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4865 return MLX5_TXCMP_CODE_EXIT; 4866 loc->mbuf = *pkts++; 4867 if (pkts_n > 1) 4868 rte_prefetch0(*pkts); 4869 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4870 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 4871 return ret; 4872 } 4873 MLX5_ASSERT(false); 4874 } 4875 4876 static __rte_always_inline enum mlx5_txcmp_code 4877 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 4878 struct rte_mbuf **__rte_restrict pkts, 4879 unsigned int pkts_n, 4880 struct mlx5_txq_local *__rte_restrict loc, 4881 unsigned int olx) 4882 { 4883 enum mlx5_txcmp_code ret; 4884 4885 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 4886 if (ret == MLX5_TXCMP_CODE_SINGLE) 4887 goto ordinary_send; 4888 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 4889 for (;;) { 4890 /* Optimize for inline/no inline eMPW send. */ 4891 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 4892 mlx5_tx_burst_empw_inline 4893 (txq, pkts, pkts_n, loc, olx) : 4894 mlx5_tx_burst_empw_simple 4895 (txq, pkts, pkts_n, loc, olx); 4896 if (ret != MLX5_TXCMP_CODE_SINGLE) 4897 return ret; 4898 /* The resources to send one packet should remain. */ 4899 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4900 ordinary_send: 4901 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 4902 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 4903 if (ret != MLX5_TXCMP_CODE_EMPW) 4904 return ret; 4905 /* The resources to send one packet should remain. */ 4906 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4907 } 4908 } 4909 4910 /** 4911 * DPDK Tx callback template. This is configured template 4912 * used to generate routines optimized for specified offload setup. 4913 * One of this generated functions is chosen at SQ configuration 4914 * time. 4915 * 4916 * @param txq 4917 * Generic pointer to TX queue structure. 4918 * @param[in] pkts 4919 * Packets to transmit. 4920 * @param pkts_n 4921 * Number of packets in array. 4922 * @param olx 4923 * Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx 4924 * values. Should be static to take compile time static configuration 4925 * advantages. 4926 * 4927 * @return 4928 * Number of packets successfully transmitted (<= pkts_n). 
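 *
 * For example, a routine generated with
 * olx == MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW turns every
 * MLX5_TXOFF_CONFIG(TSO), (MULTI), (INLINE) etc. test below into a
 * compile-time constant false, so the corresponding branches are removed
 * by the compiler and only the eMPW/metadata paths remain in the
 * generated routine.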
 */
static __rte_always_inline uint16_t
mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq,
		   struct rte_mbuf **__rte_restrict pkts,
		   uint16_t pkts_n,
		   unsigned int olx)
{
	struct mlx5_txq_local loc;
	enum mlx5_txcmp_code ret;
	unsigned int part;

	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (unlikely(!pkts_n))
		return 0;
	if (MLX5_TXOFF_CONFIG(INLINE))
		loc.mbuf_free = 0;
	loc.pkts_sent = 0;
	loc.pkts_copy = 0;
	loc.wqe_last = NULL;

send_loop:
	loc.pkts_loop = loc.pkts_sent;
	/*
	 * Check if there are some CQEs, if any:
	 * - process any encountered errors
	 * - process the completed WQEs
	 * - free related mbufs
	 * - doorbell the NIC about processed CQEs
	 */
	rte_prefetch0(*(pkts + loc.pkts_sent));
	mlx5_tx_handle_completion(txq, olx);
	/*
	 * Calculate the number of available resources - elts and WQEs.
	 * There are two possible scenarios:
	 * - no data inlining into WQEs, one WQEBB may contain up to
	 *   four packets, in this case elts become a scarce resource
	 * - data inlining into WQEs, one packet may require multiple
	 *   WQEBBs, the WQEs become the limiting factor.
	 */
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	loc.elts_free = txq->elts_s -
				(uint16_t)(txq->elts_head - txq->elts_tail);
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	loc.wqe_free = txq->wqe_s -
				(uint16_t)(txq->wqe_ci - txq->wqe_pi);
	if (unlikely(!loc.elts_free || !loc.wqe_free))
		goto burst_exit;
	for (;;) {
		/*
		 * Fetch the packet from array. Usually this is
		 * the first packet in series of multi/single
		 * segment packets.
		 */
		loc.mbuf = *(pkts + loc.pkts_sent);
		/* Dedicated branch for multi-segment packets. */
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    unlikely(NB_SEGS(loc.mbuf) > 1)) {
			/*
			 * Multi-segment packet encountered.
			 * Hardware is able to process it only
			 * with SEND/TSO opcodes, one packet
			 * per WQE, do it in dedicated routine.
			 */
enter_send_multi:
			MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy);
			part = loc.pkts_sent - loc.pkts_copy;
			if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
				/*
				 * There are some single-segment mbufs not
				 * stored in elts. The mbufs must be in the
				 * same order as WQEs, so we must copy the
				 * mbufs to elts here, before the coming
				 * multi-segment packet mbufs are appended.
				 */
				mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy,
						  part, olx);
				loc.pkts_copy = loc.pkts_sent;
			}
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx);
			if (!MLX5_TXOFF_CONFIG(INLINE))
				loc.pkts_copy = loc.pkts_sent;
			/*
			 * These returned code checks are supposed
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT) {
				/*
				 * The routine returns this code when
				 * all packets are sent or there are not
				 * enough resources to complete the request.
				 */
				break;
			}
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				/*
				 * The routine returns this code when
				 * some error in the incoming packet
				 * format occurred.
5029 */ 5030 txq->stats.oerrors++; 5031 break; 5032 } 5033 if (ret == MLX5_TXCMP_CODE_SINGLE) { 5034 /* 5035 * The single-segment packet was encountered 5036 * in the array, try to send it with the 5037 * best optimized way, possible engaging eMPW. 5038 */ 5039 goto enter_send_single; 5040 } 5041 if (MLX5_TXOFF_CONFIG(TSO) && 5042 ret == MLX5_TXCMP_CODE_TSO) { 5043 /* 5044 * The single-segment TSO packet was 5045 * encountered in the array. 5046 */ 5047 goto enter_send_tso; 5048 } 5049 /* We must not get here. Something is going wrong. */ 5050 MLX5_ASSERT(false); 5051 txq->stats.oerrors++; 5052 break; 5053 } 5054 /* Dedicated branch for single-segment TSO packets. */ 5055 if (MLX5_TXOFF_CONFIG(TSO) && 5056 unlikely(loc.mbuf->ol_flags & PKT_TX_TCP_SEG)) { 5057 /* 5058 * TSO might require special way for inlining 5059 * (dedicated parameters) and is sent with 5060 * MLX5_OPCODE_TSO opcode only, provide this 5061 * in dedicated branch. 5062 */ 5063 enter_send_tso: 5064 MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1); 5065 MLX5_ASSERT(pkts_n > loc.pkts_sent); 5066 ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx); 5067 /* 5068 * These returned code checks are supposed 5069 * to be optimized out due to routine inlining. 5070 */ 5071 if (ret == MLX5_TXCMP_CODE_EXIT) 5072 break; 5073 if (ret == MLX5_TXCMP_CODE_ERROR) { 5074 txq->stats.oerrors++; 5075 break; 5076 } 5077 if (ret == MLX5_TXCMP_CODE_SINGLE) 5078 goto enter_send_single; 5079 if (MLX5_TXOFF_CONFIG(MULTI) && 5080 ret == MLX5_TXCMP_CODE_MULTI) { 5081 /* 5082 * The multi-segment packet was 5083 * encountered in the array. 5084 */ 5085 goto enter_send_multi; 5086 } 5087 /* We must not get here. Something is going wrong. */ 5088 MLX5_ASSERT(false); 5089 txq->stats.oerrors++; 5090 break; 5091 } 5092 /* 5093 * The dedicated branch for the single-segment packets 5094 * without TSO. Often these ones can be sent using 5095 * MLX5_OPCODE_EMPW with multiple packets in one WQE. 5096 * The routine builds the WQEs till it encounters 5097 * the TSO or multi-segment packet (in case if these 5098 * offloads are requested at SQ configuration time). 5099 */ 5100 enter_send_single: 5101 MLX5_ASSERT(pkts_n > loc.pkts_sent); 5102 ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx); 5103 /* 5104 * These returned code checks are supposed 5105 * to be optimized out due to routine inlining. 5106 */ 5107 if (ret == MLX5_TXCMP_CODE_EXIT) 5108 break; 5109 if (ret == MLX5_TXCMP_CODE_ERROR) { 5110 txq->stats.oerrors++; 5111 break; 5112 } 5113 if (MLX5_TXOFF_CONFIG(MULTI) && 5114 ret == MLX5_TXCMP_CODE_MULTI) { 5115 /* 5116 * The multi-segment packet was 5117 * encountered in the array. 5118 */ 5119 goto enter_send_multi; 5120 } 5121 if (MLX5_TXOFF_CONFIG(TSO) && 5122 ret == MLX5_TXCMP_CODE_TSO) { 5123 /* 5124 * The single-segment TSO packet was 5125 * encountered in the array. 5126 */ 5127 goto enter_send_tso; 5128 } 5129 /* We must not get here. Something is going wrong. */ 5130 MLX5_ASSERT(false); 5131 txq->stats.oerrors++; 5132 break; 5133 } 5134 /* 5135 * Main Tx loop is completed, do the rest: 5136 * - set completion request if thresholds are reached 5137 * - doorbell the hardware 5138 * - copy the rest of mbufs to elts (if any) 5139 */ 5140 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) || 5141 loc.pkts_sent >= loc.pkts_copy); 5142 /* Take a shortcut if nothing is sent. */ 5143 if (unlikely(loc.pkts_sent == loc.pkts_loop)) 5144 goto burst_exit; 5145 /* Request CQE generation if limits are reached. 
 */
	mlx5_tx_request_completion(txq, &loc, olx);
	/*
	 * Ring QP doorbell immediately after WQE building completion
	 * to improve latencies. The purely software-related data treatment
	 * can be completed after the doorbell. Tx CQEs for this SQ are
	 * processed in this thread only by polling.
	 *
	 * The rdma core library can map the doorbell register in two ways,
	 * depending on the environment variable "MLX5_SHUT_UP_BF":
	 *
	 * - as regular cached memory, the variable is either missing or
	 *   set to zero. This type of mapping may cause significant
	 *   doorbell register write latency and requires an explicit
	 *   memory write barrier to mitigate this issue and prevent
	 *   write combining.
	 *
	 * - as non-cached memory, the variable is present and set to
	 *   a non-zero value. This type of mapping may cause performance
	 *   impact under heavy loading conditions but the explicit write
	 *   memory barrier is not required and it may improve core
	 *   performance.
	 *
	 * - the legacy behaviour (prior to the 19.08 release) was to use
	 *   some heuristics to decide whether a write memory barrier should
	 *   be performed. This behaviour is selected by specifying
	 *   tx_db_nc=2: the write barrier is skipped if the application
	 *   provides the full recommended burst of packets, on the
	 *   assumption that more packets are coming and the write barrier
	 *   will be issued on the next burst (after descriptor writing,
	 *   at least).
	 */
	mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc &&
			(!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
	/* Not all of the mbufs may be stored into elts yet. */
	part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
	if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
		/*
		 * There are some single-segment mbufs not stored in elts.
		 * This can only happen if the last packet was single-segment.
		 * The copying is gathered into one place because it is
		 * a good opportunity to optimize it with SIMD.
		 * Unfortunately, if inlining is enabled, gaps in the
		 * pointer array may appear due to early freeing of the
		 * inlined mbufs.
		 */
		mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx);
		loc.pkts_copy = loc.pkts_sent;
	}
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (pkts_n > loc.pkts_sent) {
		/*
		 * If the burst size is large, there might not be enough
		 * CQEs fetched from the completion queue, and not enough
		 * resources freed, to send all the packets.
		 */
		goto send_loop;
	}
burst_exit:
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += loc.pkts_sent;
#endif
	if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
		__mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
	return loc.pkts_sent;
}

/* Generate routines with Enhanced Multi-Packet Write support.
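 * Each MLX5_TXOFF_DECL() below instantiates mlx5_tx_burst_tmpl() with the
 * given compile-time olx mask; the matching MLX5_TXOFF_INFO() entry in
 * txoff_func[] later makes that routine selectable at Tx queue
 * configuration time.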
*/ 5215 MLX5_TXOFF_DECL(full_empw, 5216 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW) 5217 5218 MLX5_TXOFF_DECL(none_empw, 5219 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 5220 5221 MLX5_TXOFF_DECL(md_empw, 5222 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5223 5224 MLX5_TXOFF_DECL(mt_empw, 5225 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5226 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5227 5228 MLX5_TXOFF_DECL(mtsc_empw, 5229 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5230 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5231 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5232 5233 MLX5_TXOFF_DECL(mti_empw, 5234 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5235 MLX5_TXOFF_CONFIG_INLINE | 5236 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5237 5238 MLX5_TXOFF_DECL(mtv_empw, 5239 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5240 MLX5_TXOFF_CONFIG_VLAN | 5241 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5242 5243 MLX5_TXOFF_DECL(mtiv_empw, 5244 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5245 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5246 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5247 5248 MLX5_TXOFF_DECL(sc_empw, 5249 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5250 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5251 5252 MLX5_TXOFF_DECL(sci_empw, 5253 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5254 MLX5_TXOFF_CONFIG_INLINE | 5255 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5256 5257 MLX5_TXOFF_DECL(scv_empw, 5258 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5259 MLX5_TXOFF_CONFIG_VLAN | 5260 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5261 5262 MLX5_TXOFF_DECL(sciv_empw, 5263 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5264 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5265 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5266 5267 MLX5_TXOFF_DECL(i_empw, 5268 MLX5_TXOFF_CONFIG_INLINE | 5269 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5270 5271 MLX5_TXOFF_DECL(v_empw, 5272 MLX5_TXOFF_CONFIG_VLAN | 5273 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5274 5275 MLX5_TXOFF_DECL(iv_empw, 5276 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5277 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5278 5279 /* Generate routines without Enhanced Multi-Packet Write support. 
*/ 5280 MLX5_TXOFF_DECL(full, 5281 MLX5_TXOFF_CONFIG_FULL) 5282 5283 MLX5_TXOFF_DECL(none, 5284 MLX5_TXOFF_CONFIG_NONE) 5285 5286 MLX5_TXOFF_DECL(md, 5287 MLX5_TXOFF_CONFIG_METADATA) 5288 5289 MLX5_TXOFF_DECL(mt, 5290 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5291 MLX5_TXOFF_CONFIG_METADATA) 5292 5293 MLX5_TXOFF_DECL(mtsc, 5294 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5295 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5296 MLX5_TXOFF_CONFIG_METADATA) 5297 5298 MLX5_TXOFF_DECL(mti, 5299 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5300 MLX5_TXOFF_CONFIG_INLINE | 5301 MLX5_TXOFF_CONFIG_METADATA) 5302 5303 5304 MLX5_TXOFF_DECL(mtv, 5305 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5306 MLX5_TXOFF_CONFIG_VLAN | 5307 MLX5_TXOFF_CONFIG_METADATA) 5308 5309 5310 MLX5_TXOFF_DECL(mtiv, 5311 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5312 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5313 MLX5_TXOFF_CONFIG_METADATA) 5314 5315 MLX5_TXOFF_DECL(sc, 5316 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5317 MLX5_TXOFF_CONFIG_METADATA) 5318 5319 MLX5_TXOFF_DECL(sci, 5320 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5321 MLX5_TXOFF_CONFIG_INLINE | 5322 MLX5_TXOFF_CONFIG_METADATA) 5323 5324 5325 MLX5_TXOFF_DECL(scv, 5326 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5327 MLX5_TXOFF_CONFIG_VLAN | 5328 MLX5_TXOFF_CONFIG_METADATA) 5329 5330 5331 MLX5_TXOFF_DECL(sciv, 5332 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5333 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5334 MLX5_TXOFF_CONFIG_METADATA) 5335 5336 MLX5_TXOFF_DECL(i, 5337 MLX5_TXOFF_CONFIG_INLINE | 5338 MLX5_TXOFF_CONFIG_METADATA) 5339 5340 MLX5_TXOFF_DECL(v, 5341 MLX5_TXOFF_CONFIG_VLAN | 5342 MLX5_TXOFF_CONFIG_METADATA) 5343 5344 MLX5_TXOFF_DECL(iv, 5345 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5346 MLX5_TXOFF_CONFIG_METADATA) 5347 5348 /* Generate routines with timestamp scheduling. */ 5349 MLX5_TXOFF_DECL(full_ts_nompw, 5350 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP) 5351 5352 MLX5_TXOFF_DECL(full_ts_nompwi, 5353 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5354 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5355 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5356 MLX5_TXOFF_CONFIG_TXPP) 5357 5358 MLX5_TXOFF_DECL(full_ts, 5359 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP | 5360 MLX5_TXOFF_CONFIG_EMPW) 5361 5362 MLX5_TXOFF_DECL(full_ts_noi, 5363 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5364 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5365 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5366 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5367 5368 MLX5_TXOFF_DECL(none_ts, 5369 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP | 5370 MLX5_TXOFF_CONFIG_EMPW) 5371 5372 MLX5_TXOFF_DECL(mdi_ts, 5373 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5374 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5375 5376 MLX5_TXOFF_DECL(mti_ts, 5377 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5378 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5379 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5380 5381 MLX5_TXOFF_DECL(mtiv_ts, 5382 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5383 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5384 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP | 5385 MLX5_TXOFF_CONFIG_EMPW) 5386 5387 /* 5388 * Generate routines with Legacy Multi-Packet Write support. 

/*
 * Generate routines with Legacy Multi-Packet Write support.
 * This mode is supported by ConnectX-4 Lx only and imposes
 * offload limitations; the following are not supported:
 * - ACL/Flows (metadata become meaningless)
 * - WQE Inline headers
 * - SRIOV (E-Switch offloads)
 * - VLAN insertion
 * - tunnel encapsulation/decapsulation
 * - TSO
 */
MLX5_TXOFF_DECL(none_mpw,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_DECL(mci_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_DECL(mc_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_DECL(i_mpw,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

/*
 * Array of declared and compiled Tx burst functions and the
 * corresponding supported offload sets. The array is used to select
 * the Tx burst function for the offload set requested at Tx queue
 * configuration time.
 */
const struct {
	eth_tx_burst_t func;
	unsigned int olx;
} txoff_func[] = {
MLX5_TXOFF_INFO(full_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(none_empw,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(md_empw,
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mt_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtsc_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mti_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtv_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtiv_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sc_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sci_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(scv_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sciv_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(i_empw,
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(v_empw,
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(iv_empw,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full_ts_nompw,
		MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP)

MLX5_TXOFF_INFO(full_ts_nompwi,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP)

MLX5_TXOFF_INFO(full_ts,
		MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full_ts_noi,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(none_ts,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mdi_ts,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mti_ts,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtiv_ts,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(none,
		MLX5_TXOFF_CONFIG_NONE)

MLX5_TXOFF_INFO(md,
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mt,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtsc,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mti,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtv,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtiv,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sc,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sci,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(scv,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sciv,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(i,
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(v,
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(iv,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(none_mpw,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(mci_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(mc_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(i_mpw,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)
};
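
/*
 * Editor's sketch (illustration only; the guard macro is hypothetical
 * and left undefined, so this is never built). It shows how txoff_func[]
 * maps an offload set to a burst routine with an exact-match scan. The
 * real selection in mlx5_select_tx_function() below additionally falls
 * back to the closest superset when no exact entry exists.
 */
#ifdef MLX5_TXOFF_DOC_EXAMPLE
static eth_tx_burst_t
example_txoff_exact_lookup(unsigned int olx)
{
	unsigned int i;

	for (i = 0; i < RTE_DIM(txoff_func); i++)
		if (txoff_func[i].olx == olx)
			return txoff_func[i].func;
	return NULL;
}
#endif /* MLX5_TXOFF_DOC_EXAMPLE */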

/**
 * Configure the Tx function to use. The routine checks the configured
 * Tx offloads for the device and selects the appropriate Tx burst
 * routine. There are multiple Tx burst routines compiled from the
 * same template, each optimized for a dedicated set of Tx offloads.
 *
 * @param dev
 *   Pointer to the Ethernet device structure.
 *
 * @return
 *   Pointer to the selected Tx burst function.
 */
eth_tx_burst_t
mlx5_select_tx_function(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
	unsigned int diff = 0, olx = 0, i, m;

	MLX5_ASSERT(priv);
	if (tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) {
		/* We should support Multi-Segment Packets. */
		olx |= MLX5_TXOFF_CONFIG_MULTI;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO |
			   DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
			   DEV_TX_OFFLOAD_GRE_TNL_TSO |
			   DEV_TX_OFFLOAD_IP_TNL_TSO |
			   DEV_TX_OFFLOAD_UDP_TNL_TSO)) {
		/* We should support TCP Send Offload. */
		olx |= MLX5_TXOFF_CONFIG_TSO;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO |
			   DEV_TX_OFFLOAD_UDP_TNL_TSO |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
		/* We should support Software Parser for Tunnels. */
		olx |= MLX5_TXOFF_CONFIG_SWP;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
			   DEV_TX_OFFLOAD_UDP_CKSUM |
			   DEV_TX_OFFLOAD_TCP_CKSUM |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
		/* We should support IP/TCP/UDP Checksums. */
		olx |= MLX5_TXOFF_CONFIG_CSUM;
	}
	if (tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT) {
		/* We should support VLAN insertion. */
		olx |= MLX5_TXOFF_CONFIG_VLAN;
	}
	if (tx_offloads & DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP &&
	    rte_mbuf_dynflag_lookup
			(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL) >= 0 &&
	    rte_mbuf_dynfield_lookup
			(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL) >= 0) {
		/* Offload configured, dynamic entities registered. */
		olx |= MLX5_TXOFF_CONFIG_TXPP;
	}
	if (priv->txqs_n && (*priv->txqs)[0]) {
		struct mlx5_txq_data *txd = (*priv->txqs)[0];

		if (txd->inlen_send) {
			/*
			 * Check the data inline requirements. Data inline
			 * is enabled on a per-device basis, so checking
			 * the first Tx queue only is sufficient.
			 *
			 * If the device does not support VLAN insertion
			 * in the WQE and some queues are requested to
			 * perform VLAN insertion offload, then inlining
			 * must be enabled.
			 */
			olx |= MLX5_TXOFF_CONFIG_INLINE;
		}
	}
	if (config->mps == MLX5_MPW_ENHANCED &&
	    config->txq_inline_min <= 0) {
		/*
		 * The NIC supports Enhanced Multi-Packet Write
		 * and does not require minimal inline data.
		 */
		olx |= MLX5_TXOFF_CONFIG_EMPW;
	}
5694 */ 5695 olx |= MLX5_TXOFF_CONFIG_EMPW; 5696 } 5697 if (rte_flow_dynf_metadata_avail()) { 5698 /* We should support Flow metadata. */ 5699 olx |= MLX5_TXOFF_CONFIG_METADATA; 5700 } 5701 if (config->mps == MLX5_MPW) { 5702 /* 5703 * The NIC supports Legacy Multi-Packet Write. 5704 * The MLX5_TXOFF_CONFIG_MPW controls the 5705 * descriptor building method in combination 5706 * with MLX5_TXOFF_CONFIG_EMPW. 5707 */ 5708 if (!(olx & (MLX5_TXOFF_CONFIG_TSO | 5709 MLX5_TXOFF_CONFIG_SWP | 5710 MLX5_TXOFF_CONFIG_VLAN | 5711 MLX5_TXOFF_CONFIG_METADATA))) 5712 olx |= MLX5_TXOFF_CONFIG_EMPW | 5713 MLX5_TXOFF_CONFIG_MPW; 5714 } 5715 /* 5716 * Scan the routines table to find the minimal 5717 * satisfying routine with requested offloads. 5718 */ 5719 m = RTE_DIM(txoff_func); 5720 for (i = 0; i < RTE_DIM(txoff_func); i++) { 5721 unsigned int tmp; 5722 5723 tmp = txoff_func[i].olx; 5724 if (tmp == olx) { 5725 /* Meets requested offloads exactly.*/ 5726 m = i; 5727 break; 5728 } 5729 if ((tmp & olx) != olx) { 5730 /* Does not meet requested offloads at all. */ 5731 continue; 5732 } 5733 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_MPW) 5734 /* Do not enable legacy MPW if not configured. */ 5735 continue; 5736 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_EMPW) 5737 /* Do not enable eMPW if not configured. */ 5738 continue; 5739 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_INLINE) 5740 /* Do not enable inlining if not configured. */ 5741 continue; 5742 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_TXPP) 5743 /* Do not enable scheduling if not configured. */ 5744 continue; 5745 /* 5746 * Some routine meets the requirements. 5747 * Check whether it has minimal amount 5748 * of not requested offloads. 5749 */ 5750 tmp = __builtin_popcountl(tmp & ~olx); 5751 if (m >= RTE_DIM(txoff_func) || tmp < diff) { 5752 /* First or better match, save and continue. */ 5753 m = i; 5754 diff = tmp; 5755 continue; 5756 } 5757 if (tmp == diff) { 5758 tmp = txoff_func[i].olx ^ txoff_func[m].olx; 5759 if (__builtin_ffsl(txoff_func[i].olx & ~tmp) < 5760 __builtin_ffsl(txoff_func[m].olx & ~tmp)) { 5761 /* Lighter not requested offload. 

/**
 * DPDK callback to get the Tx queue information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param tx_queue_id
 *   Tx queue identifier.
 *
 * @param qinfo
 *   Pointer to the Tx queue information structure.
 *
 * @return
 *   None.
 */
void
mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		  struct rte_eth_txq_info *qinfo)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[tx_queue_id];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);

	if (!txq)
		return;
	qinfo->nb_desc = txq->elts_s;
	qinfo->conf.tx_thresh.pthresh = 0;
	qinfo->conf.tx_thresh.hthresh = 0;
	qinfo->conf.tx_thresh.wthresh = 0;
	qinfo->conf.tx_rs_thresh = 0;
	qinfo->conf.tx_free_thresh = 0;
	qinfo->conf.tx_deferred_start = txq_ctrl ? 0 : 1;
	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
}
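
/*
 * Editor's usage sketch (illustration only; the guard macro is
 * hypothetical and left undefined, so this is never built). An
 * application reaches mlx5_txq_info_get() above through the generic
 * rte_eth_tx_queue_info_get() ethdev call; the port and queue
 * identifiers below are placeholders, and the includes would normally
 * live at the top of an application source file.
 */
#ifdef MLX5_TXOFF_DOC_EXAMPLE
#include <stdio.h>
#include <inttypes.h>
#include <rte_ethdev.h>

static void
example_show_txq_info(uint16_t port_id, uint16_t queue_id)
{
	struct rte_eth_txq_info qinfo;

	if (rte_eth_tx_queue_info_get(port_id, queue_id, &qinfo) != 0)
		return;
	printf("Tx queue %u: %u descriptors, offloads 0x%" PRIx64 "\n",
	       queue_id, qinfo.nb_desc, qinfo.conf.offloads);
}
#endif /* MLX5_TXOFF_DOC_EXAMPLE */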
5872 " + MULTI" : "", 5873 (olx & MLX5_TXOFF_CONFIG_TSO) ? 5874 " + TSO" : "", 5875 (olx & MLX5_TXOFF_CONFIG_SWP) ? 5876 " + SWP" : "", 5877 (olx & MLX5_TXOFF_CONFIG_CSUM) ? 5878 " + CSUM" : "", 5879 (olx & MLX5_TXOFF_CONFIG_INLINE) ? 5880 " + INLINE" : "", 5881 (olx & MLX5_TXOFF_CONFIG_VLAN) ? 5882 " + VLAN" : "", 5883 (olx & MLX5_TXOFF_CONFIG_METADATA) ? 5884 " + METADATA" : "", 5885 (olx & MLX5_TXOFF_CONFIG_TXPP) ? 5886 " + TXPP" : "", 5887 (txq && txq->fast_free) ? 5888 " + Fast Free" : ""); 5889 return 0; 5890 } 5891 } 5892 return -EINVAL; 5893 } 5894