/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015-2019 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_cycles.h>
#include <rte_flow.h>

#include <mlx5_prm.h>
#include <mlx5_common.h>

#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
	MLX5_TXCMP_CODE_EXIT = 0,
	MLX5_TXCMP_CODE_ERROR,
	MLX5_TXCMP_CODE_SINGLE,
	MLX5_TXCMP_CODE_MULTI,
	MLX5_TXCMP_CODE_TSO,
	MLX5_TXCMP_CODE_EMPW,
};

/*
 * These defines are used to configure the Tx burst routine option set
 * supported at compile time. Options that are not specified are optimized
 * out because the related 'if' conditions can be evaluated at compile time.
 * Offloads with a bigger runtime check overhead (requiring more CPU cycles
 * to skip) should have a bigger index - this is needed to select the better
 * matching routine when there is no exact match and some offloads are not
 * actually requested.
 */
#define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets.*/
#define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported.*/
#define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads.*/
#define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Check Sums offloaded. */
#define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported.*/
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported.*/
#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported.*/
#define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp.*/

/* The most common offloads groups.
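 * MLX5_TXOFF_CONFIG_FULL below combines the generic per-packet offload
 * flags. As a rough illustration: a burst routine instantiated with
 * olx == MLX5_TXOFF_CONFIG_FULL keeps every branch guarded by
 * MLX5_TXOFF_CONFIG(...) for these offloads, while a routine built with a
 * narrower mask lets the compiler fold the unused branches away as
 * compile-time constants.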
*/ 60 #define MLX5_TXOFF_CONFIG_NONE 0 61 #define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \ 62 MLX5_TXOFF_CONFIG_TSO | \ 63 MLX5_TXOFF_CONFIG_SWP | \ 64 MLX5_TXOFF_CONFIG_CSUM | \ 65 MLX5_TXOFF_CONFIG_INLINE | \ 66 MLX5_TXOFF_CONFIG_VLAN | \ 67 MLX5_TXOFF_CONFIG_METADATA) 68 69 #define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask) 70 71 #define MLX5_TXOFF_DECL(func, olx) \ 72 static uint16_t mlx5_tx_burst_##func(void *txq, \ 73 struct rte_mbuf **pkts, \ 74 uint16_t pkts_n) \ 75 { \ 76 return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \ 77 pkts, pkts_n, (olx)); \ 78 } 79 80 #define MLX5_TXOFF_INFO(func, olx) {mlx5_tx_burst_##func, olx}, 81 82 /* static asserts */ 83 static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value"); 84 static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be negative value"); 85 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 86 (sizeof(uint16_t) + 87 sizeof(rte_v128u32_t)), 88 "invalid Ethernet Segment data size"); 89 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 90 (sizeof(uint16_t) + 91 sizeof(struct rte_vlan_hdr) + 92 2 * RTE_ETHER_ADDR_LEN), 93 "invalid Ethernet Segment data size"); 94 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 95 (sizeof(uint16_t) + 96 sizeof(rte_v128u32_t)), 97 "invalid Ethernet Segment data size"); 98 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 99 (sizeof(uint16_t) + 100 sizeof(struct rte_vlan_hdr) + 101 2 * RTE_ETHER_ADDR_LEN), 102 "invalid Ethernet Segment data size"); 103 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 104 (sizeof(uint16_t) + 105 sizeof(rte_v128u32_t)), 106 "invalid Ethernet Segment data size"); 107 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 108 (sizeof(uint16_t) + 109 sizeof(struct rte_vlan_hdr) + 110 2 * RTE_ETHER_ADDR_LEN), 111 "invalid Ethernet Segment data size"); 112 static_assert(MLX5_DSEG_MIN_INLINE_SIZE == 113 (2 * RTE_ETHER_ADDR_LEN), 114 "invalid Data Segment data size"); 115 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 116 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 117 static_assert((sizeof(struct rte_vlan_hdr) + 118 sizeof(struct rte_ether_hdr)) == 119 MLX5_ESEG_MIN_INLINE_SIZE, 120 "invalid min inline data size"); 121 static_assert(MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE <= 122 MLX5_DSEG_MAX, "invalid WQE max size"); 123 static_assert(MLX5_WQE_CSEG_SIZE == MLX5_WSEG_SIZE, 124 "invalid WQE Control Segment size"); 125 static_assert(MLX5_WQE_ESEG_SIZE == MLX5_WSEG_SIZE, 126 "invalid WQE Ethernet Segment size"); 127 static_assert(MLX5_WQE_DSEG_SIZE == MLX5_WSEG_SIZE, 128 "invalid WQE Data Segment size"); 129 static_assert(MLX5_WQE_SIZE == 4 * MLX5_WSEG_SIZE, 130 "invalid WQE size"); 131 132 static __rte_always_inline uint32_t 133 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 134 volatile struct mlx5_mini_cqe8 *mcqe); 135 136 static __rte_always_inline int 137 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 138 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe); 139 140 static __rte_always_inline uint32_t 141 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe); 142 143 static __rte_always_inline void 144 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 145 volatile struct mlx5_cqe *cqe, 146 volatile struct mlx5_mini_cqe8 *mcqe); 147 148 static int 149 mlx5_queue_state_modify(struct rte_eth_dev *dev, 150 struct mlx5_mp_arg_queue_state_modify *sm); 151 152 static inline void 153 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 154 volatile struct mlx5_cqe *__rte_restrict cqe, 
155 uint32_t phcsum, uint8_t l4_type); 156 157 static inline void 158 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 159 volatile struct mlx5_cqe *__rte_restrict cqe, 160 volatile struct mlx5_mini_cqe8 *mcqe, 161 struct mlx5_rxq_data *rxq, uint32_t len); 162 163 uint32_t mlx5_ptype_table[] __rte_cache_aligned = { 164 [0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */ 165 }; 166 167 uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned; 168 uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned; 169 170 uint64_t rte_net_mlx5_dynf_inline_mask; 171 #define PKT_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask 172 173 /** 174 * Build a table to translate Rx completion flags to packet type. 175 * 176 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 177 */ 178 void 179 mlx5_set_ptype_table(void) 180 { 181 unsigned int i; 182 uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table; 183 184 /* Last entry must not be overwritten, reserved for errored packet. */ 185 for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i) 186 (*p)[i] = RTE_PTYPE_UNKNOWN; 187 /* 188 * The index to the array should have: 189 * bit[1:0] = l3_hdr_type 190 * bit[4:2] = l4_hdr_type 191 * bit[5] = ip_frag 192 * bit[6] = tunneled 193 * bit[7] = outer_l3_type 194 */ 195 /* L2 */ 196 (*p)[0x00] = RTE_PTYPE_L2_ETHER; 197 /* L3 */ 198 (*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 199 RTE_PTYPE_L4_NONFRAG; 200 (*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 201 RTE_PTYPE_L4_NONFRAG; 202 /* Fragmented */ 203 (*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 204 RTE_PTYPE_L4_FRAG; 205 (*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 206 RTE_PTYPE_L4_FRAG; 207 /* TCP */ 208 (*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 209 RTE_PTYPE_L4_TCP; 210 (*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 211 RTE_PTYPE_L4_TCP; 212 (*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 213 RTE_PTYPE_L4_TCP; 214 (*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 215 RTE_PTYPE_L4_TCP; 216 (*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 217 RTE_PTYPE_L4_TCP; 218 (*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 219 RTE_PTYPE_L4_TCP; 220 /* UDP */ 221 (*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 222 RTE_PTYPE_L4_UDP; 223 (*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 224 RTE_PTYPE_L4_UDP; 225 /* Repeat with outer_l3_type being set. Just in case. 
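	 * With bit[7] (outer_l3_type) set the entries simply mirror the
	 * ones above, e.g. index 0x06 and 0x86 both map to
	 * RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
	 * RTE_PTYPE_L4_TCP.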
*/ 226 (*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 227 RTE_PTYPE_L4_NONFRAG; 228 (*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 229 RTE_PTYPE_L4_NONFRAG; 230 (*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 231 RTE_PTYPE_L4_FRAG; 232 (*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 233 RTE_PTYPE_L4_FRAG; 234 (*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 235 RTE_PTYPE_L4_TCP; 236 (*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 237 RTE_PTYPE_L4_TCP; 238 (*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 239 RTE_PTYPE_L4_TCP; 240 (*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 241 RTE_PTYPE_L4_TCP; 242 (*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 243 RTE_PTYPE_L4_TCP; 244 (*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 245 RTE_PTYPE_L4_TCP; 246 (*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 247 RTE_PTYPE_L4_UDP; 248 (*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 249 RTE_PTYPE_L4_UDP; 250 /* Tunneled - L3 */ 251 (*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 252 (*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 253 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 254 RTE_PTYPE_INNER_L4_NONFRAG; 255 (*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 256 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 257 RTE_PTYPE_INNER_L4_NONFRAG; 258 (*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 259 (*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 260 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 261 RTE_PTYPE_INNER_L4_NONFRAG; 262 (*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 263 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 264 RTE_PTYPE_INNER_L4_NONFRAG; 265 /* Tunneled - Fragmented */ 266 (*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 267 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 268 RTE_PTYPE_INNER_L4_FRAG; 269 (*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 270 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 271 RTE_PTYPE_INNER_L4_FRAG; 272 (*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 273 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 274 RTE_PTYPE_INNER_L4_FRAG; 275 (*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 276 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 277 RTE_PTYPE_INNER_L4_FRAG; 278 /* Tunneled - TCP */ 279 (*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 280 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 281 RTE_PTYPE_INNER_L4_TCP; 282 (*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 283 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 284 RTE_PTYPE_INNER_L4_TCP; 285 (*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 286 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 287 RTE_PTYPE_INNER_L4_TCP; 288 (*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 289 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 290 RTE_PTYPE_INNER_L4_TCP; 291 (*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 292 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 293 RTE_PTYPE_INNER_L4_TCP; 294 (*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 295 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 296 RTE_PTYPE_INNER_L4_TCP; 297 (*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 298 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 299 RTE_PTYPE_INNER_L4_TCP; 300 (*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 301 
RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 302 RTE_PTYPE_INNER_L4_TCP; 303 (*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 304 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 305 RTE_PTYPE_INNER_L4_TCP; 306 (*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 307 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 308 RTE_PTYPE_INNER_L4_TCP; 309 (*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 310 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 311 RTE_PTYPE_INNER_L4_TCP; 312 (*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 313 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 314 RTE_PTYPE_INNER_L4_TCP; 315 /* Tunneled - UDP */ 316 (*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 317 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 318 RTE_PTYPE_INNER_L4_UDP; 319 (*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 320 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 321 RTE_PTYPE_INNER_L4_UDP; 322 (*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 323 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 324 RTE_PTYPE_INNER_L4_UDP; 325 (*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 326 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 327 RTE_PTYPE_INNER_L4_UDP; 328 } 329 330 /** 331 * Build a table to translate packet to checksum type of Verbs. 332 */ 333 void 334 mlx5_set_cksum_table(void) 335 { 336 unsigned int i; 337 uint8_t v; 338 339 /* 340 * The index should have: 341 * bit[0] = PKT_TX_TCP_SEG 342 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 343 * bit[4] = PKT_TX_IP_CKSUM 344 * bit[8] = PKT_TX_OUTER_IP_CKSUM 345 * bit[9] = tunnel 346 */ 347 for (i = 0; i < RTE_DIM(mlx5_cksum_table); ++i) { 348 v = 0; 349 if (i & (1 << 9)) { 350 /* Tunneled packet. */ 351 if (i & (1 << 8)) /* Outer IP. */ 352 v |= MLX5_ETH_WQE_L3_CSUM; 353 if (i & (1 << 4)) /* Inner IP. */ 354 v |= MLX5_ETH_WQE_L3_INNER_CSUM; 355 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 356 v |= MLX5_ETH_WQE_L4_INNER_CSUM; 357 } else { 358 /* No tunnel. */ 359 if (i & (1 << 4)) /* IP. */ 360 v |= MLX5_ETH_WQE_L3_CSUM; 361 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 362 v |= MLX5_ETH_WQE_L4_CSUM; 363 } 364 mlx5_cksum_table[i] = v; 365 } 366 } 367 368 /** 369 * Build a table to translate packet type of mbuf to SWP type of Verbs. 370 */ 371 void 372 mlx5_set_swp_types_table(void) 373 { 374 unsigned int i; 375 uint8_t v; 376 377 /* 378 * The index should have: 379 * bit[0:1] = PKT_TX_L4_MASK 380 * bit[4] = PKT_TX_IPV6 381 * bit[8] = PKT_TX_OUTER_IPV6 382 * bit[9] = PKT_TX_OUTER_UDP 383 */ 384 for (i = 0; i < RTE_DIM(mlx5_swp_types_table); ++i) { 385 v = 0; 386 if (i & (1 << 8)) 387 v |= MLX5_ETH_WQE_L3_OUTER_IPV6; 388 if (i & (1 << 9)) 389 v |= MLX5_ETH_WQE_L4_OUTER_UDP; 390 if (i & (1 << 4)) 391 v |= MLX5_ETH_WQE_L3_INNER_IPV6; 392 if ((i & 3) == (PKT_TX_UDP_CKSUM >> 52)) 393 v |= MLX5_ETH_WQE_L4_INNER_UDP; 394 mlx5_swp_types_table[i] = v; 395 } 396 } 397 398 /** 399 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 400 * Flags must be preliminary initialized to zero. 401 * 402 * @param loc 403 * Pointer to burst routine local context. 404 * @param swp_flags 405 * Pointer to store Software Parser flags 406 * @param olx 407 * Configured Tx offloads mask. It is fully defined at 408 * compile time and may be used for optimization. 409 * 410 * @return 411 * Software Parser offsets packed in dword. 412 * Software Parser flags are set by pointer. 
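 *   As built below, the packed layout is: bits [7:0] outer L4 offset,
 *   [15:8] outer L3 offset, [23:16] inner L4 offset, [31:24] inner L3
 *   offset, all expressed in 2-byte units and converted to little-endian.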
413 */ 414 static __rte_always_inline uint32_t 415 txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, 416 uint8_t *swp_flags, 417 unsigned int olx) 418 { 419 uint64_t ol, tunnel; 420 unsigned int idx, off; 421 uint32_t set; 422 423 if (!MLX5_TXOFF_CONFIG(SWP)) 424 return 0; 425 ol = loc->mbuf->ol_flags; 426 tunnel = ol & PKT_TX_TUNNEL_MASK; 427 /* 428 * Check whether Software Parser is required. 429 * Only customized tunnels may ask for. 430 */ 431 if (likely(tunnel != PKT_TX_TUNNEL_UDP && tunnel != PKT_TX_TUNNEL_IP)) 432 return 0; 433 /* 434 * The index should have: 435 * bit[0:1] = PKT_TX_L4_MASK 436 * bit[4] = PKT_TX_IPV6 437 * bit[8] = PKT_TX_OUTER_IPV6 438 * bit[9] = PKT_TX_OUTER_UDP 439 */ 440 idx = (ol & (PKT_TX_L4_MASK | PKT_TX_IPV6 | PKT_TX_OUTER_IPV6)) >> 52; 441 idx |= (tunnel == PKT_TX_TUNNEL_UDP) ? (1 << 9) : 0; 442 *swp_flags = mlx5_swp_types_table[idx]; 443 /* 444 * Set offsets for SW parser. Since ConnectX-5, SW parser just 445 * complements HW parser. SW parser starts to engage only if HW parser 446 * can't reach a header. For the older devices, HW parser will not kick 447 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 448 * should be set regardless of HW offload. 449 */ 450 off = loc->mbuf->outer_l2_len; 451 if (MLX5_TXOFF_CONFIG(VLAN) && ol & PKT_TX_VLAN_PKT) 452 off += sizeof(struct rte_vlan_hdr); 453 set = (off >> 1) << 8; /* Outer L3 offset. */ 454 off += loc->mbuf->outer_l3_len; 455 if (tunnel == PKT_TX_TUNNEL_UDP) 456 set |= off >> 1; /* Outer L4 offset. */ 457 if (ol & (PKT_TX_IPV4 | PKT_TX_IPV6)) { /* Inner IP. */ 458 const uint64_t csum = ol & PKT_TX_L4_MASK; 459 off += loc->mbuf->l2_len; 460 set |= (off >> 1) << 24; /* Inner L3 offset. */ 461 if (csum == PKT_TX_TCP_CKSUM || 462 csum == PKT_TX_UDP_CKSUM || 463 (MLX5_TXOFF_CONFIG(TSO) && ol & PKT_TX_TCP_SEG)) { 464 off += loc->mbuf->l3_len; 465 set |= (off >> 1) << 16; /* Inner L4 offset. */ 466 } 467 } 468 set = rte_cpu_to_le_32(set); 469 return set; 470 } 471 472 /** 473 * Convert the Checksum offloads to Verbs. 474 * 475 * @param buf 476 * Pointer to the mbuf. 477 * 478 * @return 479 * Converted checksum flags. 480 */ 481 static __rte_always_inline uint8_t 482 txq_ol_cksum_to_cs(struct rte_mbuf *buf) 483 { 484 uint32_t idx; 485 uint8_t is_tunnel = !!(buf->ol_flags & PKT_TX_TUNNEL_MASK); 486 const uint64_t ol_flags_mask = PKT_TX_TCP_SEG | PKT_TX_L4_MASK | 487 PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM; 488 489 /* 490 * The index should have: 491 * bit[0] = PKT_TX_TCP_SEG 492 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 493 * bit[4] = PKT_TX_IP_CKSUM 494 * bit[8] = PKT_TX_OUTER_IP_CKSUM 495 * bit[9] = tunnel 496 */ 497 idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9); 498 return mlx5_cksum_table[idx]; 499 } 500 501 /** 502 * Internal function to compute the number of used descriptors in an RX queue 503 * 504 * @param rxq 505 * The Rx queue. 506 * 507 * @return 508 * The number of used rx descriptor. 
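 *   The count also includes the remaining entries of a compressed CQE
 *   session that is still being decompressed (tracked in rxq->zip).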
 */
static uint32_t
rx_queue_count(struct mlx5_rxq_data *rxq)
{
	struct rxq_zip *zip = &rxq->zip;
	volatile struct mlx5_cqe *cqe;
	const unsigned int cqe_n = (1 << rxq->cqe_n);
	const unsigned int sges_n = (1 << rxq->sges_n);
	const unsigned int elts_n = (1 << rxq->elts_n);
	const unsigned int strd_n = (1 << rxq->strd_num_n);
	const unsigned int cqe_cnt = cqe_n - 1;
	unsigned int cq_ci, used;

	/* If we are processing a compressed CQE. */
	if (zip->ai) {
		used = zip->cqe_cnt - zip->ai;
		cq_ci = zip->cq_ci;
	} else {
		used = 0;
		cq_ci = rxq->cq_ci;
	}
	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) {
		int8_t op_own;
		unsigned int n;

		op_own = cqe->op_own;
		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
			n = rte_be_to_cpu_32(cqe->byte_cnt);
		else
			n = 1;
		cq_ci += n;
		used += n;
		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	}
	used = RTE_MIN(used * sges_n, elts_n * strd_n);
	return used;
}

/**
 * DPDK callback to check the status of a Rx descriptor.
 *
 * @param rx_queue
 *   The Rx queue.
 * @param[in] offset
 *   The index of the descriptor in the ring.
 *
 * @return
 *   The status of the Rx descriptor.
 */
int
mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
{
	struct mlx5_rxq_data *rxq = rx_queue;
	struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);

	if (dev->rx_pkt_burst == NULL ||
	    dev->rx_pkt_burst == removed_rx_burst) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	if (offset >= (1 << rxq->cqe_n)) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (offset < rx_queue_count(rxq))
		return RTE_ETH_RX_DESC_DONE;
	return RTE_ETH_RX_DESC_AVAIL;
}

/**
 * DPDK callback to get the RX queue information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   Rx queue identifier.
 *
 * @param qinfo
 *   Pointer to the RX queue information structure.
 *
 * @return
 *   None.
 */

void
mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		  struct rte_eth_rxq_info *qinfo)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

	if (!rxq)
		return;
	qinfo->mp = mlx5_rxq_mprq_enabled(rxq) ?
					rxq->mprq_mp : rxq->mp;
	qinfo->conf.rx_thresh.pthresh = 0;
	qinfo->conf.rx_thresh.hthresh = 0;
	qinfo->conf.rx_thresh.wthresh = 0;
	qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh;
	qinfo->conf.rx_drop_en = 1;
	qinfo->conf.rx_deferred_start = rxq_ctrl ? 0 : 1;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
	qinfo->scattered_rx = dev->data->scattered_rx;
	qinfo->nb_desc = mlx5_rxq_mprq_enabled(rxq) ?
		(1 << rxq->elts_n) * (1 << rxq->strd_num_n) :
		(1 << rxq->elts_n);
}

/**
 * DPDK callback to get the RX packet burst mode information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   Rx queue identifier.
 *
 * @param mode
 *   Pointer to the burst mode information.
 *
 * @return
 *   0 as success, -EINVAL as failure.
637 */ 638 639 int 640 mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, 641 uint16_t rx_queue_id __rte_unused, 642 struct rte_eth_burst_mode *mode) 643 { 644 eth_rx_burst_t pkt_burst = dev->rx_pkt_burst; 645 struct mlx5_priv *priv = dev->data->dev_private; 646 struct mlx5_rxq_data *rxq; 647 648 rxq = (*priv->rxqs)[rx_queue_id]; 649 if (!rxq) { 650 rte_errno = EINVAL; 651 return -rte_errno; 652 } 653 if (pkt_burst == mlx5_rx_burst) { 654 snprintf(mode->info, sizeof(mode->info), "%s", "Scalar"); 655 } else if (pkt_burst == mlx5_rx_burst_mprq) { 656 snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ"); 657 } else if (pkt_burst == mlx5_rx_burst_vec) { 658 #if defined RTE_ARCH_X86_64 659 snprintf(mode->info, sizeof(mode->info), "%s", "Vector SSE"); 660 #elif defined RTE_ARCH_ARM64 661 snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon"); 662 #elif defined RTE_ARCH_PPC_64 663 snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec"); 664 #else 665 return -EINVAL; 666 #endif 667 } else if (pkt_burst == mlx5_rx_burst_mprq_vec) { 668 #if defined RTE_ARCH_X86_64 669 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector SSE"); 670 #elif defined RTE_ARCH_ARM64 671 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon"); 672 #elif defined RTE_ARCH_PPC_64 673 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec"); 674 #else 675 return -EINVAL; 676 #endif 677 } else { 678 return -EINVAL; 679 } 680 return 0; 681 } 682 683 /** 684 * DPDK callback to get the number of used descriptors in a RX queue 685 * 686 * @param dev 687 * Pointer to the device structure. 688 * 689 * @param rx_queue_id 690 * The Rx queue. 691 * 692 * @return 693 * The number of used rx descriptor. 694 * -EINVAL if the queue is invalid 695 */ 696 uint32_t 697 mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id) 698 { 699 struct mlx5_priv *priv = dev->data->dev_private; 700 struct mlx5_rxq_data *rxq; 701 702 if (dev->rx_pkt_burst == NULL || 703 dev->rx_pkt_burst == removed_rx_burst) { 704 rte_errno = ENOTSUP; 705 return -rte_errno; 706 } 707 rxq = (*priv->rxqs)[rx_queue_id]; 708 if (!rxq) { 709 rte_errno = EINVAL; 710 return -rte_errno; 711 } 712 return rx_queue_count(rxq); 713 } 714 715 #define MLX5_SYSTEM_LOG_DIR "/var/log" 716 /** 717 * Dump debug information to log file. 718 * 719 * @param fname 720 * The file name. 721 * @param hex_title 722 * If not NULL this string is printed as a header to the output 723 * and the output will be in hexadecimal view. 724 * @param buf 725 * This is the buffer address to print out. 726 * @param len 727 * The number of bytes to dump out. 728 */ 729 void 730 mlx5_dump_debug_information(const char *fname, const char *hex_title, 731 const void *buf, unsigned int hex_len) 732 { 733 FILE *fd; 734 735 MKSTR(path, "%s/%s", MLX5_SYSTEM_LOG_DIR, fname); 736 fd = fopen(path, "a+"); 737 if (!fd) { 738 DRV_LOG(WARNING, "cannot open %s for debug dump", path); 739 MKSTR(path2, "./%s", fname); 740 fd = fopen(path2, "a+"); 741 if (!fd) { 742 DRV_LOG(ERR, "cannot open %s for debug dump", path2); 743 return; 744 } 745 DRV_LOG(INFO, "New debug dump in file %s", path2); 746 } else { 747 DRV_LOG(INFO, "New debug dump in file %s", path); 748 } 749 if (hex_title) 750 rte_hexdump(fd, hex_title, buf, hex_len); 751 else 752 fprintf(fd, "%s", (const char *)buf); 753 fprintf(fd, "\n\n\n"); 754 fclose(fd); 755 } 756 757 /** 758 * Move QP from error state to running state and initialize indexes. 
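 * The queue is switched back to ready via the MLX5_TXQ_MOD_ERR2RDY state
 * transition and the SQ indexes (wqe_ci, wqe_pi, elts_comp) are reset to
 * zero.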
759 * 760 * @param txq_ctrl 761 * Pointer to TX queue control structure. 762 * 763 * @return 764 * 0 on success, else -1. 765 */ 766 static int 767 tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl) 768 { 769 struct mlx5_mp_arg_queue_state_modify sm = { 770 .is_wq = 0, 771 .queue_id = txq_ctrl->txq.idx, 772 }; 773 774 if (mlx5_queue_state_modify(ETH_DEV(txq_ctrl->priv), &sm)) 775 return -1; 776 txq_ctrl->txq.wqe_ci = 0; 777 txq_ctrl->txq.wqe_pi = 0; 778 txq_ctrl->txq.elts_comp = 0; 779 return 0; 780 } 781 782 /* Return 1 if the error CQE is signed otherwise, sign it and return 0. */ 783 static int 784 check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe) 785 { 786 static const uint8_t magic[] = "seen"; 787 int ret = 1; 788 unsigned int i; 789 790 for (i = 0; i < sizeof(magic); ++i) 791 if (!ret || err_cqe->rsvd1[i] != magic[i]) { 792 ret = 0; 793 err_cqe->rsvd1[i] = magic[i]; 794 } 795 return ret; 796 } 797 798 /** 799 * Handle error CQE. 800 * 801 * @param txq 802 * Pointer to TX queue structure. 803 * @param error_cqe 804 * Pointer to the error CQE. 805 * 806 * @return 807 * Negative value if queue recovery failed, otherwise 808 * the error completion entry is handled successfully. 809 */ 810 static int 811 mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq, 812 volatile struct mlx5_err_cqe *err_cqe) 813 { 814 if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) { 815 const uint16_t wqe_m = ((1 << txq->wqe_n) - 1); 816 struct mlx5_txq_ctrl *txq_ctrl = 817 container_of(txq, struct mlx5_txq_ctrl, txq); 818 uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter); 819 int seen = check_err_cqe_seen(err_cqe); 820 821 if (!seen && txq_ctrl->dump_file_n < 822 txq_ctrl->priv->config.max_dump_files_num) { 823 MKSTR(err_str, "Unexpected CQE error syndrome " 824 "0x%02x CQN = %u SQN = %u wqe_counter = %u " 825 "wq_ci = %u cq_ci = %u", err_cqe->syndrome, 826 txq->cqe_s, txq->qp_num_8s >> 8, 827 rte_be_to_cpu_16(err_cqe->wqe_counter), 828 txq->wqe_ci, txq->cq_ci); 829 MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u", 830 PORT_ID(txq_ctrl->priv), txq->idx, 831 txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc()); 832 mlx5_dump_debug_information(name, NULL, err_str, 0); 833 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 834 (const void *)((uintptr_t) 835 txq->cqes), 836 sizeof(*err_cqe) * 837 (1 << txq->cqe_n)); 838 mlx5_dump_debug_information(name, "MLX5 Error SQ:", 839 (const void *)((uintptr_t) 840 txq->wqes), 841 MLX5_WQE_SIZE * 842 (1 << txq->wqe_n)); 843 txq_ctrl->dump_file_n++; 844 } 845 if (!seen) 846 /* 847 * Count errors in WQEs units. 848 * Later it can be improved to count error packets, 849 * for example, by SQ parsing to find how much packets 850 * should be counted for each WQE. 851 */ 852 txq->stats.oerrors += ((txq->wqe_ci & wqe_m) - 853 new_wqe_pi) & wqe_m; 854 if (tx_recover_qp(txq_ctrl)) { 855 /* Recovering failed - retry later on the same WQE. */ 856 return -1; 857 } 858 /* Release all the remaining buffers. */ 859 txq_free_elts(txq_ctrl); 860 } 861 return 0; 862 } 863 864 /** 865 * Translate RX completion flags to packet type. 866 * 867 * @param[in] rxq 868 * Pointer to RX queue structure. 869 * @param[in] cqe 870 * Pointer to CQE. 871 * 872 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 873 * 874 * @return 875 * Packet type for struct rte_mbuf. 
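 *   For example, index 0x06 (IPv4, TCP, not tunneled) resolves through
 *   mlx5_ptype_table[] to RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN
 *   | RTE_PTYPE_L4_TCP.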
876 */ 877 static inline uint32_t 878 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 879 volatile struct mlx5_mini_cqe8 *mcqe) 880 { 881 uint8_t idx; 882 uint8_t ptype; 883 uint8_t pinfo = (cqe->pkt_info & 0x3) << 6; 884 885 /* Get l3/l4 header from mini-CQE in case L3/L4 format*/ 886 if (mcqe == NULL || 887 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 888 ptype = (cqe->hdr_type_etc & 0xfc00) >> 10; 889 else 890 ptype = mcqe->hdr_type >> 2; 891 /* 892 * The index to the array should have: 893 * bit[1:0] = l3_hdr_type 894 * bit[4:2] = l4_hdr_type 895 * bit[5] = ip_frag 896 * bit[6] = tunneled 897 * bit[7] = outer_l3_type 898 */ 899 idx = pinfo | ptype; 900 return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6)); 901 } 902 903 /** 904 * Initialize Rx WQ and indexes. 905 * 906 * @param[in] rxq 907 * Pointer to RX queue structure. 908 */ 909 void 910 mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) 911 { 912 const unsigned int wqe_n = 1 << rxq->elts_n; 913 unsigned int i; 914 915 for (i = 0; (i != wqe_n); ++i) { 916 volatile struct mlx5_wqe_data_seg *scat; 917 uintptr_t addr; 918 uint32_t byte_count; 919 920 if (mlx5_rxq_mprq_enabled(rxq)) { 921 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i]; 922 923 scat = &((volatile struct mlx5_wqe_mprq *) 924 rxq->wqes)[i].dseg; 925 addr = (uintptr_t)mlx5_mprq_buf_addr(buf, 926 1 << rxq->strd_num_n); 927 byte_count = (1 << rxq->strd_sz_n) * 928 (1 << rxq->strd_num_n); 929 } else { 930 struct rte_mbuf *buf = (*rxq->elts)[i]; 931 932 scat = &((volatile struct mlx5_wqe_data_seg *) 933 rxq->wqes)[i]; 934 addr = rte_pktmbuf_mtod(buf, uintptr_t); 935 byte_count = DATA_LEN(buf); 936 } 937 /* scat->addr must be able to store a pointer. */ 938 MLX5_ASSERT(sizeof(scat->addr) >= sizeof(uintptr_t)); 939 *scat = (struct mlx5_wqe_data_seg){ 940 .addr = rte_cpu_to_be_64(addr), 941 .byte_count = rte_cpu_to_be_32(byte_count), 942 .lkey = mlx5_rx_addr2mr(rxq, addr), 943 }; 944 } 945 rxq->consumed_strd = 0; 946 rxq->decompressed = 0; 947 rxq->rq_pi = 0; 948 rxq->zip = (struct rxq_zip){ 949 .ai = 0, 950 }; 951 rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ? 952 (wqe_n >> rxq->sges_n) * (1 << rxq->strd_num_n) : 0; 953 /* Update doorbell counter. */ 954 rxq->rq_ci = wqe_n >> rxq->sges_n; 955 rte_io_wmb(); 956 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 957 } 958 959 /** 960 * Modify a Verbs/DevX queue state. 961 * This must be called from the primary process. 962 * 963 * @param dev 964 * Pointer to Ethernet device. 965 * @param sm 966 * State modify request parameters. 967 * 968 * @return 969 * 0 in case of success else non-zero value and rte_errno is set. 
970 */ 971 int 972 mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, 973 const struct mlx5_mp_arg_queue_state_modify *sm) 974 { 975 int ret; 976 struct mlx5_priv *priv = dev->data->dev_private; 977 978 if (sm->is_wq) { 979 struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id]; 980 struct mlx5_rxq_ctrl *rxq_ctrl = 981 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 982 983 ret = priv->obj_ops.rxq_obj_modify(rxq_ctrl->obj, sm->state); 984 if (ret) { 985 DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s", 986 sm->state, strerror(errno)); 987 rte_errno = errno; 988 return ret; 989 } 990 } else { 991 struct mlx5_txq_data *txq = (*priv->txqs)[sm->queue_id]; 992 struct mlx5_txq_ctrl *txq_ctrl = 993 container_of(txq, struct mlx5_txq_ctrl, txq); 994 995 ret = priv->obj_ops.txq_obj_modify(txq_ctrl->obj, 996 MLX5_TXQ_MOD_ERR2RDY, 997 (uint8_t)priv->dev_port); 998 if (ret) 999 return ret; 1000 } 1001 return 0; 1002 } 1003 1004 /** 1005 * Modify a Verbs queue state. 1006 * 1007 * @param dev 1008 * Pointer to Ethernet device. 1009 * @param sm 1010 * State modify request parameters. 1011 * 1012 * @return 1013 * 0 in case of success else non-zero value. 1014 */ 1015 static int 1016 mlx5_queue_state_modify(struct rte_eth_dev *dev, 1017 struct mlx5_mp_arg_queue_state_modify *sm) 1018 { 1019 struct mlx5_priv *priv = dev->data->dev_private; 1020 int ret = 0; 1021 1022 switch (rte_eal_process_type()) { 1023 case RTE_PROC_PRIMARY: 1024 ret = mlx5_queue_state_modify_primary(dev, sm); 1025 break; 1026 case RTE_PROC_SECONDARY: 1027 ret = mlx5_mp_req_queue_state_modify(&priv->mp_id, sm); 1028 break; 1029 default: 1030 break; 1031 } 1032 return ret; 1033 } 1034 1035 /** 1036 * Handle a Rx error. 1037 * The function inserts the RQ state to reset when the first error CQE is 1038 * shown, then drains the CQ by the caller function loop. When the CQ is empty, 1039 * it moves the RQ state to ready and initializes the RQ. 1040 * Next CQE identification and error counting are in the caller responsibility. 1041 * 1042 * @param[in] rxq 1043 * Pointer to RX queue structure. 1044 * @param[in] vec 1045 * 1 when called from vectorized Rx burst, need to prepare mbufs for the RQ. 1046 * 0 when called from non-vectorized Rx burst. 1047 * 1048 * @return 1049 * -1 in case of recovery error, otherwise the CQE status. 
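 *   The handler walks rxq->err_state: NO_ERROR -> NEED_RESET (move the RQ
 *   to RESET state) -> NEED_READY (move it back to RDY and reinitialize),
 *   then back to NO_ERROR.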
1050 */ 1051 int 1052 mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) 1053 { 1054 const uint16_t cqe_n = 1 << rxq->cqe_n; 1055 const uint16_t cqe_mask = cqe_n - 1; 1056 const uint16_t wqe_n = 1 << rxq->elts_n; 1057 const uint16_t strd_n = 1 << rxq->strd_num_n; 1058 struct mlx5_rxq_ctrl *rxq_ctrl = 1059 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 1060 union { 1061 volatile struct mlx5_cqe *cqe; 1062 volatile struct mlx5_err_cqe *err_cqe; 1063 } u = { 1064 .cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask], 1065 }; 1066 struct mlx5_mp_arg_queue_state_modify sm; 1067 int ret; 1068 1069 switch (rxq->err_state) { 1070 case MLX5_RXQ_ERR_STATE_NO_ERROR: 1071 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET; 1072 /* Fall-through */ 1073 case MLX5_RXQ_ERR_STATE_NEED_RESET: 1074 sm.is_wq = 1; 1075 sm.queue_id = rxq->idx; 1076 sm.state = IBV_WQS_RESET; 1077 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm)) 1078 return -1; 1079 if (rxq_ctrl->dump_file_n < 1080 rxq_ctrl->priv->config.max_dump_files_num) { 1081 MKSTR(err_str, "Unexpected CQE error syndrome " 1082 "0x%02x CQN = %u RQN = %u wqe_counter = %u" 1083 " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome, 1084 rxq->cqn, rxq_ctrl->wqn, 1085 rte_be_to_cpu_16(u.err_cqe->wqe_counter), 1086 rxq->rq_ci << rxq->sges_n, rxq->cq_ci); 1087 MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u", 1088 rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc()); 1089 mlx5_dump_debug_information(name, NULL, err_str, 0); 1090 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 1091 (const void *)((uintptr_t) 1092 rxq->cqes), 1093 sizeof(*u.cqe) * cqe_n); 1094 mlx5_dump_debug_information(name, "MLX5 Error RQ:", 1095 (const void *)((uintptr_t) 1096 rxq->wqes), 1097 16 * wqe_n); 1098 rxq_ctrl->dump_file_n++; 1099 } 1100 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY; 1101 /* Fall-through */ 1102 case MLX5_RXQ_ERR_STATE_NEED_READY: 1103 ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci); 1104 if (ret == MLX5_CQE_STATUS_HW_OWN) { 1105 rte_io_wmb(); 1106 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1107 rte_io_wmb(); 1108 /* 1109 * The RQ consumer index must be zeroed while moving 1110 * from RESET state to RDY state. 1111 */ 1112 *rxq->rq_db = rte_cpu_to_be_32(0); 1113 rte_io_wmb(); 1114 sm.is_wq = 1; 1115 sm.queue_id = rxq->idx; 1116 sm.state = IBV_WQS_RDY; 1117 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), 1118 &sm)) 1119 return -1; 1120 if (vec) { 1121 const uint32_t elts_n = 1122 mlx5_rxq_mprq_enabled(rxq) ? 1123 wqe_n * strd_n : wqe_n; 1124 const uint32_t e_mask = elts_n - 1; 1125 uint32_t elts_ci = 1126 mlx5_rxq_mprq_enabled(rxq) ? 1127 rxq->elts_ci : rxq->rq_ci; 1128 uint32_t elt_idx; 1129 struct rte_mbuf **elt; 1130 int i; 1131 unsigned int n = elts_n - (elts_ci - 1132 rxq->rq_pi); 1133 1134 for (i = 0; i < (int)n; ++i) { 1135 elt_idx = (elts_ci + i) & e_mask; 1136 elt = &(*rxq->elts)[elt_idx]; 1137 *elt = rte_mbuf_raw_alloc(rxq->mp); 1138 if (!*elt) { 1139 for (i--; i >= 0; --i) { 1140 elt_idx = (elts_ci + 1141 i) & elts_n; 1142 elt = &(*rxq->elts) 1143 [elt_idx]; 1144 rte_pktmbuf_free_seg 1145 (*elt); 1146 } 1147 return -1; 1148 } 1149 } 1150 for (i = 0; i < (int)elts_n; ++i) { 1151 elt = &(*rxq->elts)[i]; 1152 DATA_LEN(*elt) = 1153 (uint16_t)((*elt)->buf_len - 1154 rte_pktmbuf_headroom(*elt)); 1155 } 1156 /* Padding with a fake mbuf for vec Rx. 
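				 * The vectorized Rx paths may read elts in
				 * batches of MLX5_VPMD_DESCS_PER_LOOP, so the
				 * trailing entries point at &rxq->fake_mbuf
				 * to keep those overreads harmless.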
*/ 1157 for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i) 1158 (*rxq->elts)[elts_n + i] = 1159 &rxq->fake_mbuf; 1160 } 1161 mlx5_rxq_initialize(rxq); 1162 rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; 1163 } 1164 return ret; 1165 default: 1166 return -1; 1167 } 1168 } 1169 1170 /** 1171 * Get size of the next packet for a given CQE. For compressed CQEs, the 1172 * consumer index is updated only once all packets of the current one have 1173 * been processed. 1174 * 1175 * @param rxq 1176 * Pointer to RX queue. 1177 * @param cqe 1178 * CQE to process. 1179 * @param[out] mcqe 1180 * Store pointer to mini-CQE if compressed. Otherwise, the pointer is not 1181 * written. 1182 * 1183 * @return 1184 * 0 in case of empty CQE, otherwise the packet size in bytes. 1185 */ 1186 static inline int 1187 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 1188 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe) 1189 { 1190 struct rxq_zip *zip = &rxq->zip; 1191 uint16_t cqe_n = cqe_cnt + 1; 1192 int len; 1193 uint16_t idx, end; 1194 1195 do { 1196 len = 0; 1197 /* Process compressed data in the CQE and mini arrays. */ 1198 if (zip->ai) { 1199 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1200 (volatile struct mlx5_mini_cqe8 (*)[8]) 1201 (uintptr_t)(&(*rxq->cqes)[zip->ca & 1202 cqe_cnt].pkt_info); 1203 len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt & 1204 rxq->byte_mask); 1205 *mcqe = &(*mc)[zip->ai & 7]; 1206 if ((++zip->ai & 7) == 0) { 1207 /* Invalidate consumed CQEs */ 1208 idx = zip->ca; 1209 end = zip->na; 1210 while (idx != end) { 1211 (*rxq->cqes)[idx & cqe_cnt].op_own = 1212 MLX5_CQE_INVALIDATE; 1213 ++idx; 1214 } 1215 /* 1216 * Increment consumer index to skip the number 1217 * of CQEs consumed. Hardware leaves holes in 1218 * the CQ ring for software use. 1219 */ 1220 zip->ca = zip->na; 1221 zip->na += 8; 1222 } 1223 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { 1224 /* Invalidate the rest */ 1225 idx = zip->ca; 1226 end = zip->cq_ci; 1227 1228 while (idx != end) { 1229 (*rxq->cqes)[idx & cqe_cnt].op_own = 1230 MLX5_CQE_INVALIDATE; 1231 ++idx; 1232 } 1233 rxq->cq_ci = zip->cq_ci; 1234 zip->ai = 0; 1235 } 1236 /* 1237 * No compressed data, get next CQE and verify if it is 1238 * compressed. 1239 */ 1240 } else { 1241 int ret; 1242 int8_t op_own; 1243 uint32_t cq_ci; 1244 1245 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 1246 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 1247 if (unlikely(ret == MLX5_CQE_STATUS_ERR || 1248 rxq->err_state)) { 1249 ret = mlx5_rx_err_handle(rxq, 0); 1250 if (ret == MLX5_CQE_STATUS_HW_OWN || 1251 ret == -1) 1252 return 0; 1253 } else { 1254 return 0; 1255 } 1256 } 1257 /* 1258 * Introduce the local variable to have queue cq_ci 1259 * index in queue structure always consistent with 1260 * actual CQE boundary (not pointing to the middle 1261 * of compressed CQE session). 1262 */ 1263 cq_ci = rxq->cq_ci + 1; 1264 op_own = cqe->op_own; 1265 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { 1266 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1267 (volatile struct mlx5_mini_cqe8 (*)[8]) 1268 (uintptr_t)(&(*rxq->cqes) 1269 [cq_ci & cqe_cnt].pkt_info); 1270 1271 /* Fix endianness. */ 1272 zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt); 1273 /* 1274 * Current mini array position is the one 1275 * returned by check_cqe64(). 1276 * 1277 * If completion comprises several mini arrays, 1278 * as a special case the second one is located 1279 * 7 CQEs after the initial CQE instead of 8 1280 * for subsequent ones. 
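				 * E.g. starting at cq_ci == N the mini arrays
				 * are taken from slots N, N + 7, N + 15,
				 * N + 23, ... as tracked by zip->ca/zip->na
				 * below.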
1281 */ 1282 zip->ca = cq_ci; 1283 zip->na = zip->ca + 7; 1284 /* Compute the next non compressed CQE. */ 1285 zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; 1286 /* Get packet size to return. */ 1287 len = rte_be_to_cpu_32((*mc)[0].byte_cnt & 1288 rxq->byte_mask); 1289 *mcqe = &(*mc)[0]; 1290 zip->ai = 1; 1291 /* Prefetch all to be invalidated */ 1292 idx = zip->ca; 1293 end = zip->cq_ci; 1294 while (idx != end) { 1295 rte_prefetch0(&(*rxq->cqes)[(idx) & 1296 cqe_cnt]); 1297 ++idx; 1298 } 1299 } else { 1300 rxq->cq_ci = cq_ci; 1301 len = rte_be_to_cpu_32(cqe->byte_cnt); 1302 } 1303 } 1304 if (unlikely(rxq->err_state)) { 1305 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1306 ++rxq->stats.idropped; 1307 } else { 1308 return len; 1309 } 1310 } while (1); 1311 } 1312 1313 /** 1314 * Translate RX completion flags to offload flags. 1315 * 1316 * @param[in] cqe 1317 * Pointer to CQE. 1318 * 1319 * @return 1320 * Offload flags (ol_flags) for struct rte_mbuf. 1321 */ 1322 static inline uint32_t 1323 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe) 1324 { 1325 uint32_t ol_flags = 0; 1326 uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc); 1327 1328 ol_flags = 1329 TRANSPOSE(flags, 1330 MLX5_CQE_RX_L3_HDR_VALID, 1331 PKT_RX_IP_CKSUM_GOOD) | 1332 TRANSPOSE(flags, 1333 MLX5_CQE_RX_L4_HDR_VALID, 1334 PKT_RX_L4_CKSUM_GOOD); 1335 return ol_flags; 1336 } 1337 1338 /** 1339 * Fill in mbuf fields from RX completion flags. 1340 * Note that pkt->ol_flags should be initialized outside of this function. 1341 * 1342 * @param rxq 1343 * Pointer to RX queue. 1344 * @param pkt 1345 * mbuf to fill. 1346 * @param cqe 1347 * CQE to process. 1348 * @param rss_hash_res 1349 * Packet RSS Hash result. 1350 */ 1351 static inline void 1352 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 1353 volatile struct mlx5_cqe *cqe, 1354 volatile struct mlx5_mini_cqe8 *mcqe) 1355 { 1356 /* Update packet information. */ 1357 pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe, mcqe); 1358 1359 if (rxq->rss_hash) { 1360 uint32_t rss_hash_res = 0; 1361 1362 /* If compressed, take hash result from mini-CQE. */ 1363 if (mcqe == NULL || 1364 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH) 1365 rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res); 1366 else 1367 rss_hash_res = rte_be_to_cpu_32(mcqe->rx_hash_result); 1368 if (rss_hash_res) { 1369 pkt->hash.rss = rss_hash_res; 1370 pkt->ol_flags |= PKT_RX_RSS_HASH; 1371 } 1372 } 1373 if (rxq->mark) { 1374 uint32_t mark = 0; 1375 1376 /* If compressed, take flow tag from mini-CQE. 
*/ 1377 if (mcqe == NULL || 1378 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) 1379 mark = cqe->sop_drop_qpn; 1380 else 1381 mark = ((mcqe->byte_cnt_flow & 0xff) << 8) | 1382 (mcqe->flow_tag_high << 16); 1383 if (MLX5_FLOW_MARK_IS_VALID(mark)) { 1384 pkt->ol_flags |= PKT_RX_FDIR; 1385 if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) { 1386 pkt->ol_flags |= PKT_RX_FDIR_ID; 1387 pkt->hash.fdir.hi = mlx5_flow_mark_get(mark); 1388 } 1389 } 1390 } 1391 if (rxq->dynf_meta && cqe->flow_table_metadata) { 1392 pkt->ol_flags |= rxq->flow_meta_mask; 1393 *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, uint32_t *) = 1394 cqe->flow_table_metadata; 1395 } 1396 if (rxq->csum) 1397 pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); 1398 if (rxq->vlan_strip) { 1399 bool vlan_strip; 1400 1401 if (mcqe == NULL || 1402 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 1403 vlan_strip = cqe->hdr_type_etc & 1404 RTE_BE16(MLX5_CQE_VLAN_STRIPPED); 1405 else 1406 vlan_strip = mcqe->hdr_type & 1407 RTE_BE16(MLX5_CQE_VLAN_STRIPPED); 1408 if (vlan_strip) { 1409 pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; 1410 pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info); 1411 } 1412 } 1413 if (rxq->hw_timestamp) { 1414 uint64_t ts = rte_be_to_cpu_64(cqe->timestamp); 1415 1416 if (rxq->rt_timestamp) 1417 ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts); 1418 mlx5_timestamp_set(pkt, rxq->timestamp_offset, ts); 1419 pkt->ol_flags |= rxq->timestamp_rx_flag; 1420 } 1421 } 1422 1423 /** 1424 * DPDK callback for RX. 1425 * 1426 * @param dpdk_rxq 1427 * Generic pointer to RX queue structure. 1428 * @param[out] pkts 1429 * Array to store received packets. 1430 * @param pkts_n 1431 * Maximum number of packets in array. 1432 * 1433 * @return 1434 * Number of packets successfully received (<= pkts_n). 1435 */ 1436 uint16_t 1437 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1438 { 1439 struct mlx5_rxq_data *rxq = dpdk_rxq; 1440 const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; 1441 const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1; 1442 const unsigned int sges_n = rxq->sges_n; 1443 struct rte_mbuf *pkt = NULL; 1444 struct rte_mbuf *seg = NULL; 1445 volatile struct mlx5_cqe *cqe = 1446 &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1447 unsigned int i = 0; 1448 unsigned int rq_ci = rxq->rq_ci << sges_n; 1449 int len = 0; /* keep its value across iterations. */ 1450 1451 while (pkts_n) { 1452 unsigned int idx = rq_ci & wqe_cnt; 1453 volatile struct mlx5_wqe_data_seg *wqe = 1454 &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx]; 1455 struct rte_mbuf *rep = (*rxq->elts)[idx]; 1456 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1457 1458 if (pkt) 1459 NEXT(seg) = rep; 1460 seg = rep; 1461 rte_prefetch0(seg); 1462 rte_prefetch0(cqe); 1463 rte_prefetch0(wqe); 1464 /* Allocate the buf from the same pool. */ 1465 rep = rte_mbuf_raw_alloc(seg->pool); 1466 if (unlikely(rep == NULL)) { 1467 ++rxq->stats.rx_nombuf; 1468 if (!pkt) { 1469 /* 1470 * no buffers before we even started, 1471 * bail out silently. 
1472 */ 1473 break; 1474 } 1475 while (pkt != seg) { 1476 MLX5_ASSERT(pkt != (*rxq->elts)[idx]); 1477 rep = NEXT(pkt); 1478 NEXT(pkt) = NULL; 1479 NB_SEGS(pkt) = 1; 1480 rte_mbuf_raw_free(pkt); 1481 pkt = rep; 1482 } 1483 break; 1484 } 1485 if (!pkt) { 1486 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1487 len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe); 1488 if (!len) { 1489 rte_mbuf_raw_free(rep); 1490 break; 1491 } 1492 pkt = seg; 1493 MLX5_ASSERT(len >= (rxq->crc_present << 2)); 1494 pkt->ol_flags &= EXT_ATTACHED_MBUF; 1495 rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe); 1496 if (rxq->crc_present) 1497 len -= RTE_ETHER_CRC_LEN; 1498 PKT_LEN(pkt) = len; 1499 if (cqe->lro_num_seg > 1) { 1500 mlx5_lro_update_hdr 1501 (rte_pktmbuf_mtod(pkt, uint8_t *), cqe, 1502 mcqe, rxq, len); 1503 pkt->ol_flags |= PKT_RX_LRO; 1504 pkt->tso_segsz = len / cqe->lro_num_seg; 1505 } 1506 } 1507 DATA_LEN(rep) = DATA_LEN(seg); 1508 PKT_LEN(rep) = PKT_LEN(seg); 1509 SET_DATA_OFF(rep, DATA_OFF(seg)); 1510 PORT(rep) = PORT(seg); 1511 (*rxq->elts)[idx] = rep; 1512 /* 1513 * Fill NIC descriptor with the new buffer. The lkey and size 1514 * of the buffers are already known, only the buffer address 1515 * changes. 1516 */ 1517 wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t)); 1518 /* If there's only one MR, no need to replace LKey in WQE. */ 1519 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) 1520 wqe->lkey = mlx5_rx_mb2mr(rxq, rep); 1521 if (len > DATA_LEN(seg)) { 1522 len -= DATA_LEN(seg); 1523 ++NB_SEGS(pkt); 1524 ++rq_ci; 1525 continue; 1526 } 1527 DATA_LEN(seg) = len; 1528 #ifdef MLX5_PMD_SOFT_COUNTERS 1529 /* Increment bytes counter. */ 1530 rxq->stats.ibytes += PKT_LEN(pkt); 1531 #endif 1532 /* Return packet. */ 1533 *(pkts++) = pkt; 1534 pkt = NULL; 1535 --pkts_n; 1536 ++i; 1537 /* Align consumer index to the next stride. */ 1538 rq_ci >>= sges_n; 1539 ++rq_ci; 1540 rq_ci <<= sges_n; 1541 } 1542 if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci))) 1543 return 0; 1544 /* Update the consumer index. */ 1545 rxq->rq_ci = rq_ci >> sges_n; 1546 rte_io_wmb(); 1547 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1548 rte_io_wmb(); 1549 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1550 #ifdef MLX5_PMD_SOFT_COUNTERS 1551 /* Increment packets counter. */ 1552 rxq->stats.ipackets += i; 1553 #endif 1554 return i; 1555 } 1556 1557 /** 1558 * Update LRO packet TCP header. 1559 * The HW LRO feature doesn't update the TCP header after coalescing the 1560 * TCP segments but supplies information in CQE to fill it by SW. 1561 * 1562 * @param tcp 1563 * Pointer to the TCP header. 1564 * @param cqe 1565 * Pointer to the completion entry.. 1566 * @param phcsum 1567 * The L3 pseudo-header checksum. 1568 */ 1569 static inline void 1570 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 1571 volatile struct mlx5_cqe *__rte_restrict cqe, 1572 uint32_t phcsum, uint8_t l4_type) 1573 { 1574 /* 1575 * The HW calculates only the TCP payload checksum, need to complete 1576 * the TCP header checksum and the L3 pseudo-header checksum. 
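	 * The 32-bit sum below is folded to 16 bits, complemented, and a
	 * zero result is substituted with 0xffff before being stored in
	 * tcp->cksum.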
1577 */ 1578 uint32_t csum = phcsum + cqe->csum; 1579 1580 if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK || 1581 l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) { 1582 tcp->tcp_flags |= RTE_TCP_ACK_FLAG; 1583 tcp->recv_ack = cqe->lro_ack_seq_num; 1584 tcp->rx_win = cqe->lro_tcp_win; 1585 } 1586 if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK) 1587 tcp->tcp_flags |= RTE_TCP_PSH_FLAG; 1588 tcp->cksum = 0; 1589 csum += rte_raw_cksum(tcp, (tcp->data_off >> 4) * 4); 1590 csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff); 1591 csum = (~csum) & 0xffff; 1592 if (csum == 0) 1593 csum = 0xffff; 1594 tcp->cksum = csum; 1595 } 1596 1597 /** 1598 * Update LRO packet headers. 1599 * The HW LRO feature doesn't update the L3/TCP headers after coalescing the 1600 * TCP segments but supply information in CQE to fill it by SW. 1601 * 1602 * @param padd 1603 * The packet address. 1604 * @param cqe 1605 * Pointer to the completion entry.. 1606 * @param len 1607 * The packet length. 1608 */ 1609 static inline void 1610 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 1611 volatile struct mlx5_cqe *__rte_restrict cqe, 1612 volatile struct mlx5_mini_cqe8 *mcqe, 1613 struct mlx5_rxq_data *rxq, uint32_t len) 1614 { 1615 union { 1616 struct rte_ether_hdr *eth; 1617 struct rte_vlan_hdr *vlan; 1618 struct rte_ipv4_hdr *ipv4; 1619 struct rte_ipv6_hdr *ipv6; 1620 struct rte_tcp_hdr *tcp; 1621 uint8_t *hdr; 1622 } h = { 1623 .hdr = padd, 1624 }; 1625 uint16_t proto = h.eth->ether_type; 1626 uint32_t phcsum; 1627 uint8_t l4_type; 1628 1629 h.eth++; 1630 while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) || 1631 proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) { 1632 proto = h.vlan->eth_proto; 1633 h.vlan++; 1634 } 1635 if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) { 1636 h.ipv4->time_to_live = cqe->lro_min_ttl; 1637 h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd)); 1638 h.ipv4->hdr_checksum = 0; 1639 h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4); 1640 phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0); 1641 h.ipv4++; 1642 } else { 1643 h.ipv6->hop_limits = cqe->lro_min_ttl; 1644 h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) - 1645 sizeof(*h.ipv6)); 1646 phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0); 1647 h.ipv6++; 1648 } 1649 if (mcqe == NULL || 1650 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 1651 l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) & 1652 MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; 1653 else 1654 l4_type = (rte_be_to_cpu_16(mcqe->hdr_type) & 1655 MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; 1656 mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum, l4_type); 1657 } 1658 1659 void 1660 mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque) 1661 { 1662 struct mlx5_mprq_buf *buf = opaque; 1663 1664 if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) == 1) { 1665 rte_mempool_put(buf->mp, buf); 1666 } else if (unlikely(__atomic_sub_fetch(&buf->refcnt, 1, 1667 __ATOMIC_RELAXED) == 0)) { 1668 __atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED); 1669 rte_mempool_put(buf->mp, buf); 1670 } 1671 } 1672 1673 void 1674 mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf) 1675 { 1676 mlx5_mprq_buf_free_cb(NULL, buf); 1677 } 1678 1679 /** 1680 * DPDK callback for RX with Multi-Packet RQ support. 1681 * 1682 * @param dpdk_rxq 1683 * Generic pointer to RX queue structure. 1684 * @param[out] pkts 1685 * Array to store received packets. 1686 * @param pkts_n 1687 * Maximum number of packets in array. 1688 * 1689 * @return 1690 * Number of packets successfully received (<= pkts_n). 
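 *   Each returned packet comes from a Multi-Packet RQ stride;
 *   mprq_buf_to_pkt() either copies the data into the newly allocated mbuf
 *   or attaches the stride as an external buffer, depending on the length
 *   and configuration.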
1691 */ 1692 uint16_t 1693 mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1694 { 1695 struct mlx5_rxq_data *rxq = dpdk_rxq; 1696 const uint32_t strd_n = 1 << rxq->strd_num_n; 1697 const uint32_t strd_sz = 1 << rxq->strd_sz_n; 1698 const uint32_t cq_mask = (1 << rxq->cqe_n) - 1; 1699 const uint32_t wq_mask = (1 << rxq->elts_n) - 1; 1700 volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1701 unsigned int i = 0; 1702 uint32_t rq_ci = rxq->rq_ci; 1703 uint16_t consumed_strd = rxq->consumed_strd; 1704 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1705 1706 while (i < pkts_n) { 1707 struct rte_mbuf *pkt; 1708 int ret; 1709 uint32_t len; 1710 uint16_t strd_cnt; 1711 uint16_t strd_idx; 1712 uint32_t byte_cnt; 1713 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1714 enum mlx5_rqx_code rxq_code; 1715 1716 if (consumed_strd == strd_n) { 1717 /* Replace WQE if the buffer is still in use. */ 1718 mprq_buf_replace(rxq, rq_ci & wq_mask); 1719 /* Advance to the next WQE. */ 1720 consumed_strd = 0; 1721 ++rq_ci; 1722 buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1723 } 1724 cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1725 ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe); 1726 if (!ret) 1727 break; 1728 byte_cnt = ret; 1729 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 1730 MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); 1731 if (rxq->crc_present) 1732 len -= RTE_ETHER_CRC_LEN; 1733 if (mcqe && 1734 rxq->mcqe_format == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) 1735 strd_cnt = (len / strd_sz) + !!(len % strd_sz); 1736 else 1737 strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >> 1738 MLX5_MPRQ_STRIDE_NUM_SHIFT; 1739 MLX5_ASSERT(strd_cnt); 1740 consumed_strd += strd_cnt; 1741 if (byte_cnt & MLX5_MPRQ_FILLER_MASK) 1742 continue; 1743 strd_idx = rte_be_to_cpu_16(mcqe == NULL ? 1744 cqe->wqe_counter : 1745 mcqe->stride_idx); 1746 MLX5_ASSERT(strd_idx < strd_n); 1747 MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & 1748 wq_mask)); 1749 pkt = rte_pktmbuf_alloc(rxq->mp); 1750 if (unlikely(pkt == NULL)) { 1751 ++rxq->stats.rx_nombuf; 1752 break; 1753 } 1754 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 1755 MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); 1756 if (rxq->crc_present) 1757 len -= RTE_ETHER_CRC_LEN; 1758 rxq_code = mprq_buf_to_pkt(rxq, pkt, len, buf, 1759 strd_idx, strd_cnt); 1760 if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) { 1761 rte_pktmbuf_free_seg(pkt); 1762 if (rxq_code == MLX5_RXQ_CODE_DROPPED) { 1763 ++rxq->stats.idropped; 1764 continue; 1765 } 1766 if (rxq_code == MLX5_RXQ_CODE_NOMBUF) { 1767 ++rxq->stats.rx_nombuf; 1768 break; 1769 } 1770 } 1771 rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe); 1772 if (cqe->lro_num_seg > 1) { 1773 mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt, uint8_t *), 1774 cqe, mcqe, rxq, len); 1775 pkt->ol_flags |= PKT_RX_LRO; 1776 pkt->tso_segsz = len / cqe->lro_num_seg; 1777 } 1778 PKT_LEN(pkt) = len; 1779 PORT(pkt) = rxq->port_id; 1780 #ifdef MLX5_PMD_SOFT_COUNTERS 1781 /* Increment bytes counter. */ 1782 rxq->stats.ibytes += PKT_LEN(pkt); 1783 #endif 1784 /* Return packet. */ 1785 *(pkts++) = pkt; 1786 ++i; 1787 } 1788 /* Update the consumer indexes. */ 1789 rxq->consumed_strd = consumed_strd; 1790 rte_io_wmb(); 1791 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1792 if (rq_ci != rxq->rq_ci) { 1793 rxq->rq_ci = rq_ci; 1794 rte_io_wmb(); 1795 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1796 } 1797 #ifdef MLX5_PMD_SOFT_COUNTERS 1798 /* Increment packets counter. 
*/ 1799 rxq->stats.ipackets += i; 1800 #endif 1801 return i; 1802 } 1803 1804 /** 1805 * Dummy DPDK callback for TX. 1806 * 1807 * This function is used to temporarily replace the real callback during 1808 * unsafe control operations on the queue, or in case of error. 1809 * 1810 * @param dpdk_txq 1811 * Generic pointer to TX queue structure. 1812 * @param[in] pkts 1813 * Packets to transmit. 1814 * @param pkts_n 1815 * Number of packets in array. 1816 * 1817 * @return 1818 * Number of packets successfully transmitted (<= pkts_n). 1819 */ 1820 uint16_t 1821 removed_tx_burst(void *dpdk_txq __rte_unused, 1822 struct rte_mbuf **pkts __rte_unused, 1823 uint16_t pkts_n __rte_unused) 1824 { 1825 rte_mb(); 1826 return 0; 1827 } 1828 1829 /** 1830 * Dummy DPDK callback for RX. 1831 * 1832 * This function is used to temporarily replace the real callback during 1833 * unsafe control operations on the queue, or in case of error. 1834 * 1835 * @param dpdk_rxq 1836 * Generic pointer to RX queue structure. 1837 * @param[out] pkts 1838 * Array to store received packets. 1839 * @param pkts_n 1840 * Maximum number of packets in array. 1841 * 1842 * @return 1843 * Number of packets successfully received (<= pkts_n). 1844 */ 1845 uint16_t 1846 removed_rx_burst(void *dpdk_txq __rte_unused, 1847 struct rte_mbuf **pkts __rte_unused, 1848 uint16_t pkts_n __rte_unused) 1849 { 1850 rte_mb(); 1851 return 0; 1852 } 1853 1854 /* 1855 * Vectorized Rx/Tx routines are not compiled in when required vector 1856 * instructions are not supported on a target architecture. The following null 1857 * stubs are needed for linkage when those are not included outside of this file 1858 * (e.g. mlx5_rxtx_vec_sse.c for x86). 1859 */ 1860 1861 __rte_weak uint16_t 1862 mlx5_rx_burst_vec(void *dpdk_txq __rte_unused, 1863 struct rte_mbuf **pkts __rte_unused, 1864 uint16_t pkts_n __rte_unused) 1865 { 1866 return 0; 1867 } 1868 1869 __rte_weak uint16_t 1870 mlx5_rx_burst_mprq_vec(void *dpdk_txq __rte_unused, 1871 struct rte_mbuf **pkts __rte_unused, 1872 uint16_t pkts_n __rte_unused) 1873 { 1874 return 0; 1875 } 1876 1877 __rte_weak int 1878 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused) 1879 { 1880 return -ENOTSUP; 1881 } 1882 1883 __rte_weak int 1884 mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused) 1885 { 1886 return -ENOTSUP; 1887 } 1888 1889 /** 1890 * Free the mbufs from the linear array of pointers. 1891 * 1892 * @param pkts 1893 * Pointer to array of packets to be free. 1894 * @param pkts_n 1895 * Number of packets to be freed. 1896 * @param olx 1897 * Configured Tx offloads mask. It is fully defined at 1898 * compile time and may be used for optimization. 1899 */ 1900 static __rte_always_inline void 1901 mlx5_tx_free_mbuf(struct rte_mbuf **__rte_restrict pkts, 1902 unsigned int pkts_n, 1903 unsigned int olx __rte_unused) 1904 { 1905 struct rte_mempool *pool = NULL; 1906 struct rte_mbuf **p_free = NULL; 1907 struct rte_mbuf *mbuf; 1908 unsigned int n_free = 0; 1909 1910 /* 1911 * The implemented algorithm eliminates 1912 * copying pointers to temporary array 1913 * for rte_mempool_put_bulk() calls. 1914 */ 1915 MLX5_ASSERT(pkts); 1916 MLX5_ASSERT(pkts_n); 1917 for (;;) { 1918 for (;;) { 1919 /* 1920 * Decrement mbuf reference counter, detach 1921 * indirect and external buffers if needed. 
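			 * rte_pktmbuf_prefree_seg() returns NULL when the
			 * mbuf is still referenced; such mbufs are skipped
			 * below and are not returned to the mempool.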
1922 */ 1923 mbuf = rte_pktmbuf_prefree_seg(*pkts); 1924 if (likely(mbuf != NULL)) { 1925 MLX5_ASSERT(mbuf == *pkts); 1926 if (likely(n_free != 0)) { 1927 if (unlikely(pool != mbuf->pool)) 1928 /* From different pool. */ 1929 break; 1930 } else { 1931 /* Start new scan array. */ 1932 pool = mbuf->pool; 1933 p_free = pkts; 1934 } 1935 ++n_free; 1936 ++pkts; 1937 --pkts_n; 1938 if (unlikely(pkts_n == 0)) { 1939 mbuf = NULL; 1940 break; 1941 } 1942 } else { 1943 /* 1944 * This happens if mbuf is still referenced. 1945 * We can't put it back to the pool, skip. 1946 */ 1947 ++pkts; 1948 --pkts_n; 1949 if (unlikely(n_free != 0)) 1950 /* There is some array to free.*/ 1951 break; 1952 if (unlikely(pkts_n == 0)) 1953 /* Last mbuf, nothing to free. */ 1954 return; 1955 } 1956 } 1957 for (;;) { 1958 /* 1959 * This loop is implemented to avoid multiple 1960 * inlining of rte_mempool_put_bulk(). 1961 */ 1962 MLX5_ASSERT(pool); 1963 MLX5_ASSERT(p_free); 1964 MLX5_ASSERT(n_free); 1965 /* 1966 * Free the array of pre-freed mbufs 1967 * belonging to the same memory pool. 1968 */ 1969 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 1970 if (unlikely(mbuf != NULL)) { 1971 /* There is the request to start new scan. */ 1972 pool = mbuf->pool; 1973 p_free = pkts++; 1974 n_free = 1; 1975 --pkts_n; 1976 if (likely(pkts_n != 0)) 1977 break; 1978 /* 1979 * This is the last mbuf to be freed. 1980 * Do one more loop iteration to complete. 1981 * This is rare case of the last unique mbuf. 1982 */ 1983 mbuf = NULL; 1984 continue; 1985 } 1986 if (likely(pkts_n == 0)) 1987 return; 1988 n_free = 0; 1989 break; 1990 } 1991 } 1992 } 1993 1994 /** 1995 * Free the mbuf from the elts ring buffer till new tail. 1996 * 1997 * @param txq 1998 * Pointer to Tx queue structure. 1999 * @param tail 2000 * Index in elts to free up to, becomes new elts tail. 2001 * @param olx 2002 * Configured Tx offloads mask. It is fully defined at 2003 * compile time and may be used for optimization. 2004 */ 2005 static __rte_always_inline void 2006 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 2007 uint16_t tail, 2008 unsigned int olx __rte_unused) 2009 { 2010 uint16_t n_elts = tail - txq->elts_tail; 2011 2012 MLX5_ASSERT(n_elts); 2013 MLX5_ASSERT(n_elts <= txq->elts_s); 2014 /* 2015 * Implement a loop to support ring buffer wraparound 2016 * with single inlining of mlx5_tx_free_mbuf(). 2017 */ 2018 do { 2019 unsigned int part; 2020 2021 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 2022 part = RTE_MIN(part, n_elts); 2023 MLX5_ASSERT(part); 2024 MLX5_ASSERT(part <= txq->elts_s); 2025 mlx5_tx_free_mbuf(&txq->elts[txq->elts_tail & txq->elts_m], 2026 part, olx); 2027 txq->elts_tail += part; 2028 n_elts -= part; 2029 } while (n_elts); 2030 } 2031 2032 /** 2033 * Store the mbuf being sent into elts ring buffer. 2034 * On Tx completion these mbufs will be freed. 2035 * 2036 * @param txq 2037 * Pointer to Tx queue structure. 2038 * @param pkts 2039 * Pointer to array of packets to be stored. 2040 * @param pkts_n 2041 * Number of packets to be stored. 2042 * @param olx 2043 * Configured Tx offloads mask. It is fully defined at 2044 * compile time and may be used for optimization. 
2045 */ 2046 static __rte_always_inline void 2047 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 2048 struct rte_mbuf **__rte_restrict pkts, 2049 unsigned int pkts_n, 2050 unsigned int olx __rte_unused) 2051 { 2052 unsigned int part; 2053 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 2054 2055 MLX5_ASSERT(pkts); 2056 MLX5_ASSERT(pkts_n); 2057 part = txq->elts_s - (txq->elts_head & txq->elts_m); 2058 MLX5_ASSERT(part); 2059 MLX5_ASSERT(part <= txq->elts_s); 2060 /* This code is a good candidate for vectorizing with SIMD. */ 2061 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 2062 (void *)pkts, 2063 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 2064 txq->elts_head += pkts_n; 2065 if (unlikely(part < pkts_n)) 2066 /* The copy is wrapping around the elts array. */ 2067 rte_memcpy((void *)elts, (void *)(pkts + part), 2068 (pkts_n - part) * sizeof(struct rte_mbuf *)); 2069 } 2070 2071 /** 2072 * Update completion queue consuming index via doorbell 2073 * and flush the completed data buffers. 2074 * 2075 * @param txq 2076 * Pointer to TX queue structure. 2077 * @param valid CQE pointer 2078 * if not NULL update txq->wqe_pi and flush the buffers 2079 * @param olx 2080 * Configured Tx offloads mask. It is fully defined at 2081 * compile time and may be used for optimization. 2082 */ 2083 static __rte_always_inline void 2084 mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq, 2085 volatile struct mlx5_cqe *last_cqe, 2086 unsigned int olx __rte_unused) 2087 { 2088 if (likely(last_cqe != NULL)) { 2089 uint16_t tail; 2090 2091 txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter); 2092 tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m]; 2093 if (likely(tail != txq->elts_tail)) { 2094 mlx5_tx_free_elts(txq, tail, olx); 2095 MLX5_ASSERT(tail == txq->elts_tail); 2096 } 2097 } 2098 } 2099 2100 /** 2101 * Manage TX completions. This routine checks the CQ for 2102 * arrived CQEs, deduces the last accomplished WQE in SQ, 2103 * updates SQ producing index and frees all completed mbufs. 2104 * 2105 * @param txq 2106 * Pointer to TX queue structure. 2107 * @param olx 2108 * Configured Tx offloads mask. It is fully defined at 2109 * compile time and may be used for optimization. 2110 * 2111 * NOTE: not inlined intentionally, it makes tx_burst 2112 * routine smaller, simple and faster - from experiments. 2113 */ 2114 static void 2115 mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, 2116 unsigned int olx __rte_unused) 2117 { 2118 unsigned int count = MLX5_TX_COMP_MAX_CQE; 2119 volatile struct mlx5_cqe *last_cqe = NULL; 2120 bool ring_doorbell = false; 2121 int ret; 2122 2123 do { 2124 volatile struct mlx5_cqe *cqe; 2125 2126 cqe = &txq->cqes[txq->cq_ci & txq->cqe_m]; 2127 ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci); 2128 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 2129 if (likely(ret != MLX5_CQE_STATUS_ERR)) { 2130 /* No new CQEs in completion queue. */ 2131 MLX5_ASSERT(ret == MLX5_CQE_STATUS_HW_OWN); 2132 break; 2133 } 2134 /* 2135 * Some error occurred, try to restart. 2136 * We have no barrier after WQE related Doorbell 2137 * written, make sure all writes are completed 2138 * here, before we might perform SQ reset. 2139 */ 2140 rte_wmb(); 2141 ret = mlx5_tx_error_cqe_handle 2142 (txq, (volatile struct mlx5_err_cqe *)cqe); 2143 if (unlikely(ret < 0)) { 2144 /* 2145 * Some error occurred on queue error 2146 * handling, we do not advance the index 2147 * here, allowing to retry on next call. 
2148 */ 2149 return; 2150 } 2151 /* 2152 * We are going to fetch all entries with 2153 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status. 2154 * The send queue is supposed to be empty. 2155 */ 2156 ring_doorbell = true; 2157 ++txq->cq_ci; 2158 txq->cq_pi = txq->cq_ci; 2159 last_cqe = NULL; 2160 continue; 2161 } 2162 /* Normal transmit completion. */ 2163 MLX5_ASSERT(txq->cq_ci != txq->cq_pi); 2164 #ifdef RTE_LIBRTE_MLX5_DEBUG 2165 MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) == 2166 cqe->wqe_counter); 2167 #endif 2168 ring_doorbell = true; 2169 ++txq->cq_ci; 2170 last_cqe = cqe; 2171 /* 2172 * We have to restrict the amount of processed CQEs 2173 * in one tx_burst routine call. The CQ may be large 2174 * and many CQEs may be updated by the NIC in one 2175 * transaction. Buffers freeing is time consuming, 2176 * multiple iterations may introduce significant 2177 * latency. 2178 */ 2179 if (likely(--count == 0)) 2180 break; 2181 } while (true); 2182 if (likely(ring_doorbell)) { 2183 /* Ring doorbell to notify hardware. */ 2184 rte_compiler_barrier(); 2185 *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); 2186 mlx5_tx_comp_flush(txq, last_cqe, olx); 2187 } 2188 } 2189 2190 /** 2191 * Check if the completion request flag should be set in the last WQE. 2192 * Both pushed mbufs and WQEs are monitored and the completion request 2193 * flag is set if any of thresholds is reached. 2194 * 2195 * @param txq 2196 * Pointer to TX queue structure. 2197 * @param loc 2198 * Pointer to burst routine local context. 2199 * @param olx 2200 * Configured Tx offloads mask. It is fully defined at 2201 * compile time and may be used for optimization. 2202 */ 2203 static __rte_always_inline void 2204 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 2205 struct mlx5_txq_local *__rte_restrict loc, 2206 unsigned int olx) 2207 { 2208 uint16_t head = txq->elts_head; 2209 unsigned int part; 2210 2211 part = MLX5_TXOFF_CONFIG(INLINE) ? 2212 0 : loc->pkts_sent - loc->pkts_copy; 2213 head += part; 2214 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 2215 (MLX5_TXOFF_CONFIG(INLINE) && 2216 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 2217 volatile struct mlx5_wqe *last = loc->wqe_last; 2218 2219 MLX5_ASSERT(last); 2220 txq->elts_comp = head; 2221 if (MLX5_TXOFF_CONFIG(INLINE)) 2222 txq->wqe_comp = txq->wqe_ci; 2223 /* Request unconditional completion on last WQE. */ 2224 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 2225 MLX5_COMP_MODE_OFFSET); 2226 /* Save elts_head in dedicated free on completion queue. */ 2227 #ifdef RTE_LIBRTE_MLX5_DEBUG 2228 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 2229 (last->cseg.opcode >> 8) << 16; 2230 #else 2231 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 2232 #endif 2233 /* A CQE slot must always be available. */ 2234 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 2235 } 2236 } 2237 2238 /** 2239 * DPDK callback to check the status of a tx descriptor. 2240 * 2241 * @param tx_queue 2242 * The tx queue. 2243 * @param[in] offset 2244 * The index of the descriptor in the ring. 2245 * 2246 * @return 2247 * The status of the tx descriptor. 
2248 */ 2249 int 2250 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) 2251 { 2252 struct mlx5_txq_data *__rte_restrict txq = tx_queue; 2253 uint16_t used; 2254 2255 mlx5_tx_handle_completion(txq, 0); 2256 used = txq->elts_head - txq->elts_tail; 2257 if (offset < used) 2258 return RTE_ETH_TX_DESC_FULL; 2259 return RTE_ETH_TX_DESC_DONE; 2260 } 2261 2262 /** 2263 * Build the Control Segment with specified opcode: 2264 * - MLX5_OPCODE_SEND 2265 * - MLX5_OPCODE_ENHANCED_MPSW 2266 * - MLX5_OPCODE_TSO 2267 * 2268 * @param txq 2269 * Pointer to TX queue structure. 2270 * @param loc 2271 * Pointer to burst routine local context. 2272 * @param wqe 2273 * Pointer to WQE to fill with built Control Segment. 2274 * @param ds 2275 * Supposed length of WQE in segments. 2276 * @param opcode 2277 * SQ WQE opcode to put into Control Segment. 2278 * @param olx 2279 * Configured Tx offloads mask. It is fully defined at 2280 * compile time and may be used for optimization. 2281 */ 2282 static __rte_always_inline void 2283 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 2284 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2285 struct mlx5_wqe *__rte_restrict wqe, 2286 unsigned int ds, 2287 unsigned int opcode, 2288 unsigned int olx __rte_unused) 2289 { 2290 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 2291 2292 /* For legacy MPW replace the EMPW by TSO with modifier. */ 2293 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 2294 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 2295 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 2296 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2297 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 2298 MLX5_COMP_MODE_OFFSET); 2299 cs->misc = RTE_BE32(0); 2300 } 2301 2302 /** 2303 * Build the Synchronize Queue Segment with specified completion index. 2304 * 2305 * @param txq 2306 * Pointer to TX queue structure. 2307 * @param loc 2308 * Pointer to burst routine local context. 2309 * @param wqe 2310 * Pointer to WQE to fill with built Control Segment. 2311 * @param wci 2312 * Completion index in Clock Queue to wait. 2313 * @param olx 2314 * Configured Tx offloads mask. It is fully defined at 2315 * compile time and may be used for optimization. 2316 */ 2317 static __rte_always_inline void 2318 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 2319 struct mlx5_txq_local *restrict loc __rte_unused, 2320 struct mlx5_wqe *restrict wqe, 2321 unsigned int wci, 2322 unsigned int olx __rte_unused) 2323 { 2324 struct mlx5_wqe_qseg *qs; 2325 2326 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 2327 qs->max_index = rte_cpu_to_be_32(wci); 2328 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq_obj.cq->id); 2329 qs->reserved0 = RTE_BE32(0); 2330 qs->reserved1 = RTE_BE32(0); 2331 } 2332 2333 /** 2334 * Build the Ethernet Segment without inlined data. 2335 * Supports Software Parser, Checksums and VLAN 2336 * insertion Tx offload features. 2337 * 2338 * @param txq 2339 * Pointer to TX queue structure. 2340 * @param loc 2341 * Pointer to burst routine local context. 2342 * @param wqe 2343 * Pointer to WQE to fill with built Ethernet Segment. 2344 * @param olx 2345 * Configured Tx offloads mask. It is fully defined at 2346 * compile time and may be used for optimization. 
 */
static __rte_always_inline void
mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe *__rte_restrict wqe,
		  unsigned int olx)
{
	struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg;
	uint32_t csum;

	/*
	 * Calculate and set check sum flags first, the dword field
	 * in the segment may be shared with Software Parser flags.
	 */
	csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0;
	es->flags = rte_cpu_to_le_32(csum);
	/*
	 * Calculate and set Software Parser offsets and flags.
	 * These flags are set for custom UDP and IP tunnel packets.
	 */
	es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx);
	/* Fill metadata field if needed. */
	es->metadata = MLX5_TXOFF_CONFIG(METADATA) ?
		       loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ?
		       *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0;
	/* Engage VLAN tag insertion feature if requested. */
	if (MLX5_TXOFF_CONFIG(VLAN) &&
	    loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) {
		/*
		 * We should get here only if the device supports
		 * this feature correctly.
		 */
		MLX5_ASSERT(txq->vlan_en);
		es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT |
						  loc->mbuf->vlan_tci);
	} else {
		es->inline_hdr = RTE_BE32(0);
	}
}

/**
 * Build the Ethernet Segment with minimal inlined data
 * of MLX5_ESEG_MIN_INLINE_SIZE bytes. This is
 * used to fill the gap in single WQEBB WQEs.
 * Supports Software Parser, Checksums and VLAN
 * insertion Tx offload features.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet Segment.
 * @param vlan
 *   Length of VLAN tag insertion if any.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe *__rte_restrict wqe,
		  unsigned int vlan,
		  unsigned int olx)
{
	struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg;
	uint32_t csum;
	uint8_t *psrc, *pdst;

	/*
	 * Calculate and set check sum flags first, the dword field
	 * in the segment may be shared with Software Parser flags.
	 */
	csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0;
	es->flags = rte_cpu_to_le_32(csum);
	/*
	 * Calculate and set Software Parser offsets and flags.
	 * These flags are set for custom UDP and IP tunnel packets.
	 */
	es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx);
	/* Fill metadata field if needed. */
	es->metadata = MLX5_TXOFF_CONFIG(METADATA) ?
		       loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ?
		       *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0;
	psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *);
	es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE);
	es->inline_data = *(unaligned_uint16_t *)psrc;
	psrc += sizeof(uint16_t);
	pdst = (uint8_t *)(es + 1);
	if (MLX5_TXOFF_CONFIG(VLAN) && vlan) {
		/* Implement VLAN tag insertion as part of inline data.
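		 *
		 * Resulting inline layout, for illustration: the first two
		 * bytes of the destination MAC already sit in
		 * es->inline_data, the remaining 10 bytes of DMAC + SMAC
		 * are copied below, then the 4-byte 0x8100 TPID + vlan_tci
		 * word is composed in place and the original 2-byte
		 * ethertype follows - 18 bytes in total, which is
		 * MLX5_ESEG_MIN_INLINE_SIZE.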
*/ 2439 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2440 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2441 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2442 /* Insert VLAN ethertype + VLAN tag. */ 2443 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2444 ((RTE_ETHER_TYPE_VLAN << 16) | 2445 loc->mbuf->vlan_tci); 2446 pdst += sizeof(struct rte_vlan_hdr); 2447 /* Copy the rest two bytes from packet data. */ 2448 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2449 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2450 } else { 2451 /* Fill the gap in the title WQEBB with inline data. */ 2452 rte_mov16(pdst, psrc); 2453 } 2454 } 2455 2456 /** 2457 * Build the Ethernet Segment with entire packet 2458 * data inlining. Checks the boundary of WQEBB and 2459 * ring buffer wrapping, supports Software Parser, 2460 * Checksums and VLAN insertion Tx offload features. 2461 * 2462 * @param txq 2463 * Pointer to TX queue structure. 2464 * @param loc 2465 * Pointer to burst routine local context. 2466 * @param wqe 2467 * Pointer to WQE to fill with built Ethernet Segment. 2468 * @param vlan 2469 * Length of VLAN tag insertion if any. 2470 * @param inlen 2471 * Length of data to inline (VLAN included, if any). 2472 * @param tso 2473 * TSO flag, set mss field from the packet. 2474 * @param olx 2475 * Configured Tx offloads mask. It is fully defined at 2476 * compile time and may be used for optimization. 2477 * 2478 * @return 2479 * Pointer to the next Data Segment (aligned and wrapped around). 2480 */ 2481 static __rte_always_inline struct mlx5_wqe_dseg * 2482 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 2483 struct mlx5_txq_local *__rte_restrict loc, 2484 struct mlx5_wqe *__rte_restrict wqe, 2485 unsigned int vlan, 2486 unsigned int inlen, 2487 unsigned int tso, 2488 unsigned int olx) 2489 { 2490 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2491 uint32_t csum; 2492 uint8_t *psrc, *pdst; 2493 unsigned int part; 2494 2495 /* 2496 * Calculate and set check sum flags first, dword field 2497 * in segment may be shared with Software Parser flags. 2498 */ 2499 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2500 if (tso) { 2501 csum <<= 24; 2502 csum |= loc->mbuf->tso_segsz; 2503 es->flags = rte_cpu_to_be_32(csum); 2504 } else { 2505 es->flags = rte_cpu_to_le_32(csum); 2506 } 2507 /* 2508 * Calculate and set Software Parser offsets and flags. 2509 * These flags a set for custom UDP and IP tunnel packets. 2510 */ 2511 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2512 /* Fill metadata field if needed. */ 2513 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2514 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2515 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2516 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2517 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 2518 es->inline_data = *(unaligned_uint16_t *)psrc; 2519 psrc += sizeof(uint16_t); 2520 pdst = (uint8_t *)(es + 1); 2521 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2522 /* Implement VLAN tag insertion as part inline data. */ 2523 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2524 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2525 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2526 /* Insert VLAN ethertype + VLAN tag. */ 2527 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2528 ((RTE_ETHER_TYPE_VLAN << 16) | 2529 loc->mbuf->vlan_tci); 2530 pdst += sizeof(struct rte_vlan_hdr); 2531 /* Copy the rest two bytes from packet data. 
 */
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t)));
		*(uint16_t *)pdst = *(unaligned_uint16_t *)psrc;
		psrc += sizeof(uint16_t);
	} else {
		/* Fill the gap in the title WQEBB with inline data. */
		rte_mov16(pdst, psrc);
		psrc += sizeof(rte_v128u32_t);
	}
	pdst = (uint8_t *)(es + 2);
	MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE);
	MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end);
	inlen -= MLX5_ESEG_MIN_INLINE_SIZE;
	if (!inlen) {
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE));
		return (struct mlx5_wqe_dseg *)pdst;
	}
	/*
	 * The WQEBB space availability is checked by caller.
	 * Here we should be aware of WQE ring buffer wraparound only.
	 */
	part = (uint8_t *)txq->wqes_end - pdst;
	part = RTE_MIN(part, inlen);
	do {
		rte_memcpy(pdst, psrc, part);
		inlen -= part;
		if (likely(!inlen)) {
			/*
			 * If return value is not used by the caller
			 * the code below will be optimized out.
			 */
			pdst += part;
			pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
			if (unlikely(pdst >= (uint8_t *)txq->wqes_end))
				pdst = (uint8_t *)txq->wqes;
			return (struct mlx5_wqe_dseg *)pdst;
		}
		pdst = (uint8_t *)txq->wqes;
		psrc += part;
		part = inlen;
	} while (true);
}

/**
 * Copy data from a chain of mbufs to the specified linear buffer.
 * If the data from some mbuf is copied completely, this mbuf is freed.
 * The local structure is used to keep the byte stream state.
 *
 * @param pdst
 *   Pointer to the destination linear buffer.
 * @param loc
 *   Pointer to burst routine local context.
 * @param len
 *   Length of data to be copied.
 * @param must
 *   Length of data to be copied, ignoring the no-inline hint.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Number of actually copied data bytes. This is always greater than or
 *   equal to the must parameter and might be less than len if the
 *   no-inline hint flag is encountered.
 */
static __rte_always_inline unsigned int
mlx5_tx_mseg_memcpy(uint8_t *pdst,
		    struct mlx5_txq_local *__rte_restrict loc,
		    unsigned int len,
		    unsigned int must,
		    unsigned int olx __rte_unused)
{
	struct rte_mbuf *mbuf;
	unsigned int part, dlen, copy = 0;
	uint8_t *psrc;

	MLX5_ASSERT(len);
	MLX5_ASSERT(must <= len);
	do {
		/* Allow zero length packets, must check first. */
		dlen = rte_pktmbuf_data_len(loc->mbuf);
		if (dlen <= loc->mbuf_off) {
			/* Exhausted packet, just free. */
			mbuf = loc->mbuf;
			loc->mbuf = mbuf->next;
			rte_pktmbuf_free_seg(mbuf);
			loc->mbuf_off = 0;
			MLX5_ASSERT(loc->mbuf_nseg > 1);
			MLX5_ASSERT(loc->mbuf);
			--loc->mbuf_nseg;
			if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) {
				unsigned int diff;

				if (copy >= must) {
					/*
					 * We already copied the minimal
					 * requested amount of data.
					 */
					return copy;
				}
				diff = must - copy;
				if (diff <= rte_pktmbuf_data_len(loc->mbuf)) {
					/*
					 * Copy only the minimal required
					 * part of the data buffer.
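					 * The remainder of this mbuf is
					 * then expected to be attached by
					 * reference as a pointer Data
					 * Segment by the caller (see
					 * mlx5_tx_mseg_build()).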
2637 */ 2638 len = diff; 2639 } 2640 } 2641 continue; 2642 } 2643 dlen -= loc->mbuf_off; 2644 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 2645 loc->mbuf_off); 2646 part = RTE_MIN(len, dlen); 2647 rte_memcpy(pdst, psrc, part); 2648 copy += part; 2649 loc->mbuf_off += part; 2650 len -= part; 2651 if (!len) { 2652 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 2653 loc->mbuf_off = 0; 2654 /* Exhausted packet, just free. */ 2655 mbuf = loc->mbuf; 2656 loc->mbuf = mbuf->next; 2657 rte_pktmbuf_free_seg(mbuf); 2658 loc->mbuf_off = 0; 2659 MLX5_ASSERT(loc->mbuf_nseg >= 1); 2660 --loc->mbuf_nseg; 2661 } 2662 return copy; 2663 } 2664 pdst += part; 2665 } while (true); 2666 } 2667 2668 /** 2669 * Build the Ethernet Segment with inlined data from 2670 * multi-segment packet. Checks the boundary of WQEBB 2671 * and ring buffer wrapping, supports Software Parser, 2672 * Checksums and VLAN insertion Tx offload features. 2673 * 2674 * @param txq 2675 * Pointer to TX queue structure. 2676 * @param loc 2677 * Pointer to burst routine local context. 2678 * @param wqe 2679 * Pointer to WQE to fill with built Ethernet Segment. 2680 * @param vlan 2681 * Length of VLAN tag insertion if any. 2682 * @param inlen 2683 * Length of data to inline (VLAN included, if any). 2684 * @param tso 2685 * TSO flag, set mss field from the packet. 2686 * @param olx 2687 * Configured Tx offloads mask. It is fully defined at 2688 * compile time and may be used for optimization. 2689 * 2690 * @return 2691 * Pointer to the next Data Segment (aligned and 2692 * possible NOT wrapped around - caller should do 2693 * wrapping check on its own). 2694 */ 2695 static __rte_always_inline struct mlx5_wqe_dseg * 2696 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 2697 struct mlx5_txq_local *__rte_restrict loc, 2698 struct mlx5_wqe *__rte_restrict wqe, 2699 unsigned int vlan, 2700 unsigned int inlen, 2701 unsigned int tso, 2702 unsigned int olx) 2703 { 2704 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2705 uint32_t csum; 2706 uint8_t *pdst; 2707 unsigned int part, tlen = 0; 2708 2709 /* 2710 * Calculate and set check sum flags first, uint32_t field 2711 * in segment may be shared with Software Parser flags. 2712 */ 2713 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2714 if (tso) { 2715 csum <<= 24; 2716 csum |= loc->mbuf->tso_segsz; 2717 es->flags = rte_cpu_to_be_32(csum); 2718 } else { 2719 es->flags = rte_cpu_to_le_32(csum); 2720 } 2721 /* 2722 * Calculate and set Software Parser offsets and flags. 2723 * These flags a set for custom UDP and IP tunnel packets. 2724 */ 2725 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2726 /* Fill metadata field if needed. */ 2727 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2728 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2729 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2730 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2731 pdst = (uint8_t *)&es->inline_data; 2732 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2733 /* Implement VLAN tag insertion as part inline data. 
*/ 2734 mlx5_tx_mseg_memcpy(pdst, loc, 2735 2 * RTE_ETHER_ADDR_LEN, 2736 2 * RTE_ETHER_ADDR_LEN, olx); 2737 pdst += 2 * RTE_ETHER_ADDR_LEN; 2738 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2739 ((RTE_ETHER_TYPE_VLAN << 16) | 2740 loc->mbuf->vlan_tci); 2741 pdst += sizeof(struct rte_vlan_hdr); 2742 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 2743 } 2744 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 2745 /* 2746 * The WQEBB space availability is checked by caller. 2747 * Here we should be aware of WQE ring buffer wraparound only. 2748 */ 2749 part = (uint8_t *)txq->wqes_end - pdst; 2750 part = RTE_MIN(part, inlen - tlen); 2751 MLX5_ASSERT(part); 2752 do { 2753 unsigned int copy; 2754 2755 /* 2756 * Copying may be interrupted inside the routine 2757 * if run into no inline hint flag. 2758 */ 2759 copy = tlen >= txq->inlen_mode ? 0 : (txq->inlen_mode - tlen); 2760 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 2761 tlen += copy; 2762 if (likely(inlen <= tlen) || copy < part) { 2763 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 2764 pdst += copy; 2765 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2766 return (struct mlx5_wqe_dseg *)pdst; 2767 } 2768 pdst = (uint8_t *)txq->wqes; 2769 part = inlen - tlen; 2770 } while (true); 2771 } 2772 2773 /** 2774 * Build the Data Segment of pointer type. 2775 * 2776 * @param txq 2777 * Pointer to TX queue structure. 2778 * @param loc 2779 * Pointer to burst routine local context. 2780 * @param dseg 2781 * Pointer to WQE to fill with built Data Segment. 2782 * @param buf 2783 * Data buffer to point. 2784 * @param len 2785 * Data buffer length. 2786 * @param olx 2787 * Configured Tx offloads mask. It is fully defined at 2788 * compile time and may be used for optimization. 2789 */ 2790 static __rte_always_inline void 2791 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 2792 struct mlx5_txq_local *__rte_restrict loc, 2793 struct mlx5_wqe_dseg *__rte_restrict dseg, 2794 uint8_t *buf, 2795 unsigned int len, 2796 unsigned int olx __rte_unused) 2797 2798 { 2799 MLX5_ASSERT(len); 2800 dseg->bcount = rte_cpu_to_be_32(len); 2801 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2802 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2803 } 2804 2805 /** 2806 * Build the Data Segment of pointer type or inline 2807 * if data length is less than buffer in minimal 2808 * Data Segment size. 2809 * 2810 * @param txq 2811 * Pointer to TX queue structure. 2812 * @param loc 2813 * Pointer to burst routine local context. 2814 * @param dseg 2815 * Pointer to WQE to fill with built Data Segment. 2816 * @param buf 2817 * Data buffer to point. 2818 * @param len 2819 * Data buffer length. 2820 * @param olx 2821 * Configured Tx offloads mask. It is fully defined at 2822 * compile time and may be used for optimization. 2823 */ 2824 static __rte_always_inline void 2825 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 2826 struct mlx5_txq_local *__rte_restrict loc, 2827 struct mlx5_wqe_dseg *__rte_restrict dseg, 2828 uint8_t *buf, 2829 unsigned int len, 2830 unsigned int olx __rte_unused) 2831 2832 { 2833 uintptr_t dst, src; 2834 2835 MLX5_ASSERT(len); 2836 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 2837 dseg->bcount = rte_cpu_to_be_32(len); 2838 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2839 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2840 2841 return; 2842 } 2843 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2844 /* Unrolled implementation of generic rte_memcpy. 
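	 *
	 * At this point len <= MLX5_DSEG_MIN_INLINE_SIZE (12 bytes), so
	 * the copy is decomposed by the bits of len; e.g. len = 11 is
	 * copied as one 8-byte, one 2-byte and one 1-byte chunk
	 * (illustrative note, chunk sizes follow the checks below).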
*/ 2845 dst = (uintptr_t)&dseg->inline_data[0]; 2846 src = (uintptr_t)buf; 2847 if (len & 0x08) { 2848 #ifdef RTE_ARCH_STRICT_ALIGN 2849 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 2850 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2851 dst += sizeof(uint32_t); 2852 src += sizeof(uint32_t); 2853 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2854 dst += sizeof(uint32_t); 2855 src += sizeof(uint32_t); 2856 #else 2857 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 2858 dst += sizeof(uint64_t); 2859 src += sizeof(uint64_t); 2860 #endif 2861 } 2862 if (len & 0x04) { 2863 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2864 dst += sizeof(uint32_t); 2865 src += sizeof(uint32_t); 2866 } 2867 if (len & 0x02) { 2868 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 2869 dst += sizeof(uint16_t); 2870 src += sizeof(uint16_t); 2871 } 2872 if (len & 0x01) 2873 *(uint8_t *)dst = *(uint8_t *)src; 2874 } 2875 2876 /** 2877 * Build the Data Segment of inlined data from single 2878 * segment packet, no VLAN insertion. 2879 * 2880 * @param txq 2881 * Pointer to TX queue structure. 2882 * @param loc 2883 * Pointer to burst routine local context. 2884 * @param dseg 2885 * Pointer to WQE to fill with built Data Segment. 2886 * @param buf 2887 * Data buffer to point. 2888 * @param len 2889 * Data buffer length. 2890 * @param olx 2891 * Configured Tx offloads mask. It is fully defined at 2892 * compile time and may be used for optimization. 2893 * 2894 * @return 2895 * Pointer to the next Data Segment after inlined data. 2896 * Ring buffer wraparound check is needed. We do not 2897 * do it here because it may not be needed for the 2898 * last packet in the eMPW session. 2899 */ 2900 static __rte_always_inline struct mlx5_wqe_dseg * 2901 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 2902 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2903 struct mlx5_wqe_dseg *__rte_restrict dseg, 2904 uint8_t *buf, 2905 unsigned int len, 2906 unsigned int olx __rte_unused) 2907 { 2908 unsigned int part; 2909 uint8_t *pdst; 2910 2911 if (!MLX5_TXOFF_CONFIG(MPW)) { 2912 /* Store the descriptor byte counter for eMPW sessions. */ 2913 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2914 pdst = &dseg->inline_data[0]; 2915 } else { 2916 /* The entire legacy MPW session counter is stored on close. */ 2917 pdst = (uint8_t *)dseg; 2918 } 2919 /* 2920 * The WQEBB space availability is checked by caller. 2921 * Here we should be aware of WQE ring buffer wraparound only. 2922 */ 2923 part = (uint8_t *)txq->wqes_end - pdst; 2924 part = RTE_MIN(part, len); 2925 do { 2926 rte_memcpy(pdst, buf, part); 2927 len -= part; 2928 if (likely(!len)) { 2929 pdst += part; 2930 if (!MLX5_TXOFF_CONFIG(MPW)) 2931 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2932 /* Note: no final wraparound check here. */ 2933 return (struct mlx5_wqe_dseg *)pdst; 2934 } 2935 pdst = (uint8_t *)txq->wqes; 2936 buf += part; 2937 part = len; 2938 } while (true); 2939 } 2940 2941 /** 2942 * Build the Data Segment of inlined data from single 2943 * segment packet with VLAN insertion. 2944 * 2945 * @param txq 2946 * Pointer to TX queue structure. 2947 * @param loc 2948 * Pointer to burst routine local context. 2949 * @param dseg 2950 * Pointer to the dseg fill with built Data Segment. 2951 * @param buf 2952 * Data buffer to point. 2953 * @param len 2954 * Data buffer length. 2955 * @param olx 2956 * Configured Tx offloads mask. It is fully defined at 2957 * compile time and may be used for optimization. 
2958 * 2959 * @return 2960 * Pointer to the next Data Segment after inlined data. 2961 * Ring buffer wraparound check is needed. 2962 */ 2963 static __rte_always_inline struct mlx5_wqe_dseg * 2964 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 2965 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2966 struct mlx5_wqe_dseg *__rte_restrict dseg, 2967 uint8_t *buf, 2968 unsigned int len, 2969 unsigned int olx __rte_unused) 2970 2971 { 2972 unsigned int part; 2973 uint8_t *pdst; 2974 2975 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 2976 if (!MLX5_TXOFF_CONFIG(MPW)) { 2977 /* Store the descriptor byte counter for eMPW sessions. */ 2978 dseg->bcount = rte_cpu_to_be_32 2979 ((len + sizeof(struct rte_vlan_hdr)) | 2980 MLX5_ETH_WQE_DATA_INLINE); 2981 pdst = &dseg->inline_data[0]; 2982 } else { 2983 /* The entire legacy MPW session counter is stored on close. */ 2984 pdst = (uint8_t *)dseg; 2985 } 2986 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 2987 buf += MLX5_DSEG_MIN_INLINE_SIZE; 2988 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 2989 len -= MLX5_DSEG_MIN_INLINE_SIZE; 2990 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ 2991 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 2992 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 2993 pdst = (uint8_t *)txq->wqes; 2994 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 2995 loc->mbuf->vlan_tci); 2996 pdst += sizeof(struct rte_vlan_hdr); 2997 /* 2998 * The WQEBB space availability is checked by caller. 2999 * Here we should be aware of WQE ring buffer wraparound only. 3000 */ 3001 part = (uint8_t *)txq->wqes_end - pdst; 3002 part = RTE_MIN(part, len); 3003 do { 3004 rte_memcpy(pdst, buf, part); 3005 len -= part; 3006 if (likely(!len)) { 3007 pdst += part; 3008 if (!MLX5_TXOFF_CONFIG(MPW)) 3009 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 3010 /* Note: no final wraparound check here. */ 3011 return (struct mlx5_wqe_dseg *)pdst; 3012 } 3013 pdst = (uint8_t *)txq->wqes; 3014 buf += part; 3015 part = len; 3016 } while (true); 3017 } 3018 3019 /** 3020 * Build the Ethernet Segment with optionally inlined data with 3021 * VLAN insertion and following Data Segments (if any) from 3022 * multi-segment packet. Used by ordinary send and TSO. 3023 * 3024 * @param txq 3025 * Pointer to TX queue structure. 3026 * @param loc 3027 * Pointer to burst routine local context. 3028 * @param wqe 3029 * Pointer to WQE to fill with built Ethernet/Data Segments. 3030 * @param vlan 3031 * Length of VLAN header to insert, 0 means no VLAN insertion. 3032 * @param inlen 3033 * Data length to inline. For TSO this parameter specifies 3034 * exact value, for ordinary send routine can be aligned by 3035 * caller to provide better WQE space saving and data buffer 3036 * start address alignment. This length includes VLAN header 3037 * being inserted. 3038 * @param tso 3039 * Zero means ordinary send, inlined data can be extended, 3040 * otherwise this is TSO, inlined data length is fixed. 3041 * @param olx 3042 * Configured Tx offloads mask. It is fully defined at 3043 * compile time and may be used for optimization. 3044 * 3045 * @return 3046 * Actual size of built WQE in segments. 
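 *
 * Worked example (numbers assumed for illustration): inlining 64 bytes
 * of a 3-segment packet takes 1 wseg for the Control Segment, 1 wseg
 * for the Ethernet Segment (whose last two bytes begin the inline data)
 * plus ceil((64 - 2) / 16) = 4 wsegs for the rest of the inline data,
 * and 3 pointer Data Segments for the remaining, not inlined, data
 * (assuming the first mbuf is only partially inlined and none is
 * freed), so the returned ds is 1 + 1 + 4 + 3 = 9, i.e.
 * (9 + 3) / 4 = 3 WQEBBs.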
3047 */ 3048 static __rte_always_inline unsigned int 3049 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 3050 struct mlx5_txq_local *__rte_restrict loc, 3051 struct mlx5_wqe *__rte_restrict wqe, 3052 unsigned int vlan, 3053 unsigned int inlen, 3054 unsigned int tso, 3055 unsigned int olx __rte_unused) 3056 { 3057 struct mlx5_wqe_dseg *__rte_restrict dseg; 3058 unsigned int ds; 3059 3060 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 3061 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 3062 loc->mbuf_off = 0; 3063 3064 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 3065 if (!loc->mbuf_nseg) 3066 goto dseg_done; 3067 /* 3068 * There are still some mbuf remaining, not inlined. 3069 * The first mbuf may be partially inlined and we 3070 * must process the possible non-zero data offset. 3071 */ 3072 if (loc->mbuf_off) { 3073 unsigned int dlen; 3074 uint8_t *dptr; 3075 3076 /* 3077 * Exhausted packets must be dropped before. 3078 * Non-zero offset means there are some data 3079 * remained in the packet. 3080 */ 3081 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 3082 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 3083 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 3084 loc->mbuf_off); 3085 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 3086 /* 3087 * Build the pointer/minimal data Data Segment. 3088 * Do ring buffer wrapping check in advance. 3089 */ 3090 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3091 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3092 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 3093 /* Store the mbuf to be freed on completion. */ 3094 MLX5_ASSERT(loc->elts_free); 3095 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3096 --loc->elts_free; 3097 ++dseg; 3098 if (--loc->mbuf_nseg == 0) 3099 goto dseg_done; 3100 loc->mbuf = loc->mbuf->next; 3101 loc->mbuf_off = 0; 3102 } 3103 do { 3104 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3105 struct rte_mbuf *mbuf; 3106 3107 /* Zero length segment found, just skip. */ 3108 mbuf = loc->mbuf; 3109 loc->mbuf = loc->mbuf->next; 3110 rte_pktmbuf_free_seg(mbuf); 3111 if (--loc->mbuf_nseg == 0) 3112 break; 3113 } else { 3114 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3115 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3116 mlx5_tx_dseg_iptr 3117 (txq, loc, dseg, 3118 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3119 rte_pktmbuf_data_len(loc->mbuf), olx); 3120 MLX5_ASSERT(loc->elts_free); 3121 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3122 --loc->elts_free; 3123 ++dseg; 3124 if (--loc->mbuf_nseg == 0) 3125 break; 3126 loc->mbuf = loc->mbuf->next; 3127 } 3128 } while (true); 3129 3130 dseg_done: 3131 /* Calculate actual segments used from the dseg pointer. */ 3132 if ((uintptr_t)wqe < (uintptr_t)dseg) 3133 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 3134 else 3135 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 3136 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 3137 return ds; 3138 } 3139 3140 /** 3141 * The routine checks timestamp flag in the current packet, 3142 * and push WAIT WQE into the queue if scheduling is required. 3143 * 3144 * @param txq 3145 * Pointer to TX queue structure. 3146 * @param loc 3147 * Pointer to burst routine local context. 3148 * @param olx 3149 * Configured Tx offloads mask. It is fully defined at 3150 * compile time and may be used for optimization. 3151 * 3152 * @return 3153 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3154 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 
3155 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 3156 * Local context variables partially updated. 3157 */ 3158 static __rte_always_inline enum mlx5_txcmp_code 3159 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 3160 struct mlx5_txq_local *restrict loc, 3161 unsigned int olx) 3162 { 3163 if (MLX5_TXOFF_CONFIG(TXPP) && 3164 loc->mbuf->ol_flags & txq->ts_mask) { 3165 struct mlx5_wqe *wqe; 3166 uint64_t ts; 3167 int32_t wci; 3168 3169 /* 3170 * Estimate the required space quickly and roughly. 3171 * We would like to ensure the packet can be pushed 3172 * to the queue and we won't get the orphan WAIT WQE. 3173 */ 3174 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 3175 loc->elts_free < NB_SEGS(loc->mbuf)) 3176 return MLX5_TXCMP_CODE_EXIT; 3177 /* Convert the timestamp into completion to wait. */ 3178 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 3179 wci = mlx5_txpp_convert_tx_ts(txq->sh, ts); 3180 if (unlikely(wci < 0)) 3181 return MLX5_TXCMP_CODE_SINGLE; 3182 /* Build the WAIT WQE with specified completion. */ 3183 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3184 mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx); 3185 mlx5_tx_wseg_init(txq, loc, wqe, wci, olx); 3186 ++txq->wqe_ci; 3187 --loc->wqe_free; 3188 return MLX5_TXCMP_CODE_MULTI; 3189 } 3190 return MLX5_TXCMP_CODE_SINGLE; 3191 } 3192 3193 /** 3194 * Tx one packet function for multi-segment TSO. Supports all 3195 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 3196 * sends one packet per WQE. 3197 * 3198 * This routine is responsible for storing processed mbuf 3199 * into elts ring buffer and update elts_head. 3200 * 3201 * @param txq 3202 * Pointer to TX queue structure. 3203 * @param loc 3204 * Pointer to burst routine local context. 3205 * @param olx 3206 * Configured Tx offloads mask. It is fully defined at 3207 * compile time and may be used for optimization. 3208 * 3209 * @return 3210 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3211 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3212 * Local context variables partially updated. 3213 */ 3214 static __rte_always_inline enum mlx5_txcmp_code 3215 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 3216 struct mlx5_txq_local *__rte_restrict loc, 3217 unsigned int olx) 3218 { 3219 struct mlx5_wqe *__rte_restrict wqe; 3220 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 3221 3222 if (MLX5_TXOFF_CONFIG(TXPP)) { 3223 enum mlx5_txcmp_code wret; 3224 3225 /* Generate WAIT for scheduling if requested. */ 3226 wret = mlx5_tx_schedule_send(txq, loc, olx); 3227 if (wret == MLX5_TXCMP_CODE_EXIT) 3228 return MLX5_TXCMP_CODE_EXIT; 3229 if (wret == MLX5_TXCMP_CODE_ERROR) 3230 return MLX5_TXCMP_CODE_ERROR; 3231 } 3232 /* 3233 * Calculate data length to be inlined to estimate 3234 * the required space in WQE ring buffer. 3235 */ 3236 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3237 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3238 vlan = sizeof(struct rte_vlan_hdr); 3239 inlen = loc->mbuf->l2_len + vlan + 3240 loc->mbuf->l3_len + loc->mbuf->l4_len; 3241 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 3242 return MLX5_TXCMP_CODE_ERROR; 3243 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3244 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 3245 /* Packet must contain all TSO headers. 
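	 *
	 * For example (illustrative): plain Ethernet + IPv4 + TCP with no
	 * options gives inlen = 14 + 20 + 20 = 54 bytes, plus 4 bytes when
	 * a VLAN tag is inserted and plus the outer headers when a tunnel
	 * offload is requested.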
*/ 3246 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 3247 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3248 inlen > (dlen + vlan))) 3249 return MLX5_TXCMP_CODE_ERROR; 3250 MLX5_ASSERT(inlen >= txq->inlen_mode); 3251 /* 3252 * Check whether there are enough free WQEBBs: 3253 * - Control Segment 3254 * - Ethernet Segment 3255 * - First Segment of inlined Ethernet data 3256 * - ... data continued ... 3257 * - Data Segments of pointer/min inline type 3258 */ 3259 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3260 MLX5_ESEG_MIN_INLINE_SIZE + 3261 MLX5_WSEG_SIZE + 3262 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3263 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3264 return MLX5_TXCMP_CODE_EXIT; 3265 /* Check for maximal WQE size. */ 3266 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3267 return MLX5_TXCMP_CODE_ERROR; 3268 #ifdef MLX5_PMD_SOFT_COUNTERS 3269 /* Update sent data bytes/packets counters. */ 3270 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 3271 loc->mbuf->tso_segsz; 3272 /* 3273 * One will be added for mbuf itself 3274 * at the end of the mlx5_tx_burst from 3275 * loc->pkts_sent field. 3276 */ 3277 --ntcp; 3278 txq->stats.opackets += ntcp; 3279 txq->stats.obytes += dlen + vlan + ntcp * inlen; 3280 #endif 3281 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3282 loc->wqe_last = wqe; 3283 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 3284 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 3285 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3286 txq->wqe_ci += (ds + 3) / 4; 3287 loc->wqe_free -= (ds + 3) / 4; 3288 return MLX5_TXCMP_CODE_MULTI; 3289 } 3290 3291 /** 3292 * Tx one packet function for multi-segment SEND. Supports all 3293 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3294 * sends one packet per WQE, without any data inlining in 3295 * Ethernet Segment. 3296 * 3297 * This routine is responsible for storing processed mbuf 3298 * into elts ring buffer and update elts_head. 3299 * 3300 * @param txq 3301 * Pointer to TX queue structure. 3302 * @param loc 3303 * Pointer to burst routine local context. 3304 * @param olx 3305 * Configured Tx offloads mask. It is fully defined at 3306 * compile time and may be used for optimization. 3307 * 3308 * @return 3309 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3310 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3311 * Local context variables partially updated. 3312 */ 3313 static __rte_always_inline enum mlx5_txcmp_code 3314 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 3315 struct mlx5_txq_local *__rte_restrict loc, 3316 unsigned int olx) 3317 { 3318 struct mlx5_wqe_dseg *__rte_restrict dseg; 3319 struct mlx5_wqe *__rte_restrict wqe; 3320 unsigned int ds, nseg; 3321 3322 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3323 if (MLX5_TXOFF_CONFIG(TXPP)) { 3324 enum mlx5_txcmp_code wret; 3325 3326 /* Generate WAIT for scheduling if requested. */ 3327 wret = mlx5_tx_schedule_send(txq, loc, olx); 3328 if (wret == MLX5_TXCMP_CODE_EXIT) 3329 return MLX5_TXCMP_CODE_EXIT; 3330 if (wret == MLX5_TXCMP_CODE_ERROR) 3331 return MLX5_TXCMP_CODE_ERROR; 3332 } 3333 /* 3334 * No inline at all, it means the CPU cycles saving 3335 * is prioritized at configuration, we should not 3336 * copy any packet data to WQE. 3337 */ 3338 nseg = NB_SEGS(loc->mbuf); 3339 ds = 2 + nseg; 3340 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3341 return MLX5_TXCMP_CODE_EXIT; 3342 /* Check for maximal WQE size. 
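	 *
	 * Note (illustrative): ds counts 16-byte segments, so a packet
	 * with nseg = 3 gives ds = 2 + 3 = 5 and occupies
	 * (5 + 3) / 4 = 2 WQEBBs of the Send Queue.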
*/ 3343 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3344 return MLX5_TXCMP_CODE_ERROR; 3345 /* 3346 * Some Tx offloads may cause an error if 3347 * packet is not long enough, check against 3348 * assumed minimal length. 3349 */ 3350 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 3351 return MLX5_TXCMP_CODE_ERROR; 3352 #ifdef MLX5_PMD_SOFT_COUNTERS 3353 /* Update sent data bytes counter. */ 3354 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 3355 if (MLX5_TXOFF_CONFIG(VLAN) && 3356 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3357 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 3358 #endif 3359 /* 3360 * SEND WQE, one WQEBB: 3361 * - Control Segment, SEND opcode 3362 * - Ethernet Segment, optional VLAN, no inline 3363 * - Data Segments, pointer only type 3364 */ 3365 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3366 loc->wqe_last = wqe; 3367 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 3368 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3369 dseg = &wqe->dseg[0]; 3370 do { 3371 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3372 struct rte_mbuf *mbuf; 3373 3374 /* 3375 * Zero length segment found, have to 3376 * correct total size of WQE in segments. 3377 * It is supposed to be rare occasion, so 3378 * in normal case (no zero length segments) 3379 * we avoid extra writing to the Control 3380 * Segment. 3381 */ 3382 --ds; 3383 wqe->cseg.sq_ds -= RTE_BE32(1); 3384 mbuf = loc->mbuf; 3385 loc->mbuf = mbuf->next; 3386 rte_pktmbuf_free_seg(mbuf); 3387 if (--nseg == 0) 3388 break; 3389 } else { 3390 mlx5_tx_dseg_ptr 3391 (txq, loc, dseg, 3392 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3393 rte_pktmbuf_data_len(loc->mbuf), olx); 3394 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3395 --loc->elts_free; 3396 if (--nseg == 0) 3397 break; 3398 ++dseg; 3399 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3400 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3401 loc->mbuf = loc->mbuf->next; 3402 } 3403 } while (true); 3404 txq->wqe_ci += (ds + 3) / 4; 3405 loc->wqe_free -= (ds + 3) / 4; 3406 return MLX5_TXCMP_CODE_MULTI; 3407 } 3408 3409 /** 3410 * Tx one packet function for multi-segment SEND. Supports all 3411 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3412 * sends one packet per WQE, with data inlining in 3413 * Ethernet Segment and minimal Data Segments. 3414 * 3415 * This routine is responsible for storing processed mbuf 3416 * into elts ring buffer and update elts_head. 3417 * 3418 * @param txq 3419 * Pointer to TX queue structure. 3420 * @param loc 3421 * Pointer to burst routine local context. 3422 * @param olx 3423 * Configured Tx offloads mask. It is fully defined at 3424 * compile time and may be used for optimization. 3425 * 3426 * @return 3427 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3428 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3429 * Local context variables partially updated. 3430 */ 3431 static __rte_always_inline enum mlx5_txcmp_code 3432 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 3433 struct mlx5_txq_local *__rte_restrict loc, 3434 unsigned int olx) 3435 { 3436 struct mlx5_wqe *__rte_restrict wqe; 3437 unsigned int ds, inlen, dlen, vlan = 0; 3438 3439 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3440 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3441 if (MLX5_TXOFF_CONFIG(TXPP)) { 3442 enum mlx5_txcmp_code wret; 3443 3444 /* Generate WAIT for scheduling if requested. 
 */
		wret = mlx5_tx_schedule_send(txq, loc, olx);
		if (wret == MLX5_TXCMP_CODE_EXIT)
			return MLX5_TXCMP_CODE_EXIT;
		if (wret == MLX5_TXCMP_CODE_ERROR)
			return MLX5_TXCMP_CODE_ERROR;
	}
	/*
	 * First calculate the data length to be inlined
	 * to estimate the required space for the WQE.
	 */
	dlen = rte_pktmbuf_pkt_len(loc->mbuf);
	if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT)
		vlan = sizeof(struct rte_vlan_hdr);
	inlen = dlen + vlan;
	/* Check against minimal length. */
	if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE)
		return MLX5_TXCMP_CODE_ERROR;
	MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE);
	if (inlen > txq->inlen_send ||
	    loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) {
		struct rte_mbuf *mbuf;
		unsigned int nxlen;
		uintptr_t start;

		/*
		 * Packet length exceeds the allowed inline
		 * data length, check whether the minimal
		 * inlining is required.
		 */
		if (txq->inlen_mode) {
			MLX5_ASSERT(txq->inlen_mode >=
				    MLX5_ESEG_MIN_INLINE_SIZE);
			MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send);
			inlen = txq->inlen_mode;
		} else {
			if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE ||
			    !vlan || txq->vlan_en) {
				/*
				 * VLAN insertion will be done by the
				 * hardware. This path is not the most
				 * efficient one - the VLAN flag is checked
				 * twice - but it keeps the inlining length
				 * handling correct with the VLAN header
				 * being inserted.
				 */
				return mlx5_tx_packet_multi_send
						(txq, loc, olx);
			}
			inlen = MLX5_ESEG_MIN_INLINE_SIZE;
		}
		/*
		 * Now we know the minimal amount of data to be inlined.
		 * Check whether we should inline the buffers from the
		 * chain beginning to eliminate some mbufs.
		 */
		mbuf = loc->mbuf;
		nxlen = rte_pktmbuf_data_len(mbuf);
		if (unlikely(nxlen <= txq->inlen_send)) {
			/* We can inline the first mbuf at least. */
			if (nxlen < inlen) {
				unsigned int smlen;

				/* Scan mbufs until inlen is filled. */
				do {
					smlen = nxlen;
					mbuf = NEXT(mbuf);
					MLX5_ASSERT(mbuf);
					nxlen = rte_pktmbuf_data_len(mbuf);
					nxlen += smlen;
				} while (unlikely(nxlen < inlen));
				if (unlikely(nxlen > txq->inlen_send)) {
					/* We cannot inline the entire mbuf. */
					smlen = inlen - smlen;
					start = rte_pktmbuf_mtod_offset
						    (mbuf, uintptr_t, smlen);
					goto do_align;
				}
			}
			do {
				inlen = nxlen;
				mbuf = NEXT(mbuf);
				/* The packet must not end here. */
				MLX5_ASSERT(mbuf);
				nxlen = inlen + rte_pktmbuf_data_len(mbuf);
			} while (unlikely(nxlen < txq->inlen_send));
		}
		start = rte_pktmbuf_mtod(mbuf, uintptr_t);
		/*
		 * Check whether we can extend the inlined data to align
		 * the start address of the remaining data buffer to a
		 * cacheline boundary.
		 */
do_align:
		start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1);
		if (unlikely(start)) {
			start += inlen;
			if (start <= txq->inlen_send)
				inlen = start;
		}
	}
	/*
	 * Check whether there are enough free WQEBBs:
	 * - Control Segment
	 * - Ethernet Segment
	 * - First Segment of inlined Ethernet data
	 * - ... data continued ...
	 * - Data Segments of pointer/min inline type
	 *
	 * Estimate the number of Data Segments conservatively,
	 * assuming no mbufs are freed during inlining.
3553 */ 3554 MLX5_ASSERT(inlen <= txq->inlen_send); 3555 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3556 MLX5_ESEG_MIN_INLINE_SIZE + 3557 MLX5_WSEG_SIZE + 3558 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3559 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3560 return MLX5_TXCMP_CODE_EXIT; 3561 /* Check for maximal WQE size. */ 3562 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3563 return MLX5_TXCMP_CODE_ERROR; 3564 #ifdef MLX5_PMD_SOFT_COUNTERS 3565 /* Update sent data bytes/packets counters. */ 3566 txq->stats.obytes += dlen + vlan; 3567 #endif 3568 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3569 loc->wqe_last = wqe; 3570 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 3571 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 3572 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3573 txq->wqe_ci += (ds + 3) / 4; 3574 loc->wqe_free -= (ds + 3) / 4; 3575 return MLX5_TXCMP_CODE_MULTI; 3576 } 3577 3578 /** 3579 * Tx burst function for multi-segment packets. Supports all 3580 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 3581 * sends one packet per WQE. Function stops sending if it 3582 * encounters the single-segment packet. 3583 * 3584 * This routine is responsible for storing processed mbuf 3585 * into elts ring buffer and update elts_head. 3586 * 3587 * @param txq 3588 * Pointer to TX queue structure. 3589 * @param[in] pkts 3590 * Packets to transmit. 3591 * @param pkts_n 3592 * Number of packets in array. 3593 * @param loc 3594 * Pointer to burst routine local context. 3595 * @param olx 3596 * Configured Tx offloads mask. It is fully defined at 3597 * compile time and may be used for optimization. 3598 * 3599 * @return 3600 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3601 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3602 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3603 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 3604 * Local context variables updated. 3605 */ 3606 static __rte_always_inline enum mlx5_txcmp_code 3607 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 3608 struct rte_mbuf **__rte_restrict pkts, 3609 unsigned int pkts_n, 3610 struct mlx5_txq_local *__rte_restrict loc, 3611 unsigned int olx) 3612 { 3613 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3614 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3615 pkts += loc->pkts_sent + 1; 3616 pkts_n -= loc->pkts_sent; 3617 for (;;) { 3618 enum mlx5_txcmp_code ret; 3619 3620 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3621 /* 3622 * Estimate the number of free elts quickly but 3623 * conservatively. Some segment may be fully inlined 3624 * and freed, ignore this here - precise estimation 3625 * is costly. 3626 */ 3627 if (loc->elts_free < NB_SEGS(loc->mbuf)) 3628 return MLX5_TXCMP_CODE_EXIT; 3629 if (MLX5_TXOFF_CONFIG(TSO) && 3630 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) { 3631 /* Proceed with multi-segment TSO. */ 3632 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 3633 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 3634 /* Proceed with multi-segment SEND with inlining. */ 3635 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 3636 } else { 3637 /* Proceed with multi-segment SEND w/o inlining. */ 3638 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 3639 } 3640 if (ret == MLX5_TXCMP_CODE_EXIT) 3641 return MLX5_TXCMP_CODE_EXIT; 3642 if (ret == MLX5_TXCMP_CODE_ERROR) 3643 return MLX5_TXCMP_CODE_ERROR; 3644 /* WQE is built, go to the next packet. 
*/ 3645 ++loc->pkts_sent; 3646 --pkts_n; 3647 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3648 return MLX5_TXCMP_CODE_EXIT; 3649 loc->mbuf = *pkts++; 3650 if (pkts_n > 1) 3651 rte_prefetch0(*pkts); 3652 if (likely(NB_SEGS(loc->mbuf) > 1)) 3653 continue; 3654 /* Here ends the series of multi-segment packets. */ 3655 if (MLX5_TXOFF_CONFIG(TSO) && 3656 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3657 return MLX5_TXCMP_CODE_TSO; 3658 return MLX5_TXCMP_CODE_SINGLE; 3659 } 3660 MLX5_ASSERT(false); 3661 } 3662 3663 /** 3664 * Tx burst function for single-segment packets with TSO. 3665 * Supports all types of Tx offloads, except multi-packets. 3666 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 3667 * Function stops sending if it encounters the multi-segment 3668 * packet or packet without TSO requested. 3669 * 3670 * The routine is responsible for storing processed mbuf 3671 * into elts ring buffer and update elts_head if inline 3672 * offloads is requested due to possible early freeing 3673 * of the inlined mbufs (can not store pkts array in elts 3674 * as a batch). 3675 * 3676 * @param txq 3677 * Pointer to TX queue structure. 3678 * @param[in] pkts 3679 * Packets to transmit. 3680 * @param pkts_n 3681 * Number of packets in array. 3682 * @param loc 3683 * Pointer to burst routine local context. 3684 * @param olx 3685 * Configured Tx offloads mask. It is fully defined at 3686 * compile time and may be used for optimization. 3687 * 3688 * @return 3689 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3690 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3691 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3692 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3693 * Local context variables updated. 3694 */ 3695 static __rte_always_inline enum mlx5_txcmp_code 3696 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 3697 struct rte_mbuf **__rte_restrict pkts, 3698 unsigned int pkts_n, 3699 struct mlx5_txq_local *__rte_restrict loc, 3700 unsigned int olx) 3701 { 3702 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3703 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3704 pkts += loc->pkts_sent + 1; 3705 pkts_n -= loc->pkts_sent; 3706 for (;;) { 3707 struct mlx5_wqe_dseg *__rte_restrict dseg; 3708 struct mlx5_wqe *__rte_restrict wqe; 3709 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 3710 uint8_t *dptr; 3711 3712 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3713 if (MLX5_TXOFF_CONFIG(TXPP)) { 3714 enum mlx5_txcmp_code wret; 3715 3716 /* Generate WAIT for scheduling if requested. */ 3717 wret = mlx5_tx_schedule_send(txq, loc, olx); 3718 if (wret == MLX5_TXCMP_CODE_EXIT) 3719 return MLX5_TXCMP_CODE_EXIT; 3720 if (wret == MLX5_TXCMP_CODE_ERROR) 3721 return MLX5_TXCMP_CODE_ERROR; 3722 } 3723 dlen = rte_pktmbuf_data_len(loc->mbuf); 3724 if (MLX5_TXOFF_CONFIG(VLAN) && 3725 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 3726 vlan = sizeof(struct rte_vlan_hdr); 3727 } 3728 /* 3729 * First calculate the WQE size to check 3730 * whether we have enough space in ring buffer. 3731 */ 3732 hlen = loc->mbuf->l2_len + vlan + 3733 loc->mbuf->l3_len + loc->mbuf->l4_len; 3734 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 3735 return MLX5_TXCMP_CODE_ERROR; 3736 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3737 hlen += loc->mbuf->outer_l2_len + 3738 loc->mbuf->outer_l3_len; 3739 /* Segment must contain all TSO headers. 
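		 *
		 * For illustration only (not part of the original comment):
		 * for a plain TCP/IPv4 packet hlen is typically
		 *   l2_len (14) + l3_len (20) + l4_len (20) = 54 bytes,
		 * optionally plus 4 bytes of VLAN and, for tunnel packets,
		 * the outer L2/L3 headers. These hlen bytes are inlined into
		 * the Ethernet Segment of the TSO WQE, while the remaining
		 * payload is referenced by the pointer Data Segment built
		 * below.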
*/ 3740 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 3741 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3742 hlen > (dlen + vlan))) 3743 return MLX5_TXCMP_CODE_ERROR; 3744 /* 3745 * Check whether there are enough free WQEBBs: 3746 * - Control Segment 3747 * - Ethernet Segment 3748 * - First Segment of inlined Ethernet data 3749 * - ... data continued ... 3750 * - Finishing Data Segment of pointer type 3751 */ 3752 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 3753 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3754 if (loc->wqe_free < ((ds + 3) / 4)) 3755 return MLX5_TXCMP_CODE_EXIT; 3756 #ifdef MLX5_PMD_SOFT_COUNTERS 3757 /* Update sent data bytes/packets counters. */ 3758 ntcp = (dlen + vlan - hlen + 3759 loc->mbuf->tso_segsz - 1) / 3760 loc->mbuf->tso_segsz; 3761 /* 3762 * One will be added for mbuf itself at the end 3763 * of the mlx5_tx_burst from loc->pkts_sent field. 3764 */ 3765 --ntcp; 3766 txq->stats.opackets += ntcp; 3767 txq->stats.obytes += dlen + vlan + ntcp * hlen; 3768 #endif 3769 /* 3770 * Build the TSO WQE: 3771 * - Control Segment 3772 * - Ethernet Segment with hlen bytes inlined 3773 * - Data Segment of pointer type 3774 */ 3775 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3776 loc->wqe_last = wqe; 3777 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3778 MLX5_OPCODE_TSO, olx); 3779 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 3780 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 3781 dlen -= hlen - vlan; 3782 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 3783 /* 3784 * WQE is built, update the loop parameters 3785 * and go to the next packet. 3786 */ 3787 txq->wqe_ci += (ds + 3) / 4; 3788 loc->wqe_free -= (ds + 3) / 4; 3789 if (MLX5_TXOFF_CONFIG(INLINE)) 3790 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3791 --loc->elts_free; 3792 ++loc->pkts_sent; 3793 --pkts_n; 3794 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3795 return MLX5_TXCMP_CODE_EXIT; 3796 loc->mbuf = *pkts++; 3797 if (pkts_n > 1) 3798 rte_prefetch0(*pkts); 3799 if (MLX5_TXOFF_CONFIG(MULTI) && 3800 unlikely(NB_SEGS(loc->mbuf) > 1)) 3801 return MLX5_TXCMP_CODE_MULTI; 3802 if (likely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) 3803 return MLX5_TXCMP_CODE_SINGLE; 3804 /* Continue with the next TSO packet. */ 3805 } 3806 MLX5_ASSERT(false); 3807 } 3808 3809 /** 3810 * Analyze the packet and select the best method to send. 3811 * 3812 * @param txq 3813 * Pointer to TX queue structure. 3814 * @param loc 3815 * Pointer to burst routine local context. 3816 * @param olx 3817 * Configured Tx offloads mask. It is fully defined at 3818 * compile time and may be used for optimization. 3819 * @param newp 3820 * The predefined flag whether do complete check for 3821 * multi-segment packets and TSO. 3822 * 3823 * @return 3824 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3825 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 3826 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 3827 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 3828 */ 3829 static __rte_always_inline enum mlx5_txcmp_code 3830 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 3831 struct mlx5_txq_local *__rte_restrict loc, 3832 unsigned int olx, 3833 bool newp) 3834 { 3835 /* Check for multi-segment packet. */ 3836 if (newp && 3837 MLX5_TXOFF_CONFIG(MULTI) && 3838 unlikely(NB_SEGS(loc->mbuf) > 1)) 3839 return MLX5_TXCMP_CODE_MULTI; 3840 /* Check for TSO packet. 
 */
	if (newp &&
	    MLX5_TXOFF_CONFIG(TSO) &&
	    unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))
		return MLX5_TXCMP_CODE_TSO;
	/* Check if eMPW is enabled at all. */
	if (!MLX5_TXOFF_CONFIG(EMPW))
		return MLX5_TXCMP_CODE_SINGLE;
	/* Check if eMPW can be engaged. */
	if (MLX5_TXOFF_CONFIG(VLAN) &&
	    unlikely(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) &&
		(!MLX5_TXOFF_CONFIG(INLINE) ||
		 unlikely((rte_pktmbuf_data_len(loc->mbuf) +
			   sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) {
		/*
		 * eMPW does not support VLAN insertion offload, so
		 * the entire packet would have to be inlined, but the
		 * packet is too long for inlining.
		 */
		return MLX5_TXCMP_CODE_SINGLE;
	}
	return MLX5_TXCMP_CODE_EMPW;
}

/**
 * Check the next packet attributes to match with the eMPW batch ones.
 * In addition, for legacy MPW the packet length is checked as well.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param es
 *   Pointer to Ethernet Segment of eMPW batch.
 * @param loc
 *   Pointer to burst routine local context.
 * @param dlen
 *   Length of previous packet in MPW descriptor.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   true - packet matches the eMPW batch attributes.
 *   false - no match, eMPW should be restarted.
 */
static __rte_always_inline bool
mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq,
		   struct mlx5_wqe_eseg *__rte_restrict es,
		   struct mlx5_txq_local *__rte_restrict loc,
		   uint32_t dlen,
		   unsigned int olx)
{
	uint8_t swp_flags = 0;

	/* Compare the checksum flags, if any. */
	if (MLX5_TXOFF_CONFIG(CSUM) &&
	    txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags)
		return false;
	/* Compare the Software Parser offsets and flags. */
	if (MLX5_TXOFF_CONFIG(SWP) &&
	    (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) ||
	     es->swp_flags != swp_flags))
		return false;
	/* Fill metadata field if needed. */
	if (MLX5_TXOFF_CONFIG(METADATA) &&
		es->metadata != (loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ?
				 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0))
		return false;
	/* Legacy MPW can send packets with the same length only. */
	if (MLX5_TXOFF_CONFIG(MPW) &&
	    dlen != rte_pktmbuf_data_len(loc->mbuf))
		return false;
	/* There must be no VLAN packets in eMPW loop. */
	if (MLX5_TXOFF_CONFIG(VLAN))
		MLX5_ASSERT(!(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT));
	/* Check if the scheduling is requested. */
	if (MLX5_TXOFF_CONFIG(TXPP) &&
	    loc->mbuf->ol_flags & txq->ts_mask)
		return false;
	return true;
}

/*
 * Update send loop variables and WQE for eMPW loop
 * without data inlining. The number of Data Segments is
 * equal to the number of sent packets.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param ds
 *   Number of sent packets (equals the number of Data Segments).
 * @param slen
 *   Accumulated statistics, bytes sent.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
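 *
 * Note (illustrative arithmetic, not from the original comment): the
 * title WQE of a non-inline eMPW session carries one Control and one
 * Ethernet Segment plus one pointer Data Segment per packet, so for
 * ds == 5 sent packets the DS count becomes 5 + 2 == 7 and the session
 * consumes (7 + 3) / 4 == 2 WQEBBs of the send queue.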
3941 */ 3942 static __rte_always_inline void 3943 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, 3944 struct mlx5_txq_local *__rte_restrict loc, 3945 unsigned int ds, 3946 unsigned int slen, 3947 unsigned int olx __rte_unused) 3948 { 3949 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 3950 #ifdef MLX5_PMD_SOFT_COUNTERS 3951 /* Update sent data bytes counter. */ 3952 txq->stats.obytes += slen; 3953 #else 3954 (void)slen; 3955 #endif 3956 loc->elts_free -= ds; 3957 loc->pkts_sent += ds; 3958 ds += 2; 3959 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3960 txq->wqe_ci += (ds + 3) / 4; 3961 loc->wqe_free -= (ds + 3) / 4; 3962 } 3963 3964 /* 3965 * Update send loop variables and WQE for eMPW loop 3966 * with data inlining. Gets the size of pushed descriptors 3967 * and data to the WQE. 3968 * 3969 * @param txq 3970 * Pointer to TX queue structure. 3971 * @param loc 3972 * Pointer to burst routine local context. 3973 * @param len 3974 * Total size of descriptor/data in bytes. 3975 * @param slen 3976 * Accumulated statistics, data bytes sent. 3977 * @param wqem 3978 * The base WQE for the eMPW/MPW descriptor. 3979 * @param olx 3980 * Configured Tx offloads mask. It is fully defined at 3981 * compile time and may be used for optimization. 3982 * 3983 * @return 3984 * true - packet match with eMPW batch attributes. 3985 * false - no match, eMPW should be restarted. 3986 */ 3987 static __rte_always_inline void 3988 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 3989 struct mlx5_txq_local *__rte_restrict loc, 3990 unsigned int len, 3991 unsigned int slen, 3992 struct mlx5_wqe *__rte_restrict wqem, 3993 unsigned int olx __rte_unused) 3994 { 3995 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 3996 3997 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3998 #ifdef MLX5_PMD_SOFT_COUNTERS 3999 /* Update sent data bytes counter. */ 4000 txq->stats.obytes += slen; 4001 #else 4002 (void)slen; 4003 #endif 4004 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 4005 /* 4006 * If the legacy MPW session contains the inline packets 4007 * we should set the only inline data segment length 4008 * and align the total length to the segment size. 4009 */ 4010 MLX5_ASSERT(len > sizeof(dseg->bcount)); 4011 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 4012 MLX5_ETH_WQE_DATA_INLINE); 4013 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 4014 } else { 4015 /* 4016 * The session is not legacy MPW or contains the 4017 * data buffer pointer segments. 4018 */ 4019 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 4020 len = len / MLX5_WSEG_SIZE + 2; 4021 } 4022 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 4023 txq->wqe_ci += (len + 3) / 4; 4024 loc->wqe_free -= (len + 3) / 4; 4025 loc->wqe_last = wqem; 4026 } 4027 4028 /** 4029 * The set of Tx burst functions for single-segment packets 4030 * without TSO and with Multi-Packet Writing feature support. 4031 * Supports all types of Tx offloads, except multi-packets 4032 * and TSO. 4033 * 4034 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends 4035 * as many packet per WQE as it can. If eMPW is not configured 4036 * or packet can not be sent with eMPW (VLAN insertion) the 4037 * ordinary SEND opcode is used and only one packet placed 4038 * in WQE. 4039 * 4040 * Functions stop sending if it encounters the multi-segment 4041 * packet or packet with TSO requested. 
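 *
 * As a usage sketch (the actual dispatch is done by
 * mlx5_tx_burst_single() further below), one of the two variants is
 * selected at compile time depending on the inline configuration:
 *
 *   ret = MLX5_TXOFF_CONFIG(INLINE) ?
 *         mlx5_tx_burst_empw_inline(txq, pkts, pkts_n, loc, olx) :
 *         mlx5_tx_burst_empw_simple(txq, pkts, pkts_n, loc, olx);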
4042 * 4043 * The routines are responsible for storing processed mbuf 4044 * into elts ring buffer and update elts_head if inlining 4045 * offload is requested. Otherwise the copying mbufs to elts 4046 * can be postponed and completed at the end of burst routine. 4047 * 4048 * @param txq 4049 * Pointer to TX queue structure. 4050 * @param[in] pkts 4051 * Packets to transmit. 4052 * @param pkts_n 4053 * Number of packets in array. 4054 * @param loc 4055 * Pointer to burst routine local context. 4056 * @param olx 4057 * Configured Tx offloads mask. It is fully defined at 4058 * compile time and may be used for optimization. 4059 * 4060 * @return 4061 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 4062 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 4063 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 4064 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 4065 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 4066 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 4067 * 4068 * Local context variables updated. 4069 * 4070 * 4071 * The routine sends packets with MLX5_OPCODE_EMPW 4072 * without inlining, this is dedicated optimized branch. 4073 * No VLAN insertion is supported. 4074 */ 4075 static __rte_always_inline enum mlx5_txcmp_code 4076 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 4077 struct rte_mbuf **__rte_restrict pkts, 4078 unsigned int pkts_n, 4079 struct mlx5_txq_local *__rte_restrict loc, 4080 unsigned int olx) 4081 { 4082 /* 4083 * Subroutine is the part of mlx5_tx_burst_single() 4084 * and sends single-segment packet with eMPW opcode 4085 * without data inlining. 4086 */ 4087 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4088 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 4089 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4090 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4091 pkts += loc->pkts_sent + 1; 4092 pkts_n -= loc->pkts_sent; 4093 for (;;) { 4094 struct mlx5_wqe_dseg *__rte_restrict dseg; 4095 struct mlx5_wqe_eseg *__rte_restrict eseg; 4096 enum mlx5_txcmp_code ret; 4097 unsigned int part, loop; 4098 unsigned int slen = 0; 4099 4100 next_empw: 4101 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4102 if (MLX5_TXOFF_CONFIG(TXPP)) { 4103 enum mlx5_txcmp_code wret; 4104 4105 /* Generate WAIT for scheduling if requested. */ 4106 wret = mlx5_tx_schedule_send(txq, loc, olx); 4107 if (wret == MLX5_TXCMP_CODE_EXIT) 4108 return MLX5_TXCMP_CODE_EXIT; 4109 if (wret == MLX5_TXCMP_CODE_ERROR) 4110 return MLX5_TXCMP_CODE_ERROR; 4111 } 4112 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4113 MLX5_MPW_MAX_PACKETS : 4114 MLX5_EMPW_MAX_PACKETS); 4115 if (unlikely(loc->elts_free < part)) { 4116 /* We have no enough elts to save all mbufs. */ 4117 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 4118 return MLX5_TXCMP_CODE_EXIT; 4119 /* But we still able to send at least minimal eMPW. 
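		 *
		 * Sketch of the WQE budget used just below (illustrative
		 * only): the eMPW title takes 2 WQE segments (Control +
		 * Ethernet) and every packet adds one pointer Data Segment,
		 * hence the (2 + part + 3) / 4 WQEBB check; e.g. a batch of
		 * part == 30 packets needs (2 + 30 + 3) / 4 == 8 free
		 * WQEBBs, and conversely with only loc->wqe_free WQEBBs left
		 * at most loc->wqe_free * 4 - 2 packets fit.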
*/ 4120 part = loc->elts_free; 4121 } 4122 /* Check whether we have enough WQEs */ 4123 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 4124 if (unlikely(loc->wqe_free < 4125 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4126 return MLX5_TXCMP_CODE_EXIT; 4127 part = (loc->wqe_free * 4) - 2; 4128 } 4129 if (likely(part > 1)) 4130 rte_prefetch0(*pkts); 4131 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4132 /* 4133 * Build eMPW title WQEBB: 4134 * - Control Segment, eMPW opcode 4135 * - Ethernet Segment, no inline 4136 */ 4137 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 4138 MLX5_OPCODE_ENHANCED_MPSW, olx); 4139 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 4140 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4141 eseg = &loc->wqe_last->eseg; 4142 dseg = &loc->wqe_last->dseg[0]; 4143 loop = part; 4144 /* Store the packet length for legacy MPW. */ 4145 if (MLX5_TXOFF_CONFIG(MPW)) 4146 eseg->mss = rte_cpu_to_be_16 4147 (rte_pktmbuf_data_len(loc->mbuf)); 4148 for (;;) { 4149 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4150 #ifdef MLX5_PMD_SOFT_COUNTERS 4151 /* Update sent data bytes counter. */ 4152 slen += dlen; 4153 #endif 4154 mlx5_tx_dseg_ptr 4155 (txq, loc, dseg, 4156 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4157 dlen, olx); 4158 if (unlikely(--loop == 0)) 4159 break; 4160 loc->mbuf = *pkts++; 4161 if (likely(loop > 1)) 4162 rte_prefetch0(*pkts); 4163 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4164 /* 4165 * Unroll the completion code to avoid 4166 * returning variable value - it results in 4167 * unoptimized sequent checking in caller. 4168 */ 4169 if (ret == MLX5_TXCMP_CODE_MULTI) { 4170 part -= loop; 4171 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4172 if (unlikely(!loc->elts_free || 4173 !loc->wqe_free)) 4174 return MLX5_TXCMP_CODE_EXIT; 4175 return MLX5_TXCMP_CODE_MULTI; 4176 } 4177 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4178 if (ret == MLX5_TXCMP_CODE_TSO) { 4179 part -= loop; 4180 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4181 if (unlikely(!loc->elts_free || 4182 !loc->wqe_free)) 4183 return MLX5_TXCMP_CODE_EXIT; 4184 return MLX5_TXCMP_CODE_TSO; 4185 } 4186 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4187 part -= loop; 4188 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4189 if (unlikely(!loc->elts_free || 4190 !loc->wqe_free)) 4191 return MLX5_TXCMP_CODE_EXIT; 4192 return MLX5_TXCMP_CODE_SINGLE; 4193 } 4194 if (ret != MLX5_TXCMP_CODE_EMPW) { 4195 MLX5_ASSERT(false); 4196 part -= loop; 4197 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4198 return MLX5_TXCMP_CODE_ERROR; 4199 } 4200 /* 4201 * Check whether packet parameters coincide 4202 * within assumed eMPW batch: 4203 * - check sum settings 4204 * - metadata value 4205 * - software parser settings 4206 * - packets length (legacy MPW only) 4207 * - scheduling is not required 4208 */ 4209 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 4210 MLX5_ASSERT(loop); 4211 part -= loop; 4212 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4213 if (unlikely(!loc->elts_free || 4214 !loc->wqe_free)) 4215 return MLX5_TXCMP_CODE_EXIT; 4216 pkts_n -= part; 4217 goto next_empw; 4218 } 4219 /* Packet attributes match, continue the same eMPW. */ 4220 ++dseg; 4221 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4222 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4223 } 4224 /* eMPW is built successfully, update loop parameters. */ 4225 MLX5_ASSERT(!loop); 4226 MLX5_ASSERT(pkts_n >= part); 4227 #ifdef MLX5_PMD_SOFT_COUNTERS 4228 /* Update sent data bytes counter. 
*/ 4229 txq->stats.obytes += slen; 4230 #endif 4231 loc->elts_free -= part; 4232 loc->pkts_sent += part; 4233 txq->wqe_ci += (2 + part + 3) / 4; 4234 loc->wqe_free -= (2 + part + 3) / 4; 4235 pkts_n -= part; 4236 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4237 return MLX5_TXCMP_CODE_EXIT; 4238 loc->mbuf = *pkts++; 4239 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4240 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 4241 return ret; 4242 /* Continue sending eMPW batches. */ 4243 } 4244 MLX5_ASSERT(false); 4245 } 4246 4247 /** 4248 * The routine sends packets with MLX5_OPCODE_EMPW 4249 * with inlining, optionally supports VLAN insertion. 4250 */ 4251 static __rte_always_inline enum mlx5_txcmp_code 4252 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 4253 struct rte_mbuf **__rte_restrict pkts, 4254 unsigned int pkts_n, 4255 struct mlx5_txq_local *__rte_restrict loc, 4256 unsigned int olx) 4257 { 4258 /* 4259 * Subroutine is the part of mlx5_tx_burst_single() 4260 * and sends single-segment packet with eMPW opcode 4261 * with data inlining. 4262 */ 4263 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4264 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 4265 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4266 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4267 pkts += loc->pkts_sent + 1; 4268 pkts_n -= loc->pkts_sent; 4269 for (;;) { 4270 struct mlx5_wqe_dseg *__rte_restrict dseg; 4271 struct mlx5_wqe *__rte_restrict wqem; 4272 enum mlx5_txcmp_code ret; 4273 unsigned int room, part, nlim; 4274 unsigned int slen = 0; 4275 4276 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4277 if (MLX5_TXOFF_CONFIG(TXPP)) { 4278 enum mlx5_txcmp_code wret; 4279 4280 /* Generate WAIT for scheduling if requested. */ 4281 wret = mlx5_tx_schedule_send(txq, loc, olx); 4282 if (wret == MLX5_TXCMP_CODE_EXIT) 4283 return MLX5_TXCMP_CODE_EXIT; 4284 if (wret == MLX5_TXCMP_CODE_ERROR) 4285 return MLX5_TXCMP_CODE_ERROR; 4286 } 4287 /* 4288 * Limits the amount of packets in one WQE 4289 * to improve CQE latency generation. 4290 */ 4291 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4292 MLX5_MPW_INLINE_MAX_PACKETS : 4293 MLX5_EMPW_MAX_PACKETS); 4294 /* Check whether we have minimal amount WQEs */ 4295 if (unlikely(loc->wqe_free < 4296 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4297 return MLX5_TXCMP_CODE_EXIT; 4298 if (likely(pkts_n > 1)) 4299 rte_prefetch0(*pkts); 4300 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4301 /* 4302 * Build eMPW title WQEBB: 4303 * - Control Segment, eMPW opcode, zero DS 4304 * - Ethernet Segment, no inline 4305 */ 4306 mlx5_tx_cseg_init(txq, loc, wqem, 0, 4307 MLX5_OPCODE_ENHANCED_MPSW, olx); 4308 mlx5_tx_eseg_none(txq, loc, wqem, 4309 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4310 dseg = &wqem->dseg[0]; 4311 /* Store the packet length for legacy MPW. */ 4312 if (MLX5_TXOFF_CONFIG(MPW)) 4313 wqem->eseg.mss = rte_cpu_to_be_16 4314 (rte_pktmbuf_data_len(loc->mbuf)); 4315 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 4316 loc->wqe_free) * MLX5_WQE_SIZE - 4317 MLX5_WQE_CSEG_SIZE - 4318 MLX5_WQE_ESEG_SIZE; 4319 /* Limit the room for legacy MPW sessions for performance. */ 4320 if (MLX5_TXOFF_CONFIG(MPW)) 4321 room = RTE_MIN(room, 4322 RTE_MAX(txq->inlen_empw + 4323 sizeof(dseg->bcount) + 4324 (MLX5_TXOFF_CONFIG(VLAN) ? 4325 sizeof(struct rte_vlan_hdr) : 0), 4326 MLX5_MPW_INLINE_MAX_PACKETS * 4327 MLX5_WQE_DSEG_SIZE)); 4328 /* Build WQE till we have space, packets and resources. 
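		 *
		 * Rough room accounting example (illustrative, assuming the
		 * usual 64-byte WQEBB and 16-byte Control/Ethernet segments):
		 * with loc->wqe_free == 4 and no legacy MPW limit applied,
		 *   room = 4 * 64 - 16 - 16 = 224 bytes,
		 * i.e. enough for fourteen 16-byte pointer Data Segments or
		 * a correspondingly smaller number of inlined packets.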
*/ 4329 part = room; 4330 for (;;) { 4331 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4332 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 4333 unsigned int tlen; 4334 4335 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 4336 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 4337 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 4338 /* 4339 * Some Tx offloads may cause an error if 4340 * packet is not long enough, check against 4341 * assumed minimal length. 4342 */ 4343 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 4344 part -= room; 4345 if (unlikely(!part)) 4346 return MLX5_TXCMP_CODE_ERROR; 4347 /* 4348 * We have some successfully built 4349 * packet Data Segments to send. 4350 */ 4351 mlx5_tx_idone_empw(txq, loc, part, 4352 slen, wqem, olx); 4353 return MLX5_TXCMP_CODE_ERROR; 4354 } 4355 /* Inline or not inline - that's the Question. */ 4356 if (dlen > txq->inlen_empw || 4357 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) 4358 goto pointer_empw; 4359 if (MLX5_TXOFF_CONFIG(MPW)) { 4360 if (dlen > txq->inlen_send) 4361 goto pointer_empw; 4362 tlen = dlen; 4363 if (part == room) { 4364 /* Open new inline MPW session. */ 4365 tlen += sizeof(dseg->bcount); 4366 dseg->bcount = RTE_BE32(0); 4367 dseg = RTE_PTR_ADD 4368 (dseg, sizeof(dseg->bcount)); 4369 } else { 4370 /* 4371 * No pointer and inline descriptor 4372 * intermix for legacy MPW sessions. 4373 */ 4374 if (wqem->dseg[0].bcount) 4375 break; 4376 } 4377 } else { 4378 tlen = sizeof(dseg->bcount) + dlen; 4379 } 4380 /* Inline entire packet, optional VLAN insertion. */ 4381 if (MLX5_TXOFF_CONFIG(VLAN) && 4382 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4383 /* 4384 * The packet length must be checked in 4385 * mlx5_tx_able_to_empw() and packet 4386 * fits into inline length guaranteed. 4387 */ 4388 MLX5_ASSERT((dlen + 4389 sizeof(struct rte_vlan_hdr)) <= 4390 txq->inlen_empw); 4391 tlen += sizeof(struct rte_vlan_hdr); 4392 if (room < tlen) 4393 break; 4394 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 4395 dptr, dlen, olx); 4396 #ifdef MLX5_PMD_SOFT_COUNTERS 4397 /* Update sent data bytes counter. */ 4398 slen += sizeof(struct rte_vlan_hdr); 4399 #endif 4400 } else { 4401 if (room < tlen) 4402 break; 4403 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 4404 dptr, dlen, olx); 4405 } 4406 if (!MLX5_TXOFF_CONFIG(MPW)) 4407 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 4408 MLX5_ASSERT(room >= tlen); 4409 room -= tlen; 4410 /* 4411 * Packet data are completely inlined, 4412 * free the packet immediately. 4413 */ 4414 rte_pktmbuf_free_seg(loc->mbuf); 4415 goto next_mbuf; 4416 pointer_empw: 4417 /* 4418 * No pointer and inline descriptor 4419 * intermix for legacy MPW sessions. 4420 */ 4421 if (MLX5_TXOFF_CONFIG(MPW) && 4422 part != room && 4423 wqem->dseg[0].bcount == RTE_BE32(0)) 4424 break; 4425 /* 4426 * Not inlinable VLAN packets are 4427 * proceeded outside of this routine. 4428 */ 4429 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 4430 if (MLX5_TXOFF_CONFIG(VLAN)) 4431 MLX5_ASSERT(!(loc->mbuf->ol_flags & 4432 PKT_TX_VLAN_PKT)); 4433 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 4434 /* We have to store mbuf in elts.*/ 4435 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 4436 room -= MLX5_WQE_DSEG_SIZE; 4437 /* Ring buffer wraparound is checked at the loop end.*/ 4438 ++dseg; 4439 next_mbuf: 4440 #ifdef MLX5_PMD_SOFT_COUNTERS 4441 /* Update sent data bytes counter. 
*/ 4442 slen += dlen; 4443 #endif 4444 loc->pkts_sent++; 4445 loc->elts_free--; 4446 pkts_n--; 4447 if (unlikely(!pkts_n || !loc->elts_free)) { 4448 /* 4449 * We have no resources/packets to 4450 * continue build descriptors. 4451 */ 4452 part -= room; 4453 mlx5_tx_idone_empw(txq, loc, part, 4454 slen, wqem, olx); 4455 return MLX5_TXCMP_CODE_EXIT; 4456 } 4457 loc->mbuf = *pkts++; 4458 if (likely(pkts_n > 1)) 4459 rte_prefetch0(*pkts); 4460 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4461 /* 4462 * Unroll the completion code to avoid 4463 * returning variable value - it results in 4464 * unoptimized sequent checking in caller. 4465 */ 4466 if (ret == MLX5_TXCMP_CODE_MULTI) { 4467 part -= room; 4468 mlx5_tx_idone_empw(txq, loc, part, 4469 slen, wqem, olx); 4470 if (unlikely(!loc->elts_free || 4471 !loc->wqe_free)) 4472 return MLX5_TXCMP_CODE_EXIT; 4473 return MLX5_TXCMP_CODE_MULTI; 4474 } 4475 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4476 if (ret == MLX5_TXCMP_CODE_TSO) { 4477 part -= room; 4478 mlx5_tx_idone_empw(txq, loc, part, 4479 slen, wqem, olx); 4480 if (unlikely(!loc->elts_free || 4481 !loc->wqe_free)) 4482 return MLX5_TXCMP_CODE_EXIT; 4483 return MLX5_TXCMP_CODE_TSO; 4484 } 4485 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4486 part -= room; 4487 mlx5_tx_idone_empw(txq, loc, part, 4488 slen, wqem, olx); 4489 if (unlikely(!loc->elts_free || 4490 !loc->wqe_free)) 4491 return MLX5_TXCMP_CODE_EXIT; 4492 return MLX5_TXCMP_CODE_SINGLE; 4493 } 4494 if (ret != MLX5_TXCMP_CODE_EMPW) { 4495 MLX5_ASSERT(false); 4496 part -= room; 4497 mlx5_tx_idone_empw(txq, loc, part, 4498 slen, wqem, olx); 4499 return MLX5_TXCMP_CODE_ERROR; 4500 } 4501 /* Check if we have minimal room left. */ 4502 nlim--; 4503 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 4504 break; 4505 /* 4506 * Check whether packet parameters coincide 4507 * within assumed eMPW batch: 4508 * - check sum settings 4509 * - metadata value 4510 * - software parser settings 4511 * - packets length (legacy MPW only) 4512 * - scheduling is not required 4513 */ 4514 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 4515 loc, dlen, olx)) 4516 break; 4517 /* Packet attributes match, continue the same eMPW. */ 4518 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4519 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4520 } 4521 /* 4522 * We get here to close an existing eMPW 4523 * session and start the new one. 4524 */ 4525 MLX5_ASSERT(pkts_n); 4526 part -= room; 4527 if (unlikely(!part)) 4528 return MLX5_TXCMP_CODE_EXIT; 4529 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 4530 if (unlikely(!loc->elts_free || 4531 !loc->wqe_free)) 4532 return MLX5_TXCMP_CODE_EXIT; 4533 /* Continue the loop with new eMPW session. */ 4534 } 4535 MLX5_ASSERT(false); 4536 } 4537 4538 /** 4539 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 4540 * Data inlining and VLAN insertion are supported. 4541 */ 4542 static __rte_always_inline enum mlx5_txcmp_code 4543 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 4544 struct rte_mbuf **__rte_restrict pkts, 4545 unsigned int pkts_n, 4546 struct mlx5_txq_local *__rte_restrict loc, 4547 unsigned int olx) 4548 { 4549 /* 4550 * Subroutine is the part of mlx5_tx_burst_single() 4551 * and sends single-segment packet with SEND opcode. 
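	 *
	 * Roughly, the branches below choose between (illustrative summary,
	 * ignoring the PKT_TX_DYNF_NOINLINE hint handling):
	 *   - inlen <= txq->inlen_send: inline the whole packet data and
	 *     free the mbuf immediately;
	 *   - txq->inlen_mode is set (and eMPW is not engaged): inline
	 *     exactly inlen_mode bytes and add a pointer Data Segment for
	 *     the rest;
	 *   - otherwise: inline the minimal MLX5_ESEG_MIN_INLINE_SIZE bytes
	 *     plus a pointer Data Segment;
	 * and, if inlining is not configured at all, build a plain
	 * three-segment SEND WQE.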
4552 */ 4553 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4554 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4555 pkts += loc->pkts_sent + 1; 4556 pkts_n -= loc->pkts_sent; 4557 for (;;) { 4558 struct mlx5_wqe *__rte_restrict wqe; 4559 enum mlx5_txcmp_code ret; 4560 4561 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4562 if (MLX5_TXOFF_CONFIG(TXPP)) { 4563 enum mlx5_txcmp_code wret; 4564 4565 /* Generate WAIT for scheduling if requested. */ 4566 wret = mlx5_tx_schedule_send(txq, loc, olx); 4567 if (wret == MLX5_TXCMP_CODE_EXIT) 4568 return MLX5_TXCMP_CODE_EXIT; 4569 if (wret == MLX5_TXCMP_CODE_ERROR) 4570 return MLX5_TXCMP_CODE_ERROR; 4571 } 4572 if (MLX5_TXOFF_CONFIG(INLINE)) { 4573 unsigned int inlen, vlan = 0; 4574 4575 inlen = rte_pktmbuf_data_len(loc->mbuf); 4576 if (MLX5_TXOFF_CONFIG(VLAN) && 4577 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4578 vlan = sizeof(struct rte_vlan_hdr); 4579 inlen += vlan; 4580 } 4581 /* 4582 * If inlining is enabled at configuration time 4583 * the limit must be not less than minimal size. 4584 * Otherwise we would do extra check for data 4585 * size to avoid crashes due to length overflow. 4586 */ 4587 MLX5_ASSERT(txq->inlen_send >= 4588 MLX5_ESEG_MIN_INLINE_SIZE); 4589 if (inlen <= txq->inlen_send) { 4590 unsigned int seg_n, wqe_n; 4591 4592 rte_prefetch0(rte_pktmbuf_mtod 4593 (loc->mbuf, uint8_t *)); 4594 /* Check against minimal length. */ 4595 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 4596 return MLX5_TXCMP_CODE_ERROR; 4597 if (loc->mbuf->ol_flags & 4598 PKT_TX_DYNF_NOINLINE) { 4599 /* 4600 * The hint flag not to inline packet 4601 * data is set. Check whether we can 4602 * follow the hint. 4603 */ 4604 if ((!MLX5_TXOFF_CONFIG(EMPW) && 4605 txq->inlen_mode) || 4606 (MLX5_TXOFF_CONFIG(MPW) && 4607 txq->inlen_mode)) { 4608 if (inlen <= txq->inlen_send) 4609 goto single_inline; 4610 /* 4611 * The hardware requires the 4612 * minimal inline data header. 4613 */ 4614 goto single_min_inline; 4615 } 4616 if (MLX5_TXOFF_CONFIG(VLAN) && 4617 vlan && !txq->vlan_en) { 4618 /* 4619 * We must insert VLAN tag 4620 * by software means. 4621 */ 4622 goto single_part_inline; 4623 } 4624 goto single_no_inline; 4625 } 4626 single_inline: 4627 /* 4628 * Completely inlined packet data WQE: 4629 * - Control Segment, SEND opcode 4630 * - Ethernet Segment, no VLAN insertion 4631 * - Data inlined, VLAN optionally inserted 4632 * - Alignment to MLX5_WSEG_SIZE 4633 * Have to estimate amount of WQEBBs 4634 */ 4635 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 4636 MLX5_ESEG_MIN_INLINE_SIZE + 4637 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4638 /* Check if there are enough WQEBBs. */ 4639 wqe_n = (seg_n + 3) / 4; 4640 if (wqe_n > loc->wqe_free) 4641 return MLX5_TXCMP_CODE_EXIT; 4642 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4643 loc->wqe_last = wqe; 4644 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 4645 MLX5_OPCODE_SEND, olx); 4646 mlx5_tx_eseg_data(txq, loc, wqe, 4647 vlan, inlen, 0, olx); 4648 txq->wqe_ci += wqe_n; 4649 loc->wqe_free -= wqe_n; 4650 /* 4651 * Packet data are completely inlined, 4652 * free the packet immediately. 4653 */ 4654 rte_pktmbuf_free_seg(loc->mbuf); 4655 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 4656 MLX5_TXOFF_CONFIG(MPW)) && 4657 txq->inlen_mode) { 4658 /* 4659 * If minimal inlining is requested the eMPW 4660 * feature should be disabled due to data is 4661 * inlined into Ethernet Segment, which can 4662 * not contain inlined data for eMPW due to 4663 * segment shared for all packets. 
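			 *
			 * Worked WQEBB example for this minimal-inline path
			 * (illustrative only, assuming 16-byte WQE segments
			 * and the 18-byte minimal inline size): with
			 * txq->inlen_mode == 64 the ds computation below
			 * yields
			 *   ds = (16 + 16 + 16 + 64 - 18 + 16 + 15) / 16 = 7,
			 * i.e. (7 + 3) / 4 = 2 free WQEBBs are required
			 * before building the WQE.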
4664 */ 4665 struct mlx5_wqe_dseg *__rte_restrict dseg; 4666 unsigned int ds; 4667 uint8_t *dptr; 4668 4669 /* 4670 * The inline-mode settings require 4671 * to inline the specified amount of 4672 * data bytes to the Ethernet Segment. 4673 * We should check the free space in 4674 * WQE ring buffer to inline partially. 4675 */ 4676 single_min_inline: 4677 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 4678 MLX5_ASSERT(inlen > txq->inlen_mode); 4679 MLX5_ASSERT(txq->inlen_mode >= 4680 MLX5_ESEG_MIN_INLINE_SIZE); 4681 /* 4682 * Check whether there are enough free WQEBBs: 4683 * - Control Segment 4684 * - Ethernet Segment 4685 * - First Segment of inlined Ethernet data 4686 * - ... data continued ... 4687 * - Finishing Data Segment of pointer type 4688 */ 4689 ds = (MLX5_WQE_CSEG_SIZE + 4690 MLX5_WQE_ESEG_SIZE + 4691 MLX5_WQE_DSEG_SIZE + 4692 txq->inlen_mode - 4693 MLX5_ESEG_MIN_INLINE_SIZE + 4694 MLX5_WQE_DSEG_SIZE + 4695 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4696 if (loc->wqe_free < ((ds + 3) / 4)) 4697 return MLX5_TXCMP_CODE_EXIT; 4698 /* 4699 * Build the ordinary SEND WQE: 4700 * - Control Segment 4701 * - Ethernet Segment, inline inlen_mode bytes 4702 * - Data Segment of pointer type 4703 */ 4704 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4705 loc->wqe_last = wqe; 4706 mlx5_tx_cseg_init(txq, loc, wqe, ds, 4707 MLX5_OPCODE_SEND, olx); 4708 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 4709 txq->inlen_mode, 4710 0, olx); 4711 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4712 txq->inlen_mode - vlan; 4713 inlen -= txq->inlen_mode; 4714 mlx5_tx_dseg_ptr(txq, loc, dseg, 4715 dptr, inlen, olx); 4716 /* 4717 * WQE is built, update the loop parameters 4718 * and got to the next packet. 4719 */ 4720 txq->wqe_ci += (ds + 3) / 4; 4721 loc->wqe_free -= (ds + 3) / 4; 4722 /* We have to store mbuf in elts.*/ 4723 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4724 txq->elts[txq->elts_head++ & txq->elts_m] = 4725 loc->mbuf; 4726 --loc->elts_free; 4727 } else { 4728 uint8_t *dptr; 4729 unsigned int dlen; 4730 4731 /* 4732 * Partially inlined packet data WQE, we have 4733 * some space in title WQEBB, we can fill it 4734 * with some packet data. It takes one WQEBB, 4735 * it is available, no extra space check: 4736 * - Control Segment, SEND opcode 4737 * - Ethernet Segment, no VLAN insertion 4738 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 4739 * - Data Segment, pointer type 4740 * 4741 * We also get here if VLAN insertion is not 4742 * supported by HW, the inline is enabled. 4743 */ 4744 single_part_inline: 4745 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4746 loc->wqe_last = wqe; 4747 mlx5_tx_cseg_init(txq, loc, wqe, 4, 4748 MLX5_OPCODE_SEND, olx); 4749 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 4750 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4751 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 4752 /* 4753 * The length check is performed above, by 4754 * comparing with txq->inlen_send. We should 4755 * not get overflow here. 4756 */ 4757 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 4758 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 4759 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 4760 dptr, dlen, olx); 4761 ++txq->wqe_ci; 4762 --loc->wqe_free; 4763 /* We have to store mbuf in elts.*/ 4764 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4765 txq->elts[txq->elts_head++ & txq->elts_m] = 4766 loc->mbuf; 4767 --loc->elts_free; 4768 } 4769 #ifdef MLX5_PMD_SOFT_COUNTERS 4770 /* Update sent data bytes counter. 
*/ 4771 txq->stats.obytes += vlan + 4772 rte_pktmbuf_data_len(loc->mbuf); 4773 #endif 4774 } else { 4775 /* 4776 * No inline at all, it means the CPU cycles saving 4777 * is prioritized at configuration, we should not 4778 * copy any packet data to WQE. 4779 * 4780 * SEND WQE, one WQEBB: 4781 * - Control Segment, SEND opcode 4782 * - Ethernet Segment, optional VLAN, no inline 4783 * - Data Segment, pointer type 4784 */ 4785 single_no_inline: 4786 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4787 loc->wqe_last = wqe; 4788 mlx5_tx_cseg_init(txq, loc, wqe, 3, 4789 MLX5_OPCODE_SEND, olx); 4790 mlx5_tx_eseg_none(txq, loc, wqe, olx); 4791 mlx5_tx_dseg_ptr 4792 (txq, loc, &wqe->dseg[0], 4793 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4794 rte_pktmbuf_data_len(loc->mbuf), olx); 4795 ++txq->wqe_ci; 4796 --loc->wqe_free; 4797 /* 4798 * We should not store mbuf pointer in elts 4799 * if no inlining is configured, this is done 4800 * by calling routine in a batch copy. 4801 */ 4802 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4803 --loc->elts_free; 4804 #ifdef MLX5_PMD_SOFT_COUNTERS 4805 /* Update sent data bytes counter. */ 4806 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 4807 if (MLX5_TXOFF_CONFIG(VLAN) && 4808 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 4809 txq->stats.obytes += 4810 sizeof(struct rte_vlan_hdr); 4811 #endif 4812 } 4813 ++loc->pkts_sent; 4814 --pkts_n; 4815 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4816 return MLX5_TXCMP_CODE_EXIT; 4817 loc->mbuf = *pkts++; 4818 if (pkts_n > 1) 4819 rte_prefetch0(*pkts); 4820 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4821 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 4822 return ret; 4823 } 4824 MLX5_ASSERT(false); 4825 } 4826 4827 static __rte_always_inline enum mlx5_txcmp_code 4828 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 4829 struct rte_mbuf **__rte_restrict pkts, 4830 unsigned int pkts_n, 4831 struct mlx5_txq_local *__rte_restrict loc, 4832 unsigned int olx) 4833 { 4834 enum mlx5_txcmp_code ret; 4835 4836 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 4837 if (ret == MLX5_TXCMP_CODE_SINGLE) 4838 goto ordinary_send; 4839 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 4840 for (;;) { 4841 /* Optimize for inline/no inline eMPW send. */ 4842 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 4843 mlx5_tx_burst_empw_inline 4844 (txq, pkts, pkts_n, loc, olx) : 4845 mlx5_tx_burst_empw_simple 4846 (txq, pkts, pkts_n, loc, olx); 4847 if (ret != MLX5_TXCMP_CODE_SINGLE) 4848 return ret; 4849 /* The resources to send one packet should remain. */ 4850 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4851 ordinary_send: 4852 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 4853 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 4854 if (ret != MLX5_TXCMP_CODE_EMPW) 4855 return ret; 4856 /* The resources to send one packet should remain. */ 4857 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4858 } 4859 } 4860 4861 /** 4862 * DPDK Tx callback template. This is configured template 4863 * used to generate routines optimized for specified offload setup. 4864 * One of this generated functions is chosen at SQ configuration 4865 * time. 4866 * 4867 * @param txq 4868 * Generic pointer to TX queue structure. 4869 * @param[in] pkts 4870 * Packets to transmit. 4871 * @param pkts_n 4872 * Number of packets in array. 4873 * @param olx 4874 * Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx 4875 * values. Should be static to take compile time static configuration 4876 * advantages. 
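 *
 *   For example (taken from the declaration lists further below), the
 *   "md_empw" variant is generated with
 *     MLX5_TXOFF_DECL(md_empw,
 *                     MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)
 *   so every MLX5_TXOFF_CONFIG() test in this template folds to a
 *   compile-time constant and the unused branches are optimized away.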
4877 * 4878 * @return 4879 * Number of packets successfully transmitted (<= pkts_n). 4880 */ 4881 static __rte_always_inline uint16_t 4882 mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, 4883 struct rte_mbuf **__rte_restrict pkts, 4884 uint16_t pkts_n, 4885 unsigned int olx) 4886 { 4887 struct mlx5_txq_local loc; 4888 enum mlx5_txcmp_code ret; 4889 unsigned int part; 4890 4891 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4892 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4893 if (unlikely(!pkts_n)) 4894 return 0; 4895 loc.pkts_sent = 0; 4896 loc.pkts_copy = 0; 4897 loc.wqe_last = NULL; 4898 4899 send_loop: 4900 loc.pkts_loop = loc.pkts_sent; 4901 /* 4902 * Check if there are some CQEs, if any: 4903 * - process an encountered errors 4904 * - process the completed WQEs 4905 * - free related mbufs 4906 * - doorbell the NIC about processed CQEs 4907 */ 4908 rte_prefetch0(*(pkts + loc.pkts_sent)); 4909 mlx5_tx_handle_completion(txq, olx); 4910 /* 4911 * Calculate the number of available resources - elts and WQEs. 4912 * There are two possible different scenarios: 4913 * - no data inlining into WQEs, one WQEBB may contains up to 4914 * four packets, in this case elts become scarce resource 4915 * - data inlining into WQEs, one packet may require multiple 4916 * WQEBBs, the WQEs become the limiting factor. 4917 */ 4918 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4919 loc.elts_free = txq->elts_s - 4920 (uint16_t)(txq->elts_head - txq->elts_tail); 4921 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4922 loc.wqe_free = txq->wqe_s - 4923 (uint16_t)(txq->wqe_ci - txq->wqe_pi); 4924 if (unlikely(!loc.elts_free || !loc.wqe_free)) 4925 goto burst_exit; 4926 for (;;) { 4927 /* 4928 * Fetch the packet from array. Usually this is 4929 * the first packet in series of multi/single 4930 * segment packets. 4931 */ 4932 loc.mbuf = *(pkts + loc.pkts_sent); 4933 /* Dedicated branch for multi-segment packets. */ 4934 if (MLX5_TXOFF_CONFIG(MULTI) && 4935 unlikely(NB_SEGS(loc.mbuf) > 1)) { 4936 /* 4937 * Multi-segment packet encountered. 4938 * Hardware is able to process it only 4939 * with SEND/TSO opcodes, one packet 4940 * per WQE, do it in dedicated routine. 4941 */ 4942 enter_send_multi: 4943 MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy); 4944 part = loc.pkts_sent - loc.pkts_copy; 4945 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 4946 /* 4947 * There are some single-segment mbufs not 4948 * stored in elts. The mbufs must be in the 4949 * same order as WQEs, so we must copy the 4950 * mbufs to elts here, before the coming 4951 * multi-segment packet mbufs is appended. 4952 */ 4953 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, 4954 part, olx); 4955 loc.pkts_copy = loc.pkts_sent; 4956 } 4957 MLX5_ASSERT(pkts_n > loc.pkts_sent); 4958 ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx); 4959 if (!MLX5_TXOFF_CONFIG(INLINE)) 4960 loc.pkts_copy = loc.pkts_sent; 4961 /* 4962 * These returned code checks are supposed 4963 * to be optimized out due to routine inlining. 4964 */ 4965 if (ret == MLX5_TXCMP_CODE_EXIT) { 4966 /* 4967 * The routine returns this code when 4968 * all packets are sent or there is no 4969 * enough resources to complete request. 4970 */ 4971 break; 4972 } 4973 if (ret == MLX5_TXCMP_CODE_ERROR) { 4974 /* 4975 * The routine returns this code when 4976 * some error in the incoming packets 4977 * format occurred. 
4978 */ 4979 txq->stats.oerrors++; 4980 break; 4981 } 4982 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4983 /* 4984 * The single-segment packet was encountered 4985 * in the array, try to send it with the 4986 * best optimized way, possible engaging eMPW. 4987 */ 4988 goto enter_send_single; 4989 } 4990 if (MLX5_TXOFF_CONFIG(TSO) && 4991 ret == MLX5_TXCMP_CODE_TSO) { 4992 /* 4993 * The single-segment TSO packet was 4994 * encountered in the array. 4995 */ 4996 goto enter_send_tso; 4997 } 4998 /* We must not get here. Something is going wrong. */ 4999 MLX5_ASSERT(false); 5000 txq->stats.oerrors++; 5001 break; 5002 } 5003 /* Dedicated branch for single-segment TSO packets. */ 5004 if (MLX5_TXOFF_CONFIG(TSO) && 5005 unlikely(loc.mbuf->ol_flags & PKT_TX_TCP_SEG)) { 5006 /* 5007 * TSO might require special way for inlining 5008 * (dedicated parameters) and is sent with 5009 * MLX5_OPCODE_TSO opcode only, provide this 5010 * in dedicated branch. 5011 */ 5012 enter_send_tso: 5013 MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1); 5014 MLX5_ASSERT(pkts_n > loc.pkts_sent); 5015 ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx); 5016 /* 5017 * These returned code checks are supposed 5018 * to be optimized out due to routine inlining. 5019 */ 5020 if (ret == MLX5_TXCMP_CODE_EXIT) 5021 break; 5022 if (ret == MLX5_TXCMP_CODE_ERROR) { 5023 txq->stats.oerrors++; 5024 break; 5025 } 5026 if (ret == MLX5_TXCMP_CODE_SINGLE) 5027 goto enter_send_single; 5028 if (MLX5_TXOFF_CONFIG(MULTI) && 5029 ret == MLX5_TXCMP_CODE_MULTI) { 5030 /* 5031 * The multi-segment packet was 5032 * encountered in the array. 5033 */ 5034 goto enter_send_multi; 5035 } 5036 /* We must not get here. Something is going wrong. */ 5037 MLX5_ASSERT(false); 5038 txq->stats.oerrors++; 5039 break; 5040 } 5041 /* 5042 * The dedicated branch for the single-segment packets 5043 * without TSO. Often these ones can be sent using 5044 * MLX5_OPCODE_EMPW with multiple packets in one WQE. 5045 * The routine builds the WQEs till it encounters 5046 * the TSO or multi-segment packet (in case if these 5047 * offloads are requested at SQ configuration time). 5048 */ 5049 enter_send_single: 5050 MLX5_ASSERT(pkts_n > loc.pkts_sent); 5051 ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx); 5052 /* 5053 * These returned code checks are supposed 5054 * to be optimized out due to routine inlining. 5055 */ 5056 if (ret == MLX5_TXCMP_CODE_EXIT) 5057 break; 5058 if (ret == MLX5_TXCMP_CODE_ERROR) { 5059 txq->stats.oerrors++; 5060 break; 5061 } 5062 if (MLX5_TXOFF_CONFIG(MULTI) && 5063 ret == MLX5_TXCMP_CODE_MULTI) { 5064 /* 5065 * The multi-segment packet was 5066 * encountered in the array. 5067 */ 5068 goto enter_send_multi; 5069 } 5070 if (MLX5_TXOFF_CONFIG(TSO) && 5071 ret == MLX5_TXCMP_CODE_TSO) { 5072 /* 5073 * The single-segment TSO packet was 5074 * encountered in the array. 5075 */ 5076 goto enter_send_tso; 5077 } 5078 /* We must not get here. Something is going wrong. */ 5079 MLX5_ASSERT(false); 5080 txq->stats.oerrors++; 5081 break; 5082 } 5083 /* 5084 * Main Tx loop is completed, do the rest: 5085 * - set completion request if thresholds are reached 5086 * - doorbell the hardware 5087 * - copy the rest of mbufs to elts (if any) 5088 */ 5089 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) || 5090 loc.pkts_sent >= loc.pkts_copy); 5091 /* Take a shortcut if nothing is sent. */ 5092 if (unlikely(loc.pkts_sent == loc.pkts_loop)) 5093 goto burst_exit; 5094 /* Request CQE generation if limits are reached. 
 */
	mlx5_tx_request_completion(txq, &loc, olx);
	/*
	 * Ring the QP doorbell immediately after WQE building completion
	 * to improve latencies. The purely software-related data treatment
	 * can be completed after the doorbell. Tx CQEs for this SQ are
	 * processed in this thread only by the polling.
	 *
	 * The rdma core library can map the doorbell register in two ways,
	 * depending on the environment variable "MLX5_SHUT_UP_BF":
	 *
	 * - as regular cached memory, when the variable is either missing
	 *   or set to zero. This type of mapping may cause significant
	 *   doorbell register write latency and requires an explicit memory
	 *   write barrier to mitigate this issue and prevent write
	 *   combining.
	 *
	 * - as non-cached memory, when the variable is present and set to a
	 *   non-zero value. This type of mapping may cause a performance
	 *   impact under heavy load conditions, but the explicit write
	 *   memory barrier is not required and it may improve core
	 *   performance.
	 *
	 * - the legacy behaviour (prior to the 19.08 release) was to use
	 *   some heuristics to decide whether a write memory barrier should
	 *   be performed. This behavior is supported by specifying
	 *   tx_db_nc=2: the write barrier is skipped if the application
	 *   provides the full recommended burst of packets, on the
	 *   assumption that the next packets are coming and the write
	 *   barrier will be issued on the next burst (after descriptor
	 *   writing, at least).
	 */
	mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc &&
			(!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
	/* Not all of the mbufs may be stored into elts yet. */
	part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
	if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
		/*
		 * There are some single-segment mbufs not stored in elts.
		 * This can only happen if the last packet was
		 * single-segment. The copying is gathered into one place
		 * because it is a good opportunity to optimize it with SIMD.
		 * Unfortunately, if inlining is enabled, gaps in the pointer
		 * array may appear due to early freeing of the inlined
		 * mbufs.
		 */
		mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx);
		loc.pkts_copy = loc.pkts_sent;
	}
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (pkts_n > loc.pkts_sent) {
		/*
		 * If the burst size is large there might be not enough CQEs
		 * fetched from the completion queue and not enough resources
		 * freed to send all the packets.
		 */
		goto send_loop;
	}
burst_exit:
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += loc.pkts_sent;
#endif
	return loc.pkts_sent;
}

/* Generate routines with Enhanced Multi-Packet Write support.
*/ 5162 MLX5_TXOFF_DECL(full_empw, 5163 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW) 5164 5165 MLX5_TXOFF_DECL(none_empw, 5166 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 5167 5168 MLX5_TXOFF_DECL(md_empw, 5169 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5170 5171 MLX5_TXOFF_DECL(mt_empw, 5172 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5173 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5174 5175 MLX5_TXOFF_DECL(mtsc_empw, 5176 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5177 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5178 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5179 5180 MLX5_TXOFF_DECL(mti_empw, 5181 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5182 MLX5_TXOFF_CONFIG_INLINE | 5183 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5184 5185 MLX5_TXOFF_DECL(mtv_empw, 5186 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5187 MLX5_TXOFF_CONFIG_VLAN | 5188 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5189 5190 MLX5_TXOFF_DECL(mtiv_empw, 5191 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5192 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5193 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5194 5195 MLX5_TXOFF_DECL(sc_empw, 5196 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5197 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5198 5199 MLX5_TXOFF_DECL(sci_empw, 5200 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5201 MLX5_TXOFF_CONFIG_INLINE | 5202 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5203 5204 MLX5_TXOFF_DECL(scv_empw, 5205 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5206 MLX5_TXOFF_CONFIG_VLAN | 5207 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5208 5209 MLX5_TXOFF_DECL(sciv_empw, 5210 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5211 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5212 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5213 5214 MLX5_TXOFF_DECL(i_empw, 5215 MLX5_TXOFF_CONFIG_INLINE | 5216 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5217 5218 MLX5_TXOFF_DECL(v_empw, 5219 MLX5_TXOFF_CONFIG_VLAN | 5220 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5221 5222 MLX5_TXOFF_DECL(iv_empw, 5223 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5224 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5225 5226 /* Generate routines without Enhanced Multi-Packet Write support. 
*/ 5227 MLX5_TXOFF_DECL(full, 5228 MLX5_TXOFF_CONFIG_FULL) 5229 5230 MLX5_TXOFF_DECL(none, 5231 MLX5_TXOFF_CONFIG_NONE) 5232 5233 MLX5_TXOFF_DECL(md, 5234 MLX5_TXOFF_CONFIG_METADATA) 5235 5236 MLX5_TXOFF_DECL(mt, 5237 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5238 MLX5_TXOFF_CONFIG_METADATA) 5239 5240 MLX5_TXOFF_DECL(mtsc, 5241 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5242 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5243 MLX5_TXOFF_CONFIG_METADATA) 5244 5245 MLX5_TXOFF_DECL(mti, 5246 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5247 MLX5_TXOFF_CONFIG_INLINE | 5248 MLX5_TXOFF_CONFIG_METADATA) 5249 5250 5251 MLX5_TXOFF_DECL(mtv, 5252 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5253 MLX5_TXOFF_CONFIG_VLAN | 5254 MLX5_TXOFF_CONFIG_METADATA) 5255 5256 5257 MLX5_TXOFF_DECL(mtiv, 5258 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5259 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5260 MLX5_TXOFF_CONFIG_METADATA) 5261 5262 MLX5_TXOFF_DECL(sc, 5263 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5264 MLX5_TXOFF_CONFIG_METADATA) 5265 5266 MLX5_TXOFF_DECL(sci, 5267 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5268 MLX5_TXOFF_CONFIG_INLINE | 5269 MLX5_TXOFF_CONFIG_METADATA) 5270 5271 5272 MLX5_TXOFF_DECL(scv, 5273 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5274 MLX5_TXOFF_CONFIG_VLAN | 5275 MLX5_TXOFF_CONFIG_METADATA) 5276 5277 5278 MLX5_TXOFF_DECL(sciv, 5279 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5280 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5281 MLX5_TXOFF_CONFIG_METADATA) 5282 5283 MLX5_TXOFF_DECL(i, 5284 MLX5_TXOFF_CONFIG_INLINE | 5285 MLX5_TXOFF_CONFIG_METADATA) 5286 5287 MLX5_TXOFF_DECL(v, 5288 MLX5_TXOFF_CONFIG_VLAN | 5289 MLX5_TXOFF_CONFIG_METADATA) 5290 5291 MLX5_TXOFF_DECL(iv, 5292 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5293 MLX5_TXOFF_CONFIG_METADATA) 5294 5295 /* Generate routines with timestamp scheduling. */ 5296 MLX5_TXOFF_DECL(full_ts_nompw, 5297 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP) 5298 5299 MLX5_TXOFF_DECL(full_ts_nompwi, 5300 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5301 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5302 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5303 MLX5_TXOFF_CONFIG_TXPP) 5304 5305 MLX5_TXOFF_DECL(full_ts, 5306 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP | 5307 MLX5_TXOFF_CONFIG_EMPW) 5308 5309 MLX5_TXOFF_DECL(full_ts_noi, 5310 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5311 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5312 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5313 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5314 5315 MLX5_TXOFF_DECL(none_ts, 5316 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP | 5317 MLX5_TXOFF_CONFIG_EMPW) 5318 5319 MLX5_TXOFF_DECL(mdi_ts, 5320 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5321 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5322 5323 MLX5_TXOFF_DECL(mti_ts, 5324 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5325 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5326 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5327 5328 MLX5_TXOFF_DECL(mtiv_ts, 5329 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5330 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5331 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP | 5332 MLX5_TXOFF_CONFIG_EMPW) 5333 5334 /* 5335 * Generate routines with Legacy Multi-Packet Write support. 
5336 * This mode is supported by ConnectX-4 Lx only and imposes 5337 * offload limitations, not supported: 5338 * - ACL/Flows (metadata are becoming meaningless) 5339 * - WQE Inline headers 5340 * - SRIOV (E-Switch offloads) 5341 * - VLAN insertion 5342 * - tunnel encapsulation/decapsulation 5343 * - TSO 5344 */ 5345 MLX5_TXOFF_DECL(none_mpw, 5346 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW | 5347 MLX5_TXOFF_CONFIG_MPW) 5348 5349 MLX5_TXOFF_DECL(mci_mpw, 5350 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 5351 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 5352 MLX5_TXOFF_CONFIG_MPW) 5353 5354 MLX5_TXOFF_DECL(mc_mpw, 5355 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 5356 MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW) 5357 5358 MLX5_TXOFF_DECL(i_mpw, 5359 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 5360 MLX5_TXOFF_CONFIG_MPW) 5361 5362 /* 5363 * Array of declared and compiled Tx burst function and corresponding 5364 * supported offloads set. The array is used to select the Tx burst 5365 * function for specified offloads set at Tx queue configuration time. 5366 */ 5367 const struct { 5368 eth_tx_burst_t func; 5369 unsigned int olx; 5370 } txoff_func[] = { 5371 MLX5_TXOFF_INFO(full_empw, 5372 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5373 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5374 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5375 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5376 5377 MLX5_TXOFF_INFO(none_empw, 5378 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 5379 5380 MLX5_TXOFF_INFO(md_empw, 5381 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5382 5383 MLX5_TXOFF_INFO(mt_empw, 5384 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5385 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5386 5387 MLX5_TXOFF_INFO(mtsc_empw, 5388 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5389 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5390 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5391 5392 MLX5_TXOFF_INFO(mti_empw, 5393 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5394 MLX5_TXOFF_CONFIG_INLINE | 5395 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5396 5397 MLX5_TXOFF_INFO(mtv_empw, 5398 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5399 MLX5_TXOFF_CONFIG_VLAN | 5400 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5401 5402 MLX5_TXOFF_INFO(mtiv_empw, 5403 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5404 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5405 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5406 5407 MLX5_TXOFF_INFO(sc_empw, 5408 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5409 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5410 5411 MLX5_TXOFF_INFO(sci_empw, 5412 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5413 MLX5_TXOFF_CONFIG_INLINE | 5414 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5415 5416 MLX5_TXOFF_INFO(scv_empw, 5417 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5418 MLX5_TXOFF_CONFIG_VLAN | 5419 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5420 5421 MLX5_TXOFF_INFO(sciv_empw, 5422 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5423 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5424 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5425 5426 MLX5_TXOFF_INFO(i_empw, 5427 MLX5_TXOFF_CONFIG_INLINE | 5428 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5429 5430 MLX5_TXOFF_INFO(v_empw, 5431 MLX5_TXOFF_CONFIG_VLAN | 5432 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5433 5434 MLX5_TXOFF_INFO(iv_empw, 
/*
 * Array of declared and compiled Tx burst functions and the
 * corresponding supported offload sets. The array is used to select
 * the Tx burst function for the offload set specified at Tx queue
 * configuration time.
 */
const struct {
	eth_tx_burst_t func;
	unsigned int olx;
} txoff_func[] = {
MLX5_TXOFF_INFO(full_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(none_empw,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(md_empw,
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mt_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtsc_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mti_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtv_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtiv_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sc_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sci_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(scv_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sciv_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(i_empw,
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(v_empw,
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(iv_empw,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full_ts_nompw,
		MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP)

MLX5_TXOFF_INFO(full_ts_nompwi,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP)

MLX5_TXOFF_INFO(full_ts,
		MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full_ts_noi,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(none_ts,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mdi_ts,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mti_ts,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtiv_ts,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(none,
		MLX5_TXOFF_CONFIG_NONE)

MLX5_TXOFF_INFO(md,
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mt,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtsc,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mti,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtv,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtiv,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sc,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sci,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(scv,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sciv,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(i,
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(v,
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(iv,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(none_mpw,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(mci_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(mc_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(i_mpw,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)
};
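
/*
 * Illustrative helper, kept out of the datapath: the selection logic in
 * mlx5_select_tx_function() below prefers the candidate implementing
 * the fewest offloads beyond the requested set. For example, with a
 * requested set of MULTI | TSO | METADATA | EMPW the "mt_empw" entry
 * scores 0 (exact match), "mtsc_empw" would score 2 (extra SWP and
 * CSUM), while "full_empw" would score 4 and is additionally rejected
 * because it enables inlining that was not requested. The function
 * name below is illustrative only and is not used by the driver.
 */
static __rte_unused unsigned int
mlx5_tx_burst_extra_olx(unsigned int candidate, unsigned int requested)
{
	/* Count offloads the candidate adds on top of the request. */
	return __builtin_popcountl(candidate & ~requested);
}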
/**
 * Configure the Tx function to use. The routine checks configured
 * Tx offloads for the device and selects the appropriate Tx burst
 * routine. There are multiple Tx burst routines compiled from
 * the same template in the most optimal way for the dedicated
 * Tx offloads set.
 *
 * @param dev
 *   Pointer to the Ethernet device structure.
 *
 * @return
 *   Pointer to the selected Tx burst function.
 */
eth_tx_burst_t
mlx5_select_tx_function(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
	unsigned int diff = 0, olx = 0, i, m;

	MLX5_ASSERT(priv);
	if (tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) {
		/* We should support Multi-Segment Packets. */
		olx |= MLX5_TXOFF_CONFIG_MULTI;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO |
			   DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
			   DEV_TX_OFFLOAD_GRE_TNL_TSO |
			   DEV_TX_OFFLOAD_IP_TNL_TSO |
			   DEV_TX_OFFLOAD_UDP_TNL_TSO)) {
		/* We should support TCP Send Offload. */
		olx |= MLX5_TXOFF_CONFIG_TSO;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO |
			   DEV_TX_OFFLOAD_UDP_TNL_TSO |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
		/* We should support Software Parser for Tunnels. */
		olx |= MLX5_TXOFF_CONFIG_SWP;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
			   DEV_TX_OFFLOAD_UDP_CKSUM |
			   DEV_TX_OFFLOAD_TCP_CKSUM |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
		/* We should support IP/TCP/UDP Checksums. */
		olx |= MLX5_TXOFF_CONFIG_CSUM;
	}
	if (tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT) {
		/* We should support VLAN insertion. */
		olx |= MLX5_TXOFF_CONFIG_VLAN;
	}
	if (tx_offloads & DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP &&
	    rte_mbuf_dynflag_lookup
			(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL) >= 0 &&
	    rte_mbuf_dynfield_lookup
			(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL) >= 0) {
		/* Offload configured, dynamic entities registered. */
		olx |= MLX5_TXOFF_CONFIG_TXPP;
	}
	if (priv->txqs_n && (*priv->txqs)[0]) {
		struct mlx5_txq_data *txd = (*priv->txqs)[0];

		if (txd->inlen_send) {
			/*
			 * Check the data inline requirements. Data inline
			 * is enabled on a per-device basis, so checking
			 * the first Tx queue only is sufficient.
			 *
			 * If the device does not support VLAN insertion
			 * in the WQE and some queues are requested to
			 * perform VLAN insertion offload, then inlining
			 * must be enabled.
			 */
			olx |= MLX5_TXOFF_CONFIG_INLINE;
		}
	}
	if (config->mps == MLX5_MPW_ENHANCED &&
	    config->txq_inline_min <= 0) {
		/*
		 * The NIC supports Enhanced Multi-Packet Write
		 * and does not require minimal inline data.
		 */
		olx |= MLX5_TXOFF_CONFIG_EMPW;
	}
	if (rte_flow_dynf_metadata_avail()) {
		/* We should support Flow metadata. */
		olx |= MLX5_TXOFF_CONFIG_METADATA;
	}
	if (config->mps == MLX5_MPW) {
		/*
		 * The NIC supports Legacy Multi-Packet Write.
		 * The MLX5_TXOFF_CONFIG_MPW controls the
		 * descriptor building method in combination
		 * with MLX5_TXOFF_CONFIG_EMPW.
		 */
		if (!(olx & (MLX5_TXOFF_CONFIG_TSO |
			     MLX5_TXOFF_CONFIG_SWP |
			     MLX5_TXOFF_CONFIG_VLAN |
			     MLX5_TXOFF_CONFIG_METADATA)))
			olx |= MLX5_TXOFF_CONFIG_EMPW |
			       MLX5_TXOFF_CONFIG_MPW;
	}
	/*
	 * Scan the routines table to find the minimal
	 * satisfying routine with the requested offloads.
	 */
	m = RTE_DIM(txoff_func);
	for (i = 0; i < RTE_DIM(txoff_func); i++) {
		unsigned int tmp;

		tmp = txoff_func[i].olx;
		if (tmp == olx) {
			/* Meets requested offloads exactly. */
			m = i;
			break;
		}
		if ((tmp & olx) != olx) {
			/* Does not meet requested offloads at all. */
			continue;
		}
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_MPW)
			/* Do not enable legacy MPW if not configured. */
			continue;
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_EMPW)
			/* Do not enable eMPW if not configured. */
			continue;
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_INLINE)
			/* Do not enable inlining if not configured. */
			continue;
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_TXPP)
			/* Do not enable scheduling if not configured. */
			continue;
		/*
		 * Some routine meets the requirements.
		 * Check whether it has the minimal amount
		 * of non-requested offloads.
		 */
		tmp = __builtin_popcountl(tmp & ~olx);
		if (m >= RTE_DIM(txoff_func) || tmp < diff) {
			/* First or better match, save and continue. */
			m = i;
			diff = tmp;
			continue;
		}
		if (tmp == diff) {
			tmp = txoff_func[i].olx ^ txoff_func[m].olx;
			if (__builtin_ffsl(txoff_func[i].olx & ~tmp) <
			    __builtin_ffsl(txoff_func[m].olx & ~tmp)) {
				/* Lighter non-requested offload. */
				m = i;
			}
		}
	}
	if (m >= RTE_DIM(txoff_func)) {
		DRV_LOG(DEBUG, "port %u has no selected Tx function"
			       " for requested offloads %04X",
			dev->data->port_id, olx);
		return NULL;
	}
	DRV_LOG(DEBUG, "port %u has selected Tx function"
		       " supporting offloads %04X/%04X",
		dev->data->port_id, olx, txoff_func[m].olx);
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MULTI)
		DRV_LOG(DEBUG, "\tMULTI (multi segment)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TSO)
		DRV_LOG(DEBUG, "\tTSO   (TCP send offload)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_SWP)
		DRV_LOG(DEBUG, "\tSWP   (software parser)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_CSUM)
		DRV_LOG(DEBUG, "\tCSUM  (checksum offload)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_INLINE)
		DRV_LOG(DEBUG, "\tINLIN (inline data)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_VLAN)
		DRV_LOG(DEBUG, "\tVLANI (VLAN insertion)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_METADATA)
		DRV_LOG(DEBUG, "\tMETAD (tx Flow metadata)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TXPP)
		DRV_LOG(DEBUG, "\tTXPP  (tx Scheduling)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_EMPW) {
		if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MPW)
			DRV_LOG(DEBUG, "\tMPW   (Legacy MPW)");
		else
			DRV_LOG(DEBUG, "\tEMPW  (Enhanced MPW)");
	}
	return txoff_func[m].func;
}

/**
 * DPDK callback to get the Tx queue information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param tx_queue_id
 *   Tx queue identifier.
 *
 * @param qinfo
 *   Pointer to the Tx queue information structure.
 *
 * @return
 *   None.
 */
void
mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		  struct rte_eth_txq_info *qinfo)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[tx_queue_id];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);

	if (!txq)
		return;
	qinfo->nb_desc = txq->elts_s;
	qinfo->conf.tx_thresh.pthresh = 0;
	qinfo->conf.tx_thresh.hthresh = 0;
	qinfo->conf.tx_thresh.wthresh = 0;
	qinfo->conf.tx_rs_thresh = 0;
	qinfo->conf.tx_free_thresh = 0;
	qinfo->conf.tx_deferred_start = txq_ctrl ? 0 : 1;
	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
}
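
/*
 * Illustrative, application-side sketch (compiled out): the callback
 * above is reached through the generic ethdev API. Port and queue
 * numbers, the helper name, and the output format are placeholders.
 */
#if 0	/* documentation only */
#include <stdio.h>
#include <inttypes.h>
#include <rte_ethdev.h>

static void
app_show_txq(uint16_t port_id, uint16_t queue_id)
{
	struct rte_eth_txq_info qinfo;

	/* Fills qinfo through mlx5_txq_info_get() for mlx5 ports. */
	if (rte_eth_tx_queue_info_get(port_id, queue_id, &qinfo) == 0)
		printf("port %u txq %u: %u descriptors, offloads 0x%" PRIx64 "\n",
		       (unsigned int)port_id, (unsigned int)queue_id,
		       (unsigned int)qinfo.nb_desc,
		       (uint64_t)qinfo.conf.offloads);
}
#endif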
5821 " + SWP" : "", 5822 (olx & MLX5_TXOFF_CONFIG_CSUM) ? 5823 " + CSUM" : "", 5824 (olx & MLX5_TXOFF_CONFIG_INLINE) ? 5825 " + INLINE" : "", 5826 (olx & MLX5_TXOFF_CONFIG_VLAN) ? 5827 " + VLAN" : "", 5828 (olx & MLX5_TXOFF_CONFIG_METADATA) ? 5829 " + METADATA" : "", 5830 (olx & MLX5_TXOFF_CONFIG_TXPP) ? 5831 " + TXPP" : ""); 5832 return 0; 5833 } 5834 } 5835 return -EINVAL; 5836 } 5837