/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015-2019 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_cycles.h>
#include <rte_flow.h>

#include <mlx5_prm.h>
#include <mlx5_common.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
	MLX5_TXCMP_CODE_EXIT = 0,
	MLX5_TXCMP_CODE_ERROR,
	MLX5_TXCMP_CODE_SINGLE,
	MLX5_TXCMP_CODE_MULTI,
	MLX5_TXCMP_CODE_TSO,
	MLX5_TXCMP_CODE_EMPW,
};

/*
 * These defines are used to configure the Tx burst routine option set
 * supported at compile time. Options that are not specified are optimized
 * out, because the corresponding if conditions can be evaluated at compile
 * time. Offloads with a bigger runtime check overhead (requiring more CPU
 * cycles to skip) should have a bigger index - this is needed to select
 * the best matching routine when there is no exact match and some offloads
 * are not actually requested.
 */
#define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets.*/
#define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported.*/
#define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads.*/
#define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Check Sums offloaded. */
#define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported.*/
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported.*/
#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported.*/
#define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp.*/

/* The most common offloads groups.
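/*
 * Illustrative sketch only (not part of the driver): how the option bits
 * above are meant to be consumed. A template burst routine receives the
 * mask as a compile-time constant, so branches guarded by disabled options
 * are folded away in each specialization. The function and guard names
 * below (example_tx_burst_tmpl, MLX5_TXOFF_EXAMPLE) are hypothetical.
 */
#ifdef MLX5_TXOFF_EXAMPLE /* never defined, shown for illustration */
static __rte_always_inline uint16_t
example_tx_burst_tmpl(struct mlx5_txq_data *txq __rte_unused,
		      struct rte_mbuf **pkts __rte_unused,
		      uint16_t pkts_n, unsigned int olx)
{
	if (olx & MLX5_TXOFF_CONFIG_TSO) {
		/* Compiled in only when the specialization enables TSO. */
	}
	if (olx & MLX5_TXOFF_CONFIG_VLAN) {
		/* Likewise for HW VLAN insertion support. */
	}
	return pkts_n;
}
#endif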
*/ 60 #define MLX5_TXOFF_CONFIG_NONE 0 61 #define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \ 62 MLX5_TXOFF_CONFIG_TSO | \ 63 MLX5_TXOFF_CONFIG_SWP | \ 64 MLX5_TXOFF_CONFIG_CSUM | \ 65 MLX5_TXOFF_CONFIG_INLINE | \ 66 MLX5_TXOFF_CONFIG_VLAN | \ 67 MLX5_TXOFF_CONFIG_METADATA) 68 69 #define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask) 70 71 #define MLX5_TXOFF_DECL(func, olx) \ 72 static uint16_t mlx5_tx_burst_##func(void *txq, \ 73 struct rte_mbuf **pkts, \ 74 uint16_t pkts_n) \ 75 { \ 76 return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \ 77 pkts, pkts_n, (olx)); \ 78 } 79 80 #define MLX5_TXOFF_INFO(func, olx) {mlx5_tx_burst_##func, olx}, 81 82 static __rte_always_inline uint32_t 83 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe); 84 85 static __rte_always_inline int 86 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 87 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe); 88 89 static __rte_always_inline uint32_t 90 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe); 91 92 static __rte_always_inline void 93 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 94 volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res); 95 96 static __rte_always_inline void 97 mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx, 98 const unsigned int strd_n); 99 100 static int 101 mlx5_queue_state_modify(struct rte_eth_dev *dev, 102 struct mlx5_mp_arg_queue_state_modify *sm); 103 104 static inline void 105 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 106 volatile struct mlx5_cqe *__rte_restrict cqe, 107 uint32_t phcsum); 108 109 static inline void 110 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 111 volatile struct mlx5_cqe *__rte_restrict cqe, 112 uint32_t len); 113 114 uint32_t mlx5_ptype_table[] __rte_cache_aligned = { 115 [0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */ 116 }; 117 118 uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned; 119 uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned; 120 121 uint64_t rte_net_mlx5_dynf_inline_mask; 122 #define PKT_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask 123 124 /** 125 * Build a table to translate Rx completion flags to packet type. 126 * 127 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 128 */ 129 void 130 mlx5_set_ptype_table(void) 131 { 132 unsigned int i; 133 uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table; 134 135 /* Last entry must not be overwritten, reserved for errored packet. 
*/ 136 for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i) 137 (*p)[i] = RTE_PTYPE_UNKNOWN; 138 /* 139 * The index to the array should have: 140 * bit[1:0] = l3_hdr_type 141 * bit[4:2] = l4_hdr_type 142 * bit[5] = ip_frag 143 * bit[6] = tunneled 144 * bit[7] = outer_l3_type 145 */ 146 /* L2 */ 147 (*p)[0x00] = RTE_PTYPE_L2_ETHER; 148 /* L3 */ 149 (*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 150 RTE_PTYPE_L4_NONFRAG; 151 (*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 152 RTE_PTYPE_L4_NONFRAG; 153 /* Fragmented */ 154 (*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 155 RTE_PTYPE_L4_FRAG; 156 (*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 157 RTE_PTYPE_L4_FRAG; 158 /* TCP */ 159 (*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 160 RTE_PTYPE_L4_TCP; 161 (*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 162 RTE_PTYPE_L4_TCP; 163 (*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 164 RTE_PTYPE_L4_TCP; 165 (*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 166 RTE_PTYPE_L4_TCP; 167 (*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 168 RTE_PTYPE_L4_TCP; 169 (*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 170 RTE_PTYPE_L4_TCP; 171 /* UDP */ 172 (*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 173 RTE_PTYPE_L4_UDP; 174 (*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 175 RTE_PTYPE_L4_UDP; 176 /* Repeat with outer_l3_type being set. Just in case. */ 177 (*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 178 RTE_PTYPE_L4_NONFRAG; 179 (*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 180 RTE_PTYPE_L4_NONFRAG; 181 (*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 182 RTE_PTYPE_L4_FRAG; 183 (*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 184 RTE_PTYPE_L4_FRAG; 185 (*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 186 RTE_PTYPE_L4_TCP; 187 (*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 188 RTE_PTYPE_L4_TCP; 189 (*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 190 RTE_PTYPE_L4_TCP; 191 (*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 192 RTE_PTYPE_L4_TCP; 193 (*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 194 RTE_PTYPE_L4_TCP; 195 (*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 196 RTE_PTYPE_L4_TCP; 197 (*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 198 RTE_PTYPE_L4_UDP; 199 (*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 200 RTE_PTYPE_L4_UDP; 201 /* Tunneled - L3 */ 202 (*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 203 (*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 204 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 205 RTE_PTYPE_INNER_L4_NONFRAG; 206 (*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 207 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 208 RTE_PTYPE_INNER_L4_NONFRAG; 209 (*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 210 (*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 211 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 212 RTE_PTYPE_INNER_L4_NONFRAG; 213 (*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 214 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 215 RTE_PTYPE_INNER_L4_NONFRAG; 216 /* Tunneled - Fragmented */ 217 (*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 218 
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 219 RTE_PTYPE_INNER_L4_FRAG; 220 (*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 221 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 222 RTE_PTYPE_INNER_L4_FRAG; 223 (*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 224 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 225 RTE_PTYPE_INNER_L4_FRAG; 226 (*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 227 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 228 RTE_PTYPE_INNER_L4_FRAG; 229 /* Tunneled - TCP */ 230 (*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 231 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 232 RTE_PTYPE_INNER_L4_TCP; 233 (*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 234 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 235 RTE_PTYPE_INNER_L4_TCP; 236 (*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 237 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 238 RTE_PTYPE_INNER_L4_TCP; 239 (*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 240 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 241 RTE_PTYPE_INNER_L4_TCP; 242 (*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 243 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 244 RTE_PTYPE_INNER_L4_TCP; 245 (*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 246 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 247 RTE_PTYPE_INNER_L4_TCP; 248 (*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 249 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 250 RTE_PTYPE_INNER_L4_TCP; 251 (*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 252 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 253 RTE_PTYPE_INNER_L4_TCP; 254 (*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 255 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 256 RTE_PTYPE_INNER_L4_TCP; 257 (*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 258 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 259 RTE_PTYPE_INNER_L4_TCP; 260 (*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 261 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 262 RTE_PTYPE_INNER_L4_TCP; 263 (*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 264 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 265 RTE_PTYPE_INNER_L4_TCP; 266 /* Tunneled - UDP */ 267 (*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 268 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 269 RTE_PTYPE_INNER_L4_UDP; 270 (*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 271 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 272 RTE_PTYPE_INNER_L4_UDP; 273 (*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 274 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 275 RTE_PTYPE_INNER_L4_UDP; 276 (*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 277 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 278 RTE_PTYPE_INNER_L4_UDP; 279 } 280 281 /** 282 * Build a table to translate packet to checksum type of Verbs. 283 */ 284 void 285 mlx5_set_cksum_table(void) 286 { 287 unsigned int i; 288 uint8_t v; 289 290 /* 291 * The index should have: 292 * bit[0] = PKT_TX_TCP_SEG 293 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 294 * bit[4] = PKT_TX_IP_CKSUM 295 * bit[8] = PKT_TX_OUTER_IP_CKSUM 296 * bit[9] = tunnel 297 */ 298 for (i = 0; i < RTE_DIM(mlx5_cksum_table); ++i) { 299 v = 0; 300 if (i & (1 << 9)) { 301 /* Tunneled packet. */ 302 if (i & (1 << 8)) /* Outer IP. */ 303 v |= MLX5_ETH_WQE_L3_CSUM; 304 if (i & (1 << 4)) /* Inner IP. */ 305 v |= MLX5_ETH_WQE_L3_INNER_CSUM; 306 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 307 v |= MLX5_ETH_WQE_L4_INNER_CSUM; 308 } else { 309 /* No tunnel. 
*/ 310 if (i & (1 << 4)) /* IP. */ 311 v |= MLX5_ETH_WQE_L3_CSUM; 312 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 313 v |= MLX5_ETH_WQE_L4_CSUM; 314 } 315 mlx5_cksum_table[i] = v; 316 } 317 } 318 319 /** 320 * Build a table to translate packet type of mbuf to SWP type of Verbs. 321 */ 322 void 323 mlx5_set_swp_types_table(void) 324 { 325 unsigned int i; 326 uint8_t v; 327 328 /* 329 * The index should have: 330 * bit[0:1] = PKT_TX_L4_MASK 331 * bit[4] = PKT_TX_IPV6 332 * bit[8] = PKT_TX_OUTER_IPV6 333 * bit[9] = PKT_TX_OUTER_UDP 334 */ 335 for (i = 0; i < RTE_DIM(mlx5_swp_types_table); ++i) { 336 v = 0; 337 if (i & (1 << 8)) 338 v |= MLX5_ETH_WQE_L3_OUTER_IPV6; 339 if (i & (1 << 9)) 340 v |= MLX5_ETH_WQE_L4_OUTER_UDP; 341 if (i & (1 << 4)) 342 v |= MLX5_ETH_WQE_L3_INNER_IPV6; 343 if ((i & 3) == (PKT_TX_UDP_CKSUM >> 52)) 344 v |= MLX5_ETH_WQE_L4_INNER_UDP; 345 mlx5_swp_types_table[i] = v; 346 } 347 } 348 349 /** 350 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 351 * Flags must be preliminary initialized to zero. 352 * 353 * @param loc 354 * Pointer to burst routine local context. 355 * @param swp_flags 356 * Pointer to store Software Parser flags 357 * @param olx 358 * Configured Tx offloads mask. It is fully defined at 359 * compile time and may be used for optimization. 360 * 361 * @return 362 * Software Parser offsets packed in dword. 363 * Software Parser flags are set by pointer. 364 */ 365 static __rte_always_inline uint32_t 366 txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, 367 uint8_t *swp_flags, 368 unsigned int olx) 369 { 370 uint64_t ol, tunnel; 371 unsigned int idx, off; 372 uint32_t set; 373 374 if (!MLX5_TXOFF_CONFIG(SWP)) 375 return 0; 376 ol = loc->mbuf->ol_flags; 377 tunnel = ol & PKT_TX_TUNNEL_MASK; 378 /* 379 * Check whether Software Parser is required. 380 * Only customized tunnels may ask for. 381 */ 382 if (likely(tunnel != PKT_TX_TUNNEL_UDP && tunnel != PKT_TX_TUNNEL_IP)) 383 return 0; 384 /* 385 * The index should have: 386 * bit[0:1] = PKT_TX_L4_MASK 387 * bit[4] = PKT_TX_IPV6 388 * bit[8] = PKT_TX_OUTER_IPV6 389 * bit[9] = PKT_TX_OUTER_UDP 390 */ 391 idx = (ol & (PKT_TX_L4_MASK | PKT_TX_IPV6 | PKT_TX_OUTER_IPV6)) >> 52; 392 idx |= (tunnel == PKT_TX_TUNNEL_UDP) ? (1 << 9) : 0; 393 *swp_flags = mlx5_swp_types_table[idx]; 394 /* 395 * Set offsets for SW parser. Since ConnectX-5, SW parser just 396 * complements HW parser. SW parser starts to engage only if HW parser 397 * can't reach a header. For the older devices, HW parser will not kick 398 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 399 * should be set regardless of HW offload. 400 */ 401 off = loc->mbuf->outer_l2_len; 402 if (MLX5_TXOFF_CONFIG(VLAN) && ol & PKT_TX_VLAN_PKT) 403 off += sizeof(struct rte_vlan_hdr); 404 set = (off >> 1) << 8; /* Outer L3 offset. */ 405 off += loc->mbuf->outer_l3_len; 406 if (tunnel == PKT_TX_TUNNEL_UDP) 407 set |= off >> 1; /* Outer L4 offset. */ 408 if (ol & (PKT_TX_IPV4 | PKT_TX_IPV6)) { /* Inner IP. */ 409 const uint64_t csum = ol & PKT_TX_L4_MASK; 410 off += loc->mbuf->l2_len; 411 set |= (off >> 1) << 24; /* Inner L3 offset. */ 412 if (csum == PKT_TX_TCP_CKSUM || 413 csum == PKT_TX_UDP_CKSUM || 414 (MLX5_TXOFF_CONFIG(TSO) && ol & PKT_TX_TCP_SEG)) { 415 off += loc->mbuf->l3_len; 416 set |= (off >> 1) << 16; /* Inner L4 offset. */ 417 } 418 } 419 set = rte_cpu_to_le_32(set); 420 return set; 421 } 422 423 /** 424 * Convert the Checksum offloads to Verbs. 425 * 426 * @param buf 427 * Pointer to the mbuf. 
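/*
 * A minimal sketch (not used by the datapath) of how the dword built by
 * txq_mbuf_to_swp() above is laid out: every field is an offset in 2-byte
 * units, outer offsets in the low half, inner offsets in the high half.
 * The struct and function names are illustrative only.
 */
struct example_swp_offsets {
	uint8_t outer_l4; /* bits  7:0  - outer L4 offset / 2 */
	uint8_t outer_l3; /* bits 15:8  - outer L3 offset / 2 */
	uint8_t inner_l4; /* bits 23:16 - inner L4 offset / 2 */
	uint8_t inner_l3; /* bits 31:24 - inner L3 offset / 2 */
};

static inline struct example_swp_offsets
example_swp_unpack(uint32_t set)
{
	uint32_t v = rte_le_to_cpu_32(set);
	struct example_swp_offsets off = {
		.outer_l4 = (uint8_t)(v & 0xff),
		.outer_l3 = (uint8_t)((v >> 8) & 0xff),
		.inner_l4 = (uint8_t)((v >> 16) & 0xff),
		.inner_l3 = (uint8_t)((v >> 24) & 0xff),
	};

	return off;
}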
 *
 * @return
 *   Converted checksum flags.
 */
static __rte_always_inline uint8_t
txq_ol_cksum_to_cs(struct rte_mbuf *buf)
{
	uint32_t idx;
	uint8_t is_tunnel = !!(buf->ol_flags & PKT_TX_TUNNEL_MASK);
	const uint64_t ol_flags_mask = PKT_TX_TCP_SEG | PKT_TX_L4_MASK |
				       PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM;

	/*
	 * The index should have:
	 * bit[0] = PKT_TX_TCP_SEG
	 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM
	 * bit[4] = PKT_TX_IP_CKSUM
	 * bit[8] = PKT_TX_OUTER_IP_CKSUM
	 * bit[9] = tunnel
	 */
	idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9);
	return mlx5_cksum_table[idx];
}

/**
 * Internal function to compute the number of used descriptors in an RX queue.
 *
 * @param rxq
 *   The Rx queue.
 *
 * @return
 *   The number of used Rx descriptors.
 */
static uint32_t
rx_queue_count(struct mlx5_rxq_data *rxq)
{
	struct rxq_zip *zip = &rxq->zip;
	volatile struct mlx5_cqe *cqe;
	unsigned int cq_ci = rxq->cq_ci;
	const unsigned int cqe_n = (1 << rxq->cqe_n);
	const unsigned int cqe_cnt = cqe_n - 1;
	unsigned int used = 0;

	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) {
		int8_t op_own;
		unsigned int n;

		op_own = cqe->op_own;
		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
			if (unlikely(zip->ai))
				n = zip->cqe_cnt - zip->ai;
			else
				n = rte_be_to_cpu_32(cqe->byte_cnt);
		else
			n = 1;
		cq_ci += n;
		used += n;
		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	}
	used = RTE_MIN(used, cqe_n);
	return used;
}

/**
 * DPDK callback to check the status of an Rx descriptor.
 *
 * @param rx_queue
 *   The Rx queue.
 * @param[in] offset
 *   The index of the descriptor in the ring.
 *
 * @return
 *   The status of the Rx descriptor.
 */
int
mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
{
	struct mlx5_rxq_data *rxq = rx_queue;
	struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);

	if (dev->rx_pkt_burst == NULL ||
	    dev->rx_pkt_burst == removed_rx_burst) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	if (offset >= (1 << rxq->cqe_n)) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (offset < rx_queue_count(rxq))
		return RTE_ETH_RX_DESC_DONE;
	return RTE_ETH_RX_DESC_AVAIL;
}

/**
 * DPDK callback to get the RX queue information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   Rx queue identifier.
 *
 * @param qinfo
 *   Pointer to the RX queue information structure.
 *
 * @return
 *   None.
 */

void
mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		  struct rte_eth_rxq_info *qinfo)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id];
	struct mlx5_rxq_ctrl *rxq_ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

	if (!rxq)
		return;
	qinfo->mp = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
					rxq->mprq_mp : rxq->mp;
	qinfo->conf.rx_thresh.pthresh = 0;
	qinfo->conf.rx_thresh.hthresh = 0;
	qinfo->conf.rx_thresh.wthresh = 0;
	qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh;
	qinfo->conf.rx_drop_en = 1;
	qinfo->conf.rx_deferred_start = rxq_ctrl ?
					0 : 1;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
	qinfo->scattered_rx = dev->data->scattered_rx;
	qinfo->nb_desc = 1 << rxq->elts_n;
}

/**
 * DPDK callback to get the RX packet burst mode information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   Rx queue identifier.
 *
 * @param mode
 *   Pointer to the burst mode information.
 *
 * @return
 *   0 on success, -EINVAL on failure.
 */

int
mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
		       uint16_t rx_queue_id __rte_unused,
		       struct rte_eth_burst_mode *mode)
{
	eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;

	if (pkt_burst == mlx5_rx_burst) {
		snprintf(mode->info, sizeof(mode->info), "%s", "Scalar");
	} else if (pkt_burst == mlx5_rx_burst_mprq) {
		snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ");
	} else if (pkt_burst == mlx5_rx_burst_vec) {
#if defined RTE_ARCH_X86_64
		snprintf(mode->info, sizeof(mode->info), "%s", "Vector SSE");
#elif defined RTE_ARCH_ARM64
		snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon");
#elif defined RTE_ARCH_PPC_64
		snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec");
#else
		return -EINVAL;
#endif
	} else {
		return -EINVAL;
	}
	return 0;
}

/**
 * DPDK callback to get the number of used descriptors in an RX queue.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   The Rx queue.
 *
 * @return
 *   The number of used Rx descriptors.
 *   -EINVAL if the queue is invalid.
 */
uint32_t
mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq;

	if (dev->rx_pkt_burst == NULL ||
	    dev->rx_pkt_burst == removed_rx_burst) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	rxq = (*priv->rxqs)[rx_queue_id];
	if (!rxq) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return rx_queue_count(rxq);
}

#define MLX5_SYSTEM_LOG_DIR "/var/log"
/**
 * Dump debug information to log file.
 *
 * @param fname
 *   The file name.
 * @param hex_title
 *   If not NULL this string is printed as a header to the output
 *   and the output will be in hexadecimal view.
 * @param buf
 *   This is the buffer address to print out.
 * @param hex_len
 *   The number of bytes to dump out.
 */
void
mlx5_dump_debug_information(const char *fname, const char *hex_title,
			    const void *buf, unsigned int hex_len)
{
	FILE *fd;

	MKSTR(path, "%s/%s", MLX5_SYSTEM_LOG_DIR, fname);
	fd = fopen(path, "a+");
	if (!fd) {
		DRV_LOG(WARNING, "cannot open %s for debug dump", path);
		MKSTR(path2, "./%s", fname);
		fd = fopen(path2, "a+");
		if (!fd) {
			DRV_LOG(ERR, "cannot open %s for debug dump", path2);
			return;
		}
		DRV_LOG(INFO, "New debug dump in file %s", path2);
	} else {
		DRV_LOG(INFO, "New debug dump in file %s", path);
	}
	if (hex_title)
		rte_hexdump(fd, hex_title, buf, hex_len);
	else
		fprintf(fd, "%s", (const char *)buf);
	fprintf(fd, "\n\n\n");
	fclose(fd);
}

/**
 * Move QP from error state to running state and initialize indexes.
 *
 * @param txq_ctrl
 *   Pointer to TX queue control structure.
 *
 * @return
 *   0 on success, else -1.
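/*
 * Usage sketch from the application side (illustration only, not driver
 * code): the three callbacks above are reached through the generic ethdev
 * API. The function name and the port/queue identifiers are hypothetical.
 */
#ifdef MLX5_RXQ_INTROSPECTION_EXAMPLE /* never defined, illustration only */
static void
example_dump_rxq_state(uint16_t port_id, uint16_t queue_id)
{
	struct rte_eth_rxq_info qinfo;
	struct rte_eth_burst_mode mode;
	int used;

	if (rte_eth_rx_queue_info_get(port_id, queue_id, &qinfo) == 0)
		DRV_LOG(INFO, "descriptors: %u, scattered: %u",
			(unsigned int)qinfo.nb_desc,
			(unsigned int)qinfo.scattered_rx);
	if (rte_eth_rx_burst_mode_get(port_id, queue_id, &mode) == 0)
		DRV_LOG(INFO, "burst mode: %s", mode.info);
	used = rte_eth_rx_queue_count(port_id, queue_id);
	if (used >= 0)
		DRV_LOG(INFO, "used descriptors: %d", used);
}
#endif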
690 */ 691 static int 692 tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl) 693 { 694 struct mlx5_mp_arg_queue_state_modify sm = { 695 .is_wq = 0, 696 .queue_id = txq_ctrl->txq.idx, 697 }; 698 699 if (mlx5_queue_state_modify(ETH_DEV(txq_ctrl->priv), &sm)) 700 return -1; 701 txq_ctrl->txq.wqe_ci = 0; 702 txq_ctrl->txq.wqe_pi = 0; 703 txq_ctrl->txq.elts_comp = 0; 704 return 0; 705 } 706 707 /* Return 1 if the error CQE is signed otherwise, sign it and return 0. */ 708 static int 709 check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe) 710 { 711 static const uint8_t magic[] = "seen"; 712 int ret = 1; 713 unsigned int i; 714 715 for (i = 0; i < sizeof(magic); ++i) 716 if (!ret || err_cqe->rsvd1[i] != magic[i]) { 717 ret = 0; 718 err_cqe->rsvd1[i] = magic[i]; 719 } 720 return ret; 721 } 722 723 /** 724 * Handle error CQE. 725 * 726 * @param txq 727 * Pointer to TX queue structure. 728 * @param error_cqe 729 * Pointer to the error CQE. 730 * 731 * @return 732 * Negative value if queue recovery failed, otherwise 733 * the error completion entry is handled successfully. 734 */ 735 static int 736 mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq, 737 volatile struct mlx5_err_cqe *err_cqe) 738 { 739 if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) { 740 const uint16_t wqe_m = ((1 << txq->wqe_n) - 1); 741 struct mlx5_txq_ctrl *txq_ctrl = 742 container_of(txq, struct mlx5_txq_ctrl, txq); 743 uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter); 744 int seen = check_err_cqe_seen(err_cqe); 745 746 if (!seen && txq_ctrl->dump_file_n < 747 txq_ctrl->priv->config.max_dump_files_num) { 748 MKSTR(err_str, "Unexpected CQE error syndrome " 749 "0x%02x CQN = %u SQN = %u wqe_counter = %u " 750 "wq_ci = %u cq_ci = %u", err_cqe->syndrome, 751 txq->cqe_s, txq->qp_num_8s >> 8, 752 rte_be_to_cpu_16(err_cqe->wqe_counter), 753 txq->wqe_ci, txq->cq_ci); 754 MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u", 755 PORT_ID(txq_ctrl->priv), txq->idx, 756 txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc()); 757 mlx5_dump_debug_information(name, NULL, err_str, 0); 758 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 759 (const void *)((uintptr_t) 760 txq->cqes), 761 sizeof(*err_cqe) * 762 (1 << txq->cqe_n)); 763 mlx5_dump_debug_information(name, "MLX5 Error SQ:", 764 (const void *)((uintptr_t) 765 txq->wqes), 766 MLX5_WQE_SIZE * 767 (1 << txq->wqe_n)); 768 txq_ctrl->dump_file_n++; 769 } 770 if (!seen) 771 /* 772 * Count errors in WQEs units. 773 * Later it can be improved to count error packets, 774 * for example, by SQ parsing to find how much packets 775 * should be counted for each WQE. 776 */ 777 txq->stats.oerrors += ((txq->wqe_ci & wqe_m) - 778 new_wqe_pi) & wqe_m; 779 if (tx_recover_qp(txq_ctrl)) { 780 /* Recovering failed - retry later on the same WQE. */ 781 return -1; 782 } 783 /* Release all the remaining buffers. */ 784 txq_free_elts(txq_ctrl); 785 } 786 return 0; 787 } 788 789 /** 790 * Translate RX completion flags to packet type. 791 * 792 * @param[in] rxq 793 * Pointer to RX queue structure. 794 * @param[in] cqe 795 * Pointer to CQE. 796 * 797 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 798 * 799 * @return 800 * Packet type for struct rte_mbuf. 
801 */ 802 static inline uint32_t 803 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe) 804 { 805 uint8_t idx; 806 uint8_t pinfo = cqe->pkt_info; 807 uint16_t ptype = cqe->hdr_type_etc; 808 809 /* 810 * The index to the array should have: 811 * bit[1:0] = l3_hdr_type 812 * bit[4:2] = l4_hdr_type 813 * bit[5] = ip_frag 814 * bit[6] = tunneled 815 * bit[7] = outer_l3_type 816 */ 817 idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10); 818 return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6)); 819 } 820 821 /** 822 * Initialize Rx WQ and indexes. 823 * 824 * @param[in] rxq 825 * Pointer to RX queue structure. 826 */ 827 void 828 mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) 829 { 830 const unsigned int wqe_n = 1 << rxq->elts_n; 831 unsigned int i; 832 833 for (i = 0; (i != wqe_n); ++i) { 834 volatile struct mlx5_wqe_data_seg *scat; 835 uintptr_t addr; 836 uint32_t byte_count; 837 838 if (mlx5_rxq_mprq_enabled(rxq)) { 839 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i]; 840 841 scat = &((volatile struct mlx5_wqe_mprq *) 842 rxq->wqes)[i].dseg; 843 addr = (uintptr_t)mlx5_mprq_buf_addr(buf, 844 1 << rxq->strd_num_n); 845 byte_count = (1 << rxq->strd_sz_n) * 846 (1 << rxq->strd_num_n); 847 } else { 848 struct rte_mbuf *buf = (*rxq->elts)[i]; 849 850 scat = &((volatile struct mlx5_wqe_data_seg *) 851 rxq->wqes)[i]; 852 addr = rte_pktmbuf_mtod(buf, uintptr_t); 853 byte_count = DATA_LEN(buf); 854 } 855 /* scat->addr must be able to store a pointer. */ 856 MLX5_ASSERT(sizeof(scat->addr) >= sizeof(uintptr_t)); 857 *scat = (struct mlx5_wqe_data_seg){ 858 .addr = rte_cpu_to_be_64(addr), 859 .byte_count = rte_cpu_to_be_32(byte_count), 860 .lkey = mlx5_rx_addr2mr(rxq, addr), 861 }; 862 } 863 rxq->consumed_strd = 0; 864 rxq->decompressed = 0; 865 rxq->rq_pi = 0; 866 rxq->zip = (struct rxq_zip){ 867 .ai = 0, 868 }; 869 /* Update doorbell counter. */ 870 rxq->rq_ci = wqe_n >> rxq->sges_n; 871 rte_io_wmb(); 872 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 873 } 874 875 /** 876 * Modify a Verbs/DevX queue state. 877 * This must be called from the primary process. 878 * 879 * @param dev 880 * Pointer to Ethernet device. 881 * @param sm 882 * State modify request parameters. 883 * 884 * @return 885 * 0 in case of success else non-zero value and rte_errno is set. 886 */ 887 int 888 mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, 889 const struct mlx5_mp_arg_queue_state_modify *sm) 890 { 891 int ret; 892 struct mlx5_priv *priv = dev->data->dev_private; 893 894 if (sm->is_wq) { 895 struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id]; 896 struct mlx5_rxq_ctrl *rxq_ctrl = 897 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 898 899 ret = priv->obj_ops.rxq_obj_modify(rxq_ctrl->obj, sm->state); 900 if (ret) { 901 DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s", 902 sm->state, strerror(errno)); 903 rte_errno = errno; 904 return ret; 905 } 906 } else { 907 struct mlx5_txq_data *txq = (*priv->txqs)[sm->queue_id]; 908 struct mlx5_txq_ctrl *txq_ctrl = 909 container_of(txq, struct mlx5_txq_ctrl, txq); 910 911 ret = priv->obj_ops.txq_obj_modify(txq_ctrl->obj, 912 MLX5_TXQ_MOD_ERR2RDY, 913 (uint8_t)priv->dev_port); 914 if (ret) 915 return ret; 916 } 917 return 0; 918 } 919 920 /** 921 * Modify a Verbs queue state. 922 * 923 * @param dev 924 * Pointer to Ethernet device. 925 * @param sm 926 * State modify request parameters. 927 * 928 * @return 929 * 0 in case of success else non-zero value. 
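/*
 * Worked example (illustration only) for the index layout used by
 * rxq_cq_to_pkt_type() above: bits 5:0 come from hdr_type_etc[15:10] and
 * bits 7:6 from pkt_info[1:0]. A plain (non-tunneled, non-fragmented)
 * IPv4/TCP completion therefore yields index 0x06, which the table built
 * in mlx5_set_ptype_table() maps to RTE_PTYPE_L2_ETHER |
 * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP. The helper name is
 * hypothetical; it only mirrors the shift/mask arithmetic.
 */
static inline uint8_t
example_cqe_ptype_index(uint8_t pkt_info, uint16_t hdr_type_etc)
{
	return (uint8_t)(((pkt_info & 0x3) << 6) |
			 ((hdr_type_etc & 0xfc00) >> 10));
}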
930 */ 931 static int 932 mlx5_queue_state_modify(struct rte_eth_dev *dev, 933 struct mlx5_mp_arg_queue_state_modify *sm) 934 { 935 struct mlx5_priv *priv = dev->data->dev_private; 936 int ret = 0; 937 938 switch (rte_eal_process_type()) { 939 case RTE_PROC_PRIMARY: 940 ret = mlx5_queue_state_modify_primary(dev, sm); 941 break; 942 case RTE_PROC_SECONDARY: 943 ret = mlx5_mp_req_queue_state_modify(&priv->mp_id, sm); 944 break; 945 default: 946 break; 947 } 948 return ret; 949 } 950 951 /** 952 * Handle a Rx error. 953 * The function inserts the RQ state to reset when the first error CQE is 954 * shown, then drains the CQ by the caller function loop. When the CQ is empty, 955 * it moves the RQ state to ready and initializes the RQ. 956 * Next CQE identification and error counting are in the caller responsibility. 957 * 958 * @param[in] rxq 959 * Pointer to RX queue structure. 960 * @param[in] vec 961 * 1 when called from vectorized Rx burst, need to prepare mbufs for the RQ. 962 * 0 when called from non-vectorized Rx burst. 963 * 964 * @return 965 * -1 in case of recovery error, otherwise the CQE status. 966 */ 967 int 968 mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) 969 { 970 const uint16_t cqe_n = 1 << rxq->cqe_n; 971 const uint16_t cqe_mask = cqe_n - 1; 972 const unsigned int wqe_n = 1 << rxq->elts_n; 973 struct mlx5_rxq_ctrl *rxq_ctrl = 974 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 975 union { 976 volatile struct mlx5_cqe *cqe; 977 volatile struct mlx5_err_cqe *err_cqe; 978 } u = { 979 .cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask], 980 }; 981 struct mlx5_mp_arg_queue_state_modify sm; 982 int ret; 983 984 switch (rxq->err_state) { 985 case MLX5_RXQ_ERR_STATE_NO_ERROR: 986 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET; 987 /* Fall-through */ 988 case MLX5_RXQ_ERR_STATE_NEED_RESET: 989 sm.is_wq = 1; 990 sm.queue_id = rxq->idx; 991 sm.state = IBV_WQS_RESET; 992 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm)) 993 return -1; 994 if (rxq_ctrl->dump_file_n < 995 rxq_ctrl->priv->config.max_dump_files_num) { 996 MKSTR(err_str, "Unexpected CQE error syndrome " 997 "0x%02x CQN = %u RQN = %u wqe_counter = %u" 998 " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome, 999 rxq->cqn, rxq_ctrl->wqn, 1000 rte_be_to_cpu_16(u.err_cqe->wqe_counter), 1001 rxq->rq_ci << rxq->sges_n, rxq->cq_ci); 1002 MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u", 1003 rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc()); 1004 mlx5_dump_debug_information(name, NULL, err_str, 0); 1005 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 1006 (const void *)((uintptr_t) 1007 rxq->cqes), 1008 sizeof(*u.cqe) * cqe_n); 1009 mlx5_dump_debug_information(name, "MLX5 Error RQ:", 1010 (const void *)((uintptr_t) 1011 rxq->wqes), 1012 16 * wqe_n); 1013 rxq_ctrl->dump_file_n++; 1014 } 1015 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY; 1016 /* Fall-through */ 1017 case MLX5_RXQ_ERR_STATE_NEED_READY: 1018 ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci); 1019 if (ret == MLX5_CQE_STATUS_HW_OWN) { 1020 rte_io_wmb(); 1021 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1022 rte_io_wmb(); 1023 /* 1024 * The RQ consumer index must be zeroed while moving 1025 * from RESET state to RDY state. 
1026 */ 1027 *rxq->rq_db = rte_cpu_to_be_32(0); 1028 rte_io_wmb(); 1029 sm.is_wq = 1; 1030 sm.queue_id = rxq->idx; 1031 sm.state = IBV_WQS_RDY; 1032 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), 1033 &sm)) 1034 return -1; 1035 if (vec) { 1036 const uint16_t q_mask = wqe_n - 1; 1037 uint16_t elt_idx; 1038 struct rte_mbuf **elt; 1039 int i; 1040 unsigned int n = wqe_n - (rxq->rq_ci - 1041 rxq->rq_pi); 1042 1043 for (i = 0; i < (int)n; ++i) { 1044 elt_idx = (rxq->rq_ci + i) & q_mask; 1045 elt = &(*rxq->elts)[elt_idx]; 1046 *elt = rte_mbuf_raw_alloc(rxq->mp); 1047 if (!*elt) { 1048 for (i--; i >= 0; --i) { 1049 elt_idx = (rxq->rq_ci + 1050 i) & q_mask; 1051 elt = &(*rxq->elts) 1052 [elt_idx]; 1053 rte_pktmbuf_free_seg 1054 (*elt); 1055 } 1056 return -1; 1057 } 1058 } 1059 for (i = 0; i < (int)wqe_n; ++i) { 1060 elt = &(*rxq->elts)[i]; 1061 DATA_LEN(*elt) = 1062 (uint16_t)((*elt)->buf_len - 1063 rte_pktmbuf_headroom(*elt)); 1064 } 1065 /* Padding with a fake mbuf for vec Rx. */ 1066 for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i) 1067 (*rxq->elts)[wqe_n + i] = 1068 &rxq->fake_mbuf; 1069 } 1070 mlx5_rxq_initialize(rxq); 1071 rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; 1072 } 1073 return ret; 1074 default: 1075 return -1; 1076 } 1077 } 1078 1079 /** 1080 * Get size of the next packet for a given CQE. For compressed CQEs, the 1081 * consumer index is updated only once all packets of the current one have 1082 * been processed. 1083 * 1084 * @param rxq 1085 * Pointer to RX queue. 1086 * @param cqe 1087 * CQE to process. 1088 * @param[out] mcqe 1089 * Store pointer to mini-CQE if compressed. Otherwise, the pointer is not 1090 * written. 1091 * 1092 * @return 1093 * 0 in case of empty CQE, otherwise the packet size in bytes. 1094 */ 1095 static inline int 1096 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 1097 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe) 1098 { 1099 struct rxq_zip *zip = &rxq->zip; 1100 uint16_t cqe_n = cqe_cnt + 1; 1101 int len; 1102 uint16_t idx, end; 1103 1104 do { 1105 len = 0; 1106 /* Process compressed data in the CQE and mini arrays. */ 1107 if (zip->ai) { 1108 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1109 (volatile struct mlx5_mini_cqe8 (*)[8]) 1110 (uintptr_t)(&(*rxq->cqes)[zip->ca & 1111 cqe_cnt].pkt_info); 1112 1113 len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt); 1114 *mcqe = &(*mc)[zip->ai & 7]; 1115 if ((++zip->ai & 7) == 0) { 1116 /* Invalidate consumed CQEs */ 1117 idx = zip->ca; 1118 end = zip->na; 1119 while (idx != end) { 1120 (*rxq->cqes)[idx & cqe_cnt].op_own = 1121 MLX5_CQE_INVALIDATE; 1122 ++idx; 1123 } 1124 /* 1125 * Increment consumer index to skip the number 1126 * of CQEs consumed. Hardware leaves holes in 1127 * the CQ ring for software use. 1128 */ 1129 zip->ca = zip->na; 1130 zip->na += 8; 1131 } 1132 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { 1133 /* Invalidate the rest */ 1134 idx = zip->ca; 1135 end = zip->cq_ci; 1136 1137 while (idx != end) { 1138 (*rxq->cqes)[idx & cqe_cnt].op_own = 1139 MLX5_CQE_INVALIDATE; 1140 ++idx; 1141 } 1142 rxq->cq_ci = zip->cq_ci; 1143 zip->ai = 0; 1144 } 1145 /* 1146 * No compressed data, get next CQE and verify if it is 1147 * compressed. 
1148 */ 1149 } else { 1150 int ret; 1151 int8_t op_own; 1152 1153 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 1154 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 1155 if (unlikely(ret == MLX5_CQE_STATUS_ERR || 1156 rxq->err_state)) { 1157 ret = mlx5_rx_err_handle(rxq, 0); 1158 if (ret == MLX5_CQE_STATUS_HW_OWN || 1159 ret == -1) 1160 return 0; 1161 } else { 1162 return 0; 1163 } 1164 } 1165 ++rxq->cq_ci; 1166 op_own = cqe->op_own; 1167 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { 1168 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1169 (volatile struct mlx5_mini_cqe8 (*)[8]) 1170 (uintptr_t)(&(*rxq->cqes) 1171 [rxq->cq_ci & 1172 cqe_cnt].pkt_info); 1173 1174 /* Fix endianness. */ 1175 zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt); 1176 /* 1177 * Current mini array position is the one 1178 * returned by check_cqe64(). 1179 * 1180 * If completion comprises several mini arrays, 1181 * as a special case the second one is located 1182 * 7 CQEs after the initial CQE instead of 8 1183 * for subsequent ones. 1184 */ 1185 zip->ca = rxq->cq_ci; 1186 zip->na = zip->ca + 7; 1187 /* Compute the next non compressed CQE. */ 1188 --rxq->cq_ci; 1189 zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; 1190 /* Get packet size to return. */ 1191 len = rte_be_to_cpu_32((*mc)[0].byte_cnt); 1192 *mcqe = &(*mc)[0]; 1193 zip->ai = 1; 1194 /* Prefetch all to be invalidated */ 1195 idx = zip->ca; 1196 end = zip->cq_ci; 1197 while (idx != end) { 1198 rte_prefetch0(&(*rxq->cqes)[(idx) & 1199 cqe_cnt]); 1200 ++idx; 1201 } 1202 } else { 1203 len = rte_be_to_cpu_32(cqe->byte_cnt); 1204 } 1205 } 1206 if (unlikely(rxq->err_state)) { 1207 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1208 ++rxq->stats.idropped; 1209 } else { 1210 return len; 1211 } 1212 } while (1); 1213 } 1214 1215 /** 1216 * Translate RX completion flags to offload flags. 1217 * 1218 * @param[in] cqe 1219 * Pointer to CQE. 1220 * 1221 * @return 1222 * Offload flags (ol_flags) for struct rte_mbuf. 1223 */ 1224 static inline uint32_t 1225 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe) 1226 { 1227 uint32_t ol_flags = 0; 1228 uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc); 1229 1230 ol_flags = 1231 TRANSPOSE(flags, 1232 MLX5_CQE_RX_L3_HDR_VALID, 1233 PKT_RX_IP_CKSUM_GOOD) | 1234 TRANSPOSE(flags, 1235 MLX5_CQE_RX_L4_HDR_VALID, 1236 PKT_RX_L4_CKSUM_GOOD); 1237 return ol_flags; 1238 } 1239 1240 /** 1241 * Fill in mbuf fields from RX completion flags. 1242 * Note that pkt->ol_flags should be initialized outside of this function. 1243 * 1244 * @param rxq 1245 * Pointer to RX queue. 1246 * @param pkt 1247 * mbuf to fill. 1248 * @param cqe 1249 * CQE to process. 1250 * @param rss_hash_res 1251 * Packet RSS Hash result. 1252 */ 1253 static inline void 1254 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 1255 volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res) 1256 { 1257 /* Update packet information. 
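/*
 * Worked example (illustrative numbers) for the compressed-CQE bookkeeping
 * in mlx5_rx_poll_len() above: if the title CQE sits at cq_ci == 100 and
 * reports 20 mini-CQEs, the code sets zip->ca = 101 (first mini-CQE array),
 * zip->na = 108 (the second array is only 7 CQEs away, as noted above),
 * zip->cq_ci = 120 (first CQE after the compressed session) and zip->ai = 1.
 * Each further fully consumed array advances zip->na by 8.
 */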
*/ 1258 pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe); 1259 if (rss_hash_res && rxq->rss_hash) { 1260 pkt->hash.rss = rss_hash_res; 1261 pkt->ol_flags |= PKT_RX_RSS_HASH; 1262 } 1263 if (rxq->mark && MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) { 1264 pkt->ol_flags |= PKT_RX_FDIR; 1265 if (cqe->sop_drop_qpn != 1266 rte_cpu_to_be_32(MLX5_FLOW_MARK_DEFAULT)) { 1267 uint32_t mark = cqe->sop_drop_qpn; 1268 1269 pkt->ol_flags |= PKT_RX_FDIR_ID; 1270 pkt->hash.fdir.hi = mlx5_flow_mark_get(mark); 1271 } 1272 } 1273 if (rxq->dynf_meta && cqe->flow_table_metadata) { 1274 pkt->ol_flags |= rxq->flow_meta_mask; 1275 *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, uint32_t *) = 1276 cqe->flow_table_metadata; 1277 } 1278 if (rxq->csum) 1279 pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); 1280 if (rxq->vlan_strip && 1281 (cqe->hdr_type_etc & rte_cpu_to_be_16(MLX5_CQE_VLAN_STRIPPED))) { 1282 pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; 1283 pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info); 1284 } 1285 if (rxq->hw_timestamp) { 1286 uint64_t ts = rte_be_to_cpu_64(cqe->timestamp); 1287 1288 if (rxq->rt_timestamp) 1289 ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts); 1290 mlx5_timestamp_set(pkt, rxq->timestamp_offset, ts); 1291 pkt->ol_flags |= rxq->timestamp_rx_flag; 1292 } 1293 } 1294 1295 /** 1296 * DPDK callback for RX. 1297 * 1298 * @param dpdk_rxq 1299 * Generic pointer to RX queue structure. 1300 * @param[out] pkts 1301 * Array to store received packets. 1302 * @param pkts_n 1303 * Maximum number of packets in array. 1304 * 1305 * @return 1306 * Number of packets successfully received (<= pkts_n). 1307 */ 1308 uint16_t 1309 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1310 { 1311 struct mlx5_rxq_data *rxq = dpdk_rxq; 1312 const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; 1313 const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1; 1314 const unsigned int sges_n = rxq->sges_n; 1315 struct rte_mbuf *pkt = NULL; 1316 struct rte_mbuf *seg = NULL; 1317 volatile struct mlx5_cqe *cqe = 1318 &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1319 unsigned int i = 0; 1320 unsigned int rq_ci = rxq->rq_ci << sges_n; 1321 int len = 0; /* keep its value across iterations. */ 1322 1323 while (pkts_n) { 1324 unsigned int idx = rq_ci & wqe_cnt; 1325 volatile struct mlx5_wqe_data_seg *wqe = 1326 &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx]; 1327 struct rte_mbuf *rep = (*rxq->elts)[idx]; 1328 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1329 uint32_t rss_hash_res; 1330 1331 if (pkt) 1332 NEXT(seg) = rep; 1333 seg = rep; 1334 rte_prefetch0(seg); 1335 rte_prefetch0(cqe); 1336 rte_prefetch0(wqe); 1337 rep = rte_mbuf_raw_alloc(rxq->mp); 1338 if (unlikely(rep == NULL)) { 1339 ++rxq->stats.rx_nombuf; 1340 if (!pkt) { 1341 /* 1342 * no buffers before we even started, 1343 * bail out silently. 1344 */ 1345 break; 1346 } 1347 while (pkt != seg) { 1348 MLX5_ASSERT(pkt != (*rxq->elts)[idx]); 1349 rep = NEXT(pkt); 1350 NEXT(pkt) = NULL; 1351 NB_SEGS(pkt) = 1; 1352 rte_mbuf_raw_free(pkt); 1353 pkt = rep; 1354 } 1355 break; 1356 } 1357 if (!pkt) { 1358 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1359 len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe); 1360 if (!len) { 1361 rte_mbuf_raw_free(rep); 1362 break; 1363 } 1364 pkt = seg; 1365 MLX5_ASSERT(len >= (rxq->crc_present << 2)); 1366 pkt->ol_flags &= EXT_ATTACHED_MBUF; 1367 /* If compressed, take hash result from mini-CQE. */ 1368 rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ? 
1369 cqe->rx_hash_res : 1370 mcqe->rx_hash_result); 1371 rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res); 1372 if (rxq->crc_present) 1373 len -= RTE_ETHER_CRC_LEN; 1374 PKT_LEN(pkt) = len; 1375 if (cqe->lro_num_seg > 1) { 1376 mlx5_lro_update_hdr 1377 (rte_pktmbuf_mtod(pkt, uint8_t *), cqe, 1378 len); 1379 pkt->ol_flags |= PKT_RX_LRO; 1380 pkt->tso_segsz = len / cqe->lro_num_seg; 1381 } 1382 } 1383 DATA_LEN(rep) = DATA_LEN(seg); 1384 PKT_LEN(rep) = PKT_LEN(seg); 1385 SET_DATA_OFF(rep, DATA_OFF(seg)); 1386 PORT(rep) = PORT(seg); 1387 (*rxq->elts)[idx] = rep; 1388 /* 1389 * Fill NIC descriptor with the new buffer. The lkey and size 1390 * of the buffers are already known, only the buffer address 1391 * changes. 1392 */ 1393 wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t)); 1394 /* If there's only one MR, no need to replace LKey in WQE. */ 1395 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) 1396 wqe->lkey = mlx5_rx_mb2mr(rxq, rep); 1397 if (len > DATA_LEN(seg)) { 1398 len -= DATA_LEN(seg); 1399 ++NB_SEGS(pkt); 1400 ++rq_ci; 1401 continue; 1402 } 1403 DATA_LEN(seg) = len; 1404 #ifdef MLX5_PMD_SOFT_COUNTERS 1405 /* Increment bytes counter. */ 1406 rxq->stats.ibytes += PKT_LEN(pkt); 1407 #endif 1408 /* Return packet. */ 1409 *(pkts++) = pkt; 1410 pkt = NULL; 1411 --pkts_n; 1412 ++i; 1413 /* Align consumer index to the next stride. */ 1414 rq_ci >>= sges_n; 1415 ++rq_ci; 1416 rq_ci <<= sges_n; 1417 } 1418 if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci))) 1419 return 0; 1420 /* Update the consumer index. */ 1421 rxq->rq_ci = rq_ci >> sges_n; 1422 rte_io_wmb(); 1423 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1424 rte_io_wmb(); 1425 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1426 #ifdef MLX5_PMD_SOFT_COUNTERS 1427 /* Increment packets counter. */ 1428 rxq->stats.ipackets += i; 1429 #endif 1430 return i; 1431 } 1432 1433 /** 1434 * Update LRO packet TCP header. 1435 * The HW LRO feature doesn't update the TCP header after coalescing the 1436 * TCP segments but supplies information in CQE to fill it by SW. 1437 * 1438 * @param tcp 1439 * Pointer to the TCP header. 1440 * @param cqe 1441 * Pointer to the completion entry.. 1442 * @param phcsum 1443 * The L3 pseudo-header checksum. 1444 */ 1445 static inline void 1446 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 1447 volatile struct mlx5_cqe *__rte_restrict cqe, 1448 uint32_t phcsum) 1449 { 1450 uint8_t l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) & 1451 MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; 1452 /* 1453 * The HW calculates only the TCP payload checksum, need to complete 1454 * the TCP header checksum and the L3 pseudo-header checksum. 1455 */ 1456 uint32_t csum = phcsum + cqe->csum; 1457 1458 if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK || 1459 l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) { 1460 tcp->tcp_flags |= RTE_TCP_ACK_FLAG; 1461 tcp->recv_ack = cqe->lro_ack_seq_num; 1462 tcp->rx_win = cqe->lro_tcp_win; 1463 } 1464 if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK) 1465 tcp->tcp_flags |= RTE_TCP_PSH_FLAG; 1466 tcp->cksum = 0; 1467 csum += rte_raw_cksum(tcp, (tcp->data_off >> 4) * 4); 1468 csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff); 1469 csum = (~csum) & 0xffff; 1470 if (csum == 0) 1471 csum = 0xffff; 1472 tcp->cksum = csum; 1473 } 1474 1475 /** 1476 * Update LRO packet headers. 1477 * The HW LRO feature doesn't update the L3/TCP headers after coalescing the 1478 * TCP segments but supply information in CQE to fill it by SW. 
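/*
 * A minimal sketch (illustration only) of the 16-bit one's-complement
 * folding performed at the end of mlx5_lro_update_tcp_hdr() above. For
 * instance, an accumulated sum of 0x0001fffe folds to 0xffff, inverts to
 * 0x0000 and is then stored as 0xffff, matching the zero-checksum mapping
 * in the code above. The helper name is hypothetical.
 */
static inline uint16_t
example_fold_tcp_csum(uint32_t csum)
{
	csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff);
	csum = (~csum) & 0xffff;
	return (uint16_t)(csum == 0 ? 0xffff : csum);
}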
1479 * 1480 * @param padd 1481 * The packet address. 1482 * @param cqe 1483 * Pointer to the completion entry.. 1484 * @param len 1485 * The packet length. 1486 */ 1487 static inline void 1488 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 1489 volatile struct mlx5_cqe *__rte_restrict cqe, 1490 uint32_t len) 1491 { 1492 union { 1493 struct rte_ether_hdr *eth; 1494 struct rte_vlan_hdr *vlan; 1495 struct rte_ipv4_hdr *ipv4; 1496 struct rte_ipv6_hdr *ipv6; 1497 struct rte_tcp_hdr *tcp; 1498 uint8_t *hdr; 1499 } h = { 1500 .hdr = padd, 1501 }; 1502 uint16_t proto = h.eth->ether_type; 1503 uint32_t phcsum; 1504 1505 h.eth++; 1506 while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) || 1507 proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) { 1508 proto = h.vlan->eth_proto; 1509 h.vlan++; 1510 } 1511 if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) { 1512 h.ipv4->time_to_live = cqe->lro_min_ttl; 1513 h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd)); 1514 h.ipv4->hdr_checksum = 0; 1515 h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4); 1516 phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0); 1517 h.ipv4++; 1518 } else { 1519 h.ipv6->hop_limits = cqe->lro_min_ttl; 1520 h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) - 1521 sizeof(*h.ipv6)); 1522 phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0); 1523 h.ipv6++; 1524 } 1525 mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum); 1526 } 1527 1528 void 1529 mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque) 1530 { 1531 struct mlx5_mprq_buf *buf = opaque; 1532 1533 if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) == 1) { 1534 rte_mempool_put(buf->mp, buf); 1535 } else if (unlikely(__atomic_sub_fetch(&buf->refcnt, 1, 1536 __ATOMIC_RELAXED) == 0)) { 1537 __atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED); 1538 rte_mempool_put(buf->mp, buf); 1539 } 1540 } 1541 1542 void 1543 mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf) 1544 { 1545 mlx5_mprq_buf_free_cb(NULL, buf); 1546 } 1547 1548 static inline void 1549 mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx, 1550 const unsigned int strd_n) 1551 { 1552 struct mlx5_mprq_buf *rep = rxq->mprq_repl; 1553 volatile struct mlx5_wqe_data_seg *wqe = 1554 &((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg; 1555 void *addr; 1556 1557 MLX5_ASSERT(rep != NULL); 1558 /* Replace MPRQ buf. */ 1559 (*rxq->mprq_bufs)[rq_idx] = rep; 1560 /* Replace WQE. */ 1561 addr = mlx5_mprq_buf_addr(rep, strd_n); 1562 wqe->addr = rte_cpu_to_be_64((uintptr_t)addr); 1563 /* If there's only one MR, no need to replace LKey in WQE. */ 1564 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) 1565 wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr); 1566 /* Stash a mbuf for next replacement. */ 1567 if (likely(!rte_mempool_get(rxq->mprq_mp, (void **)&rep))) 1568 rxq->mprq_repl = rep; 1569 else 1570 rxq->mprq_repl = NULL; 1571 } 1572 1573 /** 1574 * DPDK callback for RX with Multi-Packet RQ support. 1575 * 1576 * @param dpdk_rxq 1577 * Generic pointer to RX queue structure. 1578 * @param[out] pkts 1579 * Array to store received packets. 1580 * @param pkts_n 1581 * Maximum number of packets in array. 1582 * 1583 * @return 1584 * Number of packets successfully received (<= pkts_n). 
1585 */ 1586 uint16_t 1587 mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1588 { 1589 struct mlx5_rxq_data *rxq = dpdk_rxq; 1590 const unsigned int strd_n = 1 << rxq->strd_num_n; 1591 const unsigned int strd_sz = 1 << rxq->strd_sz_n; 1592 const unsigned int strd_shift = 1593 MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en; 1594 const unsigned int cq_mask = (1 << rxq->cqe_n) - 1; 1595 const unsigned int wq_mask = (1 << rxq->elts_n) - 1; 1596 volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1597 unsigned int i = 0; 1598 uint32_t rq_ci = rxq->rq_ci; 1599 uint16_t consumed_strd = rxq->consumed_strd; 1600 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1601 1602 while (i < pkts_n) { 1603 struct rte_mbuf *pkt; 1604 void *addr; 1605 int ret; 1606 uint32_t len; 1607 uint16_t strd_cnt; 1608 uint16_t strd_idx; 1609 uint32_t offset; 1610 uint32_t byte_cnt; 1611 int32_t hdrm_overlap; 1612 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1613 uint32_t rss_hash_res = 0; 1614 1615 if (consumed_strd == strd_n) { 1616 /* Replace WQE only if the buffer is still in use. */ 1617 if (__atomic_load_n(&buf->refcnt, 1618 __ATOMIC_RELAXED) > 1) { 1619 mprq_buf_replace(rxq, rq_ci & wq_mask, strd_n); 1620 /* Release the old buffer. */ 1621 mlx5_mprq_buf_free(buf); 1622 } else if (unlikely(rxq->mprq_repl == NULL)) { 1623 struct mlx5_mprq_buf *rep; 1624 1625 /* 1626 * Currently, the MPRQ mempool is out of buffer 1627 * and doing memcpy regardless of the size of Rx 1628 * packet. Retry allocation to get back to 1629 * normal. 1630 */ 1631 if (!rte_mempool_get(rxq->mprq_mp, 1632 (void **)&rep)) 1633 rxq->mprq_repl = rep; 1634 } 1635 /* Advance to the next WQE. */ 1636 consumed_strd = 0; 1637 ++rq_ci; 1638 buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1639 } 1640 cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1641 ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe); 1642 if (!ret) 1643 break; 1644 byte_cnt = ret; 1645 strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >> 1646 MLX5_MPRQ_STRIDE_NUM_SHIFT; 1647 MLX5_ASSERT(strd_cnt); 1648 consumed_strd += strd_cnt; 1649 if (byte_cnt & MLX5_MPRQ_FILLER_MASK) 1650 continue; 1651 if (mcqe == NULL) { 1652 rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res); 1653 strd_idx = rte_be_to_cpu_16(cqe->wqe_counter); 1654 } else { 1655 /* mini-CQE for MPRQ doesn't have hash result. */ 1656 strd_idx = rte_be_to_cpu_16(mcqe->stride_idx); 1657 } 1658 MLX5_ASSERT(strd_idx < strd_n); 1659 MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & 1660 wq_mask)); 1661 pkt = rte_pktmbuf_alloc(rxq->mp); 1662 if (unlikely(pkt == NULL)) { 1663 ++rxq->stats.rx_nombuf; 1664 break; 1665 } 1666 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 1667 MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); 1668 if (rxq->crc_present) 1669 len -= RTE_ETHER_CRC_LEN; 1670 offset = strd_idx * strd_sz + strd_shift; 1671 addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset); 1672 hdrm_overlap = len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz; 1673 /* 1674 * Memcpy packets to the target mbuf if: 1675 * - The size of packet is smaller than mprq_max_memcpy_len. 1676 * - Out of buffer in the Mempool for Multi-Packet RQ. 1677 * - The packet's stride overlaps a headroom and scatter is off. 
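/*
 * Worked example for the stride arithmetic above (illustrative numbers):
 * with strd_sz == 2048, strd_shift == 0, strd_idx == 5 and strd_cnt == 2,
 * the packet data starts at offset 5 * 2048 into the MPRQ buffer and owns
 * 2 * 2048 bytes of it; hdrm_overlap = len + RTE_PKTMBUF_HEADROOM -
 * strd_cnt * strd_sz (computed above) is positive exactly when attaching
 * the strides would run into the headroom reserved before the next packet.
 */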
1678 */ 1679 if (len <= rxq->mprq_max_memcpy_len || 1680 rxq->mprq_repl == NULL || 1681 (hdrm_overlap > 0 && !rxq->strd_scatter_en)) { 1682 if (likely(rte_pktmbuf_tailroom(pkt) >= len)) { 1683 rte_memcpy(rte_pktmbuf_mtod(pkt, void *), 1684 addr, len); 1685 DATA_LEN(pkt) = len; 1686 } else if (rxq->strd_scatter_en) { 1687 struct rte_mbuf *prev = pkt; 1688 uint32_t seg_len = 1689 RTE_MIN(rte_pktmbuf_tailroom(pkt), len); 1690 uint32_t rem_len = len - seg_len; 1691 1692 rte_memcpy(rte_pktmbuf_mtod(pkt, void *), 1693 addr, seg_len); 1694 DATA_LEN(pkt) = seg_len; 1695 while (rem_len) { 1696 struct rte_mbuf *next = 1697 rte_pktmbuf_alloc(rxq->mp); 1698 1699 if (unlikely(next == NULL)) { 1700 rte_pktmbuf_free(pkt); 1701 ++rxq->stats.rx_nombuf; 1702 goto out; 1703 } 1704 NEXT(prev) = next; 1705 SET_DATA_OFF(next, 0); 1706 addr = RTE_PTR_ADD(addr, seg_len); 1707 seg_len = RTE_MIN 1708 (rte_pktmbuf_tailroom(next), 1709 rem_len); 1710 rte_memcpy 1711 (rte_pktmbuf_mtod(next, void *), 1712 addr, seg_len); 1713 DATA_LEN(next) = seg_len; 1714 rem_len -= seg_len; 1715 prev = next; 1716 ++NB_SEGS(pkt); 1717 } 1718 } else { 1719 rte_pktmbuf_free_seg(pkt); 1720 ++rxq->stats.idropped; 1721 continue; 1722 } 1723 } else { 1724 rte_iova_t buf_iova; 1725 struct rte_mbuf_ext_shared_info *shinfo; 1726 uint16_t buf_len = strd_cnt * strd_sz; 1727 void *buf_addr; 1728 1729 /* Increment the refcnt of the whole chunk. */ 1730 __atomic_add_fetch(&buf->refcnt, 1, __ATOMIC_RELAXED); 1731 MLX5_ASSERT(__atomic_load_n(&buf->refcnt, 1732 __ATOMIC_RELAXED) <= strd_n + 1); 1733 buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM); 1734 /* 1735 * MLX5 device doesn't use iova but it is necessary in a 1736 * case where the Rx packet is transmitted via a 1737 * different PMD. 1738 */ 1739 buf_iova = rte_mempool_virt2iova(buf) + 1740 RTE_PTR_DIFF(buf_addr, buf); 1741 shinfo = &buf->shinfos[strd_idx]; 1742 rte_mbuf_ext_refcnt_set(shinfo, 1); 1743 /* 1744 * EXT_ATTACHED_MBUF will be set to pkt->ol_flags when 1745 * attaching the stride to mbuf and more offload flags 1746 * will be added below by calling rxq_cq_to_mbuf(). 1747 * Other fields will be overwritten. 1748 */ 1749 rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova, 1750 buf_len, shinfo); 1751 /* Set mbuf head-room. */ 1752 SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM); 1753 MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF); 1754 MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) >= 1755 len - (hdrm_overlap > 0 ? hdrm_overlap : 0)); 1756 DATA_LEN(pkt) = len; 1757 /* 1758 * Copy the last fragment of a packet (up to headroom 1759 * size bytes) in case there is a stride overlap with 1760 * a next packet's headroom. Allocate a separate mbuf 1761 * to store this fragment and link it. Scatter is on. 
1762 */ 1763 if (hdrm_overlap > 0) { 1764 MLX5_ASSERT(rxq->strd_scatter_en); 1765 struct rte_mbuf *seg = 1766 rte_pktmbuf_alloc(rxq->mp); 1767 1768 if (unlikely(seg == NULL)) { 1769 rte_pktmbuf_free_seg(pkt); 1770 ++rxq->stats.rx_nombuf; 1771 break; 1772 } 1773 SET_DATA_OFF(seg, 0); 1774 rte_memcpy(rte_pktmbuf_mtod(seg, void *), 1775 RTE_PTR_ADD(addr, len - hdrm_overlap), 1776 hdrm_overlap); 1777 DATA_LEN(seg) = hdrm_overlap; 1778 DATA_LEN(pkt) = len - hdrm_overlap; 1779 NEXT(pkt) = seg; 1780 NB_SEGS(pkt) = 2; 1781 } 1782 } 1783 rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res); 1784 if (cqe->lro_num_seg > 1) { 1785 mlx5_lro_update_hdr(addr, cqe, len); 1786 pkt->ol_flags |= PKT_RX_LRO; 1787 pkt->tso_segsz = len / cqe->lro_num_seg; 1788 } 1789 PKT_LEN(pkt) = len; 1790 PORT(pkt) = rxq->port_id; 1791 #ifdef MLX5_PMD_SOFT_COUNTERS 1792 /* Increment bytes counter. */ 1793 rxq->stats.ibytes += PKT_LEN(pkt); 1794 #endif 1795 /* Return packet. */ 1796 *(pkts++) = pkt; 1797 ++i; 1798 } 1799 out: 1800 /* Update the consumer indexes. */ 1801 rxq->consumed_strd = consumed_strd; 1802 rte_io_wmb(); 1803 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1804 if (rq_ci != rxq->rq_ci) { 1805 rxq->rq_ci = rq_ci; 1806 rte_io_wmb(); 1807 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1808 } 1809 #ifdef MLX5_PMD_SOFT_COUNTERS 1810 /* Increment packets counter. */ 1811 rxq->stats.ipackets += i; 1812 #endif 1813 return i; 1814 } 1815 1816 /** 1817 * Dummy DPDK callback for TX. 1818 * 1819 * This function is used to temporarily replace the real callback during 1820 * unsafe control operations on the queue, or in case of error. 1821 * 1822 * @param dpdk_txq 1823 * Generic pointer to TX queue structure. 1824 * @param[in] pkts 1825 * Packets to transmit. 1826 * @param pkts_n 1827 * Number of packets in array. 1828 * 1829 * @return 1830 * Number of packets successfully transmitted (<= pkts_n). 1831 */ 1832 uint16_t 1833 removed_tx_burst(void *dpdk_txq __rte_unused, 1834 struct rte_mbuf **pkts __rte_unused, 1835 uint16_t pkts_n __rte_unused) 1836 { 1837 rte_mb(); 1838 return 0; 1839 } 1840 1841 /** 1842 * Dummy DPDK callback for RX. 1843 * 1844 * This function is used to temporarily replace the real callback during 1845 * unsafe control operations on the queue, or in case of error. 1846 * 1847 * @param dpdk_rxq 1848 * Generic pointer to RX queue structure. 1849 * @param[out] pkts 1850 * Array to store received packets. 1851 * @param pkts_n 1852 * Maximum number of packets in array. 1853 * 1854 * @return 1855 * Number of packets successfully received (<= pkts_n). 1856 */ 1857 uint16_t 1858 removed_rx_burst(void *dpdk_txq __rte_unused, 1859 struct rte_mbuf **pkts __rte_unused, 1860 uint16_t pkts_n __rte_unused) 1861 { 1862 rte_mb(); 1863 return 0; 1864 } 1865 1866 /* 1867 * Vectorized Rx/Tx routines are not compiled in when required vector 1868 * instructions are not supported on a target architecture. The following null 1869 * stubs are needed for linkage when those are not included outside of this file 1870 * (e.g. mlx5_rxtx_vec_sse.c for x86). 
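/*
 * Generic sketch (not driver code) of the external-buffer attach pattern
 * used by mlx5_rx_burst_mprq() above: a stride of the MPRQ chunk is handed
 * to an mbuf without copying and the chunk is only recycled once every
 * attached mbuf has been freed. The function name and parameters are
 * hypothetical; the real code also bumps the per-chunk reference counter.
 */
#ifdef MLX5_EXTBUF_EXAMPLE /* never defined, illustration only */
static void
example_attach_stride(struct rte_mbuf *pkt, void *stride_addr,
		      rte_iova_t stride_iova, uint16_t stride_len,
		      struct rte_mbuf_ext_shared_info *shinfo)
{
	/* One reference for this mbuf; shinfo->free_cb releases the chunk. */
	rte_mbuf_ext_refcnt_set(shinfo, 1);
	rte_pktmbuf_attach_extbuf(pkt, stride_addr, stride_iova,
				  stride_len, shinfo);
	/* Restore the usual headroom, as rte_pktmbuf_attach_extbuf() resets
	 * data_off to 0 for the attached area. */
	SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM);
}
#endif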
1871 */ 1872 1873 __rte_weak uint16_t 1874 mlx5_rx_burst_vec(void *dpdk_txq __rte_unused, 1875 struct rte_mbuf **pkts __rte_unused, 1876 uint16_t pkts_n __rte_unused) 1877 { 1878 return 0; 1879 } 1880 1881 __rte_weak int 1882 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused) 1883 { 1884 return -ENOTSUP; 1885 } 1886 1887 __rte_weak int 1888 mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused) 1889 { 1890 return -ENOTSUP; 1891 } 1892 1893 /** 1894 * Free the mbufs from the linear array of pointers. 1895 * 1896 * @param pkts 1897 * Pointer to array of packets to be free. 1898 * @param pkts_n 1899 * Number of packets to be freed. 1900 * @param olx 1901 * Configured Tx offloads mask. It is fully defined at 1902 * compile time and may be used for optimization. 1903 */ 1904 static __rte_always_inline void 1905 mlx5_tx_free_mbuf(struct rte_mbuf **__rte_restrict pkts, 1906 unsigned int pkts_n, 1907 unsigned int olx __rte_unused) 1908 { 1909 struct rte_mempool *pool = NULL; 1910 struct rte_mbuf **p_free = NULL; 1911 struct rte_mbuf *mbuf; 1912 unsigned int n_free = 0; 1913 1914 /* 1915 * The implemented algorithm eliminates 1916 * copying pointers to temporary array 1917 * for rte_mempool_put_bulk() calls. 1918 */ 1919 MLX5_ASSERT(pkts); 1920 MLX5_ASSERT(pkts_n); 1921 for (;;) { 1922 for (;;) { 1923 /* 1924 * Decrement mbuf reference counter, detach 1925 * indirect and external buffers if needed. 1926 */ 1927 mbuf = rte_pktmbuf_prefree_seg(*pkts); 1928 if (likely(mbuf != NULL)) { 1929 MLX5_ASSERT(mbuf == *pkts); 1930 if (likely(n_free != 0)) { 1931 if (unlikely(pool != mbuf->pool)) 1932 /* From different pool. */ 1933 break; 1934 } else { 1935 /* Start new scan array. */ 1936 pool = mbuf->pool; 1937 p_free = pkts; 1938 } 1939 ++n_free; 1940 ++pkts; 1941 --pkts_n; 1942 if (unlikely(pkts_n == 0)) { 1943 mbuf = NULL; 1944 break; 1945 } 1946 } else { 1947 /* 1948 * This happens if mbuf is still referenced. 1949 * We can't put it back to the pool, skip. 1950 */ 1951 ++pkts; 1952 --pkts_n; 1953 if (unlikely(n_free != 0)) 1954 /* There is some array to free.*/ 1955 break; 1956 if (unlikely(pkts_n == 0)) 1957 /* Last mbuf, nothing to free. */ 1958 return; 1959 } 1960 } 1961 for (;;) { 1962 /* 1963 * This loop is implemented to avoid multiple 1964 * inlining of rte_mempool_put_bulk(). 1965 */ 1966 MLX5_ASSERT(pool); 1967 MLX5_ASSERT(p_free); 1968 MLX5_ASSERT(n_free); 1969 /* 1970 * Free the array of pre-freed mbufs 1971 * belonging to the same memory pool. 1972 */ 1973 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 1974 if (unlikely(mbuf != NULL)) { 1975 /* There is the request to start new scan. */ 1976 pool = mbuf->pool; 1977 p_free = pkts++; 1978 n_free = 1; 1979 --pkts_n; 1980 if (likely(pkts_n != 0)) 1981 break; 1982 /* 1983 * This is the last mbuf to be freed. 1984 * Do one more loop iteration to complete. 1985 * This is rare case of the last unique mbuf. 1986 */ 1987 mbuf = NULL; 1988 continue; 1989 } 1990 if (likely(pkts_n == 0)) 1991 return; 1992 n_free = 0; 1993 break; 1994 } 1995 } 1996 } 1997 1998 /** 1999 * Free the mbuf from the elts ring buffer till new tail. 2000 * 2001 * @param txq 2002 * Pointer to Tx queue structure. 2003 * @param tail 2004 * Index in elts to free up to, becomes new elts tail. 2005 * @param olx 2006 * Configured Tx offloads mask. It is fully defined at 2007 * compile time and may be used for optimization. 
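 * The tail value is normally taken from the completion FIFO
 * (txq->fcqs), see mlx5_tx_comp_flush() below.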
2008 */ 2009 static __rte_always_inline void 2010 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 2011 uint16_t tail, 2012 unsigned int olx __rte_unused) 2013 { 2014 uint16_t n_elts = tail - txq->elts_tail; 2015 2016 MLX5_ASSERT(n_elts); 2017 MLX5_ASSERT(n_elts <= txq->elts_s); 2018 /* 2019 * Implement a loop to support ring buffer wraparound 2020 * with single inlining of mlx5_tx_free_mbuf(). 2021 */ 2022 do { 2023 unsigned int part; 2024 2025 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 2026 part = RTE_MIN(part, n_elts); 2027 MLX5_ASSERT(part); 2028 MLX5_ASSERT(part <= txq->elts_s); 2029 mlx5_tx_free_mbuf(&txq->elts[txq->elts_tail & txq->elts_m], 2030 part, olx); 2031 txq->elts_tail += part; 2032 n_elts -= part; 2033 } while (n_elts); 2034 } 2035 2036 /** 2037 * Store the mbuf being sent into elts ring buffer. 2038 * On Tx completion these mbufs will be freed. 2039 * 2040 * @param txq 2041 * Pointer to Tx queue structure. 2042 * @param pkts 2043 * Pointer to array of packets to be stored. 2044 * @param pkts_n 2045 * Number of packets to be stored. 2046 * @param olx 2047 * Configured Tx offloads mask. It is fully defined at 2048 * compile time and may be used for optimization. 2049 */ 2050 static __rte_always_inline void 2051 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 2052 struct rte_mbuf **__rte_restrict pkts, 2053 unsigned int pkts_n, 2054 unsigned int olx __rte_unused) 2055 { 2056 unsigned int part; 2057 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 2058 2059 MLX5_ASSERT(pkts); 2060 MLX5_ASSERT(pkts_n); 2061 part = txq->elts_s - (txq->elts_head & txq->elts_m); 2062 MLX5_ASSERT(part); 2063 MLX5_ASSERT(part <= txq->elts_s); 2064 /* This code is a good candidate for vectorizing with SIMD. */ 2065 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 2066 (void *)pkts, 2067 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 2068 txq->elts_head += pkts_n; 2069 if (unlikely(part < pkts_n)) 2070 /* The copy is wrapping around the elts array. */ 2071 rte_memcpy((void *)elts, (void *)(pkts + part), 2072 (pkts_n - part) * sizeof(struct rte_mbuf *)); 2073 } 2074 2075 /** 2076 * Update completion queue consuming index via doorbell 2077 * and flush the completed data buffers. 2078 * 2079 * @param txq 2080 * Pointer to TX queue structure. 2081 * @param valid CQE pointer 2082 * if not NULL update txq->wqe_pi and flush the buffers 2083 * @param olx 2084 * Configured Tx offloads mask. It is fully defined at 2085 * compile time and may be used for optimization. 2086 */ 2087 static __rte_always_inline void 2088 mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq, 2089 volatile struct mlx5_cqe *last_cqe, 2090 unsigned int olx __rte_unused) 2091 { 2092 if (likely(last_cqe != NULL)) { 2093 uint16_t tail; 2094 2095 txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter); 2096 tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m]; 2097 if (likely(tail != txq->elts_tail)) { 2098 mlx5_tx_free_elts(txq, tail, olx); 2099 MLX5_ASSERT(tail == txq->elts_tail); 2100 } 2101 } 2102 } 2103 2104 /** 2105 * Manage TX completions. This routine checks the CQ for 2106 * arrived CQEs, deduces the last accomplished WQE in SQ, 2107 * updates SQ producing index and frees all completed mbufs. 2108 * 2109 * @param txq 2110 * Pointer to TX queue structure. 2111 * @param olx 2112 * Configured Tx offloads mask. It is fully defined at 2113 * compile time and may be used for optimization. 
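 * At most MLX5_TX_COMP_MAX_CQE completions are processed in one call
 * to bound the time spent on freeing buffers (see the loop limit below).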
2114 * 2115 * NOTE: not inlined intentionally, it makes tx_burst 2116 * routine smaller, simple and faster - from experiments. 2117 */ 2118 static void 2119 mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, 2120 unsigned int olx __rte_unused) 2121 { 2122 unsigned int count = MLX5_TX_COMP_MAX_CQE; 2123 volatile struct mlx5_cqe *last_cqe = NULL; 2124 bool ring_doorbell = false; 2125 int ret; 2126 2127 static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value"); 2128 static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be negative value"); 2129 do { 2130 volatile struct mlx5_cqe *cqe; 2131 2132 cqe = &txq->cqes[txq->cq_ci & txq->cqe_m]; 2133 ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci); 2134 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 2135 if (likely(ret != MLX5_CQE_STATUS_ERR)) { 2136 /* No new CQEs in completion queue. */ 2137 MLX5_ASSERT(ret == MLX5_CQE_STATUS_HW_OWN); 2138 break; 2139 } 2140 /* 2141 * Some error occurred, try to restart. 2142 * We have no barrier after WQE related Doorbell 2143 * written, make sure all writes are completed 2144 * here, before we might perform SQ reset. 2145 */ 2146 rte_wmb(); 2147 ret = mlx5_tx_error_cqe_handle 2148 (txq, (volatile struct mlx5_err_cqe *)cqe); 2149 if (unlikely(ret < 0)) { 2150 /* 2151 * Some error occurred on queue error 2152 * handling, we do not advance the index 2153 * here, allowing to retry on next call. 2154 */ 2155 return; 2156 } 2157 /* 2158 * We are going to fetch all entries with 2159 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status. 2160 * The send queue is supposed to be empty. 2161 */ 2162 ring_doorbell = true; 2163 ++txq->cq_ci; 2164 txq->cq_pi = txq->cq_ci; 2165 last_cqe = NULL; 2166 continue; 2167 } 2168 /* Normal transmit completion. */ 2169 MLX5_ASSERT(txq->cq_ci != txq->cq_pi); 2170 MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) == 2171 cqe->wqe_counter); 2172 ring_doorbell = true; 2173 ++txq->cq_ci; 2174 last_cqe = cqe; 2175 /* 2176 * We have to restrict the amount of processed CQEs 2177 * in one tx_burst routine call. The CQ may be large 2178 * and many CQEs may be updated by the NIC in one 2179 * transaction. Buffers freeing is time consuming, 2180 * multiple iterations may introduce significant 2181 * latency. 2182 */ 2183 if (likely(--count == 0)) 2184 break; 2185 } while (true); 2186 if (likely(ring_doorbell)) { 2187 /* Ring doorbell to notify hardware. */ 2188 rte_compiler_barrier(); 2189 *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); 2190 mlx5_tx_comp_flush(txq, last_cqe, olx); 2191 } 2192 } 2193 2194 /** 2195 * Check if the completion request flag should be set in the last WQE. 2196 * Both pushed mbufs and WQEs are monitored and the completion request 2197 * flag is set if any of thresholds is reached. 2198 * 2199 * @param txq 2200 * Pointer to TX queue structure. 2201 * @param loc 2202 * Pointer to burst routine local context. 2203 * @param olx 2204 * Configured Tx offloads mask. It is fully defined at 2205 * compile time and may be used for optimization. 2206 */ 2207 static __rte_always_inline void 2208 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 2209 struct mlx5_txq_local *__rte_restrict loc, 2210 unsigned int olx) 2211 { 2212 uint16_t head = txq->elts_head; 2213 unsigned int part; 2214 2215 part = MLX5_TXOFF_CONFIG(INLINE) ? 
2216 0 : loc->pkts_sent - loc->pkts_copy; 2217 head += part; 2218 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 2219 (MLX5_TXOFF_CONFIG(INLINE) && 2220 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 2221 volatile struct mlx5_wqe *last = loc->wqe_last; 2222 2223 MLX5_ASSERT(last); 2224 txq->elts_comp = head; 2225 if (MLX5_TXOFF_CONFIG(INLINE)) 2226 txq->wqe_comp = txq->wqe_ci; 2227 /* Request unconditional completion on last WQE. */ 2228 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 2229 MLX5_COMP_MODE_OFFSET); 2230 /* Save elts_head in dedicated free on completion queue. */ 2231 #ifdef RTE_LIBRTE_MLX5_DEBUG 2232 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 2233 (last->cseg.opcode >> 8) << 16; 2234 #else 2235 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 2236 #endif 2237 /* A CQE slot must always be available. */ 2238 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 2239 } 2240 } 2241 2242 /** 2243 * DPDK callback to check the status of a tx descriptor. 2244 * 2245 * @param tx_queue 2246 * The tx queue. 2247 * @param[in] offset 2248 * The index of the descriptor in the ring. 2249 * 2250 * @return 2251 * The status of the tx descriptor. 2252 */ 2253 int 2254 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) 2255 { 2256 struct mlx5_txq_data *__rte_restrict txq = tx_queue; 2257 uint16_t used; 2258 2259 mlx5_tx_handle_completion(txq, 0); 2260 used = txq->elts_head - txq->elts_tail; 2261 if (offset < used) 2262 return RTE_ETH_TX_DESC_FULL; 2263 return RTE_ETH_TX_DESC_DONE; 2264 } 2265 2266 /** 2267 * Build the Control Segment with specified opcode: 2268 * - MLX5_OPCODE_SEND 2269 * - MLX5_OPCODE_ENHANCED_MPSW 2270 * - MLX5_OPCODE_TSO 2271 * 2272 * @param txq 2273 * Pointer to TX queue structure. 2274 * @param loc 2275 * Pointer to burst routine local context. 2276 * @param wqe 2277 * Pointer to WQE to fill with built Control Segment. 2278 * @param ds 2279 * Supposed length of WQE in segments. 2280 * @param opcode 2281 * SQ WQE opcode to put into Control Segment. 2282 * @param olx 2283 * Configured Tx offloads mask. It is fully defined at 2284 * compile time and may be used for optimization. 2285 */ 2286 static __rte_always_inline void 2287 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 2288 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2289 struct mlx5_wqe *__rte_restrict wqe, 2290 unsigned int ds, 2291 unsigned int opcode, 2292 unsigned int olx __rte_unused) 2293 { 2294 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 2295 2296 /* For legacy MPW replace the EMPW by TSO with modifier. */ 2297 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 2298 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 2299 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 2300 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2301 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 2302 MLX5_COMP_MODE_OFFSET); 2303 cs->misc = RTE_BE32(0); 2304 } 2305 2306 /** 2307 * Build the Synchronize Queue Segment with specified completion index. 2308 * 2309 * @param txq 2310 * Pointer to TX queue structure. 2311 * @param loc 2312 * Pointer to burst routine local context. 2313 * @param wqe 2314 * Pointer to WQE to fill with built Control Segment. 2315 * @param wci 2316 * Completion index in Clock Queue to wait. 2317 * @param olx 2318 * Configured Tx offloads mask. It is fully defined at 2319 * compile time and may be used for optimization. 
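 * The Queue Segment built below references the Clock Queue CQ, the WQE
 * is expected to be held by the hardware until the Clock Queue
 * completion index reaches the specified wci value.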
2320 */ 2321 static __rte_always_inline void 2322 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 2323 struct mlx5_txq_local *restrict loc __rte_unused, 2324 struct mlx5_wqe *restrict wqe, 2325 unsigned int wci, 2326 unsigned int olx __rte_unused) 2327 { 2328 struct mlx5_wqe_qseg *qs; 2329 2330 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 2331 qs->max_index = rte_cpu_to_be_32(wci); 2332 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq->id); 2333 qs->reserved0 = RTE_BE32(0); 2334 qs->reserved1 = RTE_BE32(0); 2335 } 2336 2337 /** 2338 * Build the Ethernet Segment without inlined data. 2339 * Supports Software Parser, Checksums and VLAN 2340 * insertion Tx offload features. 2341 * 2342 * @param txq 2343 * Pointer to TX queue structure. 2344 * @param loc 2345 * Pointer to burst routine local context. 2346 * @param wqe 2347 * Pointer to WQE to fill with built Ethernet Segment. 2348 * @param olx 2349 * Configured Tx offloads mask. It is fully defined at 2350 * compile time and may be used for optimization. 2351 */ 2352 static __rte_always_inline void 2353 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 2354 struct mlx5_txq_local *__rte_restrict loc, 2355 struct mlx5_wqe *__rte_restrict wqe, 2356 unsigned int olx) 2357 { 2358 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2359 uint32_t csum; 2360 2361 /* 2362 * Calculate and set check sum flags first, dword field 2363 * in segment may be shared with Software Parser flags. 2364 */ 2365 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2366 es->flags = rte_cpu_to_le_32(csum); 2367 /* 2368 * Calculate and set Software Parser offsets and flags. 2369 * These flags a set for custom UDP and IP tunnel packets. 2370 */ 2371 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2372 /* Fill metadata field if needed. */ 2373 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2374 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2375 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2376 /* Engage VLAN tag insertion feature if requested. */ 2377 if (MLX5_TXOFF_CONFIG(VLAN) && 2378 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 2379 /* 2380 * We should get here only if device support 2381 * this feature correctly. 2382 */ 2383 MLX5_ASSERT(txq->vlan_en); 2384 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 2385 loc->mbuf->vlan_tci); 2386 } else { 2387 es->inline_hdr = RTE_BE32(0); 2388 } 2389 } 2390 2391 /** 2392 * Build the Ethernet Segment with minimal inlined data 2393 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 2394 * used to fill the gap in single WQEBB WQEs. 2395 * Supports Software Parser, Checksums and VLAN 2396 * insertion Tx offload features. 2397 * 2398 * @param txq 2399 * Pointer to TX queue structure. 2400 * @param loc 2401 * Pointer to burst routine local context. 2402 * @param wqe 2403 * Pointer to WQE to fill with built Ethernet Segment. 2404 * @param vlan 2405 * Length of VLAN tag insertion if any. 2406 * @param olx 2407 * Configured Tx offloads mask. It is fully defined at 2408 * compile time and may be used for optimization. 
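 * Note: MLX5_ESEG_MIN_INLINE_SIZE amounts to 18 bytes (see the static
 * asserts below), with VLAN insertion this exactly covers the two MAC
 * addresses, the inserted 802.1Q header and the original ethertype.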
2409 */ 2410 static __rte_always_inline void 2411 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 2412 struct mlx5_txq_local *__rte_restrict loc, 2413 struct mlx5_wqe *__rte_restrict wqe, 2414 unsigned int vlan, 2415 unsigned int olx) 2416 { 2417 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2418 uint32_t csum; 2419 uint8_t *psrc, *pdst; 2420 2421 /* 2422 * Calculate and set check sum flags first, dword field 2423 * in segment may be shared with Software Parser flags. 2424 */ 2425 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2426 es->flags = rte_cpu_to_le_32(csum); 2427 /* 2428 * Calculate and set Software Parser offsets and flags. 2429 * These flags are set for custom UDP and IP tunnel packets. 2430 */ 2431 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2432 /* Fill metadata field if needed. */ 2433 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2434 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2435 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2436 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2437 (sizeof(uint16_t) + 2438 sizeof(rte_v128u32_t)), 2439 "invalid Ethernet Segment data size"); 2440 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2441 (sizeof(uint16_t) + 2442 sizeof(struct rte_vlan_hdr) + 2443 2 * RTE_ETHER_ADDR_LEN), 2444 "invalid Ethernet Segment data size"); 2445 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2446 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 2447 es->inline_data = *(unaligned_uint16_t *)psrc; 2448 psrc += sizeof(uint16_t); 2449 pdst = (uint8_t *)(es + 1); 2450 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2451 /* Implement VLAN tag insertion as a part of the inline data. */ 2452 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2453 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2454 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2455 /* Insert VLAN ethertype + VLAN tag. */ 2456 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2457 ((RTE_ETHER_TYPE_VLAN << 16) | 2458 loc->mbuf->vlan_tci); 2459 pdst += sizeof(struct rte_vlan_hdr); 2460 /* Copy the remaining two bytes from the packet data. */ 2461 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2462 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2463 } else { 2464 /* Fill the gap in the title (first) WQEBB with inline data. */ 2465 rte_mov16(pdst, psrc); 2466 } 2467 } 2468 2469 /** 2470 * Build the Ethernet Segment with entire packet 2471 * data inlining. Checks the boundary of WQEBB and 2472 * ring buffer wrapping, supports Software Parser, 2473 * Checksums and VLAN insertion Tx offload features. 2474 * 2475 * @param txq 2476 * Pointer to TX queue structure. 2477 * @param loc 2478 * Pointer to burst routine local context. 2479 * @param wqe 2480 * Pointer to WQE to fill with built Ethernet Segment. 2481 * @param vlan 2482 * Length of VLAN tag insertion if any. 2483 * @param inlen 2484 * Length of data to inline (VLAN included, if any). 2485 * @param tso 2486 * TSO flag, if set the mss field is taken from the packet. 2487 * @param olx 2488 * Configured Tx offloads mask. It is fully defined at 2489 * compile time and may be used for optimization. 2490 * 2491 * @return 2492 * Pointer to the next Data Segment (aligned and wrapped around).
2493 */ 2494 static __rte_always_inline struct mlx5_wqe_dseg * 2495 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 2496 struct mlx5_txq_local *__rte_restrict loc, 2497 struct mlx5_wqe *__rte_restrict wqe, 2498 unsigned int vlan, 2499 unsigned int inlen, 2500 unsigned int tso, 2501 unsigned int olx) 2502 { 2503 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2504 uint32_t csum; 2505 uint8_t *psrc, *pdst; 2506 unsigned int part; 2507 2508 /* 2509 * Calculate and set check sum flags first, dword field 2510 * in segment may be shared with Software Parser flags. 2511 */ 2512 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2513 if (tso) { 2514 csum <<= 24; 2515 csum |= loc->mbuf->tso_segsz; 2516 es->flags = rte_cpu_to_be_32(csum); 2517 } else { 2518 es->flags = rte_cpu_to_le_32(csum); 2519 } 2520 /* 2521 * Calculate and set Software Parser offsets and flags. 2522 * These flags a set for custom UDP and IP tunnel packets. 2523 */ 2524 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2525 /* Fill metadata field if needed. */ 2526 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2527 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2528 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2529 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2530 (sizeof(uint16_t) + 2531 sizeof(rte_v128u32_t)), 2532 "invalid Ethernet Segment data size"); 2533 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2534 (sizeof(uint16_t) + 2535 sizeof(struct rte_vlan_hdr) + 2536 2 * RTE_ETHER_ADDR_LEN), 2537 "invalid Ethernet Segment data size"); 2538 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2539 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 2540 es->inline_data = *(unaligned_uint16_t *)psrc; 2541 psrc += sizeof(uint16_t); 2542 pdst = (uint8_t *)(es + 1); 2543 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2544 /* Implement VLAN tag insertion as part inline data. */ 2545 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2546 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2547 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2548 /* Insert VLAN ethertype + VLAN tag. */ 2549 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2550 ((RTE_ETHER_TYPE_VLAN << 16) | 2551 loc->mbuf->vlan_tci); 2552 pdst += sizeof(struct rte_vlan_hdr); 2553 /* Copy the rest two bytes from packet data. */ 2554 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2555 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2556 psrc += sizeof(uint16_t); 2557 } else { 2558 /* Fill the gap in the title WQEBB with inline data. */ 2559 rte_mov16(pdst, psrc); 2560 psrc += sizeof(rte_v128u32_t); 2561 } 2562 pdst = (uint8_t *)(es + 2); 2563 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2564 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 2565 inlen -= MLX5_ESEG_MIN_INLINE_SIZE; 2566 if (!inlen) { 2567 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 2568 return (struct mlx5_wqe_dseg *)pdst; 2569 } 2570 /* 2571 * The WQEBB space availability is checked by caller. 2572 * Here we should be aware of WQE ring buffer wraparound only. 2573 */ 2574 part = (uint8_t *)txq->wqes_end - pdst; 2575 part = RTE_MIN(part, inlen); 2576 do { 2577 rte_memcpy(pdst, psrc, part); 2578 inlen -= part; 2579 if (likely(!inlen)) { 2580 /* 2581 * If return value is not used by the caller 2582 * the code below will be optimized out. 
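 * Note: the first chunk is limited to the room left up to wqes_end,
 * so the memcpy never crosses the ring boundary; after a wraparound
 * the rest is copied from the ring base.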
2583 */ 2584 pdst += part; 2585 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2586 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 2587 pdst = (uint8_t *)txq->wqes; 2588 return (struct mlx5_wqe_dseg *)pdst; 2589 } 2590 pdst = (uint8_t *)txq->wqes; 2591 psrc += part; 2592 part = inlen; 2593 } while (true); 2594 } 2595 2596 /** 2597 * Copy data from chain of mbuf to the specified linear buffer. 2598 * Checksums and VLAN insertion Tx offload features. If data 2599 * from some mbuf copied completely this mbuf is freed. Local 2600 * structure is used to keep the byte stream state. 2601 * 2602 * @param pdst 2603 * Pointer to the destination linear buffer. 2604 * @param loc 2605 * Pointer to burst routine local context. 2606 * @param len 2607 * Length of data to be copied. 2608 * @param must 2609 * Length of data to be copied ignoring no inline hint. 2610 * @param olx 2611 * Configured Tx offloads mask. It is fully defined at 2612 * compile time and may be used for optimization. 2613 * 2614 * @return 2615 * Number of actual copied data bytes. This is always greater than or 2616 * equal to must parameter and might be lesser than len in no inline 2617 * hint flag is encountered. 2618 */ 2619 static __rte_always_inline unsigned int 2620 mlx5_tx_mseg_memcpy(uint8_t *pdst, 2621 struct mlx5_txq_local *__rte_restrict loc, 2622 unsigned int len, 2623 unsigned int must, 2624 unsigned int olx __rte_unused) 2625 { 2626 struct rte_mbuf *mbuf; 2627 unsigned int part, dlen, copy = 0; 2628 uint8_t *psrc; 2629 2630 MLX5_ASSERT(len); 2631 MLX5_ASSERT(must <= len); 2632 do { 2633 /* Allow zero length packets, must check first. */ 2634 dlen = rte_pktmbuf_data_len(loc->mbuf); 2635 if (dlen <= loc->mbuf_off) { 2636 /* Exhausted packet, just free. */ 2637 mbuf = loc->mbuf; 2638 loc->mbuf = mbuf->next; 2639 rte_pktmbuf_free_seg(mbuf); 2640 loc->mbuf_off = 0; 2641 MLX5_ASSERT(loc->mbuf_nseg > 1); 2642 MLX5_ASSERT(loc->mbuf); 2643 --loc->mbuf_nseg; 2644 if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 2645 unsigned int diff; 2646 2647 if (copy >= must) { 2648 /* 2649 * We already copied the minimal 2650 * requested amount of data. 2651 */ 2652 return copy; 2653 } 2654 diff = must - copy; 2655 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 2656 /* 2657 * Copy only the minimal required 2658 * part of the data buffer. 2659 */ 2660 len = diff; 2661 } 2662 } 2663 continue; 2664 } 2665 dlen -= loc->mbuf_off; 2666 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 2667 loc->mbuf_off); 2668 part = RTE_MIN(len, dlen); 2669 rte_memcpy(pdst, psrc, part); 2670 copy += part; 2671 loc->mbuf_off += part; 2672 len -= part; 2673 if (!len) { 2674 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 2675 loc->mbuf_off = 0; 2676 /* Exhausted packet, just free. */ 2677 mbuf = loc->mbuf; 2678 loc->mbuf = mbuf->next; 2679 rte_pktmbuf_free_seg(mbuf); 2680 loc->mbuf_off = 0; 2681 MLX5_ASSERT(loc->mbuf_nseg >= 1); 2682 --loc->mbuf_nseg; 2683 } 2684 return copy; 2685 } 2686 pdst += part; 2687 } while (true); 2688 } 2689 2690 /** 2691 * Build the Ethernet Segment with inlined data from 2692 * multi-segment packet. Checks the boundary of WQEBB 2693 * and ring buffer wrapping, supports Software Parser, 2694 * Checksums and VLAN insertion Tx offload features. 2695 * 2696 * @param txq 2697 * Pointer to TX queue structure. 2698 * @param loc 2699 * Pointer to burst routine local context. 2700 * @param wqe 2701 * Pointer to WQE to fill with built Ethernet Segment. 2702 * @param vlan 2703 * Length of VLAN tag insertion if any. 
2704 * @param inlen 2705 * Length of data to inline (VLAN included, if any). 2706 * @param tso 2707 * TSO flag, set mss field from the packet. 2708 * @param olx 2709 * Configured Tx offloads mask. It is fully defined at 2710 * compile time and may be used for optimization. 2711 * 2712 * @return 2713 * Pointer to the next Data Segment (aligned and 2714 * possible NOT wrapped around - caller should do 2715 * wrapping check on its own). 2716 */ 2717 static __rte_always_inline struct mlx5_wqe_dseg * 2718 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 2719 struct mlx5_txq_local *__rte_restrict loc, 2720 struct mlx5_wqe *__rte_restrict wqe, 2721 unsigned int vlan, 2722 unsigned int inlen, 2723 unsigned int tso, 2724 unsigned int olx) 2725 { 2726 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2727 uint32_t csum; 2728 uint8_t *pdst; 2729 unsigned int part, tlen = 0; 2730 2731 /* 2732 * Calculate and set check sum flags first, uint32_t field 2733 * in segment may be shared with Software Parser flags. 2734 */ 2735 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2736 if (tso) { 2737 csum <<= 24; 2738 csum |= loc->mbuf->tso_segsz; 2739 es->flags = rte_cpu_to_be_32(csum); 2740 } else { 2741 es->flags = rte_cpu_to_le_32(csum); 2742 } 2743 /* 2744 * Calculate and set Software Parser offsets and flags. 2745 * These flags a set for custom UDP and IP tunnel packets. 2746 */ 2747 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2748 /* Fill metadata field if needed. */ 2749 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2750 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2751 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2752 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2753 (sizeof(uint16_t) + 2754 sizeof(rte_v128u32_t)), 2755 "invalid Ethernet Segment data size"); 2756 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2757 (sizeof(uint16_t) + 2758 sizeof(struct rte_vlan_hdr) + 2759 2 * RTE_ETHER_ADDR_LEN), 2760 "invalid Ethernet Segment data size"); 2761 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2762 pdst = (uint8_t *)&es->inline_data; 2763 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2764 /* Implement VLAN tag insertion as part inline data. */ 2765 mlx5_tx_mseg_memcpy(pdst, loc, 2766 2 * RTE_ETHER_ADDR_LEN, 2767 2 * RTE_ETHER_ADDR_LEN, olx); 2768 pdst += 2 * RTE_ETHER_ADDR_LEN; 2769 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2770 ((RTE_ETHER_TYPE_VLAN << 16) | 2771 loc->mbuf->vlan_tci); 2772 pdst += sizeof(struct rte_vlan_hdr); 2773 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 2774 } 2775 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 2776 /* 2777 * The WQEBB space availability is checked by caller. 2778 * Here we should be aware of WQE ring buffer wraparound only. 2779 */ 2780 part = (uint8_t *)txq->wqes_end - pdst; 2781 part = RTE_MIN(part, inlen - tlen); 2782 MLX5_ASSERT(part); 2783 do { 2784 unsigned int copy; 2785 2786 /* 2787 * Copying may be interrupted inside the routine 2788 * if run into no inline hint flag. 2789 */ 2790 copy = tlen >= txq->inlen_mode ? 0 : (txq->inlen_mode - tlen); 2791 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 2792 tlen += copy; 2793 if (likely(inlen <= tlen) || copy < part) { 2794 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 2795 pdst += copy; 2796 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2797 return (struct mlx5_wqe_dseg *)pdst; 2798 } 2799 pdst = (uint8_t *)txq->wqes; 2800 part = inlen - tlen; 2801 } while (true); 2802 } 2803 2804 /** 2805 * Build the Data Segment of pointer type. 
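 * The segment carries only the byte count, the memory key (lkey) and
 * the buffer address - no packet data is copied into the WQE.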
2806 * 2807 * @param txq 2808 * Pointer to TX queue structure. 2809 * @param loc 2810 * Pointer to burst routine local context. 2811 * @param dseg 2812 * Pointer to WQE to fill with built Data Segment. 2813 * @param buf 2814 * Data buffer to point. 2815 * @param len 2816 * Data buffer length. 2817 * @param olx 2818 * Configured Tx offloads mask. It is fully defined at 2819 * compile time and may be used for optimization. 2820 */ 2821 static __rte_always_inline void 2822 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 2823 struct mlx5_txq_local *__rte_restrict loc, 2824 struct mlx5_wqe_dseg *__rte_restrict dseg, 2825 uint8_t *buf, 2826 unsigned int len, 2827 unsigned int olx __rte_unused) 2828 2829 { 2830 MLX5_ASSERT(len); 2831 dseg->bcount = rte_cpu_to_be_32(len); 2832 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2833 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2834 } 2835 2836 /** 2837 * Build the Data Segment of pointer type or inline 2838 * if data length is less than buffer in minimal 2839 * Data Segment size. 2840 * 2841 * @param txq 2842 * Pointer to TX queue structure. 2843 * @param loc 2844 * Pointer to burst routine local context. 2845 * @param dseg 2846 * Pointer to WQE to fill with built Data Segment. 2847 * @param buf 2848 * Data buffer to point. 2849 * @param len 2850 * Data buffer length. 2851 * @param olx 2852 * Configured Tx offloads mask. It is fully defined at 2853 * compile time and may be used for optimization. 2854 */ 2855 static __rte_always_inline void 2856 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 2857 struct mlx5_txq_local *__rte_restrict loc, 2858 struct mlx5_wqe_dseg *__rte_restrict dseg, 2859 uint8_t *buf, 2860 unsigned int len, 2861 unsigned int olx __rte_unused) 2862 2863 { 2864 uintptr_t dst, src; 2865 2866 MLX5_ASSERT(len); 2867 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 2868 dseg->bcount = rte_cpu_to_be_32(len); 2869 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2870 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2871 2872 return; 2873 } 2874 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2875 /* Unrolled implementation of generic rte_memcpy. */ 2876 dst = (uintptr_t)&dseg->inline_data[0]; 2877 src = (uintptr_t)buf; 2878 if (len & 0x08) { 2879 #ifdef RTE_ARCH_STRICT_ALIGN 2880 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 2881 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2882 dst += sizeof(uint32_t); 2883 src += sizeof(uint32_t); 2884 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2885 dst += sizeof(uint32_t); 2886 src += sizeof(uint32_t); 2887 #else 2888 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 2889 dst += sizeof(uint64_t); 2890 src += sizeof(uint64_t); 2891 #endif 2892 } 2893 if (len & 0x04) { 2894 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2895 dst += sizeof(uint32_t); 2896 src += sizeof(uint32_t); 2897 } 2898 if (len & 0x02) { 2899 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 2900 dst += sizeof(uint16_t); 2901 src += sizeof(uint16_t); 2902 } 2903 if (len & 0x01) 2904 *(uint8_t *)dst = *(uint8_t *)src; 2905 } 2906 2907 /** 2908 * Build the Data Segment of inlined data from single 2909 * segment packet, no VLAN insertion. 2910 * 2911 * @param txq 2912 * Pointer to TX queue structure. 2913 * @param loc 2914 * Pointer to burst routine local context. 2915 * @param dseg 2916 * Pointer to WQE to fill with built Data Segment. 2917 * @param buf 2918 * Data buffer to point. 2919 * @param len 2920 * Data buffer length. 2921 * @param olx 2922 * Configured Tx offloads mask. 
It is fully defined at 2923 * compile time and may be used for optimization. 2924 * 2925 * @return 2926 * Pointer to the next Data Segment after inlined data. 2927 * Ring buffer wraparound check is needed. We do not 2928 * do it here because it may not be needed for the 2929 * last packet in the eMPW session. 2930 */ 2931 static __rte_always_inline struct mlx5_wqe_dseg * 2932 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 2933 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2934 struct mlx5_wqe_dseg *__rte_restrict dseg, 2935 uint8_t *buf, 2936 unsigned int len, 2937 unsigned int olx __rte_unused) 2938 { 2939 unsigned int part; 2940 uint8_t *pdst; 2941 2942 if (!MLX5_TXOFF_CONFIG(MPW)) { 2943 /* Store the descriptor byte counter for eMPW sessions. */ 2944 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2945 pdst = &dseg->inline_data[0]; 2946 } else { 2947 /* The entire legacy MPW session counter is stored on close. */ 2948 pdst = (uint8_t *)dseg; 2949 } 2950 /* 2951 * The WQEBB space availability is checked by caller. 2952 * Here we should be aware of WQE ring buffer wraparound only. 2953 */ 2954 part = (uint8_t *)txq->wqes_end - pdst; 2955 part = RTE_MIN(part, len); 2956 do { 2957 rte_memcpy(pdst, buf, part); 2958 len -= part; 2959 if (likely(!len)) { 2960 pdst += part; 2961 if (!MLX5_TXOFF_CONFIG(MPW)) 2962 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2963 /* Note: no final wraparound check here. */ 2964 return (struct mlx5_wqe_dseg *)pdst; 2965 } 2966 pdst = (uint8_t *)txq->wqes; 2967 buf += part; 2968 part = len; 2969 } while (true); 2970 } 2971 2972 /** 2973 * Build the Data Segment of inlined data from single 2974 * segment packet with VLAN insertion. 2975 * 2976 * @param txq 2977 * Pointer to TX queue structure. 2978 * @param loc 2979 * Pointer to burst routine local context. 2980 * @param dseg 2981 * Pointer to the dseg fill with built Data Segment. 2982 * @param buf 2983 * Data buffer to point. 2984 * @param len 2985 * Data buffer length. 2986 * @param olx 2987 * Configured Tx offloads mask. It is fully defined at 2988 * compile time and may be used for optimization. 2989 * 2990 * @return 2991 * Pointer to the next Data Segment after inlined data. 2992 * Ring buffer wraparound check is needed. 2993 */ 2994 static __rte_always_inline struct mlx5_wqe_dseg * 2995 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 2996 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2997 struct mlx5_wqe_dseg *__rte_restrict dseg, 2998 uint8_t *buf, 2999 unsigned int len, 3000 unsigned int olx __rte_unused) 3001 3002 { 3003 unsigned int part; 3004 uint8_t *pdst; 3005 3006 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 3007 static_assert(MLX5_DSEG_MIN_INLINE_SIZE == 3008 (2 * RTE_ETHER_ADDR_LEN), 3009 "invalid Data Segment data size"); 3010 if (!MLX5_TXOFF_CONFIG(MPW)) { 3011 /* Store the descriptor byte counter for eMPW sessions. */ 3012 dseg->bcount = rte_cpu_to_be_32 3013 ((len + sizeof(struct rte_vlan_hdr)) | 3014 MLX5_ETH_WQE_DATA_INLINE); 3015 pdst = &dseg->inline_data[0]; 3016 } else { 3017 /* The entire legacy MPW session counter is stored on close. */ 3018 pdst = (uint8_t *)dseg; 3019 } 3020 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 3021 buf += MLX5_DSEG_MIN_INLINE_SIZE; 3022 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 3023 len -= MLX5_DSEG_MIN_INLINE_SIZE; 3024 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. 
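 * The two MAC addresses (MLX5_DSEG_MIN_INLINE_SIZE bytes) are already
 * copied at this point, so the tag lands right after them. The
 * wraparound check below is needed since this may be the first write
 * past the ring end.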
*/ 3025 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 3026 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 3027 pdst = (uint8_t *)txq->wqes; 3028 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 3029 loc->mbuf->vlan_tci); 3030 pdst += sizeof(struct rte_vlan_hdr); 3031 /* 3032 * The WQEBB space availability is checked by caller. 3033 * Here we should be aware of WQE ring buffer wraparound only. 3034 */ 3035 part = (uint8_t *)txq->wqes_end - pdst; 3036 part = RTE_MIN(part, len); 3037 do { 3038 rte_memcpy(pdst, buf, part); 3039 len -= part; 3040 if (likely(!len)) { 3041 pdst += part; 3042 if (!MLX5_TXOFF_CONFIG(MPW)) 3043 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 3044 /* Note: no final wraparound check here. */ 3045 return (struct mlx5_wqe_dseg *)pdst; 3046 } 3047 pdst = (uint8_t *)txq->wqes; 3048 buf += part; 3049 part = len; 3050 } while (true); 3051 } 3052 3053 /** 3054 * Build the Ethernet Segment with optionally inlined data with 3055 * VLAN insertion and following Data Segments (if any) from 3056 * multi-segment packet. Used by ordinary send and TSO. 3057 * 3058 * @param txq 3059 * Pointer to TX queue structure. 3060 * @param loc 3061 * Pointer to burst routine local context. 3062 * @param wqe 3063 * Pointer to WQE to fill with built Ethernet/Data Segments. 3064 * @param vlan 3065 * Length of VLAN header to insert, 0 means no VLAN insertion. 3066 * @param inlen 3067 * Data length to inline. For TSO this parameter specifies 3068 * exact value, for ordinary send routine can be aligned by 3069 * caller to provide better WQE space saving and data buffer 3070 * start address alignment. This length includes VLAN header 3071 * being inserted. 3072 * @param tso 3073 * Zero means ordinary send, inlined data can be extended, 3074 * otherwise this is TSO, inlined data length is fixed. 3075 * @param olx 3076 * Configured Tx offloads mask. It is fully defined at 3077 * compile time and may be used for optimization. 3078 * 3079 * @return 3080 * Actual size of built WQE in segments. 3081 */ 3082 static __rte_always_inline unsigned int 3083 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 3084 struct mlx5_txq_local *__rte_restrict loc, 3085 struct mlx5_wqe *__rte_restrict wqe, 3086 unsigned int vlan, 3087 unsigned int inlen, 3088 unsigned int tso, 3089 unsigned int olx __rte_unused) 3090 { 3091 struct mlx5_wqe_dseg *__rte_restrict dseg; 3092 unsigned int ds; 3093 3094 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 3095 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 3096 loc->mbuf_off = 0; 3097 3098 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 3099 if (!loc->mbuf_nseg) 3100 goto dseg_done; 3101 /* 3102 * There are still some mbuf remaining, not inlined. 3103 * The first mbuf may be partially inlined and we 3104 * must process the possible non-zero data offset. 3105 */ 3106 if (loc->mbuf_off) { 3107 unsigned int dlen; 3108 uint8_t *dptr; 3109 3110 /* 3111 * Exhausted packets must be dropped before. 3112 * Non-zero offset means there are some data 3113 * remained in the packet. 3114 */ 3115 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 3116 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 3117 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 3118 loc->mbuf_off); 3119 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 3120 /* 3121 * Build the pointer/minimal data Data Segment. 3122 * Do ring buffer wrapping check in advance. 
3123 */ 3124 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3125 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3126 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 3127 /* Store the mbuf to be freed on completion. */ 3128 MLX5_ASSERT(loc->elts_free); 3129 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3130 --loc->elts_free; 3131 ++dseg; 3132 if (--loc->mbuf_nseg == 0) 3133 goto dseg_done; 3134 loc->mbuf = loc->mbuf->next; 3135 loc->mbuf_off = 0; 3136 } 3137 do { 3138 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3139 struct rte_mbuf *mbuf; 3140 3141 /* Zero length segment found, just skip. */ 3142 mbuf = loc->mbuf; 3143 loc->mbuf = loc->mbuf->next; 3144 rte_pktmbuf_free_seg(mbuf); 3145 if (--loc->mbuf_nseg == 0) 3146 break; 3147 } else { 3148 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3149 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3150 mlx5_tx_dseg_iptr 3151 (txq, loc, dseg, 3152 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3153 rte_pktmbuf_data_len(loc->mbuf), olx); 3154 MLX5_ASSERT(loc->elts_free); 3155 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3156 --loc->elts_free; 3157 ++dseg; 3158 if (--loc->mbuf_nseg == 0) 3159 break; 3160 loc->mbuf = loc->mbuf->next; 3161 } 3162 } while (true); 3163 3164 dseg_done: 3165 /* Calculate actual segments used from the dseg pointer. */ 3166 if ((uintptr_t)wqe < (uintptr_t)dseg) 3167 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 3168 else 3169 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 3170 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 3171 return ds; 3172 } 3173 3174 /** 3175 * The routine checks timestamp flag in the current packet, 3176 * and push WAIT WQE into the queue if scheduling is required. 3177 * 3178 * @param txq 3179 * Pointer to TX queue structure. 3180 * @param loc 3181 * Pointer to burst routine local context. 3182 * @param olx 3183 * Configured Tx offloads mask. It is fully defined at 3184 * compile time and may be used for optimization. 3185 * 3186 * @return 3187 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3188 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 3189 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 3190 * Local context variables partially updated. 3191 */ 3192 static __rte_always_inline enum mlx5_txcmp_code 3193 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 3194 struct mlx5_txq_local *restrict loc, 3195 unsigned int olx) 3196 { 3197 if (MLX5_TXOFF_CONFIG(TXPP) && 3198 loc->mbuf->ol_flags & txq->ts_mask) { 3199 struct mlx5_wqe *wqe; 3200 uint64_t ts; 3201 int32_t wci; 3202 3203 /* 3204 * Estimate the required space quickly and roughly. 3205 * We would like to ensure the packet can be pushed 3206 * to the queue and we won't get the orphan WAIT WQE. 3207 */ 3208 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 3209 loc->elts_free < NB_SEGS(loc->mbuf)) 3210 return MLX5_TXCMP_CODE_EXIT; 3211 /* Convert the timestamp into completion to wait. */ 3212 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 3213 wci = mlx5_txpp_convert_tx_ts(txq->sh, ts); 3214 if (unlikely(wci < 0)) 3215 return MLX5_TXCMP_CODE_SINGLE; 3216 /* Build the WAIT WQE with specified completion. 
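 * The WAIT WQE is two segments long: a Control Segment with the
 * MLX5_OPCODE_WAIT opcode and a Queue Segment referencing the Clock
 * Queue completion index (see mlx5_tx_wseg_init()).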
*/ 3217 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3218 mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx); 3219 mlx5_tx_wseg_init(txq, loc, wqe, wci, olx); 3220 ++txq->wqe_ci; 3221 --loc->wqe_free; 3222 return MLX5_TXCMP_CODE_MULTI; 3223 } 3224 return MLX5_TXCMP_CODE_SINGLE; 3225 } 3226 3227 /** 3228 * Tx one packet function for multi-segment TSO. Supports all 3229 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 3230 * sends one packet per WQE. 3231 * 3232 * This routine is responsible for storing processed mbuf 3233 * into elts ring buffer and update elts_head. 3234 * 3235 * @param txq 3236 * Pointer to TX queue structure. 3237 * @param loc 3238 * Pointer to burst routine local context. 3239 * @param olx 3240 * Configured Tx offloads mask. It is fully defined at 3241 * compile time and may be used for optimization. 3242 * 3243 * @return 3244 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3245 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3246 * Local context variables partially updated. 3247 */ 3248 static __rte_always_inline enum mlx5_txcmp_code 3249 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 3250 struct mlx5_txq_local *__rte_restrict loc, 3251 unsigned int olx) 3252 { 3253 struct mlx5_wqe *__rte_restrict wqe; 3254 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 3255 3256 if (MLX5_TXOFF_CONFIG(TXPP)) { 3257 enum mlx5_txcmp_code wret; 3258 3259 /* Generate WAIT for scheduling if requested. */ 3260 wret = mlx5_tx_schedule_send(txq, loc, olx); 3261 if (wret == MLX5_TXCMP_CODE_EXIT) 3262 return MLX5_TXCMP_CODE_EXIT; 3263 if (wret == MLX5_TXCMP_CODE_ERROR) 3264 return MLX5_TXCMP_CODE_ERROR; 3265 } 3266 /* 3267 * Calculate data length to be inlined to estimate 3268 * the required space in WQE ring buffer. 3269 */ 3270 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3271 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3272 vlan = sizeof(struct rte_vlan_hdr); 3273 inlen = loc->mbuf->l2_len + vlan + 3274 loc->mbuf->l3_len + loc->mbuf->l4_len; 3275 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 3276 return MLX5_TXCMP_CODE_ERROR; 3277 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3278 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 3279 /* Packet must contain all TSO headers. */ 3280 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 3281 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3282 inlen > (dlen + vlan))) 3283 return MLX5_TXCMP_CODE_ERROR; 3284 MLX5_ASSERT(inlen >= txq->inlen_mode); 3285 /* 3286 * Check whether there are enough free WQEBBs: 3287 * - Control Segment 3288 * - Ethernet Segment 3289 * - First Segment of inlined Ethernet data 3290 * - ... data continued ... 3291 * - Data Segments of pointer/min inline type 3292 */ 3293 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3294 MLX5_ESEG_MIN_INLINE_SIZE + 3295 MLX5_WSEG_SIZE + 3296 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3297 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3298 return MLX5_TXCMP_CODE_EXIT; 3299 /* Check for maximal WQE size. */ 3300 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3301 return MLX5_TXCMP_CODE_ERROR; 3302 #ifdef MLX5_PMD_SOFT_COUNTERS 3303 /* Update sent data bytes/packets counters. */ 3304 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 3305 loc->mbuf->tso_segsz; 3306 /* 3307 * One will be added for mbuf itself 3308 * at the end of the mlx5_tx_burst from 3309 * loc->pkts_sent field. 
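 * The byte counter below thus accounts for the packet data once plus
 * one replicated header (inlen bytes) per additional TCP segment
 * produced by TSO.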
3310 */ 3311 --ntcp; 3312 txq->stats.opackets += ntcp; 3313 txq->stats.obytes += dlen + vlan + ntcp * inlen; 3314 #endif 3315 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3316 loc->wqe_last = wqe; 3317 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 3318 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 3319 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3320 txq->wqe_ci += (ds + 3) / 4; 3321 loc->wqe_free -= (ds + 3) / 4; 3322 return MLX5_TXCMP_CODE_MULTI; 3323 } 3324 3325 /** 3326 * Tx one packet function for multi-segment SEND. Supports all 3327 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3328 * sends one packet per WQE, without any data inlining in 3329 * Ethernet Segment. 3330 * 3331 * This routine is responsible for storing processed mbuf 3332 * into elts ring buffer and update elts_head. 3333 * 3334 * @param txq 3335 * Pointer to TX queue structure. 3336 * @param loc 3337 * Pointer to burst routine local context. 3338 * @param olx 3339 * Configured Tx offloads mask. It is fully defined at 3340 * compile time and may be used for optimization. 3341 * 3342 * @return 3343 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3344 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3345 * Local context variables partially updated. 3346 */ 3347 static __rte_always_inline enum mlx5_txcmp_code 3348 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 3349 struct mlx5_txq_local *__rte_restrict loc, 3350 unsigned int olx) 3351 { 3352 struct mlx5_wqe_dseg *__rte_restrict dseg; 3353 struct mlx5_wqe *__rte_restrict wqe; 3354 unsigned int ds, nseg; 3355 3356 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3357 if (MLX5_TXOFF_CONFIG(TXPP)) { 3358 enum mlx5_txcmp_code wret; 3359 3360 /* Generate WAIT for scheduling if requested. */ 3361 wret = mlx5_tx_schedule_send(txq, loc, olx); 3362 if (wret == MLX5_TXCMP_CODE_EXIT) 3363 return MLX5_TXCMP_CODE_EXIT; 3364 if (wret == MLX5_TXCMP_CODE_ERROR) 3365 return MLX5_TXCMP_CODE_ERROR; 3366 } 3367 /* 3368 * No inline at all, it means the CPU cycles saving 3369 * is prioritized at configuration, we should not 3370 * copy any packet data to WQE. 3371 */ 3372 nseg = NB_SEGS(loc->mbuf); 3373 ds = 2 + nseg; 3374 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3375 return MLX5_TXCMP_CODE_EXIT; 3376 /* Check for maximal WQE size. */ 3377 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3378 return MLX5_TXCMP_CODE_ERROR; 3379 /* 3380 * Some Tx offloads may cause an error if 3381 * packet is not long enough, check against 3382 * assumed minimal length. 3383 */ 3384 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 3385 return MLX5_TXCMP_CODE_ERROR; 3386 #ifdef MLX5_PMD_SOFT_COUNTERS 3387 /* Update sent data bytes counter. 
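 * The VLAN header inserted by the hardware is added separately below,
 * it is transmitted on the wire but is not present in the mbuf data.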
*/ 3388 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 3389 if (MLX5_TXOFF_CONFIG(VLAN) && 3390 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3391 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 3392 #endif 3393 /* 3394 * SEND WQE, one WQEBB: 3395 * - Control Segment, SEND opcode 3396 * - Ethernet Segment, optional VLAN, no inline 3397 * - Data Segments, pointer only type 3398 */ 3399 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3400 loc->wqe_last = wqe; 3401 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 3402 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3403 dseg = &wqe->dseg[0]; 3404 do { 3405 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3406 struct rte_mbuf *mbuf; 3407 3408 /* 3409 * Zero length segment found, have to 3410 * correct total size of WQE in segments. 3411 * It is supposed to be rare occasion, so 3412 * in normal case (no zero length segments) 3413 * we avoid extra writing to the Control 3414 * Segment. 3415 */ 3416 --ds; 3417 wqe->cseg.sq_ds -= RTE_BE32(1); 3418 mbuf = loc->mbuf; 3419 loc->mbuf = mbuf->next; 3420 rte_pktmbuf_free_seg(mbuf); 3421 if (--nseg == 0) 3422 break; 3423 } else { 3424 mlx5_tx_dseg_ptr 3425 (txq, loc, dseg, 3426 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3427 rte_pktmbuf_data_len(loc->mbuf), olx); 3428 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3429 --loc->elts_free; 3430 if (--nseg == 0) 3431 break; 3432 ++dseg; 3433 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3434 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3435 loc->mbuf = loc->mbuf->next; 3436 } 3437 } while (true); 3438 txq->wqe_ci += (ds + 3) / 4; 3439 loc->wqe_free -= (ds + 3) / 4; 3440 return MLX5_TXCMP_CODE_MULTI; 3441 } 3442 3443 /** 3444 * Tx one packet function for multi-segment SEND. Supports all 3445 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3446 * sends one packet per WQE, with data inlining in 3447 * Ethernet Segment and minimal Data Segments. 3448 * 3449 * This routine is responsible for storing processed mbuf 3450 * into elts ring buffer and update elts_head. 3451 * 3452 * @param txq 3453 * Pointer to TX queue structure. 3454 * @param loc 3455 * Pointer to burst routine local context. 3456 * @param olx 3457 * Configured Tx offloads mask. It is fully defined at 3458 * compile time and may be used for optimization. 3459 * 3460 * @return 3461 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3462 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3463 * Local context variables partially updated. 3464 */ 3465 static __rte_always_inline enum mlx5_txcmp_code 3466 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 3467 struct mlx5_txq_local *__rte_restrict loc, 3468 unsigned int olx) 3469 { 3470 struct mlx5_wqe *__rte_restrict wqe; 3471 unsigned int ds, inlen, dlen, vlan = 0; 3472 3473 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3474 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3475 if (MLX5_TXOFF_CONFIG(TXPP)) { 3476 enum mlx5_txcmp_code wret; 3477 3478 /* Generate WAIT for scheduling if requested. */ 3479 wret = mlx5_tx_schedule_send(txq, loc, olx); 3480 if (wret == MLX5_TXCMP_CODE_EXIT) 3481 return MLX5_TXCMP_CODE_EXIT; 3482 if (wret == MLX5_TXCMP_CODE_ERROR) 3483 return MLX5_TXCMP_CODE_ERROR; 3484 } 3485 /* 3486 * First calculate data length to be inlined 3487 * to estimate the required space for WQE. 
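 * The initial estimate is the whole packet length (plus the VLAN header
 * if it is to be inserted), it is reduced below according to
 * txq->inlen_send, txq->inlen_mode and the no-inline hint.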
3488 */ 3489 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3490 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3491 vlan = sizeof(struct rte_vlan_hdr); 3492 inlen = dlen + vlan; 3493 /* Check against minimal length. */ 3494 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3495 return MLX5_TXCMP_CODE_ERROR; 3496 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 3497 if (inlen > txq->inlen_send || 3498 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 3499 struct rte_mbuf *mbuf; 3500 unsigned int nxlen; 3501 uintptr_t start; 3502 3503 /* 3504 * Packet length exceeds the allowed inline 3505 * data length, check whether the minimal 3506 * inlining is required. 3507 */ 3508 if (txq->inlen_mode) { 3509 MLX5_ASSERT(txq->inlen_mode >= 3510 MLX5_ESEG_MIN_INLINE_SIZE); 3511 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 3512 inlen = txq->inlen_mode; 3513 } else { 3514 if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE || 3515 !vlan || txq->vlan_en) { 3516 /* 3517 * VLAN insertion will be done inside by HW. 3518 * It is not utmost effective - VLAN flag is 3519 * checked twice, but we should proceed the 3520 * inlining length correctly and take into 3521 * account the VLAN header being inserted. 3522 */ 3523 return mlx5_tx_packet_multi_send 3524 (txq, loc, olx); 3525 } 3526 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 3527 } 3528 /* 3529 * Now we know the minimal amount of data is requested 3530 * to inline. Check whether we should inline the buffers 3531 * from the chain beginning to eliminate some mbufs. 3532 */ 3533 mbuf = loc->mbuf; 3534 nxlen = rte_pktmbuf_data_len(mbuf); 3535 if (unlikely(nxlen <= txq->inlen_send)) { 3536 /* We can inline first mbuf at least. */ 3537 if (nxlen < inlen) { 3538 unsigned int smlen; 3539 3540 /* Scan mbufs till inlen filled. */ 3541 do { 3542 smlen = nxlen; 3543 mbuf = NEXT(mbuf); 3544 MLX5_ASSERT(mbuf); 3545 nxlen = rte_pktmbuf_data_len(mbuf); 3546 nxlen += smlen; 3547 } while (unlikely(nxlen < inlen)); 3548 if (unlikely(nxlen > txq->inlen_send)) { 3549 /* We cannot inline entire mbuf. */ 3550 smlen = inlen - smlen; 3551 start = rte_pktmbuf_mtod_offset 3552 (mbuf, uintptr_t, smlen); 3553 goto do_align; 3554 } 3555 } 3556 do { 3557 inlen = nxlen; 3558 mbuf = NEXT(mbuf); 3559 /* There should be not end of packet. */ 3560 MLX5_ASSERT(mbuf); 3561 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 3562 } while (unlikely(nxlen < txq->inlen_send)); 3563 } 3564 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 3565 /* 3566 * Check whether we can do inline to align start 3567 * address of data buffer to cacheline. 3568 */ 3569 do_align: 3570 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 3571 if (unlikely(start)) { 3572 start += inlen; 3573 if (start <= txq->inlen_send) 3574 inlen = start; 3575 } 3576 } 3577 /* 3578 * Check whether there are enough free WQEBBs: 3579 * - Control Segment 3580 * - Ethernet Segment 3581 * - First Segment of inlined Ethernet data 3582 * - ... data continued ... 3583 * - Data Segments of pointer/min inline type 3584 * 3585 * Estimate the number of Data Segments conservatively, 3586 * supposing no any mbufs is being freed during inlining. 3587 */ 3588 MLX5_ASSERT(inlen <= txq->inlen_send); 3589 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3590 MLX5_ESEG_MIN_INLINE_SIZE + 3591 MLX5_WSEG_SIZE + 3592 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3593 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3594 return MLX5_TXCMP_CODE_EXIT; 3595 /* Check for maximal WQE size. 
*/ 3596 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3597 return MLX5_TXCMP_CODE_ERROR; 3598 #ifdef MLX5_PMD_SOFT_COUNTERS 3599 /* Update sent data bytes/packets counters. */ 3600 txq->stats.obytes += dlen + vlan; 3601 #endif 3602 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3603 loc->wqe_last = wqe; 3604 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 3605 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 3606 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3607 txq->wqe_ci += (ds + 3) / 4; 3608 loc->wqe_free -= (ds + 3) / 4; 3609 return MLX5_TXCMP_CODE_MULTI; 3610 } 3611 3612 /** 3613 * Tx burst function for multi-segment packets. Supports all 3614 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 3615 * sends one packet per WQE. Function stops sending if it 3616 * encounters the single-segment packet. 3617 * 3618 * This routine is responsible for storing processed mbuf 3619 * into elts ring buffer and update elts_head. 3620 * 3621 * @param txq 3622 * Pointer to TX queue structure. 3623 * @param[in] pkts 3624 * Packets to transmit. 3625 * @param pkts_n 3626 * Number of packets in array. 3627 * @param loc 3628 * Pointer to burst routine local context. 3629 * @param olx 3630 * Configured Tx offloads mask. It is fully defined at 3631 * compile time and may be used for optimization. 3632 * 3633 * @return 3634 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3635 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3636 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3637 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 3638 * Local context variables updated. 3639 */ 3640 static __rte_always_inline enum mlx5_txcmp_code 3641 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 3642 struct rte_mbuf **__rte_restrict pkts, 3643 unsigned int pkts_n, 3644 struct mlx5_txq_local *__rte_restrict loc, 3645 unsigned int olx) 3646 { 3647 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3648 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3649 pkts += loc->pkts_sent + 1; 3650 pkts_n -= loc->pkts_sent; 3651 for (;;) { 3652 enum mlx5_txcmp_code ret; 3653 3654 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3655 /* 3656 * Estimate the number of free elts quickly but 3657 * conservatively. Some segment may be fully inlined 3658 * and freed, ignore this here - precise estimation 3659 * is costly. 3660 */ 3661 if (loc->elts_free < NB_SEGS(loc->mbuf)) 3662 return MLX5_TXCMP_CODE_EXIT; 3663 if (MLX5_TXOFF_CONFIG(TSO) && 3664 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) { 3665 /* Proceed with multi-segment TSO. */ 3666 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 3667 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 3668 /* Proceed with multi-segment SEND with inlining. */ 3669 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 3670 } else { 3671 /* Proceed with multi-segment SEND w/o inlining. */ 3672 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 3673 } 3674 if (ret == MLX5_TXCMP_CODE_EXIT) 3675 return MLX5_TXCMP_CODE_EXIT; 3676 if (ret == MLX5_TXCMP_CODE_ERROR) 3677 return MLX5_TXCMP_CODE_ERROR; 3678 /* WQE is built, go to the next packet. */ 3679 ++loc->pkts_sent; 3680 --pkts_n; 3681 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3682 return MLX5_TXCMP_CODE_EXIT; 3683 loc->mbuf = *pkts++; 3684 if (pkts_n > 1) 3685 rte_prefetch0(*pkts); 3686 if (likely(NB_SEGS(loc->mbuf) > 1)) 3687 continue; 3688 /* Here ends the series of multi-segment packets. 
*/ 3689 if (MLX5_TXOFF_CONFIG(TSO) && 3690 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3691 return MLX5_TXCMP_CODE_TSO; 3692 return MLX5_TXCMP_CODE_SINGLE; 3693 } 3694 MLX5_ASSERT(false); 3695 } 3696 3697 /** 3698 * Tx burst function for single-segment packets with TSO. 3699 * Supports all types of Tx offloads, except multi-packets. 3700 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 3701 * Function stops sending if it encounters the multi-segment 3702 * packet or packet without TSO requested. 3703 * 3704 * The routine is responsible for storing processed mbuf 3705 * into elts ring buffer and update elts_head if inline 3706 * offloads is requested due to possible early freeing 3707 * of the inlined mbufs (can not store pkts array in elts 3708 * as a batch). 3709 * 3710 * @param txq 3711 * Pointer to TX queue structure. 3712 * @param[in] pkts 3713 * Packets to transmit. 3714 * @param pkts_n 3715 * Number of packets in array. 3716 * @param loc 3717 * Pointer to burst routine local context. 3718 * @param olx 3719 * Configured Tx offloads mask. It is fully defined at 3720 * compile time and may be used for optimization. 3721 * 3722 * @return 3723 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3724 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3725 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3726 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3727 * Local context variables updated. 3728 */ 3729 static __rte_always_inline enum mlx5_txcmp_code 3730 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 3731 struct rte_mbuf **__rte_restrict pkts, 3732 unsigned int pkts_n, 3733 struct mlx5_txq_local *__rte_restrict loc, 3734 unsigned int olx) 3735 { 3736 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3737 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3738 pkts += loc->pkts_sent + 1; 3739 pkts_n -= loc->pkts_sent; 3740 for (;;) { 3741 struct mlx5_wqe_dseg *__rte_restrict dseg; 3742 struct mlx5_wqe *__rte_restrict wqe; 3743 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 3744 uint8_t *dptr; 3745 3746 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3747 if (MLX5_TXOFF_CONFIG(TXPP)) { 3748 enum mlx5_txcmp_code wret; 3749 3750 /* Generate WAIT for scheduling if requested. */ 3751 wret = mlx5_tx_schedule_send(txq, loc, olx); 3752 if (wret == MLX5_TXCMP_CODE_EXIT) 3753 return MLX5_TXCMP_CODE_EXIT; 3754 if (wret == MLX5_TXCMP_CODE_ERROR) 3755 return MLX5_TXCMP_CODE_ERROR; 3756 } 3757 dlen = rte_pktmbuf_data_len(loc->mbuf); 3758 if (MLX5_TXOFF_CONFIG(VLAN) && 3759 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 3760 vlan = sizeof(struct rte_vlan_hdr); 3761 } 3762 /* 3763 * First calculate the WQE size to check 3764 * whether we have enough space in ring buffer. 3765 */ 3766 hlen = loc->mbuf->l2_len + vlan + 3767 loc->mbuf->l3_len + loc->mbuf->l4_len; 3768 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 3769 return MLX5_TXCMP_CODE_ERROR; 3770 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3771 hlen += loc->mbuf->outer_l2_len + 3772 loc->mbuf->outer_l3_len; 3773 /* Segment must contain all TSO headers. */ 3774 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 3775 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3776 hlen > (dlen + vlan))) 3777 return MLX5_TXCMP_CODE_ERROR; 3778 /* 3779 * Check whether there are enough free WQEBBs: 3780 * - Control Segment 3781 * - Ethernet Segment 3782 * - First Segment of inlined Ethernet data 3783 * - ... data continued ... 
3784 * - Finishing Data Segment of pointer type 3785 */ 3786 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 3787 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3788 if (loc->wqe_free < ((ds + 3) / 4)) 3789 return MLX5_TXCMP_CODE_EXIT; 3790 #ifdef MLX5_PMD_SOFT_COUNTERS 3791 /* Update sent data bytes/packets counters. */ 3792 ntcp = (dlen + vlan - hlen + 3793 loc->mbuf->tso_segsz - 1) / 3794 loc->mbuf->tso_segsz; 3795 /* 3796 * One will be added for mbuf itself at the end 3797 * of the mlx5_tx_burst from loc->pkts_sent field. 3798 */ 3799 --ntcp; 3800 txq->stats.opackets += ntcp; 3801 txq->stats.obytes += dlen + vlan + ntcp * hlen; 3802 #endif 3803 /* 3804 * Build the TSO WQE: 3805 * - Control Segment 3806 * - Ethernet Segment with hlen bytes inlined 3807 * - Data Segment of pointer type 3808 */ 3809 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3810 loc->wqe_last = wqe; 3811 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3812 MLX5_OPCODE_TSO, olx); 3813 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 3814 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 3815 dlen -= hlen - vlan; 3816 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 3817 /* 3818 * WQE is built, update the loop parameters 3819 * and go to the next packet. 3820 */ 3821 txq->wqe_ci += (ds + 3) / 4; 3822 loc->wqe_free -= (ds + 3) / 4; 3823 if (MLX5_TXOFF_CONFIG(INLINE)) 3824 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3825 --loc->elts_free; 3826 ++loc->pkts_sent; 3827 --pkts_n; 3828 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3829 return MLX5_TXCMP_CODE_EXIT; 3830 loc->mbuf = *pkts++; 3831 if (pkts_n > 1) 3832 rte_prefetch0(*pkts); 3833 if (MLX5_TXOFF_CONFIG(MULTI) && 3834 unlikely(NB_SEGS(loc->mbuf) > 1)) 3835 return MLX5_TXCMP_CODE_MULTI; 3836 if (likely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) 3837 return MLX5_TXCMP_CODE_SINGLE; 3838 /* Continue with the next TSO packet. */ 3839 } 3840 MLX5_ASSERT(false); 3841 } 3842 3843 /** 3844 * Analyze the packet and select the best method to send. 3845 * 3846 * @param txq 3847 * Pointer to TX queue structure. 3848 * @param loc 3849 * Pointer to burst routine local context. 3850 * @param olx 3851 * Configured Tx offloads mask. It is fully defined at 3852 * compile time and may be used for optimization. 3853 * @param newp 3854 * The predefined flag whether do complete check for 3855 * multi-segment packets and TSO. 3856 * 3857 * @return 3858 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3859 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 3860 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 3861 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 3862 */ 3863 static __rte_always_inline enum mlx5_txcmp_code 3864 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 3865 struct mlx5_txq_local *__rte_restrict loc, 3866 unsigned int olx, 3867 bool newp) 3868 { 3869 /* Check for multi-segment packet. */ 3870 if (newp && 3871 MLX5_TXOFF_CONFIG(MULTI) && 3872 unlikely(NB_SEGS(loc->mbuf) > 1)) 3873 return MLX5_TXCMP_CODE_MULTI; 3874 /* Check for TSO packet. */ 3875 if (newp && 3876 MLX5_TXOFF_CONFIG(TSO) && 3877 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3878 return MLX5_TXCMP_CODE_TSO; 3879 /* Check if eMPW is enabled at all. */ 3880 if (!MLX5_TXOFF_CONFIG(EMPW)) 3881 return MLX5_TXCMP_CODE_SINGLE; 3882 /* Check if eMPW can be engaged. 
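 * For example (illustrative figures): with txq->inlen_empw == 256, a
 * VLAN-tagged packet carrying 256 data bytes would need 256 + 4 = 260
 * bytes inlined, so eMPW is not engaged and ordinary SEND is used.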
*/
3883 if (MLX5_TXOFF_CONFIG(VLAN) &&
3884 unlikely(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) &&
3885 (!MLX5_TXOFF_CONFIG(INLINE) ||
3886 unlikely((rte_pktmbuf_data_len(loc->mbuf) +
3887 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) {
3888 /*
3889 * eMPW does not support VLAN insertion offload,
3890 * we have to inline the entire packet but
3891 * packet is too long for inlining.
3892 */
3893 return MLX5_TXCMP_CODE_SINGLE;
3894 }
3895 return MLX5_TXCMP_CODE_EMPW;
3896 }
3897
3898 /**
3899 * Check whether the next packet attributes match the eMPW batch ones.
3900 * In addition, for legacy MPW the packet length is checked as well.
3901 *
3902 * @param txq
3903 * Pointer to TX queue structure.
3904 * @param es
3905 * Pointer to Ethernet Segment of eMPW batch.
3906 * @param loc
3907 * Pointer to burst routine local context.
3908 * @param dlen
3909 * Length of previous packet in MPW descriptor.
3910 * @param olx
3911 * Configured Tx offloads mask. It is fully defined at
3912 * compile time and may be used for optimization.
3913 *
3914 * @return
3915 * true - packet match with eMPW batch attributes.
3916 * false - no match, eMPW should be restarted.
3917 */
3918 static __rte_always_inline bool
3919 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq,
3920 struct mlx5_wqe_eseg *__rte_restrict es,
3921 struct mlx5_txq_local *__rte_restrict loc,
3922 uint32_t dlen,
3923 unsigned int olx)
3924 {
3925 uint8_t swp_flags = 0;
3926
3927 /* Compare the checksum flags, if any. */
3928 if (MLX5_TXOFF_CONFIG(CSUM) &&
3929 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags)
3930 return false;
3931 /* Compare the Software Parser offsets and flags. */
3932 if (MLX5_TXOFF_CONFIG(SWP) &&
3933 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) ||
3934 es->swp_flags != swp_flags))
3935 return false;
3936 /* Compare the metadata values, if any. */
3937 if (MLX5_TXOFF_CONFIG(METADATA) &&
3938 es->metadata != (loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ?
3939 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0))
3940 return false;
3941 /* Legacy MPW can send packets with the same length only. */
3942 if (MLX5_TXOFF_CONFIG(MPW) &&
3943 dlen != rte_pktmbuf_data_len(loc->mbuf))
3944 return false;
3945 /* There must be no VLAN packets in eMPW loop. */
3946 if (MLX5_TXOFF_CONFIG(VLAN))
3947 MLX5_ASSERT(!(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT));
3948 /* Check if the scheduling is requested. */
3949 if (MLX5_TXOFF_CONFIG(TXPP) &&
3950 loc->mbuf->ol_flags & txq->ts_mask)
3951 return false;
3952 return true;
3953 }
3954
3955 /*
3956 * Update send loop variables and WQE for eMPW loop
3957 * without data inlining. Number of Data Segments is
3958 * equal to the number of sent packets.
3959 *
3960 * @param txq
3961 * Pointer to TX queue structure.
3962 * @param loc
3963 * Pointer to burst routine local context.
3964 * @param ds
3965 * Number of packets/Data Segments (one Data Segment per packet).
3966 * @param slen
3967 * Accumulated statistics, bytes sent.
3968 * @param olx
3969 * Configured Tx offloads mask. It is fully defined at
3970 * compile time and may be used for optimization.
3971 *
3972 *
3973 * The routine does not return a value, it only updates the local
3974 * context variables and the Control Segment of the already built WQE.
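 *
 * For example (illustrative figures): a batch of ds == 6 packets takes
 * six Data Segments plus the Control and Ethernet Segments, eight
 * segments in total, i.e. (6 + 2 + 3) / 4 == 2 WQEBBs.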
3975 */ 3976 static __rte_always_inline void 3977 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, 3978 struct mlx5_txq_local *__rte_restrict loc, 3979 unsigned int ds, 3980 unsigned int slen, 3981 unsigned int olx __rte_unused) 3982 { 3983 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 3984 #ifdef MLX5_PMD_SOFT_COUNTERS 3985 /* Update sent data bytes counter. */ 3986 txq->stats.obytes += slen; 3987 #else 3988 (void)slen; 3989 #endif 3990 loc->elts_free -= ds; 3991 loc->pkts_sent += ds; 3992 ds += 2; 3993 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3994 txq->wqe_ci += (ds + 3) / 4; 3995 loc->wqe_free -= (ds + 3) / 4; 3996 } 3997 3998 /* 3999 * Update send loop variables and WQE for eMPW loop 4000 * with data inlining. Gets the size of pushed descriptors 4001 * and data to the WQE. 4002 * 4003 * @param txq 4004 * Pointer to TX queue structure. 4005 * @param loc 4006 * Pointer to burst routine local context. 4007 * @param len 4008 * Total size of descriptor/data in bytes. 4009 * @param slen 4010 * Accumulated statistics, data bytes sent. 4011 * @param wqem 4012 * The base WQE for the eMPW/MPW descriptor. 4013 * @param olx 4014 * Configured Tx offloads mask. It is fully defined at 4015 * compile time and may be used for optimization. 4016 * 4017 * @return 4018 * true - packet match with eMPW batch attributes. 4019 * false - no match, eMPW should be restarted. 4020 */ 4021 static __rte_always_inline void 4022 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 4023 struct mlx5_txq_local *__rte_restrict loc, 4024 unsigned int len, 4025 unsigned int slen, 4026 struct mlx5_wqe *__rte_restrict wqem, 4027 unsigned int olx __rte_unused) 4028 { 4029 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 4030 4031 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4032 #ifdef MLX5_PMD_SOFT_COUNTERS 4033 /* Update sent data bytes counter. */ 4034 txq->stats.obytes += slen; 4035 #else 4036 (void)slen; 4037 #endif 4038 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 4039 /* 4040 * If the legacy MPW session contains the inline packets 4041 * we should set the only inline data segment length 4042 * and align the total length to the segment size. 4043 */ 4044 MLX5_ASSERT(len > sizeof(dseg->bcount)); 4045 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 4046 MLX5_ETH_WQE_DATA_INLINE); 4047 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 4048 } else { 4049 /* 4050 * The session is not legacy MPW or contains the 4051 * data buffer pointer segments. 4052 */ 4053 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 4054 len = len / MLX5_WSEG_SIZE + 2; 4055 } 4056 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 4057 txq->wqe_ci += (len + 3) / 4; 4058 loc->wqe_free -= (len + 3) / 4; 4059 loc->wqe_last = wqem; 4060 } 4061 4062 /** 4063 * The set of Tx burst functions for single-segment packets 4064 * without TSO and with Multi-Packet Writing feature support. 4065 * Supports all types of Tx offloads, except multi-packets 4066 * and TSO. 4067 * 4068 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends 4069 * as many packet per WQE as it can. If eMPW is not configured 4070 * or packet can not be sent with eMPW (VLAN insertion) the 4071 * ordinary SEND opcode is used and only one packet placed 4072 * in WQE. 4073 * 4074 * Functions stop sending if it encounters the multi-segment 4075 * packet or packet with TSO requested. 
4076 * 4077 * The routines are responsible for storing processed mbuf 4078 * into elts ring buffer and update elts_head if inlining 4079 * offload is requested. Otherwise the copying mbufs to elts 4080 * can be postponed and completed at the end of burst routine. 4081 * 4082 * @param txq 4083 * Pointer to TX queue structure. 4084 * @param[in] pkts 4085 * Packets to transmit. 4086 * @param pkts_n 4087 * Number of packets in array. 4088 * @param loc 4089 * Pointer to burst routine local context. 4090 * @param olx 4091 * Configured Tx offloads mask. It is fully defined at 4092 * compile time and may be used for optimization. 4093 * 4094 * @return 4095 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 4096 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 4097 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 4098 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 4099 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 4100 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 4101 * 4102 * Local context variables updated. 4103 * 4104 * 4105 * The routine sends packets with MLX5_OPCODE_EMPW 4106 * without inlining, this is dedicated optimized branch. 4107 * No VLAN insertion is supported. 4108 */ 4109 static __rte_always_inline enum mlx5_txcmp_code 4110 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 4111 struct rte_mbuf **__rte_restrict pkts, 4112 unsigned int pkts_n, 4113 struct mlx5_txq_local *__rte_restrict loc, 4114 unsigned int olx) 4115 { 4116 /* 4117 * Subroutine is the part of mlx5_tx_burst_single() 4118 * and sends single-segment packet with eMPW opcode 4119 * without data inlining. 4120 */ 4121 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4122 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 4123 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4124 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4125 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 4126 pkts += loc->pkts_sent + 1; 4127 pkts_n -= loc->pkts_sent; 4128 for (;;) { 4129 struct mlx5_wqe_dseg *__rte_restrict dseg; 4130 struct mlx5_wqe_eseg *__rte_restrict eseg; 4131 enum mlx5_txcmp_code ret; 4132 unsigned int part, loop; 4133 unsigned int slen = 0; 4134 4135 next_empw: 4136 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4137 if (MLX5_TXOFF_CONFIG(TXPP)) { 4138 enum mlx5_txcmp_code wret; 4139 4140 /* Generate WAIT for scheduling if requested. */ 4141 wret = mlx5_tx_schedule_send(txq, loc, olx); 4142 if (wret == MLX5_TXCMP_CODE_EXIT) 4143 return MLX5_TXCMP_CODE_EXIT; 4144 if (wret == MLX5_TXCMP_CODE_ERROR) 4145 return MLX5_TXCMP_CODE_ERROR; 4146 } 4147 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4148 MLX5_MPW_MAX_PACKETS : 4149 MLX5_EMPW_MAX_PACKETS); 4150 if (unlikely(loc->elts_free < part)) { 4151 /* We have no enough elts to save all mbufs. */ 4152 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 4153 return MLX5_TXCMP_CODE_EXIT; 4154 /* But we still able to send at least minimal eMPW. 
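 * (for example, with only loc->elts_free == 12 mbuf slots left, the
 * batch is clamped to 12 packets, which is still not less than
 * MLX5_EMPW_MIN_PACKETS)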
*/ 4155 part = loc->elts_free; 4156 } 4157 /* Check whether we have enough WQEs */ 4158 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 4159 if (unlikely(loc->wqe_free < 4160 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4161 return MLX5_TXCMP_CODE_EXIT; 4162 part = (loc->wqe_free * 4) - 2; 4163 } 4164 if (likely(part > 1)) 4165 rte_prefetch0(*pkts); 4166 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4167 /* 4168 * Build eMPW title WQEBB: 4169 * - Control Segment, eMPW opcode 4170 * - Ethernet Segment, no inline 4171 */ 4172 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 4173 MLX5_OPCODE_ENHANCED_MPSW, olx); 4174 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 4175 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4176 eseg = &loc->wqe_last->eseg; 4177 dseg = &loc->wqe_last->dseg[0]; 4178 loop = part; 4179 /* Store the packet length for legacy MPW. */ 4180 if (MLX5_TXOFF_CONFIG(MPW)) 4181 eseg->mss = rte_cpu_to_be_16 4182 (rte_pktmbuf_data_len(loc->mbuf)); 4183 for (;;) { 4184 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4185 #ifdef MLX5_PMD_SOFT_COUNTERS 4186 /* Update sent data bytes counter. */ 4187 slen += dlen; 4188 #endif 4189 mlx5_tx_dseg_ptr 4190 (txq, loc, dseg, 4191 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4192 dlen, olx); 4193 if (unlikely(--loop == 0)) 4194 break; 4195 loc->mbuf = *pkts++; 4196 if (likely(loop > 1)) 4197 rte_prefetch0(*pkts); 4198 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4199 /* 4200 * Unroll the completion code to avoid 4201 * returning variable value - it results in 4202 * unoptimized sequent checking in caller. 4203 */ 4204 if (ret == MLX5_TXCMP_CODE_MULTI) { 4205 part -= loop; 4206 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4207 if (unlikely(!loc->elts_free || 4208 !loc->wqe_free)) 4209 return MLX5_TXCMP_CODE_EXIT; 4210 return MLX5_TXCMP_CODE_MULTI; 4211 } 4212 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4213 if (ret == MLX5_TXCMP_CODE_TSO) { 4214 part -= loop; 4215 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4216 if (unlikely(!loc->elts_free || 4217 !loc->wqe_free)) 4218 return MLX5_TXCMP_CODE_EXIT; 4219 return MLX5_TXCMP_CODE_TSO; 4220 } 4221 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4222 part -= loop; 4223 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4224 if (unlikely(!loc->elts_free || 4225 !loc->wqe_free)) 4226 return MLX5_TXCMP_CODE_EXIT; 4227 return MLX5_TXCMP_CODE_SINGLE; 4228 } 4229 if (ret != MLX5_TXCMP_CODE_EMPW) { 4230 MLX5_ASSERT(false); 4231 part -= loop; 4232 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4233 return MLX5_TXCMP_CODE_ERROR; 4234 } 4235 /* 4236 * Check whether packet parameters coincide 4237 * within assumed eMPW batch: 4238 * - check sum settings 4239 * - metadata value 4240 * - software parser settings 4241 * - packets length (legacy MPW only) 4242 * - scheduling is not required 4243 */ 4244 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 4245 MLX5_ASSERT(loop); 4246 part -= loop; 4247 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4248 if (unlikely(!loc->elts_free || 4249 !loc->wqe_free)) 4250 return MLX5_TXCMP_CODE_EXIT; 4251 pkts_n -= part; 4252 goto next_empw; 4253 } 4254 /* Packet attributes match, continue the same eMPW. */ 4255 ++dseg; 4256 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4257 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4258 } 4259 /* eMPW is built successfully, update loop parameters. */ 4260 MLX5_ASSERT(!loop); 4261 MLX5_ASSERT(pkts_n >= part); 4262 #ifdef MLX5_PMD_SOFT_COUNTERS 4263 /* Update sent data bytes counter. 
*/ 4264 txq->stats.obytes += slen; 4265 #endif 4266 loc->elts_free -= part; 4267 loc->pkts_sent += part; 4268 txq->wqe_ci += (2 + part + 3) / 4; 4269 loc->wqe_free -= (2 + part + 3) / 4; 4270 pkts_n -= part; 4271 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4272 return MLX5_TXCMP_CODE_EXIT; 4273 loc->mbuf = *pkts++; 4274 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4275 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 4276 return ret; 4277 /* Continue sending eMPW batches. */ 4278 } 4279 MLX5_ASSERT(false); 4280 } 4281 4282 /** 4283 * The routine sends packets with MLX5_OPCODE_EMPW 4284 * with inlining, optionally supports VLAN insertion. 4285 */ 4286 static __rte_always_inline enum mlx5_txcmp_code 4287 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 4288 struct rte_mbuf **__rte_restrict pkts, 4289 unsigned int pkts_n, 4290 struct mlx5_txq_local *__rte_restrict loc, 4291 unsigned int olx) 4292 { 4293 /* 4294 * Subroutine is the part of mlx5_tx_burst_single() 4295 * and sends single-segment packet with eMPW opcode 4296 * with data inlining. 4297 */ 4298 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4299 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 4300 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4301 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4302 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 4303 pkts += loc->pkts_sent + 1; 4304 pkts_n -= loc->pkts_sent; 4305 for (;;) { 4306 struct mlx5_wqe_dseg *__rte_restrict dseg; 4307 struct mlx5_wqe *__rte_restrict wqem; 4308 enum mlx5_txcmp_code ret; 4309 unsigned int room, part, nlim; 4310 unsigned int slen = 0; 4311 4312 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4313 if (MLX5_TXOFF_CONFIG(TXPP)) { 4314 enum mlx5_txcmp_code wret; 4315 4316 /* Generate WAIT for scheduling if requested. */ 4317 wret = mlx5_tx_schedule_send(txq, loc, olx); 4318 if (wret == MLX5_TXCMP_CODE_EXIT) 4319 return MLX5_TXCMP_CODE_EXIT; 4320 if (wret == MLX5_TXCMP_CODE_ERROR) 4321 return MLX5_TXCMP_CODE_ERROR; 4322 } 4323 /* 4324 * Limits the amount of packets in one WQE 4325 * to improve CQE latency generation. 4326 */ 4327 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4328 MLX5_MPW_INLINE_MAX_PACKETS : 4329 MLX5_EMPW_MAX_PACKETS); 4330 /* Check whether we have minimal amount WQEs */ 4331 if (unlikely(loc->wqe_free < 4332 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4333 return MLX5_TXCMP_CODE_EXIT; 4334 if (likely(pkts_n > 1)) 4335 rte_prefetch0(*pkts); 4336 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4337 /* 4338 * Build eMPW title WQEBB: 4339 * - Control Segment, eMPW opcode, zero DS 4340 * - Ethernet Segment, no inline 4341 */ 4342 mlx5_tx_cseg_init(txq, loc, wqem, 0, 4343 MLX5_OPCODE_ENHANCED_MPSW, olx); 4344 mlx5_tx_eseg_none(txq, loc, wqem, 4345 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4346 dseg = &wqem->dseg[0]; 4347 /* Store the packet length for legacy MPW. */ 4348 if (MLX5_TXOFF_CONFIG(MPW)) 4349 wqem->eseg.mss = rte_cpu_to_be_16 4350 (rte_pktmbuf_data_len(loc->mbuf)); 4351 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 4352 loc->wqe_free) * MLX5_WQE_SIZE - 4353 MLX5_WQE_CSEG_SIZE - 4354 MLX5_WQE_ESEG_SIZE; 4355 /* Limit the room for legacy MPW sessions for performance. */ 4356 if (MLX5_TXOFF_CONFIG(MPW)) 4357 room = RTE_MIN(room, 4358 RTE_MAX(txq->inlen_empw + 4359 sizeof(dseg->bcount) + 4360 (MLX5_TXOFF_CONFIG(VLAN) ? 4361 sizeof(struct rte_vlan_hdr) : 0), 4362 MLX5_MPW_INLINE_MAX_PACKETS * 4363 MLX5_WQE_DSEG_SIZE)); 4364 /* Build WQE till we have space, packets and resources. 
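 * The 'part' variable below remembers the initial room in bytes; the
 * space actually consumed is recovered later as (part - room) and is
 * passed to mlx5_tx_idone_empw() to finalize the descriptor.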
*/ 4365 part = room; 4366 for (;;) { 4367 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4368 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 4369 unsigned int tlen; 4370 4371 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 4372 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 4373 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 4374 /* 4375 * Some Tx offloads may cause an error if 4376 * packet is not long enough, check against 4377 * assumed minimal length. 4378 */ 4379 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 4380 part -= room; 4381 if (unlikely(!part)) 4382 return MLX5_TXCMP_CODE_ERROR; 4383 /* 4384 * We have some successfully built 4385 * packet Data Segments to send. 4386 */ 4387 mlx5_tx_idone_empw(txq, loc, part, 4388 slen, wqem, olx); 4389 return MLX5_TXCMP_CODE_ERROR; 4390 } 4391 /* Inline or not inline - that's the Question. */ 4392 if (dlen > txq->inlen_empw || 4393 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) 4394 goto pointer_empw; 4395 if (MLX5_TXOFF_CONFIG(MPW)) { 4396 if (dlen > txq->inlen_send) 4397 goto pointer_empw; 4398 tlen = dlen; 4399 if (part == room) { 4400 /* Open new inline MPW session. */ 4401 tlen += sizeof(dseg->bcount); 4402 dseg->bcount = RTE_BE32(0); 4403 dseg = RTE_PTR_ADD 4404 (dseg, sizeof(dseg->bcount)); 4405 } else { 4406 /* 4407 * No pointer and inline descriptor 4408 * intermix for legacy MPW sessions. 4409 */ 4410 if (wqem->dseg[0].bcount) 4411 break; 4412 } 4413 } else { 4414 tlen = sizeof(dseg->bcount) + dlen; 4415 } 4416 /* Inline entire packet, optional VLAN insertion. */ 4417 if (MLX5_TXOFF_CONFIG(VLAN) && 4418 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4419 /* 4420 * The packet length must be checked in 4421 * mlx5_tx_able_to_empw() and packet 4422 * fits into inline length guaranteed. 4423 */ 4424 MLX5_ASSERT((dlen + 4425 sizeof(struct rte_vlan_hdr)) <= 4426 txq->inlen_empw); 4427 tlen += sizeof(struct rte_vlan_hdr); 4428 if (room < tlen) 4429 break; 4430 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 4431 dptr, dlen, olx); 4432 #ifdef MLX5_PMD_SOFT_COUNTERS 4433 /* Update sent data bytes counter. */ 4434 slen += sizeof(struct rte_vlan_hdr); 4435 #endif 4436 } else { 4437 if (room < tlen) 4438 break; 4439 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 4440 dptr, dlen, olx); 4441 } 4442 if (!MLX5_TXOFF_CONFIG(MPW)) 4443 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 4444 MLX5_ASSERT(room >= tlen); 4445 room -= tlen; 4446 /* 4447 * Packet data are completely inlined, 4448 * free the packet immediately. 4449 */ 4450 rte_pktmbuf_free_seg(loc->mbuf); 4451 goto next_mbuf; 4452 pointer_empw: 4453 /* 4454 * No pointer and inline descriptor 4455 * intermix for legacy MPW sessions. 4456 */ 4457 if (MLX5_TXOFF_CONFIG(MPW) && 4458 part != room && 4459 wqem->dseg[0].bcount == RTE_BE32(0)) 4460 break; 4461 /* 4462 * Not inlinable VLAN packets are 4463 * proceeded outside of this routine. 4464 */ 4465 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 4466 if (MLX5_TXOFF_CONFIG(VLAN)) 4467 MLX5_ASSERT(!(loc->mbuf->ol_flags & 4468 PKT_TX_VLAN_PKT)); 4469 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 4470 /* We have to store mbuf in elts.*/ 4471 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 4472 room -= MLX5_WQE_DSEG_SIZE; 4473 /* Ring buffer wraparound is checked at the loop end.*/ 4474 ++dseg; 4475 next_mbuf: 4476 #ifdef MLX5_PMD_SOFT_COUNTERS 4477 /* Update sent data bytes counter. 
*/ 4478 slen += dlen; 4479 #endif 4480 loc->pkts_sent++; 4481 loc->elts_free--; 4482 pkts_n--; 4483 if (unlikely(!pkts_n || !loc->elts_free)) { 4484 /* 4485 * We have no resources/packets to 4486 * continue build descriptors. 4487 */ 4488 part -= room; 4489 mlx5_tx_idone_empw(txq, loc, part, 4490 slen, wqem, olx); 4491 return MLX5_TXCMP_CODE_EXIT; 4492 } 4493 loc->mbuf = *pkts++; 4494 if (likely(pkts_n > 1)) 4495 rte_prefetch0(*pkts); 4496 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4497 /* 4498 * Unroll the completion code to avoid 4499 * returning variable value - it results in 4500 * unoptimized sequent checking in caller. 4501 */ 4502 if (ret == MLX5_TXCMP_CODE_MULTI) { 4503 part -= room; 4504 mlx5_tx_idone_empw(txq, loc, part, 4505 slen, wqem, olx); 4506 if (unlikely(!loc->elts_free || 4507 !loc->wqe_free)) 4508 return MLX5_TXCMP_CODE_EXIT; 4509 return MLX5_TXCMP_CODE_MULTI; 4510 } 4511 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4512 if (ret == MLX5_TXCMP_CODE_TSO) { 4513 part -= room; 4514 mlx5_tx_idone_empw(txq, loc, part, 4515 slen, wqem, olx); 4516 if (unlikely(!loc->elts_free || 4517 !loc->wqe_free)) 4518 return MLX5_TXCMP_CODE_EXIT; 4519 return MLX5_TXCMP_CODE_TSO; 4520 } 4521 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4522 part -= room; 4523 mlx5_tx_idone_empw(txq, loc, part, 4524 slen, wqem, olx); 4525 if (unlikely(!loc->elts_free || 4526 !loc->wqe_free)) 4527 return MLX5_TXCMP_CODE_EXIT; 4528 return MLX5_TXCMP_CODE_SINGLE; 4529 } 4530 if (ret != MLX5_TXCMP_CODE_EMPW) { 4531 MLX5_ASSERT(false); 4532 part -= room; 4533 mlx5_tx_idone_empw(txq, loc, part, 4534 slen, wqem, olx); 4535 return MLX5_TXCMP_CODE_ERROR; 4536 } 4537 /* Check if we have minimal room left. */ 4538 nlim--; 4539 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 4540 break; 4541 /* 4542 * Check whether packet parameters coincide 4543 * within assumed eMPW batch: 4544 * - check sum settings 4545 * - metadata value 4546 * - software parser settings 4547 * - packets length (legacy MPW only) 4548 * - scheduling is not required 4549 */ 4550 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 4551 loc, dlen, olx)) 4552 break; 4553 /* Packet attributes match, continue the same eMPW. */ 4554 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4555 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4556 } 4557 /* 4558 * We get here to close an existing eMPW 4559 * session and start the new one. 4560 */ 4561 MLX5_ASSERT(pkts_n); 4562 part -= room; 4563 if (unlikely(!part)) 4564 return MLX5_TXCMP_CODE_EXIT; 4565 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 4566 if (unlikely(!loc->elts_free || 4567 !loc->wqe_free)) 4568 return MLX5_TXCMP_CODE_EXIT; 4569 /* Continue the loop with new eMPW session. */ 4570 } 4571 MLX5_ASSERT(false); 4572 } 4573 4574 /** 4575 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 4576 * Data inlining and VLAN insertion are supported. 4577 */ 4578 static __rte_always_inline enum mlx5_txcmp_code 4579 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 4580 struct rte_mbuf **__rte_restrict pkts, 4581 unsigned int pkts_n, 4582 struct mlx5_txq_local *__rte_restrict loc, 4583 unsigned int olx) 4584 { 4585 /* 4586 * Subroutine is the part of mlx5_tx_burst_single() 4587 * and sends single-segment packet with SEND opcode. 
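 * Depending on the configured limits and the mbuf hint flag each packet
 * is either inlined completely, inlined partially (inlen_mode or the
 * minimal MLX5_ESEG_MIN_INLINE_SIZE bytes), or sent by pointer only.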
4588 */ 4589 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4590 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4591 pkts += loc->pkts_sent + 1; 4592 pkts_n -= loc->pkts_sent; 4593 for (;;) { 4594 struct mlx5_wqe *__rte_restrict wqe; 4595 enum mlx5_txcmp_code ret; 4596 4597 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4598 if (MLX5_TXOFF_CONFIG(TXPP)) { 4599 enum mlx5_txcmp_code wret; 4600 4601 /* Generate WAIT for scheduling if requested. */ 4602 wret = mlx5_tx_schedule_send(txq, loc, olx); 4603 if (wret == MLX5_TXCMP_CODE_EXIT) 4604 return MLX5_TXCMP_CODE_EXIT; 4605 if (wret == MLX5_TXCMP_CODE_ERROR) 4606 return MLX5_TXCMP_CODE_ERROR; 4607 } 4608 if (MLX5_TXOFF_CONFIG(INLINE)) { 4609 unsigned int inlen, vlan = 0; 4610 4611 inlen = rte_pktmbuf_data_len(loc->mbuf); 4612 if (MLX5_TXOFF_CONFIG(VLAN) && 4613 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4614 vlan = sizeof(struct rte_vlan_hdr); 4615 inlen += vlan; 4616 static_assert((sizeof(struct rte_vlan_hdr) + 4617 sizeof(struct rte_ether_hdr)) == 4618 MLX5_ESEG_MIN_INLINE_SIZE, 4619 "invalid min inline data size"); 4620 } 4621 /* 4622 * If inlining is enabled at configuration time 4623 * the limit must be not less than minimal size. 4624 * Otherwise we would do extra check for data 4625 * size to avoid crashes due to length overflow. 4626 */ 4627 MLX5_ASSERT(txq->inlen_send >= 4628 MLX5_ESEG_MIN_INLINE_SIZE); 4629 if (inlen <= txq->inlen_send) { 4630 unsigned int seg_n, wqe_n; 4631 4632 rte_prefetch0(rte_pktmbuf_mtod 4633 (loc->mbuf, uint8_t *)); 4634 /* Check against minimal length. */ 4635 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 4636 return MLX5_TXCMP_CODE_ERROR; 4637 if (loc->mbuf->ol_flags & 4638 PKT_TX_DYNF_NOINLINE) { 4639 /* 4640 * The hint flag not to inline packet 4641 * data is set. Check whether we can 4642 * follow the hint. 4643 */ 4644 if ((!MLX5_TXOFF_CONFIG(EMPW) && 4645 txq->inlen_mode) || 4646 (MLX5_TXOFF_CONFIG(MPW) && 4647 txq->inlen_mode)) { 4648 if (inlen <= txq->inlen_send) 4649 goto single_inline; 4650 /* 4651 * The hardware requires the 4652 * minimal inline data header. 4653 */ 4654 goto single_min_inline; 4655 } 4656 if (MLX5_TXOFF_CONFIG(VLAN) && 4657 vlan && !txq->vlan_en) { 4658 /* 4659 * We must insert VLAN tag 4660 * by software means. 4661 */ 4662 goto single_part_inline; 4663 } 4664 goto single_no_inline; 4665 } 4666 single_inline: 4667 /* 4668 * Completely inlined packet data WQE: 4669 * - Control Segment, SEND opcode 4670 * - Ethernet Segment, no VLAN insertion 4671 * - Data inlined, VLAN optionally inserted 4672 * - Alignment to MLX5_WSEG_SIZE 4673 * Have to estimate amount of WQEBBs 4674 */ 4675 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 4676 MLX5_ESEG_MIN_INLINE_SIZE + 4677 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4678 /* Check if there are enough WQEBBs. */ 4679 wqe_n = (seg_n + 3) / 4; 4680 if (wqe_n > loc->wqe_free) 4681 return MLX5_TXCMP_CODE_EXIT; 4682 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4683 loc->wqe_last = wqe; 4684 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 4685 MLX5_OPCODE_SEND, olx); 4686 mlx5_tx_eseg_data(txq, loc, wqe, 4687 vlan, inlen, 0, olx); 4688 txq->wqe_ci += wqe_n; 4689 loc->wqe_free -= wqe_n; 4690 /* 4691 * Packet data are completely inlined, 4692 * free the packet immediately. 
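 * No mbuf reference is kept in elts on this path, so elts_free is
 * left unchanged here.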
4693 */ 4694 rte_pktmbuf_free_seg(loc->mbuf); 4695 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 4696 MLX5_TXOFF_CONFIG(MPW)) && 4697 txq->inlen_mode) { 4698 /* 4699 * If minimal inlining is requested the eMPW 4700 * feature should be disabled due to data is 4701 * inlined into Ethernet Segment, which can 4702 * not contain inlined data for eMPW due to 4703 * segment shared for all packets. 4704 */ 4705 struct mlx5_wqe_dseg *__rte_restrict dseg; 4706 unsigned int ds; 4707 uint8_t *dptr; 4708 4709 /* 4710 * The inline-mode settings require 4711 * to inline the specified amount of 4712 * data bytes to the Ethernet Segment. 4713 * We should check the free space in 4714 * WQE ring buffer to inline partially. 4715 */ 4716 single_min_inline: 4717 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 4718 MLX5_ASSERT(inlen > txq->inlen_mode); 4719 MLX5_ASSERT(txq->inlen_mode >= 4720 MLX5_ESEG_MIN_INLINE_SIZE); 4721 /* 4722 * Check whether there are enough free WQEBBs: 4723 * - Control Segment 4724 * - Ethernet Segment 4725 * - First Segment of inlined Ethernet data 4726 * - ... data continued ... 4727 * - Finishing Data Segment of pointer type 4728 */ 4729 ds = (MLX5_WQE_CSEG_SIZE + 4730 MLX5_WQE_ESEG_SIZE + 4731 MLX5_WQE_DSEG_SIZE + 4732 txq->inlen_mode - 4733 MLX5_ESEG_MIN_INLINE_SIZE + 4734 MLX5_WQE_DSEG_SIZE + 4735 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4736 if (loc->wqe_free < ((ds + 3) / 4)) 4737 return MLX5_TXCMP_CODE_EXIT; 4738 /* 4739 * Build the ordinary SEND WQE: 4740 * - Control Segment 4741 * - Ethernet Segment, inline inlen_mode bytes 4742 * - Data Segment of pointer type 4743 */ 4744 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4745 loc->wqe_last = wqe; 4746 mlx5_tx_cseg_init(txq, loc, wqe, ds, 4747 MLX5_OPCODE_SEND, olx); 4748 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 4749 txq->inlen_mode, 4750 0, olx); 4751 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4752 txq->inlen_mode - vlan; 4753 inlen -= txq->inlen_mode; 4754 mlx5_tx_dseg_ptr(txq, loc, dseg, 4755 dptr, inlen, olx); 4756 /* 4757 * WQE is built, update the loop parameters 4758 * and got to the next packet. 4759 */ 4760 txq->wqe_ci += (ds + 3) / 4; 4761 loc->wqe_free -= (ds + 3) / 4; 4762 /* We have to store mbuf in elts.*/ 4763 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4764 txq->elts[txq->elts_head++ & txq->elts_m] = 4765 loc->mbuf; 4766 --loc->elts_free; 4767 } else { 4768 uint8_t *dptr; 4769 unsigned int dlen; 4770 4771 /* 4772 * Partially inlined packet data WQE, we have 4773 * some space in title WQEBB, we can fill it 4774 * with some packet data. It takes one WQEBB, 4775 * it is available, no extra space check: 4776 * - Control Segment, SEND opcode 4777 * - Ethernet Segment, no VLAN insertion 4778 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 4779 * - Data Segment, pointer type 4780 * 4781 * We also get here if VLAN insertion is not 4782 * supported by HW, the inline is enabled. 4783 */ 4784 single_part_inline: 4785 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4786 loc->wqe_last = wqe; 4787 mlx5_tx_cseg_init(txq, loc, wqe, 4, 4788 MLX5_OPCODE_SEND, olx); 4789 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 4790 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4791 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 4792 /* 4793 * The length check is performed above, by 4794 * comparing with txq->inlen_send. We should 4795 * not get overflow here. 
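 * For example (illustrative, no VLAN insertion): for a 60-byte packet
 * on this path the first 18 bytes (MLX5_ESEG_MIN_INLINE_SIZE) go into
 * the Ethernet Segment and the remaining 42 bytes are referenced by
 * the Data Segment.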
4796 */ 4797 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 4798 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 4799 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 4800 dptr, dlen, olx); 4801 ++txq->wqe_ci; 4802 --loc->wqe_free; 4803 /* We have to store mbuf in elts.*/ 4804 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4805 txq->elts[txq->elts_head++ & txq->elts_m] = 4806 loc->mbuf; 4807 --loc->elts_free; 4808 } 4809 #ifdef MLX5_PMD_SOFT_COUNTERS 4810 /* Update sent data bytes counter. */ 4811 txq->stats.obytes += vlan + 4812 rte_pktmbuf_data_len(loc->mbuf); 4813 #endif 4814 } else { 4815 /* 4816 * No inline at all, it means the CPU cycles saving 4817 * is prioritized at configuration, we should not 4818 * copy any packet data to WQE. 4819 * 4820 * SEND WQE, one WQEBB: 4821 * - Control Segment, SEND opcode 4822 * - Ethernet Segment, optional VLAN, no inline 4823 * - Data Segment, pointer type 4824 */ 4825 single_no_inline: 4826 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4827 loc->wqe_last = wqe; 4828 mlx5_tx_cseg_init(txq, loc, wqe, 3, 4829 MLX5_OPCODE_SEND, olx); 4830 mlx5_tx_eseg_none(txq, loc, wqe, olx); 4831 mlx5_tx_dseg_ptr 4832 (txq, loc, &wqe->dseg[0], 4833 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4834 rte_pktmbuf_data_len(loc->mbuf), olx); 4835 ++txq->wqe_ci; 4836 --loc->wqe_free; 4837 /* 4838 * We should not store mbuf pointer in elts 4839 * if no inlining is configured, this is done 4840 * by calling routine in a batch copy. 4841 */ 4842 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4843 --loc->elts_free; 4844 #ifdef MLX5_PMD_SOFT_COUNTERS 4845 /* Update sent data bytes counter. */ 4846 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 4847 if (MLX5_TXOFF_CONFIG(VLAN) && 4848 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 4849 txq->stats.obytes += 4850 sizeof(struct rte_vlan_hdr); 4851 #endif 4852 } 4853 ++loc->pkts_sent; 4854 --pkts_n; 4855 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4856 return MLX5_TXCMP_CODE_EXIT; 4857 loc->mbuf = *pkts++; 4858 if (pkts_n > 1) 4859 rte_prefetch0(*pkts); 4860 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4861 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 4862 return ret; 4863 } 4864 MLX5_ASSERT(false); 4865 } 4866 4867 static __rte_always_inline enum mlx5_txcmp_code 4868 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 4869 struct rte_mbuf **__rte_restrict pkts, 4870 unsigned int pkts_n, 4871 struct mlx5_txq_local *__rte_restrict loc, 4872 unsigned int olx) 4873 { 4874 enum mlx5_txcmp_code ret; 4875 4876 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 4877 if (ret == MLX5_TXCMP_CODE_SINGLE) 4878 goto ordinary_send; 4879 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 4880 for (;;) { 4881 /* Optimize for inline/no inline eMPW send. */ 4882 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 4883 mlx5_tx_burst_empw_inline 4884 (txq, pkts, pkts_n, loc, olx) : 4885 mlx5_tx_burst_empw_simple 4886 (txq, pkts, pkts_n, loc, olx); 4887 if (ret != MLX5_TXCMP_CODE_SINGLE) 4888 return ret; 4889 /* The resources to send one packet should remain. */ 4890 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4891 ordinary_send: 4892 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 4893 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 4894 if (ret != MLX5_TXCMP_CODE_EMPW) 4895 return ret; 4896 /* The resources to send one packet should remain. */ 4897 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4898 } 4899 } 4900 4901 /** 4902 * DPDK Tx callback template. This is configured template 4903 * used to generate routines optimized for specified offload setup. 
4904 * One of this generated functions is chosen at SQ configuration 4905 * time. 4906 * 4907 * @param txq 4908 * Generic pointer to TX queue structure. 4909 * @param[in] pkts 4910 * Packets to transmit. 4911 * @param pkts_n 4912 * Number of packets in array. 4913 * @param olx 4914 * Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx 4915 * values. Should be static to take compile time static configuration 4916 * advantages. 4917 * 4918 * @return 4919 * Number of packets successfully transmitted (<= pkts_n). 4920 */ 4921 static __rte_always_inline uint16_t 4922 mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, 4923 struct rte_mbuf **__rte_restrict pkts, 4924 uint16_t pkts_n, 4925 unsigned int olx) 4926 { 4927 struct mlx5_txq_local loc; 4928 enum mlx5_txcmp_code ret; 4929 unsigned int part; 4930 4931 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4932 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4933 if (unlikely(!pkts_n)) 4934 return 0; 4935 loc.pkts_sent = 0; 4936 loc.pkts_copy = 0; 4937 loc.wqe_last = NULL; 4938 4939 send_loop: 4940 loc.pkts_loop = loc.pkts_sent; 4941 /* 4942 * Check if there are some CQEs, if any: 4943 * - process an encountered errors 4944 * - process the completed WQEs 4945 * - free related mbufs 4946 * - doorbell the NIC about processed CQEs 4947 */ 4948 rte_prefetch0(*(pkts + loc.pkts_sent)); 4949 mlx5_tx_handle_completion(txq, olx); 4950 /* 4951 * Calculate the number of available resources - elts and WQEs. 4952 * There are two possible different scenarios: 4953 * - no data inlining into WQEs, one WQEBB may contains up to 4954 * four packets, in this case elts become scarce resource 4955 * - data inlining into WQEs, one packet may require multiple 4956 * WQEBBs, the WQEs become the limiting factor. 4957 */ 4958 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4959 loc.elts_free = txq->elts_s - 4960 (uint16_t)(txq->elts_head - txq->elts_tail); 4961 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4962 loc.wqe_free = txq->wqe_s - 4963 (uint16_t)(txq->wqe_ci - txq->wqe_pi); 4964 if (unlikely(!loc.elts_free || !loc.wqe_free)) 4965 goto burst_exit; 4966 for (;;) { 4967 /* 4968 * Fetch the packet from array. Usually this is 4969 * the first packet in series of multi/single 4970 * segment packets. 4971 */ 4972 loc.mbuf = *(pkts + loc.pkts_sent); 4973 /* Dedicated branch for multi-segment packets. */ 4974 if (MLX5_TXOFF_CONFIG(MULTI) && 4975 unlikely(NB_SEGS(loc.mbuf) > 1)) { 4976 /* 4977 * Multi-segment packet encountered. 4978 * Hardware is able to process it only 4979 * with SEND/TSO opcodes, one packet 4980 * per WQE, do it in dedicated routine. 4981 */ 4982 enter_send_multi: 4983 MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy); 4984 part = loc.pkts_sent - loc.pkts_copy; 4985 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 4986 /* 4987 * There are some single-segment mbufs not 4988 * stored in elts. The mbufs must be in the 4989 * same order as WQEs, so we must copy the 4990 * mbufs to elts here, before the coming 4991 * multi-segment packet mbufs is appended. 4992 */ 4993 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, 4994 part, olx); 4995 loc.pkts_copy = loc.pkts_sent; 4996 } 4997 MLX5_ASSERT(pkts_n > loc.pkts_sent); 4998 ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx); 4999 if (!MLX5_TXOFF_CONFIG(INLINE)) 5000 loc.pkts_copy = loc.pkts_sent; 5001 /* 5002 * These returned code checks are supposed 5003 * to be optimized out due to routine inlining. 
5004 */ 5005 if (ret == MLX5_TXCMP_CODE_EXIT) { 5006 /* 5007 * The routine returns this code when 5008 * all packets are sent or there is no 5009 * enough resources to complete request. 5010 */ 5011 break; 5012 } 5013 if (ret == MLX5_TXCMP_CODE_ERROR) { 5014 /* 5015 * The routine returns this code when 5016 * some error in the incoming packets 5017 * format occurred. 5018 */ 5019 txq->stats.oerrors++; 5020 break; 5021 } 5022 if (ret == MLX5_TXCMP_CODE_SINGLE) { 5023 /* 5024 * The single-segment packet was encountered 5025 * in the array, try to send it with the 5026 * best optimized way, possible engaging eMPW. 5027 */ 5028 goto enter_send_single; 5029 } 5030 if (MLX5_TXOFF_CONFIG(TSO) && 5031 ret == MLX5_TXCMP_CODE_TSO) { 5032 /* 5033 * The single-segment TSO packet was 5034 * encountered in the array. 5035 */ 5036 goto enter_send_tso; 5037 } 5038 /* We must not get here. Something is going wrong. */ 5039 MLX5_ASSERT(false); 5040 txq->stats.oerrors++; 5041 break; 5042 } 5043 /* Dedicated branch for single-segment TSO packets. */ 5044 if (MLX5_TXOFF_CONFIG(TSO) && 5045 unlikely(loc.mbuf->ol_flags & PKT_TX_TCP_SEG)) { 5046 /* 5047 * TSO might require special way for inlining 5048 * (dedicated parameters) and is sent with 5049 * MLX5_OPCODE_TSO opcode only, provide this 5050 * in dedicated branch. 5051 */ 5052 enter_send_tso: 5053 MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1); 5054 MLX5_ASSERT(pkts_n > loc.pkts_sent); 5055 ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx); 5056 /* 5057 * These returned code checks are supposed 5058 * to be optimized out due to routine inlining. 5059 */ 5060 if (ret == MLX5_TXCMP_CODE_EXIT) 5061 break; 5062 if (ret == MLX5_TXCMP_CODE_ERROR) { 5063 txq->stats.oerrors++; 5064 break; 5065 } 5066 if (ret == MLX5_TXCMP_CODE_SINGLE) 5067 goto enter_send_single; 5068 if (MLX5_TXOFF_CONFIG(MULTI) && 5069 ret == MLX5_TXCMP_CODE_MULTI) { 5070 /* 5071 * The multi-segment packet was 5072 * encountered in the array. 5073 */ 5074 goto enter_send_multi; 5075 } 5076 /* We must not get here. Something is going wrong. */ 5077 MLX5_ASSERT(false); 5078 txq->stats.oerrors++; 5079 break; 5080 } 5081 /* 5082 * The dedicated branch for the single-segment packets 5083 * without TSO. Often these ones can be sent using 5084 * MLX5_OPCODE_EMPW with multiple packets in one WQE. 5085 * The routine builds the WQEs till it encounters 5086 * the TSO or multi-segment packet (in case if these 5087 * offloads are requested at SQ configuration time). 5088 */ 5089 enter_send_single: 5090 MLX5_ASSERT(pkts_n > loc.pkts_sent); 5091 ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx); 5092 /* 5093 * These returned code checks are supposed 5094 * to be optimized out due to routine inlining. 5095 */ 5096 if (ret == MLX5_TXCMP_CODE_EXIT) 5097 break; 5098 if (ret == MLX5_TXCMP_CODE_ERROR) { 5099 txq->stats.oerrors++; 5100 break; 5101 } 5102 if (MLX5_TXOFF_CONFIG(MULTI) && 5103 ret == MLX5_TXCMP_CODE_MULTI) { 5104 /* 5105 * The multi-segment packet was 5106 * encountered in the array. 5107 */ 5108 goto enter_send_multi; 5109 } 5110 if (MLX5_TXOFF_CONFIG(TSO) && 5111 ret == MLX5_TXCMP_CODE_TSO) { 5112 /* 5113 * The single-segment TSO packet was 5114 * encountered in the array. 5115 */ 5116 goto enter_send_tso; 5117 } 5118 /* We must not get here. Something is going wrong. 
*/ 5119 MLX5_ASSERT(false); 5120 txq->stats.oerrors++; 5121 break; 5122 } 5123 /* 5124 * Main Tx loop is completed, do the rest: 5125 * - set completion request if thresholds are reached 5126 * - doorbell the hardware 5127 * - copy the rest of mbufs to elts (if any) 5128 */ 5129 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) || 5130 loc.pkts_sent >= loc.pkts_copy); 5131 /* Take a shortcut if nothing is sent. */ 5132 if (unlikely(loc.pkts_sent == loc.pkts_loop)) 5133 goto burst_exit; 5134 /* Request CQE generation if limits are reached. */ 5135 mlx5_tx_request_completion(txq, &loc, olx); 5136 /* 5137 * Ring QP doorbell immediately after WQE building completion 5138 * to improve latencies. The pure software related data treatment 5139 * can be completed after doorbell. Tx CQEs for this SQ are 5140 * processed in this thread only by the polling. 5141 * 5142 * The rdma core library can map doorbell register in two ways, 5143 * depending on the environment variable "MLX5_SHUT_UP_BF": 5144 * 5145 * - as regular cached memory, the variable is either missing or 5146 * set to zero. This type of mapping may cause the significant 5147 * doorbell register writing latency and requires explicit 5148 * memory write barrier to mitigate this issue and prevent 5149 * write combining. 5150 * 5151 * - as non-cached memory, the variable is present and set to 5152 * not "0" value. This type of mapping may cause performance 5153 * impact under heavy loading conditions but the explicit write 5154 * memory barrier is not required and it may improve core 5155 * performance. 5156 * 5157 * - the legacy behaviour (prior 19.08 release) was to use some 5158 * heuristics to decide whether write memory barrier should 5159 * be performed. This behavior is supported with specifying 5160 * tx_db_nc=2, write barrier is skipped if application 5161 * provides the full recommended burst of packets, it 5162 * supposes the next packets are coming and the write barrier 5163 * will be issued on the next burst (after descriptor writing, 5164 * at least). 5165 */ 5166 mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc && 5167 (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST)); 5168 /* Not all of the mbufs may be stored into elts yet. */ 5169 part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy; 5170 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 5171 /* 5172 * There are some single-segment mbufs not stored in elts. 5173 * It can be only if the last packet was single-segment. 5174 * The copying is gathered into one place due to it is 5175 * a good opportunity to optimize that with SIMD. 5176 * Unfortunately if inlining is enabled the gaps in 5177 * pointer array may happen due to early freeing of the 5178 * inlined mbufs. 5179 */ 5180 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx); 5181 loc.pkts_copy = loc.pkts_sent; 5182 } 5183 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 5184 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 5185 if (pkts_n > loc.pkts_sent) { 5186 /* 5187 * If burst size is large there might be no enough CQE 5188 * fetched from completion queue and no enough resources 5189 * freed to send all the packets. 5190 */ 5191 goto send_loop; 5192 } 5193 burst_exit: 5194 #ifdef MLX5_PMD_SOFT_COUNTERS 5195 /* Increment sent packets counter. */ 5196 txq->stats.opackets += loc.pkts_sent; 5197 #endif 5198 return loc.pkts_sent; 5199 } 5200 5201 /* Generate routines with Enhanced Multi-Packet Write support. 
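 * Each MLX5_TXOFF_DECL() below instantiates the template with the
 * listed constant offload flags, and the matching MLX5_TXOFF_INFO()
 * entries in txoff_func[] are used to select a routine at Tx queue
 * configuration time.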
*/ 5202 MLX5_TXOFF_DECL(full_empw, 5203 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW) 5204 5205 MLX5_TXOFF_DECL(none_empw, 5206 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 5207 5208 MLX5_TXOFF_DECL(md_empw, 5209 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5210 5211 MLX5_TXOFF_DECL(mt_empw, 5212 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5213 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5214 5215 MLX5_TXOFF_DECL(mtsc_empw, 5216 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5217 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5218 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5219 5220 MLX5_TXOFF_DECL(mti_empw, 5221 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5222 MLX5_TXOFF_CONFIG_INLINE | 5223 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5224 5225 MLX5_TXOFF_DECL(mtv_empw, 5226 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5227 MLX5_TXOFF_CONFIG_VLAN | 5228 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5229 5230 MLX5_TXOFF_DECL(mtiv_empw, 5231 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5232 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5233 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5234 5235 MLX5_TXOFF_DECL(sc_empw, 5236 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5237 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5238 5239 MLX5_TXOFF_DECL(sci_empw, 5240 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5241 MLX5_TXOFF_CONFIG_INLINE | 5242 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5243 5244 MLX5_TXOFF_DECL(scv_empw, 5245 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5246 MLX5_TXOFF_CONFIG_VLAN | 5247 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5248 5249 MLX5_TXOFF_DECL(sciv_empw, 5250 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5251 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5252 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5253 5254 MLX5_TXOFF_DECL(i_empw, 5255 MLX5_TXOFF_CONFIG_INLINE | 5256 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5257 5258 MLX5_TXOFF_DECL(v_empw, 5259 MLX5_TXOFF_CONFIG_VLAN | 5260 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5261 5262 MLX5_TXOFF_DECL(iv_empw, 5263 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5264 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5265 5266 /* Generate routines without Enhanced Multi-Packet Write support. 
*/ 5267 MLX5_TXOFF_DECL(full, 5268 MLX5_TXOFF_CONFIG_FULL) 5269 5270 MLX5_TXOFF_DECL(none, 5271 MLX5_TXOFF_CONFIG_NONE) 5272 5273 MLX5_TXOFF_DECL(md, 5274 MLX5_TXOFF_CONFIG_METADATA) 5275 5276 MLX5_TXOFF_DECL(mt, 5277 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5278 MLX5_TXOFF_CONFIG_METADATA) 5279 5280 MLX5_TXOFF_DECL(mtsc, 5281 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5282 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5283 MLX5_TXOFF_CONFIG_METADATA) 5284 5285 MLX5_TXOFF_DECL(mti, 5286 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5287 MLX5_TXOFF_CONFIG_INLINE | 5288 MLX5_TXOFF_CONFIG_METADATA) 5289 5290 5291 MLX5_TXOFF_DECL(mtv, 5292 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5293 MLX5_TXOFF_CONFIG_VLAN | 5294 MLX5_TXOFF_CONFIG_METADATA) 5295 5296 5297 MLX5_TXOFF_DECL(mtiv, 5298 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5299 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5300 MLX5_TXOFF_CONFIG_METADATA) 5301 5302 MLX5_TXOFF_DECL(sc, 5303 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5304 MLX5_TXOFF_CONFIG_METADATA) 5305 5306 MLX5_TXOFF_DECL(sci, 5307 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5308 MLX5_TXOFF_CONFIG_INLINE | 5309 MLX5_TXOFF_CONFIG_METADATA) 5310 5311 5312 MLX5_TXOFF_DECL(scv, 5313 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5314 MLX5_TXOFF_CONFIG_VLAN | 5315 MLX5_TXOFF_CONFIG_METADATA) 5316 5317 5318 MLX5_TXOFF_DECL(sciv, 5319 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5320 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5321 MLX5_TXOFF_CONFIG_METADATA) 5322 5323 MLX5_TXOFF_DECL(i, 5324 MLX5_TXOFF_CONFIG_INLINE | 5325 MLX5_TXOFF_CONFIG_METADATA) 5326 5327 MLX5_TXOFF_DECL(v, 5328 MLX5_TXOFF_CONFIG_VLAN | 5329 MLX5_TXOFF_CONFIG_METADATA) 5330 5331 MLX5_TXOFF_DECL(iv, 5332 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5333 MLX5_TXOFF_CONFIG_METADATA) 5334 5335 /* Generate routines with timestamp scheduling. */ 5336 MLX5_TXOFF_DECL(full_ts_nompw, 5337 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP) 5338 5339 MLX5_TXOFF_DECL(full_ts_nompwi, 5340 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5341 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5342 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5343 MLX5_TXOFF_CONFIG_TXPP) 5344 5345 MLX5_TXOFF_DECL(full_ts, 5346 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP | 5347 MLX5_TXOFF_CONFIG_EMPW) 5348 5349 MLX5_TXOFF_DECL(full_ts_noi, 5350 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5351 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5352 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5353 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5354 5355 MLX5_TXOFF_DECL(none_ts, 5356 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP | 5357 MLX5_TXOFF_CONFIG_EMPW) 5358 5359 MLX5_TXOFF_DECL(mdi_ts, 5360 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5361 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5362 5363 MLX5_TXOFF_DECL(mti_ts, 5364 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5365 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5366 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5367 5368 MLX5_TXOFF_DECL(mtiv_ts, 5369 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5370 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5371 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP | 5372 MLX5_TXOFF_CONFIG_EMPW) 5373 5374 /* 5375 * Generate routines with Legacy Multi-Packet Write support. 
5376 * This mode is supported by ConnectX-4 Lx only and imposes 5377 * offload limitations; the following are not supported: 5378 * - ACL/Flows (metadata becomes meaningless) 5379 * - WQE Inline headers 5380 * - SRIOV (E-Switch offloads) 5381 * - VLAN insertion 5382 * - tunnel encapsulation/decapsulation 5383 * - TSO 5384 */ 5385 MLX5_TXOFF_DECL(none_mpw, 5386 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW | 5387 MLX5_TXOFF_CONFIG_MPW) 5388 5389 MLX5_TXOFF_DECL(mci_mpw, 5390 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 5391 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 5392 MLX5_TXOFF_CONFIG_MPW) 5393 5394 MLX5_TXOFF_DECL(mc_mpw, 5395 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 5396 MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW) 5397 5398 MLX5_TXOFF_DECL(i_mpw, 5399 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 5400 MLX5_TXOFF_CONFIG_MPW) 5401 5402 /* 5403 * Array of declared and compiled Tx burst functions and the corresponding 5404 * supported offload sets. The array is used to select the Tx burst 5405 * function for the offload set specified at Tx queue configuration time. 5406 */ 5407 const struct { 5408 eth_tx_burst_t func; 5409 unsigned int olx; 5410 } txoff_func[] = { 5411 MLX5_TXOFF_INFO(full_empw, 5412 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5413 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5414 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5415 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5416 5417 MLX5_TXOFF_INFO(none_empw, 5418 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 5419 5420 MLX5_TXOFF_INFO(md_empw, 5421 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5422 5423 MLX5_TXOFF_INFO(mt_empw, 5424 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5425 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5426 5427 MLX5_TXOFF_INFO(mtsc_empw, 5428 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5429 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5430 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5431 5432 MLX5_TXOFF_INFO(mti_empw, 5433 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5434 MLX5_TXOFF_CONFIG_INLINE | 5435 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5436 5437 MLX5_TXOFF_INFO(mtv_empw, 5438 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5439 MLX5_TXOFF_CONFIG_VLAN | 5440 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5441 5442 MLX5_TXOFF_INFO(mtiv_empw, 5443 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5444 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5445 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5446 5447 MLX5_TXOFF_INFO(sc_empw, 5448 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5449 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5450 5451 MLX5_TXOFF_INFO(sci_empw, 5452 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5453 MLX5_TXOFF_CONFIG_INLINE | 5454 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5455 5456 MLX5_TXOFF_INFO(scv_empw, 5457 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5458 MLX5_TXOFF_CONFIG_VLAN | 5459 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5460 5461 MLX5_TXOFF_INFO(sciv_empw, 5462 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5463 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5464 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5465 5466 MLX5_TXOFF_INFO(i_empw, 5467 MLX5_TXOFF_CONFIG_INLINE | 5468 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5469 5470 MLX5_TXOFF_INFO(v_empw, 5471 MLX5_TXOFF_CONFIG_VLAN | 5472 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5473 5474 MLX5_TXOFF_INFO(iv_empw,
5475 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5476 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5477 5478 MLX5_TXOFF_INFO(full_ts_nompw, 5479 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP) 5480 5481 MLX5_TXOFF_INFO(full_ts_nompwi, 5482 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5483 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5484 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5485 MLX5_TXOFF_CONFIG_TXPP) 5486 5487 MLX5_TXOFF_INFO(full_ts, 5488 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP | 5489 MLX5_TXOFF_CONFIG_EMPW) 5490 5491 MLX5_TXOFF_INFO(full_ts_noi, 5492 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5493 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5494 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5495 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5496 5497 MLX5_TXOFF_INFO(none_ts, 5498 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP | 5499 MLX5_TXOFF_CONFIG_EMPW) 5500 5501 MLX5_TXOFF_INFO(mdi_ts, 5502 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5503 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5504 5505 MLX5_TXOFF_INFO(mti_ts, 5506 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5507 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5508 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5509 5510 MLX5_TXOFF_INFO(mtiv_ts, 5511 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5512 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5513 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP | 5514 MLX5_TXOFF_CONFIG_EMPW) 5515 5516 MLX5_TXOFF_INFO(full, 5517 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5518 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5519 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5520 MLX5_TXOFF_CONFIG_METADATA) 5521 5522 MLX5_TXOFF_INFO(none, 5523 MLX5_TXOFF_CONFIG_NONE) 5524 5525 MLX5_TXOFF_INFO(md, 5526 MLX5_TXOFF_CONFIG_METADATA) 5527 5528 MLX5_TXOFF_INFO(mt, 5529 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5530 MLX5_TXOFF_CONFIG_METADATA) 5531 5532 MLX5_TXOFF_INFO(mtsc, 5533 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5534 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5535 MLX5_TXOFF_CONFIG_METADATA) 5536 5537 MLX5_TXOFF_INFO(mti, 5538 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5539 MLX5_TXOFF_CONFIG_INLINE | 5540 MLX5_TXOFF_CONFIG_METADATA) 5541 5542 MLX5_TXOFF_INFO(mtv, 5543 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5544 MLX5_TXOFF_CONFIG_VLAN | 5545 MLX5_TXOFF_CONFIG_METADATA) 5546 5547 MLX5_TXOFF_INFO(mtiv, 5548 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5549 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5550 MLX5_TXOFF_CONFIG_METADATA) 5551 5552 MLX5_TXOFF_INFO(sc, 5553 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5554 MLX5_TXOFF_CONFIG_METADATA) 5555 5556 MLX5_TXOFF_INFO(sci, 5557 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5558 MLX5_TXOFF_CONFIG_INLINE | 5559 MLX5_TXOFF_CONFIG_METADATA) 5560 5561 MLX5_TXOFF_INFO(scv, 5562 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5563 MLX5_TXOFF_CONFIG_VLAN | 5564 MLX5_TXOFF_CONFIG_METADATA) 5565 5566 MLX5_TXOFF_INFO(sciv, 5567 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5568 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5569 MLX5_TXOFF_CONFIG_METADATA) 5570 5571 MLX5_TXOFF_INFO(i, 5572 MLX5_TXOFF_CONFIG_INLINE | 5573 MLX5_TXOFF_CONFIG_METADATA) 5574 5575 MLX5_TXOFF_INFO(v, 5576 MLX5_TXOFF_CONFIG_VLAN | 5577 MLX5_TXOFF_CONFIG_METADATA) 5578 5579 MLX5_TXOFF_INFO(iv, 5580 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5581 
MLX5_TXOFF_CONFIG_METADATA) 5582 5583 MLX5_TXOFF_INFO(none_mpw, 5584 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW | 5585 MLX5_TXOFF_CONFIG_MPW) 5586 5587 MLX5_TXOFF_INFO(mci_mpw, 5588 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 5589 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 5590 MLX5_TXOFF_CONFIG_MPW) 5591 5592 MLX5_TXOFF_INFO(mc_mpw, 5593 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 5594 MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW) 5595 5596 MLX5_TXOFF_INFO(i_mpw, 5597 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 5598 MLX5_TXOFF_CONFIG_MPW) 5599 }; 5600 5601 /** 5602 * Configure the Tx function to use. The routine checks the configured 5603 * Tx offloads for the device and selects the appropriate Tx burst 5604 * routine. There are multiple Tx burst routines compiled from 5605 * the same template, each optimized for a dedicated 5606 * Tx offloads set. 5607 * 5608 * @param dev 5609 * Pointer to the Ethernet device structure. 5610 * 5611 * @return 5612 * Pointer to the selected Tx burst function. 5613 */ 5614 eth_tx_burst_t 5615 mlx5_select_tx_function(struct rte_eth_dev *dev) 5616 { 5617 struct mlx5_priv *priv = dev->data->dev_private; 5618 struct mlx5_dev_config *config = &priv->config; 5619 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 5620 unsigned int diff = 0, olx = 0, i, m; 5621 5622 static_assert(MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE <= 5623 MLX5_DSEG_MAX, "invalid WQE max size"); 5624 static_assert(MLX5_WQE_CSEG_SIZE == MLX5_WSEG_SIZE, 5625 "invalid WQE Control Segment size"); 5626 static_assert(MLX5_WQE_ESEG_SIZE == MLX5_WSEG_SIZE, 5627 "invalid WQE Ethernet Segment size"); 5628 static_assert(MLX5_WQE_DSEG_SIZE == MLX5_WSEG_SIZE, 5629 "invalid WQE Data Segment size"); 5630 static_assert(MLX5_WQE_SIZE == 4 * MLX5_WSEG_SIZE, 5631 "invalid WQE size"); 5632 MLX5_ASSERT(priv); 5633 if (tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) { 5634 /* We should support Multi-Segment Packets. */ 5635 olx |= MLX5_TXOFF_CONFIG_MULTI; 5636 } 5637 if (tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO | 5638 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 5639 DEV_TX_OFFLOAD_GRE_TNL_TSO | 5640 DEV_TX_OFFLOAD_IP_TNL_TSO | 5641 DEV_TX_OFFLOAD_UDP_TNL_TSO)) { 5642 /* We should support TCP Send Offload. */ 5643 olx |= MLX5_TXOFF_CONFIG_TSO; 5644 } 5645 if (tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO | 5646 DEV_TX_OFFLOAD_UDP_TNL_TSO | 5647 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) { 5648 /* We should support Software Parser for Tunnels. */ 5649 olx |= MLX5_TXOFF_CONFIG_SWP; 5650 } 5651 if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM | 5652 DEV_TX_OFFLOAD_UDP_CKSUM | 5653 DEV_TX_OFFLOAD_TCP_CKSUM | 5654 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) { 5655 /* We should support IP/TCP/UDP Checksums. */ 5656 olx |= MLX5_TXOFF_CONFIG_CSUM; 5657 } 5658 if (tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT) { 5659 /* We should support VLAN insertion. */ 5660 olx |= MLX5_TXOFF_CONFIG_VLAN; 5661 } 5662 if (tx_offloads & DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP && 5663 rte_mbuf_dynflag_lookup 5664 (RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL) >= 0 && 5665 rte_mbuf_dynfield_lookup 5666 (RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL) >= 0) { 5667 /* Offload configured, dynamic entities registered. */ 5668 olx |= MLX5_TXOFF_CONFIG_TXPP; 5669 } 5670 if (priv->txqs_n && (*priv->txqs)[0]) { 5671 struct mlx5_txq_data *txd = (*priv->txqs)[0]; 5672 5673 if (txd->inlen_send) { 5674 /* 5675 * Check the data inline requirements. Data inlining 5676 * is enabled on a per-device basis, so we can check 5677 * the first Tx queue only.
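 *
 * (inlen_send is the data inline length configured for ordinary SEND
 * WQEs; a non-zero value means the inlining code path has to be
 * present in the selected routine.)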
* 5679 * If the device does not support VLAN insertion in WQE 5680 * and some queues are requested to perform VLAN 5681 * insertion offload, then inlining must be enabled. 5682 */ 5683 olx |= MLX5_TXOFF_CONFIG_INLINE; 5684 } 5685 } 5686 if (config->mps == MLX5_MPW_ENHANCED && 5687 config->txq_inline_min <= 0) { 5688 /* 5689 * The NIC supports Enhanced Multi-Packet Write 5690 * and does not require minimal inline data. 5691 */ 5692 olx |= MLX5_TXOFF_CONFIG_EMPW; 5693 } 5694 if (rte_flow_dynf_metadata_avail()) { 5695 /* We should support Flow metadata. */ 5696 olx |= MLX5_TXOFF_CONFIG_METADATA; 5697 } 5698 if (config->mps == MLX5_MPW) { 5699 /* 5700 * The NIC supports Legacy Multi-Packet Write. 5701 * The MLX5_TXOFF_CONFIG_MPW controls the 5702 * descriptor building method in combination 5703 * with MLX5_TXOFF_CONFIG_EMPW. 5704 */ 5705 if (!(olx & (MLX5_TXOFF_CONFIG_TSO | 5706 MLX5_TXOFF_CONFIG_SWP | 5707 MLX5_TXOFF_CONFIG_VLAN | 5708 MLX5_TXOFF_CONFIG_METADATA))) 5709 olx |= MLX5_TXOFF_CONFIG_EMPW | 5710 MLX5_TXOFF_CONFIG_MPW; 5711 } 5712 /* 5713 * Scan the routines table to find the minimal 5714 * routine satisfying the requested offloads. 5715 */ 5716 m = RTE_DIM(txoff_func); 5717 for (i = 0; i < RTE_DIM(txoff_func); i++) { 5718 unsigned int tmp; 5719 5720 tmp = txoff_func[i].olx; 5721 if (tmp == olx) { 5722 /* Meets requested offloads exactly. */ 5723 m = i; 5724 break; 5725 } 5726 if ((tmp & olx) != olx) { 5727 /* Does not meet requested offloads at all. */ 5728 continue; 5729 } 5730 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_MPW) 5731 /* Do not enable legacy MPW if not configured. */ 5732 continue; 5733 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_EMPW) 5734 /* Do not enable eMPW if not configured. */ 5735 continue; 5736 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_INLINE) 5737 /* Do not enable inlining if not configured. */ 5738 continue; 5739 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_TXPP) 5740 /* Do not enable scheduling if not configured. */ 5741 continue; 5742 /* 5743 * Some routine meets the requirements. 5744 * Check whether it has the minimal amount 5745 * of not-requested offloads. 5746 */ 5747 tmp = __builtin_popcountl(tmp & ~olx); 5748 if (m >= RTE_DIM(txoff_func) || tmp < diff) { 5749 /* First or better match, save and continue. */ 5750 m = i; 5751 diff = tmp; 5752 continue; 5753 } 5754 if (tmp == diff) { 5755 tmp = txoff_func[i].olx ^ txoff_func[m].olx; 5756 if (__builtin_ffsl(txoff_func[i].olx & ~tmp) < 5757 __builtin_ffsl(txoff_func[m].olx & ~tmp)) { 5758 /* Prefer the lighter not-requested offload. */ 5759 m = i; 5760 } 5761 } 5762 }
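	/*
	 * Selection example (illustrative; assumes an eMPW-capable port with
	 * flow metadata registered, no data inlining and only the checksum
	 * offloads requested): olx is CSUM | METADATA | EMPW, which has no
	 * exact entry in txoff_func[]. Candidates enabling inlining, such as
	 * full_empw, are skipped, and sc_empw wins over mtsc_empw because it
	 * adds only SWP on top of the request instead of MULTI, TSO and SWP.
	 */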
5763 if (m >= RTE_DIM(txoff_func)) { 5764 DRV_LOG(DEBUG, "port %u has no selected Tx function" 5765 " for requested offloads %04X", 5766 dev->data->port_id, olx); 5767 return NULL; 5768 } 5769 DRV_LOG(DEBUG, "port %u has selected Tx function" 5770 " supporting offloads %04X/%04X", 5771 dev->data->port_id, olx, txoff_func[m].olx); 5772 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MULTI) 5773 DRV_LOG(DEBUG, "\tMULTI (multi segment)"); 5774 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TSO) 5775 DRV_LOG(DEBUG, "\tTSO (TCP send offload)"); 5776 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_SWP) 5777 DRV_LOG(DEBUG, "\tSWP (software parser)"); 5778 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_CSUM) 5779 DRV_LOG(DEBUG, "\tCSUM (checksum offload)"); 5780 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_INLINE) 5781 DRV_LOG(DEBUG, "\tINLIN (inline data)"); 5782 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_VLAN) 5783 DRV_LOG(DEBUG, "\tVLANI (VLAN insertion)"); 5784 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_METADATA) 5785 DRV_LOG(DEBUG, "\tMETAD (tx Flow metadata)"); 5786 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TXPP) 5787 DRV_LOG(DEBUG, "\tTXPP (tx Scheduling)"); 5788 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_EMPW) { 5789 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MPW) 5790 DRV_LOG(DEBUG, "\tMPW (Legacy MPW)"); 5791 else 5792 DRV_LOG(DEBUG, "\tEMPW (Enhanced MPW)"); 5793 } 5794 return txoff_func[m].func; 5795 } 5796 5797 /** 5798 * DPDK callback to get the Tx queue information. 5799 * 5800 * @param dev 5801 * Pointer to the device structure. 5802 * 5803 * @param tx_queue_id 5804 * Tx queue identifier. 5805 * 5806 * @param qinfo 5807 * Pointer to the Tx queue information structure. 5808 * 5809 * @return 5810 * None. 5811 */ 5812 5813 void 5814 mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id, 5815 struct rte_eth_txq_info *qinfo) 5816 { 5817 struct mlx5_priv *priv = dev->data->dev_private; 5818 struct mlx5_txq_data *txq = (*priv->txqs)[tx_queue_id]; 5819 struct mlx5_txq_ctrl *txq_ctrl = 5820 container_of(txq, struct mlx5_txq_ctrl, txq); 5821 5822 if (!txq) 5823 return; 5824 qinfo->nb_desc = txq->elts_s; 5825 qinfo->conf.tx_thresh.pthresh = 0; 5826 qinfo->conf.tx_thresh.hthresh = 0; 5827 qinfo->conf.tx_thresh.wthresh = 0; 5828 qinfo->conf.tx_rs_thresh = 0; 5829 qinfo->conf.tx_free_thresh = 0; 5830 qinfo->conf.tx_deferred_start = txq_ctrl ? 0 : 1; 5831 qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads; 5832 } 5833 5834 /** 5835 * DPDK callback to get the Tx packet burst mode information. 5836 * 5837 * @param dev 5838 * Pointer to the device structure. 5839 * 5840 * @param tx_queue_id 5841 * Tx queue identifier. 5842 * 5843 * @param mode 5844 * Pointer to the burst mode information. 5845 * 5846 * @return 5847 * 0 on success, -EINVAL on failure. 5848 */ 5849 5850 int 5851 mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, 5852 uint16_t tx_queue_id __rte_unused, 5853 struct rte_eth_burst_mode *mode) 5854 { 5855 eth_tx_burst_t pkt_burst = dev->tx_pkt_burst; 5856 unsigned int i, olx; 5857 5858 for (i = 0; i < RTE_DIM(txoff_func); i++) { 5859 if (pkt_burst == txoff_func[i].func) { 5860 olx = txoff_func[i].olx; 5861 snprintf(mode->info, sizeof(mode->info), 5862 "%s%s%s%s%s%s%s%s%s", 5863 (olx & MLX5_TXOFF_CONFIG_EMPW) ? 5864 ((olx & MLX5_TXOFF_CONFIG_MPW) ? 5865 "Legacy MPW" : "Enhanced MPW") : "No MPW", 5866 (olx & MLX5_TXOFF_CONFIG_MULTI) ? 5867 " + MULTI" : "", 5868 (olx & MLX5_TXOFF_CONFIG_TSO) ? 5869 " + TSO" : "", 5870 (olx & MLX5_TXOFF_CONFIG_SWP) ?
5871 " + SWP" : "", 5872 (olx & MLX5_TXOFF_CONFIG_CSUM) ? 5873 " + CSUM" : "", 5874 (olx & MLX5_TXOFF_CONFIG_INLINE) ? 5875 " + INLINE" : "", 5876 (olx & MLX5_TXOFF_CONFIG_VLAN) ? 5877 " + VLAN" : "", 5878 (olx & MLX5_TXOFF_CONFIG_METADATA) ? 5879 " + METADATA" : "", 5880 (olx & MLX5_TXOFF_CONFIG_TXPP) ? 5881 " + TXPP" : ""); 5882 return 0; 5883 } 5884 } 5885 return -EINVAL; 5886 } 5887
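/*
 * Usage sketch (illustrative only, not part of the datapath): an
 * application can query which Tx burst variant is in use through the
 * generic ethdev API. It assumes <rte_ethdev.h> is included and that
 * port_id/queue_id refer to an already configured mlx5 port and queue:
 *
 *	struct rte_eth_burst_mode mode;
 *
 *	if (rte_eth_tx_burst_mode_get(port_id, queue_id, &mode) == 0)
 *		printf("Tx burst mode: %s\n", mode.info);
 *
 * For the sc_empw selection example above, the reported string would be
 * "Enhanced MPW + SWP + CSUM + METADATA".
 */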