/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015-2019 Mellanox Technologies, Ltd
 */

#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_cycles.h>
#include <rte_flow.h>

#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
	MLX5_TXCMP_CODE_EXIT = 0,
	MLX5_TXCMP_CODE_ERROR,
	MLX5_TXCMP_CODE_SINGLE,
	MLX5_TXCMP_CODE_MULTI,
	MLX5_TXCMP_CODE_TSO,
	MLX5_TXCMP_CODE_EMPW,
};

/*
 * These defines are used to configure the set of Tx burst routine options
 * supported at compile time. Options that are not specified are optimized
 * out, because the related 'if' conditions can be evaluated at compile time.
 * Offloads with a bigger runtime check overhead (more CPU cycles needed to
 * skip them) should have a bigger index - this is needed to select the best
 * matching routine when there is no exact match and some offloads are not
 * actually requested.
 */
#define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets.*/
#define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported.*/
#define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads.*/
#define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Check Sums offloaded. */
#define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported.*/
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported.*/
#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported.*/

/* The most common offloads groups. */
#define MLX5_TXOFF_CONFIG_NONE 0
#define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \
				MLX5_TXOFF_CONFIG_TSO | \
				MLX5_TXOFF_CONFIG_SWP | \
				MLX5_TXOFF_CONFIG_CSUM | \
				MLX5_TXOFF_CONFIG_INLINE | \
				MLX5_TXOFF_CONFIG_VLAN | \
				MLX5_TXOFF_CONFIG_METADATA)

#define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask)

#define MLX5_TXOFF_DECL(func, olx) \
static uint16_t mlx5_tx_burst_##func(void *txq, \
				     struct rte_mbuf **pkts, \
				     uint16_t pkts_n) \
{ \
	return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \
				  pkts, pkts_n, (olx)); \
}

#define MLX5_TXOFF_INFO(func, olx) {mlx5_tx_burst_##func, olx},
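
/*
 * Illustrative usage (not part of the driver, names are hypothetical):
 *
 *   MLX5_TXOFF_DECL(full_empw,
 *		     MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW)
 *
 * expands to a static mlx5_tx_burst_full_empw() wrapper that calls
 * mlx5_tx_burst_tmpl() with a compile-time constant 'olx' mask, so every
 * MLX5_TXOFF_CONFIG(...) test inside the template folds to a constant and
 * the unused offload paths are optimized away. A matching
 * MLX5_TXOFF_INFO(full_empw, ...) entry is meant to list the generated
 * routine in a table of available Tx burst variants.
 */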

static __rte_always_inline uint32_t
rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe);

static __rte_always_inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe);

static __rte_always_inline uint32_t
rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);

static __rte_always_inline void
rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
	       volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res);

static __rte_always_inline void
mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx,
		 const unsigned int strd_n);

static int
mlx5_queue_state_modify(struct rte_eth_dev *dev,
			struct mlx5_mp_arg_queue_state_modify *sm);

static inline void
mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *restrict tcp,
			volatile struct mlx5_cqe *restrict cqe,
			uint32_t phcsum);

static inline void
mlx5_lro_update_hdr(uint8_t *restrict padd,
		    volatile struct mlx5_cqe *restrict cqe,
		    uint32_t len);

uint32_t mlx5_ptype_table[] __rte_cache_aligned = {
	[0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */
};

uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned;
uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned;

/**
 * Build a table to translate Rx completion flags to packet type.
 *
 * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
 */
void
mlx5_set_ptype_table(void)
{
	unsigned int i;
	uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table;

	/* Last entry must not be overwritten, reserved for errored packet. */
	for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i)
		(*p)[i] = RTE_PTYPE_UNKNOWN;
	/*
	 * The index to the array should have:
	 * bit[1:0] = l3_hdr_type
	 * bit[4:2] = l4_hdr_type
	 * bit[5] = ip_frag
	 * bit[6] = tunneled
	 * bit[7] = outer_l3_type
	 */
	/* L2 */
	(*p)[0x00] = RTE_PTYPE_L2_ETHER;
	/* L3 */
	(*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	(*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	/* Fragmented */
	(*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	(*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	/* TCP */
	(*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	/* UDP */
	(*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	(*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	/* Repeat with outer_l3_type being set. Just in case. */
	(*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	(*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_NONFRAG;
	(*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	(*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_FRAG;
	(*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_TCP;
	(*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	(*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_L4_UDP;
	/* Tunneled - L3 */
	(*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
	(*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	(*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	(*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
	(*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	(*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_NONFRAG;
	/* Tunneled - Fragmented */
	(*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	(*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	(*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	(*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_FRAG;
	/* Tunneled - TCP */
	(*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	(*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_TCP;
	/* Tunneled - UDP */
	(*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
	(*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
	(*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
	(*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN |
		     RTE_PTYPE_INNER_L4_UDP;
}
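
/*
 * Worked example (informational only): table index 0x06 decomposes as
 * l3_hdr_type = 2 (bit[1:0]) and l4_hdr_type = 1 (bit[4:2]) with no
 * fragment/tunnel/outer bits, and the entry above yields
 * RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP,
 * i.e. a plain IPv4/TCP packet. rxq_cq_to_pkt_type() below builds this
 * index from the CQE fields.
 */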

/**
 * Build a table to translate packet to checksum type of Verbs.
 */
void
mlx5_set_cksum_table(void)
{
	unsigned int i;
	uint8_t v;

	/*
	 * The index should have:
	 * bit[0] = PKT_TX_TCP_SEG
	 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM
	 * bit[4] = PKT_TX_IP_CKSUM
	 * bit[8] = PKT_TX_OUTER_IP_CKSUM
	 * bit[9] = tunnel
	 */
	for (i = 0; i < RTE_DIM(mlx5_cksum_table); ++i) {
		v = 0;
		if (i & (1 << 9)) {
			/* Tunneled packet. */
			if (i & (1 << 8)) /* Outer IP. */
				v |= MLX5_ETH_WQE_L3_CSUM;
			if (i & (1 << 4)) /* Inner IP. */
				v |= MLX5_ETH_WQE_L3_INNER_CSUM;
			if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */
				v |= MLX5_ETH_WQE_L4_INNER_CSUM;
		} else {
			/* No tunnel. */
			if (i & (1 << 4)) /* IP. */
				v |= MLX5_ETH_WQE_L3_CSUM;
			if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */
				v |= MLX5_ETH_WQE_L4_CSUM;
		}
		mlx5_cksum_table[i] = v;
	}
}

/**
 * Build a table to translate packet type of mbuf to SWP type of Verbs.
 */
void
mlx5_set_swp_types_table(void)
{
	unsigned int i;
	uint8_t v;

	/*
	 * The index should have:
	 * bit[0:1] = PKT_TX_L4_MASK
	 * bit[4] = PKT_TX_IPV6
	 * bit[8] = PKT_TX_OUTER_IPV6
	 * bit[9] = PKT_TX_OUTER_UDP
	 */
	for (i = 0; i < RTE_DIM(mlx5_swp_types_table); ++i) {
		v = 0;
		if (i & (1 << 8))
			v |= MLX5_ETH_WQE_L3_OUTER_IPV6;
		if (i & (1 << 9))
			v |= MLX5_ETH_WQE_L4_OUTER_UDP;
		if (i & (1 << 4))
			v |= MLX5_ETH_WQE_L3_INNER_IPV6;
		if ((i & 3) == (PKT_TX_UDP_CKSUM >> 52))
			v |= MLX5_ETH_WQE_L4_INNER_UDP;
		mlx5_swp_types_table[i] = v;
	}
}

/**
 * Set Software Parser flags and offsets in Ethernet Segment of WQE.
 * Flags must be preliminary initialized to zero.
 *
 * @param loc
 *   Pointer to burst routine local context.
 * @param swp_flags
 *   Pointer to store Software Parser flags.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Software Parser offsets packed in dword.
 *   Software Parser flags are set by pointer.
 */
static __rte_always_inline uint32_t
txq_mbuf_to_swp(struct mlx5_txq_local *restrict loc,
		uint8_t *swp_flags,
		unsigned int olx)
{
	uint64_t ol, tunnel;
	unsigned int idx, off;
	uint32_t set;

	if (!MLX5_TXOFF_CONFIG(SWP))
		return 0;
	ol = loc->mbuf->ol_flags;
	tunnel = ol & PKT_TX_TUNNEL_MASK;
	/*
	 * Check whether Software Parser is required.
	 * Only customized tunnels may ask for it.
	 */
	if (likely(tunnel != PKT_TX_TUNNEL_UDP && tunnel != PKT_TX_TUNNEL_IP))
		return 0;
	/*
	 * The index should have:
	 * bit[0:1] = PKT_TX_L4_MASK
	 * bit[4] = PKT_TX_IPV6
	 * bit[8] = PKT_TX_OUTER_IPV6
	 * bit[9] = PKT_TX_OUTER_UDP
	 */
	idx = (ol & (PKT_TX_L4_MASK | PKT_TX_IPV6 | PKT_TX_OUTER_IPV6)) >> 52;
	idx |= (tunnel == PKT_TX_TUNNEL_UDP) ? (1 << 9) : 0;
	*swp_flags = mlx5_swp_types_table[idx];
	/*
	 * Set offsets for SW parser. Since ConnectX-5, SW parser just
	 * complements HW parser. SW parser starts to engage only if HW parser
	 * can't reach a header. For the older devices, HW parser will not kick
	 * in if any of SWP offsets is set. Therefore, all of the L3 offsets
	 * should be set regardless of HW offload.
	 */
	off = loc->mbuf->outer_l2_len;
	if (MLX5_TXOFF_CONFIG(VLAN) && ol & PKT_TX_VLAN_PKT)
		off += sizeof(struct rte_vlan_hdr);
	set = (off >> 1) << 8; /* Outer L3 offset. */
	off += loc->mbuf->outer_l3_len;
	if (tunnel == PKT_TX_TUNNEL_UDP)
		set |= off >> 1; /* Outer L4 offset. */
	if (ol & (PKT_TX_IPV4 | PKT_TX_IPV6)) { /* Inner IP. */
		const uint64_t csum = ol & PKT_TX_L4_MASK;
		off += loc->mbuf->l2_len;
		set |= (off >> 1) << 24; /* Inner L3 offset. */
		if (csum == PKT_TX_TCP_CKSUM ||
		    csum == PKT_TX_UDP_CKSUM ||
		    (MLX5_TXOFF_CONFIG(TSO) && ol & PKT_TX_TCP_SEG)) {
			off += loc->mbuf->l3_len;
			set |= (off >> 1) << 16; /* Inner L4 offset. */
		}
	}
	set = rte_cpu_to_le_32(set);
	return set;
}
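
/*
 * Layout note (informational only): the dword returned by txq_mbuf_to_swp()
 * packs the Software Parser offsets in 2-byte units:
 *   bits[7:0]   - outer L4 offset (UDP tunnel only),
 *   bits[15:8]  - outer L3 offset,
 *   bits[23:16] - inner L4 offset,
 *   bits[31:24] - inner L3 offset,
 * all counted from the start of the packet. For a hypothetical UDP tunnel
 * with a 14-byte outer Ethernet header and a 20-byte outer IPv4 header, the
 * outer L3 offset field would be 14 / 2 = 7 and the outer L4 offset field
 * (14 + 20) / 2 = 17.
 */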

/**
 * Convert the Checksum offloads to Verbs.
 *
 * @param buf
 *   Pointer to the mbuf.
 *
 * @return
 *   Converted checksum flags.
 */
static __rte_always_inline uint8_t
txq_ol_cksum_to_cs(struct rte_mbuf *buf)
{
	uint32_t idx;
	uint8_t is_tunnel = !!(buf->ol_flags & PKT_TX_TUNNEL_MASK);
	const uint64_t ol_flags_mask = PKT_TX_TCP_SEG | PKT_TX_L4_MASK |
				       PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM;

	/*
	 * The index should have:
	 * bit[0] = PKT_TX_TCP_SEG
	 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM
	 * bit[4] = PKT_TX_IP_CKSUM
	 * bit[8] = PKT_TX_OUTER_IP_CKSUM
	 * bit[9] = tunnel
	 */
	idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9);
	return mlx5_cksum_table[idx];
}

/**
 * Internal function to compute the number of used descriptors in an RX queue.
 *
 * @param rxq
 *   The Rx queue.
 *
 * @return
 *   The number of used Rx descriptors.
 */
static uint32_t
rx_queue_count(struct mlx5_rxq_data *rxq)
{
	struct rxq_zip *zip = &rxq->zip;
	volatile struct mlx5_cqe *cqe;
	const unsigned int cqe_n = (1 << rxq->cqe_n);
	const unsigned int cqe_cnt = cqe_n - 1;
	unsigned int cq_ci;
	unsigned int used;

	/* if we are processing a compressed cqe */
	if (zip->ai) {
		used = zip->cqe_cnt - zip->ca;
		cq_ci = zip->cq_ci;
	} else {
		used = 0;
		cq_ci = rxq->cq_ci;
	}
	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) {
		int8_t op_own;
		unsigned int n;

		op_own = cqe->op_own;
		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
			n = rte_be_to_cpu_32(cqe->byte_cnt);
		else
			n = 1;
		cq_ci += n;
		used += n;
		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	}
	used = RTE_MIN(used, (1U << rxq->elts_n) - 1);
	return used;
}

/**
 * DPDK callback to check the status of an Rx descriptor.
 *
 * @param rx_queue
 *   The Rx queue.
 * @param[in] offset
 *   The index of the descriptor in the ring.
 *
 * @return
 *   The status of the Rx descriptor.
 */
int
mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
{
	struct mlx5_rxq_data *rxq = rx_queue;
	struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);

	if (dev->rx_pkt_burst != mlx5_rx_burst) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	if (offset >= (1 << rxq->elts_n)) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (offset < rx_queue_count(rxq))
		return RTE_ETH_RX_DESC_DONE;
	return RTE_ETH_RX_DESC_AVAIL;
}

/**
 * DPDK callback to get the number of used descriptors in an RX queue.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   The Rx queue.
 *
 * @return
 *   The number of used Rx descriptors.
 *   -EINVAL if the queue is invalid.
 */
uint32_t
mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq;

	if (dev->rx_pkt_burst != mlx5_rx_burst) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	rxq = (*priv->rxqs)[rx_queue_id];
	if (!rxq) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return rx_queue_count(rxq);
}
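
/*
 * Usage sketch (informational only): applications normally reach the two
 * handlers above through the generic ethdev wrappers, e.g.
 *
 *   int st = rte_eth_rx_descriptor_status(port_id, queue_id, offset);
 *   uint32_t used = rte_eth_rx_queue_count(port_id, queue_id);
 *
 * provided the mlx5 PMD registered mlx5_rx_descriptor_status() and
 * mlx5_rx_queue_count() for that port. Both handlers assume the standard
 * (non-vectorized) mlx5_rx_burst is in use, as checked above.
 */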

#define MLX5_SYSTEM_LOG_DIR "/var/log"
/**
 * Dump debug information to log file.
 *
 * @param fname
 *   The file name.
 * @param hex_title
 *   If not NULL this string is printed as a header to the output
 *   and the output will be in hexadecimal view.
 * @param buf
 *   This is the buffer address to print out.
 * @param hex_len
 *   The number of bytes to dump out.
 */
void
mlx5_dump_debug_information(const char *fname, const char *hex_title,
			    const void *buf, unsigned int hex_len)
{
	FILE *fd;

	MKSTR(path, "%s/%s", MLX5_SYSTEM_LOG_DIR, fname);
	fd = fopen(path, "a+");
	if (!fd) {
		DRV_LOG(WARNING, "cannot open %s for debug dump", path);
		MKSTR(path2, "./%s", fname);
		fd = fopen(path2, "a+");
		if (!fd) {
			DRV_LOG(ERR, "cannot open %s for debug dump", path2);
			return;
		}
		DRV_LOG(INFO, "New debug dump in file %s", path2);
	} else {
		DRV_LOG(INFO, "New debug dump in file %s", path);
	}
	if (hex_title)
		rte_hexdump(fd, hex_title, buf, hex_len);
	else
		fprintf(fd, "%s", (const char *)buf);
	fprintf(fd, "\n\n\n");
	fclose(fd);
}

/**
 * Move QP from error state to running state and initialize indexes.
 *
 * @param txq_ctrl
 *   Pointer to TX queue control structure.
 *
 * @return
 *   0 on success, else -1.
 */
static int
tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl)
{
	struct mlx5_mp_arg_queue_state_modify sm = {
			.is_wq = 0,
			.queue_id = txq_ctrl->txq.idx,
	};

	if (mlx5_queue_state_modify(ETH_DEV(txq_ctrl->priv), &sm))
		return -1;
	txq_ctrl->txq.wqe_ci = 0;
	txq_ctrl->txq.wqe_pi = 0;
	txq_ctrl->txq.elts_comp = 0;
	return 0;
}

/*
 * Return 1 if the error CQE is already signed; otherwise sign it and
 * return 0.
 */
static int
check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe)
{
	static const uint8_t magic[] = "seen";
	int ret = 1;
	unsigned int i;

	for (i = 0; i < sizeof(magic); ++i)
		if (!ret || err_cqe->rsvd1[i] != magic[i]) {
			ret = 0;
			err_cqe->rsvd1[i] = magic[i];
		}
	return ret;
}

/**
 * Handle error CQE.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param error_cqe
 *   Pointer to the error CQE.
 *
 * @return
 *   Negative value if queue recovery failed,
 *   the last Tx buffer element to free otherwise.
 */
int
mlx5_tx_error_cqe_handle(struct mlx5_txq_data *restrict txq,
			 volatile struct mlx5_err_cqe *err_cqe)
{
	if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) {
		const uint16_t wqe_m = ((1 << txq->wqe_n) - 1);
		struct mlx5_txq_ctrl *txq_ctrl =
				container_of(txq, struct mlx5_txq_ctrl, txq);
		uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter);
		int seen = check_err_cqe_seen(err_cqe);

		if (!seen && txq_ctrl->dump_file_n <
		    txq_ctrl->priv->config.max_dump_files_num) {
			MKSTR(err_str, "Unexpected CQE error syndrome "
			      "0x%02x CQN = %u SQN = %u wqe_counter = %u "
			      "wq_ci = %u cq_ci = %u", err_cqe->syndrome,
			      txq->cqe_s, txq->qp_num_8s >> 8,
			      rte_be_to_cpu_16(err_cqe->wqe_counter),
			      txq->wqe_ci, txq->cq_ci);
			MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u",
			      PORT_ID(txq_ctrl->priv), txq->idx,
			      txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc());
			mlx5_dump_debug_information(name, NULL, err_str, 0);
			mlx5_dump_debug_information(name, "MLX5 Error CQ:",
						    (const void *)((uintptr_t)
						    txq->cqes),
						    sizeof(*err_cqe) *
						    (1 << txq->cqe_n));
			mlx5_dump_debug_information(name, "MLX5 Error SQ:",
						    (const void *)((uintptr_t)
						    txq->wqes),
						    MLX5_WQE_SIZE *
						    (1 << txq->wqe_n));
			txq_ctrl->dump_file_n++;
		}
		if (!seen)
			/*
			 * Count errors in WQEs units.
			 * Later it can be improved to count error packets,
			 * for example, by SQ parsing to find how many packets
			 * should be counted for each WQE.
			 */
			txq->stats.oerrors += ((txq->wqe_ci & wqe_m) -
						new_wqe_pi) & wqe_m;
		if (tx_recover_qp(txq_ctrl) == 0) {
			txq->cq_ci++;
			/* Release all the remaining buffers. */
			return txq->elts_head;
		}
		/* Recovering failed - try again later on the same WQE. */
		return -1;
	} else {
		txq->cq_ci++;
	}
	/* Do not release buffers. */
	return txq->elts_tail;
}
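
/*
 * Recovery flow summary (informational only): for a non-flush error syndrome
 * the handler above optionally dumps the CQ/SQ rings (bounded by
 * max_dump_files_num), accounts the failed WQE range in stats.oerrors and
 * asks tx_recover_qp() to bring the QP back to a working state. On success
 * the caller may free everything up to elts_head; on failure the same error
 * CQE is retried on the next completion pass.
 */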

/**
 * Translate RX completion flags to packet type.
 *
 * @param[in] rxq
 *   Pointer to RX queue structure.
 * @param[in] cqe
 *   Pointer to CQE.
 *
 * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
 *
 * @return
 *   Packet type for struct rte_mbuf.
 */
static inline uint32_t
rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe)
{
	uint8_t idx;
	uint8_t pinfo = cqe->pkt_info;
	uint16_t ptype = cqe->hdr_type_etc;

	/*
	 * The index to the array should have:
	 * bit[1:0] = l3_hdr_type
	 * bit[4:2] = l4_hdr_type
	 * bit[5] = ip_frag
	 * bit[6] = tunneled
	 * bit[7] = outer_l3_type
	 */
	idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10);
	return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6));
}

/**
 * Initialize Rx WQ and indexes.
 *
 * @param[in] rxq
 *   Pointer to RX queue structure.
 */
void
mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
{
	const unsigned int wqe_n = 1 << rxq->elts_n;
	unsigned int i;

	for (i = 0; (i != wqe_n); ++i) {
		volatile struct mlx5_wqe_data_seg *scat;
		uintptr_t addr;
		uint32_t byte_count;

		if (mlx5_rxq_mprq_enabled(rxq)) {
			struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i];

			scat = &((volatile struct mlx5_wqe_mprq *)
				rxq->wqes)[i].dseg;
			addr = (uintptr_t)mlx5_mprq_buf_addr(buf,
							1 << rxq->strd_num_n);
			byte_count = (1 << rxq->strd_sz_n) *
				     (1 << rxq->strd_num_n);
		} else {
			struct rte_mbuf *buf = (*rxq->elts)[i];

			scat = &((volatile struct mlx5_wqe_data_seg *)
					rxq->wqes)[i];
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			byte_count = DATA_LEN(buf);
		}
		/* scat->addr must be able to store a pointer. */
		assert(sizeof(scat->addr) >= sizeof(uintptr_t));
		*scat = (struct mlx5_wqe_data_seg){
			.addr = rte_cpu_to_be_64(addr),
			.byte_count = rte_cpu_to_be_32(byte_count),
			.lkey = mlx5_rx_addr2mr(rxq, addr),
		};
	}
	rxq->consumed_strd = 0;
	rxq->decompressed = 0;
	rxq->rq_pi = 0;
	rxq->zip = (struct rxq_zip){
		.ai = 0,
	};
	/* Update doorbell counter. */
	rxq->rq_ci = wqe_n >> rxq->sges_n;
	rte_cio_wmb();
	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
}

/**
 * Modify a Verbs/DevX queue state.
 * This must be called from the primary process.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param sm
 *   State modify request parameters.
 *
 * @return
 *   0 in case of success else non-zero value and rte_errno is set.
 */
int
mlx5_queue_state_modify_primary(struct rte_eth_dev *dev,
			const struct mlx5_mp_arg_queue_state_modify *sm)
{
	int ret;
	struct mlx5_priv *priv = dev->data->dev_private;

	if (sm->is_wq) {
		struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq, struct mlx5_rxq_ctrl, rxq);

		if (rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_IBV) {
			struct ibv_wq_attr mod = {
				.attr_mask = IBV_WQ_ATTR_STATE,
				.wq_state = sm->state,
			};

			ret = mlx5_glue->modify_wq(rxq_ctrl->obj->wq, &mod);
		} else { /* rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ. */
			struct mlx5_devx_modify_rq_attr rq_attr;

			memset(&rq_attr, 0, sizeof(rq_attr));
			if (sm->state == IBV_WQS_RESET) {
				rq_attr.rq_state = MLX5_RQC_STATE_ERR;
				rq_attr.state = MLX5_RQC_STATE_RST;
			} else if (sm->state == IBV_WQS_RDY) {
				rq_attr.rq_state = MLX5_RQC_STATE_RST;
				rq_attr.state = MLX5_RQC_STATE_RDY;
			} else if (sm->state == IBV_WQS_ERR) {
				rq_attr.rq_state = MLX5_RQC_STATE_RDY;
				rq_attr.state = MLX5_RQC_STATE_ERR;
			}
			ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq,
						      &rq_attr);
		}
		if (ret) {
			DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s",
					sm->state, strerror(errno));
			rte_errno = errno;
			return ret;
		}
	} else {
		struct mlx5_txq_data *txq = (*priv->txqs)[sm->queue_id];
		struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);
		struct ibv_qp_attr mod = {
			.qp_state = IBV_QPS_RESET,
			.port_num = (uint8_t)priv->ibv_port,
		};
		struct ibv_qp *qp = txq_ctrl->obj->qp;

		ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
		if (ret) {
			DRV_LOG(ERR, "Cannot change the Tx QP state to RESET "
				"%s", strerror(errno));
			rte_errno = errno;
			return ret;
		}
		mod.qp_state = IBV_QPS_INIT;
		ret = mlx5_glue->modify_qp(qp, &mod,
					   (IBV_QP_STATE | IBV_QP_PORT));
		if (ret) {
			DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s",
				strerror(errno));
			rte_errno = errno;
			return ret;
		}
		mod.qp_state = IBV_QPS_RTR;
		ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
		if (ret) {
			DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s",
				strerror(errno));
			rte_errno = errno;
			return ret;
		}
		mod.qp_state = IBV_QPS_RTS;
		ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE);
		if (ret) {
			DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s",
				strerror(errno));
			rte_errno = errno;
			return ret;
		}
	}
	return 0;
}

/**
 * Modify a Verbs queue state.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param sm
 *   State modify request parameters.
 *
 * @return
 *   0 in case of success else non-zero value.
 */
static int
mlx5_queue_state_modify(struct rte_eth_dev *dev,
			struct mlx5_mp_arg_queue_state_modify *sm)
{
	int ret = 0;

	switch (rte_eal_process_type()) {
	case RTE_PROC_PRIMARY:
		ret = mlx5_queue_state_modify_primary(dev, sm);
		break;
	case RTE_PROC_SECONDARY:
		ret = mlx5_mp_req_queue_state_modify(dev, sm);
		break;
	default:
		break;
	}
	return ret;
}
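
/*
 * Usage sketch (informational only): both the Tx recovery path above and the
 * Rx error handler below drive queue recovery through this helper, e.g. to
 * reset an Rx WQ:
 *
 *   struct mlx5_mp_arg_queue_state_modify sm = {
 *           .is_wq = 1,
 *           .queue_id = rxq->idx,
 *           .state = IBV_WQS_RESET,
 *   };
 *   if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm))
 *           return -1;
 *
 * The request is executed directly in the primary process and proxied
 * through the multi-process channel (mlx5_mp_req_queue_state_modify) in
 * secondary processes.
 */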

/**
 * Handle an Rx error.
 * The function moves the RQ to the reset state when the first error CQE is
 * seen, then the CQ is drained by the caller's loop. When the CQ is empty,
 * it moves the RQ state to ready and re-initializes the RQ.
 * Identifying the next CQE and counting errors are the caller's
 * responsibility.
 *
 * @param[in] rxq
 *   Pointer to RX queue structure.
 * @param[in] vec
 *   1 when called from vectorized Rx burst, need to prepare mbufs for the RQ.
 *   0 when called from non-vectorized Rx burst.
 *
 * @return
 *   -1 in case of recovery error, otherwise the CQE status.
 */
int
mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
{
	const uint16_t cqe_n = 1 << rxq->cqe_n;
	const uint16_t cqe_mask = cqe_n - 1;
	const unsigned int wqe_n = 1 << rxq->elts_n;
	struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	union {
		volatile struct mlx5_cqe *cqe;
		volatile struct mlx5_err_cqe *err_cqe;
	} u = {
		.cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask],
	};
	struct mlx5_mp_arg_queue_state_modify sm;
	int ret;

	switch (rxq->err_state) {
	case MLX5_RXQ_ERR_STATE_NO_ERROR:
		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
		/* Fall-through */
	case MLX5_RXQ_ERR_STATE_NEED_RESET:
		sm.is_wq = 1;
		sm.queue_id = rxq->idx;
		sm.state = IBV_WQS_RESET;
		if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm))
			return -1;
		if (rxq_ctrl->dump_file_n <
		    rxq_ctrl->priv->config.max_dump_files_num) {
			MKSTR(err_str, "Unexpected CQE error syndrome "
			      "0x%02x CQN = %u RQN = %u wqe_counter = %u"
			      " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome,
			      rxq->cqn, rxq_ctrl->wqn,
			      rte_be_to_cpu_16(u.err_cqe->wqe_counter),
			      rxq->rq_ci << rxq->sges_n, rxq->cq_ci);
			MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u",
			      rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc());
			mlx5_dump_debug_information(name, NULL, err_str, 0);
			mlx5_dump_debug_information(name, "MLX5 Error CQ:",
						    (const void *)((uintptr_t)
								   rxq->cqes),
						    sizeof(*u.cqe) * cqe_n);
			mlx5_dump_debug_information(name, "MLX5 Error RQ:",
						    (const void *)((uintptr_t)
								   rxq->wqes),
						    16 * wqe_n);
			rxq_ctrl->dump_file_n++;
		}
		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY;
		/* Fall-through */
	case MLX5_RXQ_ERR_STATE_NEED_READY:
		ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci);
		if (ret == MLX5_CQE_STATUS_HW_OWN) {
			rte_cio_wmb();
			*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
			rte_cio_wmb();
			/*
			 * The RQ consumer index must be zeroed while moving
			 * from RESET state to RDY state.
			 */
			*rxq->rq_db = rte_cpu_to_be_32(0);
			rte_cio_wmb();
			sm.is_wq = 1;
			sm.queue_id = rxq->idx;
			sm.state = IBV_WQS_RDY;
			if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv),
						    &sm))
				return -1;
			if (vec) {
				const uint16_t q_mask = wqe_n - 1;
				uint16_t elt_idx;
				struct rte_mbuf **elt;
				int i;
				unsigned int n = wqe_n - (rxq->rq_ci -
							  rxq->rq_pi);

				for (i = 0; i < (int)n; ++i) {
					elt_idx = (rxq->rq_ci + i) & q_mask;
					elt = &(*rxq->elts)[elt_idx];
					*elt = rte_mbuf_raw_alloc(rxq->mp);
					if (!*elt) {
						for (i--; i >= 0; --i) {
							elt_idx = (rxq->rq_ci +
								   i) & q_mask;
							elt = &(*rxq->elts)
								[elt_idx];
							rte_pktmbuf_free_seg
								(*elt);
						}
						return -1;
					}
				}
				for (i = 0; i < (int)wqe_n; ++i) {
					elt = &(*rxq->elts)[i];
					DATA_LEN(*elt) =
						(uint16_t)((*elt)->buf_len -
						rte_pktmbuf_headroom(*elt));
				}
				/* Padding with a fake mbuf for vec Rx. */
				for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
					(*rxq->elts)[wqe_n + i] =
								&rxq->fake_mbuf;
			}
			mlx5_rxq_initialize(rxq);
			rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
		}
		return ret;
	default:
		return -1;
	}
}
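
/*
 * State machine note (informational only): rxq->err_state walks
 * NO_ERROR -> NEED_RESET (RQ moved to RESET, rings optionally dumped) ->
 * NEED_READY (once the CQ is drained and the current CQE is HW-owned, the RQ
 * doorbell is zeroed, the RQ moved to RDY and mlx5_rxq_initialize() re-run)
 * -> NO_ERROR. The Rx burst routines keep calling mlx5_rx_err_handle() until
 * recovery completes or fails.
 */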

/**
 * Get size of the next packet for a given CQE. For compressed CQEs, the
 * consumer index is updated only once all packets of the current one have
 * been processed.
 *
 * @param rxq
 *   Pointer to RX queue.
 * @param cqe
 *   CQE to process.
 * @param[out] mcqe
 *   Store pointer to mini-CQE if compressed. Otherwise, the pointer is not
 *   written.
 *
 * @return
 *   0 in case of empty CQE, otherwise the packet size in bytes.
 */
static inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe)
{
	struct rxq_zip *zip = &rxq->zip;
	uint16_t cqe_n = cqe_cnt + 1;
	int len;
	uint16_t idx, end;

	do {
		len = 0;
		/* Process compressed data in the CQE and mini arrays. */
		if (zip->ai) {
			volatile struct mlx5_mini_cqe8 (*mc)[8] =
				(volatile struct mlx5_mini_cqe8 (*)[8])
				(uintptr_t)(&(*rxq->cqes)[zip->ca &
							  cqe_cnt].pkt_info);

			len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt);
			*mcqe = &(*mc)[zip->ai & 7];
			if ((++zip->ai & 7) == 0) {
				/* Invalidate consumed CQEs */
				idx = zip->ca;
				end = zip->na;
				while (idx != end) {
					(*rxq->cqes)[idx & cqe_cnt].op_own =
						MLX5_CQE_INVALIDATE;
					++idx;
				}
				/*
				 * Increment consumer index to skip the number
				 * of CQEs consumed. Hardware leaves holes in
				 * the CQ ring for software use.
				 */
				zip->ca = zip->na;
				zip->na += 8;
			}
			if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
				/* Invalidate the rest */
				idx = zip->ca;
				end = zip->cq_ci;

				while (idx != end) {
					(*rxq->cqes)[idx & cqe_cnt].op_own =
						MLX5_CQE_INVALIDATE;
					++idx;
				}
				rxq->cq_ci = zip->cq_ci;
				zip->ai = 0;
			}
		/*
		 * No compressed data, get next CQE and verify if it is
		 * compressed.
		 */
		} else {
			int ret;
			int8_t op_own;

			ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
			if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
				if (unlikely(ret == MLX5_CQE_STATUS_ERR ||
					     rxq->err_state)) {
					ret = mlx5_rx_err_handle(rxq, 0);
					if (ret == MLX5_CQE_STATUS_HW_OWN ||
					    ret == -1)
						return 0;
				} else {
					return 0;
				}
			}
			++rxq->cq_ci;
			op_own = cqe->op_own;
			if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
				volatile struct mlx5_mini_cqe8 (*mc)[8] =
					(volatile struct mlx5_mini_cqe8 (*)[8])
					(uintptr_t)(&(*rxq->cqes)
						[rxq->cq_ci &
						 cqe_cnt].pkt_info);

				/* Fix endianness. */
				zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt);
				/*
				 * Current mini array position is the one
				 * returned by check_cqe64().
				 *
				 * If completion comprises several mini arrays,
				 * as a special case the second one is located
				 * 7 CQEs after the initial CQE instead of 8
				 * for subsequent ones.
				 */
				zip->ca = rxq->cq_ci;
				zip->na = zip->ca + 7;
				/* Compute the next non compressed CQE. */
				--rxq->cq_ci;
				zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
				/* Get packet size to return. */
				len = rte_be_to_cpu_32((*mc)[0].byte_cnt);
				*mcqe = &(*mc)[0];
				zip->ai = 1;
				/* Prefetch all to be invalidated */
				idx = zip->ca;
				end = zip->cq_ci;
				while (idx != end) {
					rte_prefetch0(&(*rxq->cqes)[(idx) &
								    cqe_cnt]);
					++idx;
				}
			} else {
				len = rte_be_to_cpu_32(cqe->byte_cnt);
			}
		}
		if (unlikely(rxq->err_state)) {
			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
			++rxq->stats.idropped;
		} else {
			return len;
		}
	} while (1);
}
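
/*
 * Compression bookkeeping example (informational only): if a compressed CQE
 * announces, say, 19 packets (zip->cqe_cnt = 19), the first mini-CQE array
 * starts at zip->ca = cq_ci and the next one is expected at zip->na = ca + 7
 * (7 CQEs after the first one, 8 apart afterwards, as noted above). Each
 * call then returns one mini-CQE, advancing zip->ai and invalidating every
 * fully consumed 8-entry array, and rxq->cq_ci is only moved past the whole
 * session once all 19 entries have been consumed.
 */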

/**
 * Translate RX completion flags to offload flags.
 *
 * @param[in] cqe
 *   Pointer to CQE.
 *
 * @return
 *   Offload flags (ol_flags) for struct rte_mbuf.
 */
static inline uint32_t
rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe)
{
	uint32_t ol_flags = 0;
	uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc);

	ol_flags =
		TRANSPOSE(flags,
			  MLX5_CQE_RX_L3_HDR_VALID,
			  PKT_RX_IP_CKSUM_GOOD) |
		TRANSPOSE(flags,
			  MLX5_CQE_RX_L4_HDR_VALID,
			  PKT_RX_L4_CKSUM_GOOD);
	return ol_flags;
}

/**
 * Fill in mbuf fields from RX completion flags.
 * Note that pkt->ol_flags should be initialized outside of this function.
 *
 * @param rxq
 *   Pointer to RX queue.
 * @param pkt
 *   mbuf to fill.
 * @param cqe
 *   CQE to process.
 * @param rss_hash_res
 *   Packet RSS Hash result.
 */
static inline void
rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
	       volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res)
{
	/* Update packet information. */
	pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe);
	if (rss_hash_res && rxq->rss_hash) {
		pkt->hash.rss = rss_hash_res;
		pkt->ol_flags |= PKT_RX_RSS_HASH;
	}
	if (rxq->mark && MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) {
		pkt->ol_flags |= PKT_RX_FDIR;
		if (cqe->sop_drop_qpn !=
		    rte_cpu_to_be_32(MLX5_FLOW_MARK_DEFAULT)) {
			uint32_t mark = cqe->sop_drop_qpn;

			pkt->ol_flags |= PKT_RX_FDIR_ID;
			pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
		}
	}
	if (rte_flow_dynf_metadata_avail() && cqe->flow_table_metadata) {
		pkt->ol_flags |= PKT_RX_DYNF_METADATA;
		*RTE_FLOW_DYNF_METADATA(pkt) = cqe->flow_table_metadata;
	}
	if (rxq->csum)
		pkt->ol_flags |= rxq_cq_to_ol_flags(cqe);
	if (rxq->vlan_strip &&
	    (cqe->hdr_type_etc & rte_cpu_to_be_16(MLX5_CQE_VLAN_STRIPPED))) {
		pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
		pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info);
	}
	if (rxq->hw_timestamp) {
		pkt->timestamp = rte_be_to_cpu_64(cqe->timestamp);
		pkt->ol_flags |= PKT_RX_TIMESTAMP;
	}
}

/**
 * DPDK callback for RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
	const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
	const unsigned int sges_n = rxq->sges_n;
	struct rte_mbuf *pkt = NULL;
	struct rte_mbuf *seg = NULL;
	volatile struct mlx5_cqe *cqe =
		&(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
	unsigned int i = 0;
	unsigned int rq_ci = rxq->rq_ci << sges_n;
	int len = 0; /* keep its value across iterations. */

	while (pkts_n) {
		unsigned int idx = rq_ci & wqe_cnt;
		volatile struct mlx5_wqe_data_seg *wqe =
			&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
		struct rte_mbuf *rep = (*rxq->elts)[idx];
		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
		uint32_t rss_hash_res;

		if (pkt)
			NEXT(seg) = rep;
		seg = rep;
		rte_prefetch0(seg);
		rte_prefetch0(cqe);
		rte_prefetch0(wqe);
		rep = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(rep == NULL)) {
			++rxq->stats.rx_nombuf;
			if (!pkt) {
				/*
				 * no buffers before we even started,
				 * bail out silently.
				 */
				break;
			}
			while (pkt != seg) {
				assert(pkt != (*rxq->elts)[idx]);
				rep = NEXT(pkt);
				NEXT(pkt) = NULL;
				NB_SEGS(pkt) = 1;
				rte_mbuf_raw_free(pkt);
				pkt = rep;
			}
			break;
		}
		if (!pkt) {
			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
			len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe);
			if (!len) {
				rte_mbuf_raw_free(rep);
				break;
			}
			pkt = seg;
			assert(len >= (rxq->crc_present << 2));
			pkt->ol_flags = 0;
			/* If compressed, take hash result from mini-CQE. */
			rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ?
							cqe->rx_hash_res :
							mcqe->rx_hash_result);
			rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
			if (rxq->crc_present)
				len -= RTE_ETHER_CRC_LEN;
			PKT_LEN(pkt) = len;
			if (cqe->lro_num_seg > 1) {
				mlx5_lro_update_hdr
					(rte_pktmbuf_mtod(pkt, uint8_t *), cqe,
					 len);
				pkt->ol_flags |= PKT_RX_LRO;
				pkt->tso_segsz = len / cqe->lro_num_seg;
			}
		}
		DATA_LEN(rep) = DATA_LEN(seg);
		PKT_LEN(rep) = PKT_LEN(seg);
		SET_DATA_OFF(rep, DATA_OFF(seg));
		PORT(rep) = PORT(seg);
		(*rxq->elts)[idx] = rep;
		/*
		 * Fill NIC descriptor with the new buffer. The lkey and size
		 * of the buffers are already known, only the buffer address
		 * changes.
		 */
		wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
		/* If there's only one MR, no need to replace LKey in WQE. */
		if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
			wqe->lkey = mlx5_rx_mb2mr(rxq, rep);
		if (len > DATA_LEN(seg)) {
			len -= DATA_LEN(seg);
			++NB_SEGS(pkt);
			++rq_ci;
			continue;
		}
		DATA_LEN(seg) = len;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment bytes counter. */
		rxq->stats.ibytes += PKT_LEN(pkt);
#endif
		/* Return packet. */
		*(pkts++) = pkt;
		pkt = NULL;
		--pkts_n;
		++i;
		/* Align consumer index to the next stride. */
		rq_ci >>= sges_n;
		++rq_ci;
		rq_ci <<= sges_n;
	}
	if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci)))
		return 0;
	/* Update the consumer index. */
	rxq->rq_ci = rq_ci >> sges_n;
	rte_cio_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	rte_cio_wmb();
	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment packets counter. */
	rxq->stats.ipackets += i;
#endif
	return i;
}
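
/*
 * Usage sketch (informational only): applications do not call mlx5_rx_burst()
 * directly; it is installed as dev->rx_pkt_burst and reached through the
 * generic API, e.g.
 *
 *   struct rte_mbuf *bufs[32];
 *   uint16_t nb = rte_eth_rx_burst(port_id, queue_id, bufs, 32);
 *
 * Each returned mbuf carries the fields filled in by rxq_cq_to_mbuf() above
 * (packet type, RSS hash, VLAN, timestamp and checksum flags as configured).
 */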

/**
 * Update LRO packet TCP header.
 * The HW LRO feature doesn't update the TCP header after coalescing the
 * TCP segments but supplies information in CQE to fill it by SW.
 *
 * @param tcp
 *   Pointer to the TCP header.
 * @param cqe
 *   Pointer to the completion entry.
 * @param phcsum
 *   The L3 pseudo-header checksum.
 */
static inline void
mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *restrict tcp,
			volatile struct mlx5_cqe *restrict cqe,
			uint32_t phcsum)
{
	uint8_t l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) &
			   MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT;
	/*
	 * The HW calculates only the TCP payload checksum, need to complete
	 * the TCP header checksum and the L3 pseudo-header checksum.
	 */
	uint32_t csum = phcsum + cqe->csum;

	if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK ||
	    l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) {
		tcp->tcp_flags |= RTE_TCP_ACK_FLAG;
		tcp->recv_ack = cqe->lro_ack_seq_num;
		tcp->rx_win = cqe->lro_tcp_win;
	}
	if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK)
		tcp->tcp_flags |= RTE_TCP_PSH_FLAG;
	tcp->cksum = 0;
	csum += rte_raw_cksum(tcp, (tcp->data_off & 0xF) * 4);
	csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff);
	csum = (~csum) & 0xffff;
	if (csum == 0)
		csum = 0xffff;
	tcp->cksum = csum;
}

/**
 * Update LRO packet headers.
 * The HW LRO feature doesn't update the L3/TCP headers after coalescing the
 * TCP segments but supplies information in CQE to fill them by SW.
 *
 * @param padd
 *   The packet address.
 * @param cqe
 *   Pointer to the completion entry.
 * @param len
 *   The packet length.
 */
static inline void
mlx5_lro_update_hdr(uint8_t *restrict padd,
		    volatile struct mlx5_cqe *restrict cqe,
		    uint32_t len)
{
	union {
		struct rte_ether_hdr *eth;
		struct rte_vlan_hdr *vlan;
		struct rte_ipv4_hdr *ipv4;
		struct rte_ipv6_hdr *ipv6;
		struct rte_tcp_hdr *tcp;
		uint8_t *hdr;
	} h = {
			.hdr = padd,
	};
	uint16_t proto = h.eth->ether_type;
	uint32_t phcsum;

	h.eth++;
	while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) ||
	       proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) {
		proto = h.vlan->eth_proto;
		h.vlan++;
	}
	if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) {
		h.ipv4->time_to_live = cqe->lro_min_ttl;
		h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd));
		h.ipv4->hdr_checksum = 0;
		h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4);
		phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0);
		h.ipv4++;
	} else {
		h.ipv6->hop_limits = cqe->lro_min_ttl;
		h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) -
						       sizeof(*h.ipv6));
		phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0);
		h.ipv6++;
	}
	mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum);
}

void
mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque)
{
	struct mlx5_mprq_buf *buf = opaque;

	if (rte_atomic16_read(&buf->refcnt) == 1) {
		rte_mempool_put(buf->mp, buf);
	} else if (rte_atomic16_add_return(&buf->refcnt, -1) == 0) {
		rte_atomic16_set(&buf->refcnt, 1);
		rte_mempool_put(buf->mp, buf);
	}
}

void
mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf)
{
	mlx5_mprq_buf_free_cb(NULL, buf);
}

static inline void
mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx,
		 const unsigned int strd_n)
{
	struct mlx5_mprq_buf *rep = rxq->mprq_repl;
	volatile struct mlx5_wqe_data_seg *wqe =
		&((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg;
	void *addr;

	assert(rep != NULL);
	/* Replace MPRQ buf. */
	(*rxq->mprq_bufs)[rq_idx] = rep;
	/* Replace WQE. */
	addr = mlx5_mprq_buf_addr(rep, strd_n);
	wqe->addr = rte_cpu_to_be_64((uintptr_t)addr);
	/* If there's only one MR, no need to replace LKey in WQE. */
	if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
		wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr);
	/* Stash a mbuf for next replacement. */
	if (likely(!rte_mempool_get(rxq->mprq_mp, (void **)&rep)))
		rxq->mprq_repl = rep;
	else
		rxq->mprq_repl = NULL;
}

/**
 * DPDK callback for RX with Multi-Packet RQ support.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	const unsigned int strd_n = 1 << rxq->strd_num_n;
	const unsigned int strd_sz = 1 << rxq->strd_sz_n;
	const unsigned int strd_shift =
		MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en;
	const unsigned int cq_mask = (1 << rxq->cqe_n) - 1;
	const unsigned int wq_mask = (1 << rxq->elts_n) - 1;
	volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
	unsigned int i = 0;
	uint32_t rq_ci = rxq->rq_ci;
	uint16_t consumed_strd = rxq->consumed_strd;
	uint16_t headroom_sz = rxq->strd_headroom_en * RTE_PKTMBUF_HEADROOM;
	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];

	while (i < pkts_n) {
		struct rte_mbuf *pkt;
		void *addr;
		int ret;
		unsigned int len;
		uint16_t strd_cnt;
		uint16_t strd_idx;
		uint32_t offset;
		uint32_t byte_cnt;
		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
		uint32_t rss_hash_res = 0;
		uint8_t lro_num_seg;

		if (consumed_strd == strd_n) {
			/* Replace WQE only if the buffer is still in use. */
			if (rte_atomic16_read(&buf->refcnt) > 1) {
				mprq_buf_replace(rxq, rq_ci & wq_mask, strd_n);
				/* Release the old buffer. */
				mlx5_mprq_buf_free(buf);
			} else if (unlikely(rxq->mprq_repl == NULL)) {
				struct mlx5_mprq_buf *rep;

				/*
				 * Currently, the MPRQ mempool is out of
				 * buffers and memcpy is used regardless of
				 * the size of the Rx packet. Retry the
				 * allocation to get back to normal.
				 */
				if (!rte_mempool_get(rxq->mprq_mp,
						     (void **)&rep))
					rxq->mprq_repl = rep;
			}
			/* Advance to the next WQE. */
			consumed_strd = 0;
			++rq_ci;
			buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
		}
		cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
		ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);
		if (!ret)
			break;
		byte_cnt = ret;
		strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
			   MLX5_MPRQ_STRIDE_NUM_SHIFT;
		assert(strd_cnt);
		consumed_strd += strd_cnt;
		if (byte_cnt & MLX5_MPRQ_FILLER_MASK)
			continue;
		if (mcqe == NULL) {
			rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res);
			strd_idx = rte_be_to_cpu_16(cqe->wqe_counter);
		} else {
			/* mini-CQE for MPRQ doesn't have hash result. */
			strd_idx = rte_be_to_cpu_16(mcqe->stride_idx);
		}
		assert(strd_idx < strd_n);
		assert(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & wq_mask));
		lro_num_seg = cqe->lro_num_seg;
		/*
		 * Currently configured to receive a packet per stride. But if
		 * MTU is adjusted through kernel interface, device could
		 * consume multiple strides without raising an error. In this
		 * case, the packet should be dropped because it is bigger
		 * than the max_rx_pkt_len.
		 */
		if (unlikely(!lro_num_seg && strd_cnt > 1)) {
			++rxq->stats.idropped;
			continue;
		}
		pkt = rte_pktmbuf_alloc(rxq->mp);
		if (unlikely(pkt == NULL)) {
			++rxq->stats.rx_nombuf;
			break;
		}
		len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
		assert((int)len >= (rxq->crc_present << 2));
		if (rxq->crc_present)
			len -= RTE_ETHER_CRC_LEN;
		offset = strd_idx * strd_sz + strd_shift;
		addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset);
		/*
		 * Memcpy packets to the target mbuf if:
		 * - The size of packet is smaller than mprq_max_memcpy_len.
		 * - Out of buffer in the Mempool for Multi-Packet RQ.
		 */
		if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl == NULL) {
			/*
			 * When memcpy'ing packet due to out-of-buffer, the
			 * packet must be smaller than the target mbuf.
			 */
			if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) {
				rte_pktmbuf_free_seg(pkt);
				++rxq->stats.idropped;
				continue;
			}
			rte_memcpy(rte_pktmbuf_mtod(pkt, void *), addr, len);
			DATA_LEN(pkt) = len;
		} else {
			rte_iova_t buf_iova;
			struct rte_mbuf_ext_shared_info *shinfo;
			uint16_t buf_len = strd_cnt * strd_sz;
			void *buf_addr;

			/* Increment the refcnt of the whole chunk. */
			rte_atomic16_add_return(&buf->refcnt, 1);
			assert((uint16_t)rte_atomic16_read(&buf->refcnt) <=
			       strd_n + 1);
			buf_addr = RTE_PTR_SUB(addr, headroom_sz);
			/*
			 * MLX5 device doesn't use iova but it is necessary in a
			 * case where the Rx packet is transmitted via a
			 * different PMD.
			 */
			buf_iova = rte_mempool_virt2iova(buf) +
				   RTE_PTR_DIFF(buf_addr, buf);
			shinfo = &buf->shinfos[strd_idx];
			rte_mbuf_ext_refcnt_set(shinfo, 1);
			/*
			 * EXT_ATTACHED_MBUF will be set to pkt->ol_flags when
			 * attaching the stride to mbuf and more offload flags
			 * will be added below by calling rxq_cq_to_mbuf().
			 * Other fields will be overwritten.
			 */
			rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova,
						  buf_len, shinfo);
			/* Set mbuf head-room. */
			pkt->data_off = headroom_sz;
			assert(pkt->ol_flags == EXT_ATTACHED_MBUF);
			/*
			 * Prevent potential overflow due to MTU change through
			 * kernel interface.
			 */
			if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) {
				rte_pktmbuf_free_seg(pkt);
				++rxq->stats.idropped;
				continue;
			}
			DATA_LEN(pkt) = len;
			/*
			 * LRO packet may consume all the stride memory, in
			 * this case packet head-room space is not guaranteed
			 * so an empty mbuf must be added for the head-room.
			 */
			if (!rxq->strd_headroom_en) {
				struct rte_mbuf *headroom_mbuf =
						rte_pktmbuf_alloc(rxq->mp);

				if (unlikely(headroom_mbuf == NULL)) {
					rte_pktmbuf_free_seg(pkt);
					++rxq->stats.rx_nombuf;
					break;
				}
				PORT(pkt) = rxq->port_id;
				NEXT(headroom_mbuf) = pkt;
				pkt = headroom_mbuf;
				NB_SEGS(pkt) = 2;
			}
		}
		rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
		if (lro_num_seg > 1) {
			mlx5_lro_update_hdr(addr, cqe, len);
			pkt->ol_flags |= PKT_RX_LRO;
			pkt->tso_segsz = strd_sz;
		}
		PKT_LEN(pkt) = len;
		PORT(pkt) = rxq->port_id;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment bytes counter. */
		rxq->stats.ibytes += PKT_LEN(pkt);
#endif
		/* Return packet. */
		*(pkts++) = pkt;
		++i;
	}
	/* Update the consumer indexes. */
	rxq->consumed_strd = consumed_strd;
	rte_cio_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	if (rq_ci != rxq->rq_ci) {
		rxq->rq_ci = rq_ci;
		rte_cio_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment packets counter. */
	rxq->stats.ipackets += i;
#endif
	return i;
}

/**
 * Dummy DPDK callback for TX.
 *
 * This function is used to temporarily replace the real callback during
 * unsafe control operations on the queue, or in case of error.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
removed_tx_burst(void *dpdk_txq __rte_unused,
		 struct rte_mbuf **pkts __rte_unused,
		 uint16_t pkts_n __rte_unused)
{
	rte_mb();
	return 0;
}

/**
 * Dummy DPDK callback for RX.
 *
 * This function is used to temporarily replace the real callback during
 * unsafe control operations on the queue, or in case of error.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
removed_rx_burst(void *dpdk_txq __rte_unused,
		 struct rte_mbuf **pkts __rte_unused,
		 uint16_t pkts_n __rte_unused)
{
	rte_mb();
	return 0;
}

/*
 * Vectorized Rx/Tx routines are not compiled in when required vector
 * instructions are not supported on a target architecture. The following null
 * stubs are needed for linkage when those are not included outside of this
 * file (e.g. mlx5_rxtx_vec_sse.c for x86).
 */

__rte_weak uint16_t
mlx5_rx_burst_vec(void *dpdk_txq __rte_unused,
		  struct rte_mbuf **pkts __rte_unused,
		  uint16_t pkts_n __rte_unused)
{
	return 0;
}

__rte_weak int
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
{
	return -ENOTSUP;
}

__rte_weak int
mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)
{
	return -ENOTSUP;
}
1880 */ 1881 mbuf = rte_pktmbuf_prefree_seg(*pkts); 1882 if (likely(mbuf != NULL)) { 1883 assert(mbuf == *pkts); 1884 if (likely(n_free != 0)) { 1885 if (unlikely(pool != mbuf->pool)) 1886 /* From different pool. */ 1887 break; 1888 } else { 1889 /* Start new scan array. */ 1890 pool = mbuf->pool; 1891 p_free = pkts; 1892 } 1893 ++n_free; 1894 ++pkts; 1895 --pkts_n; 1896 if (unlikely(pkts_n == 0)) { 1897 mbuf = NULL; 1898 break; 1899 } 1900 } else { 1901 /* 1902 * This happens if mbuf is still referenced. 1903 * We can't put it back to the pool, skip. 1904 */ 1905 ++pkts; 1906 --pkts_n; 1907 if (unlikely(n_free != 0)) 1908 /* There is some array to free.*/ 1909 break; 1910 if (unlikely(pkts_n == 0)) 1911 /* Last mbuf, nothing to free. */ 1912 return; 1913 } 1914 } 1915 for (;;) { 1916 /* 1917 * This loop is implemented to avoid multiple 1918 * inlining of rte_mempool_put_bulk(). 1919 */ 1920 assert(pool); 1921 assert(p_free); 1922 assert(n_free); 1923 /* 1924 * Free the array of pre-freed mbufs 1925 * belonging to the same memory pool. 1926 */ 1927 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 1928 if (unlikely(mbuf != NULL)) { 1929 /* There is the request to start new scan. */ 1930 pool = mbuf->pool; 1931 p_free = pkts++; 1932 n_free = 1; 1933 --pkts_n; 1934 if (likely(pkts_n != 0)) 1935 break; 1936 /* 1937 * This is the last mbuf to be freed. 1938 * Do one more loop iteration to complete. 1939 * This is rare case of the last unique mbuf. 1940 */ 1941 mbuf = NULL; 1942 continue; 1943 } 1944 if (likely(pkts_n == 0)) 1945 return; 1946 n_free = 0; 1947 break; 1948 } 1949 } 1950 } 1951 1952 /** 1953 * Free the mbuf from the elts ring buffer till new tail. 1954 * 1955 * @param txq 1956 * Pointer to Tx queue structure. 1957 * @param tail 1958 * Index in elts to free up to, becomes new elts tail. 1959 * @param olx 1960 * Configured Tx offloads mask. It is fully defined at 1961 * compile time and may be used for optimization. 1962 */ 1963 static __rte_always_inline void 1964 mlx5_tx_free_elts(struct mlx5_txq_data *restrict txq, 1965 uint16_t tail, 1966 unsigned int olx __rte_unused) 1967 { 1968 uint16_t n_elts = tail - txq->elts_tail; 1969 1970 assert(n_elts); 1971 assert(n_elts <= txq->elts_s); 1972 /* 1973 * Implement a loop to support ring buffer wraparound 1974 * with single inlining of mlx5_tx_free_mbuf(). 1975 */ 1976 do { 1977 unsigned int part; 1978 1979 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 1980 part = RTE_MIN(part, n_elts); 1981 assert(part); 1982 assert(part <= txq->elts_s); 1983 mlx5_tx_free_mbuf(&txq->elts[txq->elts_tail & txq->elts_m], 1984 part, olx); 1985 txq->elts_tail += part; 1986 n_elts -= part; 1987 } while (n_elts); 1988 } 1989 1990 /** 1991 * Store the mbuf being sent into elts ring buffer. 1992 * On Tx completion these mbufs will be freed. 1993 * 1994 * @param txq 1995 * Pointer to Tx queue structure. 1996 * @param pkts 1997 * Pointer to array of packets to be stored. 1998 * @param pkts_n 1999 * Number of packets to be stored. 2000 * @param olx 2001 * Configured Tx offloads mask. It is fully defined at 2002 * compile time and may be used for optimization. 
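 *
 * Worked example (values assumed for illustration only): with
 * elts_s == 256, elts_m == 255 and elts_head == 250, storing
 * pkts_n == 10 mbuf pointers gives part == 6, so the first rte_memcpy()
 * fills the tail of elts[] with 6 pointers and the second one wraps
 * around and stores the remaining 4 pointers at the beginning of the
 * array.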
2003 */
2004 static __rte_always_inline void
2005 mlx5_tx_copy_elts(struct mlx5_txq_data *restrict txq,
2006 struct rte_mbuf **restrict pkts,
2007 unsigned int pkts_n,
2008 unsigned int olx __rte_unused)
2009 {
2010 unsigned int part;
2011 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts;
2012
2013 assert(pkts);
2014 assert(pkts_n);
2015 part = txq->elts_s - (txq->elts_head & txq->elts_m);
2016 assert(part);
2017 assert(part <= txq->elts_s);
2018 /* This code is a good candidate for vectorizing with SIMD. */
2019 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)),
2020 (void *)pkts,
2021 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *));
2022 txq->elts_head += pkts_n;
2023 if (unlikely(part < pkts_n))
2024 /* The copy is wrapping around the elts array. */
2025 rte_memcpy((void *)elts, (void *)(pkts + part),
2026 (pkts_n - part) * sizeof(struct rte_mbuf *));
2027 }
2028
2029 /**
2030 * Update completion queue consuming index via doorbell
2031 * and flush the completed data buffers.
2032 *
2033 * @param txq
2034 * Pointer to TX queue structure.
2035 * @param last_cqe
2036 * Valid CQE pointer - if not NULL, update txq->wqe_pi and flush the buffers.
2037 * @param itail
2038 * If not negative, flush the buffers up to this index.
2039 * @param olx
2040 * Configured Tx offloads mask. It is fully defined at
2041 * compile time and may be used for optimization.
2042 */
2043 static __rte_always_inline void
2044 mlx5_tx_comp_flush(struct mlx5_txq_data *restrict txq,
2045 volatile struct mlx5_cqe *last_cqe,
2046 int itail,
2047 unsigned int olx __rte_unused)
2048 {
2049 uint16_t tail;
2050
2051 if (likely(last_cqe != NULL)) {
2052 txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
2053 tail = ((volatile struct mlx5_wqe_cseg *)
2054 (txq->wqes + (txq->wqe_pi & txq->wqe_m)))->misc;
2055 } else if (itail >= 0) {
2056 tail = (uint16_t)itail;
2057 } else {
2058 return;
2059 }
2060 rte_compiler_barrier();
2061 *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
2062 if (likely(tail != txq->elts_tail)) {
2063 mlx5_tx_free_elts(txq, tail, olx);
2064 assert(tail == txq->elts_tail);
2065 }
2066 }
2067
2068 /**
2069 * Manage TX completions. This routine checks the CQ for
2070 * newly arrived CQEs, deduces the last completed WQE in the SQ,
2071 * updates the SQ producer index and frees all completed mbufs.
2072 *
2073 * @param txq
2074 * Pointer to TX queue structure.
2075 * @param olx
2076 * Configured Tx offloads mask. It is fully defined at
2077 * compile time and may be used for optimization.
2078 *
2079 * NOTE: not inlined intentionally, it makes the tx_burst
2080 * routine smaller, simpler and faster - from experiments.
2081 */
2082 static void
2083 mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq,
2084 unsigned int olx __rte_unused)
2085 {
2086 unsigned int count = MLX5_TX_COMP_MAX_CQE;
2087 volatile struct mlx5_cqe *last_cqe = NULL;
2088 int ret;
2089
2090 static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value");
2091 static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be negative value");
2092 do {
2093 volatile struct mlx5_cqe *cqe;
2094
2095 cqe = &txq->cqes[txq->cq_ci & txq->cqe_m];
2096 ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci);
2097 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
2098 if (likely(ret != MLX5_CQE_STATUS_ERR)) {
2099 /* No new CQEs in completion queue. */
2100 assert(ret == MLX5_CQE_STATUS_HW_OWN);
2101 break;
2102 }
2103 /*
2104 * Some error occurred, try to restart.
2105 * There is no barrier after the WQE-related doorbell
2106 * write, make sure all writes are completed
2107 * here before we might perform an SQ reset.
2108 */
2109 rte_wmb();
2110 ret = mlx5_tx_error_cqe_handle
2111 (txq, (volatile struct mlx5_err_cqe *)cqe);
2112 /*
2113 * Flush buffers, update the consumer index
2114 * if recovery succeeded. Otherwise
2115 * just try to recover later.
2116 */
2117 last_cqe = NULL;
2118 break;
2119 }
2120 /* Normal transmit completion. */
2121 ++txq->cq_ci;
2122 last_cqe = cqe;
2123 #ifndef NDEBUG
2124 if (txq->cq_pi)
2125 --txq->cq_pi;
2126 #endif
2127 /*
2128 * We have to restrict the amount of processed CQEs
2129 * in one tx_burst routine call. The CQ may be large
2130 * and many CQEs may be updated by the NIC in one
2131 * transaction. Buffers freeing is time consuming,
2132 * multiple iterations may introduce significant
2133 * latency.
2134 */
2135 } while (--count);
2136 mlx5_tx_comp_flush(txq, last_cqe, ret, olx);
2137 }
2138
2139 /**
2140 * Check if the completion request flag should be set in the last WQE.
2141 * Both pushed mbufs and WQEs are monitored and the completion request
2142 * flag is set if any of the thresholds is reached.
2143 *
2144 * @param txq
2145 * Pointer to TX queue structure.
2146 * @param loc
2147 * Pointer to burst routine local context.
2148 * @param multi
2149 * Routine is called from the multi-segment sending loop,
2150 * do not correct the elts_head according to the pkts_copy.
2151 * @param olx
2152 * Configured Tx offloads mask. It is fully defined at
2153 * compile time and may be used for optimization.
2154 */
2155 static __rte_always_inline void
2156 mlx5_tx_request_completion(struct mlx5_txq_data *restrict txq,
2157 struct mlx5_txq_local *restrict loc,
2158 bool multi,
2159 unsigned int olx)
2160 {
2161 uint16_t head = txq->elts_head;
2162 unsigned int part;
2163
2164 part = (MLX5_TXOFF_CONFIG(INLINE) || multi) ?
2165 0 : loc->pkts_sent - loc->pkts_copy;
2166 head += part;
2167 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH ||
2168 (MLX5_TXOFF_CONFIG(INLINE) &&
2169 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) {
2170 volatile struct mlx5_wqe *last = loc->wqe_last;
2171
2172 txq->elts_comp = head;
2173 if (MLX5_TXOFF_CONFIG(INLINE))
2174 txq->wqe_comp = txq->wqe_ci;
2175 /* Request unconditional completion on last WQE. */
2176 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
2177 MLX5_COMP_MODE_OFFSET);
2178 /* Save elts_head in unused "immediate" field of WQE. */
2179 last->cseg.misc = head;
2180 /*
2181 * A CQE slot must always be available. Count the
2182 * issued CQE "always" requests instead of the producer
2183 * index, because CQEs with errors may occur and the
2184 * difference with cq_ci may become inconsistent.
2185 */
2186 assert(txq->cqe_s > ++txq->cq_pi);
2187 }
2188 }
2189
2190 /**
2191 * DPDK callback to check the status of a Tx descriptor.
2192 *
2193 * @param tx_queue
2194 * The Tx queue.
2195 * @param[in] offset
2196 * The index of the descriptor in the ring.
2197 *
2198 * @return
2199 * The status of the Tx descriptor.
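 *
 * A minimal usage sketch from the application side (port_id, queue_id
 * and offset are illustrative values, not taken from this file); the
 * generic ethdev wrapper dispatches to this callback:
 *
 *   int st = rte_eth_tx_descriptor_status(port_id, queue_id, offset);
 *   if (st == RTE_ETH_TX_DESC_FULL)
 *       rte_pause();
 *
 * RTE_ETH_TX_DESC_FULL means the descriptor is still in flight,
 * RTE_ETH_TX_DESC_DONE means the slot can be reused.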
2200 */ 2201 int 2202 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) 2203 { 2204 struct mlx5_txq_data *restrict txq = tx_queue; 2205 uint16_t used; 2206 2207 mlx5_tx_handle_completion(txq, 0); 2208 used = txq->elts_head - txq->elts_tail; 2209 if (offset < used) 2210 return RTE_ETH_TX_DESC_FULL; 2211 return RTE_ETH_TX_DESC_DONE; 2212 } 2213 2214 /** 2215 * Build the Control Segment with specified opcode: 2216 * - MLX5_OPCODE_SEND 2217 * - MLX5_OPCODE_ENHANCED_MPSW 2218 * - MLX5_OPCODE_TSO 2219 * 2220 * @param txq 2221 * Pointer to TX queue structure. 2222 * @param loc 2223 * Pointer to burst routine local context. 2224 * @param wqe 2225 * Pointer to WQE to fill with built Control Segment. 2226 * @param ds 2227 * Supposed length of WQE in segments. 2228 * @param opcode 2229 * SQ WQE opcode to put into Control Segment. 2230 * @param olx 2231 * Configured Tx offloads mask. It is fully defined at 2232 * compile time and may be used for optimization. 2233 */ 2234 static __rte_always_inline void 2235 mlx5_tx_cseg_init(struct mlx5_txq_data *restrict txq, 2236 struct mlx5_txq_local *restrict loc __rte_unused, 2237 struct mlx5_wqe *restrict wqe, 2238 unsigned int ds, 2239 unsigned int opcode, 2240 unsigned int olx __rte_unused) 2241 { 2242 struct mlx5_wqe_cseg *restrict cs = &wqe->cseg; 2243 2244 /* For legacy MPW replace the EMPW by TSO with modifier. */ 2245 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 2246 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 2247 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 2248 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2249 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 2250 MLX5_COMP_MODE_OFFSET); 2251 cs->misc = RTE_BE32(0); 2252 } 2253 2254 /** 2255 * Build the Ethernet Segment without inlined data. 2256 * Supports Software Parser, Checksums and VLAN 2257 * insertion Tx offload features. 2258 * 2259 * @param txq 2260 * Pointer to TX queue structure. 2261 * @param loc 2262 * Pointer to burst routine local context. 2263 * @param wqe 2264 * Pointer to WQE to fill with built Ethernet Segment. 2265 * @param olx 2266 * Configured Tx offloads mask. It is fully defined at 2267 * compile time and may be used for optimization. 2268 */ 2269 static __rte_always_inline void 2270 mlx5_tx_eseg_none(struct mlx5_txq_data *restrict txq __rte_unused, 2271 struct mlx5_txq_local *restrict loc, 2272 struct mlx5_wqe *restrict wqe, 2273 unsigned int olx) 2274 { 2275 struct mlx5_wqe_eseg *restrict es = &wqe->eseg; 2276 uint32_t csum; 2277 2278 /* 2279 * Calculate and set check sum flags first, dword field 2280 * in segment may be shared with Software Parser flags. 2281 */ 2282 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2283 es->flags = rte_cpu_to_le_32(csum); 2284 /* 2285 * Calculate and set Software Parser offsets and flags. 2286 * These flags a set for custom UDP and IP tunnel packets. 2287 */ 2288 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2289 /* Fill metadata field if needed. */ 2290 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2291 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2292 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2293 /* Engage VLAN tag insertion feature if requested. */ 2294 if (MLX5_TXOFF_CONFIG(VLAN) && 2295 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 2296 /* 2297 * We should get here only if device support 2298 * this feature correctly. 
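 * For illustration (an assumed TCI value, not taken from the code):
 * with vlan_tci == 0x0064 the assignment below produces
 * MLX5_ETH_WQE_VLAN_INSERT | 0x0064 in big-endian form, asking the
 * NIC to insert an 802.1Q header with VLAN ID 100 right after the
 * source MAC address.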
2299 */ 2300 assert(txq->vlan_en); 2301 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 2302 loc->mbuf->vlan_tci); 2303 } else { 2304 es->inline_hdr = RTE_BE32(0); 2305 } 2306 } 2307 2308 /** 2309 * Build the Ethernet Segment with minimal inlined data 2310 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 2311 * used to fill the gap in single WQEBB WQEs. 2312 * Supports Software Parser, Checksums and VLAN 2313 * insertion Tx offload features. 2314 * 2315 * @param txq 2316 * Pointer to TX queue structure. 2317 * @param loc 2318 * Pointer to burst routine local context. 2319 * @param wqe 2320 * Pointer to WQE to fill with built Ethernet Segment. 2321 * @param vlan 2322 * Length of VLAN tag insertion if any. 2323 * @param olx 2324 * Configured Tx offloads mask. It is fully defined at 2325 * compile time and may be used for optimization. 2326 */ 2327 static __rte_always_inline void 2328 mlx5_tx_eseg_dmin(struct mlx5_txq_data *restrict txq __rte_unused, 2329 struct mlx5_txq_local *restrict loc, 2330 struct mlx5_wqe *restrict wqe, 2331 unsigned int vlan, 2332 unsigned int olx) 2333 { 2334 struct mlx5_wqe_eseg *restrict es = &wqe->eseg; 2335 uint32_t csum; 2336 uint8_t *psrc, *pdst; 2337 2338 /* 2339 * Calculate and set check sum flags first, dword field 2340 * in segment may be shared with Software Parser flags. 2341 */ 2342 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2343 es->flags = rte_cpu_to_le_32(csum); 2344 /* 2345 * Calculate and set Software Parser offsets and flags. 2346 * These flags a set for custom UDP and IP tunnel packets. 2347 */ 2348 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2349 /* Fill metadata field if needed. */ 2350 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2351 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2352 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2353 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2354 (sizeof(uint16_t) + 2355 sizeof(rte_v128u32_t)), 2356 "invalid Ethernet Segment data size"); 2357 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2358 (sizeof(uint16_t) + 2359 sizeof(struct rte_vlan_hdr) + 2360 2 * RTE_ETHER_ADDR_LEN), 2361 "invalid Ethernet Segment data size"); 2362 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2363 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 2364 es->inline_data = *(unaligned_uint16_t *)psrc; 2365 psrc += sizeof(uint16_t); 2366 pdst = (uint8_t *)(es + 1); 2367 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2368 /* Implement VLAN tag insertion as part inline data. */ 2369 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2370 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2371 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2372 /* Insert VLAN ethertype + VLAN tag. */ 2373 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2374 ((RTE_ETHER_TYPE_VLAN << 16) | 2375 loc->mbuf->vlan_tci); 2376 pdst += sizeof(struct rte_vlan_hdr); 2377 /* Copy the rest two bytes from packet data. */ 2378 assert(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2379 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2380 } else { 2381 /* Fill the gap in the title WQEBB with inline data. */ 2382 rte_mov16(pdst, psrc); 2383 } 2384 } 2385 2386 /** 2387 * Build the Ethernet Segment with entire packet 2388 * data inlining. Checks the boundary of WQEBB and 2389 * ring buffer wrapping, supports Software Parser, 2390 * Checksums and VLAN insertion Tx offload features. 2391 * 2392 * @param txq 2393 * Pointer to TX queue structure. 2394 * @param loc 2395 * Pointer to burst routine local context. 
2396 * @param wqe 2397 * Pointer to WQE to fill with built Ethernet Segment. 2398 * @param vlan 2399 * Length of VLAN tag insertion if any. 2400 * @param inlen 2401 * Length of data to inline (VLAN included, if any). 2402 * @param tso 2403 * TSO flag, set mss field from the packet. 2404 * @param olx 2405 * Configured Tx offloads mask. It is fully defined at 2406 * compile time and may be used for optimization. 2407 * 2408 * @return 2409 * Pointer to the next Data Segment (aligned and wrapped around). 2410 */ 2411 static __rte_always_inline struct mlx5_wqe_dseg * 2412 mlx5_tx_eseg_data(struct mlx5_txq_data *restrict txq, 2413 struct mlx5_txq_local *restrict loc, 2414 struct mlx5_wqe *restrict wqe, 2415 unsigned int vlan, 2416 unsigned int inlen, 2417 unsigned int tso, 2418 unsigned int olx) 2419 { 2420 struct mlx5_wqe_eseg *restrict es = &wqe->eseg; 2421 uint32_t csum; 2422 uint8_t *psrc, *pdst; 2423 unsigned int part; 2424 2425 /* 2426 * Calculate and set check sum flags first, dword field 2427 * in segment may be shared with Software Parser flags. 2428 */ 2429 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2430 if (tso) { 2431 csum <<= 24; 2432 csum |= loc->mbuf->tso_segsz; 2433 es->flags = rte_cpu_to_be_32(csum); 2434 } else { 2435 es->flags = rte_cpu_to_le_32(csum); 2436 } 2437 /* 2438 * Calculate and set Software Parser offsets and flags. 2439 * These flags a set for custom UDP and IP tunnel packets. 2440 */ 2441 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2442 /* Fill metadata field if needed. */ 2443 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2444 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2445 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2446 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2447 (sizeof(uint16_t) + 2448 sizeof(rte_v128u32_t)), 2449 "invalid Ethernet Segment data size"); 2450 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2451 (sizeof(uint16_t) + 2452 sizeof(struct rte_vlan_hdr) + 2453 2 * RTE_ETHER_ADDR_LEN), 2454 "invalid Ethernet Segment data size"); 2455 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2456 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 2457 es->inline_data = *(unaligned_uint16_t *)psrc; 2458 psrc += sizeof(uint16_t); 2459 pdst = (uint8_t *)(es + 1); 2460 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2461 /* Implement VLAN tag insertion as part inline data. */ 2462 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2463 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2464 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2465 /* Insert VLAN ethertype + VLAN tag. */ 2466 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2467 ((RTE_ETHER_TYPE_VLAN << 16) | 2468 loc->mbuf->vlan_tci); 2469 pdst += sizeof(struct rte_vlan_hdr); 2470 /* Copy the rest two bytes from packet data. */ 2471 assert(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2472 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2473 psrc += sizeof(uint16_t); 2474 } else { 2475 /* Fill the gap in the title WQEBB with inline data. */ 2476 rte_mov16(pdst, psrc); 2477 psrc += sizeof(rte_v128u32_t); 2478 } 2479 pdst = (uint8_t *)(es + 2); 2480 assert(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2481 assert(pdst < (uint8_t *)txq->wqes_end); 2482 inlen -= MLX5_ESEG_MIN_INLINE_SIZE; 2483 if (!inlen) { 2484 assert(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 2485 return (struct mlx5_wqe_dseg *)pdst; 2486 } 2487 /* 2488 * The WQEBB space availability is checked by caller. 2489 * Here we should be aware of WQE ring buffer wraparound only. 
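 * For illustration (assumed numbers): if only 48 bytes remain up to
 * txq->wqes_end while inlen is still 100, the first rte_memcpy() below
 * copies 48 bytes, pdst wraps to txq->wqes and the remaining 52 bytes
 * are copied on the next loop iteration.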
2490 */
2491 part = (uint8_t *)txq->wqes_end - pdst;
2492 part = RTE_MIN(part, inlen);
2493 do {
2494 rte_memcpy(pdst, psrc, part);
2495 inlen -= part;
2496 if (likely(!inlen)) {
2497 /*
2498 * If return value is not used by the caller
2499 * the code below will be optimized out.
2500 */
2501 pdst += part;
2502 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
2503 if (unlikely(pdst >= (uint8_t *)txq->wqes_end))
2504 pdst = (uint8_t *)txq->wqes;
2505 return (struct mlx5_wqe_dseg *)pdst;
2506 }
2507 pdst = (uint8_t *)txq->wqes;
2508 psrc += part;
2509 part = inlen;
2510 } while (true);
2511 }
2512
2513 /**
2514 * Copy data from a chain of mbufs to the specified linear buffer.
2515 * If the data from some mbuf is copied out completely,
2516 * this mbuf is freed. The local structure is used to keep
2517 * the byte stream state.
2518 *
2519 * @param pdst
2520 * Pointer to the destination linear buffer.
2521 * @param loc
2522 * Pointer to burst routine local context.
2523 * @param len
2524 * Length of data to be copied.
2525 * @param olx
2526 * Configured Tx offloads mask. It is fully defined at
2527 * compile time and may be used for optimization.
2528 */
2529 static __rte_always_inline void
2530 mlx5_tx_mseg_memcpy(uint8_t *pdst,
2531 struct mlx5_txq_local *restrict loc,
2532 unsigned int len,
2533 unsigned int olx __rte_unused)
2534 {
2535 struct rte_mbuf *mbuf;
2536 unsigned int part, dlen;
2537 uint8_t *psrc;
2538
2539 assert(len);
2540 do {
2541 /* Allow zero length packets, must check first. */
2542 dlen = rte_pktmbuf_data_len(loc->mbuf);
2543 if (dlen <= loc->mbuf_off) {
2544 /* Exhausted packet, just free. */
2545 mbuf = loc->mbuf;
2546 loc->mbuf = mbuf->next;
2547 rte_pktmbuf_free_seg(mbuf);
2548 loc->mbuf_off = 0;
2549 assert(loc->mbuf_nseg > 1);
2550 assert(loc->mbuf);
2551 --loc->mbuf_nseg;
2552 continue;
2553 }
2554 dlen -= loc->mbuf_off;
2555 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *,
2556 loc->mbuf_off);
2557 part = RTE_MIN(len, dlen);
2558 rte_memcpy(pdst, psrc, part);
2559 loc->mbuf_off += part;
2560 len -= part;
2561 if (!len) {
2562 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) {
2563 loc->mbuf_off = 0;
2564 /* Exhausted packet, just free. */
2565 mbuf = loc->mbuf;
2566 loc->mbuf = mbuf->next;
2567 rte_pktmbuf_free_seg(mbuf);
2568 loc->mbuf_off = 0;
2569 assert(loc->mbuf_nseg >= 1);
2570 --loc->mbuf_nseg;
2571 }
2572 return;
2573 }
2574 pdst += part;
2575 } while (true);
2576 }
2577
2578 /**
2579 * Build the Ethernet Segment with inlined data from
2580 * multi-segment packet. Checks the boundary of WQEBB
2581 * and ring buffer wrapping, supports Software Parser,
2582 * Checksums and VLAN insertion Tx offload features.
2583 *
2584 * @param txq
2585 * Pointer to TX queue structure.
2586 * @param loc
2587 * Pointer to burst routine local context.
2588 * @param wqe
2589 * Pointer to WQE to fill with built Ethernet Segment.
2590 * @param vlan
2591 * Length of VLAN tag insertion if any.
2592 * @param inlen
2593 * Length of data to inline (VLAN included, if any).
2594 * @param tso
2595 * TSO flag, set mss field from the packet.
2596 * @param olx
2597 * Configured Tx offloads mask. It is fully defined at
2598 * compile time and may be used for optimization.
2599 *
2600 * @return
2601 * Pointer to the next Data Segment (aligned and
2602 * possibly NOT wrapped around - the caller should do the
2603 * wrapping check on its own.
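 *
 * The caller-side wrapping check is typically the same one used in
 * mlx5_tx_mseg_build() further below:
 *
 *   if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end)
 *       dseg = (struct mlx5_wqe_dseg *)txq->wqes;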
2604 */ 2605 static __rte_always_inline struct mlx5_wqe_dseg * 2606 mlx5_tx_eseg_mdat(struct mlx5_txq_data *restrict txq, 2607 struct mlx5_txq_local *restrict loc, 2608 struct mlx5_wqe *restrict wqe, 2609 unsigned int vlan, 2610 unsigned int inlen, 2611 unsigned int tso, 2612 unsigned int olx) 2613 { 2614 struct mlx5_wqe_eseg *restrict es = &wqe->eseg; 2615 uint32_t csum; 2616 uint8_t *pdst; 2617 unsigned int part; 2618 2619 /* 2620 * Calculate and set check sum flags first, uint32_t field 2621 * in segment may be shared with Software Parser flags. 2622 */ 2623 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2624 if (tso) { 2625 csum <<= 24; 2626 csum |= loc->mbuf->tso_segsz; 2627 es->flags = rte_cpu_to_be_32(csum); 2628 } else { 2629 es->flags = rte_cpu_to_le_32(csum); 2630 } 2631 /* 2632 * Calculate and set Software Parser offsets and flags. 2633 * These flags a set for custom UDP and IP tunnel packets. 2634 */ 2635 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2636 /* Fill metadata field if needed. */ 2637 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2638 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2639 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2640 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2641 (sizeof(uint16_t) + 2642 sizeof(rte_v128u32_t)), 2643 "invalid Ethernet Segment data size"); 2644 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2645 (sizeof(uint16_t) + 2646 sizeof(struct rte_vlan_hdr) + 2647 2 * RTE_ETHER_ADDR_LEN), 2648 "invalid Ethernet Segment data size"); 2649 assert(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2650 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 2651 pdst = (uint8_t *)&es->inline_data; 2652 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2653 /* Implement VLAN tag insertion as part inline data. */ 2654 mlx5_tx_mseg_memcpy(pdst, loc, 2 * RTE_ETHER_ADDR_LEN, olx); 2655 pdst += 2 * RTE_ETHER_ADDR_LEN; 2656 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2657 ((RTE_ETHER_TYPE_VLAN << 16) | 2658 loc->mbuf->vlan_tci); 2659 pdst += sizeof(struct rte_vlan_hdr); 2660 inlen -= 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 2661 } 2662 assert(pdst < (uint8_t *)txq->wqes_end); 2663 /* 2664 * The WQEBB space availability is checked by caller. 2665 * Here we should be aware of WQE ring buffer wraparound only. 2666 */ 2667 part = (uint8_t *)txq->wqes_end - pdst; 2668 part = RTE_MIN(part, inlen); 2669 assert(part); 2670 do { 2671 mlx5_tx_mseg_memcpy(pdst, loc, part, olx); 2672 inlen -= part; 2673 if (likely(!inlen)) { 2674 pdst += part; 2675 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2676 return (struct mlx5_wqe_dseg *)pdst; 2677 } 2678 pdst = (uint8_t *)txq->wqes; 2679 part = inlen; 2680 } while (true); 2681 } 2682 2683 /** 2684 * Build the Data Segment of pointer type. 2685 * 2686 * @param txq 2687 * Pointer to TX queue structure. 2688 * @param loc 2689 * Pointer to burst routine local context. 2690 * @param dseg 2691 * Pointer to WQE to fill with built Data Segment. 2692 * @param buf 2693 * Data buffer to point. 2694 * @param len 2695 * Data buffer length. 2696 * @param olx 2697 * Configured Tx offloads mask. It is fully defined at 2698 * compile time and may be used for optimization. 
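 *
 * The pointer-type Data Segment built here occupies a single 16-byte
 * WQE segment: a big-endian byte count, the memory region lkey looked
 * up with mlx5_tx_mb2mr() and the 64-bit buffer address, so it
 * contributes exactly one unit to the "ds" count in the Control
 * Segment.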
2699 */ 2700 static __rte_always_inline void 2701 mlx5_tx_dseg_ptr(struct mlx5_txq_data *restrict txq, 2702 struct mlx5_txq_local *restrict loc, 2703 struct mlx5_wqe_dseg *restrict dseg, 2704 uint8_t *buf, 2705 unsigned int len, 2706 unsigned int olx __rte_unused) 2707 2708 { 2709 assert(len); 2710 dseg->bcount = rte_cpu_to_be_32(len); 2711 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2712 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2713 } 2714 2715 /** 2716 * Build the Data Segment of pointer type or inline 2717 * if data length is less than buffer in minimal 2718 * Data Segment size. 2719 * 2720 * @param txq 2721 * Pointer to TX queue structure. 2722 * @param loc 2723 * Pointer to burst routine local context. 2724 * @param dseg 2725 * Pointer to WQE to fill with built Data Segment. 2726 * @param buf 2727 * Data buffer to point. 2728 * @param len 2729 * Data buffer length. 2730 * @param olx 2731 * Configured Tx offloads mask. It is fully defined at 2732 * compile time and may be used for optimization. 2733 */ 2734 static __rte_always_inline void 2735 mlx5_tx_dseg_iptr(struct mlx5_txq_data *restrict txq, 2736 struct mlx5_txq_local *restrict loc, 2737 struct mlx5_wqe_dseg *restrict dseg, 2738 uint8_t *buf, 2739 unsigned int len, 2740 unsigned int olx __rte_unused) 2741 2742 { 2743 uintptr_t dst, src; 2744 2745 assert(len); 2746 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 2747 dseg->bcount = rte_cpu_to_be_32(len); 2748 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2749 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2750 2751 return; 2752 } 2753 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2754 /* Unrolled implementation of generic rte_memcpy. */ 2755 dst = (uintptr_t)&dseg->inline_data[0]; 2756 src = (uintptr_t)buf; 2757 if (len & 0x08) { 2758 #ifdef RTE_ARCH_STRICT_ALIGN 2759 assert(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 2760 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2761 dst += sizeof(uint32_t); 2762 src += sizeof(uint32_t); 2763 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2764 dst += sizeof(uint32_t); 2765 src += sizeof(uint32_t); 2766 #else 2767 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 2768 dst += sizeof(uint64_t); 2769 src += sizeof(uint64_t); 2770 #endif 2771 } 2772 if (len & 0x04) { 2773 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2774 dst += sizeof(uint32_t); 2775 src += sizeof(uint32_t); 2776 } 2777 if (len & 0x02) { 2778 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 2779 dst += sizeof(uint16_t); 2780 src += sizeof(uint16_t); 2781 } 2782 if (len & 0x01) 2783 *(uint8_t *)dst = *(uint8_t *)src; 2784 } 2785 2786 /** 2787 * Build the Data Segment of inlined data from single 2788 * segment packet, no VLAN insertion. 2789 * 2790 * @param txq 2791 * Pointer to TX queue structure. 2792 * @param loc 2793 * Pointer to burst routine local context. 2794 * @param dseg 2795 * Pointer to WQE to fill with built Data Segment. 2796 * @param buf 2797 * Data buffer to point. 2798 * @param len 2799 * Data buffer length. 2800 * @param olx 2801 * Configured Tx offloads mask. It is fully defined at 2802 * compile time and may be used for optimization. 2803 * 2804 * @return 2805 * Pointer to the next Data Segment after inlined data. 2806 * Ring buffer wraparound check is needed. We do not 2807 * do it here because it may not be needed for the 2808 * last packet in the eMPW session. 
2809 */ 2810 static __rte_always_inline struct mlx5_wqe_dseg * 2811 mlx5_tx_dseg_empw(struct mlx5_txq_data *restrict txq, 2812 struct mlx5_txq_local *restrict loc __rte_unused, 2813 struct mlx5_wqe_dseg *restrict dseg, 2814 uint8_t *buf, 2815 unsigned int len, 2816 unsigned int olx __rte_unused) 2817 { 2818 unsigned int part; 2819 uint8_t *pdst; 2820 2821 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2822 pdst = &dseg->inline_data[0]; 2823 /* 2824 * The WQEBB space availability is checked by caller. 2825 * Here we should be aware of WQE ring buffer wraparound only. 2826 */ 2827 part = (uint8_t *)txq->wqes_end - pdst; 2828 part = RTE_MIN(part, len); 2829 do { 2830 rte_memcpy(pdst, buf, part); 2831 len -= part; 2832 if (likely(!len)) { 2833 pdst += part; 2834 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2835 /* Note: no final wraparound check here. */ 2836 return (struct mlx5_wqe_dseg *)pdst; 2837 } 2838 pdst = (uint8_t *)txq->wqes; 2839 buf += part; 2840 part = len; 2841 } while (true); 2842 } 2843 2844 /** 2845 * Build the Data Segment of inlined data from single 2846 * segment packet with VLAN insertion. 2847 * 2848 * @param txq 2849 * Pointer to TX queue structure. 2850 * @param loc 2851 * Pointer to burst routine local context. 2852 * @param dseg 2853 * Pointer to the dseg fill with built Data Segment. 2854 * @param buf 2855 * Data buffer to point. 2856 * @param len 2857 * Data buffer length. 2858 * @param olx 2859 * Configured Tx offloads mask. It is fully defined at 2860 * compile time and may be used for optimization. 2861 * 2862 * @return 2863 * Pointer to the next Data Segment after inlined data. 2864 * Ring buffer wraparound check is needed. 2865 */ 2866 static __rte_always_inline struct mlx5_wqe_dseg * 2867 mlx5_tx_dseg_vlan(struct mlx5_txq_data *restrict txq, 2868 struct mlx5_txq_local *restrict loc __rte_unused, 2869 struct mlx5_wqe_dseg *restrict dseg, 2870 uint8_t *buf, 2871 unsigned int len, 2872 unsigned int olx __rte_unused) 2873 2874 { 2875 unsigned int part; 2876 uint8_t *pdst; 2877 2878 assert(len > MLX5_ESEG_MIN_INLINE_SIZE); 2879 static_assert(MLX5_DSEG_MIN_INLINE_SIZE == 2880 (2 * RTE_ETHER_ADDR_LEN), 2881 "invalid Data Segment data size"); 2882 dseg->bcount = rte_cpu_to_be_32((len + sizeof(struct rte_vlan_hdr)) | 2883 MLX5_ETH_WQE_DATA_INLINE); 2884 pdst = &dseg->inline_data[0]; 2885 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 2886 buf += MLX5_DSEG_MIN_INLINE_SIZE; 2887 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 2888 len -= MLX5_DSEG_MIN_INLINE_SIZE; 2889 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ 2890 assert(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 2891 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 2892 pdst = (uint8_t *)txq->wqes; 2893 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 2894 loc->mbuf->vlan_tci); 2895 pdst += sizeof(struct rte_vlan_hdr); 2896 /* 2897 * The WQEBB space availability is checked by caller. 2898 * Here we should be aware of WQE ring buffer wraparound only. 2899 */ 2900 part = (uint8_t *)txq->wqes_end - pdst; 2901 part = RTE_MIN(part, len); 2902 do { 2903 rte_memcpy(pdst, buf, part); 2904 len -= part; 2905 if (likely(!len)) { 2906 pdst += part; 2907 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2908 /* Note: no final wraparound check here. 
*/ 2909 return (struct mlx5_wqe_dseg *)pdst; 2910 } 2911 pdst = (uint8_t *)txq->wqes; 2912 buf += part; 2913 part = len; 2914 } while (true); 2915 } 2916 2917 /** 2918 * Build the Ethernet Segment with optionally inlined data with 2919 * VLAN insertion and following Data Segments (if any) from 2920 * multi-segment packet. Used by ordinary send and TSO. 2921 * 2922 * @param txq 2923 * Pointer to TX queue structure. 2924 * @param loc 2925 * Pointer to burst routine local context. 2926 * @param wqe 2927 * Pointer to WQE to fill with built Ethernet/Data Segments. 2928 * @param vlan 2929 * Length of VLAN header to insert, 0 means no VLAN insertion. 2930 * @param inlen 2931 * Data length to inline. For TSO this parameter specifies 2932 * exact value, for ordinary send routine can be aligned by 2933 * caller to provide better WQE space saving and data buffer 2934 * start address alignment. This length includes VLAN header 2935 * being inserted. 2936 * @param tso 2937 * Zero means ordinary send, inlined data can be extended, 2938 * otherwise this is TSO, inlined data length is fixed. 2939 * @param olx 2940 * Configured Tx offloads mask. It is fully defined at 2941 * compile time and may be used for optimization. 2942 * 2943 * @return 2944 * Actual size of built WQE in segments. 2945 */ 2946 static __rte_always_inline unsigned int 2947 mlx5_tx_mseg_build(struct mlx5_txq_data *restrict txq, 2948 struct mlx5_txq_local *restrict loc, 2949 struct mlx5_wqe *restrict wqe, 2950 unsigned int vlan, 2951 unsigned int inlen, 2952 unsigned int tso, 2953 unsigned int olx __rte_unused) 2954 { 2955 struct mlx5_wqe_dseg *restrict dseg; 2956 unsigned int ds; 2957 2958 assert((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 2959 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 2960 loc->mbuf_off = 0; 2961 2962 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 2963 if (!loc->mbuf_nseg) 2964 goto dseg_done; 2965 /* 2966 * There are still some mbuf remaining, not inlined. 2967 * The first mbuf may be partially inlined and we 2968 * must process the possible non-zero data offset. 2969 */ 2970 if (loc->mbuf_off) { 2971 unsigned int dlen; 2972 uint8_t *dptr; 2973 2974 /* 2975 * Exhausted packets must be dropped before. 2976 * Non-zero offset means there are some data 2977 * remained in the packet. 2978 */ 2979 assert(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 2980 assert(rte_pktmbuf_data_len(loc->mbuf)); 2981 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 2982 loc->mbuf_off); 2983 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 2984 /* 2985 * Build the pointer/minimal data Data Segment. 2986 * Do ring buffer wrapping check in advance. 2987 */ 2988 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2989 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2990 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 2991 /* Store the mbuf to be freed on completion. */ 2992 assert(loc->elts_free); 2993 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2994 --loc->elts_free; 2995 ++dseg; 2996 if (--loc->mbuf_nseg == 0) 2997 goto dseg_done; 2998 loc->mbuf = loc->mbuf->next; 2999 loc->mbuf_off = 0; 3000 } 3001 do { 3002 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3003 struct rte_mbuf *mbuf; 3004 3005 /* Zero length segment found, just skip. 
*/ 3006 mbuf = loc->mbuf; 3007 loc->mbuf = loc->mbuf->next; 3008 rte_pktmbuf_free_seg(mbuf); 3009 if (--loc->mbuf_nseg == 0) 3010 break; 3011 } else { 3012 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3013 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3014 mlx5_tx_dseg_iptr 3015 (txq, loc, dseg, 3016 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3017 rte_pktmbuf_data_len(loc->mbuf), olx); 3018 assert(loc->elts_free); 3019 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3020 --loc->elts_free; 3021 ++dseg; 3022 if (--loc->mbuf_nseg == 0) 3023 break; 3024 loc->mbuf = loc->mbuf->next; 3025 } 3026 } while (true); 3027 3028 dseg_done: 3029 /* Calculate actual segments used from the dseg pointer. */ 3030 if ((uintptr_t)wqe < (uintptr_t)dseg) 3031 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 3032 else 3033 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 3034 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 3035 return ds; 3036 } 3037 3038 /** 3039 * Tx one packet function for multi-segment TSO. Supports all 3040 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 3041 * sends one packet per WQE. 3042 * 3043 * This routine is responsible for storing processed mbuf 3044 * into elts ring buffer and update elts_head. 3045 * 3046 * @param txq 3047 * Pointer to TX queue structure. 3048 * @param loc 3049 * Pointer to burst routine local context. 3050 * @param olx 3051 * Configured Tx offloads mask. It is fully defined at 3052 * compile time and may be used for optimization. 3053 * 3054 * @return 3055 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3056 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3057 * Local context variables partially updated. 3058 */ 3059 static __rte_always_inline enum mlx5_txcmp_code 3060 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *restrict txq, 3061 struct mlx5_txq_local *restrict loc, 3062 unsigned int olx) 3063 { 3064 struct mlx5_wqe *restrict wqe; 3065 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 3066 3067 /* 3068 * Calculate data length to be inlined to estimate 3069 * the required space in WQE ring buffer. 3070 */ 3071 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3072 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3073 vlan = sizeof(struct rte_vlan_hdr); 3074 inlen = loc->mbuf->l2_len + vlan + 3075 loc->mbuf->l3_len + loc->mbuf->l4_len; 3076 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 3077 return MLX5_TXCMP_CODE_ERROR; 3078 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3079 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 3080 /* Packet must contain all TSO headers. */ 3081 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 3082 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3083 inlen > (dlen + vlan))) 3084 return MLX5_TXCMP_CODE_ERROR; 3085 assert(inlen >= txq->inlen_mode); 3086 /* 3087 * Check whether there are enough free WQEBBs: 3088 * - Control Segment 3089 * - Ethernet Segment 3090 * - First Segment of inlined Ethernet data 3091 * - ... data continued ... 3092 * - Data Segments of pointer/min inline type 3093 */ 3094 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3095 MLX5_ESEG_MIN_INLINE_SIZE + 3096 MLX5_WSEG_SIZE + 3097 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3098 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3099 return MLX5_TXCMP_CODE_EXIT; 3100 /* Check for maximal WQE size. */ 3101 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3102 return MLX5_TXCMP_CODE_ERROR; 3103 #ifdef MLX5_PMD_SOFT_COUNTERS 3104 /* Update sent data bytes/packets counters. 
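 * Worked example (values assumed for illustration): with packet
 * length dlen == 7354, inlined headers inlen - vlan == 54 and
 * tso_segsz == 1460, the formula below yields ntcp == 5 TCP segments
 * on the wire; obytes is then increased by dlen + vlan + ntcp * inlen
 * (with ntcp adjusted as explained below) so the headers replicated
 * for every segment are counted as well.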
*/ 3105 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 3106 loc->mbuf->tso_segsz; 3107 /* 3108 * One will be added for mbuf itself 3109 * at the end of the mlx5_tx_burst from 3110 * loc->pkts_sent field. 3111 */ 3112 --ntcp; 3113 txq->stats.opackets += ntcp; 3114 txq->stats.obytes += dlen + vlan + ntcp * inlen; 3115 #endif 3116 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3117 loc->wqe_last = wqe; 3118 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 3119 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 3120 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3121 txq->wqe_ci += (ds + 3) / 4; 3122 loc->wqe_free -= (ds + 3) / 4; 3123 /* Request CQE generation if limits are reached. */ 3124 mlx5_tx_request_completion(txq, loc, true, olx); 3125 return MLX5_TXCMP_CODE_MULTI; 3126 } 3127 3128 /** 3129 * Tx one packet function for multi-segment SEND. Supports all 3130 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3131 * sends one packet per WQE, without any data inlining in 3132 * Ethernet Segment. 3133 * 3134 * This routine is responsible for storing processed mbuf 3135 * into elts ring buffer and update elts_head. 3136 * 3137 * @param txq 3138 * Pointer to TX queue structure. 3139 * @param loc 3140 * Pointer to burst routine local context. 3141 * @param olx 3142 * Configured Tx offloads mask. It is fully defined at 3143 * compile time and may be used for optimization. 3144 * 3145 * @return 3146 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3147 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3148 * Local context variables partially updated. 3149 */ 3150 static __rte_always_inline enum mlx5_txcmp_code 3151 mlx5_tx_packet_multi_send(struct mlx5_txq_data *restrict txq, 3152 struct mlx5_txq_local *restrict loc, 3153 unsigned int olx) 3154 { 3155 struct mlx5_wqe_dseg *restrict dseg; 3156 struct mlx5_wqe *restrict wqe; 3157 unsigned int ds, nseg; 3158 3159 assert(NB_SEGS(loc->mbuf) > 1); 3160 /* 3161 * No inline at all, it means the CPU cycles saving 3162 * is prioritized at configuration, we should not 3163 * copy any packet data to WQE. 3164 */ 3165 nseg = NB_SEGS(loc->mbuf); 3166 ds = 2 + nseg; 3167 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3168 return MLX5_TXCMP_CODE_EXIT; 3169 /* Check for maximal WQE size. */ 3170 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3171 return MLX5_TXCMP_CODE_ERROR; 3172 /* 3173 * Some Tx offloads may cause an error if 3174 * packet is not long enough, check against 3175 * assumed minimal length. 3176 */ 3177 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 3178 return MLX5_TXCMP_CODE_ERROR; 3179 #ifdef MLX5_PMD_SOFT_COUNTERS 3180 /* Update sent data bytes counter. */ 3181 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 3182 if (MLX5_TXOFF_CONFIG(VLAN) && 3183 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3184 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 3185 #endif 3186 /* 3187 * SEND WQE, one WQEBB: 3188 * - Control Segment, SEND opcode 3189 * - Ethernet Segment, optional VLAN, no inline 3190 * - Data Segments, pointer only type 3191 */ 3192 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3193 loc->wqe_last = wqe; 3194 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 3195 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3196 dseg = &wqe->dseg[0]; 3197 do { 3198 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3199 struct rte_mbuf *mbuf; 3200 3201 /* 3202 * Zero length segment found, have to 3203 * correct total size of WQE in segments. 
3204 * This is supposed to be a rare occasion, so
3205 * in the normal case (no zero length segments)
3206 * we avoid the extra write to the Control
3207 * Segment.
3208 */
3209 --ds;
3210 wqe->cseg.sq_ds -= RTE_BE32(1);
3211 mbuf = loc->mbuf;
3212 loc->mbuf = mbuf->next;
3213 rte_pktmbuf_free_seg(mbuf);
3214 if (--nseg == 0)
3215 break;
3216 } else {
3217 mlx5_tx_dseg_ptr
3218 (txq, loc, dseg,
3219 rte_pktmbuf_mtod(loc->mbuf, uint8_t *),
3220 rte_pktmbuf_data_len(loc->mbuf), olx);
3221 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf;
3222 --loc->elts_free;
3223 if (--nseg == 0)
3224 break;
3225 ++dseg;
3226 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end)
3227 dseg = (struct mlx5_wqe_dseg *)txq->wqes;
3228 loc->mbuf = loc->mbuf->next;
3229 }
3230 } while (true);
3231 txq->wqe_ci += (ds + 3) / 4;
3232 loc->wqe_free -= (ds + 3) / 4;
3233 /* Request CQE generation if limits are reached. */
3234 mlx5_tx_request_completion(txq, loc, true, olx);
3235 return MLX5_TXCMP_CODE_MULTI;
3236 }
3237
3238 /**
3239 * Tx one packet function for multi-segment SEND. Supports all
3240 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs,
3241 * sends one packet per WQE, with data inlining in
3242 * Ethernet Segment and minimal Data Segments.
3243 *
3244 * This routine is responsible for storing the processed mbuf
3245 * into the elts ring buffer and updating elts_head.
3246 *
3247 * @param txq
3248 * Pointer to TX queue structure.
3249 * @param loc
3250 * Pointer to burst routine local context.
3251 * @param olx
3252 * Configured Tx offloads mask. It is fully defined at
3253 * compile time and may be used for optimization.
3254 *
3255 * @return
3256 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible.
3257 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred.
3258 * Local context variables partially updated.
3259 */
3260 static __rte_always_inline enum mlx5_txcmp_code
3261 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq,
3262 struct mlx5_txq_local *restrict loc,
3263 unsigned int olx)
3264 {
3265 struct mlx5_wqe *restrict wqe;
3266 unsigned int ds, inlen, dlen, vlan = 0;
3267
3268 assert(MLX5_TXOFF_CONFIG(INLINE));
3269 assert(NB_SEGS(loc->mbuf) > 1);
3270 /*
3271 * First calculate the data length to be inlined
3272 * to estimate the required space for the WQE.
3273 */
3274 dlen = rte_pktmbuf_pkt_len(loc->mbuf);
3275 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT)
3276 vlan = sizeof(struct rte_vlan_hdr);
3277 inlen = dlen + vlan;
3278 /* Check against minimal length. */
3279 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE)
3280 return MLX5_TXCMP_CODE_ERROR;
3281 assert(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE);
3282 if (inlen > txq->inlen_send) {
3283 struct rte_mbuf *mbuf;
3284 unsigned int nxlen;
3285 uintptr_t start;
3286
3287 /*
3288 * Packet length exceeds the allowed inline
3289 * data length, check whether the minimal
3290 * inlining is required.
3291 */
3292 if (txq->inlen_mode) {
3293 assert(txq->inlen_mode >= MLX5_ESEG_MIN_INLINE_SIZE);
3294 assert(txq->inlen_mode <= txq->inlen_send);
3295 inlen = txq->inlen_mode;
3296 } else {
3297 if (!vlan || txq->vlan_en) {
3298 /*
3299 * VLAN insertion will be done by the HW.
3300 * It is not the most efficient way - the VLAN
3301 * flag is checked twice, but the inlining
3302 * length must be computed correctly while
3303 * taking the inserted VLAN header into account.
3304 */
3305 return mlx5_tx_packet_multi_send
3306 (txq, loc, olx);
3307 }
3308 inlen = MLX5_ESEG_MIN_INLINE_SIZE;
3309 }
3310 /*
3311 * Now we know the minimal amount of data is requested
3312 * to inline. Check whether we should inline the buffers
3313 * from the chain beginning to eliminate some mbufs.
3314 */
3315 mbuf = loc->mbuf;
3316 nxlen = rte_pktmbuf_data_len(mbuf);
3317 if (unlikely(nxlen <= txq->inlen_send)) {
3318 /* We can inline at least the first mbuf. */
3319 if (nxlen < inlen) {
3320 unsigned int smlen;
3321
3322 /* Scan mbufs till inlen is filled. */
3323 do {
3324 smlen = nxlen;
3325 mbuf = NEXT(mbuf);
3326 assert(mbuf);
3327 nxlen = rte_pktmbuf_data_len(mbuf);
3328 nxlen += smlen;
3329 } while (unlikely(nxlen < inlen));
3330 if (unlikely(nxlen > txq->inlen_send)) {
3331 /* We cannot inline the entire mbuf. */
3332 smlen = inlen - smlen;
3333 start = rte_pktmbuf_mtod_offset
3334 (mbuf, uintptr_t, smlen);
3335 goto do_align;
3336 }
3337 }
3338 do {
3339 inlen = nxlen;
3340 mbuf = NEXT(mbuf);
3341 /* This should not be the end of the packet. */
3342 assert(mbuf);
3343 nxlen = inlen + rte_pktmbuf_data_len(mbuf);
3344 } while (unlikely(nxlen < txq->inlen_send));
3345 }
3346 start = rte_pktmbuf_mtod(mbuf, uintptr_t);
3347 /*
3348 * Check whether we can extend the inline length to align
3349 * the start address of the remaining data buffer to a cacheline.
3350 */
3351 do_align:
3352 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1);
3353 if (unlikely(start)) {
3354 start += inlen;
3355 if (start <= txq->inlen_send)
3356 inlen = start;
3357 }
3358 }
3359 /*
3360 * Check whether there are enough free WQEBBs:
3361 * - Control Segment
3362 * - Ethernet Segment
3363 * - First Segment of inlined Ethernet data
3364 * - ... data continued ...
3365 * - Data Segments of pointer/min inline type
3366 *
3367 * Estimate the number of Data Segments conservatively,
3368 * assuming no mbufs are freed during inlining.
3369 */
3370 assert(inlen <= txq->inlen_send);
3371 ds = NB_SEGS(loc->mbuf) + 2 + (inlen -
3372 MLX5_ESEG_MIN_INLINE_SIZE +
3373 MLX5_WSEG_SIZE +
3374 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE;
3375 if (unlikely(loc->wqe_free < ((ds + 3) / 4)))
3376 return MLX5_TXCMP_CODE_EXIT;
3377 /* Check for maximal WQE size. */
3378 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4)))
3379 return MLX5_TXCMP_CODE_ERROR;
3380 #ifdef MLX5_PMD_SOFT_COUNTERS
3381 /* Update sent data bytes/packets counters. */
3382 txq->stats.obytes += dlen + vlan;
3383 #endif
3384 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
3385 loc->wqe_last = wqe;
3386 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx);
3387 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx);
3388 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
3389 txq->wqe_ci += (ds + 3) / 4;
3390 loc->wqe_free -= (ds + 3) / 4;
3391 /* Request CQE generation if limits are reached. */
3392 mlx5_tx_request_completion(txq, loc, true, olx);
3393 return MLX5_TXCMP_CODE_MULTI;
3394 }
3395
3396 /**
3397 * Tx burst function for multi-segment packets. Supports all
3398 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs,
3399 * sends one packet per WQE. Function stops sending if it
3400 * encounters a single-segment packet.
3401 *
3402 * This routine is responsible for storing the processed mbufs
3403 * into the elts ring buffer and updating elts_head.
3404 *
3405 * @param txq
3406 * Pointer to TX queue structure.
3407 * @param[in] pkts
3408 * Packets to transmit.
3409 * @param pkts_n
3410 * Number of packets in array.
3411 * @param loc 3412 * Pointer to burst routine local context. 3413 * @param olx 3414 * Configured Tx offloads mask. It is fully defined at 3415 * compile time and may be used for optimization. 3416 * 3417 * @return 3418 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3419 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3420 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3421 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 3422 * Local context variables updated. 3423 */ 3424 static __rte_always_inline enum mlx5_txcmp_code 3425 mlx5_tx_burst_mseg(struct mlx5_txq_data *restrict txq, 3426 struct rte_mbuf **restrict pkts, 3427 unsigned int pkts_n, 3428 struct mlx5_txq_local *restrict loc, 3429 unsigned int olx) 3430 { 3431 assert(loc->elts_free && loc->wqe_free); 3432 assert(pkts_n > loc->pkts_sent); 3433 pkts += loc->pkts_sent + 1; 3434 pkts_n -= loc->pkts_sent; 3435 for (;;) { 3436 enum mlx5_txcmp_code ret; 3437 3438 assert(NB_SEGS(loc->mbuf) > 1); 3439 /* 3440 * Estimate the number of free elts quickly but 3441 * conservatively. Some segment may be fully inlined 3442 * and freed, ignore this here - precise estimation 3443 * is costly. 3444 */ 3445 if (loc->elts_free < NB_SEGS(loc->mbuf)) 3446 return MLX5_TXCMP_CODE_EXIT; 3447 if (MLX5_TXOFF_CONFIG(TSO) && 3448 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) { 3449 /* Proceed with multi-segment TSO. */ 3450 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 3451 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 3452 /* Proceed with multi-segment SEND with inlining. */ 3453 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 3454 } else { 3455 /* Proceed with multi-segment SEND w/o inlining. */ 3456 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 3457 } 3458 if (ret == MLX5_TXCMP_CODE_EXIT) 3459 return MLX5_TXCMP_CODE_EXIT; 3460 if (ret == MLX5_TXCMP_CODE_ERROR) 3461 return MLX5_TXCMP_CODE_ERROR; 3462 /* WQE is built, go to the next packet. */ 3463 ++loc->pkts_sent; 3464 --pkts_n; 3465 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3466 return MLX5_TXCMP_CODE_EXIT; 3467 loc->mbuf = *pkts++; 3468 if (pkts_n > 1) 3469 rte_prefetch0(*pkts); 3470 if (likely(NB_SEGS(loc->mbuf) > 1)) 3471 continue; 3472 /* Here ends the series of multi-segment packets. */ 3473 if (MLX5_TXOFF_CONFIG(TSO) && 3474 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3475 return MLX5_TXCMP_CODE_TSO; 3476 return MLX5_TXCMP_CODE_SINGLE; 3477 } 3478 assert(false); 3479 } 3480 3481 /** 3482 * Tx burst function for single-segment packets with TSO. 3483 * Supports all types of Tx offloads, except multi-packets. 3484 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 3485 * Function stops sending if it encounters the multi-segment 3486 * packet or packet without TSO requested. 3487 * 3488 * The routine is responsible for storing processed mbuf 3489 * into elts ring buffer and update elts_head if inline 3490 * offloads is requested due to possible early freeing 3491 * of the inlined mbufs (can not store pkts array in elts 3492 * as a batch). 3493 * 3494 * @param txq 3495 * Pointer to TX queue structure. 3496 * @param[in] pkts 3497 * Packets to transmit. 3498 * @param pkts_n 3499 * Number of packets in array. 3500 * @param loc 3501 * Pointer to burst routine local context. 3502 * @param olx 3503 * Configured Tx offloads mask. It is fully defined at 3504 * compile time and may be used for optimization. 3505 * 3506 * @return 3507 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 
3508 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3509 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3510 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3511 * Local context variables updated. 3512 */ 3513 static __rte_always_inline enum mlx5_txcmp_code 3514 mlx5_tx_burst_tso(struct mlx5_txq_data *restrict txq, 3515 struct rte_mbuf **restrict pkts, 3516 unsigned int pkts_n, 3517 struct mlx5_txq_local *restrict loc, 3518 unsigned int olx) 3519 { 3520 assert(loc->elts_free && loc->wqe_free); 3521 assert(pkts_n > loc->pkts_sent); 3522 pkts += loc->pkts_sent + 1; 3523 pkts_n -= loc->pkts_sent; 3524 for (;;) { 3525 struct mlx5_wqe_dseg *restrict dseg; 3526 struct mlx5_wqe *restrict wqe; 3527 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 3528 uint8_t *dptr; 3529 3530 assert(NB_SEGS(loc->mbuf) == 1); 3531 dlen = rte_pktmbuf_data_len(loc->mbuf); 3532 if (MLX5_TXOFF_CONFIG(VLAN) && 3533 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 3534 vlan = sizeof(struct rte_vlan_hdr); 3535 } 3536 /* 3537 * First calculate the WQE size to check 3538 * whether we have enough space in ring buffer. 3539 */ 3540 hlen = loc->mbuf->l2_len + vlan + 3541 loc->mbuf->l3_len + loc->mbuf->l4_len; 3542 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 3543 return MLX5_TXCMP_CODE_ERROR; 3544 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3545 hlen += loc->mbuf->outer_l2_len + 3546 loc->mbuf->outer_l3_len; 3547 /* Segment must contain all TSO headers. */ 3548 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 3549 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3550 hlen > (dlen + vlan))) 3551 return MLX5_TXCMP_CODE_ERROR; 3552 /* 3553 * Check whether there are enough free WQEBBs: 3554 * - Control Segment 3555 * - Ethernet Segment 3556 * - First Segment of inlined Ethernet data 3557 * - ... data continued ... 3558 * - Finishing Data Segment of pointer type 3559 */ 3560 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 3561 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3562 if (loc->wqe_free < ((ds + 3) / 4)) 3563 return MLX5_TXCMP_CODE_EXIT; 3564 #ifdef MLX5_PMD_SOFT_COUNTERS 3565 /* Update sent data bytes/packets counters. */ 3566 ntcp = (dlen + vlan - hlen + 3567 loc->mbuf->tso_segsz - 1) / 3568 loc->mbuf->tso_segsz; 3569 /* 3570 * One will be added for mbuf itself at the end 3571 * of the mlx5_tx_burst from loc->pkts_sent field. 3572 */ 3573 --ntcp; 3574 txq->stats.opackets += ntcp; 3575 txq->stats.obytes += dlen + vlan + ntcp * hlen; 3576 #endif 3577 /* 3578 * Build the TSO WQE: 3579 * - Control Segment 3580 * - Ethernet Segment with hlen bytes inlined 3581 * - Data Segment of pointer type 3582 */ 3583 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3584 loc->wqe_last = wqe; 3585 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3586 MLX5_OPCODE_TSO, olx); 3587 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 3588 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 3589 dlen -= hlen - vlan; 3590 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 3591 /* 3592 * WQE is built, update the loop parameters 3593 * and go to the next packet. 3594 */ 3595 txq->wqe_ci += (ds + 3) / 4; 3596 loc->wqe_free -= (ds + 3) / 4; 3597 if (MLX5_TXOFF_CONFIG(INLINE)) 3598 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3599 --loc->elts_free; 3600 ++loc->pkts_sent; 3601 --pkts_n; 3602 /* Request CQE generation if limits are reached. 
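 * The "limits" are the ones checked in mlx5_tx_request_completion()
 * above: at least MLX5_TX_COMP_THRESH packets stored in elts since
 * the last completion request or, when inlining is enabled, the WQE
 * index advancing by at least txq->wqe_thres since the last request.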
*/ 3603 mlx5_tx_request_completion(txq, loc, false, olx); 3604 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3605 return MLX5_TXCMP_CODE_EXIT; 3606 loc->mbuf = *pkts++; 3607 if (pkts_n > 1) 3608 rte_prefetch0(*pkts); 3609 if (MLX5_TXOFF_CONFIG(MULTI) && 3610 unlikely(NB_SEGS(loc->mbuf) > 1)) 3611 return MLX5_TXCMP_CODE_MULTI; 3612 if (likely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) 3613 return MLX5_TXCMP_CODE_SINGLE; 3614 /* Continue with the next TSO packet. */ 3615 } 3616 assert(false); 3617 } 3618 3619 /** 3620 * Analyze the packet and select the best method to send. 3621 * 3622 * @param txq 3623 * Pointer to TX queue structure. 3624 * @param loc 3625 * Pointer to burst routine local context. 3626 * @param olx 3627 * Configured Tx offloads mask. It is fully defined at 3628 * compile time and may be used for optimization. 3629 * @param newp 3630 * The predefined flag whether do complete check for 3631 * multi-segment packets and TSO. 3632 * 3633 * @return 3634 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3635 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 3636 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 3637 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 3638 */ 3639 static __rte_always_inline enum mlx5_txcmp_code 3640 mlx5_tx_able_to_empw(struct mlx5_txq_data *restrict txq, 3641 struct mlx5_txq_local *restrict loc, 3642 unsigned int olx, 3643 bool newp) 3644 { 3645 /* Check for multi-segment packet. */ 3646 if (newp && 3647 MLX5_TXOFF_CONFIG(MULTI) && 3648 unlikely(NB_SEGS(loc->mbuf) > 1)) 3649 return MLX5_TXCMP_CODE_MULTI; 3650 /* Check for TSO packet. */ 3651 if (newp && 3652 MLX5_TXOFF_CONFIG(TSO) && 3653 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3654 return MLX5_TXCMP_CODE_TSO; 3655 /* Check if eMPW is enabled at all. */ 3656 if (!MLX5_TXOFF_CONFIG(EMPW)) 3657 return MLX5_TXCMP_CODE_SINGLE; 3658 /* Check if eMPW can be engaged. */ 3659 if (MLX5_TXOFF_CONFIG(VLAN) && 3660 unlikely(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) && 3661 (!MLX5_TXOFF_CONFIG(INLINE) || 3662 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 3663 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 3664 /* 3665 * eMPW does not support VLAN insertion offload, 3666 * we have to inline the entire packet but 3667 * packet is too long for inlining. 3668 */ 3669 return MLX5_TXCMP_CODE_SINGLE; 3670 } 3671 return MLX5_TXCMP_CODE_EMPW; 3672 } 3673 3674 /** 3675 * Check the next packet attributes to match with the eMPW batch ones. 3676 * In addition, for legacy MPW the packet length is checked either. 3677 * 3678 * @param txq 3679 * Pointer to TX queue structure. 3680 * @param es 3681 * Pointer to Ethernet Segment of eMPW batch. 3682 * @param loc 3683 * Pointer to burst routine local context. 3684 * @param dlen 3685 * Length of previous packet in MPW descriptor. 3686 * @param olx 3687 * Configured Tx offloads mask. It is fully defined at 3688 * compile time and may be used for optimization. 3689 * 3690 * @return 3691 * true - packet match with eMPW batch attributes. 3692 * false - no match, eMPW should be restarted. 3693 */ 3694 static __rte_always_inline bool 3695 mlx5_tx_match_empw(struct mlx5_txq_data *restrict txq __rte_unused, 3696 struct mlx5_wqe_eseg *restrict es, 3697 struct mlx5_txq_local *restrict loc, 3698 uint32_t dlen, 3699 unsigned int olx) 3700 { 3701 uint8_t swp_flags = 0; 3702 3703 /* Compare the checksum flags, if any. 
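*/
/*
 * Background note summarizing the checks below: all packets of one
 * eMPW batch share a single Ethernet Segment, so every per-packet
 * attribute kept there - checksum enables, Software Parser offsets
 * and flags, metadata value and, for legacy MPW, the fixed packet
 * length - must be identical across the batch; any mismatch closes
 * the current batch and starts a new one.
 */
/* Compare the checksum flags, if any.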
*/
3704 if (MLX5_TXOFF_CONFIG(CSUM) &&
3705 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags)
3706 return false;
3707 /* Compare the Software Parser offsets and flags. */
3708 if (MLX5_TXOFF_CONFIG(SWP) &&
3709 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) ||
3710 es->swp_flags != swp_flags))
3711 return false;
3712 /* Fill metadata field if needed. */
3713 if (MLX5_TXOFF_CONFIG(METADATA) &&
3714 es->metadata != (loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ?
3715 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0))
3716 return false;
3717 /* Legacy MPW can send packets with the same length only. */
3718 if (MLX5_TXOFF_CONFIG(MPW) &&
3719 dlen != rte_pktmbuf_data_len(loc->mbuf))
3720 return false;
3721 /* There must be no VLAN packets in eMPW loop. */
3722 if (MLX5_TXOFF_CONFIG(VLAN))
3723 assert(!(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT));
3724 return true;
3725 }
3726
3727 /**
3728 * Update send loop variables and WQE for eMPW loop
3729 * without data inlining. The number of Data Segments is
3730 * equal to the number of sent packets.
3731 *
3732 * @param txq
3733 * Pointer to TX queue structure.
3734 * @param loc
3735 * Pointer to burst routine local context.
3736 * @param ds
3737 * Number of packets (one Data Segment per packet).
3738 * @param slen
3739 * Accumulated statistics, bytes sent.
3740 * @param olx
3741 * Configured Tx offloads mask. It is fully defined at
3742 * compile time and may be used for optimization.
3743 *
3744 * @return
3745 * Nothing. The local context and the Control Segment of the
3746 * last WQE are updated in place.
3747 */
3748 static __rte_always_inline void
3749 mlx5_tx_sdone_empw(struct mlx5_txq_data *restrict txq,
3750 struct mlx5_txq_local *restrict loc,
3751 unsigned int ds,
3752 unsigned int slen,
3753 unsigned int olx)
3754 {
3755 assert(!MLX5_TXOFF_CONFIG(INLINE));
3756 #ifdef MLX5_PMD_SOFT_COUNTERS
3757 /* Update sent data bytes counter. */
3758 txq->stats.obytes += slen;
3759 #else
3760 (void)slen;
3761 #endif
3762 loc->elts_free -= ds;
3763 loc->pkts_sent += ds;
3764 ds += 2;
3765 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
3766 txq->wqe_ci += (ds + 3) / 4;
3767 loc->wqe_free -= (ds + 3) / 4;
3768 /* Request CQE generation if limits are reached. */
3769 mlx5_tx_request_completion(txq, loc, false, olx);
3770 }
3771
3772 /**
3773 * Update send loop variables and WQE for eMPW loop
3774 * with data inlining. The caller supplies the total size of
3775 * the descriptors and data pushed to the WQE.
3776 *
3777 * @param txq
3778 * Pointer to TX queue structure.
3779 * @param loc
3780 * Pointer to burst routine local context.
3781 * @param len
3782 * Total size of descriptor/data in bytes.
3783 * @param slen
3784 * Accumulated statistics, data bytes sent.
3785 * @param olx
3786 * Configured Tx offloads mask. It is fully defined at
3787 * compile time and may be used for optimization.
3788 *
3789 * @return
3790 * Nothing. The local context and the Control Segment of the
3791 * last WQE are updated in place.
3792 */
3793 static __rte_always_inline void
3794 mlx5_tx_idone_empw(struct mlx5_txq_data *restrict txq,
3795 struct mlx5_txq_local *restrict loc,
3796 unsigned int len,
3797 unsigned int slen,
3798 unsigned int olx __rte_unused)
3799 {
3800 assert(MLX5_TXOFF_CONFIG(INLINE));
3801 assert((len % MLX5_WSEG_SIZE) == 0);
3802 #ifdef MLX5_PMD_SOFT_COUNTERS
3803 /* Update sent data bytes counter.
*/ 3804 txq->stats.obytes += slen; 3805 #else 3806 (void)slen; 3807 #endif 3808 len = len / MLX5_WSEG_SIZE + 2; 3809 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 3810 txq->wqe_ci += (len + 3) / 4; 3811 loc->wqe_free -= (len + 3) / 4; 3812 /* Request CQE generation if limits are reached. */ 3813 mlx5_tx_request_completion(txq, loc, false, olx); 3814 } 3815 3816 /** 3817 * The set of Tx burst functions for single-segment packets 3818 * without TSO and with Multi-Packet Writing feature support. 3819 * Supports all types of Tx offloads, except multi-packets 3820 * and TSO. 3821 * 3822 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends 3823 * as many packet per WQE as it can. If eMPW is not configured 3824 * or packet can not be sent with eMPW (VLAN insertion) the 3825 * ordinary SEND opcode is used and only one packet placed 3826 * in WQE. 3827 * 3828 * Functions stop sending if it encounters the multi-segment 3829 * packet or packet with TSO requested. 3830 * 3831 * The routines are responsible for storing processed mbuf 3832 * into elts ring buffer and update elts_head if inlining 3833 * offload is requested. Otherwise the copying mbufs to elts 3834 * can be postponed and completed at the end of burst routine. 3835 * 3836 * @param txq 3837 * Pointer to TX queue structure. 3838 * @param[in] pkts 3839 * Packets to transmit. 3840 * @param pkts_n 3841 * Number of packets in array. 3842 * @param loc 3843 * Pointer to burst routine local context. 3844 * @param olx 3845 * Configured Tx offloads mask. It is fully defined at 3846 * compile time and may be used for optimization. 3847 * 3848 * @return 3849 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3850 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3851 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3852 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 3853 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 3854 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 3855 * 3856 * Local context variables updated. 3857 * 3858 * 3859 * The routine sends packets with MLX5_OPCODE_EMPW 3860 * without inlining, this is dedicated optimized branch. 3861 * No VLAN insertion is supported. 3862 */ 3863 static __rte_always_inline enum mlx5_txcmp_code 3864 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *restrict txq, 3865 struct rte_mbuf **restrict pkts, 3866 unsigned int pkts_n, 3867 struct mlx5_txq_local *restrict loc, 3868 unsigned int olx) 3869 { 3870 /* 3871 * Subroutine is the part of mlx5_tx_burst_single() 3872 * and sends single-segment packet with eMPW opcode 3873 * without data inlining. 3874 */ 3875 assert(!MLX5_TXOFF_CONFIG(INLINE)); 3876 assert(MLX5_TXOFF_CONFIG(EMPW)); 3877 assert(loc->elts_free && loc->wqe_free); 3878 assert(pkts_n > loc->pkts_sent); 3879 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 3880 pkts += loc->pkts_sent + 1; 3881 pkts_n -= loc->pkts_sent; 3882 for (;;) { 3883 struct mlx5_wqe_dseg *restrict dseg; 3884 struct mlx5_wqe_eseg *restrict eseg; 3885 enum mlx5_txcmp_code ret; 3886 unsigned int part, loop; 3887 unsigned int slen = 0; 3888 3889 next_empw: 3890 assert(NB_SEGS(loc->mbuf) == 1); 3891 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 3892 MLX5_MPW_MAX_PACKETS : 3893 MLX5_EMPW_MAX_PACKETS); 3894 if (unlikely(loc->elts_free < part)) { 3895 /* We have no enough elts to save all mbufs. */ 3896 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 3897 return MLX5_TXCMP_CODE_EXIT; 3898 /* But we still able to send at least minimal eMPW. 
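 * For instance (illustrative numbers), if only 30 elts remain free,
 * part is clamped to 30 and the check right below will require
 * (2 + 30 + 3) / 4 = 8 free WQEBBs: one Control plus one Ethernet
 * Segment shared by the batch and one pointer Data Segment per packet.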
*/ 3899 part = loc->elts_free; 3900 } 3901 /* Check whether we have enough WQEs */ 3902 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 3903 if (unlikely(loc->wqe_free < 3904 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 3905 return MLX5_TXCMP_CODE_EXIT; 3906 part = (loc->wqe_free * 4) - 2; 3907 } 3908 if (likely(part > 1)) 3909 rte_prefetch0(*pkts); 3910 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3911 /* 3912 * Build eMPW title WQEBB: 3913 * - Control Segment, eMPW opcode 3914 * - Ethernet Segment, no inline 3915 */ 3916 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 3917 MLX5_OPCODE_ENHANCED_MPSW, olx); 3918 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 3919 olx & ~MLX5_TXOFF_CONFIG_VLAN); 3920 eseg = &loc->wqe_last->eseg; 3921 dseg = &loc->wqe_last->dseg[0]; 3922 loop = part; 3923 /* Store the packet length for legacy MPW. */ 3924 if (MLX5_TXOFF_CONFIG(MPW)) 3925 eseg->mss = rte_cpu_to_be_16 3926 (rte_pktmbuf_data_len(loc->mbuf)); 3927 for (;;) { 3928 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 3929 #ifdef MLX5_PMD_SOFT_COUNTERS 3930 /* Update sent data bytes counter. */ 3931 slen += dlen; 3932 #endif 3933 mlx5_tx_dseg_ptr 3934 (txq, loc, dseg, 3935 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3936 dlen, olx); 3937 if (unlikely(--loop == 0)) 3938 break; 3939 loc->mbuf = *pkts++; 3940 if (likely(loop > 1)) 3941 rte_prefetch0(*pkts); 3942 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3943 /* 3944 * Unroll the completion code to avoid 3945 * returning variable value - it results in 3946 * unoptimized sequent checking in caller. 3947 */ 3948 if (ret == MLX5_TXCMP_CODE_MULTI) { 3949 part -= loop; 3950 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 3951 if (unlikely(!loc->elts_free || 3952 !loc->wqe_free)) 3953 return MLX5_TXCMP_CODE_EXIT; 3954 return MLX5_TXCMP_CODE_MULTI; 3955 } 3956 assert(NB_SEGS(loc->mbuf) == 1); 3957 if (ret == MLX5_TXCMP_CODE_TSO) { 3958 part -= loop; 3959 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 3960 if (unlikely(!loc->elts_free || 3961 !loc->wqe_free)) 3962 return MLX5_TXCMP_CODE_EXIT; 3963 return MLX5_TXCMP_CODE_TSO; 3964 } 3965 if (ret == MLX5_TXCMP_CODE_SINGLE) { 3966 part -= loop; 3967 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 3968 if (unlikely(!loc->elts_free || 3969 !loc->wqe_free)) 3970 return MLX5_TXCMP_CODE_EXIT; 3971 return MLX5_TXCMP_CODE_SINGLE; 3972 } 3973 if (ret != MLX5_TXCMP_CODE_EMPW) { 3974 assert(false); 3975 part -= loop; 3976 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 3977 return MLX5_TXCMP_CODE_ERROR; 3978 } 3979 /* 3980 * Check whether packet parameters coincide 3981 * within assumed eMPW batch: 3982 * - check sum settings 3983 * - metadata value 3984 * - software parser settings 3985 * - packets length (legacy MPW only) 3986 */ 3987 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 3988 assert(loop); 3989 part -= loop; 3990 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 3991 if (unlikely(!loc->elts_free || 3992 !loc->wqe_free)) 3993 return MLX5_TXCMP_CODE_EXIT; 3994 pkts_n -= part; 3995 goto next_empw; 3996 } 3997 /* Packet attributes match, continue the same eMPW. */ 3998 ++dseg; 3999 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4000 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4001 } 4002 /* eMPW is built successfully, update loop parameters. */ 4003 assert(!loop); 4004 assert(pkts_n >= part); 4005 #ifdef MLX5_PMD_SOFT_COUNTERS 4006 /* Update sent data bytes counter. 
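 * (obytes is accumulated here per eMPW batch; the matching opackets
 * counter is added once per burst from loc.pkts_sent at burst exit.)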
*/ 4007 txq->stats.obytes += slen; 4008 #endif 4009 loc->elts_free -= part; 4010 loc->pkts_sent += part; 4011 txq->wqe_ci += (2 + part + 3) / 4; 4012 loc->wqe_free -= (2 + part + 3) / 4; 4013 pkts_n -= part; 4014 /* Request CQE generation if limits are reached. */ 4015 mlx5_tx_request_completion(txq, loc, false, olx); 4016 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4017 return MLX5_TXCMP_CODE_EXIT; 4018 loc->mbuf = *pkts++; 4019 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4020 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 4021 return ret; 4022 /* Continue sending eMPW batches. */ 4023 } 4024 assert(false); 4025 } 4026 4027 /** 4028 * The routine sends packets with MLX5_OPCODE_EMPW 4029 * with inlining, optionally supports VLAN insertion. 4030 */ 4031 static __rte_always_inline enum mlx5_txcmp_code 4032 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, 4033 struct rte_mbuf **restrict pkts, 4034 unsigned int pkts_n, 4035 struct mlx5_txq_local *restrict loc, 4036 unsigned int olx) 4037 { 4038 /* 4039 * Subroutine is the part of mlx5_tx_burst_single() 4040 * and sends single-segment packet with eMPW opcode 4041 * with data inlining. 4042 */ 4043 assert(MLX5_TXOFF_CONFIG(INLINE)); 4044 assert(MLX5_TXOFF_CONFIG(EMPW)); 4045 assert(loc->elts_free && loc->wqe_free); 4046 assert(pkts_n > loc->pkts_sent); 4047 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 4048 pkts += loc->pkts_sent + 1; 4049 pkts_n -= loc->pkts_sent; 4050 for (;;) { 4051 struct mlx5_wqe_dseg *restrict dseg; 4052 struct mlx5_wqe_eseg *restrict eseg; 4053 enum mlx5_txcmp_code ret; 4054 unsigned int room, part, nlim; 4055 unsigned int slen = 0; 4056 4057 assert(NB_SEGS(loc->mbuf) == 1); 4058 /* 4059 * Limits the amount of packets in one WQE 4060 * to improve CQE latency generation. 4061 */ 4062 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4063 MLX5_MPW_INLINE_MAX_PACKETS : 4064 MLX5_EMPW_MAX_PACKETS); 4065 /* Check whether we have minimal amount WQEs */ 4066 if (unlikely(loc->wqe_free < 4067 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4068 return MLX5_TXCMP_CODE_EXIT; 4069 if (likely(pkts_n > 1)) 4070 rte_prefetch0(*pkts); 4071 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4072 /* 4073 * Build eMPW title WQEBB: 4074 * - Control Segment, eMPW opcode, zero DS 4075 * - Ethernet Segment, no inline 4076 */ 4077 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, 0, 4078 MLX5_OPCODE_ENHANCED_MPSW, olx); 4079 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 4080 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4081 eseg = &loc->wqe_last->eseg; 4082 dseg = &loc->wqe_last->dseg[0]; 4083 /* Store the packet length for legacy MPW. */ 4084 if (MLX5_TXOFF_CONFIG(MPW)) 4085 eseg->mss = rte_cpu_to_be_16 4086 (rte_pktmbuf_data_len(loc->mbuf)); 4087 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 4088 loc->wqe_free) * MLX5_WQE_SIZE - 4089 MLX5_WQE_CSEG_SIZE - 4090 MLX5_WQE_ESEG_SIZE; 4091 /* Build WQE till we have space, packets and resources. */ 4092 part = room; 4093 for (;;) { 4094 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4095 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 4096 unsigned int tlen; 4097 4098 assert(room >= MLX5_WQE_DSEG_SIZE); 4099 assert((room % MLX5_WQE_DSEG_SIZE) == 0); 4100 assert((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 4101 /* 4102 * Some Tx offloads may cause an error if 4103 * packet is not long enough, check against 4104 * assumed minimal length. 
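 * For example, MLX5_ESEG_MIN_INLINE_SIZE is 18 bytes (an Ethernet
 * header plus a VLAN header, as the static_assert further below in
 * this file spells out), so a packet shorter than that cannot even
 * provide the mandatory inlined headers and is rejected as an error.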
4105 */ 4106 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 4107 part -= room; 4108 if (unlikely(!part)) 4109 return MLX5_TXCMP_CODE_ERROR; 4110 /* 4111 * We have some successfully built 4112 * packet Data Segments to send. 4113 */ 4114 mlx5_tx_idone_empw(txq, loc, part, slen, olx); 4115 return MLX5_TXCMP_CODE_ERROR; 4116 } 4117 /* Inline or not inline - that's the Question. */ 4118 if (dlen > txq->inlen_empw) 4119 goto pointer_empw; 4120 /* Inline entire packet, optional VLAN insertion. */ 4121 tlen = sizeof(dseg->bcount) + dlen; 4122 if (MLX5_TXOFF_CONFIG(VLAN) && 4123 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4124 /* 4125 * The packet length must be checked in 4126 * mlx5_tx_able_to_empw() and packet 4127 * fits into inline length guaranteed. 4128 */ 4129 assert((dlen + sizeof(struct rte_vlan_hdr)) <= 4130 txq->inlen_empw); 4131 tlen += sizeof(struct rte_vlan_hdr); 4132 if (room < tlen) 4133 break; 4134 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 4135 dptr, dlen, olx); 4136 #ifdef MLX5_PMD_SOFT_COUNTERS 4137 /* Update sent data bytes counter. */ 4138 slen += sizeof(struct rte_vlan_hdr); 4139 #endif 4140 } else { 4141 if (room < tlen) 4142 break; 4143 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 4144 dptr, dlen, olx); 4145 } 4146 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 4147 assert(room >= tlen); 4148 room -= tlen; 4149 /* 4150 * Packet data are completely inlined, 4151 * free the packet immediately. 4152 */ 4153 rte_pktmbuf_free_seg(loc->mbuf); 4154 goto next_mbuf; 4155 pointer_empw: 4156 /* 4157 * Not inlinable VLAN packets are 4158 * proceeded outside of this routine. 4159 */ 4160 assert(room >= MLX5_WQE_DSEG_SIZE); 4161 if (MLX5_TXOFF_CONFIG(VLAN)) 4162 assert(!(loc->mbuf->ol_flags & 4163 PKT_TX_VLAN_PKT)); 4164 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 4165 /* We have to store mbuf in elts.*/ 4166 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 4167 room -= MLX5_WQE_DSEG_SIZE; 4168 /* Ring buffer wraparound is checked at the loop end.*/ 4169 ++dseg; 4170 next_mbuf: 4171 #ifdef MLX5_PMD_SOFT_COUNTERS 4172 /* Update sent data bytes counter. */ 4173 slen += dlen; 4174 #endif 4175 loc->pkts_sent++; 4176 loc->elts_free--; 4177 pkts_n--; 4178 if (unlikely(!pkts_n || !loc->elts_free)) { 4179 /* 4180 * We have no resources/packets to 4181 * continue build descriptors. 4182 */ 4183 part -= room; 4184 mlx5_tx_idone_empw(txq, loc, part, slen, olx); 4185 return MLX5_TXCMP_CODE_EXIT; 4186 } 4187 loc->mbuf = *pkts++; 4188 if (likely(pkts_n > 1)) 4189 rte_prefetch0(*pkts); 4190 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4191 /* 4192 * Unroll the completion code to avoid 4193 * returning variable value - it results in 4194 * unoptimized sequent checking in caller. 
4195 */ 4196 if (ret == MLX5_TXCMP_CODE_MULTI) { 4197 part -= room; 4198 mlx5_tx_idone_empw(txq, loc, part, slen, olx); 4199 if (unlikely(!loc->elts_free || 4200 !loc->wqe_free)) 4201 return MLX5_TXCMP_CODE_EXIT; 4202 return MLX5_TXCMP_CODE_MULTI; 4203 } 4204 assert(NB_SEGS(loc->mbuf) == 1); 4205 if (ret == MLX5_TXCMP_CODE_TSO) { 4206 part -= room; 4207 mlx5_tx_idone_empw(txq, loc, part, slen, olx); 4208 if (unlikely(!loc->elts_free || 4209 !loc->wqe_free)) 4210 return MLX5_TXCMP_CODE_EXIT; 4211 return MLX5_TXCMP_CODE_TSO; 4212 } 4213 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4214 part -= room; 4215 mlx5_tx_idone_empw(txq, loc, part, slen, olx); 4216 if (unlikely(!loc->elts_free || 4217 !loc->wqe_free)) 4218 return MLX5_TXCMP_CODE_EXIT; 4219 return MLX5_TXCMP_CODE_SINGLE; 4220 } 4221 if (ret != MLX5_TXCMP_CODE_EMPW) { 4222 assert(false); 4223 part -= room; 4224 mlx5_tx_idone_empw(txq, loc, part, slen, olx); 4225 return MLX5_TXCMP_CODE_ERROR; 4226 } 4227 /* Check if we have minimal room left. */ 4228 nlim--; 4229 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 4230 break; 4231 /* 4232 * Check whether packet parameters coincide 4233 * within assumed eMPW batch: 4234 * - check sum settings 4235 * - metadata value 4236 * - software parser settings 4237 * - packets length (legacy MPW only) 4238 */ 4239 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) 4240 break; 4241 /* Packet attributes match, continue the same eMPW. */ 4242 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4243 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4244 } 4245 /* 4246 * We get here to close an existing eMPW 4247 * session and start the new one. 4248 */ 4249 assert(pkts_n); 4250 part -= room; 4251 if (unlikely(!part)) 4252 return MLX5_TXCMP_CODE_EXIT; 4253 mlx5_tx_idone_empw(txq, loc, part, slen, olx); 4254 if (unlikely(!loc->elts_free || 4255 !loc->wqe_free)) 4256 return MLX5_TXCMP_CODE_EXIT; 4257 /* Continue the loop with new eMPW session. */ 4258 } 4259 assert(false); 4260 } 4261 4262 /** 4263 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 4264 * Data inlining and VLAN insertion are supported. 4265 */ 4266 static __rte_always_inline enum mlx5_txcmp_code 4267 mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, 4268 struct rte_mbuf **restrict pkts, 4269 unsigned int pkts_n, 4270 struct mlx5_txq_local *restrict loc, 4271 unsigned int olx) 4272 { 4273 /* 4274 * Subroutine is the part of mlx5_tx_burst_single() 4275 * and sends single-segment packet with SEND opcode. 4276 */ 4277 assert(loc->elts_free && loc->wqe_free); 4278 assert(pkts_n > loc->pkts_sent); 4279 pkts += loc->pkts_sent + 1; 4280 pkts_n -= loc->pkts_sent; 4281 for (;;) { 4282 struct mlx5_wqe *restrict wqe; 4283 enum mlx5_txcmp_code ret; 4284 4285 assert(NB_SEGS(loc->mbuf) == 1); 4286 if (MLX5_TXOFF_CONFIG(INLINE)) { 4287 unsigned int inlen, vlan = 0; 4288 4289 inlen = rte_pktmbuf_data_len(loc->mbuf); 4290 if (MLX5_TXOFF_CONFIG(VLAN) && 4291 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4292 vlan = sizeof(struct rte_vlan_hdr); 4293 inlen += vlan; 4294 static_assert((sizeof(struct rte_vlan_hdr) + 4295 sizeof(struct rte_ether_hdr)) == 4296 MLX5_ESEG_MIN_INLINE_SIZE, 4297 "invalid min inline data size"); 4298 } 4299 /* 4300 * If inlining is enabled at configuration time 4301 * the limit must be not less than minimal size. 4302 * Otherwise we would do extra check for data 4303 * size to avoid crashes due to length overflow. 
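 * In other words, queue setup is assumed to guarantee
 * txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE (18 bytes), which the
 * assert below re-checks and which keeps the later
 * "inlen - MLX5_ESEG_MIN_INLINE_SIZE" computation from underflowing.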
4304 */ 4305 assert(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 4306 if (inlen <= txq->inlen_send) { 4307 unsigned int seg_n, wqe_n; 4308 4309 rte_prefetch0(rte_pktmbuf_mtod 4310 (loc->mbuf, uint8_t *)); 4311 /* Check against minimal length. */ 4312 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 4313 return MLX5_TXCMP_CODE_ERROR; 4314 /* 4315 * Completely inlined packet data WQE: 4316 * - Control Segment, SEND opcode 4317 * - Ethernet Segment, no VLAN insertion 4318 * - Data inlined, VLAN optionally inserted 4319 * - Alignment to MLX5_WSEG_SIZE 4320 * Have to estimate amount of WQEBBs 4321 */ 4322 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 4323 MLX5_ESEG_MIN_INLINE_SIZE + 4324 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4325 /* Check if there are enough WQEBBs. */ 4326 wqe_n = (seg_n + 3) / 4; 4327 if (wqe_n > loc->wqe_free) 4328 return MLX5_TXCMP_CODE_EXIT; 4329 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4330 loc->wqe_last = wqe; 4331 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 4332 MLX5_OPCODE_SEND, olx); 4333 mlx5_tx_eseg_data(txq, loc, wqe, 4334 vlan, inlen, 0, olx); 4335 txq->wqe_ci += wqe_n; 4336 loc->wqe_free -= wqe_n; 4337 /* 4338 * Packet data are completely inlined, 4339 * free the packet immediately. 4340 */ 4341 rte_pktmbuf_free_seg(loc->mbuf); 4342 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 4343 MLX5_TXOFF_CONFIG(MPW)) && 4344 txq->inlen_mode) { 4345 /* 4346 * If minimal inlining is requested the eMPW 4347 * feature should be disabled due to data is 4348 * inlined into Ethernet Segment, which can 4349 * not contain inlined data for eMPW due to 4350 * segment shared for all packets. 4351 */ 4352 struct mlx5_wqe_dseg *restrict dseg; 4353 unsigned int ds; 4354 uint8_t *dptr; 4355 4356 /* 4357 * The inline-mode settings require 4358 * to inline the specified amount of 4359 * data bytes to the Ethernet Segment. 4360 * We should check the free space in 4361 * WQE ring buffer to inline partially. 4362 */ 4363 assert(txq->inlen_send >= txq->inlen_mode); 4364 assert(inlen > txq->inlen_mode); 4365 assert(txq->inlen_mode >= 4366 MLX5_ESEG_MIN_INLINE_SIZE); 4367 /* 4368 * Check whether there are enough free WQEBBs: 4369 * - Control Segment 4370 * - Ethernet Segment 4371 * - First Segment of inlined Ethernet data 4372 * - ... data continued ... 4373 * - Finishing Data Segment of pointer type 4374 */ 4375 ds = (MLX5_WQE_CSEG_SIZE + 4376 MLX5_WQE_ESEG_SIZE + 4377 MLX5_WQE_DSEG_SIZE + 4378 txq->inlen_mode - 4379 MLX5_ESEG_MIN_INLINE_SIZE + 4380 MLX5_WQE_DSEG_SIZE + 4381 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4382 if (loc->wqe_free < ((ds + 3) / 4)) 4383 return MLX5_TXCMP_CODE_EXIT; 4384 /* 4385 * Build the ordinary SEND WQE: 4386 * - Control Segment 4387 * - Ethernet Segment, inline inlen_mode bytes 4388 * - Data Segment of pointer type 4389 */ 4390 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4391 loc->wqe_last = wqe; 4392 mlx5_tx_cseg_init(txq, loc, wqe, ds, 4393 MLX5_OPCODE_SEND, olx); 4394 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 4395 txq->inlen_mode, 4396 0, olx); 4397 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4398 txq->inlen_mode - vlan; 4399 inlen -= txq->inlen_mode; 4400 mlx5_tx_dseg_ptr(txq, loc, dseg, 4401 dptr, inlen, olx); 4402 /* 4403 * WQE is built, update the loop parameters 4404 * and got to the next packet. 
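 * As an illustration (taking MLX5_WSEG_SIZE = 16 and
 * MLX5_ESEG_MIN_INLINE_SIZE = 18), txq->inlen_mode = 128 makes the
 * estimation above yield ds = 4 + (128 - 18 + 15) / 16 = 11 segments,
 * so the ring advances by (11 + 3) / 4 = 3 WQEBBs just below.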
4405 */ 4406 txq->wqe_ci += (ds + 3) / 4; 4407 loc->wqe_free -= (ds + 3) / 4; 4408 /* We have to store mbuf in elts.*/ 4409 assert(MLX5_TXOFF_CONFIG(INLINE)); 4410 txq->elts[txq->elts_head++ & txq->elts_m] = 4411 loc->mbuf; 4412 --loc->elts_free; 4413 } else { 4414 uint8_t *dptr; 4415 unsigned int dlen; 4416 4417 /* 4418 * Partially inlined packet data WQE, we have 4419 * some space in title WQEBB, we can fill it 4420 * with some packet data. It takes one WQEBB, 4421 * it is available, no extra space check: 4422 * - Control Segment, SEND opcode 4423 * - Ethernet Segment, no VLAN insertion 4424 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 4425 * - Data Segment, pointer type 4426 * 4427 * We also get here if VLAN insertion is not 4428 * supported by HW, the inline is enabled. 4429 */ 4430 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4431 loc->wqe_last = wqe; 4432 mlx5_tx_cseg_init(txq, loc, wqe, 4, 4433 MLX5_OPCODE_SEND, olx); 4434 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 4435 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4436 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 4437 /* 4438 * The length check is performed above, by 4439 * comparing with txq->inlen_send. We should 4440 * not get overflow here. 4441 */ 4442 assert(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 4443 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 4444 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 4445 dptr, dlen, olx); 4446 ++txq->wqe_ci; 4447 --loc->wqe_free; 4448 /* We have to store mbuf in elts.*/ 4449 assert(MLX5_TXOFF_CONFIG(INLINE)); 4450 txq->elts[txq->elts_head++ & txq->elts_m] = 4451 loc->mbuf; 4452 --loc->elts_free; 4453 } 4454 #ifdef MLX5_PMD_SOFT_COUNTERS 4455 /* Update sent data bytes counter. */ 4456 txq->stats.obytes += vlan + 4457 rte_pktmbuf_data_len(loc->mbuf); 4458 #endif 4459 } else { 4460 /* 4461 * No inline at all, it means the CPU cycles saving 4462 * is prioritized at configuration, we should not 4463 * copy any packet data to WQE. 4464 * 4465 * SEND WQE, one WQEBB: 4466 * - Control Segment, SEND opcode 4467 * - Ethernet Segment, optional VLAN, no inline 4468 * - Data Segment, pointer type 4469 */ 4470 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4471 loc->wqe_last = wqe; 4472 mlx5_tx_cseg_init(txq, loc, wqe, 3, 4473 MLX5_OPCODE_SEND, olx); 4474 mlx5_tx_eseg_none(txq, loc, wqe, olx); 4475 mlx5_tx_dseg_ptr 4476 (txq, loc, &wqe->dseg[0], 4477 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4478 rte_pktmbuf_data_len(loc->mbuf), olx); 4479 ++txq->wqe_ci; 4480 --loc->wqe_free; 4481 /* 4482 * We should not store mbuf pointer in elts 4483 * if no inlining is configured, this is done 4484 * by calling routine in a batch copy. 4485 */ 4486 assert(!MLX5_TXOFF_CONFIG(INLINE)); 4487 --loc->elts_free; 4488 #ifdef MLX5_PMD_SOFT_COUNTERS 4489 /* Update sent data bytes counter. */ 4490 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 4491 if (MLX5_TXOFF_CONFIG(VLAN) && 4492 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 4493 txq->stats.obytes += 4494 sizeof(struct rte_vlan_hdr); 4495 #endif 4496 } 4497 ++loc->pkts_sent; 4498 --pkts_n; 4499 /* Request CQE generation if limits are reached. 
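*/
/*
 * Recap of the three SEND layouts built above (descriptive only):
 *   - full inline:    Control + Ethernet Segment + all packet data
 *     inlined, WQEBB count driven by the packet length;
 *   - partial inline: Control + Ethernet Segment carrying inlen_mode
 *     bytes + a finishing pointer Data Segment;
 *   - no inline:      Control + Ethernet + pointer Data Segment,
 *     i.e. three WSEGs in a single WQEBB (ds = 3 above).
 */
/* Request CQE generation if limits are reached.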
*/ 4500 mlx5_tx_request_completion(txq, loc, false, olx); 4501 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4502 return MLX5_TXCMP_CODE_EXIT; 4503 loc->mbuf = *pkts++; 4504 if (pkts_n > 1) 4505 rte_prefetch0(*pkts); 4506 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4507 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 4508 return ret; 4509 } 4510 assert(false); 4511 } 4512 4513 static __rte_always_inline enum mlx5_txcmp_code 4514 mlx5_tx_burst_single(struct mlx5_txq_data *restrict txq, 4515 struct rte_mbuf **restrict pkts, 4516 unsigned int pkts_n, 4517 struct mlx5_txq_local *restrict loc, 4518 unsigned int olx) 4519 { 4520 enum mlx5_txcmp_code ret; 4521 4522 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 4523 if (ret == MLX5_TXCMP_CODE_SINGLE) 4524 goto ordinary_send; 4525 assert(ret == MLX5_TXCMP_CODE_EMPW); 4526 for (;;) { 4527 /* Optimize for inline/no inline eMPW send. */ 4528 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 4529 mlx5_tx_burst_empw_inline 4530 (txq, pkts, pkts_n, loc, olx) : 4531 mlx5_tx_burst_empw_simple 4532 (txq, pkts, pkts_n, loc, olx); 4533 if (ret != MLX5_TXCMP_CODE_SINGLE) 4534 return ret; 4535 /* The resources to send one packet should remain. */ 4536 assert(loc->elts_free && loc->wqe_free); 4537 ordinary_send: 4538 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 4539 assert(ret != MLX5_TXCMP_CODE_SINGLE); 4540 if (ret != MLX5_TXCMP_CODE_EMPW) 4541 return ret; 4542 /* The resources to send one packet should remain. */ 4543 assert(loc->elts_free && loc->wqe_free); 4544 } 4545 } 4546 4547 /** 4548 * DPDK Tx callback template. This is configured template 4549 * used to generate routines optimized for specified offload setup. 4550 * One of this generated functions is chosen at SQ configuration 4551 * time. 4552 * 4553 * @param txq 4554 * Generic pointer to TX queue structure. 4555 * @param[in] pkts 4556 * Packets to transmit. 4557 * @param pkts_n 4558 * Number of packets in array. 4559 * @param olx 4560 * Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx 4561 * values. Should be static to take compile time static configuration 4562 * advantages. 4563 * 4564 * @return 4565 * Number of packets successfully transmitted (<= pkts_n). 4566 */ 4567 static __rte_always_inline uint16_t 4568 mlx5_tx_burst_tmpl(struct mlx5_txq_data *restrict txq, 4569 struct rte_mbuf **restrict pkts, 4570 uint16_t pkts_n, 4571 unsigned int olx) 4572 { 4573 struct mlx5_txq_local loc; 4574 enum mlx5_txcmp_code ret; 4575 unsigned int part; 4576 4577 assert(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4578 assert(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4579 if (unlikely(!pkts_n)) 4580 return 0; 4581 loc.pkts_sent = 0; 4582 loc.pkts_copy = 0; 4583 loc.wqe_last = NULL; 4584 4585 send_loop: 4586 loc.pkts_loop = loc.pkts_sent; 4587 /* 4588 * Check if there are some CQEs, if any: 4589 * - process an encountered errors 4590 * - process the completed WQEs 4591 * - free related mbufs 4592 * - doorbell the NIC about processed CQEs 4593 */ 4594 rte_prefetch0(*(pkts + loc.pkts_sent)); 4595 mlx5_tx_handle_completion(txq, olx); 4596 /* 4597 * Calculate the number of available resources - elts and WQEs. 4598 * There are two possible different scenarios: 4599 * - no data inlining into WQEs, one WQEBB may contains upto 4600 * four packets, in this case elts become scarce resource 4601 * - data inlining into WQEs, one packet may require multiple 4602 * WQEBBs, the WQEs become the limiting factor. 
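 * For example (illustrative numbers only): with 512 elts and 256
 * WQEBBs a non-inlining burst is normally capped by elts_free, while
 * an inlining burst may exhaust wqe_free first; both values are
 * recomputed below from the head/tail and ci/pi counters.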
4603 */ 4604 assert(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4605 loc.elts_free = txq->elts_s - 4606 (uint16_t)(txq->elts_head - txq->elts_tail); 4607 assert(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4608 loc.wqe_free = txq->wqe_s - 4609 (uint16_t)(txq->wqe_ci - txq->wqe_pi); 4610 if (unlikely(!loc.elts_free || !loc.wqe_free)) 4611 goto burst_exit; 4612 for (;;) { 4613 /* 4614 * Fetch the packet from array. Usually this is 4615 * the first packet in series of multi/single 4616 * segment packets. 4617 */ 4618 loc.mbuf = *(pkts + loc.pkts_sent); 4619 /* Dedicated branch for multi-segment packets. */ 4620 if (MLX5_TXOFF_CONFIG(MULTI) && 4621 unlikely(NB_SEGS(loc.mbuf) > 1)) { 4622 /* 4623 * Multi-segment packet encountered. 4624 * Hardware is able to process it only 4625 * with SEND/TSO opcodes, one packet 4626 * per WQE, do it in dedicated routine. 4627 */ 4628 enter_send_multi: 4629 assert(loc.pkts_sent >= loc.pkts_copy); 4630 part = loc.pkts_sent - loc.pkts_copy; 4631 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 4632 /* 4633 * There are some single-segment mbufs not 4634 * stored in elts. The mbufs must be in the 4635 * same order as WQEs, so we must copy the 4636 * mbufs to elts here, before the coming 4637 * multi-segment packet mbufs is appended. 4638 */ 4639 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, 4640 part, olx); 4641 loc.pkts_copy = loc.pkts_sent; 4642 } 4643 assert(pkts_n > loc.pkts_sent); 4644 ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx); 4645 if (!MLX5_TXOFF_CONFIG(INLINE)) 4646 loc.pkts_copy = loc.pkts_sent; 4647 /* 4648 * These returned code checks are supposed 4649 * to be optimized out due to routine inlining. 4650 */ 4651 if (ret == MLX5_TXCMP_CODE_EXIT) { 4652 /* 4653 * The routine returns this code when 4654 * all packets are sent or there is no 4655 * enough resources to complete request. 4656 */ 4657 break; 4658 } 4659 if (ret == MLX5_TXCMP_CODE_ERROR) { 4660 /* 4661 * The routine returns this code when 4662 * some error in the incoming packets 4663 * format occurred. 4664 */ 4665 txq->stats.oerrors++; 4666 break; 4667 } 4668 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4669 /* 4670 * The single-segment packet was encountered 4671 * in the array, try to send it with the 4672 * best optimized way, possible engaging eMPW. 4673 */ 4674 goto enter_send_single; 4675 } 4676 if (MLX5_TXOFF_CONFIG(TSO) && 4677 ret == MLX5_TXCMP_CODE_TSO) { 4678 /* 4679 * The single-segment TSO packet was 4680 * encountered in the array. 4681 */ 4682 goto enter_send_tso; 4683 } 4684 /* We must not get here. Something is going wrong. */ 4685 assert(false); 4686 txq->stats.oerrors++; 4687 break; 4688 } 4689 /* Dedicated branch for single-segment TSO packets. */ 4690 if (MLX5_TXOFF_CONFIG(TSO) && 4691 unlikely(loc.mbuf->ol_flags & PKT_TX_TCP_SEG)) { 4692 /* 4693 * TSO might require special way for inlining 4694 * (dedicated parameters) and is sent with 4695 * MLX5_OPCODE_TSO opcode only, provide this 4696 * in dedicated branch. 4697 */ 4698 enter_send_tso: 4699 assert(NB_SEGS(loc.mbuf) == 1); 4700 assert(pkts_n > loc.pkts_sent); 4701 ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx); 4702 /* 4703 * These returned code checks are supposed 4704 * to be optimized out due to routine inlining. 
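 * (olx is a compile-time constant in every generated instance and the
 * subroutines are force-inlined, so the compiler can drop the branches
 * for codes that a particular offload configuration can never return.)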
4705 */ 4706 if (ret == MLX5_TXCMP_CODE_EXIT) 4707 break; 4708 if (ret == MLX5_TXCMP_CODE_ERROR) { 4709 txq->stats.oerrors++; 4710 break; 4711 } 4712 if (ret == MLX5_TXCMP_CODE_SINGLE) 4713 goto enter_send_single; 4714 if (MLX5_TXOFF_CONFIG(MULTI) && 4715 ret == MLX5_TXCMP_CODE_MULTI) { 4716 /* 4717 * The multi-segment packet was 4718 * encountered in the array. 4719 */ 4720 goto enter_send_multi; 4721 } 4722 /* We must not get here. Something is going wrong. */ 4723 assert(false); 4724 txq->stats.oerrors++; 4725 break; 4726 } 4727 /* 4728 * The dedicated branch for the single-segment packets 4729 * without TSO. Often these ones can be sent using 4730 * MLX5_OPCODE_EMPW with multiple packets in one WQE. 4731 * The routine builds the WQEs till it encounters 4732 * the TSO or multi-segment packet (in case if these 4733 * offloads are requested at SQ configuration time). 4734 */ 4735 enter_send_single: 4736 assert(pkts_n > loc.pkts_sent); 4737 ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx); 4738 /* 4739 * These returned code checks are supposed 4740 * to be optimized out due to routine inlining. 4741 */ 4742 if (ret == MLX5_TXCMP_CODE_EXIT) 4743 break; 4744 if (ret == MLX5_TXCMP_CODE_ERROR) { 4745 txq->stats.oerrors++; 4746 break; 4747 } 4748 if (MLX5_TXOFF_CONFIG(MULTI) && 4749 ret == MLX5_TXCMP_CODE_MULTI) { 4750 /* 4751 * The multi-segment packet was 4752 * encountered in the array. 4753 */ 4754 goto enter_send_multi; 4755 } 4756 if (MLX5_TXOFF_CONFIG(TSO) && 4757 ret == MLX5_TXCMP_CODE_TSO) { 4758 /* 4759 * The single-segment TSO packet was 4760 * encountered in the array. 4761 */ 4762 goto enter_send_tso; 4763 } 4764 /* We must not get here. Something is going wrong. */ 4765 assert(false); 4766 txq->stats.oerrors++; 4767 break; 4768 } 4769 /* 4770 * Main Tx loop is completed, do the rest: 4771 * - set completion request if thresholds are reached 4772 * - doorbell the hardware 4773 * - copy the rest of mbufs to elts (if any) 4774 */ 4775 assert(MLX5_TXOFF_CONFIG(INLINE) || loc.pkts_sent >= loc.pkts_copy); 4776 /* Take a shortcut if nothing is sent. */ 4777 if (unlikely(loc.pkts_sent == loc.pkts_loop)) 4778 goto burst_exit; 4779 /* 4780 * Ring QP doorbell immediately after WQE building completion 4781 * to improve latencies. The pure software related data treatment 4782 * can be completed after doorbell. Tx CQEs for this SQ are 4783 * processed in this thread only by the polling. 4784 * 4785 * The rdma core library can map doorbell register in two ways, 4786 * depending on the environment variable "MLX5_SHUT_UP_BF": 4787 * 4788 * - as regular cached memory, the variable is either missing or 4789 * set to zero. This type of mapping may cause the significant 4790 * doorbell register writing latency and requires explicit 4791 * memory write barrier to mitigate this issue and prevent 4792 * write combining. 4793 * 4794 * - as non-cached memory, the variable is present and set to 4795 * not "0" value. This type of mapping may cause performance 4796 * impact under heavy loading conditions but the explicit write 4797 * memory barrier is not required and it may improve core 4798 * performance. 4799 * 4800 * - the legacy behaviour (prior 19.08 release) was to use some 4801 * heuristics to decide whether write memory barrier should 4802 * be performed. 
This behavior is supported with specifying 4803 * tx_db_nc=2, write barrier is skipped if application 4804 * provides the full recommended burst of packets, it 4805 * supposes the next packets are coming and the write barrier 4806 * will be issued on the next burst (after descriptor writing, 4807 * at least). 4808 */ 4809 mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc && 4810 (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST)); 4811 /* Not all of the mbufs may be stored into elts yet. */ 4812 part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy; 4813 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 4814 /* 4815 * There are some single-segment mbufs not stored in elts. 4816 * It can be only if the last packet was single-segment. 4817 * The copying is gathered into one place due to it is 4818 * a good opportunity to optimize that with SIMD. 4819 * Unfortunately if inlining is enabled the gaps in 4820 * pointer array may happen due to early freeing of the 4821 * inlined mbufs. 4822 */ 4823 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx); 4824 loc.pkts_copy = loc.pkts_sent; 4825 } 4826 assert(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4827 assert(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4828 if (pkts_n > loc.pkts_sent) { 4829 /* 4830 * If burst size is large there might be no enough CQE 4831 * fetched from completion queue and no enough resources 4832 * freed to send all the packets. 4833 */ 4834 goto send_loop; 4835 } 4836 burst_exit: 4837 #ifdef MLX5_PMD_SOFT_COUNTERS 4838 /* Increment sent packets counter. */ 4839 txq->stats.opackets += loc.pkts_sent; 4840 #endif 4841 return loc.pkts_sent; 4842 } 4843 4844 /* Generate routines with Enhanced Multi-Packet Write support. */ 4845 MLX5_TXOFF_DECL(full_empw, 4846 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW) 4847 4848 MLX5_TXOFF_DECL(none_empw, 4849 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 4850 4851 MLX5_TXOFF_DECL(md_empw, 4852 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4853 4854 MLX5_TXOFF_DECL(mt_empw, 4855 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4856 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4857 4858 MLX5_TXOFF_DECL(mtsc_empw, 4859 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4860 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4861 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4862 4863 MLX5_TXOFF_DECL(mti_empw, 4864 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4865 MLX5_TXOFF_CONFIG_INLINE | 4866 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4867 4868 MLX5_TXOFF_DECL(mtv_empw, 4869 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4870 MLX5_TXOFF_CONFIG_VLAN | 4871 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4872 4873 MLX5_TXOFF_DECL(mtiv_empw, 4874 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4875 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4876 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4877 4878 MLX5_TXOFF_DECL(sc_empw, 4879 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4880 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4881 4882 MLX5_TXOFF_DECL(sci_empw, 4883 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4884 MLX5_TXOFF_CONFIG_INLINE | 4885 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4886 4887 MLX5_TXOFF_DECL(scv_empw, 4888 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4889 MLX5_TXOFF_CONFIG_VLAN | 4890 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4891 4892 MLX5_TXOFF_DECL(sciv_empw, 4893 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4894 
MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4895 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4896 4897 MLX5_TXOFF_DECL(i_empw, 4898 MLX5_TXOFF_CONFIG_INLINE | 4899 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4900 4901 MLX5_TXOFF_DECL(v_empw, 4902 MLX5_TXOFF_CONFIG_VLAN | 4903 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4904 4905 MLX5_TXOFF_DECL(iv_empw, 4906 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4907 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4908 4909 /* Generate routines without Enhanced Multi-Packet Write support. */ 4910 MLX5_TXOFF_DECL(full, 4911 MLX5_TXOFF_CONFIG_FULL) 4912 4913 MLX5_TXOFF_DECL(none, 4914 MLX5_TXOFF_CONFIG_NONE) 4915 4916 MLX5_TXOFF_DECL(md, 4917 MLX5_TXOFF_CONFIG_METADATA) 4918 4919 MLX5_TXOFF_DECL(mt, 4920 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4921 MLX5_TXOFF_CONFIG_METADATA) 4922 4923 MLX5_TXOFF_DECL(mtsc, 4924 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4925 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4926 MLX5_TXOFF_CONFIG_METADATA) 4927 4928 MLX5_TXOFF_DECL(mti, 4929 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4930 MLX5_TXOFF_CONFIG_INLINE | 4931 MLX5_TXOFF_CONFIG_METADATA) 4932 4933 4934 MLX5_TXOFF_DECL(mtv, 4935 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4936 MLX5_TXOFF_CONFIG_VLAN | 4937 MLX5_TXOFF_CONFIG_METADATA) 4938 4939 4940 MLX5_TXOFF_DECL(mtiv, 4941 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4942 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4943 MLX5_TXOFF_CONFIG_METADATA) 4944 4945 MLX5_TXOFF_DECL(sc, 4946 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4947 MLX5_TXOFF_CONFIG_METADATA) 4948 4949 MLX5_TXOFF_DECL(sci, 4950 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4951 MLX5_TXOFF_CONFIG_INLINE | 4952 MLX5_TXOFF_CONFIG_METADATA) 4953 4954 4955 MLX5_TXOFF_DECL(scv, 4956 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4957 MLX5_TXOFF_CONFIG_VLAN | 4958 MLX5_TXOFF_CONFIG_METADATA) 4959 4960 4961 MLX5_TXOFF_DECL(sciv, 4962 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4963 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4964 MLX5_TXOFF_CONFIG_METADATA) 4965 4966 MLX5_TXOFF_DECL(i, 4967 MLX5_TXOFF_CONFIG_INLINE | 4968 MLX5_TXOFF_CONFIG_METADATA) 4969 4970 MLX5_TXOFF_DECL(v, 4971 MLX5_TXOFF_CONFIG_VLAN | 4972 MLX5_TXOFF_CONFIG_METADATA) 4973 4974 MLX5_TXOFF_DECL(iv, 4975 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4976 MLX5_TXOFF_CONFIG_METADATA) 4977 4978 /* 4979 * Generate routines with Legacy Multi-Packet Write support. 4980 * This mode is supported by ConnectX-4LX only and imposes 4981 * offload limitations, not supported: 4982 * - ACL/Flows (metadata are becoming meaningless) 4983 * - WQE Inline headers 4984 * - SRIOV (E-Switch offloads) 4985 * - VLAN insertion 4986 * - tunnel encapsulation/decapsulation 4987 * - TSO 4988 */ 4989 MLX5_TXOFF_DECL(none_mpw, 4990 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW | 4991 MLX5_TXOFF_CONFIG_MPW) 4992 4993 MLX5_TXOFF_DECL(mci_mpw, 4994 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 4995 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 4996 MLX5_TXOFF_CONFIG_MPW) 4997 4998 MLX5_TXOFF_DECL(mc_mpw, 4999 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 5000 MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW) 5001 5002 MLX5_TXOFF_DECL(i_mpw, 5003 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 5004 MLX5_TXOFF_CONFIG_MPW) 5005 5006 /* 5007 * Array of declared and compiled Tx burst function and corresponding 5008 * supported offloads set. 
The array is used to select the Tx burst 5009 * function for specified offloads set at Tx queue configuration time. 5010 */ 5011 const struct { 5012 eth_tx_burst_t func; 5013 unsigned int olx; 5014 } txoff_func[] = { 5015 MLX5_TXOFF_INFO(full_empw, 5016 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5017 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5018 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5019 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5020 5021 MLX5_TXOFF_INFO(none_empw, 5022 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 5023 5024 MLX5_TXOFF_INFO(md_empw, 5025 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5026 5027 MLX5_TXOFF_INFO(mt_empw, 5028 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5029 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5030 5031 MLX5_TXOFF_INFO(mtsc_empw, 5032 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5033 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5034 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5035 5036 MLX5_TXOFF_INFO(mti_empw, 5037 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5038 MLX5_TXOFF_CONFIG_INLINE | 5039 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5040 5041 MLX5_TXOFF_INFO(mtv_empw, 5042 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5043 MLX5_TXOFF_CONFIG_VLAN | 5044 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5045 5046 MLX5_TXOFF_INFO(mtiv_empw, 5047 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5048 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5049 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5050 5051 MLX5_TXOFF_INFO(sc_empw, 5052 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5053 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5054 5055 MLX5_TXOFF_INFO(sci_empw, 5056 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5057 MLX5_TXOFF_CONFIG_INLINE | 5058 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5059 5060 MLX5_TXOFF_INFO(scv_empw, 5061 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5062 MLX5_TXOFF_CONFIG_VLAN | 5063 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5064 5065 MLX5_TXOFF_INFO(sciv_empw, 5066 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5067 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5068 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5069 5070 MLX5_TXOFF_INFO(i_empw, 5071 MLX5_TXOFF_CONFIG_INLINE | 5072 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5073 5074 MLX5_TXOFF_INFO(v_empw, 5075 MLX5_TXOFF_CONFIG_VLAN | 5076 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5077 5078 MLX5_TXOFF_INFO(iv_empw, 5079 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5080 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5081 5082 MLX5_TXOFF_INFO(full, 5083 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5084 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5085 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5086 MLX5_TXOFF_CONFIG_METADATA) 5087 5088 MLX5_TXOFF_INFO(none, 5089 MLX5_TXOFF_CONFIG_NONE) 5090 5091 MLX5_TXOFF_INFO(md, 5092 MLX5_TXOFF_CONFIG_METADATA) 5093 5094 MLX5_TXOFF_INFO(mt, 5095 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5096 MLX5_TXOFF_CONFIG_METADATA) 5097 5098 MLX5_TXOFF_INFO(mtsc, 5099 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5100 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5101 MLX5_TXOFF_CONFIG_METADATA) 5102 5103 MLX5_TXOFF_INFO(mti, 5104 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5105 MLX5_TXOFF_CONFIG_INLINE | 5106 MLX5_TXOFF_CONFIG_METADATA) 5107 5108 MLX5_TXOFF_INFO(mtv, 5109 MLX5_TXOFF_CONFIG_MULTI | 
MLX5_TXOFF_CONFIG_TSO | 5110 MLX5_TXOFF_CONFIG_VLAN | 5111 MLX5_TXOFF_CONFIG_METADATA) 5112 5113 MLX5_TXOFF_INFO(mtiv, 5114 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5115 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5116 MLX5_TXOFF_CONFIG_METADATA) 5117 5118 MLX5_TXOFF_INFO(sc, 5119 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5120 MLX5_TXOFF_CONFIG_METADATA) 5121 5122 MLX5_TXOFF_INFO(sci, 5123 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5124 MLX5_TXOFF_CONFIG_INLINE | 5125 MLX5_TXOFF_CONFIG_METADATA) 5126 5127 MLX5_TXOFF_INFO(scv, 5128 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5129 MLX5_TXOFF_CONFIG_VLAN | 5130 MLX5_TXOFF_CONFIG_METADATA) 5131 5132 MLX5_TXOFF_INFO(sciv, 5133 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5134 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5135 MLX5_TXOFF_CONFIG_METADATA) 5136 5137 MLX5_TXOFF_INFO(i, 5138 MLX5_TXOFF_CONFIG_INLINE | 5139 MLX5_TXOFF_CONFIG_METADATA) 5140 5141 MLX5_TXOFF_INFO(v, 5142 MLX5_TXOFF_CONFIG_VLAN | 5143 MLX5_TXOFF_CONFIG_METADATA) 5144 5145 MLX5_TXOFF_INFO(iv, 5146 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5147 MLX5_TXOFF_CONFIG_METADATA) 5148 5149 MLX5_TXOFF_INFO(none_mpw, 5150 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW | 5151 MLX5_TXOFF_CONFIG_MPW) 5152 5153 MLX5_TXOFF_INFO(mci_mpw, 5154 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 5155 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 5156 MLX5_TXOFF_CONFIG_MPW) 5157 5158 MLX5_TXOFF_INFO(mc_mpw, 5159 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 5160 MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW) 5161 5162 MLX5_TXOFF_INFO(i_mpw, 5163 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 5164 MLX5_TXOFF_CONFIG_MPW) 5165 }; 5166 5167 /** 5168 * Configure the Tx function to use. The routine checks configured 5169 * Tx offloads for the device and selects appropriate Tx burst 5170 * routine. There are multiple Tx burst routines compiled from 5171 * the same template in the most optimal way for the dedicated 5172 * Tx offloads set. 5173 * 5174 * @param dev 5175 * Pointer to private data structure. 5176 * 5177 * @return 5178 * Pointer to selected Tx burst function. 5179 */ 5180 eth_tx_burst_t 5181 mlx5_select_tx_function(struct rte_eth_dev *dev) 5182 { 5183 struct mlx5_priv *priv = dev->data->dev_private; 5184 struct mlx5_dev_config *config = &priv->config; 5185 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 5186 unsigned int diff = 0, olx = 0, i, m; 5187 5188 static_assert(MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE <= 5189 MLX5_DSEG_MAX, "invalid WQE max size"); 5190 static_assert(MLX5_WQE_CSEG_SIZE == MLX5_WSEG_SIZE, 5191 "invalid WQE Control Segment size"); 5192 static_assert(MLX5_WQE_ESEG_SIZE == MLX5_WSEG_SIZE, 5193 "invalid WQE Ethernet Segment size"); 5194 static_assert(MLX5_WQE_DSEG_SIZE == MLX5_WSEG_SIZE, 5195 "invalid WQE Data Segment size"); 5196 static_assert(MLX5_WQE_SIZE == 4 * MLX5_WSEG_SIZE, 5197 "invalid WQE size"); 5198 assert(priv); 5199 if (tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) { 5200 /* We should support Multi-Segment Packets. */ 5201 olx |= MLX5_TXOFF_CONFIG_MULTI; 5202 } 5203 if (tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO | 5204 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 5205 DEV_TX_OFFLOAD_GRE_TNL_TSO | 5206 DEV_TX_OFFLOAD_IP_TNL_TSO | 5207 DEV_TX_OFFLOAD_UDP_TNL_TSO)) { 5208 /* We should support TCP Send Offload. 
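*/
/*
 * Illustrative example (hypothetical configuration): a port started
 * with DEV_TX_OFFLOAD_MULTI_SEGS | DEV_TX_OFFLOAD_TCP_CKSUM |
 * DEV_TX_OFFLOAD_TCP_TSO accumulates
 * olx = MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
 *       MLX5_TXOFF_CONFIG_CSUM, plus the METADATA/INLINE/EMPW bits
 * when the corresponding conditions checked further below hold, and
 * the scan over txoff_func[] then picks the best-matching routine
 * whose offload set covers olx.
 */
/* We should support TCP Send Offload.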
*/ 5209 olx |= MLX5_TXOFF_CONFIG_TSO; 5210 } 5211 if (tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO | 5212 DEV_TX_OFFLOAD_UDP_TNL_TSO | 5213 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) { 5214 /* We should support Software Parser for Tunnels. */ 5215 olx |= MLX5_TXOFF_CONFIG_SWP; 5216 } 5217 if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM | 5218 DEV_TX_OFFLOAD_UDP_CKSUM | 5219 DEV_TX_OFFLOAD_TCP_CKSUM | 5220 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) { 5221 /* We should support IP/TCP/UDP Checksums. */ 5222 olx |= MLX5_TXOFF_CONFIG_CSUM; 5223 } 5224 if (tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT) { 5225 /* We should support VLAN insertion. */ 5226 olx |= MLX5_TXOFF_CONFIG_VLAN; 5227 } 5228 if (priv->txqs_n && (*priv->txqs)[0]) { 5229 struct mlx5_txq_data *txd = (*priv->txqs)[0]; 5230 5231 if (txd->inlen_send) { 5232 /* 5233 * Check the data inline requirements. Data inline 5234 * is enabled on per device basis, we can check 5235 * the first Tx queue only. 5236 * 5237 * If device does not support VLAN insertion in WQE 5238 * and some queues are requested to perform VLAN 5239 * insertion offload than inline must be enabled. 5240 */ 5241 olx |= MLX5_TXOFF_CONFIG_INLINE; 5242 } 5243 } 5244 if (config->mps == MLX5_MPW_ENHANCED && 5245 config->txq_inline_min <= 0) { 5246 /* 5247 * The NIC supports Enhanced Multi-Packet Write 5248 * and does not require minimal inline data. 5249 */ 5250 olx |= MLX5_TXOFF_CONFIG_EMPW; 5251 } 5252 if (rte_flow_dynf_metadata_avail()) { 5253 /* We should support Flow metadata. */ 5254 olx |= MLX5_TXOFF_CONFIG_METADATA; 5255 } 5256 if (config->mps == MLX5_MPW) { 5257 /* 5258 * The NIC supports Legacy Multi-Packet Write. 5259 * The MLX5_TXOFF_CONFIG_MPW controls the 5260 * descriptor building method in combination 5261 * with MLX5_TXOFF_CONFIG_EMPW. 5262 */ 5263 if (!(olx & (MLX5_TXOFF_CONFIG_TSO | 5264 MLX5_TXOFF_CONFIG_SWP | 5265 MLX5_TXOFF_CONFIG_VLAN | 5266 MLX5_TXOFF_CONFIG_METADATA))) 5267 olx |= MLX5_TXOFF_CONFIG_EMPW | 5268 MLX5_TXOFF_CONFIG_MPW; 5269 } 5270 /* 5271 * Scan the routines table to find the minimal 5272 * satisfying routine with requested offloads. 5273 */ 5274 m = RTE_DIM(txoff_func); 5275 for (i = 0; i < RTE_DIM(txoff_func); i++) { 5276 unsigned int tmp; 5277 5278 tmp = txoff_func[i].olx; 5279 if (tmp == olx) { 5280 /* Meets requested offloads exactly.*/ 5281 m = i; 5282 break; 5283 } 5284 if ((tmp & olx) != olx) { 5285 /* Does not meet requested offloads at all. */ 5286 continue; 5287 } 5288 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_EMPW) 5289 /* Do not enable eMPW if not configured. */ 5290 continue; 5291 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_INLINE) 5292 /* Do not enable inlining if not configured. */ 5293 continue; 5294 /* 5295 * Some routine meets the requirements. 5296 * Check whether it has minimal amount 5297 * of not requested offloads. 5298 */ 5299 tmp = __builtin_popcountl(tmp & ~olx); 5300 if (m >= RTE_DIM(txoff_func) || tmp < diff) { 5301 /* First or better match, save and continue. */ 5302 m = i; 5303 diff = tmp; 5304 continue; 5305 } 5306 if (tmp == diff) { 5307 tmp = txoff_func[i].olx ^ txoff_func[m].olx; 5308 if (__builtin_ffsl(txoff_func[i].olx & ~tmp) < 5309 __builtin_ffsl(txoff_func[m].olx & ~tmp)) { 5310 /* Lighter not requested offload. 
*/ 5311 m = i; 5312 } 5313 } 5314 } 5315 if (m >= RTE_DIM(txoff_func)) { 5316 DRV_LOG(DEBUG, "port %u has no selected Tx function" 5317 " for requested offloads %04X", 5318 dev->data->port_id, olx); 5319 return NULL; 5320 } 5321 DRV_LOG(DEBUG, "port %u has selected Tx function" 5322 " supporting offloads %04X/%04X", 5323 dev->data->port_id, olx, txoff_func[m].olx); 5324 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MULTI) 5325 DRV_LOG(DEBUG, "\tMULTI (multi segment)"); 5326 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TSO) 5327 DRV_LOG(DEBUG, "\tTSO (TCP send offload)"); 5328 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_SWP) 5329 DRV_LOG(DEBUG, "\tSWP (software parser)"); 5330 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_CSUM) 5331 DRV_LOG(DEBUG, "\tCSUM (checksum offload)"); 5332 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_INLINE) 5333 DRV_LOG(DEBUG, "\tINLIN (inline data)"); 5334 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_VLAN) 5335 DRV_LOG(DEBUG, "\tVLANI (VLAN insertion)"); 5336 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_METADATA) 5337 DRV_LOG(DEBUG, "\tMETAD (tx Flow metadata)"); 5338 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_EMPW) { 5339 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MPW) 5340 DRV_LOG(DEBUG, "\tMPW (Legacy MPW)"); 5341 else 5342 DRV_LOG(DEBUG, "\tEMPW (Enhanced MPW)"); 5343 } 5344 return txoff_func[m].func; 5345 } 5346
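
/*
 * Usage sketch (illustrative, not part of this file): the routine above
 * is expected to be called from the device start/configure path, which
 * installs the returned pointer on the data path, roughly:
 *
 *	eth_tx_burst_t burst = mlx5_select_tx_function(dev);
 *
 *	if (burst == NULL)
 *		treat it as a configuration error;
 *	else
 *		dev->tx_pkt_burst = burst;
 *
 * so that rte_eth_tx_burst() on this port dispatches into one of the
 * template instances generated by MLX5_TXOFF_DECL() above.
 */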