/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015-2019 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_cycles.h>
#include <rte_flow.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_prm.h>
#include <mlx5_common.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
	MLX5_TXCMP_CODE_EXIT = 0,
	MLX5_TXCMP_CODE_ERROR,
	MLX5_TXCMP_CODE_SINGLE,
	MLX5_TXCMP_CODE_MULTI,
	MLX5_TXCMP_CODE_TSO,
	MLX5_TXCMP_CODE_EMPW,
};

/*
 * These defines are used to configure the set of Tx burst routine options
 * supported at compile time. Options that are not specified are optimized
 * out, because the corresponding if conditions can be evaluated at compile
 * time. Offloads with a bigger runtime check overhead (more CPU cycles
 * needed to skip them) should have a bigger index - this is needed to
 * select the best matching routine when there is no exact match and some
 * offloads are not actually requested.
 */
#define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets.*/
#define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported.*/
#define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads.*/
#define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Check Sums offloaded. */
#define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported.*/
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported.*/
#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported.*/
#define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp.*/

/* The most common offloads groups.
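 * MLX5_TXOFF_CONFIG_FULL below combines all of the per-packet offload
 * flags above (EMPW/MPW/TXPP are kept separate).
 * A minimal usage sketch (hypothetical offload set "mc", relying on the
 * MLX5_TXOFF_DECL()/MLX5_TXOFF_CONFIG() macros defined further below):
 *   MLX5_TXOFF_DECL(mc, MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM)
 * declares mlx5_tx_burst_mc(), in which every other MLX5_TXOFF_CONFIG(...)
 * check folds to a compile-time zero and is optimized out.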
*/ 62 #define MLX5_TXOFF_CONFIG_NONE 0 63 #define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \ 64 MLX5_TXOFF_CONFIG_TSO | \ 65 MLX5_TXOFF_CONFIG_SWP | \ 66 MLX5_TXOFF_CONFIG_CSUM | \ 67 MLX5_TXOFF_CONFIG_INLINE | \ 68 MLX5_TXOFF_CONFIG_VLAN | \ 69 MLX5_TXOFF_CONFIG_METADATA) 70 71 #define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask) 72 73 #define MLX5_TXOFF_DECL(func, olx) \ 74 static uint16_t mlx5_tx_burst_##func(void *txq, \ 75 struct rte_mbuf **pkts, \ 76 uint16_t pkts_n) \ 77 { \ 78 return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \ 79 pkts, pkts_n, (olx)); \ 80 } 81 82 #define MLX5_TXOFF_INFO(func, olx) {mlx5_tx_burst_##func, olx}, 83 84 static __rte_always_inline uint32_t 85 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe); 86 87 static __rte_always_inline int 88 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 89 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe); 90 91 static __rte_always_inline uint32_t 92 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe); 93 94 static __rte_always_inline void 95 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 96 volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res); 97 98 static __rte_always_inline void 99 mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx, 100 const unsigned int strd_n); 101 102 static int 103 mlx5_queue_state_modify(struct rte_eth_dev *dev, 104 struct mlx5_mp_arg_queue_state_modify *sm); 105 106 static inline void 107 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 108 volatile struct mlx5_cqe *__rte_restrict cqe, 109 uint32_t phcsum); 110 111 static inline void 112 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 113 volatile struct mlx5_cqe *__rte_restrict cqe, 114 uint32_t len); 115 116 uint32_t mlx5_ptype_table[] __rte_cache_aligned = { 117 [0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */ 118 }; 119 120 uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned; 121 uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned; 122 123 uint64_t rte_net_mlx5_dynf_inline_mask; 124 #define PKT_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask 125 126 /** 127 * Build a table to translate Rx completion flags to packet type. 128 * 129 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 130 */ 131 void 132 mlx5_set_ptype_table(void) 133 { 134 unsigned int i; 135 uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table; 136 137 /* Last entry must not be overwritten, reserved for errored packet. 
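 * (Index 0xff is preset to RTE_PTYPE_ALL_MASK in the table definition
 * above; the loop below therefore stops at RTE_DIM(mlx5_ptype_table) - 1.)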
*/ 138 for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i) 139 (*p)[i] = RTE_PTYPE_UNKNOWN; 140 /* 141 * The index to the array should have: 142 * bit[1:0] = l3_hdr_type 143 * bit[4:2] = l4_hdr_type 144 * bit[5] = ip_frag 145 * bit[6] = tunneled 146 * bit[7] = outer_l3_type 147 */ 148 /* L2 */ 149 (*p)[0x00] = RTE_PTYPE_L2_ETHER; 150 /* L3 */ 151 (*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 152 RTE_PTYPE_L4_NONFRAG; 153 (*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 154 RTE_PTYPE_L4_NONFRAG; 155 /* Fragmented */ 156 (*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 157 RTE_PTYPE_L4_FRAG; 158 (*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 159 RTE_PTYPE_L4_FRAG; 160 /* TCP */ 161 (*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 162 RTE_PTYPE_L4_TCP; 163 (*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 164 RTE_PTYPE_L4_TCP; 165 (*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 166 RTE_PTYPE_L4_TCP; 167 (*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 168 RTE_PTYPE_L4_TCP; 169 (*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 170 RTE_PTYPE_L4_TCP; 171 (*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 172 RTE_PTYPE_L4_TCP; 173 /* UDP */ 174 (*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 175 RTE_PTYPE_L4_UDP; 176 (*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 177 RTE_PTYPE_L4_UDP; 178 /* Repeat with outer_l3_type being set. Just in case. */ 179 (*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 180 RTE_PTYPE_L4_NONFRAG; 181 (*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 182 RTE_PTYPE_L4_NONFRAG; 183 (*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 184 RTE_PTYPE_L4_FRAG; 185 (*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 186 RTE_PTYPE_L4_FRAG; 187 (*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 188 RTE_PTYPE_L4_TCP; 189 (*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 190 RTE_PTYPE_L4_TCP; 191 (*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 192 RTE_PTYPE_L4_TCP; 193 (*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 194 RTE_PTYPE_L4_TCP; 195 (*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 196 RTE_PTYPE_L4_TCP; 197 (*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 198 RTE_PTYPE_L4_TCP; 199 (*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 200 RTE_PTYPE_L4_UDP; 201 (*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 202 RTE_PTYPE_L4_UDP; 203 /* Tunneled - L3 */ 204 (*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 205 (*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 206 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 207 RTE_PTYPE_INNER_L4_NONFRAG; 208 (*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 209 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 210 RTE_PTYPE_INNER_L4_NONFRAG; 211 (*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 212 (*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 213 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 214 RTE_PTYPE_INNER_L4_NONFRAG; 215 (*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 216 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 217 RTE_PTYPE_INNER_L4_NONFRAG; 218 /* Tunneled - Fragmented */ 219 (*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 220 
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 221 RTE_PTYPE_INNER_L4_FRAG; 222 (*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 223 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 224 RTE_PTYPE_INNER_L4_FRAG; 225 (*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 226 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 227 RTE_PTYPE_INNER_L4_FRAG; 228 (*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 229 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 230 RTE_PTYPE_INNER_L4_FRAG; 231 /* Tunneled - TCP */ 232 (*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 233 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 234 RTE_PTYPE_INNER_L4_TCP; 235 (*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 236 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 237 RTE_PTYPE_INNER_L4_TCP; 238 (*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 239 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 240 RTE_PTYPE_INNER_L4_TCP; 241 (*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 242 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 243 RTE_PTYPE_INNER_L4_TCP; 244 (*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 245 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 246 RTE_PTYPE_INNER_L4_TCP; 247 (*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 248 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 249 RTE_PTYPE_INNER_L4_TCP; 250 (*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 251 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 252 RTE_PTYPE_INNER_L4_TCP; 253 (*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 254 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 255 RTE_PTYPE_INNER_L4_TCP; 256 (*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 257 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 258 RTE_PTYPE_INNER_L4_TCP; 259 (*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 260 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 261 RTE_PTYPE_INNER_L4_TCP; 262 (*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 263 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 264 RTE_PTYPE_INNER_L4_TCP; 265 (*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 266 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 267 RTE_PTYPE_INNER_L4_TCP; 268 /* Tunneled - UDP */ 269 (*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 270 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 271 RTE_PTYPE_INNER_L4_UDP; 272 (*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 273 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 274 RTE_PTYPE_INNER_L4_UDP; 275 (*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 276 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 277 RTE_PTYPE_INNER_L4_UDP; 278 (*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 279 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 280 RTE_PTYPE_INNER_L4_UDP; 281 } 282 283 /** 284 * Build a table to translate packet to checksum type of Verbs. 285 */ 286 void 287 mlx5_set_cksum_table(void) 288 { 289 unsigned int i; 290 uint8_t v; 291 292 /* 293 * The index should have: 294 * bit[0] = PKT_TX_TCP_SEG 295 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 296 * bit[4] = PKT_TX_IP_CKSUM 297 * bit[8] = PKT_TX_OUTER_IP_CKSUM 298 * bit[9] = tunnel 299 */ 300 for (i = 0; i < RTE_DIM(mlx5_cksum_table); ++i) { 301 v = 0; 302 if (i & (1 << 9)) { 303 /* Tunneled packet. */ 304 if (i & (1 << 8)) /* Outer IP. */ 305 v |= MLX5_ETH_WQE_L3_CSUM; 306 if (i & (1 << 4)) /* Inner IP. */ 307 v |= MLX5_ETH_WQE_L3_INNER_CSUM; 308 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 309 v |= MLX5_ETH_WQE_L4_INNER_CSUM; 310 } else { 311 /* No tunnel. 
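 * A worked example for this branch (illustrative): a plain TCP packet
 * requesting PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM maps to
 * idx = (1 << 4) | (1 << 2) = 0x14, so the table stores
 * v = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM.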
*/ 312 if (i & (1 << 4)) /* IP. */ 313 v |= MLX5_ETH_WQE_L3_CSUM; 314 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 315 v |= MLX5_ETH_WQE_L4_CSUM; 316 } 317 mlx5_cksum_table[i] = v; 318 } 319 } 320 321 /** 322 * Build a table to translate packet type of mbuf to SWP type of Verbs. 323 */ 324 void 325 mlx5_set_swp_types_table(void) 326 { 327 unsigned int i; 328 uint8_t v; 329 330 /* 331 * The index should have: 332 * bit[0:1] = PKT_TX_L4_MASK 333 * bit[4] = PKT_TX_IPV6 334 * bit[8] = PKT_TX_OUTER_IPV6 335 * bit[9] = PKT_TX_OUTER_UDP 336 */ 337 for (i = 0; i < RTE_DIM(mlx5_swp_types_table); ++i) { 338 v = 0; 339 if (i & (1 << 8)) 340 v |= MLX5_ETH_WQE_L3_OUTER_IPV6; 341 if (i & (1 << 9)) 342 v |= MLX5_ETH_WQE_L4_OUTER_UDP; 343 if (i & (1 << 4)) 344 v |= MLX5_ETH_WQE_L3_INNER_IPV6; 345 if ((i & 3) == (PKT_TX_UDP_CKSUM >> 52)) 346 v |= MLX5_ETH_WQE_L4_INNER_UDP; 347 mlx5_swp_types_table[i] = v; 348 } 349 } 350 351 /** 352 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 353 * Flags must be preliminary initialized to zero. 354 * 355 * @param loc 356 * Pointer to burst routine local context. 357 * @param swp_flags 358 * Pointer to store Software Parser flags 359 * @param olx 360 * Configured Tx offloads mask. It is fully defined at 361 * compile time and may be used for optimization. 362 * 363 * @return 364 * Software Parser offsets packed in dword. 365 * Software Parser flags are set by pointer. 366 */ 367 static __rte_always_inline uint32_t 368 txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, 369 uint8_t *swp_flags, 370 unsigned int olx) 371 { 372 uint64_t ol, tunnel; 373 unsigned int idx, off; 374 uint32_t set; 375 376 if (!MLX5_TXOFF_CONFIG(SWP)) 377 return 0; 378 ol = loc->mbuf->ol_flags; 379 tunnel = ol & PKT_TX_TUNNEL_MASK; 380 /* 381 * Check whether Software Parser is required. 382 * Only customized tunnels may ask for. 383 */ 384 if (likely(tunnel != PKT_TX_TUNNEL_UDP && tunnel != PKT_TX_TUNNEL_IP)) 385 return 0; 386 /* 387 * The index should have: 388 * bit[0:1] = PKT_TX_L4_MASK 389 * bit[4] = PKT_TX_IPV6 390 * bit[8] = PKT_TX_OUTER_IPV6 391 * bit[9] = PKT_TX_OUTER_UDP 392 */ 393 idx = (ol & (PKT_TX_L4_MASK | PKT_TX_IPV6 | PKT_TX_OUTER_IPV6)) >> 52; 394 idx |= (tunnel == PKT_TX_TUNNEL_UDP) ? (1 << 9) : 0; 395 *swp_flags = mlx5_swp_types_table[idx]; 396 /* 397 * Set offsets for SW parser. Since ConnectX-5, SW parser just 398 * complements HW parser. SW parser starts to engage only if HW parser 399 * can't reach a header. For the older devices, HW parser will not kick 400 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 401 * should be set regardless of HW offload. 402 */ 403 off = loc->mbuf->outer_l2_len; 404 if (MLX5_TXOFF_CONFIG(VLAN) && ol & PKT_TX_VLAN_PKT) 405 off += sizeof(struct rte_vlan_hdr); 406 set = (off >> 1) << 8; /* Outer L3 offset. */ 407 off += loc->mbuf->outer_l3_len; 408 if (tunnel == PKT_TX_TUNNEL_UDP) 409 set |= off >> 1; /* Outer L4 offset. */ 410 if (ol & (PKT_TX_IPV4 | PKT_TX_IPV6)) { /* Inner IP. */ 411 const uint64_t csum = ol & PKT_TX_L4_MASK; 412 off += loc->mbuf->l2_len; 413 set |= (off >> 1) << 24; /* Inner L3 offset. */ 414 if (csum == PKT_TX_TCP_CKSUM || 415 csum == PKT_TX_UDP_CKSUM || 416 (MLX5_TXOFF_CONFIG(TSO) && ol & PKT_TX_TCP_SEG)) { 417 off += loc->mbuf->l3_len; 418 set |= (off >> 1) << 16; /* Inner L4 offset. */ 419 } 420 } 421 set = rte_cpu_to_le_32(set); 422 return set; 423 } 424 425 /** 426 * Convert the Checksum offloads to Verbs. 427 * 428 * @param buf 429 * Pointer to the mbuf. 
430 * 431 * @return 432 * Converted checksum flags. 433 */ 434 static __rte_always_inline uint8_t 435 txq_ol_cksum_to_cs(struct rte_mbuf *buf) 436 { 437 uint32_t idx; 438 uint8_t is_tunnel = !!(buf->ol_flags & PKT_TX_TUNNEL_MASK); 439 const uint64_t ol_flags_mask = PKT_TX_TCP_SEG | PKT_TX_L4_MASK | 440 PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM; 441 442 /* 443 * The index should have: 444 * bit[0] = PKT_TX_TCP_SEG 445 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 446 * bit[4] = PKT_TX_IP_CKSUM 447 * bit[8] = PKT_TX_OUTER_IP_CKSUM 448 * bit[9] = tunnel 449 */ 450 idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9); 451 return mlx5_cksum_table[idx]; 452 } 453 454 /** 455 * Internal function to compute the number of used descriptors in an RX queue 456 * 457 * @param rxq 458 * The Rx queue. 459 * 460 * @return 461 * The number of used rx descriptor. 462 */ 463 static uint32_t 464 rx_queue_count(struct mlx5_rxq_data *rxq) 465 { 466 struct rxq_zip *zip = &rxq->zip; 467 volatile struct mlx5_cqe *cqe; 468 const unsigned int cqe_n = (1 << rxq->cqe_n); 469 const unsigned int cqe_cnt = cqe_n - 1; 470 unsigned int cq_ci; 471 unsigned int used; 472 473 /* if we are processing a compressed cqe */ 474 if (zip->ai) { 475 used = zip->cqe_cnt - zip->ca; 476 cq_ci = zip->cq_ci; 477 } else { 478 used = 0; 479 cq_ci = rxq->cq_ci; 480 } 481 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; 482 while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) { 483 int8_t op_own; 484 unsigned int n; 485 486 op_own = cqe->op_own; 487 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) 488 n = rte_be_to_cpu_32(cqe->byte_cnt); 489 else 490 n = 1; 491 cq_ci += n; 492 used += n; 493 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; 494 } 495 used = RTE_MIN(used, (1U << rxq->elts_n) - 1); 496 return used; 497 } 498 499 /** 500 * DPDK callback to check the status of a rx descriptor. 501 * 502 * @param rx_queue 503 * The Rx queue. 504 * @param[in] offset 505 * The index of the descriptor in the ring. 506 * 507 * @return 508 * The status of the tx descriptor. 509 */ 510 int 511 mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset) 512 { 513 struct mlx5_rxq_data *rxq = rx_queue; 514 struct mlx5_rxq_ctrl *rxq_ctrl = 515 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 516 struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv); 517 518 if (dev->rx_pkt_burst != mlx5_rx_burst) { 519 rte_errno = ENOTSUP; 520 return -rte_errno; 521 } 522 if (offset >= (1 << rxq->elts_n)) { 523 rte_errno = EINVAL; 524 return -rte_errno; 525 } 526 if (offset < rx_queue_count(rxq)) 527 return RTE_ETH_RX_DESC_DONE; 528 return RTE_ETH_RX_DESC_AVAIL; 529 } 530 531 /** 532 * DPDK callback to get the RX queue information 533 * 534 * @param dev 535 * Pointer to the device structure. 536 * 537 * @param rx_queue_id 538 * Rx queue identificator. 539 * 540 * @param qinfo 541 * Pointer to the RX queue information structure. 542 * 543 * @return 544 * None. 545 */ 546 547 void 548 mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id, 549 struct rte_eth_rxq_info *qinfo) 550 { 551 struct mlx5_priv *priv = dev->data->dev_private; 552 struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id]; 553 struct mlx5_rxq_ctrl *rxq_ctrl = 554 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 555 556 if (!rxq) 557 return; 558 qinfo->mp = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 
559 rxq->mprq_mp : rxq->mp; 560 qinfo->conf.rx_thresh.pthresh = 0; 561 qinfo->conf.rx_thresh.hthresh = 0; 562 qinfo->conf.rx_thresh.wthresh = 0; 563 qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh; 564 qinfo->conf.rx_drop_en = 1; 565 qinfo->conf.rx_deferred_start = rxq_ctrl ? 0 : 1; 566 qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads; 567 qinfo->scattered_rx = dev->data->scattered_rx; 568 qinfo->nb_desc = 1 << rxq->elts_n; 569 } 570 571 /** 572 * DPDK callback to get the RX packet burst mode information 573 * 574 * @param dev 575 * Pointer to the device structure. 576 * 577 * @param rx_queue_id 578 * Rx queue identificatior. 579 * 580 * @param mode 581 * Pointer to the burts mode information. 582 * 583 * @return 584 * 0 as success, -EINVAL as failure. 585 */ 586 587 int 588 mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, 589 uint16_t rx_queue_id __rte_unused, 590 struct rte_eth_burst_mode *mode) 591 { 592 eth_rx_burst_t pkt_burst = dev->rx_pkt_burst; 593 594 if (pkt_burst == mlx5_rx_burst) { 595 snprintf(mode->info, sizeof(mode->info), "%s", "Scalar"); 596 } else if (pkt_burst == mlx5_rx_burst_mprq) { 597 snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ"); 598 } else if (pkt_burst == mlx5_rx_burst_vec) { 599 #if defined RTE_ARCH_X86_64 600 snprintf(mode->info, sizeof(mode->info), "%s", "Vector SSE"); 601 #elif defined RTE_ARCH_ARM64 602 snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon"); 603 #elif defined RTE_ARCH_PPC_64 604 snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec"); 605 #else 606 return -EINVAL; 607 #endif 608 } else { 609 return -EINVAL; 610 } 611 return 0; 612 } 613 614 /** 615 * DPDK callback to get the number of used descriptors in a RX queue 616 * 617 * @param dev 618 * Pointer to the device structure. 619 * 620 * @param rx_queue_id 621 * The Rx queue. 622 * 623 * @return 624 * The number of used rx descriptor. 625 * -EINVAL if the queue is invalid 626 */ 627 uint32_t 628 mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id) 629 { 630 struct mlx5_priv *priv = dev->data->dev_private; 631 struct mlx5_rxq_data *rxq; 632 633 if (dev->rx_pkt_burst != mlx5_rx_burst) { 634 rte_errno = ENOTSUP; 635 return -rte_errno; 636 } 637 rxq = (*priv->rxqs)[rx_queue_id]; 638 if (!rxq) { 639 rte_errno = EINVAL; 640 return -rte_errno; 641 } 642 return rx_queue_count(rxq); 643 } 644 645 #define MLX5_SYSTEM_LOG_DIR "/var/log" 646 /** 647 * Dump debug information to log file. 648 * 649 * @param fname 650 * The file name. 651 * @param hex_title 652 * If not NULL this string is printed as a header to the output 653 * and the output will be in hexadecimal view. 654 * @param buf 655 * This is the buffer address to print out. 656 * @param len 657 * The number of bytes to dump out. 
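 *
 * A minimal usage sketch (illustrative file name and buffer):
 *   mlx5_dump_debug_information("dpdk_mlx5_dump", "MLX5 CQ:", cq_buf, size);
 * When hex_title is NULL the buffer is written out as plain text instead
 * of a hexadecimal dump.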
658 */ 659 void 660 mlx5_dump_debug_information(const char *fname, const char *hex_title, 661 const void *buf, unsigned int hex_len) 662 { 663 FILE *fd; 664 665 MKSTR(path, "%s/%s", MLX5_SYSTEM_LOG_DIR, fname); 666 fd = fopen(path, "a+"); 667 if (!fd) { 668 DRV_LOG(WARNING, "cannot open %s for debug dump", path); 669 MKSTR(path2, "./%s", fname); 670 fd = fopen(path2, "a+"); 671 if (!fd) { 672 DRV_LOG(ERR, "cannot open %s for debug dump", path2); 673 return; 674 } 675 DRV_LOG(INFO, "New debug dump in file %s", path2); 676 } else { 677 DRV_LOG(INFO, "New debug dump in file %s", path); 678 } 679 if (hex_title) 680 rte_hexdump(fd, hex_title, buf, hex_len); 681 else 682 fprintf(fd, "%s", (const char *)buf); 683 fprintf(fd, "\n\n\n"); 684 fclose(fd); 685 } 686 687 /** 688 * Move QP from error state to running state and initialize indexes. 689 * 690 * @param txq_ctrl 691 * Pointer to TX queue control structure. 692 * 693 * @return 694 * 0 on success, else -1. 695 */ 696 static int 697 tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl) 698 { 699 struct mlx5_mp_arg_queue_state_modify sm = { 700 .is_wq = 0, 701 .queue_id = txq_ctrl->txq.idx, 702 }; 703 704 if (mlx5_queue_state_modify(ETH_DEV(txq_ctrl->priv), &sm)) 705 return -1; 706 txq_ctrl->txq.wqe_ci = 0; 707 txq_ctrl->txq.wqe_pi = 0; 708 txq_ctrl->txq.elts_comp = 0; 709 return 0; 710 } 711 712 /* Return 1 if the error CQE is signed otherwise, sign it and return 0. */ 713 static int 714 check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe) 715 { 716 static const uint8_t magic[] = "seen"; 717 int ret = 1; 718 unsigned int i; 719 720 for (i = 0; i < sizeof(magic); ++i) 721 if (!ret || err_cqe->rsvd1[i] != magic[i]) { 722 ret = 0; 723 err_cqe->rsvd1[i] = magic[i]; 724 } 725 return ret; 726 } 727 728 /** 729 * Handle error CQE. 730 * 731 * @param txq 732 * Pointer to TX queue structure. 733 * @param error_cqe 734 * Pointer to the error CQE. 735 * 736 * @return 737 * Negative value if queue recovery failed, otherwise 738 * the error completion entry is handled successfully. 739 */ 740 static int 741 mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq, 742 volatile struct mlx5_err_cqe *err_cqe) 743 { 744 if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) { 745 const uint16_t wqe_m = ((1 << txq->wqe_n) - 1); 746 struct mlx5_txq_ctrl *txq_ctrl = 747 container_of(txq, struct mlx5_txq_ctrl, txq); 748 uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter); 749 int seen = check_err_cqe_seen(err_cqe); 750 751 if (!seen && txq_ctrl->dump_file_n < 752 txq_ctrl->priv->config.max_dump_files_num) { 753 MKSTR(err_str, "Unexpected CQE error syndrome " 754 "0x%02x CQN = %u SQN = %u wqe_counter = %u " 755 "wq_ci = %u cq_ci = %u", err_cqe->syndrome, 756 txq->cqe_s, txq->qp_num_8s >> 8, 757 rte_be_to_cpu_16(err_cqe->wqe_counter), 758 txq->wqe_ci, txq->cq_ci); 759 MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u", 760 PORT_ID(txq_ctrl->priv), txq->idx, 761 txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc()); 762 mlx5_dump_debug_information(name, NULL, err_str, 0); 763 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 764 (const void *)((uintptr_t) 765 txq->cqes), 766 sizeof(*err_cqe) * 767 (1 << txq->cqe_n)); 768 mlx5_dump_debug_information(name, "MLX5 Error SQ:", 769 (const void *)((uintptr_t) 770 txq->wqes), 771 MLX5_WQE_SIZE * 772 (1 << txq->wqe_n)); 773 txq_ctrl->dump_file_n++; 774 } 775 if (!seen) 776 /* 777 * Count errors in WQEs units. 
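 * The arithmetic below charges the WQEs between the failed WQE
 * reported by the error CQE (wqe_counter) and the current producer
 * index, modulo the SQ size.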
778 * Later it can be improved to count error packets, 779 * for example, by SQ parsing to find how much packets 780 * should be counted for each WQE. 781 */ 782 txq->stats.oerrors += ((txq->wqe_ci & wqe_m) - 783 new_wqe_pi) & wqe_m; 784 if (tx_recover_qp(txq_ctrl)) { 785 /* Recovering failed - retry later on the same WQE. */ 786 return -1; 787 } 788 /* Release all the remaining buffers. */ 789 txq_free_elts(txq_ctrl); 790 } 791 return 0; 792 } 793 794 /** 795 * Translate RX completion flags to packet type. 796 * 797 * @param[in] rxq 798 * Pointer to RX queue structure. 799 * @param[in] cqe 800 * Pointer to CQE. 801 * 802 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 803 * 804 * @return 805 * Packet type for struct rte_mbuf. 806 */ 807 static inline uint32_t 808 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe) 809 { 810 uint8_t idx; 811 uint8_t pinfo = cqe->pkt_info; 812 uint16_t ptype = cqe->hdr_type_etc; 813 814 /* 815 * The index to the array should have: 816 * bit[1:0] = l3_hdr_type 817 * bit[4:2] = l4_hdr_type 818 * bit[5] = ip_frag 819 * bit[6] = tunneled 820 * bit[7] = outer_l3_type 821 */ 822 idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10); 823 return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6)); 824 } 825 826 /** 827 * Initialize Rx WQ and indexes. 828 * 829 * @param[in] rxq 830 * Pointer to RX queue structure. 831 */ 832 void 833 mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) 834 { 835 const unsigned int wqe_n = 1 << rxq->elts_n; 836 unsigned int i; 837 838 for (i = 0; (i != wqe_n); ++i) { 839 volatile struct mlx5_wqe_data_seg *scat; 840 uintptr_t addr; 841 uint32_t byte_count; 842 843 if (mlx5_rxq_mprq_enabled(rxq)) { 844 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i]; 845 846 scat = &((volatile struct mlx5_wqe_mprq *) 847 rxq->wqes)[i].dseg; 848 addr = (uintptr_t)mlx5_mprq_buf_addr(buf, 849 1 << rxq->strd_num_n); 850 byte_count = (1 << rxq->strd_sz_n) * 851 (1 << rxq->strd_num_n); 852 } else { 853 struct rte_mbuf *buf = (*rxq->elts)[i]; 854 855 scat = &((volatile struct mlx5_wqe_data_seg *) 856 rxq->wqes)[i]; 857 addr = rte_pktmbuf_mtod(buf, uintptr_t); 858 byte_count = DATA_LEN(buf); 859 } 860 /* scat->addr must be able to store a pointer. */ 861 MLX5_ASSERT(sizeof(scat->addr) >= sizeof(uintptr_t)); 862 *scat = (struct mlx5_wqe_data_seg){ 863 .addr = rte_cpu_to_be_64(addr), 864 .byte_count = rte_cpu_to_be_32(byte_count), 865 .lkey = mlx5_rx_addr2mr(rxq, addr), 866 }; 867 } 868 rxq->consumed_strd = 0; 869 rxq->decompressed = 0; 870 rxq->rq_pi = 0; 871 rxq->zip = (struct rxq_zip){ 872 .ai = 0, 873 }; 874 /* Update doorbell counter. */ 875 rxq->rq_ci = wqe_n >> rxq->sges_n; 876 rte_cio_wmb(); 877 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 878 } 879 880 /** 881 * Modify a Verbs/DevX queue state. 882 * This must be called from the primary process. 883 * 884 * @param dev 885 * Pointer to Ethernet device. 886 * @param sm 887 * State modify request parameters. 888 * 889 * @return 890 * 0 in case of success else non-zero value and rte_errno is set. 
891 */ 892 int 893 mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, 894 const struct mlx5_mp_arg_queue_state_modify *sm) 895 { 896 int ret; 897 struct mlx5_priv *priv = dev->data->dev_private; 898 899 if (sm->is_wq) { 900 struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id]; 901 struct mlx5_rxq_ctrl *rxq_ctrl = 902 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 903 904 if (rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_IBV) { 905 struct ibv_wq_attr mod = { 906 .attr_mask = IBV_WQ_ATTR_STATE, 907 .wq_state = sm->state, 908 }; 909 910 ret = mlx5_glue->modify_wq(rxq_ctrl->obj->wq, &mod); 911 } else { /* rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ. */ 912 struct mlx5_devx_modify_rq_attr rq_attr; 913 914 memset(&rq_attr, 0, sizeof(rq_attr)); 915 if (sm->state == IBV_WQS_RESET) { 916 rq_attr.rq_state = MLX5_RQC_STATE_ERR; 917 rq_attr.state = MLX5_RQC_STATE_RST; 918 } else if (sm->state == IBV_WQS_RDY) { 919 rq_attr.rq_state = MLX5_RQC_STATE_RST; 920 rq_attr.state = MLX5_RQC_STATE_RDY; 921 } else if (sm->state == IBV_WQS_ERR) { 922 rq_attr.rq_state = MLX5_RQC_STATE_RDY; 923 rq_attr.state = MLX5_RQC_STATE_ERR; 924 } 925 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, 926 &rq_attr); 927 } 928 if (ret) { 929 DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s", 930 sm->state, strerror(errno)); 931 rte_errno = errno; 932 return ret; 933 } 934 } else { 935 struct mlx5_txq_data *txq = (*priv->txqs)[sm->queue_id]; 936 struct mlx5_txq_ctrl *txq_ctrl = 937 container_of(txq, struct mlx5_txq_ctrl, txq); 938 939 if (txq_ctrl->obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_SQ) { 940 struct mlx5_devx_modify_sq_attr msq_attr = { 0 }; 941 942 /* Change queue state to reset. */ 943 msq_attr.sq_state = MLX5_SQC_STATE_ERR; 944 msq_attr.state = MLX5_SQC_STATE_RST; 945 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq_devx, 946 &msq_attr); 947 if (ret) { 948 DRV_LOG(ERR, "Cannot change the " 949 "Tx QP state to RESET %s", 950 strerror(errno)); 951 rte_errno = errno; 952 return ret; 953 } 954 /* Change queue state to ready. 
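 * The SQ is moved RST -> RDY here, mirroring the ERR -> RST transition
 * requested just above, so the send queue can accept new WQEs again.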
*/ 955 msq_attr.sq_state = MLX5_SQC_STATE_RST; 956 msq_attr.state = MLX5_SQC_STATE_RDY; 957 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq_devx, 958 &msq_attr); 959 if (ret) { 960 DRV_LOG(ERR, "Cannot change the " 961 "Tx QP state to READY %s", 962 strerror(errno)); 963 rte_errno = errno; 964 return ret; 965 } 966 } else { 967 struct ibv_qp_attr mod = { 968 .qp_state = IBV_QPS_RESET, 969 .port_num = (uint8_t)priv->dev_port, 970 }; 971 struct ibv_qp *qp = txq_ctrl->obj->qp; 972 973 MLX5_ASSERT 974 (txq_ctrl->obj->type == MLX5_TXQ_OBJ_TYPE_IBV); 975 976 ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); 977 if (ret) { 978 DRV_LOG(ERR, "Cannot change the " 979 "Tx QP state to RESET %s", 980 strerror(errno)); 981 rte_errno = errno; 982 return ret; 983 } 984 mod.qp_state = IBV_QPS_INIT; 985 ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); 986 if (ret) { 987 DRV_LOG(ERR, "Cannot change the " 988 "Tx QP state to INIT %s", 989 strerror(errno)); 990 rte_errno = errno; 991 return ret; 992 } 993 mod.qp_state = IBV_QPS_RTR; 994 ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); 995 if (ret) { 996 DRV_LOG(ERR, "Cannot change the " 997 "Tx QP state to RTR %s", 998 strerror(errno)); 999 rte_errno = errno; 1000 return ret; 1001 } 1002 mod.qp_state = IBV_QPS_RTS; 1003 ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); 1004 if (ret) { 1005 DRV_LOG(ERR, "Cannot change the " 1006 "Tx QP state to RTS %s", 1007 strerror(errno)); 1008 rte_errno = errno; 1009 return ret; 1010 } 1011 } 1012 } 1013 return 0; 1014 } 1015 1016 /** 1017 * Modify a Verbs queue state. 1018 * 1019 * @param dev 1020 * Pointer to Ethernet device. 1021 * @param sm 1022 * State modify request parameters. 1023 * 1024 * @return 1025 * 0 in case of success else non-zero value. 1026 */ 1027 static int 1028 mlx5_queue_state_modify(struct rte_eth_dev *dev, 1029 struct mlx5_mp_arg_queue_state_modify *sm) 1030 { 1031 struct mlx5_priv *priv = dev->data->dev_private; 1032 int ret = 0; 1033 1034 switch (rte_eal_process_type()) { 1035 case RTE_PROC_PRIMARY: 1036 ret = mlx5_queue_state_modify_primary(dev, sm); 1037 break; 1038 case RTE_PROC_SECONDARY: 1039 ret = mlx5_mp_req_queue_state_modify(&priv->mp_id, sm); 1040 break; 1041 default: 1042 break; 1043 } 1044 return ret; 1045 } 1046 1047 /** 1048 * Handle a Rx error. 1049 * The function inserts the RQ state to reset when the first error CQE is 1050 * shown, then drains the CQ by the caller function loop. When the CQ is empty, 1051 * it moves the RQ state to ready and initializes the RQ. 1052 * Next CQE identification and error counting are in the caller responsibility. 1053 * 1054 * @param[in] rxq 1055 * Pointer to RX queue structure. 1056 * @param[in] vec 1057 * 1 when called from vectorized Rx burst, need to prepare mbufs for the RQ. 1058 * 0 when called from non-vectorized Rx burst. 1059 * 1060 * @return 1061 * -1 in case of recovery error, otherwise the CQE status. 
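 *
 * A minimal caller sketch (illustrative; it mirrors the error path of
 * mlx5_rx_poll_len() below):
 *   ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
 *   if (ret == MLX5_CQE_STATUS_ERR || rxq->err_state)
 *           ret = mlx5_rx_err_handle(rxq, 0);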
1062 */ 1063 int 1064 mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) 1065 { 1066 const uint16_t cqe_n = 1 << rxq->cqe_n; 1067 const uint16_t cqe_mask = cqe_n - 1; 1068 const unsigned int wqe_n = 1 << rxq->elts_n; 1069 struct mlx5_rxq_ctrl *rxq_ctrl = 1070 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 1071 union { 1072 volatile struct mlx5_cqe *cqe; 1073 volatile struct mlx5_err_cqe *err_cqe; 1074 } u = { 1075 .cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask], 1076 }; 1077 struct mlx5_mp_arg_queue_state_modify sm; 1078 int ret; 1079 1080 switch (rxq->err_state) { 1081 case MLX5_RXQ_ERR_STATE_NO_ERROR: 1082 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET; 1083 /* Fall-through */ 1084 case MLX5_RXQ_ERR_STATE_NEED_RESET: 1085 sm.is_wq = 1; 1086 sm.queue_id = rxq->idx; 1087 sm.state = IBV_WQS_RESET; 1088 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm)) 1089 return -1; 1090 if (rxq_ctrl->dump_file_n < 1091 rxq_ctrl->priv->config.max_dump_files_num) { 1092 MKSTR(err_str, "Unexpected CQE error syndrome " 1093 "0x%02x CQN = %u RQN = %u wqe_counter = %u" 1094 " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome, 1095 rxq->cqn, rxq_ctrl->wqn, 1096 rte_be_to_cpu_16(u.err_cqe->wqe_counter), 1097 rxq->rq_ci << rxq->sges_n, rxq->cq_ci); 1098 MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u", 1099 rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc()); 1100 mlx5_dump_debug_information(name, NULL, err_str, 0); 1101 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 1102 (const void *)((uintptr_t) 1103 rxq->cqes), 1104 sizeof(*u.cqe) * cqe_n); 1105 mlx5_dump_debug_information(name, "MLX5 Error RQ:", 1106 (const void *)((uintptr_t) 1107 rxq->wqes), 1108 16 * wqe_n); 1109 rxq_ctrl->dump_file_n++; 1110 } 1111 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY; 1112 /* Fall-through */ 1113 case MLX5_RXQ_ERR_STATE_NEED_READY: 1114 ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci); 1115 if (ret == MLX5_CQE_STATUS_HW_OWN) { 1116 rte_cio_wmb(); 1117 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1118 rte_cio_wmb(); 1119 /* 1120 * The RQ consumer index must be zeroed while moving 1121 * from RESET state to RDY state. 1122 */ 1123 *rxq->rq_db = rte_cpu_to_be_32(0); 1124 rte_cio_wmb(); 1125 sm.is_wq = 1; 1126 sm.queue_id = rxq->idx; 1127 sm.state = IBV_WQS_RDY; 1128 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), 1129 &sm)) 1130 return -1; 1131 if (vec) { 1132 const uint16_t q_mask = wqe_n - 1; 1133 uint16_t elt_idx; 1134 struct rte_mbuf **elt; 1135 int i; 1136 unsigned int n = wqe_n - (rxq->rq_ci - 1137 rxq->rq_pi); 1138 1139 for (i = 0; i < (int)n; ++i) { 1140 elt_idx = (rxq->rq_ci + i) & q_mask; 1141 elt = &(*rxq->elts)[elt_idx]; 1142 *elt = rte_mbuf_raw_alloc(rxq->mp); 1143 if (!*elt) { 1144 for (i--; i >= 0; --i) { 1145 elt_idx = (rxq->rq_ci + 1146 i) & q_mask; 1147 elt = &(*rxq->elts) 1148 [elt_idx]; 1149 rte_pktmbuf_free_seg 1150 (*elt); 1151 } 1152 return -1; 1153 } 1154 } 1155 for (i = 0; i < (int)wqe_n; ++i) { 1156 elt = &(*rxq->elts)[i]; 1157 DATA_LEN(*elt) = 1158 (uint16_t)((*elt)->buf_len - 1159 rte_pktmbuf_headroom(*elt)); 1160 } 1161 /* Padding with a fake mbuf for vec Rx. */ 1162 for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i) 1163 (*rxq->elts)[wqe_n + i] = 1164 &rxq->fake_mbuf; 1165 } 1166 mlx5_rxq_initialize(rxq); 1167 rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; 1168 } 1169 return ret; 1170 default: 1171 return -1; 1172 } 1173 } 1174 1175 /** 1176 * Get size of the next packet for a given CQE. 
For compressed CQEs, the 1177 * consumer index is updated only once all packets of the current one have 1178 * been processed. 1179 * 1180 * @param rxq 1181 * Pointer to RX queue. 1182 * @param cqe 1183 * CQE to process. 1184 * @param[out] mcqe 1185 * Store pointer to mini-CQE if compressed. Otherwise, the pointer is not 1186 * written. 1187 * 1188 * @return 1189 * 0 in case of empty CQE, otherwise the packet size in bytes. 1190 */ 1191 static inline int 1192 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 1193 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe) 1194 { 1195 struct rxq_zip *zip = &rxq->zip; 1196 uint16_t cqe_n = cqe_cnt + 1; 1197 int len; 1198 uint16_t idx, end; 1199 1200 do { 1201 len = 0; 1202 /* Process compressed data in the CQE and mini arrays. */ 1203 if (zip->ai) { 1204 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1205 (volatile struct mlx5_mini_cqe8 (*)[8]) 1206 (uintptr_t)(&(*rxq->cqes)[zip->ca & 1207 cqe_cnt].pkt_info); 1208 1209 len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt); 1210 *mcqe = &(*mc)[zip->ai & 7]; 1211 if ((++zip->ai & 7) == 0) { 1212 /* Invalidate consumed CQEs */ 1213 idx = zip->ca; 1214 end = zip->na; 1215 while (idx != end) { 1216 (*rxq->cqes)[idx & cqe_cnt].op_own = 1217 MLX5_CQE_INVALIDATE; 1218 ++idx; 1219 } 1220 /* 1221 * Increment consumer index to skip the number 1222 * of CQEs consumed. Hardware leaves holes in 1223 * the CQ ring for software use. 1224 */ 1225 zip->ca = zip->na; 1226 zip->na += 8; 1227 } 1228 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { 1229 /* Invalidate the rest */ 1230 idx = zip->ca; 1231 end = zip->cq_ci; 1232 1233 while (idx != end) { 1234 (*rxq->cqes)[idx & cqe_cnt].op_own = 1235 MLX5_CQE_INVALIDATE; 1236 ++idx; 1237 } 1238 rxq->cq_ci = zip->cq_ci; 1239 zip->ai = 0; 1240 } 1241 /* 1242 * No compressed data, get next CQE and verify if it is 1243 * compressed. 1244 */ 1245 } else { 1246 int ret; 1247 int8_t op_own; 1248 1249 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 1250 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 1251 if (unlikely(ret == MLX5_CQE_STATUS_ERR || 1252 rxq->err_state)) { 1253 ret = mlx5_rx_err_handle(rxq, 0); 1254 if (ret == MLX5_CQE_STATUS_HW_OWN || 1255 ret == -1) 1256 return 0; 1257 } else { 1258 return 0; 1259 } 1260 } 1261 ++rxq->cq_ci; 1262 op_own = cqe->op_own; 1263 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { 1264 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1265 (volatile struct mlx5_mini_cqe8 (*)[8]) 1266 (uintptr_t)(&(*rxq->cqes) 1267 [rxq->cq_ci & 1268 cqe_cnt].pkt_info); 1269 1270 /* Fix endianness. */ 1271 zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt); 1272 /* 1273 * Current mini array position is the one 1274 * returned by check_cqe64(). 1275 * 1276 * If completion comprises several mini arrays, 1277 * as a special case the second one is located 1278 * 7 CQEs after the initial CQE instead of 8 1279 * for subsequent ones. 1280 */ 1281 zip->ca = rxq->cq_ci; 1282 zip->na = zip->ca + 7; 1283 /* Compute the next non compressed CQE. */ 1284 --rxq->cq_ci; 1285 zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; 1286 /* Get packet size to return. 
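 * The first mini-CQE of the just opened compressed session supplies
 * the byte count; subsequent calls consume mini-CQEs 1..zip->cqe_cnt - 1
 * through zip->ai.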
*/ 1287 len = rte_be_to_cpu_32((*mc)[0].byte_cnt); 1288 *mcqe = &(*mc)[0]; 1289 zip->ai = 1; 1290 /* Prefetch all to be invalidated */ 1291 idx = zip->ca; 1292 end = zip->cq_ci; 1293 while (idx != end) { 1294 rte_prefetch0(&(*rxq->cqes)[(idx) & 1295 cqe_cnt]); 1296 ++idx; 1297 } 1298 } else { 1299 len = rte_be_to_cpu_32(cqe->byte_cnt); 1300 } 1301 } 1302 if (unlikely(rxq->err_state)) { 1303 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1304 ++rxq->stats.idropped; 1305 } else { 1306 return len; 1307 } 1308 } while (1); 1309 } 1310 1311 /** 1312 * Translate RX completion flags to offload flags. 1313 * 1314 * @param[in] cqe 1315 * Pointer to CQE. 1316 * 1317 * @return 1318 * Offload flags (ol_flags) for struct rte_mbuf. 1319 */ 1320 static inline uint32_t 1321 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe) 1322 { 1323 uint32_t ol_flags = 0; 1324 uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc); 1325 1326 ol_flags = 1327 TRANSPOSE(flags, 1328 MLX5_CQE_RX_L3_HDR_VALID, 1329 PKT_RX_IP_CKSUM_GOOD) | 1330 TRANSPOSE(flags, 1331 MLX5_CQE_RX_L4_HDR_VALID, 1332 PKT_RX_L4_CKSUM_GOOD); 1333 return ol_flags; 1334 } 1335 1336 /** 1337 * Fill in mbuf fields from RX completion flags. 1338 * Note that pkt->ol_flags should be initialized outside of this function. 1339 * 1340 * @param rxq 1341 * Pointer to RX queue. 1342 * @param pkt 1343 * mbuf to fill. 1344 * @param cqe 1345 * CQE to process. 1346 * @param rss_hash_res 1347 * Packet RSS Hash result. 1348 */ 1349 static inline void 1350 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 1351 volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res) 1352 { 1353 /* Update packet information. */ 1354 pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe); 1355 if (rss_hash_res && rxq->rss_hash) { 1356 pkt->hash.rss = rss_hash_res; 1357 pkt->ol_flags |= PKT_RX_RSS_HASH; 1358 } 1359 if (rxq->mark && MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) { 1360 pkt->ol_flags |= PKT_RX_FDIR; 1361 if (cqe->sop_drop_qpn != 1362 rte_cpu_to_be_32(MLX5_FLOW_MARK_DEFAULT)) { 1363 uint32_t mark = cqe->sop_drop_qpn; 1364 1365 pkt->ol_flags |= PKT_RX_FDIR_ID; 1366 pkt->hash.fdir.hi = mlx5_flow_mark_get(mark); 1367 } 1368 } 1369 if (rxq->dynf_meta && cqe->flow_table_metadata) { 1370 pkt->ol_flags |= rxq->flow_meta_mask; 1371 *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, uint32_t *) = 1372 cqe->flow_table_metadata; 1373 } 1374 if (rxq->csum) 1375 pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); 1376 if (rxq->vlan_strip && 1377 (cqe->hdr_type_etc & rte_cpu_to_be_16(MLX5_CQE_VLAN_STRIPPED))) { 1378 pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; 1379 pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info); 1380 } 1381 if (rxq->hw_timestamp) { 1382 uint64_t ts = rte_be_to_cpu_64(cqe->timestamp); 1383 1384 if (rxq->rt_timestamp) 1385 ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts); 1386 pkt->timestamp = ts; 1387 pkt->ol_flags |= PKT_RX_TIMESTAMP; 1388 } 1389 } 1390 1391 /** 1392 * DPDK callback for RX. 1393 * 1394 * @param dpdk_rxq 1395 * Generic pointer to RX queue structure. 1396 * @param[out] pkts 1397 * Array to store received packets. 1398 * @param pkts_n 1399 * Maximum number of packets in array. 1400 * 1401 * @return 1402 * Number of packets successfully received (<= pkts_n). 
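 *
 * Applications normally reach this routine through the generic burst API
 * rather than calling it directly, e.g. (illustrative):
 *   struct rte_mbuf *burst[32];
 *   uint16_t nb = rte_eth_rx_burst(port_id, queue_id, burst, 32);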
1403 */ 1404 uint16_t 1405 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1406 { 1407 struct mlx5_rxq_data *rxq = dpdk_rxq; 1408 const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; 1409 const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1; 1410 const unsigned int sges_n = rxq->sges_n; 1411 struct rte_mbuf *pkt = NULL; 1412 struct rte_mbuf *seg = NULL; 1413 volatile struct mlx5_cqe *cqe = 1414 &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1415 unsigned int i = 0; 1416 unsigned int rq_ci = rxq->rq_ci << sges_n; 1417 int len = 0; /* keep its value across iterations. */ 1418 1419 while (pkts_n) { 1420 unsigned int idx = rq_ci & wqe_cnt; 1421 volatile struct mlx5_wqe_data_seg *wqe = 1422 &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx]; 1423 struct rte_mbuf *rep = (*rxq->elts)[idx]; 1424 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1425 uint32_t rss_hash_res; 1426 1427 if (pkt) 1428 NEXT(seg) = rep; 1429 seg = rep; 1430 rte_prefetch0(seg); 1431 rte_prefetch0(cqe); 1432 rte_prefetch0(wqe); 1433 rep = rte_mbuf_raw_alloc(rxq->mp); 1434 if (unlikely(rep == NULL)) { 1435 ++rxq->stats.rx_nombuf; 1436 if (!pkt) { 1437 /* 1438 * no buffers before we even started, 1439 * bail out silently. 1440 */ 1441 break; 1442 } 1443 while (pkt != seg) { 1444 MLX5_ASSERT(pkt != (*rxq->elts)[idx]); 1445 rep = NEXT(pkt); 1446 NEXT(pkt) = NULL; 1447 NB_SEGS(pkt) = 1; 1448 rte_mbuf_raw_free(pkt); 1449 pkt = rep; 1450 } 1451 break; 1452 } 1453 if (!pkt) { 1454 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1455 len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe); 1456 if (!len) { 1457 rte_mbuf_raw_free(rep); 1458 break; 1459 } 1460 pkt = seg; 1461 MLX5_ASSERT(len >= (rxq->crc_present << 2)); 1462 pkt->ol_flags &= EXT_ATTACHED_MBUF; 1463 /* If compressed, take hash result from mini-CQE. */ 1464 rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ? 1465 cqe->rx_hash_res : 1466 mcqe->rx_hash_result); 1467 rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res); 1468 if (rxq->crc_present) 1469 len -= RTE_ETHER_CRC_LEN; 1470 PKT_LEN(pkt) = len; 1471 if (cqe->lro_num_seg > 1) { 1472 mlx5_lro_update_hdr 1473 (rte_pktmbuf_mtod(pkt, uint8_t *), cqe, 1474 len); 1475 pkt->ol_flags |= PKT_RX_LRO; 1476 pkt->tso_segsz = len / cqe->lro_num_seg; 1477 } 1478 } 1479 DATA_LEN(rep) = DATA_LEN(seg); 1480 PKT_LEN(rep) = PKT_LEN(seg); 1481 SET_DATA_OFF(rep, DATA_OFF(seg)); 1482 PORT(rep) = PORT(seg); 1483 (*rxq->elts)[idx] = rep; 1484 /* 1485 * Fill NIC descriptor with the new buffer. The lkey and size 1486 * of the buffers are already known, only the buffer address 1487 * changes. 1488 */ 1489 wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t)); 1490 /* If there's only one MR, no need to replace LKey in WQE. */ 1491 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) 1492 wqe->lkey = mlx5_rx_mb2mr(rxq, rep); 1493 if (len > DATA_LEN(seg)) { 1494 len -= DATA_LEN(seg); 1495 ++NB_SEGS(pkt); 1496 ++rq_ci; 1497 continue; 1498 } 1499 DATA_LEN(seg) = len; 1500 #ifdef MLX5_PMD_SOFT_COUNTERS 1501 /* Increment bytes counter. */ 1502 rxq->stats.ibytes += PKT_LEN(pkt); 1503 #endif 1504 /* Return packet. */ 1505 *(pkts++) = pkt; 1506 pkt = NULL; 1507 --pkts_n; 1508 ++i; 1509 /* Align consumer index to the next stride. */ 1510 rq_ci >>= sges_n; 1511 ++rq_ci; 1512 rq_ci <<= sges_n; 1513 } 1514 if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci))) 1515 return 0; 1516 /* Update the consumer index. 
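 * Doorbell records are updated CQ first and then RQ, with rte_cio_wmb()
 * before each write to keep the updates ordered against the preceding
 * descriptor and CQE accesses.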
*/ 1517 rxq->rq_ci = rq_ci >> sges_n; 1518 rte_cio_wmb(); 1519 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1520 rte_cio_wmb(); 1521 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1522 #ifdef MLX5_PMD_SOFT_COUNTERS 1523 /* Increment packets counter. */ 1524 rxq->stats.ipackets += i; 1525 #endif 1526 return i; 1527 } 1528 1529 /** 1530 * Update LRO packet TCP header. 1531 * The HW LRO feature doesn't update the TCP header after coalescing the 1532 * TCP segments but supplies information in CQE to fill it by SW. 1533 * 1534 * @param tcp 1535 * Pointer to the TCP header. 1536 * @param cqe 1537 * Pointer to the completion entry.. 1538 * @param phcsum 1539 * The L3 pseudo-header checksum. 1540 */ 1541 static inline void 1542 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 1543 volatile struct mlx5_cqe *__rte_restrict cqe, 1544 uint32_t phcsum) 1545 { 1546 uint8_t l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) & 1547 MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; 1548 /* 1549 * The HW calculates only the TCP payload checksum, need to complete 1550 * the TCP header checksum and the L3 pseudo-header checksum. 1551 */ 1552 uint32_t csum = phcsum + cqe->csum; 1553 1554 if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK || 1555 l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) { 1556 tcp->tcp_flags |= RTE_TCP_ACK_FLAG; 1557 tcp->recv_ack = cqe->lro_ack_seq_num; 1558 tcp->rx_win = cqe->lro_tcp_win; 1559 } 1560 if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK) 1561 tcp->tcp_flags |= RTE_TCP_PSH_FLAG; 1562 tcp->cksum = 0; 1563 csum += rte_raw_cksum(tcp, (tcp->data_off >> 4) * 4); 1564 csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff); 1565 csum = (~csum) & 0xffff; 1566 if (csum == 0) 1567 csum = 0xffff; 1568 tcp->cksum = csum; 1569 } 1570 1571 /** 1572 * Update LRO packet headers. 1573 * The HW LRO feature doesn't update the L3/TCP headers after coalescing the 1574 * TCP segments but supply information in CQE to fill it by SW. 1575 * 1576 * @param padd 1577 * The packet address. 1578 * @param cqe 1579 * Pointer to the completion entry.. 1580 * @param len 1581 * The packet length. 
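 *
 * The address is expected to point at the Ethernet header; VLAN/QinQ
 * tags, if present, are skipped by the parsing loop below before the
 * IPv4/IPv6 and TCP headers are patched.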
1582 */ 1583 static inline void 1584 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 1585 volatile struct mlx5_cqe *__rte_restrict cqe, 1586 uint32_t len) 1587 { 1588 union { 1589 struct rte_ether_hdr *eth; 1590 struct rte_vlan_hdr *vlan; 1591 struct rte_ipv4_hdr *ipv4; 1592 struct rte_ipv6_hdr *ipv6; 1593 struct rte_tcp_hdr *tcp; 1594 uint8_t *hdr; 1595 } h = { 1596 .hdr = padd, 1597 }; 1598 uint16_t proto = h.eth->ether_type; 1599 uint32_t phcsum; 1600 1601 h.eth++; 1602 while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) || 1603 proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) { 1604 proto = h.vlan->eth_proto; 1605 h.vlan++; 1606 } 1607 if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) { 1608 h.ipv4->time_to_live = cqe->lro_min_ttl; 1609 h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd)); 1610 h.ipv4->hdr_checksum = 0; 1611 h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4); 1612 phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0); 1613 h.ipv4++; 1614 } else { 1615 h.ipv6->hop_limits = cqe->lro_min_ttl; 1616 h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) - 1617 sizeof(*h.ipv6)); 1618 phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0); 1619 h.ipv6++; 1620 } 1621 mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum); 1622 } 1623 1624 void 1625 mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque) 1626 { 1627 struct mlx5_mprq_buf *buf = opaque; 1628 1629 if (rte_atomic16_read(&buf->refcnt) == 1) { 1630 rte_mempool_put(buf->mp, buf); 1631 } else if (rte_atomic16_add_return(&buf->refcnt, -1) == 0) { 1632 rte_atomic16_set(&buf->refcnt, 1); 1633 rte_mempool_put(buf->mp, buf); 1634 } 1635 } 1636 1637 void 1638 mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf) 1639 { 1640 mlx5_mprq_buf_free_cb(NULL, buf); 1641 } 1642 1643 static inline void 1644 mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx, 1645 const unsigned int strd_n) 1646 { 1647 struct mlx5_mprq_buf *rep = rxq->mprq_repl; 1648 volatile struct mlx5_wqe_data_seg *wqe = 1649 &((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg; 1650 void *addr; 1651 1652 MLX5_ASSERT(rep != NULL); 1653 /* Replace MPRQ buf. */ 1654 (*rxq->mprq_bufs)[rq_idx] = rep; 1655 /* Replace WQE. */ 1656 addr = mlx5_mprq_buf_addr(rep, strd_n); 1657 wqe->addr = rte_cpu_to_be_64((uintptr_t)addr); 1658 /* If there's only one MR, no need to replace LKey in WQE. */ 1659 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) 1660 wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr); 1661 /* Stash a mbuf for next replacement. */ 1662 if (likely(!rte_mempool_get(rxq->mprq_mp, (void **)&rep))) 1663 rxq->mprq_repl = rep; 1664 else 1665 rxq->mprq_repl = NULL; 1666 } 1667 1668 /** 1669 * DPDK callback for RX with Multi-Packet RQ support. 1670 * 1671 * @param dpdk_rxq 1672 * Generic pointer to RX queue structure. 1673 * @param[out] pkts 1674 * Array to store received packets. 1675 * @param pkts_n 1676 * Maximum number of packets in array. 1677 * 1678 * @return 1679 * Number of packets successfully received (<= pkts_n). 
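 *
 * A worked example of the stride math used below (hypothetical
 * configuration): with strd_num_n = 6 and strd_sz_n = 11 every MPRQ WQE
 * covers 64 strides of 2048 bytes, and a packet reported at stride
 * strd_idx starts at byte offset strd_idx * 2048 (plus the optional
 * MLX5_MPRQ_STRIDE_SHIFT_BYTE shift) inside the buffer.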
1680 */ 1681 uint16_t 1682 mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1683 { 1684 struct mlx5_rxq_data *rxq = dpdk_rxq; 1685 const unsigned int strd_n = 1 << rxq->strd_num_n; 1686 const unsigned int strd_sz = 1 << rxq->strd_sz_n; 1687 const unsigned int strd_shift = 1688 MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en; 1689 const unsigned int cq_mask = (1 << rxq->cqe_n) - 1; 1690 const unsigned int wq_mask = (1 << rxq->elts_n) - 1; 1691 volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1692 unsigned int i = 0; 1693 uint32_t rq_ci = rxq->rq_ci; 1694 uint16_t consumed_strd = rxq->consumed_strd; 1695 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1696 1697 while (i < pkts_n) { 1698 struct rte_mbuf *pkt; 1699 void *addr; 1700 int ret; 1701 uint32_t len; 1702 uint16_t strd_cnt; 1703 uint16_t strd_idx; 1704 uint32_t offset; 1705 uint32_t byte_cnt; 1706 int32_t hdrm_overlap; 1707 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1708 uint32_t rss_hash_res = 0; 1709 1710 if (consumed_strd == strd_n) { 1711 /* Replace WQE only if the buffer is still in use. */ 1712 if (rte_atomic16_read(&buf->refcnt) > 1) { 1713 mprq_buf_replace(rxq, rq_ci & wq_mask, strd_n); 1714 /* Release the old buffer. */ 1715 mlx5_mprq_buf_free(buf); 1716 } else if (unlikely(rxq->mprq_repl == NULL)) { 1717 struct mlx5_mprq_buf *rep; 1718 1719 /* 1720 * Currently, the MPRQ mempool is out of buffer 1721 * and doing memcpy regardless of the size of Rx 1722 * packet. Retry allocation to get back to 1723 * normal. 1724 */ 1725 if (!rte_mempool_get(rxq->mprq_mp, 1726 (void **)&rep)) 1727 rxq->mprq_repl = rep; 1728 } 1729 /* Advance to the next WQE. */ 1730 consumed_strd = 0; 1731 ++rq_ci; 1732 buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1733 } 1734 cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1735 ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe); 1736 if (!ret) 1737 break; 1738 byte_cnt = ret; 1739 strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >> 1740 MLX5_MPRQ_STRIDE_NUM_SHIFT; 1741 MLX5_ASSERT(strd_cnt); 1742 consumed_strd += strd_cnt; 1743 if (byte_cnt & MLX5_MPRQ_FILLER_MASK) 1744 continue; 1745 if (mcqe == NULL) { 1746 rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res); 1747 strd_idx = rte_be_to_cpu_16(cqe->wqe_counter); 1748 } else { 1749 /* mini-CQE for MPRQ doesn't have hash result. */ 1750 strd_idx = rte_be_to_cpu_16(mcqe->stride_idx); 1751 } 1752 MLX5_ASSERT(strd_idx < strd_n); 1753 MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & 1754 wq_mask)); 1755 pkt = rte_pktmbuf_alloc(rxq->mp); 1756 if (unlikely(pkt == NULL)) { 1757 ++rxq->stats.rx_nombuf; 1758 break; 1759 } 1760 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 1761 MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); 1762 if (rxq->crc_present) 1763 len -= RTE_ETHER_CRC_LEN; 1764 offset = strd_idx * strd_sz + strd_shift; 1765 addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset); 1766 hdrm_overlap = len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz; 1767 /* 1768 * Memcpy packets to the target mbuf if: 1769 * - The size of packet is smaller than mprq_max_memcpy_len. 1770 * - Out of buffer in the Mempool for Multi-Packet RQ. 1771 * - The packet's stride overlaps a headroom and scatter is off. 
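 * (hdrm_overlap, computed above as len + RTE_PKTMBUF_HEADROOM -
 * strd_cnt * strd_sz, is positive when the packet tail would extend
 * past the strides attached to the mbuf into the next packet's
 * headroom area.)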
1772 */ 1773 if (len <= rxq->mprq_max_memcpy_len || 1774 rxq->mprq_repl == NULL || 1775 (hdrm_overlap > 0 && !rxq->strd_scatter_en)) { 1776 if (likely(rte_pktmbuf_tailroom(pkt) >= len)) { 1777 rte_memcpy(rte_pktmbuf_mtod(pkt, void *), 1778 addr, len); 1779 DATA_LEN(pkt) = len; 1780 } else if (rxq->strd_scatter_en) { 1781 struct rte_mbuf *prev = pkt; 1782 uint32_t seg_len = 1783 RTE_MIN(rte_pktmbuf_tailroom(pkt), len); 1784 uint32_t rem_len = len - seg_len; 1785 1786 rte_memcpy(rte_pktmbuf_mtod(pkt, void *), 1787 addr, seg_len); 1788 DATA_LEN(pkt) = seg_len; 1789 while (rem_len) { 1790 struct rte_mbuf *next = 1791 rte_pktmbuf_alloc(rxq->mp); 1792 1793 if (unlikely(next == NULL)) { 1794 rte_pktmbuf_free(pkt); 1795 ++rxq->stats.rx_nombuf; 1796 goto out; 1797 } 1798 NEXT(prev) = next; 1799 SET_DATA_OFF(next, 0); 1800 addr = RTE_PTR_ADD(addr, seg_len); 1801 seg_len = RTE_MIN 1802 (rte_pktmbuf_tailroom(next), 1803 rem_len); 1804 rte_memcpy 1805 (rte_pktmbuf_mtod(next, void *), 1806 addr, seg_len); 1807 DATA_LEN(next) = seg_len; 1808 rem_len -= seg_len; 1809 prev = next; 1810 ++NB_SEGS(pkt); 1811 } 1812 } else { 1813 rte_pktmbuf_free_seg(pkt); 1814 ++rxq->stats.idropped; 1815 continue; 1816 } 1817 } else { 1818 rte_iova_t buf_iova; 1819 struct rte_mbuf_ext_shared_info *shinfo; 1820 uint16_t buf_len = strd_cnt * strd_sz; 1821 void *buf_addr; 1822 1823 /* Increment the refcnt of the whole chunk. */ 1824 rte_atomic16_add_return(&buf->refcnt, 1); 1825 MLX5_ASSERT((uint16_t)rte_atomic16_read(&buf->refcnt) <= 1826 strd_n + 1); 1827 buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM); 1828 /* 1829 * MLX5 device doesn't use iova but it is necessary in a 1830 * case where the Rx packet is transmitted via a 1831 * different PMD. 1832 */ 1833 buf_iova = rte_mempool_virt2iova(buf) + 1834 RTE_PTR_DIFF(buf_addr, buf); 1835 shinfo = &buf->shinfos[strd_idx]; 1836 rte_mbuf_ext_refcnt_set(shinfo, 1); 1837 /* 1838 * EXT_ATTACHED_MBUF will be set to pkt->ol_flags when 1839 * attaching the stride to mbuf and more offload flags 1840 * will be added below by calling rxq_cq_to_mbuf(). 1841 * Other fields will be overwritten. 1842 */ 1843 rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova, 1844 buf_len, shinfo); 1845 /* Set mbuf head-room. */ 1846 SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM); 1847 MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF); 1848 MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) >= 1849 len - (hdrm_overlap > 0 ? hdrm_overlap : 0)); 1850 DATA_LEN(pkt) = len; 1851 /* 1852 * Copy the last fragment of a packet (up to headroom 1853 * size bytes) in case there is a stride overlap with 1854 * a next packet's headroom. Allocate a separate mbuf 1855 * to store this fragment and link it. Scatter is on. 
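 * A worked example (hypothetical 2048-byte strides, default 128-byte
 * headroom): a 4000-byte packet occupies two strides, so
 * hdrm_overlap = 4000 + 128 - 4096 = 32; the last 32 bytes are copied
 * into the linked mbuf below and DATA_LEN(pkt) becomes 3968.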
1856 */ 1857 if (hdrm_overlap > 0) { 1858 MLX5_ASSERT(rxq->strd_scatter_en); 1859 struct rte_mbuf *seg = 1860 rte_pktmbuf_alloc(rxq->mp); 1861 1862 if (unlikely(seg == NULL)) { 1863 rte_pktmbuf_free_seg(pkt); 1864 ++rxq->stats.rx_nombuf; 1865 break; 1866 } 1867 SET_DATA_OFF(seg, 0); 1868 rte_memcpy(rte_pktmbuf_mtod(seg, void *), 1869 RTE_PTR_ADD(addr, len - hdrm_overlap), 1870 hdrm_overlap); 1871 DATA_LEN(seg) = hdrm_overlap; 1872 DATA_LEN(pkt) = len - hdrm_overlap; 1873 NEXT(pkt) = seg; 1874 NB_SEGS(pkt) = 2; 1875 } 1876 } 1877 rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res); 1878 if (cqe->lro_num_seg > 1) { 1879 mlx5_lro_update_hdr(addr, cqe, len); 1880 pkt->ol_flags |= PKT_RX_LRO; 1881 pkt->tso_segsz = len / cqe->lro_num_seg; 1882 } 1883 PKT_LEN(pkt) = len; 1884 PORT(pkt) = rxq->port_id; 1885 #ifdef MLX5_PMD_SOFT_COUNTERS 1886 /* Increment bytes counter. */ 1887 rxq->stats.ibytes += PKT_LEN(pkt); 1888 #endif 1889 /* Return packet. */ 1890 *(pkts++) = pkt; 1891 ++i; 1892 } 1893 out: 1894 /* Update the consumer indexes. */ 1895 rxq->consumed_strd = consumed_strd; 1896 rte_cio_wmb(); 1897 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1898 if (rq_ci != rxq->rq_ci) { 1899 rxq->rq_ci = rq_ci; 1900 rte_cio_wmb(); 1901 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1902 } 1903 #ifdef MLX5_PMD_SOFT_COUNTERS 1904 /* Increment packets counter. */ 1905 rxq->stats.ipackets += i; 1906 #endif 1907 return i; 1908 } 1909 1910 /** 1911 * Dummy DPDK callback for TX. 1912 * 1913 * This function is used to temporarily replace the real callback during 1914 * unsafe control operations on the queue, or in case of error. 1915 * 1916 * @param dpdk_txq 1917 * Generic pointer to TX queue structure. 1918 * @param[in] pkts 1919 * Packets to transmit. 1920 * @param pkts_n 1921 * Number of packets in array. 1922 * 1923 * @return 1924 * Number of packets successfully transmitted (<= pkts_n). 1925 */ 1926 uint16_t 1927 removed_tx_burst(void *dpdk_txq __rte_unused, 1928 struct rte_mbuf **pkts __rte_unused, 1929 uint16_t pkts_n __rte_unused) 1930 { 1931 rte_mb(); 1932 return 0; 1933 } 1934 1935 /** 1936 * Dummy DPDK callback for RX. 1937 * 1938 * This function is used to temporarily replace the real callback during 1939 * unsafe control operations on the queue, or in case of error. 1940 * 1941 * @param dpdk_rxq 1942 * Generic pointer to RX queue structure. 1943 * @param[out] pkts 1944 * Array to store received packets. 1945 * @param pkts_n 1946 * Maximum number of packets in array. 1947 * 1948 * @return 1949 * Number of packets successfully received (<= pkts_n). 1950 */ 1951 uint16_t 1952 removed_rx_burst(void *dpdk_txq __rte_unused, 1953 struct rte_mbuf **pkts __rte_unused, 1954 uint16_t pkts_n __rte_unused) 1955 { 1956 rte_mb(); 1957 return 0; 1958 } 1959 1960 /* 1961 * Vectorized Rx/Tx routines are not compiled in when required vector 1962 * instructions are not supported on a target architecture. The following null 1963 * stubs are needed for linkage when those are not included outside of this file 1964 * (e.g. mlx5_rxtx_vec_sse.c for x86). 
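 *
 * As a generic sketch of the weak-symbol mechanism (not mlx5-specific,
 * names are illustrative), a strong definition overrides a weak stub
 * at link time:
 *
 *   __rte_weak int foo(void) { return -ENOTSUP; }  // fallback stub
 *   int foo(void) { return 0; }   // strong override in another object
 *
 * When a vectorized object (e.g. mlx5_rxtx_vec_sse.c) is compiled in,
 * its strong definitions replace the stubs below; otherwise the stubs
 * keep the build linking.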
1965 */ 1966 1967 __rte_weak uint16_t 1968 mlx5_rx_burst_vec(void *dpdk_txq __rte_unused, 1969 struct rte_mbuf **pkts __rte_unused, 1970 uint16_t pkts_n __rte_unused) 1971 { 1972 return 0; 1973 } 1974 1975 __rte_weak int 1976 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused) 1977 { 1978 return -ENOTSUP; 1979 } 1980 1981 __rte_weak int 1982 mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused) 1983 { 1984 return -ENOTSUP; 1985 } 1986 1987 /** 1988 * Free the mbufs from the linear array of pointers. 1989 * 1990 * @param pkts 1991 * Pointer to array of packets to be free. 1992 * @param pkts_n 1993 * Number of packets to be freed. 1994 * @param olx 1995 * Configured Tx offloads mask. It is fully defined at 1996 * compile time and may be used for optimization. 1997 */ 1998 static __rte_always_inline void 1999 mlx5_tx_free_mbuf(struct rte_mbuf **__rte_restrict pkts, 2000 unsigned int pkts_n, 2001 unsigned int olx __rte_unused) 2002 { 2003 struct rte_mempool *pool = NULL; 2004 struct rte_mbuf **p_free = NULL; 2005 struct rte_mbuf *mbuf; 2006 unsigned int n_free = 0; 2007 2008 /* 2009 * The implemented algorithm eliminates 2010 * copying pointers to temporary array 2011 * for rte_mempool_put_bulk() calls. 2012 */ 2013 MLX5_ASSERT(pkts); 2014 MLX5_ASSERT(pkts_n); 2015 for (;;) { 2016 for (;;) { 2017 /* 2018 * Decrement mbuf reference counter, detach 2019 * indirect and external buffers if needed. 2020 */ 2021 mbuf = rte_pktmbuf_prefree_seg(*pkts); 2022 if (likely(mbuf != NULL)) { 2023 MLX5_ASSERT(mbuf == *pkts); 2024 if (likely(n_free != 0)) { 2025 if (unlikely(pool != mbuf->pool)) 2026 /* From different pool. */ 2027 break; 2028 } else { 2029 /* Start new scan array. */ 2030 pool = mbuf->pool; 2031 p_free = pkts; 2032 } 2033 ++n_free; 2034 ++pkts; 2035 --pkts_n; 2036 if (unlikely(pkts_n == 0)) { 2037 mbuf = NULL; 2038 break; 2039 } 2040 } else { 2041 /* 2042 * This happens if mbuf is still referenced. 2043 * We can't put it back to the pool, skip. 2044 */ 2045 ++pkts; 2046 --pkts_n; 2047 if (unlikely(n_free != 0)) 2048 /* There is some array to free.*/ 2049 break; 2050 if (unlikely(pkts_n == 0)) 2051 /* Last mbuf, nothing to free. */ 2052 return; 2053 } 2054 } 2055 for (;;) { 2056 /* 2057 * This loop is implemented to avoid multiple 2058 * inlining of rte_mempool_put_bulk(). 2059 */ 2060 MLX5_ASSERT(pool); 2061 MLX5_ASSERT(p_free); 2062 MLX5_ASSERT(n_free); 2063 /* 2064 * Free the array of pre-freed mbufs 2065 * belonging to the same memory pool. 2066 */ 2067 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 2068 if (unlikely(mbuf != NULL)) { 2069 /* There is the request to start new scan. */ 2070 pool = mbuf->pool; 2071 p_free = pkts++; 2072 n_free = 1; 2073 --pkts_n; 2074 if (likely(pkts_n != 0)) 2075 break; 2076 /* 2077 * This is the last mbuf to be freed. 2078 * Do one more loop iteration to complete. 2079 * This is rare case of the last unique mbuf. 2080 */ 2081 mbuf = NULL; 2082 continue; 2083 } 2084 if (likely(pkts_n == 0)) 2085 return; 2086 n_free = 0; 2087 break; 2088 } 2089 } 2090 } 2091 2092 /** 2093 * Free the mbuf from the elts ring buffer till new tail. 2094 * 2095 * @param txq 2096 * Pointer to Tx queue structure. 2097 * @param tail 2098 * Index in elts to free up to, becomes new elts tail. 2099 * @param olx 2100 * Configured Tx offloads mask. It is fully defined at 2101 * compile time and may be used for optimization. 
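 *
 * Worked example (hypothetical ring sizes): with elts_s = 256
 * (elts_m = 255), elts_tail = 250 and tail = 260, n_elts = 10; the
 * first loop iteration below frees part = 256 - (250 & 255) = 6 mbufs
 * up to the end of the ring, the second frees the remaining 4 starting
 * from index 0.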
2102 */ 2103 static __rte_always_inline void 2104 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 2105 uint16_t tail, 2106 unsigned int olx __rte_unused) 2107 { 2108 uint16_t n_elts = tail - txq->elts_tail; 2109 2110 MLX5_ASSERT(n_elts); 2111 MLX5_ASSERT(n_elts <= txq->elts_s); 2112 /* 2113 * Implement a loop to support ring buffer wraparound 2114 * with single inlining of mlx5_tx_free_mbuf(). 2115 */ 2116 do { 2117 unsigned int part; 2118 2119 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 2120 part = RTE_MIN(part, n_elts); 2121 MLX5_ASSERT(part); 2122 MLX5_ASSERT(part <= txq->elts_s); 2123 mlx5_tx_free_mbuf(&txq->elts[txq->elts_tail & txq->elts_m], 2124 part, olx); 2125 txq->elts_tail += part; 2126 n_elts -= part; 2127 } while (n_elts); 2128 } 2129 2130 /** 2131 * Store the mbuf being sent into elts ring buffer. 2132 * On Tx completion these mbufs will be freed. 2133 * 2134 * @param txq 2135 * Pointer to Tx queue structure. 2136 * @param pkts 2137 * Pointer to array of packets to be stored. 2138 * @param pkts_n 2139 * Number of packets to be stored. 2140 * @param olx 2141 * Configured Tx offloads mask. It is fully defined at 2142 * compile time and may be used for optimization. 2143 */ 2144 static __rte_always_inline void 2145 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 2146 struct rte_mbuf **__rte_restrict pkts, 2147 unsigned int pkts_n, 2148 unsigned int olx __rte_unused) 2149 { 2150 unsigned int part; 2151 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 2152 2153 MLX5_ASSERT(pkts); 2154 MLX5_ASSERT(pkts_n); 2155 part = txq->elts_s - (txq->elts_head & txq->elts_m); 2156 MLX5_ASSERT(part); 2157 MLX5_ASSERT(part <= txq->elts_s); 2158 /* This code is a good candidate for vectorizing with SIMD. */ 2159 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 2160 (void *)pkts, 2161 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 2162 txq->elts_head += pkts_n; 2163 if (unlikely(part < pkts_n)) 2164 /* The copy is wrapping around the elts array. */ 2165 rte_memcpy((void *)elts, (void *)(pkts + part), 2166 (pkts_n - part) * sizeof(struct rte_mbuf *)); 2167 } 2168 2169 /** 2170 * Update completion queue consuming index via doorbell 2171 * and flush the completed data buffers. 2172 * 2173 * @param txq 2174 * Pointer to TX queue structure. 2175 * @param valid CQE pointer 2176 * if not NULL update txq->wqe_pi and flush the buffers 2177 * @param olx 2178 * Configured Tx offloads mask. It is fully defined at 2179 * compile time and may be used for optimization. 2180 */ 2181 static __rte_always_inline void 2182 mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq, 2183 volatile struct mlx5_cqe *last_cqe, 2184 unsigned int olx __rte_unused) 2185 { 2186 if (likely(last_cqe != NULL)) { 2187 uint16_t tail; 2188 2189 txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter); 2190 tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m]; 2191 if (likely(tail != txq->elts_tail)) { 2192 mlx5_tx_free_elts(txq, tail, olx); 2193 MLX5_ASSERT(tail == txq->elts_tail); 2194 } 2195 } 2196 } 2197 2198 /** 2199 * Manage TX completions. This routine checks the CQ for 2200 * arrived CQEs, deduces the last accomplished WQE in SQ, 2201 * updates SQ producing index and frees all completed mbufs. 2202 * 2203 * @param txq 2204 * Pointer to TX queue structure. 2205 * @param olx 2206 * Configured Tx offloads mask. It is fully defined at 2207 * compile time and may be used for optimization. 
2208 * 2209 * NOTE: not inlined intentionally, it makes tx_burst 2210 * routine smaller, simple and faster - from experiments. 2211 */ 2212 static void 2213 mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, 2214 unsigned int olx __rte_unused) 2215 { 2216 unsigned int count = MLX5_TX_COMP_MAX_CQE; 2217 volatile struct mlx5_cqe *last_cqe = NULL; 2218 bool ring_doorbell = false; 2219 int ret; 2220 2221 static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value"); 2222 static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be negative value"); 2223 do { 2224 volatile struct mlx5_cqe *cqe; 2225 2226 cqe = &txq->cqes[txq->cq_ci & txq->cqe_m]; 2227 ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci); 2228 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 2229 if (likely(ret != MLX5_CQE_STATUS_ERR)) { 2230 /* No new CQEs in completion queue. */ 2231 MLX5_ASSERT(ret == MLX5_CQE_STATUS_HW_OWN); 2232 break; 2233 } 2234 /* 2235 * Some error occurred, try to restart. 2236 * We have no barrier after WQE related Doorbell 2237 * written, make sure all writes are completed 2238 * here, before we might perform SQ reset. 2239 */ 2240 rte_wmb(); 2241 ret = mlx5_tx_error_cqe_handle 2242 (txq, (volatile struct mlx5_err_cqe *)cqe); 2243 if (unlikely(ret < 0)) { 2244 /* 2245 * Some error occurred on queue error 2246 * handling, we do not advance the index 2247 * here, allowing to retry on next call. 2248 */ 2249 return; 2250 } 2251 /* 2252 * We are going to fetch all entries with 2253 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status. 2254 * The send queue is supposed to be empty. 2255 */ 2256 ring_doorbell = true; 2257 ++txq->cq_ci; 2258 txq->cq_pi = txq->cq_ci; 2259 last_cqe = NULL; 2260 continue; 2261 } 2262 /* Normal transmit completion. */ 2263 MLX5_ASSERT(txq->cq_ci != txq->cq_pi); 2264 MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) == 2265 cqe->wqe_counter); 2266 ring_doorbell = true; 2267 ++txq->cq_ci; 2268 last_cqe = cqe; 2269 /* 2270 * We have to restrict the amount of processed CQEs 2271 * in one tx_burst routine call. The CQ may be large 2272 * and many CQEs may be updated by the NIC in one 2273 * transaction. Buffers freeing is time consuming, 2274 * multiple iterations may introduce significant 2275 * latency. 2276 */ 2277 if (likely(--count == 0)) 2278 break; 2279 } while (true); 2280 if (likely(ring_doorbell)) { 2281 /* Ring doorbell to notify hardware. */ 2282 rte_compiler_barrier(); 2283 *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); 2284 mlx5_tx_comp_flush(txq, last_cqe, olx); 2285 } 2286 } 2287 2288 /** 2289 * Check if the completion request flag should be set in the last WQE. 2290 * Both pushed mbufs and WQEs are monitored and the completion request 2291 * flag is set if any of thresholds is reached. 2292 * 2293 * @param txq 2294 * Pointer to TX queue structure. 2295 * @param loc 2296 * Pointer to burst routine local context. 2297 * @param olx 2298 * Configured Tx offloads mask. It is fully defined at 2299 * compile time and may be used for optimization. 2300 */ 2301 static __rte_always_inline void 2302 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 2303 struct mlx5_txq_local *__rte_restrict loc, 2304 unsigned int olx) 2305 { 2306 uint16_t head = txq->elts_head; 2307 unsigned int part; 2308 2309 part = MLX5_TXOFF_CONFIG(INLINE) ? 
2310 0 : loc->pkts_sent - loc->pkts_copy; 2311 head += part; 2312 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 2313 (MLX5_TXOFF_CONFIG(INLINE) && 2314 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 2315 volatile struct mlx5_wqe *last = loc->wqe_last; 2316 2317 MLX5_ASSERT(last); 2318 txq->elts_comp = head; 2319 if (MLX5_TXOFF_CONFIG(INLINE)) 2320 txq->wqe_comp = txq->wqe_ci; 2321 /* Request unconditional completion on last WQE. */ 2322 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 2323 MLX5_COMP_MODE_OFFSET); 2324 /* Save elts_head in dedicated free on completion queue. */ 2325 #ifdef RTE_LIBRTE_MLX5_DEBUG 2326 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 2327 (last->cseg.opcode >> 8) << 16; 2328 #else 2329 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 2330 #endif 2331 /* A CQE slot must always be available. */ 2332 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 2333 } 2334 } 2335 2336 /** 2337 * DPDK callback to check the status of a tx descriptor. 2338 * 2339 * @param tx_queue 2340 * The tx queue. 2341 * @param[in] offset 2342 * The index of the descriptor in the ring. 2343 * 2344 * @return 2345 * The status of the tx descriptor. 2346 */ 2347 int 2348 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) 2349 { 2350 struct mlx5_txq_data *__rte_restrict txq = tx_queue; 2351 uint16_t used; 2352 2353 mlx5_tx_handle_completion(txq, 0); 2354 used = txq->elts_head - txq->elts_tail; 2355 if (offset < used) 2356 return RTE_ETH_TX_DESC_FULL; 2357 return RTE_ETH_TX_DESC_DONE; 2358 } 2359 2360 /** 2361 * Build the Control Segment with specified opcode: 2362 * - MLX5_OPCODE_SEND 2363 * - MLX5_OPCODE_ENHANCED_MPSW 2364 * - MLX5_OPCODE_TSO 2365 * 2366 * @param txq 2367 * Pointer to TX queue structure. 2368 * @param loc 2369 * Pointer to burst routine local context. 2370 * @param wqe 2371 * Pointer to WQE to fill with built Control Segment. 2372 * @param ds 2373 * Supposed length of WQE in segments. 2374 * @param opcode 2375 * SQ WQE opcode to put into Control Segment. 2376 * @param olx 2377 * Configured Tx offloads mask. It is fully defined at 2378 * compile time and may be used for optimization. 2379 */ 2380 static __rte_always_inline void 2381 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 2382 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2383 struct mlx5_wqe *__rte_restrict wqe, 2384 unsigned int ds, 2385 unsigned int opcode, 2386 unsigned int olx __rte_unused) 2387 { 2388 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 2389 2390 /* For legacy MPW replace the EMPW by TSO with modifier. */ 2391 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 2392 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 2393 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 2394 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2395 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 2396 MLX5_COMP_MODE_OFFSET); 2397 cs->misc = RTE_BE32(0); 2398 } 2399 2400 /** 2401 * Build the Synchronize Queue Segment with specified completion index. 2402 * 2403 * @param txq 2404 * Pointer to TX queue structure. 2405 * @param loc 2406 * Pointer to burst routine local context. 2407 * @param wqe 2408 * Pointer to WQE to fill with built Control Segment. 2409 * @param wci 2410 * Completion index in Clock Queue to wait. 2411 * @param olx 2412 * Configured Tx offloads mask. It is fully defined at 2413 * compile time and may be used for optimization. 
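 *
 * Layout note (illustrative, derived from the code below): the Queue
 * Segment is placed at wqe + MLX5_WSEG_SIZE, right after the 16-byte
 * Control Segment; max_index carries the Clock Queue completion index
 * to wait for and qpn_cqn refers to the Clock Queue CQ, so the WAIT
 * WQE is expected to hold back subsequent sends until that completion
 * index is reached.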
2414 */ 2415 static __rte_always_inline void 2416 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 2417 struct mlx5_txq_local *restrict loc __rte_unused, 2418 struct mlx5_wqe *restrict wqe, 2419 unsigned int wci, 2420 unsigned int olx __rte_unused) 2421 { 2422 struct mlx5_wqe_qseg *qs; 2423 2424 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 2425 qs->max_index = rte_cpu_to_be_32(wci); 2426 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq->id); 2427 qs->reserved0 = RTE_BE32(0); 2428 qs->reserved1 = RTE_BE32(0); 2429 } 2430 2431 /** 2432 * Build the Ethernet Segment without inlined data. 2433 * Supports Software Parser, Checksums and VLAN 2434 * insertion Tx offload features. 2435 * 2436 * @param txq 2437 * Pointer to TX queue structure. 2438 * @param loc 2439 * Pointer to burst routine local context. 2440 * @param wqe 2441 * Pointer to WQE to fill with built Ethernet Segment. 2442 * @param olx 2443 * Configured Tx offloads mask. It is fully defined at 2444 * compile time and may be used for optimization. 2445 */ 2446 static __rte_always_inline void 2447 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 2448 struct mlx5_txq_local *__rte_restrict loc, 2449 struct mlx5_wqe *__rte_restrict wqe, 2450 unsigned int olx) 2451 { 2452 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2453 uint32_t csum; 2454 2455 /* 2456 * Calculate and set check sum flags first, dword field 2457 * in segment may be shared with Software Parser flags. 2458 */ 2459 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2460 es->flags = rte_cpu_to_le_32(csum); 2461 /* 2462 * Calculate and set Software Parser offsets and flags. 2463 * These flags a set for custom UDP and IP tunnel packets. 2464 */ 2465 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2466 /* Fill metadata field if needed. */ 2467 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2468 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2469 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2470 /* Engage VLAN tag insertion feature if requested. */ 2471 if (MLX5_TXOFF_CONFIG(VLAN) && 2472 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 2473 /* 2474 * We should get here only if device support 2475 * this feature correctly. 2476 */ 2477 MLX5_ASSERT(txq->vlan_en); 2478 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 2479 loc->mbuf->vlan_tci); 2480 } else { 2481 es->inline_hdr = RTE_BE32(0); 2482 } 2483 } 2484 2485 /** 2486 * Build the Ethernet Segment with minimal inlined data 2487 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 2488 * used to fill the gap in single WQEBB WQEs. 2489 * Supports Software Parser, Checksums and VLAN 2490 * insertion Tx offload features. 2491 * 2492 * @param txq 2493 * Pointer to TX queue structure. 2494 * @param loc 2495 * Pointer to burst routine local context. 2496 * @param wqe 2497 * Pointer to WQE to fill with built Ethernet Segment. 2498 * @param vlan 2499 * Length of VLAN tag insertion if any. 2500 * @param olx 2501 * Configured Tx offloads mask. It is fully defined at 2502 * compile time and may be used for optimization. 
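 *
 * Size breakdown (from the static_asserts below):
 * MLX5_ESEG_MIN_INLINE_SIZE = sizeof(uint16_t) + sizeof(rte_v128u32_t)
 * = 2 + 16 = 18 bytes, which also equals 2 + sizeof(struct rte_vlan_hdr)
 * + 2 * RTE_ETHER_ADDR_LEN = 2 + 4 + 12, so with VLAN insertion the
 * inlined area holds both MAC addresses, the constructed VLAN header
 * and the first two bytes of the original EtherType; without VLAN it
 * simply holds the first 18 bytes of the frame.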
2503 */ 2504 static __rte_always_inline void 2505 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 2506 struct mlx5_txq_local *__rte_restrict loc, 2507 struct mlx5_wqe *__rte_restrict wqe, 2508 unsigned int vlan, 2509 unsigned int olx) 2510 { 2511 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2512 uint32_t csum; 2513 uint8_t *psrc, *pdst; 2514 2515 /* 2516 * Calculate and set check sum flags first, dword field 2517 * in segment may be shared with Software Parser flags. 2518 */ 2519 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2520 es->flags = rte_cpu_to_le_32(csum); 2521 /* 2522 * Calculate and set Software Parser offsets and flags. 2523 * These flags a set for custom UDP and IP tunnel packets. 2524 */ 2525 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2526 /* Fill metadata field if needed. */ 2527 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2528 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2529 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2530 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2531 (sizeof(uint16_t) + 2532 sizeof(rte_v128u32_t)), 2533 "invalid Ethernet Segment data size"); 2534 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2535 (sizeof(uint16_t) + 2536 sizeof(struct rte_vlan_hdr) + 2537 2 * RTE_ETHER_ADDR_LEN), 2538 "invalid Ethernet Segment data size"); 2539 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2540 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 2541 es->inline_data = *(unaligned_uint16_t *)psrc; 2542 psrc += sizeof(uint16_t); 2543 pdst = (uint8_t *)(es + 1); 2544 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2545 /* Implement VLAN tag insertion as part inline data. */ 2546 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2547 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2548 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2549 /* Insert VLAN ethertype + VLAN tag. */ 2550 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2551 ((RTE_ETHER_TYPE_VLAN << 16) | 2552 loc->mbuf->vlan_tci); 2553 pdst += sizeof(struct rte_vlan_hdr); 2554 /* Copy the rest two bytes from packet data. */ 2555 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2556 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2557 } else { 2558 /* Fill the gap in the title WQEBB with inline data. */ 2559 rte_mov16(pdst, psrc); 2560 } 2561 } 2562 2563 /** 2564 * Build the Ethernet Segment with entire packet 2565 * data inlining. Checks the boundary of WQEBB and 2566 * ring buffer wrapping, supports Software Parser, 2567 * Checksums and VLAN insertion Tx offload features. 2568 * 2569 * @param txq 2570 * Pointer to TX queue structure. 2571 * @param loc 2572 * Pointer to burst routine local context. 2573 * @param wqe 2574 * Pointer to WQE to fill with built Ethernet Segment. 2575 * @param vlan 2576 * Length of VLAN tag insertion if any. 2577 * @param inlen 2578 * Length of data to inline (VLAN included, if any). 2579 * @param tso 2580 * TSO flag, set mss field from the packet. 2581 * @param olx 2582 * Configured Tx offloads mask. It is fully defined at 2583 * compile time and may be used for optimization. 2584 * 2585 * @return 2586 * Pointer to the next Data Segment (aligned and wrapped around). 
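 *
 * Worked example (hypothetical length): inlen = 50 means 18 bytes fill
 * the Ethernet Segment area, the remaining 32 bytes are copied into the
 * following WSEGs with a wraparound check against wqes_end, and the
 * returned pointer is aligned up to MLX5_WSEG_SIZE for the next Data
 * Segment.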
2587 */ 2588 static __rte_always_inline struct mlx5_wqe_dseg * 2589 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 2590 struct mlx5_txq_local *__rte_restrict loc, 2591 struct mlx5_wqe *__rte_restrict wqe, 2592 unsigned int vlan, 2593 unsigned int inlen, 2594 unsigned int tso, 2595 unsigned int olx) 2596 { 2597 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2598 uint32_t csum; 2599 uint8_t *psrc, *pdst; 2600 unsigned int part; 2601 2602 /* 2603 * Calculate and set check sum flags first, dword field 2604 * in segment may be shared with Software Parser flags. 2605 */ 2606 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2607 if (tso) { 2608 csum <<= 24; 2609 csum |= loc->mbuf->tso_segsz; 2610 es->flags = rte_cpu_to_be_32(csum); 2611 } else { 2612 es->flags = rte_cpu_to_le_32(csum); 2613 } 2614 /* 2615 * Calculate and set Software Parser offsets and flags. 2616 * These flags a set for custom UDP and IP tunnel packets. 2617 */ 2618 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2619 /* Fill metadata field if needed. */ 2620 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2621 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2622 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2623 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2624 (sizeof(uint16_t) + 2625 sizeof(rte_v128u32_t)), 2626 "invalid Ethernet Segment data size"); 2627 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2628 (sizeof(uint16_t) + 2629 sizeof(struct rte_vlan_hdr) + 2630 2 * RTE_ETHER_ADDR_LEN), 2631 "invalid Ethernet Segment data size"); 2632 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2633 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 2634 es->inline_data = *(unaligned_uint16_t *)psrc; 2635 psrc += sizeof(uint16_t); 2636 pdst = (uint8_t *)(es + 1); 2637 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2638 /* Implement VLAN tag insertion as part inline data. */ 2639 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2640 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2641 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2642 /* Insert VLAN ethertype + VLAN tag. */ 2643 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2644 ((RTE_ETHER_TYPE_VLAN << 16) | 2645 loc->mbuf->vlan_tci); 2646 pdst += sizeof(struct rte_vlan_hdr); 2647 /* Copy the rest two bytes from packet data. */ 2648 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2649 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2650 psrc += sizeof(uint16_t); 2651 } else { 2652 /* Fill the gap in the title WQEBB with inline data. */ 2653 rte_mov16(pdst, psrc); 2654 psrc += sizeof(rte_v128u32_t); 2655 } 2656 pdst = (uint8_t *)(es + 2); 2657 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2658 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 2659 inlen -= MLX5_ESEG_MIN_INLINE_SIZE; 2660 if (!inlen) { 2661 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 2662 return (struct mlx5_wqe_dseg *)pdst; 2663 } 2664 /* 2665 * The WQEBB space availability is checked by caller. 2666 * Here we should be aware of WQE ring buffer wraparound only. 2667 */ 2668 part = (uint8_t *)txq->wqes_end - pdst; 2669 part = RTE_MIN(part, inlen); 2670 do { 2671 rte_memcpy(pdst, psrc, part); 2672 inlen -= part; 2673 if (likely(!inlen)) { 2674 /* 2675 * If return value is not used by the caller 2676 * the code below will be optimized out. 
2677 */ 2678 pdst += part; 2679 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2680 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 2681 pdst = (uint8_t *)txq->wqes; 2682 return (struct mlx5_wqe_dseg *)pdst; 2683 } 2684 pdst = (uint8_t *)txq->wqes; 2685 psrc += part; 2686 part = inlen; 2687 } while (true); 2688 } 2689 2690 /** 2691 * Copy data from chain of mbuf to the specified linear buffer. 2692 * Checksums and VLAN insertion Tx offload features. If data 2693 * from some mbuf copied completely this mbuf is freed. Local 2694 * structure is used to keep the byte stream state. 2695 * 2696 * @param pdst 2697 * Pointer to the destination linear buffer. 2698 * @param loc 2699 * Pointer to burst routine local context. 2700 * @param len 2701 * Length of data to be copied. 2702 * @param must 2703 * Length of data to be copied ignoring no inline hint. 2704 * @param olx 2705 * Configured Tx offloads mask. It is fully defined at 2706 * compile time and may be used for optimization. 2707 * 2708 * @return 2709 * Number of actual copied data bytes. This is always greater than or 2710 * equal to must parameter and might be lesser than len in no inline 2711 * hint flag is encountered. 2712 */ 2713 static __rte_always_inline unsigned int 2714 mlx5_tx_mseg_memcpy(uint8_t *pdst, 2715 struct mlx5_txq_local *__rte_restrict loc, 2716 unsigned int len, 2717 unsigned int must, 2718 unsigned int olx __rte_unused) 2719 { 2720 struct rte_mbuf *mbuf; 2721 unsigned int part, dlen, copy = 0; 2722 uint8_t *psrc; 2723 2724 MLX5_ASSERT(len); 2725 MLX5_ASSERT(must <= len); 2726 do { 2727 /* Allow zero length packets, must check first. */ 2728 dlen = rte_pktmbuf_data_len(loc->mbuf); 2729 if (dlen <= loc->mbuf_off) { 2730 /* Exhausted packet, just free. */ 2731 mbuf = loc->mbuf; 2732 loc->mbuf = mbuf->next; 2733 rte_pktmbuf_free_seg(mbuf); 2734 loc->mbuf_off = 0; 2735 MLX5_ASSERT(loc->mbuf_nseg > 1); 2736 MLX5_ASSERT(loc->mbuf); 2737 --loc->mbuf_nseg; 2738 if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 2739 unsigned int diff; 2740 2741 if (copy >= must) { 2742 /* 2743 * We already copied the minimal 2744 * requested amount of data. 2745 */ 2746 return copy; 2747 } 2748 diff = must - copy; 2749 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 2750 /* 2751 * Copy only the minimal required 2752 * part of the data buffer. 2753 */ 2754 len = diff; 2755 } 2756 } 2757 continue; 2758 } 2759 dlen -= loc->mbuf_off; 2760 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 2761 loc->mbuf_off); 2762 part = RTE_MIN(len, dlen); 2763 rte_memcpy(pdst, psrc, part); 2764 copy += part; 2765 loc->mbuf_off += part; 2766 len -= part; 2767 if (!len) { 2768 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 2769 loc->mbuf_off = 0; 2770 /* Exhausted packet, just free. */ 2771 mbuf = loc->mbuf; 2772 loc->mbuf = mbuf->next; 2773 rte_pktmbuf_free_seg(mbuf); 2774 loc->mbuf_off = 0; 2775 MLX5_ASSERT(loc->mbuf_nseg >= 1); 2776 --loc->mbuf_nseg; 2777 } 2778 return copy; 2779 } 2780 pdst += part; 2781 } while (true); 2782 } 2783 2784 /** 2785 * Build the Ethernet Segment with inlined data from 2786 * multi-segment packet. Checks the boundary of WQEBB 2787 * and ring buffer wrapping, supports Software Parser, 2788 * Checksums and VLAN insertion Tx offload features. 2789 * 2790 * @param txq 2791 * Pointer to TX queue structure. 2792 * @param loc 2793 * Pointer to burst routine local context. 2794 * @param wqe 2795 * Pointer to WQE to fill with built Ethernet Segment. 2796 * @param vlan 2797 * Length of VLAN tag insertion if any. 
2798 * @param inlen 2799 * Length of data to inline (VLAN included, if any). 2800 * @param tso 2801 * TSO flag, set mss field from the packet. 2802 * @param olx 2803 * Configured Tx offloads mask. It is fully defined at 2804 * compile time and may be used for optimization. 2805 * 2806 * @return 2807 * Pointer to the next Data Segment (aligned and 2808 * possible NOT wrapped around - caller should do 2809 * wrapping check on its own). 2810 */ 2811 static __rte_always_inline struct mlx5_wqe_dseg * 2812 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 2813 struct mlx5_txq_local *__rte_restrict loc, 2814 struct mlx5_wqe *__rte_restrict wqe, 2815 unsigned int vlan, 2816 unsigned int inlen, 2817 unsigned int tso, 2818 unsigned int olx) 2819 { 2820 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2821 uint32_t csum; 2822 uint8_t *pdst; 2823 unsigned int part, tlen = 0; 2824 2825 /* 2826 * Calculate and set check sum flags first, uint32_t field 2827 * in segment may be shared with Software Parser flags. 2828 */ 2829 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2830 if (tso) { 2831 csum <<= 24; 2832 csum |= loc->mbuf->tso_segsz; 2833 es->flags = rte_cpu_to_be_32(csum); 2834 } else { 2835 es->flags = rte_cpu_to_le_32(csum); 2836 } 2837 /* 2838 * Calculate and set Software Parser offsets and flags. 2839 * These flags a set for custom UDP and IP tunnel packets. 2840 */ 2841 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2842 /* Fill metadata field if needed. */ 2843 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2844 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2845 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2846 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2847 (sizeof(uint16_t) + 2848 sizeof(rte_v128u32_t)), 2849 "invalid Ethernet Segment data size"); 2850 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2851 (sizeof(uint16_t) + 2852 sizeof(struct rte_vlan_hdr) + 2853 2 * RTE_ETHER_ADDR_LEN), 2854 "invalid Ethernet Segment data size"); 2855 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2856 pdst = (uint8_t *)&es->inline_data; 2857 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2858 /* Implement VLAN tag insertion as part inline data. */ 2859 mlx5_tx_mseg_memcpy(pdst, loc, 2860 2 * RTE_ETHER_ADDR_LEN, 2861 2 * RTE_ETHER_ADDR_LEN, olx); 2862 pdst += 2 * RTE_ETHER_ADDR_LEN; 2863 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2864 ((RTE_ETHER_TYPE_VLAN << 16) | 2865 loc->mbuf->vlan_tci); 2866 pdst += sizeof(struct rte_vlan_hdr); 2867 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 2868 } 2869 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 2870 /* 2871 * The WQEBB space availability is checked by caller. 2872 * Here we should be aware of WQE ring buffer wraparound only. 2873 */ 2874 part = (uint8_t *)txq->wqes_end - pdst; 2875 part = RTE_MIN(part, inlen - tlen); 2876 MLX5_ASSERT(part); 2877 do { 2878 unsigned int copy; 2879 2880 /* 2881 * Copying may be interrupted inside the routine 2882 * if run into no inline hint flag. 2883 */ 2884 copy = tlen >= txq->inlen_mode ? 0 : (txq->inlen_mode - tlen); 2885 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 2886 tlen += copy; 2887 if (likely(inlen <= tlen) || copy < part) { 2888 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 2889 pdst += copy; 2890 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2891 return (struct mlx5_wqe_dseg *)pdst; 2892 } 2893 pdst = (uint8_t *)txq->wqes; 2894 part = inlen - tlen; 2895 } while (true); 2896 } 2897 2898 /** 2899 * Build the Data Segment of pointer type. 
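 *
 * A pointer-type Data Segment is a single 16-byte WSEG: big-endian
 * byte count, memory key (lkey) resolved through mlx5_tx_mb2mr(), and
 * the big-endian buffer address. Typical usage in the send path looks
 * like:
 *
 *   mlx5_tx_dseg_ptr(txq, loc, dseg,
 *                    rte_pktmbuf_mtod(loc->mbuf, uint8_t *),
 *                    rte_pktmbuf_data_len(loc->mbuf), olx);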
2900 * 2901 * @param txq 2902 * Pointer to TX queue structure. 2903 * @param loc 2904 * Pointer to burst routine local context. 2905 * @param dseg 2906 * Pointer to WQE to fill with built Data Segment. 2907 * @param buf 2908 * Data buffer to point. 2909 * @param len 2910 * Data buffer length. 2911 * @param olx 2912 * Configured Tx offloads mask. It is fully defined at 2913 * compile time and may be used for optimization. 2914 */ 2915 static __rte_always_inline void 2916 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 2917 struct mlx5_txq_local *__rte_restrict loc, 2918 struct mlx5_wqe_dseg *__rte_restrict dseg, 2919 uint8_t *buf, 2920 unsigned int len, 2921 unsigned int olx __rte_unused) 2922 2923 { 2924 MLX5_ASSERT(len); 2925 dseg->bcount = rte_cpu_to_be_32(len); 2926 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2927 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2928 } 2929 2930 /** 2931 * Build the Data Segment of pointer type or inline 2932 * if data length is less than buffer in minimal 2933 * Data Segment size. 2934 * 2935 * @param txq 2936 * Pointer to TX queue structure. 2937 * @param loc 2938 * Pointer to burst routine local context. 2939 * @param dseg 2940 * Pointer to WQE to fill with built Data Segment. 2941 * @param buf 2942 * Data buffer to point. 2943 * @param len 2944 * Data buffer length. 2945 * @param olx 2946 * Configured Tx offloads mask. It is fully defined at 2947 * compile time and may be used for optimization. 2948 */ 2949 static __rte_always_inline void 2950 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 2951 struct mlx5_txq_local *__rte_restrict loc, 2952 struct mlx5_wqe_dseg *__rte_restrict dseg, 2953 uint8_t *buf, 2954 unsigned int len, 2955 unsigned int olx __rte_unused) 2956 2957 { 2958 uintptr_t dst, src; 2959 2960 MLX5_ASSERT(len); 2961 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 2962 dseg->bcount = rte_cpu_to_be_32(len); 2963 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2964 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2965 2966 return; 2967 } 2968 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2969 /* Unrolled implementation of generic rte_memcpy. */ 2970 dst = (uintptr_t)&dseg->inline_data[0]; 2971 src = (uintptr_t)buf; 2972 if (len & 0x08) { 2973 #ifdef RTE_ARCH_STRICT_ALIGN 2974 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 2975 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2976 dst += sizeof(uint32_t); 2977 src += sizeof(uint32_t); 2978 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2979 dst += sizeof(uint32_t); 2980 src += sizeof(uint32_t); 2981 #else 2982 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 2983 dst += sizeof(uint64_t); 2984 src += sizeof(uint64_t); 2985 #endif 2986 } 2987 if (len & 0x04) { 2988 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2989 dst += sizeof(uint32_t); 2990 src += sizeof(uint32_t); 2991 } 2992 if (len & 0x02) { 2993 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 2994 dst += sizeof(uint16_t); 2995 src += sizeof(uint16_t); 2996 } 2997 if (len & 0x01) 2998 *(uint8_t *)dst = *(uint8_t *)src; 2999 } 3000 3001 /** 3002 * Build the Data Segment of inlined data from single 3003 * segment packet, no VLAN insertion. 3004 * 3005 * @param txq 3006 * Pointer to TX queue structure. 3007 * @param loc 3008 * Pointer to burst routine local context. 3009 * @param dseg 3010 * Pointer to WQE to fill with built Data Segment. 3011 * @param buf 3012 * Data buffer to point. 3013 * @param len 3014 * Data buffer length. 3015 * @param olx 3016 * Configured Tx offloads mask. 
It is fully defined at 3017 * compile time and may be used for optimization. 3018 * 3019 * @return 3020 * Pointer to the next Data Segment after inlined data. 3021 * Ring buffer wraparound check is needed. We do not 3022 * do it here because it may not be needed for the 3023 * last packet in the eMPW session. 3024 */ 3025 static __rte_always_inline struct mlx5_wqe_dseg * 3026 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 3027 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 3028 struct mlx5_wqe_dseg *__rte_restrict dseg, 3029 uint8_t *buf, 3030 unsigned int len, 3031 unsigned int olx __rte_unused) 3032 { 3033 unsigned int part; 3034 uint8_t *pdst; 3035 3036 if (!MLX5_TXOFF_CONFIG(MPW)) { 3037 /* Store the descriptor byte counter for eMPW sessions. */ 3038 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 3039 pdst = &dseg->inline_data[0]; 3040 } else { 3041 /* The entire legacy MPW session counter is stored on close. */ 3042 pdst = (uint8_t *)dseg; 3043 } 3044 /* 3045 * The WQEBB space availability is checked by caller. 3046 * Here we should be aware of WQE ring buffer wraparound only. 3047 */ 3048 part = (uint8_t *)txq->wqes_end - pdst; 3049 part = RTE_MIN(part, len); 3050 do { 3051 rte_memcpy(pdst, buf, part); 3052 len -= part; 3053 if (likely(!len)) { 3054 pdst += part; 3055 if (!MLX5_TXOFF_CONFIG(MPW)) 3056 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 3057 /* Note: no final wraparound check here. */ 3058 return (struct mlx5_wqe_dseg *)pdst; 3059 } 3060 pdst = (uint8_t *)txq->wqes; 3061 buf += part; 3062 part = len; 3063 } while (true); 3064 } 3065 3066 /** 3067 * Build the Data Segment of inlined data from single 3068 * segment packet with VLAN insertion. 3069 * 3070 * @param txq 3071 * Pointer to TX queue structure. 3072 * @param loc 3073 * Pointer to burst routine local context. 3074 * @param dseg 3075 * Pointer to the dseg fill with built Data Segment. 3076 * @param buf 3077 * Data buffer to point. 3078 * @param len 3079 * Data buffer length. 3080 * @param olx 3081 * Configured Tx offloads mask. It is fully defined at 3082 * compile time and may be used for optimization. 3083 * 3084 * @return 3085 * Pointer to the next Data Segment after inlined data. 3086 * Ring buffer wraparound check is needed. 3087 */ 3088 static __rte_always_inline struct mlx5_wqe_dseg * 3089 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 3090 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 3091 struct mlx5_wqe_dseg *__rte_restrict dseg, 3092 uint8_t *buf, 3093 unsigned int len, 3094 unsigned int olx __rte_unused) 3095 3096 { 3097 unsigned int part; 3098 uint8_t *pdst; 3099 3100 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 3101 static_assert(MLX5_DSEG_MIN_INLINE_SIZE == 3102 (2 * RTE_ETHER_ADDR_LEN), 3103 "invalid Data Segment data size"); 3104 if (!MLX5_TXOFF_CONFIG(MPW)) { 3105 /* Store the descriptor byte counter for eMPW sessions. */ 3106 dseg->bcount = rte_cpu_to_be_32 3107 ((len + sizeof(struct rte_vlan_hdr)) | 3108 MLX5_ETH_WQE_DATA_INLINE); 3109 pdst = &dseg->inline_data[0]; 3110 } else { 3111 /* The entire legacy MPW session counter is stored on close. */ 3112 pdst = (uint8_t *)dseg; 3113 } 3114 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 3115 buf += MLX5_DSEG_MIN_INLINE_SIZE; 3116 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 3117 len -= MLX5_DSEG_MIN_INLINE_SIZE; 3118 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. 
*/ 3119 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 3120 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 3121 pdst = (uint8_t *)txq->wqes; 3122 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 3123 loc->mbuf->vlan_tci); 3124 pdst += sizeof(struct rte_vlan_hdr); 3125 /* 3126 * The WQEBB space availability is checked by caller. 3127 * Here we should be aware of WQE ring buffer wraparound only. 3128 */ 3129 part = (uint8_t *)txq->wqes_end - pdst; 3130 part = RTE_MIN(part, len); 3131 do { 3132 rte_memcpy(pdst, buf, part); 3133 len -= part; 3134 if (likely(!len)) { 3135 pdst += part; 3136 if (!MLX5_TXOFF_CONFIG(MPW)) 3137 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 3138 /* Note: no final wraparound check here. */ 3139 return (struct mlx5_wqe_dseg *)pdst; 3140 } 3141 pdst = (uint8_t *)txq->wqes; 3142 buf += part; 3143 part = len; 3144 } while (true); 3145 } 3146 3147 /** 3148 * Build the Ethernet Segment with optionally inlined data with 3149 * VLAN insertion and following Data Segments (if any) from 3150 * multi-segment packet. Used by ordinary send and TSO. 3151 * 3152 * @param txq 3153 * Pointer to TX queue structure. 3154 * @param loc 3155 * Pointer to burst routine local context. 3156 * @param wqe 3157 * Pointer to WQE to fill with built Ethernet/Data Segments. 3158 * @param vlan 3159 * Length of VLAN header to insert, 0 means no VLAN insertion. 3160 * @param inlen 3161 * Data length to inline. For TSO this parameter specifies 3162 * exact value, for ordinary send routine can be aligned by 3163 * caller to provide better WQE space saving and data buffer 3164 * start address alignment. This length includes VLAN header 3165 * being inserted. 3166 * @param tso 3167 * Zero means ordinary send, inlined data can be extended, 3168 * otherwise this is TSO, inlined data length is fixed. 3169 * @param olx 3170 * Configured Tx offloads mask. It is fully defined at 3171 * compile time and may be used for optimization. 3172 * 3173 * @return 3174 * Actual size of built WQE in segments. 3175 */ 3176 static __rte_always_inline unsigned int 3177 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 3178 struct mlx5_txq_local *__rte_restrict loc, 3179 struct mlx5_wqe *__rte_restrict wqe, 3180 unsigned int vlan, 3181 unsigned int inlen, 3182 unsigned int tso, 3183 unsigned int olx __rte_unused) 3184 { 3185 struct mlx5_wqe_dseg *__rte_restrict dseg; 3186 unsigned int ds; 3187 3188 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 3189 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 3190 loc->mbuf_off = 0; 3191 3192 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 3193 if (!loc->mbuf_nseg) 3194 goto dseg_done; 3195 /* 3196 * There are still some mbuf remaining, not inlined. 3197 * The first mbuf may be partially inlined and we 3198 * must process the possible non-zero data offset. 3199 */ 3200 if (loc->mbuf_off) { 3201 unsigned int dlen; 3202 uint8_t *dptr; 3203 3204 /* 3205 * Exhausted packets must be dropped before. 3206 * Non-zero offset means there are some data 3207 * remained in the packet. 3208 */ 3209 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 3210 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 3211 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 3212 loc->mbuf_off); 3213 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 3214 /* 3215 * Build the pointer/minimal data Data Segment. 3216 * Do ring buffer wrapping check in advance. 
3217 */ 3218 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3219 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3220 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 3221 /* Store the mbuf to be freed on completion. */ 3222 MLX5_ASSERT(loc->elts_free); 3223 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3224 --loc->elts_free; 3225 ++dseg; 3226 if (--loc->mbuf_nseg == 0) 3227 goto dseg_done; 3228 loc->mbuf = loc->mbuf->next; 3229 loc->mbuf_off = 0; 3230 } 3231 do { 3232 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3233 struct rte_mbuf *mbuf; 3234 3235 /* Zero length segment found, just skip. */ 3236 mbuf = loc->mbuf; 3237 loc->mbuf = loc->mbuf->next; 3238 rte_pktmbuf_free_seg(mbuf); 3239 if (--loc->mbuf_nseg == 0) 3240 break; 3241 } else { 3242 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3243 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3244 mlx5_tx_dseg_iptr 3245 (txq, loc, dseg, 3246 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3247 rte_pktmbuf_data_len(loc->mbuf), olx); 3248 MLX5_ASSERT(loc->elts_free); 3249 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3250 --loc->elts_free; 3251 ++dseg; 3252 if (--loc->mbuf_nseg == 0) 3253 break; 3254 loc->mbuf = loc->mbuf->next; 3255 } 3256 } while (true); 3257 3258 dseg_done: 3259 /* Calculate actual segments used from the dseg pointer. */ 3260 if ((uintptr_t)wqe < (uintptr_t)dseg) 3261 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 3262 else 3263 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 3264 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 3265 return ds; 3266 } 3267 3268 /** 3269 * The routine checks timestamp flag in the current packet, 3270 * and push WAIT WQE into the queue if scheduling is required. 3271 * 3272 * @param txq 3273 * Pointer to TX queue structure. 3274 * @param loc 3275 * Pointer to burst routine local context. 3276 * @param olx 3277 * Configured Tx offloads mask. It is fully defined at 3278 * compile time and may be used for optimization. 3279 * 3280 * @return 3281 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3282 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 3283 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 3284 * Local context variables partially updated. 3285 */ 3286 static __rte_always_inline enum mlx5_txcmp_code 3287 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 3288 struct mlx5_txq_local *restrict loc, 3289 unsigned int olx) 3290 { 3291 if (MLX5_TXOFF_CONFIG(TXPP) && 3292 loc->mbuf->ol_flags & txq->ts_mask) { 3293 struct mlx5_wqe *wqe; 3294 uint64_t ts; 3295 int32_t wci; 3296 3297 /* 3298 * Estimate the required space quickly and roughly. 3299 * We would like to ensure the packet can be pushed 3300 * to the queue and we won't get the orphan WAIT WQE. 3301 */ 3302 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 3303 loc->elts_free < NB_SEGS(loc->mbuf)) 3304 return MLX5_TXCMP_CODE_EXIT; 3305 /* Convert the timestamp into completion to wait. */ 3306 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 3307 wci = mlx5_txpp_convert_tx_ts(txq->sh, ts); 3308 if (unlikely(wci < 0)) 3309 return MLX5_TXCMP_CODE_SINGLE; 3310 /* Build the WAIT WQE with specified completion. 
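 * It consists of two 16-byte segments only - the Control Segment
 * (initialized with ds = 2 and MLX5_OPCODE_WAIT) and the Queue Segment
 * from mlx5_tx_wseg_init() - so it fits into a single WQEBB, which is
 * why wqe_ci and wqe_free are adjusted by one below.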
*/ 3311 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3312 mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx); 3313 mlx5_tx_wseg_init(txq, loc, wqe, wci, olx); 3314 ++txq->wqe_ci; 3315 --loc->wqe_free; 3316 return MLX5_TXCMP_CODE_MULTI; 3317 } 3318 return MLX5_TXCMP_CODE_SINGLE; 3319 } 3320 3321 /** 3322 * Tx one packet function for multi-segment TSO. Supports all 3323 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 3324 * sends one packet per WQE. 3325 * 3326 * This routine is responsible for storing processed mbuf 3327 * into elts ring buffer and update elts_head. 3328 * 3329 * @param txq 3330 * Pointer to TX queue structure. 3331 * @param loc 3332 * Pointer to burst routine local context. 3333 * @param olx 3334 * Configured Tx offloads mask. It is fully defined at 3335 * compile time and may be used for optimization. 3336 * 3337 * @return 3338 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3339 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3340 * Local context variables partially updated. 3341 */ 3342 static __rte_always_inline enum mlx5_txcmp_code 3343 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 3344 struct mlx5_txq_local *__rte_restrict loc, 3345 unsigned int olx) 3346 { 3347 struct mlx5_wqe *__rte_restrict wqe; 3348 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 3349 3350 if (MLX5_TXOFF_CONFIG(TXPP)) { 3351 enum mlx5_txcmp_code wret; 3352 3353 /* Generate WAIT for scheduling if requested. */ 3354 wret = mlx5_tx_schedule_send(txq, loc, olx); 3355 if (wret == MLX5_TXCMP_CODE_EXIT) 3356 return MLX5_TXCMP_CODE_EXIT; 3357 if (wret == MLX5_TXCMP_CODE_ERROR) 3358 return MLX5_TXCMP_CODE_ERROR; 3359 } 3360 /* 3361 * Calculate data length to be inlined to estimate 3362 * the required space in WQE ring buffer. 3363 */ 3364 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3365 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3366 vlan = sizeof(struct rte_vlan_hdr); 3367 inlen = loc->mbuf->l2_len + vlan + 3368 loc->mbuf->l3_len + loc->mbuf->l4_len; 3369 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 3370 return MLX5_TXCMP_CODE_ERROR; 3371 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3372 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 3373 /* Packet must contain all TSO headers. */ 3374 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 3375 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3376 inlen > (dlen + vlan))) 3377 return MLX5_TXCMP_CODE_ERROR; 3378 MLX5_ASSERT(inlen >= txq->inlen_mode); 3379 /* 3380 * Check whether there are enough free WQEBBs: 3381 * - Control Segment 3382 * - Ethernet Segment 3383 * - First Segment of inlined Ethernet data 3384 * - ... data continued ... 3385 * - Data Segments of pointer/min inline type 3386 */ 3387 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3388 MLX5_ESEG_MIN_INLINE_SIZE + 3389 MLX5_WSEG_SIZE + 3390 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3391 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3392 return MLX5_TXCMP_CODE_EXIT; 3393 /* Check for maximal WQE size. */ 3394 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3395 return MLX5_TXCMP_CODE_ERROR; 3396 #ifdef MLX5_PMD_SOFT_COUNTERS 3397 /* Update sent data bytes/packets counters. */ 3398 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 3399 loc->mbuf->tso_segsz; 3400 /* 3401 * One will be added for mbuf itself 3402 * at the end of the mlx5_tx_burst from 3403 * loc->pkts_sent field. 
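 * Worked example (hypothetical sizes): dlen = 9014, vlan = 0,
 * inlen = 54 header bytes and tso_segsz = 1460 give
 * ntcp = (9014 - 54 + 1459) / 1460 = 7 TCP segments; after the
 * decrement below opackets grows by 6 and obytes by 9014 + 6 * 54,
 * accounting for the replicated headers.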
3404 */ 3405 --ntcp; 3406 txq->stats.opackets += ntcp; 3407 txq->stats.obytes += dlen + vlan + ntcp * inlen; 3408 #endif 3409 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3410 loc->wqe_last = wqe; 3411 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 3412 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 3413 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3414 txq->wqe_ci += (ds + 3) / 4; 3415 loc->wqe_free -= (ds + 3) / 4; 3416 return MLX5_TXCMP_CODE_MULTI; 3417 } 3418 3419 /** 3420 * Tx one packet function for multi-segment SEND. Supports all 3421 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3422 * sends one packet per WQE, without any data inlining in 3423 * Ethernet Segment. 3424 * 3425 * This routine is responsible for storing processed mbuf 3426 * into elts ring buffer and update elts_head. 3427 * 3428 * @param txq 3429 * Pointer to TX queue structure. 3430 * @param loc 3431 * Pointer to burst routine local context. 3432 * @param olx 3433 * Configured Tx offloads mask. It is fully defined at 3434 * compile time and may be used for optimization. 3435 * 3436 * @return 3437 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3438 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3439 * Local context variables partially updated. 3440 */ 3441 static __rte_always_inline enum mlx5_txcmp_code 3442 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 3443 struct mlx5_txq_local *__rte_restrict loc, 3444 unsigned int olx) 3445 { 3446 struct mlx5_wqe_dseg *__rte_restrict dseg; 3447 struct mlx5_wqe *__rte_restrict wqe; 3448 unsigned int ds, nseg; 3449 3450 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3451 if (MLX5_TXOFF_CONFIG(TXPP)) { 3452 enum mlx5_txcmp_code wret; 3453 3454 /* Generate WAIT for scheduling if requested. */ 3455 wret = mlx5_tx_schedule_send(txq, loc, olx); 3456 if (wret == MLX5_TXCMP_CODE_EXIT) 3457 return MLX5_TXCMP_CODE_EXIT; 3458 if (wret == MLX5_TXCMP_CODE_ERROR) 3459 return MLX5_TXCMP_CODE_ERROR; 3460 } 3461 /* 3462 * No inline at all, it means the CPU cycles saving 3463 * is prioritized at configuration, we should not 3464 * copy any packet data to WQE. 3465 */ 3466 nseg = NB_SEGS(loc->mbuf); 3467 ds = 2 + nseg; 3468 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3469 return MLX5_TXCMP_CODE_EXIT; 3470 /* Check for maximal WQE size. */ 3471 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3472 return MLX5_TXCMP_CODE_ERROR; 3473 /* 3474 * Some Tx offloads may cause an error if 3475 * packet is not long enough, check against 3476 * assumed minimal length. 3477 */ 3478 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 3479 return MLX5_TXCMP_CODE_ERROR; 3480 #ifdef MLX5_PMD_SOFT_COUNTERS 3481 /* Update sent data bytes counter. 
*/ 3482 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 3483 if (MLX5_TXOFF_CONFIG(VLAN) && 3484 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3485 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 3486 #endif 3487 /* 3488 * SEND WQE, one WQEBB: 3489 * - Control Segment, SEND opcode 3490 * - Ethernet Segment, optional VLAN, no inline 3491 * - Data Segments, pointer only type 3492 */ 3493 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3494 loc->wqe_last = wqe; 3495 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 3496 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3497 dseg = &wqe->dseg[0]; 3498 do { 3499 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3500 struct rte_mbuf *mbuf; 3501 3502 /* 3503 * Zero length segment found, have to 3504 * correct total size of WQE in segments. 3505 * It is supposed to be rare occasion, so 3506 * in normal case (no zero length segments) 3507 * we avoid extra writing to the Control 3508 * Segment. 3509 */ 3510 --ds; 3511 wqe->cseg.sq_ds -= RTE_BE32(1); 3512 mbuf = loc->mbuf; 3513 loc->mbuf = mbuf->next; 3514 rte_pktmbuf_free_seg(mbuf); 3515 if (--nseg == 0) 3516 break; 3517 } else { 3518 mlx5_tx_dseg_ptr 3519 (txq, loc, dseg, 3520 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3521 rte_pktmbuf_data_len(loc->mbuf), olx); 3522 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3523 --loc->elts_free; 3524 if (--nseg == 0) 3525 break; 3526 ++dseg; 3527 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3528 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3529 loc->mbuf = loc->mbuf->next; 3530 } 3531 } while (true); 3532 txq->wqe_ci += (ds + 3) / 4; 3533 loc->wqe_free -= (ds + 3) / 4; 3534 return MLX5_TXCMP_CODE_MULTI; 3535 } 3536 3537 /** 3538 * Tx one packet function for multi-segment SEND. Supports all 3539 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3540 * sends one packet per WQE, with data inlining in 3541 * Ethernet Segment and minimal Data Segments. 3542 * 3543 * This routine is responsible for storing processed mbuf 3544 * into elts ring buffer and update elts_head. 3545 * 3546 * @param txq 3547 * Pointer to TX queue structure. 3548 * @param loc 3549 * Pointer to burst routine local context. 3550 * @param olx 3551 * Configured Tx offloads mask. It is fully defined at 3552 * compile time and may be used for optimization. 3553 * 3554 * @return 3555 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3556 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3557 * Local context variables partially updated. 3558 */ 3559 static __rte_always_inline enum mlx5_txcmp_code 3560 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 3561 struct mlx5_txq_local *__rte_restrict loc, 3562 unsigned int olx) 3563 { 3564 struct mlx5_wqe *__rte_restrict wqe; 3565 unsigned int ds, inlen, dlen, vlan = 0; 3566 3567 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3568 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3569 if (MLX5_TXOFF_CONFIG(TXPP)) { 3570 enum mlx5_txcmp_code wret; 3571 3572 /* Generate WAIT for scheduling if requested. */ 3573 wret = mlx5_tx_schedule_send(txq, loc, olx); 3574 if (wret == MLX5_TXCMP_CODE_EXIT) 3575 return MLX5_TXCMP_CODE_EXIT; 3576 if (wret == MLX5_TXCMP_CODE_ERROR) 3577 return MLX5_TXCMP_CODE_ERROR; 3578 } 3579 /* 3580 * First calculate data length to be inlined 3581 * to estimate the required space for WQE. 
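 * For illustration (hypothetical configuration): with
 * txq->inlen_send = 256 and txq->inlen_mode = 0, a 1000-byte
 * multi-segment packet with no VLAN to insert exceeds inlen_send and,
 * since no minimal inlining is enforced, is handed over to
 * mlx5_tx_packet_multi_send() below; a 200-byte packet fits into
 * inlen_send and is inlined entirely.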
3582 */ 3583 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3584 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3585 vlan = sizeof(struct rte_vlan_hdr); 3586 inlen = dlen + vlan; 3587 /* Check against minimal length. */ 3588 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3589 return MLX5_TXCMP_CODE_ERROR; 3590 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 3591 if (inlen > txq->inlen_send || 3592 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 3593 struct rte_mbuf *mbuf; 3594 unsigned int nxlen; 3595 uintptr_t start; 3596 3597 /* 3598 * Packet length exceeds the allowed inline 3599 * data length, check whether the minimal 3600 * inlining is required. 3601 */ 3602 if (txq->inlen_mode) { 3603 MLX5_ASSERT(txq->inlen_mode >= 3604 MLX5_ESEG_MIN_INLINE_SIZE); 3605 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 3606 inlen = txq->inlen_mode; 3607 } else { 3608 if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE || 3609 !vlan || txq->vlan_en) { 3610 /* 3611 * VLAN insertion will be done inside by HW. 3612 * It is not utmost effective - VLAN flag is 3613 * checked twice, but we should proceed the 3614 * inlining length correctly and take into 3615 * account the VLAN header being inserted. 3616 */ 3617 return mlx5_tx_packet_multi_send 3618 (txq, loc, olx); 3619 } 3620 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 3621 } 3622 /* 3623 * Now we know the minimal amount of data is requested 3624 * to inline. Check whether we should inline the buffers 3625 * from the chain beginning to eliminate some mbufs. 3626 */ 3627 mbuf = loc->mbuf; 3628 nxlen = rte_pktmbuf_data_len(mbuf); 3629 if (unlikely(nxlen <= txq->inlen_send)) { 3630 /* We can inline first mbuf at least. */ 3631 if (nxlen < inlen) { 3632 unsigned int smlen; 3633 3634 /* Scan mbufs till inlen filled. */ 3635 do { 3636 smlen = nxlen; 3637 mbuf = NEXT(mbuf); 3638 MLX5_ASSERT(mbuf); 3639 nxlen = rte_pktmbuf_data_len(mbuf); 3640 nxlen += smlen; 3641 } while (unlikely(nxlen < inlen)); 3642 if (unlikely(nxlen > txq->inlen_send)) { 3643 /* We cannot inline entire mbuf. */ 3644 smlen = inlen - smlen; 3645 start = rte_pktmbuf_mtod_offset 3646 (mbuf, uintptr_t, smlen); 3647 goto do_align; 3648 } 3649 } 3650 do { 3651 inlen = nxlen; 3652 mbuf = NEXT(mbuf); 3653 /* There should be not end of packet. */ 3654 MLX5_ASSERT(mbuf); 3655 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 3656 } while (unlikely(nxlen < txq->inlen_send)); 3657 } 3658 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 3659 /* 3660 * Check whether we can do inline to align start 3661 * address of data buffer to cacheline. 3662 */ 3663 do_align: 3664 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 3665 if (unlikely(start)) { 3666 start += inlen; 3667 if (start <= txq->inlen_send) 3668 inlen = start; 3669 } 3670 } 3671 /* 3672 * Check whether there are enough free WQEBBs: 3673 * - Control Segment 3674 * - Ethernet Segment 3675 * - First Segment of inlined Ethernet data 3676 * - ... data continued ... 3677 * - Data Segments of pointer/min inline type 3678 * 3679 * Estimate the number of Data Segments conservatively, 3680 * supposing no any mbufs is being freed during inlining. 3681 */ 3682 MLX5_ASSERT(inlen <= txq->inlen_send); 3683 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3684 MLX5_ESEG_MIN_INLINE_SIZE + 3685 MLX5_WSEG_SIZE + 3686 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3687 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3688 return MLX5_TXCMP_CODE_EXIT; 3689 /* Check for maximal WQE size. 
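 * WQE sizing note: every WQEBB holds four 16-byte WSEGs, hence the
 * (ds + 3) / 4 conversions used below, e.g. ds = 7 segments occupy two
 * WQEBBs; the check right after rejects descriptors that would exceed
 * the maximal supported WQE size (MLX5_WQE_SIZE_MAX).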
*/ 3690 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3691 return MLX5_TXCMP_CODE_ERROR; 3692 #ifdef MLX5_PMD_SOFT_COUNTERS 3693 /* Update sent data bytes/packets counters. */ 3694 txq->stats.obytes += dlen + vlan; 3695 #endif 3696 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3697 loc->wqe_last = wqe; 3698 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 3699 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 3700 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3701 txq->wqe_ci += (ds + 3) / 4; 3702 loc->wqe_free -= (ds + 3) / 4; 3703 return MLX5_TXCMP_CODE_MULTI; 3704 } 3705 3706 /** 3707 * Tx burst function for multi-segment packets. Supports all 3708 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 3709 * sends one packet per WQE. Function stops sending if it 3710 * encounters the single-segment packet. 3711 * 3712 * This routine is responsible for storing processed mbuf 3713 * into elts ring buffer and update elts_head. 3714 * 3715 * @param txq 3716 * Pointer to TX queue structure. 3717 * @param[in] pkts 3718 * Packets to transmit. 3719 * @param pkts_n 3720 * Number of packets in array. 3721 * @param loc 3722 * Pointer to burst routine local context. 3723 * @param olx 3724 * Configured Tx offloads mask. It is fully defined at 3725 * compile time and may be used for optimization. 3726 * 3727 * @return 3728 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3729 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3730 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3731 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 3732 * Local context variables updated. 3733 */ 3734 static __rte_always_inline enum mlx5_txcmp_code 3735 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 3736 struct rte_mbuf **__rte_restrict pkts, 3737 unsigned int pkts_n, 3738 struct mlx5_txq_local *__rte_restrict loc, 3739 unsigned int olx) 3740 { 3741 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3742 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3743 pkts += loc->pkts_sent + 1; 3744 pkts_n -= loc->pkts_sent; 3745 for (;;) { 3746 enum mlx5_txcmp_code ret; 3747 3748 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3749 /* 3750 * Estimate the number of free elts quickly but 3751 * conservatively. Some segment may be fully inlined 3752 * and freed, ignore this here - precise estimation 3753 * is costly. 3754 */ 3755 if (loc->elts_free < NB_SEGS(loc->mbuf)) 3756 return MLX5_TXCMP_CODE_EXIT; 3757 if (MLX5_TXOFF_CONFIG(TSO) && 3758 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) { 3759 /* Proceed with multi-segment TSO. */ 3760 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 3761 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 3762 /* Proceed with multi-segment SEND with inlining. */ 3763 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 3764 } else { 3765 /* Proceed with multi-segment SEND w/o inlining. */ 3766 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 3767 } 3768 if (ret == MLX5_TXCMP_CODE_EXIT) 3769 return MLX5_TXCMP_CODE_EXIT; 3770 if (ret == MLX5_TXCMP_CODE_ERROR) 3771 return MLX5_TXCMP_CODE_ERROR; 3772 /* WQE is built, go to the next packet. */ 3773 ++loc->pkts_sent; 3774 --pkts_n; 3775 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3776 return MLX5_TXCMP_CODE_EXIT; 3777 loc->mbuf = *pkts++; 3778 if (pkts_n > 1) 3779 rte_prefetch0(*pkts); 3780 if (likely(NB_SEGS(loc->mbuf) > 1)) 3781 continue; 3782 /* Here ends the series of multi-segment packets. 
*/ 3783 if (MLX5_TXOFF_CONFIG(TSO) && 3784 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3785 return MLX5_TXCMP_CODE_TSO; 3786 return MLX5_TXCMP_CODE_SINGLE; 3787 } 3788 MLX5_ASSERT(false); 3789 } 3790 3791 /** 3792 * Tx burst function for single-segment packets with TSO. 3793 * Supports all types of Tx offloads, except multi-packets. 3794 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 3795 * Function stops sending if it encounters the multi-segment 3796 * packet or packet without TSO requested. 3797 * 3798 * The routine is responsible for storing processed mbuf 3799 * into elts ring buffer and update elts_head if inline 3800 * offloads is requested due to possible early freeing 3801 * of the inlined mbufs (can not store pkts array in elts 3802 * as a batch). 3803 * 3804 * @param txq 3805 * Pointer to TX queue structure. 3806 * @param[in] pkts 3807 * Packets to transmit. 3808 * @param pkts_n 3809 * Number of packets in array. 3810 * @param loc 3811 * Pointer to burst routine local context. 3812 * @param olx 3813 * Configured Tx offloads mask. It is fully defined at 3814 * compile time and may be used for optimization. 3815 * 3816 * @return 3817 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3818 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3819 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3820 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3821 * Local context variables updated. 3822 */ 3823 static __rte_always_inline enum mlx5_txcmp_code 3824 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 3825 struct rte_mbuf **__rte_restrict pkts, 3826 unsigned int pkts_n, 3827 struct mlx5_txq_local *__rte_restrict loc, 3828 unsigned int olx) 3829 { 3830 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3831 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3832 pkts += loc->pkts_sent + 1; 3833 pkts_n -= loc->pkts_sent; 3834 for (;;) { 3835 struct mlx5_wqe_dseg *__rte_restrict dseg; 3836 struct mlx5_wqe *__rte_restrict wqe; 3837 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 3838 uint8_t *dptr; 3839 3840 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3841 if (MLX5_TXOFF_CONFIG(TXPP)) { 3842 enum mlx5_txcmp_code wret; 3843 3844 /* Generate WAIT for scheduling if requested. */ 3845 wret = mlx5_tx_schedule_send(txq, loc, olx); 3846 if (wret == MLX5_TXCMP_CODE_EXIT) 3847 return MLX5_TXCMP_CODE_EXIT; 3848 if (wret == MLX5_TXCMP_CODE_ERROR) 3849 return MLX5_TXCMP_CODE_ERROR; 3850 } 3851 dlen = rte_pktmbuf_data_len(loc->mbuf); 3852 if (MLX5_TXOFF_CONFIG(VLAN) && 3853 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 3854 vlan = sizeof(struct rte_vlan_hdr); 3855 } 3856 /* 3857 * First calculate the WQE size to check 3858 * whether we have enough space in ring buffer. 3859 */ 3860 hlen = loc->mbuf->l2_len + vlan + 3861 loc->mbuf->l3_len + loc->mbuf->l4_len; 3862 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 3863 return MLX5_TXCMP_CODE_ERROR; 3864 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3865 hlen += loc->mbuf->outer_l2_len + 3866 loc->mbuf->outer_l3_len; 3867 /* Segment must contain all TSO headers. */ 3868 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 3869 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3870 hlen > (dlen + vlan))) 3871 return MLX5_TXCMP_CODE_ERROR; 3872 /* 3873 * Check whether there are enough free WQEBBs: 3874 * - Control Segment 3875 * - Ethernet Segment 3876 * - First Segment of inlined Ethernet data 3877 * - ... data continued ... 
3878 * - Finishing Data Segment of pointer type 3879 */ 3880 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 3881 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3882 if (loc->wqe_free < ((ds + 3) / 4)) 3883 return MLX5_TXCMP_CODE_EXIT; 3884 #ifdef MLX5_PMD_SOFT_COUNTERS 3885 /* Update sent data bytes/packets counters. */ 3886 ntcp = (dlen + vlan - hlen + 3887 loc->mbuf->tso_segsz - 1) / 3888 loc->mbuf->tso_segsz; 3889 /* 3890 * One will be added for mbuf itself at the end 3891 * of the mlx5_tx_burst from loc->pkts_sent field. 3892 */ 3893 --ntcp; 3894 txq->stats.opackets += ntcp; 3895 txq->stats.obytes += dlen + vlan + ntcp * hlen; 3896 #endif 3897 /* 3898 * Build the TSO WQE: 3899 * - Control Segment 3900 * - Ethernet Segment with hlen bytes inlined 3901 * - Data Segment of pointer type 3902 */ 3903 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3904 loc->wqe_last = wqe; 3905 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3906 MLX5_OPCODE_TSO, olx); 3907 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 3908 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 3909 dlen -= hlen - vlan; 3910 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 3911 /* 3912 * WQE is built, update the loop parameters 3913 * and go to the next packet. 3914 */ 3915 txq->wqe_ci += (ds + 3) / 4; 3916 loc->wqe_free -= (ds + 3) / 4; 3917 if (MLX5_TXOFF_CONFIG(INLINE)) 3918 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3919 --loc->elts_free; 3920 ++loc->pkts_sent; 3921 --pkts_n; 3922 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3923 return MLX5_TXCMP_CODE_EXIT; 3924 loc->mbuf = *pkts++; 3925 if (pkts_n > 1) 3926 rte_prefetch0(*pkts); 3927 if (MLX5_TXOFF_CONFIG(MULTI) && 3928 unlikely(NB_SEGS(loc->mbuf) > 1)) 3929 return MLX5_TXCMP_CODE_MULTI; 3930 if (likely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) 3931 return MLX5_TXCMP_CODE_SINGLE; 3932 /* Continue with the next TSO packet. */ 3933 } 3934 MLX5_ASSERT(false); 3935 } 3936 3937 /** 3938 * Analyze the packet and select the best method to send. 3939 * 3940 * @param txq 3941 * Pointer to TX queue structure. 3942 * @param loc 3943 * Pointer to burst routine local context. 3944 * @param olx 3945 * Configured Tx offloads mask. It is fully defined at 3946 * compile time and may be used for optimization. 3947 * @param newp 3948 * The predefined flag whether do complete check for 3949 * multi-segment packets and TSO. 3950 * 3951 * @return 3952 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3953 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 3954 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 3955 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 3956 */ 3957 static __rte_always_inline enum mlx5_txcmp_code 3958 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 3959 struct mlx5_txq_local *__rte_restrict loc, 3960 unsigned int olx, 3961 bool newp) 3962 { 3963 /* Check for multi-segment packet. */ 3964 if (newp && 3965 MLX5_TXOFF_CONFIG(MULTI) && 3966 unlikely(NB_SEGS(loc->mbuf) > 1)) 3967 return MLX5_TXCMP_CODE_MULTI; 3968 /* Check for TSO packet. */ 3969 if (newp && 3970 MLX5_TXOFF_CONFIG(TSO) && 3971 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3972 return MLX5_TXCMP_CODE_TSO; 3973 /* Check if eMPW is enabled at all. */ 3974 if (!MLX5_TXOFF_CONFIG(EMPW)) 3975 return MLX5_TXCMP_CODE_SINGLE; 3976 /* Check if eMPW can be engaged. 
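 * eMPW shares one Ethernet Segment among all packets of a batch, so
 * HW VLAN insertion cannot be used there. A packet requesting VLAN
 * insertion is sent with eMPW only if it is short enough to be fully
 * inlined together with the inserted tag, otherwise it falls back to
 * the ordinary SEND path.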
*/ 3977 if (MLX5_TXOFF_CONFIG(VLAN) && 3978 unlikely(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) && 3979 (!MLX5_TXOFF_CONFIG(INLINE) || 3980 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 3981 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 3982 /* 3983 * eMPW does not support VLAN insertion offload, 3984 * we have to inline the entire packet but 3985 * packet is too long for inlining. 3986 */ 3987 return MLX5_TXCMP_CODE_SINGLE; 3988 } 3989 return MLX5_TXCMP_CODE_EMPW; 3990 } 3991 3992 /** 3993 * Check the next packet attributes to match with the eMPW batch ones. 3994 * In addition, for legacy MPW the packet length is checked either. 3995 * 3996 * @param txq 3997 * Pointer to TX queue structure. 3998 * @param es 3999 * Pointer to Ethernet Segment of eMPW batch. 4000 * @param loc 4001 * Pointer to burst routine local context. 4002 * @param dlen 4003 * Length of previous packet in MPW descriptor. 4004 * @param olx 4005 * Configured Tx offloads mask. It is fully defined at 4006 * compile time and may be used for optimization. 4007 * 4008 * @return 4009 * true - packet match with eMPW batch attributes. 4010 * false - no match, eMPW should be restarted. 4011 */ 4012 static __rte_always_inline bool 4013 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, 4014 struct mlx5_wqe_eseg *__rte_restrict es, 4015 struct mlx5_txq_local *__rte_restrict loc, 4016 uint32_t dlen, 4017 unsigned int olx) 4018 { 4019 uint8_t swp_flags = 0; 4020 4021 /* Compare the checksum flags, if any. */ 4022 if (MLX5_TXOFF_CONFIG(CSUM) && 4023 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags) 4024 return false; 4025 /* Compare the Software Parser offsets and flags. */ 4026 if (MLX5_TXOFF_CONFIG(SWP) && 4027 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) || 4028 es->swp_flags != swp_flags)) 4029 return false; 4030 /* Fill metadata field if needed. */ 4031 if (MLX5_TXOFF_CONFIG(METADATA) && 4032 es->metadata != (loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 4033 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0)) 4034 return false; 4035 /* Legacy MPW can send packets with the same lengt only. */ 4036 if (MLX5_TXOFF_CONFIG(MPW) && 4037 dlen != rte_pktmbuf_data_len(loc->mbuf)) 4038 return false; 4039 /* There must be no VLAN packets in eMPW loop. */ 4040 if (MLX5_TXOFF_CONFIG(VLAN)) 4041 MLX5_ASSERT(!(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT)); 4042 /* Check if the scheduling is requested. */ 4043 if (MLX5_TXOFF_CONFIG(TXPP) && 4044 loc->mbuf->ol_flags & txq->ts_mask) 4045 return false; 4046 return true; 4047 } 4048 4049 /* 4050 * Update send loop variables and WQE for eMPW loop 4051 * without data inlining. Number of Data Segments is 4052 * equal to the number of sent packets. 4053 * 4054 * @param txq 4055 * Pointer to TX queue structure. 4056 * @param loc 4057 * Pointer to burst routine local context. 4058 * @param ds 4059 * Number of packets/Data Segments/Packets. 4060 * @param slen 4061 * Accumulated statistics, bytes sent 4062 * @param olx 4063 * Configured Tx offloads mask. It is fully defined at 4064 * compile time and may be used for optimization. 4065 * 4066 * @return 4067 * true - packet match with eMPW batch attributes. 4068 * false - no match, eMPW should be restarted. 
4069 */ 4070 static __rte_always_inline void 4071 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, 4072 struct mlx5_txq_local *__rte_restrict loc, 4073 unsigned int ds, 4074 unsigned int slen, 4075 unsigned int olx __rte_unused) 4076 { 4077 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4078 #ifdef MLX5_PMD_SOFT_COUNTERS 4079 /* Update sent data bytes counter. */ 4080 txq->stats.obytes += slen; 4081 #else 4082 (void)slen; 4083 #endif 4084 loc->elts_free -= ds; 4085 loc->pkts_sent += ds; 4086 ds += 2; 4087 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 4088 txq->wqe_ci += (ds + 3) / 4; 4089 loc->wqe_free -= (ds + 3) / 4; 4090 } 4091 4092 /* 4093 * Update send loop variables and WQE for eMPW loop 4094 * with data inlining. Gets the size of pushed descriptors 4095 * and data to the WQE. 4096 * 4097 * @param txq 4098 * Pointer to TX queue structure. 4099 * @param loc 4100 * Pointer to burst routine local context. 4101 * @param len 4102 * Total size of descriptor/data in bytes. 4103 * @param slen 4104 * Accumulated statistics, data bytes sent. 4105 * @param wqem 4106 * The base WQE for the eMPW/MPW descriptor. 4107 * @param olx 4108 * Configured Tx offloads mask. It is fully defined at 4109 * compile time and may be used for optimization. 4110 * 4111 * @return 4112 * true - packet match with eMPW batch attributes. 4113 * false - no match, eMPW should be restarted. 4114 */ 4115 static __rte_always_inline void 4116 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 4117 struct mlx5_txq_local *__rte_restrict loc, 4118 unsigned int len, 4119 unsigned int slen, 4120 struct mlx5_wqe *__rte_restrict wqem, 4121 unsigned int olx __rte_unused) 4122 { 4123 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 4124 4125 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4126 #ifdef MLX5_PMD_SOFT_COUNTERS 4127 /* Update sent data bytes counter. */ 4128 txq->stats.obytes += slen; 4129 #else 4130 (void)slen; 4131 #endif 4132 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 4133 /* 4134 * If the legacy MPW session contains the inline packets 4135 * we should set the only inline data segment length 4136 * and align the total length to the segment size. 4137 */ 4138 MLX5_ASSERT(len > sizeof(dseg->bcount)); 4139 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 4140 MLX5_ETH_WQE_DATA_INLINE); 4141 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 4142 } else { 4143 /* 4144 * The session is not legacy MPW or contains the 4145 * data buffer pointer segments. 4146 */ 4147 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 4148 len = len / MLX5_WSEG_SIZE + 2; 4149 } 4150 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 4151 txq->wqe_ci += (len + 3) / 4; 4152 loc->wqe_free -= (len + 3) / 4; 4153 loc->wqe_last = wqem; 4154 } 4155 4156 /** 4157 * The set of Tx burst functions for single-segment packets 4158 * without TSO and with Multi-Packet Writing feature support. 4159 * Supports all types of Tx offloads, except multi-packets 4160 * and TSO. 4161 * 4162 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends 4163 * as many packet per WQE as it can. If eMPW is not configured 4164 * or packet can not be sent with eMPW (VLAN insertion) the 4165 * ordinary SEND opcode is used and only one packet placed 4166 * in WQE. 4167 * 4168 * Functions stop sending if it encounters the multi-segment 4169 * packet or packet with TSO requested. 
4170 * 4171 * The routines are responsible for storing processed mbuf 4172 * into elts ring buffer and update elts_head if inlining 4173 * offload is requested. Otherwise the copying mbufs to elts 4174 * can be postponed and completed at the end of burst routine. 4175 * 4176 * @param txq 4177 * Pointer to TX queue structure. 4178 * @param[in] pkts 4179 * Packets to transmit. 4180 * @param pkts_n 4181 * Number of packets in array. 4182 * @param loc 4183 * Pointer to burst routine local context. 4184 * @param olx 4185 * Configured Tx offloads mask. It is fully defined at 4186 * compile time and may be used for optimization. 4187 * 4188 * @return 4189 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 4190 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 4191 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 4192 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 4193 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 4194 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 4195 * 4196 * Local context variables updated. 4197 * 4198 * 4199 * The routine sends packets with MLX5_OPCODE_EMPW 4200 * without inlining, this is dedicated optimized branch. 4201 * No VLAN insertion is supported. 4202 */ 4203 static __rte_always_inline enum mlx5_txcmp_code 4204 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 4205 struct rte_mbuf **__rte_restrict pkts, 4206 unsigned int pkts_n, 4207 struct mlx5_txq_local *__rte_restrict loc, 4208 unsigned int olx) 4209 { 4210 /* 4211 * Subroutine is the part of mlx5_tx_burst_single() 4212 * and sends single-segment packet with eMPW opcode 4213 * without data inlining. 4214 */ 4215 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4216 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 4217 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4218 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4219 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 4220 pkts += loc->pkts_sent + 1; 4221 pkts_n -= loc->pkts_sent; 4222 for (;;) { 4223 struct mlx5_wqe_dseg *__rte_restrict dseg; 4224 struct mlx5_wqe_eseg *__rte_restrict eseg; 4225 enum mlx5_txcmp_code ret; 4226 unsigned int part, loop; 4227 unsigned int slen = 0; 4228 4229 next_empw: 4230 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4231 if (MLX5_TXOFF_CONFIG(TXPP)) { 4232 enum mlx5_txcmp_code wret; 4233 4234 /* Generate WAIT for scheduling if requested. */ 4235 wret = mlx5_tx_schedule_send(txq, loc, olx); 4236 if (wret == MLX5_TXCMP_CODE_EXIT) 4237 return MLX5_TXCMP_CODE_EXIT; 4238 if (wret == MLX5_TXCMP_CODE_ERROR) 4239 return MLX5_TXCMP_CODE_ERROR; 4240 } 4241 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4242 MLX5_MPW_MAX_PACKETS : 4243 MLX5_EMPW_MAX_PACKETS); 4244 if (unlikely(loc->elts_free < part)) { 4245 /* We have no enough elts to save all mbufs. */ 4246 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 4247 return MLX5_TXCMP_CODE_EXIT; 4248 /* But we still able to send at least minimal eMPW. 
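 * (The eMPW descriptor takes one Control and one Ethernet Segment
 * plus one pointer Data Segment per packet, which gives the
 * (2 + part + 3) / 4 WQEBB estimate checked just below.)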
*/ 4249 part = loc->elts_free; 4250 } 4251 /* Check whether we have enough WQEs */ 4252 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 4253 if (unlikely(loc->wqe_free < 4254 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4255 return MLX5_TXCMP_CODE_EXIT; 4256 part = (loc->wqe_free * 4) - 2; 4257 } 4258 if (likely(part > 1)) 4259 rte_prefetch0(*pkts); 4260 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4261 /* 4262 * Build eMPW title WQEBB: 4263 * - Control Segment, eMPW opcode 4264 * - Ethernet Segment, no inline 4265 */ 4266 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 4267 MLX5_OPCODE_ENHANCED_MPSW, olx); 4268 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 4269 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4270 eseg = &loc->wqe_last->eseg; 4271 dseg = &loc->wqe_last->dseg[0]; 4272 loop = part; 4273 /* Store the packet length for legacy MPW. */ 4274 if (MLX5_TXOFF_CONFIG(MPW)) 4275 eseg->mss = rte_cpu_to_be_16 4276 (rte_pktmbuf_data_len(loc->mbuf)); 4277 for (;;) { 4278 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4279 #ifdef MLX5_PMD_SOFT_COUNTERS 4280 /* Update sent data bytes counter. */ 4281 slen += dlen; 4282 #endif 4283 mlx5_tx_dseg_ptr 4284 (txq, loc, dseg, 4285 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4286 dlen, olx); 4287 if (unlikely(--loop == 0)) 4288 break; 4289 loc->mbuf = *pkts++; 4290 if (likely(loop > 1)) 4291 rte_prefetch0(*pkts); 4292 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4293 /* 4294 * Unroll the completion code to avoid 4295 * returning variable value - it results in 4296 * unoptimized sequent checking in caller. 4297 */ 4298 if (ret == MLX5_TXCMP_CODE_MULTI) { 4299 part -= loop; 4300 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4301 if (unlikely(!loc->elts_free || 4302 !loc->wqe_free)) 4303 return MLX5_TXCMP_CODE_EXIT; 4304 return MLX5_TXCMP_CODE_MULTI; 4305 } 4306 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4307 if (ret == MLX5_TXCMP_CODE_TSO) { 4308 part -= loop; 4309 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4310 if (unlikely(!loc->elts_free || 4311 !loc->wqe_free)) 4312 return MLX5_TXCMP_CODE_EXIT; 4313 return MLX5_TXCMP_CODE_TSO; 4314 } 4315 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4316 part -= loop; 4317 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4318 if (unlikely(!loc->elts_free || 4319 !loc->wqe_free)) 4320 return MLX5_TXCMP_CODE_EXIT; 4321 return MLX5_TXCMP_CODE_SINGLE; 4322 } 4323 if (ret != MLX5_TXCMP_CODE_EMPW) { 4324 MLX5_ASSERT(false); 4325 part -= loop; 4326 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4327 return MLX5_TXCMP_CODE_ERROR; 4328 } 4329 /* 4330 * Check whether packet parameters coincide 4331 * within assumed eMPW batch: 4332 * - check sum settings 4333 * - metadata value 4334 * - software parser settings 4335 * - packets length (legacy MPW only) 4336 * - scheduling is not required 4337 */ 4338 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 4339 MLX5_ASSERT(loop); 4340 part -= loop; 4341 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4342 if (unlikely(!loc->elts_free || 4343 !loc->wqe_free)) 4344 return MLX5_TXCMP_CODE_EXIT; 4345 pkts_n -= part; 4346 goto next_empw; 4347 } 4348 /* Packet attributes match, continue the same eMPW. */ 4349 ++dseg; 4350 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4351 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4352 } 4353 /* eMPW is built successfully, update loop parameters. */ 4354 MLX5_ASSERT(!loop); 4355 MLX5_ASSERT(pkts_n >= part); 4356 #ifdef MLX5_PMD_SOFT_COUNTERS 4357 /* Update sent data bytes counter. 
*/ 4358 txq->stats.obytes += slen; 4359 #endif 4360 loc->elts_free -= part; 4361 loc->pkts_sent += part; 4362 txq->wqe_ci += (2 + part + 3) / 4; 4363 loc->wqe_free -= (2 + part + 3) / 4; 4364 pkts_n -= part; 4365 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4366 return MLX5_TXCMP_CODE_EXIT; 4367 loc->mbuf = *pkts++; 4368 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4369 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 4370 return ret; 4371 /* Continue sending eMPW batches. */ 4372 } 4373 MLX5_ASSERT(false); 4374 } 4375 4376 /** 4377 * The routine sends packets with MLX5_OPCODE_EMPW 4378 * with inlining, optionally supports VLAN insertion. 4379 */ 4380 static __rte_always_inline enum mlx5_txcmp_code 4381 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 4382 struct rte_mbuf **__rte_restrict pkts, 4383 unsigned int pkts_n, 4384 struct mlx5_txq_local *__rte_restrict loc, 4385 unsigned int olx) 4386 { 4387 /* 4388 * Subroutine is the part of mlx5_tx_burst_single() 4389 * and sends single-segment packet with eMPW opcode 4390 * with data inlining. 4391 */ 4392 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4393 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 4394 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4395 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4396 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 4397 pkts += loc->pkts_sent + 1; 4398 pkts_n -= loc->pkts_sent; 4399 for (;;) { 4400 struct mlx5_wqe_dseg *__rte_restrict dseg; 4401 struct mlx5_wqe *__rte_restrict wqem; 4402 enum mlx5_txcmp_code ret; 4403 unsigned int room, part, nlim; 4404 unsigned int slen = 0; 4405 4406 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4407 if (MLX5_TXOFF_CONFIG(TXPP)) { 4408 enum mlx5_txcmp_code wret; 4409 4410 /* Generate WAIT for scheduling if requested. */ 4411 wret = mlx5_tx_schedule_send(txq, loc, olx); 4412 if (wret == MLX5_TXCMP_CODE_EXIT) 4413 return MLX5_TXCMP_CODE_EXIT; 4414 if (wret == MLX5_TXCMP_CODE_ERROR) 4415 return MLX5_TXCMP_CODE_ERROR; 4416 } 4417 /* 4418 * Limits the amount of packets in one WQE 4419 * to improve CQE latency generation. 4420 */ 4421 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4422 MLX5_MPW_INLINE_MAX_PACKETS : 4423 MLX5_EMPW_MAX_PACKETS); 4424 /* Check whether we have minimal amount WQEs */ 4425 if (unlikely(loc->wqe_free < 4426 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4427 return MLX5_TXCMP_CODE_EXIT; 4428 if (likely(pkts_n > 1)) 4429 rte_prefetch0(*pkts); 4430 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4431 /* 4432 * Build eMPW title WQEBB: 4433 * - Control Segment, eMPW opcode, zero DS 4434 * - Ethernet Segment, no inline 4435 */ 4436 mlx5_tx_cseg_init(txq, loc, wqem, 0, 4437 MLX5_OPCODE_ENHANCED_MPSW, olx); 4438 mlx5_tx_eseg_none(txq, loc, wqem, 4439 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4440 dseg = &wqem->dseg[0]; 4441 /* Store the packet length for legacy MPW. */ 4442 if (MLX5_TXOFF_CONFIG(MPW)) 4443 wqem->eseg.mss = rte_cpu_to_be_16 4444 (rte_pktmbuf_data_len(loc->mbuf)); 4445 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 4446 loc->wqe_free) * MLX5_WQE_SIZE - 4447 MLX5_WQE_CSEG_SIZE - 4448 MLX5_WQE_ESEG_SIZE; 4449 /* Limit the room for legacy MPW sessions for performance. */ 4450 if (MLX5_TXOFF_CONFIG(MPW)) 4451 room = RTE_MIN(room, 4452 RTE_MAX(txq->inlen_empw + 4453 sizeof(dseg->bcount) + 4454 (MLX5_TXOFF_CONFIG(VLAN) ? 4455 sizeof(struct rte_vlan_hdr) : 0), 4456 MLX5_MPW_INLINE_MAX_PACKETS * 4457 MLX5_WQE_DSEG_SIZE)); 4458 /* Build WQE till we have space, packets and resources. 
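 * A rough sketch of the accounting used by the loop below: room is
 * the usable WQE space left after the Control and Ethernet Segments;
 * an inlined packet consumes its data length plus the bcount field
 * (and the VLAN header, when it is inserted), rounded up to
 * MLX5_WSEG_SIZE for eMPW sessions, while a pointer Data Segment
 * consumes MLX5_WQE_DSEG_SIZE. The session is closed once room, the
 * nlim packet limit, or the packet/elts supply is exhausted.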
*/ 4459 part = room; 4460 for (;;) { 4461 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4462 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 4463 unsigned int tlen; 4464 4465 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 4466 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 4467 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 4468 /* 4469 * Some Tx offloads may cause an error if 4470 * packet is not long enough, check against 4471 * assumed minimal length. 4472 */ 4473 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 4474 part -= room; 4475 if (unlikely(!part)) 4476 return MLX5_TXCMP_CODE_ERROR; 4477 /* 4478 * We have some successfully built 4479 * packet Data Segments to send. 4480 */ 4481 mlx5_tx_idone_empw(txq, loc, part, 4482 slen, wqem, olx); 4483 return MLX5_TXCMP_CODE_ERROR; 4484 } 4485 /* Inline or not inline - that's the Question. */ 4486 if (dlen > txq->inlen_empw || 4487 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) 4488 goto pointer_empw; 4489 if (MLX5_TXOFF_CONFIG(MPW)) { 4490 if (dlen > txq->inlen_send) 4491 goto pointer_empw; 4492 tlen = dlen; 4493 if (part == room) { 4494 /* Open new inline MPW session. */ 4495 tlen += sizeof(dseg->bcount); 4496 dseg->bcount = RTE_BE32(0); 4497 dseg = RTE_PTR_ADD 4498 (dseg, sizeof(dseg->bcount)); 4499 } else { 4500 /* 4501 * No pointer and inline descriptor 4502 * intermix for legacy MPW sessions. 4503 */ 4504 if (wqem->dseg[0].bcount) 4505 break; 4506 } 4507 } else { 4508 tlen = sizeof(dseg->bcount) + dlen; 4509 } 4510 /* Inline entire packet, optional VLAN insertion. */ 4511 if (MLX5_TXOFF_CONFIG(VLAN) && 4512 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4513 /* 4514 * The packet length must be checked in 4515 * mlx5_tx_able_to_empw() and packet 4516 * fits into inline length guaranteed. 4517 */ 4518 MLX5_ASSERT((dlen + 4519 sizeof(struct rte_vlan_hdr)) <= 4520 txq->inlen_empw); 4521 tlen += sizeof(struct rte_vlan_hdr); 4522 if (room < tlen) 4523 break; 4524 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 4525 dptr, dlen, olx); 4526 #ifdef MLX5_PMD_SOFT_COUNTERS 4527 /* Update sent data bytes counter. */ 4528 slen += sizeof(struct rte_vlan_hdr); 4529 #endif 4530 } else { 4531 if (room < tlen) 4532 break; 4533 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 4534 dptr, dlen, olx); 4535 } 4536 if (!MLX5_TXOFF_CONFIG(MPW)) 4537 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 4538 MLX5_ASSERT(room >= tlen); 4539 room -= tlen; 4540 /* 4541 * Packet data are completely inlined, 4542 * free the packet immediately. 4543 */ 4544 rte_pktmbuf_free_seg(loc->mbuf); 4545 goto next_mbuf; 4546 pointer_empw: 4547 /* 4548 * No pointer and inline descriptor 4549 * intermix for legacy MPW sessions. 4550 */ 4551 if (MLX5_TXOFF_CONFIG(MPW) && 4552 part != room && 4553 wqem->dseg[0].bcount == RTE_BE32(0)) 4554 break; 4555 /* 4556 * Not inlinable VLAN packets are 4557 * proceeded outside of this routine. 4558 */ 4559 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 4560 if (MLX5_TXOFF_CONFIG(VLAN)) 4561 MLX5_ASSERT(!(loc->mbuf->ol_flags & 4562 PKT_TX_VLAN_PKT)); 4563 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 4564 /* We have to store mbuf in elts.*/ 4565 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 4566 room -= MLX5_WQE_DSEG_SIZE; 4567 /* Ring buffer wraparound is checked at the loop end.*/ 4568 ++dseg; 4569 next_mbuf: 4570 #ifdef MLX5_PMD_SOFT_COUNTERS 4571 /* Update sent data bytes counter. 
*/ 4572 slen += dlen; 4573 #endif 4574 loc->pkts_sent++; 4575 loc->elts_free--; 4576 pkts_n--; 4577 if (unlikely(!pkts_n || !loc->elts_free)) { 4578 /* 4579 * We have no resources/packets to 4580 * continue build descriptors. 4581 */ 4582 part -= room; 4583 mlx5_tx_idone_empw(txq, loc, part, 4584 slen, wqem, olx); 4585 return MLX5_TXCMP_CODE_EXIT; 4586 } 4587 loc->mbuf = *pkts++; 4588 if (likely(pkts_n > 1)) 4589 rte_prefetch0(*pkts); 4590 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4591 /* 4592 * Unroll the completion code to avoid 4593 * returning variable value - it results in 4594 * unoptimized sequent checking in caller. 4595 */ 4596 if (ret == MLX5_TXCMP_CODE_MULTI) { 4597 part -= room; 4598 mlx5_tx_idone_empw(txq, loc, part, 4599 slen, wqem, olx); 4600 if (unlikely(!loc->elts_free || 4601 !loc->wqe_free)) 4602 return MLX5_TXCMP_CODE_EXIT; 4603 return MLX5_TXCMP_CODE_MULTI; 4604 } 4605 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4606 if (ret == MLX5_TXCMP_CODE_TSO) { 4607 part -= room; 4608 mlx5_tx_idone_empw(txq, loc, part, 4609 slen, wqem, olx); 4610 if (unlikely(!loc->elts_free || 4611 !loc->wqe_free)) 4612 return MLX5_TXCMP_CODE_EXIT; 4613 return MLX5_TXCMP_CODE_TSO; 4614 } 4615 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4616 part -= room; 4617 mlx5_tx_idone_empw(txq, loc, part, 4618 slen, wqem, olx); 4619 if (unlikely(!loc->elts_free || 4620 !loc->wqe_free)) 4621 return MLX5_TXCMP_CODE_EXIT; 4622 return MLX5_TXCMP_CODE_SINGLE; 4623 } 4624 if (ret != MLX5_TXCMP_CODE_EMPW) { 4625 MLX5_ASSERT(false); 4626 part -= room; 4627 mlx5_tx_idone_empw(txq, loc, part, 4628 slen, wqem, olx); 4629 return MLX5_TXCMP_CODE_ERROR; 4630 } 4631 /* Check if we have minimal room left. */ 4632 nlim--; 4633 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 4634 break; 4635 /* 4636 * Check whether packet parameters coincide 4637 * within assumed eMPW batch: 4638 * - check sum settings 4639 * - metadata value 4640 * - software parser settings 4641 * - packets length (legacy MPW only) 4642 * - scheduling is not required 4643 */ 4644 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 4645 loc, dlen, olx)) 4646 break; 4647 /* Packet attributes match, continue the same eMPW. */ 4648 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4649 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4650 } 4651 /* 4652 * We get here to close an existing eMPW 4653 * session and start the new one. 4654 */ 4655 MLX5_ASSERT(pkts_n); 4656 part -= room; 4657 if (unlikely(!part)) 4658 return MLX5_TXCMP_CODE_EXIT; 4659 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 4660 if (unlikely(!loc->elts_free || 4661 !loc->wqe_free)) 4662 return MLX5_TXCMP_CODE_EXIT; 4663 /* Continue the loop with new eMPW session. */ 4664 } 4665 MLX5_ASSERT(false); 4666 } 4667 4668 /** 4669 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 4670 * Data inlining and VLAN insertion are supported. 4671 */ 4672 static __rte_always_inline enum mlx5_txcmp_code 4673 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 4674 struct rte_mbuf **__rte_restrict pkts, 4675 unsigned int pkts_n, 4676 struct mlx5_txq_local *__rte_restrict loc, 4677 unsigned int olx) 4678 { 4679 /* 4680 * Subroutine is the part of mlx5_tx_burst_single() 4681 * and sends single-segment packet with SEND opcode. 
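 * Depending on the inline configuration and the packet itself one of
 * the labelled branches below is taken: single_inline (the whole
 * packet is copied into the WQE), single_min_inline (only inlen_mode
 * bytes are inlined, the rest is sent by pointer), single_part_inline
 * (minimal Ethernet Segment inlining, also used to insert VLAN by
 * software means) or single_no_inline (pointer-only Data Segment).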
4682 */ 4683 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4684 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4685 pkts += loc->pkts_sent + 1; 4686 pkts_n -= loc->pkts_sent; 4687 for (;;) { 4688 struct mlx5_wqe *__rte_restrict wqe; 4689 enum mlx5_txcmp_code ret; 4690 4691 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4692 if (MLX5_TXOFF_CONFIG(TXPP)) { 4693 enum mlx5_txcmp_code wret; 4694 4695 /* Generate WAIT for scheduling if requested. */ 4696 wret = mlx5_tx_schedule_send(txq, loc, olx); 4697 if (wret == MLX5_TXCMP_CODE_EXIT) 4698 return MLX5_TXCMP_CODE_EXIT; 4699 if (wret == MLX5_TXCMP_CODE_ERROR) 4700 return MLX5_TXCMP_CODE_ERROR; 4701 } 4702 if (MLX5_TXOFF_CONFIG(INLINE)) { 4703 unsigned int inlen, vlan = 0; 4704 4705 inlen = rte_pktmbuf_data_len(loc->mbuf); 4706 if (MLX5_TXOFF_CONFIG(VLAN) && 4707 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4708 vlan = sizeof(struct rte_vlan_hdr); 4709 inlen += vlan; 4710 static_assert((sizeof(struct rte_vlan_hdr) + 4711 sizeof(struct rte_ether_hdr)) == 4712 MLX5_ESEG_MIN_INLINE_SIZE, 4713 "invalid min inline data size"); 4714 } 4715 /* 4716 * If inlining is enabled at configuration time 4717 * the limit must be not less than minimal size. 4718 * Otherwise we would do extra check for data 4719 * size to avoid crashes due to length overflow. 4720 */ 4721 MLX5_ASSERT(txq->inlen_send >= 4722 MLX5_ESEG_MIN_INLINE_SIZE); 4723 if (inlen <= txq->inlen_send) { 4724 unsigned int seg_n, wqe_n; 4725 4726 rte_prefetch0(rte_pktmbuf_mtod 4727 (loc->mbuf, uint8_t *)); 4728 /* Check against minimal length. */ 4729 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 4730 return MLX5_TXCMP_CODE_ERROR; 4731 if (loc->mbuf->ol_flags & 4732 PKT_TX_DYNF_NOINLINE) { 4733 /* 4734 * The hint flag not to inline packet 4735 * data is set. Check whether we can 4736 * follow the hint. 4737 */ 4738 if ((!MLX5_TXOFF_CONFIG(EMPW) && 4739 txq->inlen_mode) || 4740 (MLX5_TXOFF_CONFIG(MPW) && 4741 txq->inlen_mode)) { 4742 if (inlen <= txq->inlen_send) 4743 goto single_inline; 4744 /* 4745 * The hardware requires the 4746 * minimal inline data header. 4747 */ 4748 goto single_min_inline; 4749 } 4750 if (MLX5_TXOFF_CONFIG(VLAN) && 4751 vlan && !txq->vlan_en) { 4752 /* 4753 * We must insert VLAN tag 4754 * by software means. 4755 */ 4756 goto single_part_inline; 4757 } 4758 goto single_no_inline; 4759 } 4760 single_inline: 4761 /* 4762 * Completely inlined packet data WQE: 4763 * - Control Segment, SEND opcode 4764 * - Ethernet Segment, no VLAN insertion 4765 * - Data inlined, VLAN optionally inserted 4766 * - Alignment to MLX5_WSEG_SIZE 4767 * Have to estimate amount of WQEBBs 4768 */ 4769 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 4770 MLX5_ESEG_MIN_INLINE_SIZE + 4771 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4772 /* Check if there are enough WQEBBs. */ 4773 wqe_n = (seg_n + 3) / 4; 4774 if (wqe_n > loc->wqe_free) 4775 return MLX5_TXCMP_CODE_EXIT; 4776 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4777 loc->wqe_last = wqe; 4778 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 4779 MLX5_OPCODE_SEND, olx); 4780 mlx5_tx_eseg_data(txq, loc, wqe, 4781 vlan, inlen, 0, olx); 4782 txq->wqe_ci += wqe_n; 4783 loc->wqe_free -= wqe_n; 4784 /* 4785 * Packet data are completely inlined, 4786 * free the packet immediately. 
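 * No entry is stored in elts for such a packet, the WQE already
 * carries its own copy of the data and nothing is left to be freed
 * on completion.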
4787 */ 4788 rte_pktmbuf_free_seg(loc->mbuf); 4789 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 4790 MLX5_TXOFF_CONFIG(MPW)) && 4791 txq->inlen_mode) { 4792 /* 4793 * If minimal inlining is requested the eMPW 4794 * feature should be disabled due to data is 4795 * inlined into Ethernet Segment, which can 4796 * not contain inlined data for eMPW due to 4797 * segment shared for all packets. 4798 */ 4799 struct mlx5_wqe_dseg *__rte_restrict dseg; 4800 unsigned int ds; 4801 uint8_t *dptr; 4802 4803 /* 4804 * The inline-mode settings require 4805 * to inline the specified amount of 4806 * data bytes to the Ethernet Segment. 4807 * We should check the free space in 4808 * WQE ring buffer to inline partially. 4809 */ 4810 single_min_inline: 4811 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 4812 MLX5_ASSERT(inlen > txq->inlen_mode); 4813 MLX5_ASSERT(txq->inlen_mode >= 4814 MLX5_ESEG_MIN_INLINE_SIZE); 4815 /* 4816 * Check whether there are enough free WQEBBs: 4817 * - Control Segment 4818 * - Ethernet Segment 4819 * - First Segment of inlined Ethernet data 4820 * - ... data continued ... 4821 * - Finishing Data Segment of pointer type 4822 */ 4823 ds = (MLX5_WQE_CSEG_SIZE + 4824 MLX5_WQE_ESEG_SIZE + 4825 MLX5_WQE_DSEG_SIZE + 4826 txq->inlen_mode - 4827 MLX5_ESEG_MIN_INLINE_SIZE + 4828 MLX5_WQE_DSEG_SIZE + 4829 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4830 if (loc->wqe_free < ((ds + 3) / 4)) 4831 return MLX5_TXCMP_CODE_EXIT; 4832 /* 4833 * Build the ordinary SEND WQE: 4834 * - Control Segment 4835 * - Ethernet Segment, inline inlen_mode bytes 4836 * - Data Segment of pointer type 4837 */ 4838 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4839 loc->wqe_last = wqe; 4840 mlx5_tx_cseg_init(txq, loc, wqe, ds, 4841 MLX5_OPCODE_SEND, olx); 4842 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 4843 txq->inlen_mode, 4844 0, olx); 4845 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4846 txq->inlen_mode - vlan; 4847 inlen -= txq->inlen_mode; 4848 mlx5_tx_dseg_ptr(txq, loc, dseg, 4849 dptr, inlen, olx); 4850 /* 4851 * WQE is built, update the loop parameters 4852 * and got to the next packet. 4853 */ 4854 txq->wqe_ci += (ds + 3) / 4; 4855 loc->wqe_free -= (ds + 3) / 4; 4856 /* We have to store mbuf in elts.*/ 4857 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4858 txq->elts[txq->elts_head++ & txq->elts_m] = 4859 loc->mbuf; 4860 --loc->elts_free; 4861 } else { 4862 uint8_t *dptr; 4863 unsigned int dlen; 4864 4865 /* 4866 * Partially inlined packet data WQE, we have 4867 * some space in title WQEBB, we can fill it 4868 * with some packet data. It takes one WQEBB, 4869 * it is available, no extra space check: 4870 * - Control Segment, SEND opcode 4871 * - Ethernet Segment, no VLAN insertion 4872 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 4873 * - Data Segment, pointer type 4874 * 4875 * We also get here if VLAN insertion is not 4876 * supported by HW, the inline is enabled. 4877 */ 4878 single_part_inline: 4879 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4880 loc->wqe_last = wqe; 4881 mlx5_tx_cseg_init(txq, loc, wqe, 4, 4882 MLX5_OPCODE_SEND, olx); 4883 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 4884 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4885 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 4886 /* 4887 * The length check is performed above, by 4888 * comparing with txq->inlen_send. We should 4889 * not get overflow here. 
4890 */ 4891 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 4892 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 4893 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 4894 dptr, dlen, olx); 4895 ++txq->wqe_ci; 4896 --loc->wqe_free; 4897 /* We have to store mbuf in elts.*/ 4898 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4899 txq->elts[txq->elts_head++ & txq->elts_m] = 4900 loc->mbuf; 4901 --loc->elts_free; 4902 } 4903 #ifdef MLX5_PMD_SOFT_COUNTERS 4904 /* Update sent data bytes counter. */ 4905 txq->stats.obytes += vlan + 4906 rte_pktmbuf_data_len(loc->mbuf); 4907 #endif 4908 } else { 4909 /* 4910 * No inline at all, it means the CPU cycles saving 4911 * is prioritized at configuration, we should not 4912 * copy any packet data to WQE. 4913 * 4914 * SEND WQE, one WQEBB: 4915 * - Control Segment, SEND opcode 4916 * - Ethernet Segment, optional VLAN, no inline 4917 * - Data Segment, pointer type 4918 */ 4919 single_no_inline: 4920 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4921 loc->wqe_last = wqe; 4922 mlx5_tx_cseg_init(txq, loc, wqe, 3, 4923 MLX5_OPCODE_SEND, olx); 4924 mlx5_tx_eseg_none(txq, loc, wqe, olx); 4925 mlx5_tx_dseg_ptr 4926 (txq, loc, &wqe->dseg[0], 4927 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4928 rte_pktmbuf_data_len(loc->mbuf), olx); 4929 ++txq->wqe_ci; 4930 --loc->wqe_free; 4931 /* 4932 * We should not store mbuf pointer in elts 4933 * if no inlining is configured, this is done 4934 * by calling routine in a batch copy. 4935 */ 4936 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4937 --loc->elts_free; 4938 #ifdef MLX5_PMD_SOFT_COUNTERS 4939 /* Update sent data bytes counter. */ 4940 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 4941 if (MLX5_TXOFF_CONFIG(VLAN) && 4942 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 4943 txq->stats.obytes += 4944 sizeof(struct rte_vlan_hdr); 4945 #endif 4946 } 4947 ++loc->pkts_sent; 4948 --pkts_n; 4949 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4950 return MLX5_TXCMP_CODE_EXIT; 4951 loc->mbuf = *pkts++; 4952 if (pkts_n > 1) 4953 rte_prefetch0(*pkts); 4954 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4955 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 4956 return ret; 4957 } 4958 MLX5_ASSERT(false); 4959 } 4960 4961 static __rte_always_inline enum mlx5_txcmp_code 4962 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 4963 struct rte_mbuf **__rte_restrict pkts, 4964 unsigned int pkts_n, 4965 struct mlx5_txq_local *__rte_restrict loc, 4966 unsigned int olx) 4967 { 4968 enum mlx5_txcmp_code ret; 4969 4970 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 4971 if (ret == MLX5_TXCMP_CODE_SINGLE) 4972 goto ordinary_send; 4973 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 4974 for (;;) { 4975 /* Optimize for inline/no inline eMPW send. */ 4976 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 4977 mlx5_tx_burst_empw_inline 4978 (txq, pkts, pkts_n, loc, olx) : 4979 mlx5_tx_burst_empw_simple 4980 (txq, pkts, pkts_n, loc, olx); 4981 if (ret != MLX5_TXCMP_CODE_SINGLE) 4982 return ret; 4983 /* The resources to send one packet should remain. */ 4984 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4985 ordinary_send: 4986 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 4987 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 4988 if (ret != MLX5_TXCMP_CODE_EMPW) 4989 return ret; 4990 /* The resources to send one packet should remain. */ 4991 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4992 } 4993 } 4994 4995 /** 4996 * DPDK Tx callback template. This is configured template 4997 * used to generate routines optimized for specified offload setup. 
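 * All MLX5_TXOFF_CONFIG() checks against the olx argument are
 * resolved at compile time, so each generated routine keeps only the
 * branches needed for its offload set.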
4998 * One of this generated functions is chosen at SQ configuration 4999 * time. 5000 * 5001 * @param txq 5002 * Generic pointer to TX queue structure. 5003 * @param[in] pkts 5004 * Packets to transmit. 5005 * @param pkts_n 5006 * Number of packets in array. 5007 * @param olx 5008 * Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx 5009 * values. Should be static to take compile time static configuration 5010 * advantages. 5011 * 5012 * @return 5013 * Number of packets successfully transmitted (<= pkts_n). 5014 */ 5015 static __rte_always_inline uint16_t 5016 mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, 5017 struct rte_mbuf **__rte_restrict pkts, 5018 uint16_t pkts_n, 5019 unsigned int olx) 5020 { 5021 struct mlx5_txq_local loc; 5022 enum mlx5_txcmp_code ret; 5023 unsigned int part; 5024 5025 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 5026 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 5027 if (unlikely(!pkts_n)) 5028 return 0; 5029 loc.pkts_sent = 0; 5030 loc.pkts_copy = 0; 5031 loc.wqe_last = NULL; 5032 5033 send_loop: 5034 loc.pkts_loop = loc.pkts_sent; 5035 /* 5036 * Check if there are some CQEs, if any: 5037 * - process an encountered errors 5038 * - process the completed WQEs 5039 * - free related mbufs 5040 * - doorbell the NIC about processed CQEs 5041 */ 5042 rte_prefetch0(*(pkts + loc.pkts_sent)); 5043 mlx5_tx_handle_completion(txq, olx); 5044 /* 5045 * Calculate the number of available resources - elts and WQEs. 5046 * There are two possible different scenarios: 5047 * - no data inlining into WQEs, one WQEBB may contains up to 5048 * four packets, in this case elts become scarce resource 5049 * - data inlining into WQEs, one packet may require multiple 5050 * WQEBBs, the WQEs become the limiting factor. 5051 */ 5052 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 5053 loc.elts_free = txq->elts_s - 5054 (uint16_t)(txq->elts_head - txq->elts_tail); 5055 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 5056 loc.wqe_free = txq->wqe_s - 5057 (uint16_t)(txq->wqe_ci - txq->wqe_pi); 5058 if (unlikely(!loc.elts_free || !loc.wqe_free)) 5059 goto burst_exit; 5060 for (;;) { 5061 /* 5062 * Fetch the packet from array. Usually this is 5063 * the first packet in series of multi/single 5064 * segment packets. 5065 */ 5066 loc.mbuf = *(pkts + loc.pkts_sent); 5067 /* Dedicated branch for multi-segment packets. */ 5068 if (MLX5_TXOFF_CONFIG(MULTI) && 5069 unlikely(NB_SEGS(loc.mbuf) > 1)) { 5070 /* 5071 * Multi-segment packet encountered. 5072 * Hardware is able to process it only 5073 * with SEND/TSO opcodes, one packet 5074 * per WQE, do it in dedicated routine. 5075 */ 5076 enter_send_multi: 5077 MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy); 5078 part = loc.pkts_sent - loc.pkts_copy; 5079 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 5080 /* 5081 * There are some single-segment mbufs not 5082 * stored in elts. The mbufs must be in the 5083 * same order as WQEs, so we must copy the 5084 * mbufs to elts here, before the coming 5085 * multi-segment packet mbufs is appended. 5086 */ 5087 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, 5088 part, olx); 5089 loc.pkts_copy = loc.pkts_sent; 5090 } 5091 MLX5_ASSERT(pkts_n > loc.pkts_sent); 5092 ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx); 5093 if (!MLX5_TXOFF_CONFIG(INLINE)) 5094 loc.pkts_copy = loc.pkts_sent; 5095 /* 5096 * These returned code checks are supposed 5097 * to be optimized out due to routine inlining. 
5098 */ 5099 if (ret == MLX5_TXCMP_CODE_EXIT) { 5100 /* 5101 * The routine returns this code when 5102 * all packets are sent or there is no 5103 * enough resources to complete request. 5104 */ 5105 break; 5106 } 5107 if (ret == MLX5_TXCMP_CODE_ERROR) { 5108 /* 5109 * The routine returns this code when 5110 * some error in the incoming packets 5111 * format occurred. 5112 */ 5113 txq->stats.oerrors++; 5114 break; 5115 } 5116 if (ret == MLX5_TXCMP_CODE_SINGLE) { 5117 /* 5118 * The single-segment packet was encountered 5119 * in the array, try to send it with the 5120 * best optimized way, possible engaging eMPW. 5121 */ 5122 goto enter_send_single; 5123 } 5124 if (MLX5_TXOFF_CONFIG(TSO) && 5125 ret == MLX5_TXCMP_CODE_TSO) { 5126 /* 5127 * The single-segment TSO packet was 5128 * encountered in the array. 5129 */ 5130 goto enter_send_tso; 5131 } 5132 /* We must not get here. Something is going wrong. */ 5133 MLX5_ASSERT(false); 5134 txq->stats.oerrors++; 5135 break; 5136 } 5137 /* Dedicated branch for single-segment TSO packets. */ 5138 if (MLX5_TXOFF_CONFIG(TSO) && 5139 unlikely(loc.mbuf->ol_flags & PKT_TX_TCP_SEG)) { 5140 /* 5141 * TSO might require special way for inlining 5142 * (dedicated parameters) and is sent with 5143 * MLX5_OPCODE_TSO opcode only, provide this 5144 * in dedicated branch. 5145 */ 5146 enter_send_tso: 5147 MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1); 5148 MLX5_ASSERT(pkts_n > loc.pkts_sent); 5149 ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx); 5150 /* 5151 * These returned code checks are supposed 5152 * to be optimized out due to routine inlining. 5153 */ 5154 if (ret == MLX5_TXCMP_CODE_EXIT) 5155 break; 5156 if (ret == MLX5_TXCMP_CODE_ERROR) { 5157 txq->stats.oerrors++; 5158 break; 5159 } 5160 if (ret == MLX5_TXCMP_CODE_SINGLE) 5161 goto enter_send_single; 5162 if (MLX5_TXOFF_CONFIG(MULTI) && 5163 ret == MLX5_TXCMP_CODE_MULTI) { 5164 /* 5165 * The multi-segment packet was 5166 * encountered in the array. 5167 */ 5168 goto enter_send_multi; 5169 } 5170 /* We must not get here. Something is going wrong. */ 5171 MLX5_ASSERT(false); 5172 txq->stats.oerrors++; 5173 break; 5174 } 5175 /* 5176 * The dedicated branch for the single-segment packets 5177 * without TSO. Often these ones can be sent using 5178 * MLX5_OPCODE_EMPW with multiple packets in one WQE. 5179 * The routine builds the WQEs till it encounters 5180 * the TSO or multi-segment packet (in case if these 5181 * offloads are requested at SQ configuration time). 5182 */ 5183 enter_send_single: 5184 MLX5_ASSERT(pkts_n > loc.pkts_sent); 5185 ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx); 5186 /* 5187 * These returned code checks are supposed 5188 * to be optimized out due to routine inlining. 5189 */ 5190 if (ret == MLX5_TXCMP_CODE_EXIT) 5191 break; 5192 if (ret == MLX5_TXCMP_CODE_ERROR) { 5193 txq->stats.oerrors++; 5194 break; 5195 } 5196 if (MLX5_TXOFF_CONFIG(MULTI) && 5197 ret == MLX5_TXCMP_CODE_MULTI) { 5198 /* 5199 * The multi-segment packet was 5200 * encountered in the array. 5201 */ 5202 goto enter_send_multi; 5203 } 5204 if (MLX5_TXOFF_CONFIG(TSO) && 5205 ret == MLX5_TXCMP_CODE_TSO) { 5206 /* 5207 * The single-segment TSO packet was 5208 * encountered in the array. 5209 */ 5210 goto enter_send_tso; 5211 } 5212 /* We must not get here. Something is going wrong. 
*/ 5213 MLX5_ASSERT(false); 5214 txq->stats.oerrors++; 5215 break; 5216 } 5217 /* 5218 * Main Tx loop is completed, do the rest: 5219 * - set completion request if thresholds are reached 5220 * - doorbell the hardware 5221 * - copy the rest of mbufs to elts (if any) 5222 */ 5223 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) || 5224 loc.pkts_sent >= loc.pkts_copy); 5225 /* Take a shortcut if nothing is sent. */ 5226 if (unlikely(loc.pkts_sent == loc.pkts_loop)) 5227 goto burst_exit; 5228 /* Request CQE generation if limits are reached. */ 5229 mlx5_tx_request_completion(txq, &loc, olx); 5230 /* 5231 * Ring QP doorbell immediately after WQE building completion 5232 * to improve latencies. The pure software related data treatment 5233 * can be completed after doorbell. Tx CQEs for this SQ are 5234 * processed in this thread only by the polling. 5235 * 5236 * The rdma core library can map doorbell register in two ways, 5237 * depending on the environment variable "MLX5_SHUT_UP_BF": 5238 * 5239 * - as regular cached memory, the variable is either missing or 5240 * set to zero. This type of mapping may cause the significant 5241 * doorbell register writing latency and requires explicit 5242 * memory write barrier to mitigate this issue and prevent 5243 * write combining. 5244 * 5245 * - as non-cached memory, the variable is present and set to 5246 * not "0" value. This type of mapping may cause performance 5247 * impact under heavy loading conditions but the explicit write 5248 * memory barrier is not required and it may improve core 5249 * performance. 5250 * 5251 * - the legacy behaviour (prior 19.08 release) was to use some 5252 * heuristics to decide whether write memory barrier should 5253 * be performed. This behavior is supported with specifying 5254 * tx_db_nc=2, write barrier is skipped if application 5255 * provides the full recommended burst of packets, it 5256 * supposes the next packets are coming and the write barrier 5257 * will be issued on the next burst (after descriptor writing, 5258 * at least). 5259 */ 5260 mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc && 5261 (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST)); 5262 /* Not all of the mbufs may be stored into elts yet. */ 5263 part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy; 5264 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 5265 /* 5266 * There are some single-segment mbufs not stored in elts. 5267 * It can be only if the last packet was single-segment. 5268 * The copying is gathered into one place due to it is 5269 * a good opportunity to optimize that with SIMD. 5270 * Unfortunately if inlining is enabled the gaps in 5271 * pointer array may happen due to early freeing of the 5272 * inlined mbufs. 5273 */ 5274 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx); 5275 loc.pkts_copy = loc.pkts_sent; 5276 } 5277 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 5278 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 5279 if (pkts_n > loc.pkts_sent) { 5280 /* 5281 * If burst size is large there might be no enough CQE 5282 * fetched from completion queue and no enough resources 5283 * freed to send all the packets. 5284 */ 5285 goto send_loop; 5286 } 5287 burst_exit: 5288 #ifdef MLX5_PMD_SOFT_COUNTERS 5289 /* Increment sent packets counter. */ 5290 txq->stats.opackets += loc.pkts_sent; 5291 #endif 5292 return loc.pkts_sent; 5293 } 5294 5295 /* Generate routines with Enhanced Multi-Packet Write support. 
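 * Each MLX5_TXOFF_DECL() below emits a burst routine specialized for
 * the listed MLX5_TXOFF_CONFIG_* set; the txoff_func[] table further
 * down pairs every routine with its offload mask so that the best
 * matching one can be picked at Tx queue configuration time.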
*/ 5296 MLX5_TXOFF_DECL(full_empw, 5297 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW) 5298 5299 MLX5_TXOFF_DECL(none_empw, 5300 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 5301 5302 MLX5_TXOFF_DECL(md_empw, 5303 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5304 5305 MLX5_TXOFF_DECL(mt_empw, 5306 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5307 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5308 5309 MLX5_TXOFF_DECL(mtsc_empw, 5310 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5311 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5312 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5313 5314 MLX5_TXOFF_DECL(mti_empw, 5315 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5316 MLX5_TXOFF_CONFIG_INLINE | 5317 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5318 5319 MLX5_TXOFF_DECL(mtv_empw, 5320 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5321 MLX5_TXOFF_CONFIG_VLAN | 5322 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5323 5324 MLX5_TXOFF_DECL(mtiv_empw, 5325 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5326 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5327 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5328 5329 MLX5_TXOFF_DECL(sc_empw, 5330 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5331 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5332 5333 MLX5_TXOFF_DECL(sci_empw, 5334 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5335 MLX5_TXOFF_CONFIG_INLINE | 5336 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5337 5338 MLX5_TXOFF_DECL(scv_empw, 5339 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5340 MLX5_TXOFF_CONFIG_VLAN | 5341 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5342 5343 MLX5_TXOFF_DECL(sciv_empw, 5344 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5345 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5346 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5347 5348 MLX5_TXOFF_DECL(i_empw, 5349 MLX5_TXOFF_CONFIG_INLINE | 5350 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5351 5352 MLX5_TXOFF_DECL(v_empw, 5353 MLX5_TXOFF_CONFIG_VLAN | 5354 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5355 5356 MLX5_TXOFF_DECL(iv_empw, 5357 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5358 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5359 5360 /* Generate routines without Enhanced Multi-Packet Write support. 
*/ 5361 MLX5_TXOFF_DECL(full, 5362 MLX5_TXOFF_CONFIG_FULL) 5363 5364 MLX5_TXOFF_DECL(none, 5365 MLX5_TXOFF_CONFIG_NONE) 5366 5367 MLX5_TXOFF_DECL(md, 5368 MLX5_TXOFF_CONFIG_METADATA) 5369 5370 MLX5_TXOFF_DECL(mt, 5371 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5372 MLX5_TXOFF_CONFIG_METADATA) 5373 5374 MLX5_TXOFF_DECL(mtsc, 5375 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5376 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5377 MLX5_TXOFF_CONFIG_METADATA) 5378 5379 MLX5_TXOFF_DECL(mti, 5380 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5381 MLX5_TXOFF_CONFIG_INLINE | 5382 MLX5_TXOFF_CONFIG_METADATA) 5383 5384 5385 MLX5_TXOFF_DECL(mtv, 5386 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5387 MLX5_TXOFF_CONFIG_VLAN | 5388 MLX5_TXOFF_CONFIG_METADATA) 5389 5390 5391 MLX5_TXOFF_DECL(mtiv, 5392 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5393 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5394 MLX5_TXOFF_CONFIG_METADATA) 5395 5396 MLX5_TXOFF_DECL(sc, 5397 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5398 MLX5_TXOFF_CONFIG_METADATA) 5399 5400 MLX5_TXOFF_DECL(sci, 5401 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5402 MLX5_TXOFF_CONFIG_INLINE | 5403 MLX5_TXOFF_CONFIG_METADATA) 5404 5405 5406 MLX5_TXOFF_DECL(scv, 5407 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5408 MLX5_TXOFF_CONFIG_VLAN | 5409 MLX5_TXOFF_CONFIG_METADATA) 5410 5411 5412 MLX5_TXOFF_DECL(sciv, 5413 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5414 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5415 MLX5_TXOFF_CONFIG_METADATA) 5416 5417 MLX5_TXOFF_DECL(i, 5418 MLX5_TXOFF_CONFIG_INLINE | 5419 MLX5_TXOFF_CONFIG_METADATA) 5420 5421 MLX5_TXOFF_DECL(v, 5422 MLX5_TXOFF_CONFIG_VLAN | 5423 MLX5_TXOFF_CONFIG_METADATA) 5424 5425 MLX5_TXOFF_DECL(iv, 5426 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5427 MLX5_TXOFF_CONFIG_METADATA) 5428 5429 /* Generate routines with timestamp scheduling. */ 5430 MLX5_TXOFF_DECL(full_ts_nompw, 5431 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP) 5432 5433 MLX5_TXOFF_DECL(full_ts_nompwi, 5434 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5435 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5436 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5437 MLX5_TXOFF_CONFIG_TXPP) 5438 5439 MLX5_TXOFF_DECL(full_ts, 5440 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP | 5441 MLX5_TXOFF_CONFIG_EMPW) 5442 5443 MLX5_TXOFF_DECL(full_ts_noi, 5444 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5445 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5446 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5447 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5448 5449 MLX5_TXOFF_DECL(none_ts, 5450 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP | 5451 MLX5_TXOFF_CONFIG_EMPW) 5452 5453 MLX5_TXOFF_DECL(mdi_ts, 5454 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5455 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5456 5457 MLX5_TXOFF_DECL(mti_ts, 5458 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5459 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5460 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5461 5462 MLX5_TXOFF_DECL(mtiv_ts, 5463 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5464 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5465 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP | 5466 MLX5_TXOFF_CONFIG_EMPW) 5467 5468 /* 5469 * Generate routines with Legacy Multi-Packet Write support. 
5470 * This mode is supported by ConnectX-4 Lx only and imposes 5471 * offload limitations, not supported: 5472 * - ACL/Flows (metadata are becoming meaningless) 5473 * - WQE Inline headers 5474 * - SRIOV (E-Switch offloads) 5475 * - VLAN insertion 5476 * - tunnel encapsulation/decapsulation 5477 * - TSO 5478 */ 5479 MLX5_TXOFF_DECL(none_mpw, 5480 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW | 5481 MLX5_TXOFF_CONFIG_MPW) 5482 5483 MLX5_TXOFF_DECL(mci_mpw, 5484 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 5485 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 5486 MLX5_TXOFF_CONFIG_MPW) 5487 5488 MLX5_TXOFF_DECL(mc_mpw, 5489 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 5490 MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW) 5491 5492 MLX5_TXOFF_DECL(i_mpw, 5493 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 5494 MLX5_TXOFF_CONFIG_MPW) 5495 5496 /* 5497 * Array of declared and compiled Tx burst function and corresponding 5498 * supported offloads set. The array is used to select the Tx burst 5499 * function for specified offloads set at Tx queue configuration time. 5500 */ 5501 const struct { 5502 eth_tx_burst_t func; 5503 unsigned int olx; 5504 } txoff_func[] = { 5505 MLX5_TXOFF_INFO(full_empw, 5506 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5507 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5508 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5509 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5510 5511 MLX5_TXOFF_INFO(none_empw, 5512 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 5513 5514 MLX5_TXOFF_INFO(md_empw, 5515 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5516 5517 MLX5_TXOFF_INFO(mt_empw, 5518 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5519 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5520 5521 MLX5_TXOFF_INFO(mtsc_empw, 5522 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5523 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5524 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5525 5526 MLX5_TXOFF_INFO(mti_empw, 5527 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5528 MLX5_TXOFF_CONFIG_INLINE | 5529 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5530 5531 MLX5_TXOFF_INFO(mtv_empw, 5532 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5533 MLX5_TXOFF_CONFIG_VLAN | 5534 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5535 5536 MLX5_TXOFF_INFO(mtiv_empw, 5537 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5538 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5539 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5540 5541 MLX5_TXOFF_INFO(sc_empw, 5542 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5543 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5544 5545 MLX5_TXOFF_INFO(sci_empw, 5546 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5547 MLX5_TXOFF_CONFIG_INLINE | 5548 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5549 5550 MLX5_TXOFF_INFO(scv_empw, 5551 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5552 MLX5_TXOFF_CONFIG_VLAN | 5553 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5554 5555 MLX5_TXOFF_INFO(sciv_empw, 5556 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5557 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5558 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5559 5560 MLX5_TXOFF_INFO(i_empw, 5561 MLX5_TXOFF_CONFIG_INLINE | 5562 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5563 5564 MLX5_TXOFF_INFO(v_empw, 5565 MLX5_TXOFF_CONFIG_VLAN | 5566 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5567 5568 MLX5_TXOFF_INFO(iv_empw, 
5569 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5570 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5571 5572 MLX5_TXOFF_INFO(full_ts_nompw, 5573 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP) 5574 5575 MLX5_TXOFF_INFO(full_ts_nompwi, 5576 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5577 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5578 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5579 MLX5_TXOFF_CONFIG_TXPP) 5580 5581 MLX5_TXOFF_INFO(full_ts, 5582 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP | 5583 MLX5_TXOFF_CONFIG_EMPW) 5584 5585 MLX5_TXOFF_INFO(full_ts_noi, 5586 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5587 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5588 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5589 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5590 5591 MLX5_TXOFF_INFO(none_ts, 5592 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP | 5593 MLX5_TXOFF_CONFIG_EMPW) 5594 5595 MLX5_TXOFF_INFO(mdi_ts, 5596 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5597 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5598 5599 MLX5_TXOFF_INFO(mti_ts, 5600 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5601 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5602 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5603 5604 MLX5_TXOFF_INFO(mtiv_ts, 5605 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5606 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5607 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP | 5608 MLX5_TXOFF_CONFIG_EMPW) 5609 5610 MLX5_TXOFF_INFO(full, 5611 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5612 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5613 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5614 MLX5_TXOFF_CONFIG_METADATA) 5615 5616 MLX5_TXOFF_INFO(none, 5617 MLX5_TXOFF_CONFIG_NONE) 5618 5619 MLX5_TXOFF_INFO(md, 5620 MLX5_TXOFF_CONFIG_METADATA) 5621 5622 MLX5_TXOFF_INFO(mt, 5623 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5624 MLX5_TXOFF_CONFIG_METADATA) 5625 5626 MLX5_TXOFF_INFO(mtsc, 5627 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5628 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5629 MLX5_TXOFF_CONFIG_METADATA) 5630 5631 MLX5_TXOFF_INFO(mti, 5632 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5633 MLX5_TXOFF_CONFIG_INLINE | 5634 MLX5_TXOFF_CONFIG_METADATA) 5635 5636 MLX5_TXOFF_INFO(mtv, 5637 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5638 MLX5_TXOFF_CONFIG_VLAN | 5639 MLX5_TXOFF_CONFIG_METADATA) 5640 5641 MLX5_TXOFF_INFO(mtiv, 5642 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5643 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5644 MLX5_TXOFF_CONFIG_METADATA) 5645 5646 MLX5_TXOFF_INFO(sc, 5647 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5648 MLX5_TXOFF_CONFIG_METADATA) 5649 5650 MLX5_TXOFF_INFO(sci, 5651 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5652 MLX5_TXOFF_CONFIG_INLINE | 5653 MLX5_TXOFF_CONFIG_METADATA) 5654 5655 MLX5_TXOFF_INFO(scv, 5656 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5657 MLX5_TXOFF_CONFIG_VLAN | 5658 MLX5_TXOFF_CONFIG_METADATA) 5659 5660 MLX5_TXOFF_INFO(sciv, 5661 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5662 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5663 MLX5_TXOFF_CONFIG_METADATA) 5664 5665 MLX5_TXOFF_INFO(i, 5666 MLX5_TXOFF_CONFIG_INLINE | 5667 MLX5_TXOFF_CONFIG_METADATA) 5668 5669 MLX5_TXOFF_INFO(v, 5670 MLX5_TXOFF_CONFIG_VLAN | 5671 MLX5_TXOFF_CONFIG_METADATA) 5672 5673 MLX5_TXOFF_INFO(iv, 5674 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5675 
MLX5_TXOFF_CONFIG_METADATA) 5676 5677 MLX5_TXOFF_INFO(none_mpw, 5678 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW | 5679 MLX5_TXOFF_CONFIG_MPW) 5680 5681 MLX5_TXOFF_INFO(mci_mpw, 5682 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 5683 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 5684 MLX5_TXOFF_CONFIG_MPW) 5685 5686 MLX5_TXOFF_INFO(mc_mpw, 5687 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 5688 MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW) 5689 5690 MLX5_TXOFF_INFO(i_mpw, 5691 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 5692 MLX5_TXOFF_CONFIG_MPW) 5693 }; 5694 5695 /** 5696 * Configure the Tx function to use. The routine checks configured 5697 * Tx offloads for the device and selects appropriate Tx burst 5698 * routine. There are multiple Tx burst routines compiled from 5699 * the same template in the most optimal way for the dedicated 5700 * Tx offloads set. 5701 * 5702 * @param dev 5703 * Pointer to private data structure. 5704 * 5705 * @return 5706 * Pointer to selected Tx burst function. 5707 */ 5708 eth_tx_burst_t 5709 mlx5_select_tx_function(struct rte_eth_dev *dev) 5710 { 5711 struct mlx5_priv *priv = dev->data->dev_private; 5712 struct mlx5_dev_config *config = &priv->config; 5713 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 5714 unsigned int diff = 0, olx = 0, i, m; 5715 5716 static_assert(MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE <= 5717 MLX5_DSEG_MAX, "invalid WQE max size"); 5718 static_assert(MLX5_WQE_CSEG_SIZE == MLX5_WSEG_SIZE, 5719 "invalid WQE Control Segment size"); 5720 static_assert(MLX5_WQE_ESEG_SIZE == MLX5_WSEG_SIZE, 5721 "invalid WQE Ethernet Segment size"); 5722 static_assert(MLX5_WQE_DSEG_SIZE == MLX5_WSEG_SIZE, 5723 "invalid WQE Data Segment size"); 5724 static_assert(MLX5_WQE_SIZE == 4 * MLX5_WSEG_SIZE, 5725 "invalid WQE size"); 5726 MLX5_ASSERT(priv); 5727 if (tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) { 5728 /* We should support Multi-Segment Packets. */ 5729 olx |= MLX5_TXOFF_CONFIG_MULTI; 5730 } 5731 if (tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO | 5732 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 5733 DEV_TX_OFFLOAD_GRE_TNL_TSO | 5734 DEV_TX_OFFLOAD_IP_TNL_TSO | 5735 DEV_TX_OFFLOAD_UDP_TNL_TSO)) { 5736 /* We should support TCP Send Offload. */ 5737 olx |= MLX5_TXOFF_CONFIG_TSO; 5738 } 5739 if (tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO | 5740 DEV_TX_OFFLOAD_UDP_TNL_TSO | 5741 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) { 5742 /* We should support Software Parser for Tunnels. */ 5743 olx |= MLX5_TXOFF_CONFIG_SWP; 5744 } 5745 if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM | 5746 DEV_TX_OFFLOAD_UDP_CKSUM | 5747 DEV_TX_OFFLOAD_TCP_CKSUM | 5748 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) { 5749 /* We should support IP/TCP/UDP Checksums. */ 5750 olx |= MLX5_TXOFF_CONFIG_CSUM; 5751 } 5752 if (tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT) { 5753 /* We should support VLAN insertion. */ 5754 olx |= MLX5_TXOFF_CONFIG_VLAN; 5755 } 5756 if (tx_offloads & DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP && 5757 rte_mbuf_dynflag_lookup 5758 (RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL) > 0 && 5759 rte_mbuf_dynfield_lookup 5760 (RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL) > 0) { 5761 /* Offload configured, dynamic entities registered. */ 5762 olx |= MLX5_TXOFF_CONFIG_TXPP; 5763 } 5764 if (priv->txqs_n && (*priv->txqs)[0]) { 5765 struct mlx5_txq_data *txd = (*priv->txqs)[0]; 5766 5767 if (txd->inlen_send) { 5768 /* 5769 * Check the data inline requirements. Data inline 5770 * is enabled on per device basis, we can check 5771 * the first Tx queue only. 
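 * Once inlining is engaged here, the routine table scan below only accepts burst variants compiled with the INLINE option.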
5772 * 5773 * If the device does not support VLAN insertion in WQE 5774 * and some queues are requested to perform VLAN 5775 * insertion offload then inline must be enabled. 5776 */ 5777 olx |= MLX5_TXOFF_CONFIG_INLINE; 5778 } 5779 } 5780 if (config->mps == MLX5_MPW_ENHANCED && 5781 config->txq_inline_min <= 0) { 5782 /* 5783 * The NIC supports Enhanced Multi-Packet Write 5784 * and does not require minimal inline data. 5785 */ 5786 olx |= MLX5_TXOFF_CONFIG_EMPW; 5787 } 5788 if (rte_flow_dynf_metadata_avail()) { 5789 /* We should support Flow metadata. */ 5790 olx |= MLX5_TXOFF_CONFIG_METADATA; 5791 } 5792 if (config->mps == MLX5_MPW) { 5793 /* 5794 * The NIC supports Legacy Multi-Packet Write. 5795 * The MLX5_TXOFF_CONFIG_MPW controls the 5796 * descriptor building method in combination 5797 * with MLX5_TXOFF_CONFIG_EMPW. 5798 */ 5799 if (!(olx & (MLX5_TXOFF_CONFIG_TSO | 5800 MLX5_TXOFF_CONFIG_SWP | 5801 MLX5_TXOFF_CONFIG_VLAN | 5802 MLX5_TXOFF_CONFIG_METADATA))) 5803 olx |= MLX5_TXOFF_CONFIG_EMPW | 5804 MLX5_TXOFF_CONFIG_MPW; 5805 } 5806 /* 5807 * Scan the routines table to find the minimal 5808 * satisfying routine with requested offloads. 5809 */ 5810 m = RTE_DIM(txoff_func); 5811 for (i = 0; i < RTE_DIM(txoff_func); i++) { 5812 unsigned int tmp; 5813 5814 tmp = txoff_func[i].olx; 5815 if (tmp == olx) { 5816 /* Meets requested offloads exactly. */ 5817 m = i; 5818 break; 5819 } 5820 if ((tmp & olx) != olx) { 5821 /* Does not meet requested offloads at all. */ 5822 continue; 5823 } 5824 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_MPW) 5825 /* Do not enable legacy MPW if not configured. */ 5826 continue; 5827 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_EMPW) 5828 /* Do not enable eMPW if not configured. */ 5829 continue; 5830 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_INLINE) 5831 /* Do not enable inlining if not configured. */ 5832 continue; 5833 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_TXPP) 5834 /* Do not enable scheduling if not configured. */ 5835 continue; 5836 /* 5837 * Some routine meets the requirements. 5838 * Check whether it has the minimal amount 5839 * of not requested offloads. 5840 */ 5841 tmp = __builtin_popcountl(tmp & ~olx); 5842 if (m >= RTE_DIM(txoff_func) || tmp < diff) { 5843 /* First or better match, save and continue. */ 5844 m = i; 5845 diff = tmp; 5846 continue; 5847 } 5848 if (tmp == diff) { 5849 tmp = txoff_func[i].olx ^ txoff_func[m].olx; 5850 if (__builtin_ffsl(txoff_func[i].olx & tmp) < 5851 __builtin_ffsl(txoff_func[m].olx & tmp)) { 5852 /* Lighter not requested offload.
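 * Ties on the number of extra offloads are broken in favor of the candidate whose lowest-indexed extra MLX5_TXOFF_CONFIG_* bit is smaller.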
*/ 5853 m = i; 5854 } 5855 } 5856 } 5857 if (m >= RTE_DIM(txoff_func)) { 5858 DRV_LOG(DEBUG, "port %u has no selected Tx function" 5859 " for requested offloads %04X", 5860 dev->data->port_id, olx); 5861 return NULL; 5862 } 5863 DRV_LOG(DEBUG, "port %u has selected Tx function" 5864 " supporting offloads %04X/%04X", 5865 dev->data->port_id, olx, txoff_func[m].olx); 5866 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MULTI) 5867 DRV_LOG(DEBUG, "\tMULTI (multi segment)"); 5868 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TSO) 5869 DRV_LOG(DEBUG, "\tTSO (TCP send offload)"); 5870 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_SWP) 5871 DRV_LOG(DEBUG, "\tSWP (software parser)"); 5872 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_CSUM) 5873 DRV_LOG(DEBUG, "\tCSUM (checksum offload)"); 5874 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_INLINE) 5875 DRV_LOG(DEBUG, "\tINLIN (inline data)"); 5876 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_VLAN) 5877 DRV_LOG(DEBUG, "\tVLANI (VLAN insertion)"); 5878 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_METADATA) 5879 DRV_LOG(DEBUG, "\tMETAD (tx Flow metadata)"); 5880 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TXPP) 5881 DRV_LOG(DEBUG, "\tTXPP (Tx scheduling)"); 5882 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_EMPW) { 5883 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MPW) 5884 DRV_LOG(DEBUG, "\tMPW (Legacy MPW)"); 5885 else 5886 DRV_LOG(DEBUG, "\tEMPW (Enhanced MPW)"); 5887 } 5888 return txoff_func[m].func; 5889 } 5890 5891 /** 5892 * DPDK callback to get the TX queue information 5893 * 5894 * @param dev 5895 * Pointer to the device structure. 5896 * 5897 * @param tx_queue_id 5898 * Tx queue identifier. 5899 * 5900 * @param qinfo 5901 * Pointer to the TX queue information structure. 5902 * 5903 * @return 5904 * None. 5905 */ 5906 5907 void 5908 mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id, 5909 struct rte_eth_txq_info *qinfo) 5910 { 5911 struct mlx5_priv *priv = dev->data->dev_private; 5912 struct mlx5_txq_data *txq = (*priv->txqs)[tx_queue_id]; 5913 struct mlx5_txq_ctrl *txq_ctrl = 5914 container_of(txq, struct mlx5_txq_ctrl, txq); 5915 5916 if (!txq) 5917 return; 5918 qinfo->nb_desc = txq->elts_s; 5919 qinfo->conf.tx_thresh.pthresh = 0; 5920 qinfo->conf.tx_thresh.hthresh = 0; 5921 qinfo->conf.tx_thresh.wthresh = 0; 5922 qinfo->conf.tx_rs_thresh = 0; 5923 qinfo->conf.tx_free_thresh = 0; 5924 qinfo->conf.tx_deferred_start = txq_ctrl ? 0 : 1; 5925 qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads; 5926 } 5927 5928 /** 5929 * DPDK callback to get the TX packet burst mode information 5930 * 5931 * @param dev 5932 * Pointer to the device structure. 5933 * 5934 * @param tx_queue_id 5935 * Tx queue identifier. 5936 * 5937 * @param mode 5938 * Pointer to the burst mode information. 5939 * 5940 * @return 5941 * 0 on success, -EINVAL on failure. 5942 */ 5943 5944 int 5945 mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, 5946 uint16_t tx_queue_id __rte_unused, 5947 struct rte_eth_burst_mode *mode) 5948 { 5949 eth_tx_burst_t pkt_burst = dev->tx_pkt_burst; 5950 unsigned int i, olx; 5951 5952 for (i = 0; i < RTE_DIM(txoff_func); i++) { 5953 if (pkt_burst == txoff_func[i].func) { 5954 olx = txoff_func[i].olx; 5955 snprintf(mode->info, sizeof(mode->info), 5956 "%s%s%s%s%s%s%s%s%s", 5957 (olx & MLX5_TXOFF_CONFIG_EMPW) ? 5958 ((olx & MLX5_TXOFF_CONFIG_MPW) ? 5959 "Legacy MPW" : "Enhanced MPW") : "No MPW", 5960 (olx & MLX5_TXOFF_CONFIG_MULTI) ? 5961 " + MULTI" : "", 5962 (olx & MLX5_TXOFF_CONFIG_TSO) ? 5963 " + TSO" : "", 5964 (olx & MLX5_TXOFF_CONFIG_SWP) ?
5965 " + SWP" : "", 5966 (olx & MLX5_TXOFF_CONFIG_CSUM) ? 5967 " + CSUM" : "", 5968 (olx & MLX5_TXOFF_CONFIG_INLINE) ? 5969 " + INLINE" : "", 5970 (olx & MLX5_TXOFF_CONFIG_VLAN) ? 5971 " + VLAN" : "", 5972 (olx & MLX5_TXOFF_CONFIG_METADATA) ? 5973 " + METADATA" : "", 5974 (olx & MLX5_TXOFF_CONFIG_TXPP) ? 5975 " + TXPP" : ""); 5976 return 0; 5977 } 5978 } 5979 return -EINVAL; 5980 } 5981
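/*
 * Illustrative sketch only, excluded from the build with #if 0: a minimal,
 * self-contained version of the "best matching superset" rule used by
 * mlx5_select_tx_function() above - accept only table entries whose offload
 * set covers the requested one and keep the entry with the fewest extra
 * bits. The names example_entry and example_select are hypothetical and are
 * not part of the mlx5 PMD; the real routine additionally rejects extra
 * MPW/eMPW/inline/scheduling bits and applies a tie-break on the lowest
 * extra bit index.
 */
#if 0
struct example_entry {
	unsigned int olx; /* Offload bit set supported by this entry. */
};

/* Return the index of the best matching entry, or n if none matches. */
static unsigned int
example_select(const struct example_entry *tbl, unsigned int n,
	       unsigned int req)
{
	unsigned int best = n; /* "Not found" sentinel. */
	unsigned int diff = 0;
	unsigned int i;

	for (i = 0; i < n; i++) {
		unsigned int extra;

		if (tbl[i].olx == req)
			return i; /* Exact match - no extra offloads. */
		if ((tbl[i].olx & req) != req)
			continue; /* Does not cover the requested set. */
		extra = __builtin_popcount(tbl[i].olx & ~req);
		if (best == n || extra < diff) {
			/* First or better (fewer extra offloads) match. */
			best = i;
			diff = extra;
		}
	}
	return best;
}
#endif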