1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015-2019 Mellanox Technologies, Ltd 4 */ 5 6 #include <stdint.h> 7 #include <string.h> 8 #include <stdlib.h> 9 10 #include <rte_mbuf.h> 11 #include <rte_mempool.h> 12 #include <rte_prefetch.h> 13 #include <rte_common.h> 14 #include <rte_branch_prediction.h> 15 #include <rte_ether.h> 16 #include <rte_cycles.h> 17 #include <rte_flow.h> 18 19 #include <mlx5_glue.h> 20 #include <mlx5_devx_cmds.h> 21 #include <mlx5_prm.h> 22 #include <mlx5_common.h> 23 24 #include "mlx5_defs.h" 25 #include "mlx5.h" 26 #include "mlx5_mr.h" 27 #include "mlx5_utils.h" 28 #include "mlx5_rxtx.h" 29 #include "mlx5_autoconf.h" 30 31 /* TX burst subroutines return codes. */ 32 enum mlx5_txcmp_code { 33 MLX5_TXCMP_CODE_EXIT = 0, 34 MLX5_TXCMP_CODE_ERROR, 35 MLX5_TXCMP_CODE_SINGLE, 36 MLX5_TXCMP_CODE_MULTI, 37 MLX5_TXCMP_CODE_TSO, 38 MLX5_TXCMP_CODE_EMPW, 39 }; 40 41 /* 42 * These defines are used to configure Tx burst routine option set 43 * supported at compile time. The not specified options are optimized out 44 * out due to if conditions can be explicitly calculated at compile time. 45 * The offloads with bigger runtime check (require more CPU cycles to 46 * skip) overhead should have the bigger index - this is needed to 47 * select the better matching routine function if no exact match and 48 * some offloads are not actually requested. 49 */ 50 #define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets.*/ 51 #define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported.*/ 52 #define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads.*/ 53 #define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Check Sums offloaded. */ 54 #define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */ 55 #define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported.*/ 56 #define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */ 57 #define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported.*/ 58 #define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported.*/ 59 #define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp.*/ 60 61 /* The most common offloads groups. 
*/ 62 #define MLX5_TXOFF_CONFIG_NONE 0 63 #define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \ 64 MLX5_TXOFF_CONFIG_TSO | \ 65 MLX5_TXOFF_CONFIG_SWP | \ 66 MLX5_TXOFF_CONFIG_CSUM | \ 67 MLX5_TXOFF_CONFIG_INLINE | \ 68 MLX5_TXOFF_CONFIG_VLAN | \ 69 MLX5_TXOFF_CONFIG_METADATA) 70 71 #define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask) 72 73 #define MLX5_TXOFF_DECL(func, olx) \ 74 static uint16_t mlx5_tx_burst_##func(void *txq, \ 75 struct rte_mbuf **pkts, \ 76 uint16_t pkts_n) \ 77 { \ 78 return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \ 79 pkts, pkts_n, (olx)); \ 80 } 81 82 #define MLX5_TXOFF_INFO(func, olx) {mlx5_tx_burst_##func, olx}, 83 84 static __rte_always_inline uint32_t 85 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe); 86 87 static __rte_always_inline int 88 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 89 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe); 90 91 static __rte_always_inline uint32_t 92 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe); 93 94 static __rte_always_inline void 95 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 96 volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res); 97 98 static __rte_always_inline void 99 mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx, 100 const unsigned int strd_n); 101 102 static int 103 mlx5_queue_state_modify(struct rte_eth_dev *dev, 104 struct mlx5_mp_arg_queue_state_modify *sm); 105 106 static inline void 107 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 108 volatile struct mlx5_cqe *__rte_restrict cqe, 109 uint32_t phcsum); 110 111 static inline void 112 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 113 volatile struct mlx5_cqe *__rte_restrict cqe, 114 uint32_t len); 115 116 uint32_t mlx5_ptype_table[] __rte_cache_aligned = { 117 [0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */ 118 }; 119 120 uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned; 121 uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned; 122 123 uint64_t rte_net_mlx5_dynf_inline_mask; 124 #define PKT_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask 125 126 /** 127 * Build a table to translate Rx completion flags to packet type. 128 * 129 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 130 */ 131 void 132 mlx5_set_ptype_table(void) 133 { 134 unsigned int i; 135 uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table; 136 137 /* Last entry must not be overwritten, reserved for errored packet. 
*/ 138 for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i) 139 (*p)[i] = RTE_PTYPE_UNKNOWN; 140 /* 141 * The index to the array should have: 142 * bit[1:0] = l3_hdr_type 143 * bit[4:2] = l4_hdr_type 144 * bit[5] = ip_frag 145 * bit[6] = tunneled 146 * bit[7] = outer_l3_type 147 */ 148 /* L2 */ 149 (*p)[0x00] = RTE_PTYPE_L2_ETHER; 150 /* L3 */ 151 (*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 152 RTE_PTYPE_L4_NONFRAG; 153 (*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 154 RTE_PTYPE_L4_NONFRAG; 155 /* Fragmented */ 156 (*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 157 RTE_PTYPE_L4_FRAG; 158 (*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 159 RTE_PTYPE_L4_FRAG; 160 /* TCP */ 161 (*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 162 RTE_PTYPE_L4_TCP; 163 (*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 164 RTE_PTYPE_L4_TCP; 165 (*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 166 RTE_PTYPE_L4_TCP; 167 (*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 168 RTE_PTYPE_L4_TCP; 169 (*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 170 RTE_PTYPE_L4_TCP; 171 (*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 172 RTE_PTYPE_L4_TCP; 173 /* UDP */ 174 (*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 175 RTE_PTYPE_L4_UDP; 176 (*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 177 RTE_PTYPE_L4_UDP; 178 /* Repeat with outer_l3_type being set. Just in case. */ 179 (*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 180 RTE_PTYPE_L4_NONFRAG; 181 (*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 182 RTE_PTYPE_L4_NONFRAG; 183 (*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 184 RTE_PTYPE_L4_FRAG; 185 (*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 186 RTE_PTYPE_L4_FRAG; 187 (*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 188 RTE_PTYPE_L4_TCP; 189 (*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 190 RTE_PTYPE_L4_TCP; 191 (*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 192 RTE_PTYPE_L4_TCP; 193 (*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 194 RTE_PTYPE_L4_TCP; 195 (*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 196 RTE_PTYPE_L4_TCP; 197 (*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 198 RTE_PTYPE_L4_TCP; 199 (*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 200 RTE_PTYPE_L4_UDP; 201 (*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 202 RTE_PTYPE_L4_UDP; 203 /* Tunneled - L3 */ 204 (*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 205 (*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 206 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 207 RTE_PTYPE_INNER_L4_NONFRAG; 208 (*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 209 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 210 RTE_PTYPE_INNER_L4_NONFRAG; 211 (*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 212 (*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 213 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 214 RTE_PTYPE_INNER_L4_NONFRAG; 215 (*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 216 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 217 RTE_PTYPE_INNER_L4_NONFRAG; 218 /* Tunneled - Fragmented */ 219 (*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 220 
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 221 RTE_PTYPE_INNER_L4_FRAG; 222 (*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 223 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 224 RTE_PTYPE_INNER_L4_FRAG; 225 (*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 226 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 227 RTE_PTYPE_INNER_L4_FRAG; 228 (*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 229 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 230 RTE_PTYPE_INNER_L4_FRAG; 231 /* Tunneled - TCP */ 232 (*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 233 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 234 RTE_PTYPE_INNER_L4_TCP; 235 (*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 236 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 237 RTE_PTYPE_INNER_L4_TCP; 238 (*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 239 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 240 RTE_PTYPE_INNER_L4_TCP; 241 (*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 242 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 243 RTE_PTYPE_INNER_L4_TCP; 244 (*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 245 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 246 RTE_PTYPE_INNER_L4_TCP; 247 (*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 248 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 249 RTE_PTYPE_INNER_L4_TCP; 250 (*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 251 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 252 RTE_PTYPE_INNER_L4_TCP; 253 (*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 254 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 255 RTE_PTYPE_INNER_L4_TCP; 256 (*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 257 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 258 RTE_PTYPE_INNER_L4_TCP; 259 (*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 260 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 261 RTE_PTYPE_INNER_L4_TCP; 262 (*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 263 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 264 RTE_PTYPE_INNER_L4_TCP; 265 (*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 266 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 267 RTE_PTYPE_INNER_L4_TCP; 268 /* Tunneled - UDP */ 269 (*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 270 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 271 RTE_PTYPE_INNER_L4_UDP; 272 (*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 273 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 274 RTE_PTYPE_INNER_L4_UDP; 275 (*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 276 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 277 RTE_PTYPE_INNER_L4_UDP; 278 (*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 279 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 280 RTE_PTYPE_INNER_L4_UDP; 281 } 282 283 /** 284 * Build a table to translate packet to checksum type of Verbs. 285 */ 286 void 287 mlx5_set_cksum_table(void) 288 { 289 unsigned int i; 290 uint8_t v; 291 292 /* 293 * The index should have: 294 * bit[0] = PKT_TX_TCP_SEG 295 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 296 * bit[4] = PKT_TX_IP_CKSUM 297 * bit[8] = PKT_TX_OUTER_IP_CKSUM 298 * bit[9] = tunnel 299 */ 300 for (i = 0; i < RTE_DIM(mlx5_cksum_table); ++i) { 301 v = 0; 302 if (i & (1 << 9)) { 303 /* Tunneled packet. */ 304 if (i & (1 << 8)) /* Outer IP. */ 305 v |= MLX5_ETH_WQE_L3_CSUM; 306 if (i & (1 << 4)) /* Inner IP. */ 307 v |= MLX5_ETH_WQE_L3_INNER_CSUM; 308 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 309 v |= MLX5_ETH_WQE_L4_INNER_CSUM; 310 } else { 311 /* No tunnel. 
*/ 312 if (i & (1 << 4)) /* IP. */ 313 v |= MLX5_ETH_WQE_L3_CSUM; 314 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 315 v |= MLX5_ETH_WQE_L4_CSUM; 316 } 317 mlx5_cksum_table[i] = v; 318 } 319 } 320 321 /** 322 * Build a table to translate packet type of mbuf to SWP type of Verbs. 323 */ 324 void 325 mlx5_set_swp_types_table(void) 326 { 327 unsigned int i; 328 uint8_t v; 329 330 /* 331 * The index should have: 332 * bit[0:1] = PKT_TX_L4_MASK 333 * bit[4] = PKT_TX_IPV6 334 * bit[8] = PKT_TX_OUTER_IPV6 335 * bit[9] = PKT_TX_OUTER_UDP 336 */ 337 for (i = 0; i < RTE_DIM(mlx5_swp_types_table); ++i) { 338 v = 0; 339 if (i & (1 << 8)) 340 v |= MLX5_ETH_WQE_L3_OUTER_IPV6; 341 if (i & (1 << 9)) 342 v |= MLX5_ETH_WQE_L4_OUTER_UDP; 343 if (i & (1 << 4)) 344 v |= MLX5_ETH_WQE_L3_INNER_IPV6; 345 if ((i & 3) == (PKT_TX_UDP_CKSUM >> 52)) 346 v |= MLX5_ETH_WQE_L4_INNER_UDP; 347 mlx5_swp_types_table[i] = v; 348 } 349 } 350 351 /** 352 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 353 * Flags must be preliminary initialized to zero. 354 * 355 * @param loc 356 * Pointer to burst routine local context. 357 * @param swp_flags 358 * Pointer to store Software Parser flags 359 * @param olx 360 * Configured Tx offloads mask. It is fully defined at 361 * compile time and may be used for optimization. 362 * 363 * @return 364 * Software Parser offsets packed in dword. 365 * Software Parser flags are set by pointer. 366 */ 367 static __rte_always_inline uint32_t 368 txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, 369 uint8_t *swp_flags, 370 unsigned int olx) 371 { 372 uint64_t ol, tunnel; 373 unsigned int idx, off; 374 uint32_t set; 375 376 if (!MLX5_TXOFF_CONFIG(SWP)) 377 return 0; 378 ol = loc->mbuf->ol_flags; 379 tunnel = ol & PKT_TX_TUNNEL_MASK; 380 /* 381 * Check whether Software Parser is required. 382 * Only customized tunnels may ask for. 383 */ 384 if (likely(tunnel != PKT_TX_TUNNEL_UDP && tunnel != PKT_TX_TUNNEL_IP)) 385 return 0; 386 /* 387 * The index should have: 388 * bit[0:1] = PKT_TX_L4_MASK 389 * bit[4] = PKT_TX_IPV6 390 * bit[8] = PKT_TX_OUTER_IPV6 391 * bit[9] = PKT_TX_OUTER_UDP 392 */ 393 idx = (ol & (PKT_TX_L4_MASK | PKT_TX_IPV6 | PKT_TX_OUTER_IPV6)) >> 52; 394 idx |= (tunnel == PKT_TX_TUNNEL_UDP) ? (1 << 9) : 0; 395 *swp_flags = mlx5_swp_types_table[idx]; 396 /* 397 * Set offsets for SW parser. Since ConnectX-5, SW parser just 398 * complements HW parser. SW parser starts to engage only if HW parser 399 * can't reach a header. For the older devices, HW parser will not kick 400 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 401 * should be set regardless of HW offload. 402 */ 403 off = loc->mbuf->outer_l2_len; 404 if (MLX5_TXOFF_CONFIG(VLAN) && ol & PKT_TX_VLAN_PKT) 405 off += sizeof(struct rte_vlan_hdr); 406 set = (off >> 1) << 8; /* Outer L3 offset. */ 407 off += loc->mbuf->outer_l3_len; 408 if (tunnel == PKT_TX_TUNNEL_UDP) 409 set |= off >> 1; /* Outer L4 offset. */ 410 if (ol & (PKT_TX_IPV4 | PKT_TX_IPV6)) { /* Inner IP. */ 411 const uint64_t csum = ol & PKT_TX_L4_MASK; 412 off += loc->mbuf->l2_len; 413 set |= (off >> 1) << 24; /* Inner L3 offset. */ 414 if (csum == PKT_TX_TCP_CKSUM || 415 csum == PKT_TX_UDP_CKSUM || 416 (MLX5_TXOFF_CONFIG(TSO) && ol & PKT_TX_TCP_SEG)) { 417 off += loc->mbuf->l3_len; 418 set |= (off >> 1) << 16; /* Inner L4 offset. */ 419 } 420 } 421 set = rte_cpu_to_le_32(set); 422 return set; 423 } 424 425 /** 426 * Convert the Checksum offloads to Verbs. 427 * 428 * @param buf 429 * Pointer to the mbuf. 
430 * 431 * @return 432 * Converted checksum flags. 433 */ 434 static __rte_always_inline uint8_t 435 txq_ol_cksum_to_cs(struct rte_mbuf *buf) 436 { 437 uint32_t idx; 438 uint8_t is_tunnel = !!(buf->ol_flags & PKT_TX_TUNNEL_MASK); 439 const uint64_t ol_flags_mask = PKT_TX_TCP_SEG | PKT_TX_L4_MASK | 440 PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM; 441 442 /* 443 * The index should have: 444 * bit[0] = PKT_TX_TCP_SEG 445 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 446 * bit[4] = PKT_TX_IP_CKSUM 447 * bit[8] = PKT_TX_OUTER_IP_CKSUM 448 * bit[9] = tunnel 449 */ 450 idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9); 451 return mlx5_cksum_table[idx]; 452 } 453 454 /** 455 * Internal function to compute the number of used descriptors in an RX queue 456 * 457 * @param rxq 458 * The Rx queue. 459 * 460 * @return 461 * The number of used rx descriptor. 462 */ 463 static uint32_t 464 rx_queue_count(struct mlx5_rxq_data *rxq) 465 { 466 struct rxq_zip *zip = &rxq->zip; 467 volatile struct mlx5_cqe *cqe; 468 const unsigned int cqe_n = (1 << rxq->cqe_n); 469 const unsigned int cqe_cnt = cqe_n - 1; 470 unsigned int cq_ci; 471 unsigned int used; 472 473 /* if we are processing a compressed cqe */ 474 if (zip->ai) { 475 used = zip->cqe_cnt - zip->ca; 476 cq_ci = zip->cq_ci; 477 } else { 478 used = 0; 479 cq_ci = rxq->cq_ci; 480 } 481 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; 482 while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) { 483 int8_t op_own; 484 unsigned int n; 485 486 op_own = cqe->op_own; 487 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) 488 n = rte_be_to_cpu_32(cqe->byte_cnt); 489 else 490 n = 1; 491 cq_ci += n; 492 used += n; 493 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; 494 } 495 used = RTE_MIN(used, (1U << rxq->elts_n) - 1); 496 return used; 497 } 498 499 /** 500 * DPDK callback to check the status of a rx descriptor. 501 * 502 * @param rx_queue 503 * The Rx queue. 504 * @param[in] offset 505 * The index of the descriptor in the ring. 506 * 507 * @return 508 * The status of the tx descriptor. 509 */ 510 int 511 mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset) 512 { 513 struct mlx5_rxq_data *rxq = rx_queue; 514 struct mlx5_rxq_ctrl *rxq_ctrl = 515 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 516 struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv); 517 518 if (dev->rx_pkt_burst != mlx5_rx_burst) { 519 rte_errno = ENOTSUP; 520 return -rte_errno; 521 } 522 if (offset >= (1 << rxq->elts_n)) { 523 rte_errno = EINVAL; 524 return -rte_errno; 525 } 526 if (offset < rx_queue_count(rxq)) 527 return RTE_ETH_RX_DESC_DONE; 528 return RTE_ETH_RX_DESC_AVAIL; 529 } 530 531 /** 532 * DPDK callback to get the RX queue information 533 * 534 * @param dev 535 * Pointer to the device structure. 536 * 537 * @param rx_queue_id 538 * Rx queue identificator. 539 * 540 * @param qinfo 541 * Pointer to the RX queue information structure. 542 * 543 * @return 544 * None. 545 */ 546 547 void 548 mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id, 549 struct rte_eth_rxq_info *qinfo) 550 { 551 struct mlx5_priv *priv = dev->data->dev_private; 552 struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id]; 553 struct mlx5_rxq_ctrl *rxq_ctrl = 554 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 555 556 if (!rxq) 557 return; 558 qinfo->mp = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 
559 rxq->mprq_mp : rxq->mp; 560 qinfo->conf.rx_thresh.pthresh = 0; 561 qinfo->conf.rx_thresh.hthresh = 0; 562 qinfo->conf.rx_thresh.wthresh = 0; 563 qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh; 564 qinfo->conf.rx_drop_en = 1; 565 qinfo->conf.rx_deferred_start = rxq_ctrl ? 0 : 1; 566 qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads; 567 qinfo->scattered_rx = dev->data->scattered_rx; 568 qinfo->nb_desc = 1 << rxq->elts_n; 569 } 570 571 /** 572 * DPDK callback to get the RX packet burst mode information 573 * 574 * @param dev 575 * Pointer to the device structure. 576 * 577 * @param rx_queue_id 578 * Rx queue identificatior. 579 * 580 * @param mode 581 * Pointer to the burts mode information. 582 * 583 * @return 584 * 0 as success, -EINVAL as failure. 585 */ 586 587 int 588 mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, 589 uint16_t rx_queue_id __rte_unused, 590 struct rte_eth_burst_mode *mode) 591 { 592 eth_rx_burst_t pkt_burst = dev->rx_pkt_burst; 593 594 if (pkt_burst == mlx5_rx_burst) { 595 snprintf(mode->info, sizeof(mode->info), "%s", "Scalar"); 596 } else if (pkt_burst == mlx5_rx_burst_mprq) { 597 snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ"); 598 } else if (pkt_burst == mlx5_rx_burst_vec) { 599 #if defined RTE_ARCH_X86_64 600 snprintf(mode->info, sizeof(mode->info), "%s", "Vector SSE"); 601 #elif defined RTE_ARCH_ARM64 602 snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon"); 603 #elif defined RTE_ARCH_PPC_64 604 snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec"); 605 #else 606 return -EINVAL; 607 #endif 608 } else { 609 return -EINVAL; 610 } 611 return 0; 612 } 613 614 /** 615 * DPDK callback to get the number of used descriptors in a RX queue 616 * 617 * @param dev 618 * Pointer to the device structure. 619 * 620 * @param rx_queue_id 621 * The Rx queue. 622 * 623 * @return 624 * The number of used rx descriptor. 625 * -EINVAL if the queue is invalid 626 */ 627 uint32_t 628 mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id) 629 { 630 struct mlx5_priv *priv = dev->data->dev_private; 631 struct mlx5_rxq_data *rxq; 632 633 if (dev->rx_pkt_burst != mlx5_rx_burst) { 634 rte_errno = ENOTSUP; 635 return -rte_errno; 636 } 637 rxq = (*priv->rxqs)[rx_queue_id]; 638 if (!rxq) { 639 rte_errno = EINVAL; 640 return -rte_errno; 641 } 642 return rx_queue_count(rxq); 643 } 644 645 #define MLX5_SYSTEM_LOG_DIR "/var/log" 646 /** 647 * Dump debug information to log file. 648 * 649 * @param fname 650 * The file name. 651 * @param hex_title 652 * If not NULL this string is printed as a header to the output 653 * and the output will be in hexadecimal view. 654 * @param buf 655 * This is the buffer address to print out. 656 * @param len 657 * The number of bytes to dump out. 
658 */ 659 void 660 mlx5_dump_debug_information(const char *fname, const char *hex_title, 661 const void *buf, unsigned int hex_len) 662 { 663 FILE *fd; 664 665 MKSTR(path, "%s/%s", MLX5_SYSTEM_LOG_DIR, fname); 666 fd = fopen(path, "a+"); 667 if (!fd) { 668 DRV_LOG(WARNING, "cannot open %s for debug dump", path); 669 MKSTR(path2, "./%s", fname); 670 fd = fopen(path2, "a+"); 671 if (!fd) { 672 DRV_LOG(ERR, "cannot open %s for debug dump", path2); 673 return; 674 } 675 DRV_LOG(INFO, "New debug dump in file %s", path2); 676 } else { 677 DRV_LOG(INFO, "New debug dump in file %s", path); 678 } 679 if (hex_title) 680 rte_hexdump(fd, hex_title, buf, hex_len); 681 else 682 fprintf(fd, "%s", (const char *)buf); 683 fprintf(fd, "\n\n\n"); 684 fclose(fd); 685 } 686 687 /** 688 * Move QP from error state to running state and initialize indexes. 689 * 690 * @param txq_ctrl 691 * Pointer to TX queue control structure. 692 * 693 * @return 694 * 0 on success, else -1. 695 */ 696 static int 697 tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl) 698 { 699 struct mlx5_mp_arg_queue_state_modify sm = { 700 .is_wq = 0, 701 .queue_id = txq_ctrl->txq.idx, 702 }; 703 704 if (mlx5_queue_state_modify(ETH_DEV(txq_ctrl->priv), &sm)) 705 return -1; 706 txq_ctrl->txq.wqe_ci = 0; 707 txq_ctrl->txq.wqe_pi = 0; 708 txq_ctrl->txq.elts_comp = 0; 709 return 0; 710 } 711 712 /* Return 1 if the error CQE is signed otherwise, sign it and return 0. */ 713 static int 714 check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe) 715 { 716 static const uint8_t magic[] = "seen"; 717 int ret = 1; 718 unsigned int i; 719 720 for (i = 0; i < sizeof(magic); ++i) 721 if (!ret || err_cqe->rsvd1[i] != magic[i]) { 722 ret = 0; 723 err_cqe->rsvd1[i] = magic[i]; 724 } 725 return ret; 726 } 727 728 /** 729 * Handle error CQE. 730 * 731 * @param txq 732 * Pointer to TX queue structure. 733 * @param error_cqe 734 * Pointer to the error CQE. 735 * 736 * @return 737 * Negative value if queue recovery failed, otherwise 738 * the error completion entry is handled successfully. 739 */ 740 static int 741 mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq, 742 volatile struct mlx5_err_cqe *err_cqe) 743 { 744 if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) { 745 const uint16_t wqe_m = ((1 << txq->wqe_n) - 1); 746 struct mlx5_txq_ctrl *txq_ctrl = 747 container_of(txq, struct mlx5_txq_ctrl, txq); 748 uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter); 749 int seen = check_err_cqe_seen(err_cqe); 750 751 if (!seen && txq_ctrl->dump_file_n < 752 txq_ctrl->priv->config.max_dump_files_num) { 753 MKSTR(err_str, "Unexpected CQE error syndrome " 754 "0x%02x CQN = %u SQN = %u wqe_counter = %u " 755 "wq_ci = %u cq_ci = %u", err_cqe->syndrome, 756 txq->cqe_s, txq->qp_num_8s >> 8, 757 rte_be_to_cpu_16(err_cqe->wqe_counter), 758 txq->wqe_ci, txq->cq_ci); 759 MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u", 760 PORT_ID(txq_ctrl->priv), txq->idx, 761 txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc()); 762 mlx5_dump_debug_information(name, NULL, err_str, 0); 763 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 764 (const void *)((uintptr_t) 765 txq->cqes), 766 sizeof(*err_cqe) * 767 (1 << txq->cqe_n)); 768 mlx5_dump_debug_information(name, "MLX5 Error SQ:", 769 (const void *)((uintptr_t) 770 txq->wqes), 771 MLX5_WQE_SIZE * 772 (1 << txq->wqe_n)); 773 txq_ctrl->dump_file_n++; 774 } 775 if (!seen) 776 /* 777 * Count errors in WQEs units. 
778 * Later it can be improved to count error packets, 779 * for example, by SQ parsing to find how much packets 780 * should be counted for each WQE. 781 */ 782 txq->stats.oerrors += ((txq->wqe_ci & wqe_m) - 783 new_wqe_pi) & wqe_m; 784 if (tx_recover_qp(txq_ctrl)) { 785 /* Recovering failed - retry later on the same WQE. */ 786 return -1; 787 } 788 /* Release all the remaining buffers. */ 789 txq_free_elts(txq_ctrl); 790 } 791 return 0; 792 } 793 794 /** 795 * Translate RX completion flags to packet type. 796 * 797 * @param[in] rxq 798 * Pointer to RX queue structure. 799 * @param[in] cqe 800 * Pointer to CQE. 801 * 802 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 803 * 804 * @return 805 * Packet type for struct rte_mbuf. 806 */ 807 static inline uint32_t 808 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe) 809 { 810 uint8_t idx; 811 uint8_t pinfo = cqe->pkt_info; 812 uint16_t ptype = cqe->hdr_type_etc; 813 814 /* 815 * The index to the array should have: 816 * bit[1:0] = l3_hdr_type 817 * bit[4:2] = l4_hdr_type 818 * bit[5] = ip_frag 819 * bit[6] = tunneled 820 * bit[7] = outer_l3_type 821 */ 822 idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10); 823 return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6)); 824 } 825 826 /** 827 * Initialize Rx WQ and indexes. 828 * 829 * @param[in] rxq 830 * Pointer to RX queue structure. 831 */ 832 void 833 mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) 834 { 835 const unsigned int wqe_n = 1 << rxq->elts_n; 836 unsigned int i; 837 838 for (i = 0; (i != wqe_n); ++i) { 839 volatile struct mlx5_wqe_data_seg *scat; 840 uintptr_t addr; 841 uint32_t byte_count; 842 843 if (mlx5_rxq_mprq_enabled(rxq)) { 844 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i]; 845 846 scat = &((volatile struct mlx5_wqe_mprq *) 847 rxq->wqes)[i].dseg; 848 addr = (uintptr_t)mlx5_mprq_buf_addr(buf, 849 1 << rxq->strd_num_n); 850 byte_count = (1 << rxq->strd_sz_n) * 851 (1 << rxq->strd_num_n); 852 } else { 853 struct rte_mbuf *buf = (*rxq->elts)[i]; 854 855 scat = &((volatile struct mlx5_wqe_data_seg *) 856 rxq->wqes)[i]; 857 addr = rte_pktmbuf_mtod(buf, uintptr_t); 858 byte_count = DATA_LEN(buf); 859 } 860 /* scat->addr must be able to store a pointer. */ 861 MLX5_ASSERT(sizeof(scat->addr) >= sizeof(uintptr_t)); 862 *scat = (struct mlx5_wqe_data_seg){ 863 .addr = rte_cpu_to_be_64(addr), 864 .byte_count = rte_cpu_to_be_32(byte_count), 865 .lkey = mlx5_rx_addr2mr(rxq, addr), 866 }; 867 } 868 rxq->consumed_strd = 0; 869 rxq->decompressed = 0; 870 rxq->rq_pi = 0; 871 rxq->zip = (struct rxq_zip){ 872 .ai = 0, 873 }; 874 /* Update doorbell counter. */ 875 rxq->rq_ci = wqe_n >> rxq->sges_n; 876 rte_io_wmb(); 877 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 878 } 879 880 /** 881 * Modify a Verbs/DevX queue state. 882 * This must be called from the primary process. 883 * 884 * @param dev 885 * Pointer to Ethernet device. 886 * @param sm 887 * State modify request parameters. 888 * 889 * @return 890 * 0 in case of success else non-zero value and rte_errno is set. 
891 */ 892 int 893 mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, 894 const struct mlx5_mp_arg_queue_state_modify *sm) 895 { 896 int ret; 897 struct mlx5_priv *priv = dev->data->dev_private; 898 899 if (sm->is_wq) { 900 struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id]; 901 struct mlx5_rxq_ctrl *rxq_ctrl = 902 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 903 904 if (rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_IBV) { 905 struct ibv_wq_attr mod = { 906 .attr_mask = IBV_WQ_ATTR_STATE, 907 .wq_state = sm->state, 908 }; 909 910 ret = mlx5_glue->modify_wq(rxq_ctrl->obj->wq, &mod); 911 } else { /* rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ. */ 912 struct mlx5_devx_modify_rq_attr rq_attr; 913 914 memset(&rq_attr, 0, sizeof(rq_attr)); 915 if (sm->state == IBV_WQS_RESET) { 916 rq_attr.rq_state = MLX5_RQC_STATE_ERR; 917 rq_attr.state = MLX5_RQC_STATE_RST; 918 } else if (sm->state == IBV_WQS_RDY) { 919 rq_attr.rq_state = MLX5_RQC_STATE_RST; 920 rq_attr.state = MLX5_RQC_STATE_RDY; 921 } else if (sm->state == IBV_WQS_ERR) { 922 rq_attr.rq_state = MLX5_RQC_STATE_RDY; 923 rq_attr.state = MLX5_RQC_STATE_ERR; 924 } 925 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, 926 &rq_attr); 927 } 928 if (ret) { 929 DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s", 930 sm->state, strerror(errno)); 931 rte_errno = errno; 932 return ret; 933 } 934 } else { 935 struct mlx5_txq_data *txq = (*priv->txqs)[sm->queue_id]; 936 struct mlx5_txq_ctrl *txq_ctrl = 937 container_of(txq, struct mlx5_txq_ctrl, txq); 938 939 if (txq_ctrl->obj->type == MLX5_TXQ_OBJ_TYPE_DEVX_SQ) { 940 struct mlx5_devx_modify_sq_attr msq_attr = { 0 }; 941 942 /* Change queue state to reset. */ 943 msq_attr.sq_state = MLX5_SQC_STATE_ERR; 944 msq_attr.state = MLX5_SQC_STATE_RST; 945 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq_devx, 946 &msq_attr); 947 if (ret) { 948 DRV_LOG(ERR, "Cannot change the " 949 "Tx QP state to RESET %s", 950 strerror(errno)); 951 rte_errno = errno; 952 return ret; 953 } 954 /* Change queue state to ready. 
*/ 955 msq_attr.sq_state = MLX5_SQC_STATE_RST; 956 msq_attr.state = MLX5_SQC_STATE_RDY; 957 ret = mlx5_devx_cmd_modify_sq(txq_ctrl->obj->sq_devx, 958 &msq_attr); 959 if (ret) { 960 DRV_LOG(ERR, "Cannot change the " 961 "Tx QP state to READY %s", 962 strerror(errno)); 963 rte_errno = errno; 964 return ret; 965 } 966 } else { 967 struct ibv_qp_attr mod = { 968 .qp_state = IBV_QPS_RESET, 969 .port_num = (uint8_t)priv->dev_port, 970 }; 971 struct ibv_qp *qp = txq_ctrl->obj->qp; 972 973 MLX5_ASSERT 974 (txq_ctrl->obj->type == MLX5_TXQ_OBJ_TYPE_IBV); 975 976 ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); 977 if (ret) { 978 DRV_LOG(ERR, "Cannot change the " 979 "Tx QP state to RESET %s", 980 strerror(errno)); 981 rte_errno = errno; 982 return ret; 983 } 984 mod.qp_state = IBV_QPS_INIT; 985 ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); 986 if (ret) { 987 DRV_LOG(ERR, "Cannot change the " 988 "Tx QP state to INIT %s", 989 strerror(errno)); 990 rte_errno = errno; 991 return ret; 992 } 993 mod.qp_state = IBV_QPS_RTR; 994 ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); 995 if (ret) { 996 DRV_LOG(ERR, "Cannot change the " 997 "Tx QP state to RTR %s", 998 strerror(errno)); 999 rte_errno = errno; 1000 return ret; 1001 } 1002 mod.qp_state = IBV_QPS_RTS; 1003 ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); 1004 if (ret) { 1005 DRV_LOG(ERR, "Cannot change the " 1006 "Tx QP state to RTS %s", 1007 strerror(errno)); 1008 rte_errno = errno; 1009 return ret; 1010 } 1011 } 1012 } 1013 return 0; 1014 } 1015 1016 /** 1017 * Modify a Verbs queue state. 1018 * 1019 * @param dev 1020 * Pointer to Ethernet device. 1021 * @param sm 1022 * State modify request parameters. 1023 * 1024 * @return 1025 * 0 in case of success else non-zero value. 1026 */ 1027 static int 1028 mlx5_queue_state_modify(struct rte_eth_dev *dev, 1029 struct mlx5_mp_arg_queue_state_modify *sm) 1030 { 1031 struct mlx5_priv *priv = dev->data->dev_private; 1032 int ret = 0; 1033 1034 switch (rte_eal_process_type()) { 1035 case RTE_PROC_PRIMARY: 1036 ret = mlx5_queue_state_modify_primary(dev, sm); 1037 break; 1038 case RTE_PROC_SECONDARY: 1039 ret = mlx5_mp_req_queue_state_modify(&priv->mp_id, sm); 1040 break; 1041 default: 1042 break; 1043 } 1044 return ret; 1045 } 1046 1047 /** 1048 * Handle a Rx error. 1049 * The function inserts the RQ state to reset when the first error CQE is 1050 * shown, then drains the CQ by the caller function loop. When the CQ is empty, 1051 * it moves the RQ state to ready and initializes the RQ. 1052 * Next CQE identification and error counting are in the caller responsibility. 1053 * 1054 * @param[in] rxq 1055 * Pointer to RX queue structure. 1056 * @param[in] vec 1057 * 1 when called from vectorized Rx burst, need to prepare mbufs for the RQ. 1058 * 0 when called from non-vectorized Rx burst. 1059 * 1060 * @return 1061 * -1 in case of recovery error, otherwise the CQE status. 
1062 */ 1063 int 1064 mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) 1065 { 1066 const uint16_t cqe_n = 1 << rxq->cqe_n; 1067 const uint16_t cqe_mask = cqe_n - 1; 1068 const unsigned int wqe_n = 1 << rxq->elts_n; 1069 struct mlx5_rxq_ctrl *rxq_ctrl = 1070 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 1071 union { 1072 volatile struct mlx5_cqe *cqe; 1073 volatile struct mlx5_err_cqe *err_cqe; 1074 } u = { 1075 .cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask], 1076 }; 1077 struct mlx5_mp_arg_queue_state_modify sm; 1078 int ret; 1079 1080 switch (rxq->err_state) { 1081 case MLX5_RXQ_ERR_STATE_NO_ERROR: 1082 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET; 1083 /* Fall-through */ 1084 case MLX5_RXQ_ERR_STATE_NEED_RESET: 1085 sm.is_wq = 1; 1086 sm.queue_id = rxq->idx; 1087 sm.state = IBV_WQS_RESET; 1088 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm)) 1089 return -1; 1090 if (rxq_ctrl->dump_file_n < 1091 rxq_ctrl->priv->config.max_dump_files_num) { 1092 MKSTR(err_str, "Unexpected CQE error syndrome " 1093 "0x%02x CQN = %u RQN = %u wqe_counter = %u" 1094 " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome, 1095 rxq->cqn, rxq_ctrl->wqn, 1096 rte_be_to_cpu_16(u.err_cqe->wqe_counter), 1097 rxq->rq_ci << rxq->sges_n, rxq->cq_ci); 1098 MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u", 1099 rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc()); 1100 mlx5_dump_debug_information(name, NULL, err_str, 0); 1101 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 1102 (const void *)((uintptr_t) 1103 rxq->cqes), 1104 sizeof(*u.cqe) * cqe_n); 1105 mlx5_dump_debug_information(name, "MLX5 Error RQ:", 1106 (const void *)((uintptr_t) 1107 rxq->wqes), 1108 16 * wqe_n); 1109 rxq_ctrl->dump_file_n++; 1110 } 1111 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY; 1112 /* Fall-through */ 1113 case MLX5_RXQ_ERR_STATE_NEED_READY: 1114 ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci); 1115 if (ret == MLX5_CQE_STATUS_HW_OWN) { 1116 rte_io_wmb(); 1117 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1118 rte_io_wmb(); 1119 /* 1120 * The RQ consumer index must be zeroed while moving 1121 * from RESET state to RDY state. 1122 */ 1123 *rxq->rq_db = rte_cpu_to_be_32(0); 1124 rte_io_wmb(); 1125 sm.is_wq = 1; 1126 sm.queue_id = rxq->idx; 1127 sm.state = IBV_WQS_RDY; 1128 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), 1129 &sm)) 1130 return -1; 1131 if (vec) { 1132 const uint16_t q_mask = wqe_n - 1; 1133 uint16_t elt_idx; 1134 struct rte_mbuf **elt; 1135 int i; 1136 unsigned int n = wqe_n - (rxq->rq_ci - 1137 rxq->rq_pi); 1138 1139 for (i = 0; i < (int)n; ++i) { 1140 elt_idx = (rxq->rq_ci + i) & q_mask; 1141 elt = &(*rxq->elts)[elt_idx]; 1142 *elt = rte_mbuf_raw_alloc(rxq->mp); 1143 if (!*elt) { 1144 for (i--; i >= 0; --i) { 1145 elt_idx = (rxq->rq_ci + 1146 i) & q_mask; 1147 elt = &(*rxq->elts) 1148 [elt_idx]; 1149 rte_pktmbuf_free_seg 1150 (*elt); 1151 } 1152 return -1; 1153 } 1154 } 1155 for (i = 0; i < (int)wqe_n; ++i) { 1156 elt = &(*rxq->elts)[i]; 1157 DATA_LEN(*elt) = 1158 (uint16_t)((*elt)->buf_len - 1159 rte_pktmbuf_headroom(*elt)); 1160 } 1161 /* Padding with a fake mbuf for vec Rx. */ 1162 for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i) 1163 (*rxq->elts)[wqe_n + i] = 1164 &rxq->fake_mbuf; 1165 } 1166 mlx5_rxq_initialize(rxq); 1167 rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; 1168 } 1169 return ret; 1170 default: 1171 return -1; 1172 } 1173 } 1174 1175 /** 1176 * Get size of the next packet for a given CQE. For compressed CQEs, the 1177 * consumer index is updated only once all packets of the current one have 1178 * been processed. 
1179 * 1180 * @param rxq 1181 * Pointer to RX queue. 1182 * @param cqe 1183 * CQE to process. 1184 * @param[out] mcqe 1185 * Store pointer to mini-CQE if compressed. Otherwise, the pointer is not 1186 * written. 1187 * 1188 * @return 1189 * 0 in case of empty CQE, otherwise the packet size in bytes. 1190 */ 1191 static inline int 1192 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 1193 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe) 1194 { 1195 struct rxq_zip *zip = &rxq->zip; 1196 uint16_t cqe_n = cqe_cnt + 1; 1197 int len; 1198 uint16_t idx, end; 1199 1200 do { 1201 len = 0; 1202 /* Process compressed data in the CQE and mini arrays. */ 1203 if (zip->ai) { 1204 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1205 (volatile struct mlx5_mini_cqe8 (*)[8]) 1206 (uintptr_t)(&(*rxq->cqes)[zip->ca & 1207 cqe_cnt].pkt_info); 1208 1209 len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt); 1210 *mcqe = &(*mc)[zip->ai & 7]; 1211 if ((++zip->ai & 7) == 0) { 1212 /* Invalidate consumed CQEs */ 1213 idx = zip->ca; 1214 end = zip->na; 1215 while (idx != end) { 1216 (*rxq->cqes)[idx & cqe_cnt].op_own = 1217 MLX5_CQE_INVALIDATE; 1218 ++idx; 1219 } 1220 /* 1221 * Increment consumer index to skip the number 1222 * of CQEs consumed. Hardware leaves holes in 1223 * the CQ ring for software use. 1224 */ 1225 zip->ca = zip->na; 1226 zip->na += 8; 1227 } 1228 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { 1229 /* Invalidate the rest */ 1230 idx = zip->ca; 1231 end = zip->cq_ci; 1232 1233 while (idx != end) { 1234 (*rxq->cqes)[idx & cqe_cnt].op_own = 1235 MLX5_CQE_INVALIDATE; 1236 ++idx; 1237 } 1238 rxq->cq_ci = zip->cq_ci; 1239 zip->ai = 0; 1240 } 1241 /* 1242 * No compressed data, get next CQE and verify if it is 1243 * compressed. 1244 */ 1245 } else { 1246 int ret; 1247 int8_t op_own; 1248 1249 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 1250 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 1251 if (unlikely(ret == MLX5_CQE_STATUS_ERR || 1252 rxq->err_state)) { 1253 ret = mlx5_rx_err_handle(rxq, 0); 1254 if (ret == MLX5_CQE_STATUS_HW_OWN || 1255 ret == -1) 1256 return 0; 1257 } else { 1258 return 0; 1259 } 1260 } 1261 ++rxq->cq_ci; 1262 op_own = cqe->op_own; 1263 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { 1264 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1265 (volatile struct mlx5_mini_cqe8 (*)[8]) 1266 (uintptr_t)(&(*rxq->cqes) 1267 [rxq->cq_ci & 1268 cqe_cnt].pkt_info); 1269 1270 /* Fix endianness. */ 1271 zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt); 1272 /* 1273 * Current mini array position is the one 1274 * returned by check_cqe64(). 1275 * 1276 * If completion comprises several mini arrays, 1277 * as a special case the second one is located 1278 * 7 CQEs after the initial CQE instead of 8 1279 * for subsequent ones. 1280 */ 1281 zip->ca = rxq->cq_ci; 1282 zip->na = zip->ca + 7; 1283 /* Compute the next non compressed CQE. */ 1284 --rxq->cq_ci; 1285 zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; 1286 /* Get packet size to return. 
*/ 1287 len = rte_be_to_cpu_32((*mc)[0].byte_cnt); 1288 *mcqe = &(*mc)[0]; 1289 zip->ai = 1; 1290 /* Prefetch all to be invalidated */ 1291 idx = zip->ca; 1292 end = zip->cq_ci; 1293 while (idx != end) { 1294 rte_prefetch0(&(*rxq->cqes)[(idx) & 1295 cqe_cnt]); 1296 ++idx; 1297 } 1298 } else { 1299 len = rte_be_to_cpu_32(cqe->byte_cnt); 1300 } 1301 } 1302 if (unlikely(rxq->err_state)) { 1303 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1304 ++rxq->stats.idropped; 1305 } else { 1306 return len; 1307 } 1308 } while (1); 1309 } 1310 1311 /** 1312 * Translate RX completion flags to offload flags. 1313 * 1314 * @param[in] cqe 1315 * Pointer to CQE. 1316 * 1317 * @return 1318 * Offload flags (ol_flags) for struct rte_mbuf. 1319 */ 1320 static inline uint32_t 1321 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe) 1322 { 1323 uint32_t ol_flags = 0; 1324 uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc); 1325 1326 ol_flags = 1327 TRANSPOSE(flags, 1328 MLX5_CQE_RX_L3_HDR_VALID, 1329 PKT_RX_IP_CKSUM_GOOD) | 1330 TRANSPOSE(flags, 1331 MLX5_CQE_RX_L4_HDR_VALID, 1332 PKT_RX_L4_CKSUM_GOOD); 1333 return ol_flags; 1334 } 1335 1336 /** 1337 * Fill in mbuf fields from RX completion flags. 1338 * Note that pkt->ol_flags should be initialized outside of this function. 1339 * 1340 * @param rxq 1341 * Pointer to RX queue. 1342 * @param pkt 1343 * mbuf to fill. 1344 * @param cqe 1345 * CQE to process. 1346 * @param rss_hash_res 1347 * Packet RSS Hash result. 1348 */ 1349 static inline void 1350 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 1351 volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res) 1352 { 1353 /* Update packet information. */ 1354 pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe); 1355 if (rss_hash_res && rxq->rss_hash) { 1356 pkt->hash.rss = rss_hash_res; 1357 pkt->ol_flags |= PKT_RX_RSS_HASH; 1358 } 1359 if (rxq->mark && MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) { 1360 pkt->ol_flags |= PKT_RX_FDIR; 1361 if (cqe->sop_drop_qpn != 1362 rte_cpu_to_be_32(MLX5_FLOW_MARK_DEFAULT)) { 1363 uint32_t mark = cqe->sop_drop_qpn; 1364 1365 pkt->ol_flags |= PKT_RX_FDIR_ID; 1366 pkt->hash.fdir.hi = mlx5_flow_mark_get(mark); 1367 } 1368 } 1369 if (rxq->dynf_meta && cqe->flow_table_metadata) { 1370 pkt->ol_flags |= rxq->flow_meta_mask; 1371 *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, uint32_t *) = 1372 cqe->flow_table_metadata; 1373 } 1374 if (rxq->csum) 1375 pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); 1376 if (rxq->vlan_strip && 1377 (cqe->hdr_type_etc & rte_cpu_to_be_16(MLX5_CQE_VLAN_STRIPPED))) { 1378 pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; 1379 pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info); 1380 } 1381 if (rxq->hw_timestamp) { 1382 uint64_t ts = rte_be_to_cpu_64(cqe->timestamp); 1383 1384 if (rxq->rt_timestamp) 1385 ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts); 1386 pkt->timestamp = ts; 1387 pkt->ol_flags |= PKT_RX_TIMESTAMP; 1388 } 1389 } 1390 1391 /** 1392 * DPDK callback for RX. 1393 * 1394 * @param dpdk_rxq 1395 * Generic pointer to RX queue structure. 1396 * @param[out] pkts 1397 * Array to store received packets. 1398 * @param pkts_n 1399 * Maximum number of packets in array. 1400 * 1401 * @return 1402 * Number of packets successfully received (<= pkts_n). 
1403 */ 1404 uint16_t 1405 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1406 { 1407 struct mlx5_rxq_data *rxq = dpdk_rxq; 1408 const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; 1409 const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1; 1410 const unsigned int sges_n = rxq->sges_n; 1411 struct rte_mbuf *pkt = NULL; 1412 struct rte_mbuf *seg = NULL; 1413 volatile struct mlx5_cqe *cqe = 1414 &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1415 unsigned int i = 0; 1416 unsigned int rq_ci = rxq->rq_ci << sges_n; 1417 int len = 0; /* keep its value across iterations. */ 1418 1419 while (pkts_n) { 1420 unsigned int idx = rq_ci & wqe_cnt; 1421 volatile struct mlx5_wqe_data_seg *wqe = 1422 &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx]; 1423 struct rte_mbuf *rep = (*rxq->elts)[idx]; 1424 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1425 uint32_t rss_hash_res; 1426 1427 if (pkt) 1428 NEXT(seg) = rep; 1429 seg = rep; 1430 rte_prefetch0(seg); 1431 rte_prefetch0(cqe); 1432 rte_prefetch0(wqe); 1433 rep = rte_mbuf_raw_alloc(rxq->mp); 1434 if (unlikely(rep == NULL)) { 1435 ++rxq->stats.rx_nombuf; 1436 if (!pkt) { 1437 /* 1438 * no buffers before we even started, 1439 * bail out silently. 1440 */ 1441 break; 1442 } 1443 while (pkt != seg) { 1444 MLX5_ASSERT(pkt != (*rxq->elts)[idx]); 1445 rep = NEXT(pkt); 1446 NEXT(pkt) = NULL; 1447 NB_SEGS(pkt) = 1; 1448 rte_mbuf_raw_free(pkt); 1449 pkt = rep; 1450 } 1451 break; 1452 } 1453 if (!pkt) { 1454 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1455 len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe); 1456 if (!len) { 1457 rte_mbuf_raw_free(rep); 1458 break; 1459 } 1460 pkt = seg; 1461 MLX5_ASSERT(len >= (rxq->crc_present << 2)); 1462 pkt->ol_flags &= EXT_ATTACHED_MBUF; 1463 /* If compressed, take hash result from mini-CQE. */ 1464 rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ? 1465 cqe->rx_hash_res : 1466 mcqe->rx_hash_result); 1467 rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res); 1468 if (rxq->crc_present) 1469 len -= RTE_ETHER_CRC_LEN; 1470 PKT_LEN(pkt) = len; 1471 if (cqe->lro_num_seg > 1) { 1472 mlx5_lro_update_hdr 1473 (rte_pktmbuf_mtod(pkt, uint8_t *), cqe, 1474 len); 1475 pkt->ol_flags |= PKT_RX_LRO; 1476 pkt->tso_segsz = len / cqe->lro_num_seg; 1477 } 1478 } 1479 DATA_LEN(rep) = DATA_LEN(seg); 1480 PKT_LEN(rep) = PKT_LEN(seg); 1481 SET_DATA_OFF(rep, DATA_OFF(seg)); 1482 PORT(rep) = PORT(seg); 1483 (*rxq->elts)[idx] = rep; 1484 /* 1485 * Fill NIC descriptor with the new buffer. The lkey and size 1486 * of the buffers are already known, only the buffer address 1487 * changes. 1488 */ 1489 wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t)); 1490 /* If there's only one MR, no need to replace LKey in WQE. */ 1491 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) 1492 wqe->lkey = mlx5_rx_mb2mr(rxq, rep); 1493 if (len > DATA_LEN(seg)) { 1494 len -= DATA_LEN(seg); 1495 ++NB_SEGS(pkt); 1496 ++rq_ci; 1497 continue; 1498 } 1499 DATA_LEN(seg) = len; 1500 #ifdef MLX5_PMD_SOFT_COUNTERS 1501 /* Increment bytes counter. */ 1502 rxq->stats.ibytes += PKT_LEN(pkt); 1503 #endif 1504 /* Return packet. */ 1505 *(pkts++) = pkt; 1506 pkt = NULL; 1507 --pkts_n; 1508 ++i; 1509 /* Align consumer index to the next stride. */ 1510 rq_ci >>= sges_n; 1511 ++rq_ci; 1512 rq_ci <<= sges_n; 1513 } 1514 if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci))) 1515 return 0; 1516 /* Update the consumer index. 
*/ 1517 rxq->rq_ci = rq_ci >> sges_n; 1518 rte_io_wmb(); 1519 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1520 rte_io_wmb(); 1521 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1522 #ifdef MLX5_PMD_SOFT_COUNTERS 1523 /* Increment packets counter. */ 1524 rxq->stats.ipackets += i; 1525 #endif 1526 return i; 1527 } 1528 1529 /** 1530 * Update LRO packet TCP header. 1531 * The HW LRO feature doesn't update the TCP header after coalescing the 1532 * TCP segments but supplies information in CQE to fill it by SW. 1533 * 1534 * @param tcp 1535 * Pointer to the TCP header. 1536 * @param cqe 1537 * Pointer to the completion entry.. 1538 * @param phcsum 1539 * The L3 pseudo-header checksum. 1540 */ 1541 static inline void 1542 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 1543 volatile struct mlx5_cqe *__rte_restrict cqe, 1544 uint32_t phcsum) 1545 { 1546 uint8_t l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) & 1547 MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; 1548 /* 1549 * The HW calculates only the TCP payload checksum, need to complete 1550 * the TCP header checksum and the L3 pseudo-header checksum. 1551 */ 1552 uint32_t csum = phcsum + cqe->csum; 1553 1554 if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK || 1555 l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) { 1556 tcp->tcp_flags |= RTE_TCP_ACK_FLAG; 1557 tcp->recv_ack = cqe->lro_ack_seq_num; 1558 tcp->rx_win = cqe->lro_tcp_win; 1559 } 1560 if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK) 1561 tcp->tcp_flags |= RTE_TCP_PSH_FLAG; 1562 tcp->cksum = 0; 1563 csum += rte_raw_cksum(tcp, (tcp->data_off >> 4) * 4); 1564 csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff); 1565 csum = (~csum) & 0xffff; 1566 if (csum == 0) 1567 csum = 0xffff; 1568 tcp->cksum = csum; 1569 } 1570 1571 /** 1572 * Update LRO packet headers. 1573 * The HW LRO feature doesn't update the L3/TCP headers after coalescing the 1574 * TCP segments but supply information in CQE to fill it by SW. 1575 * 1576 * @param padd 1577 * The packet address. 1578 * @param cqe 1579 * Pointer to the completion entry.. 1580 * @param len 1581 * The packet length. 
1582 */ 1583 static inline void 1584 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 1585 volatile struct mlx5_cqe *__rte_restrict cqe, 1586 uint32_t len) 1587 { 1588 union { 1589 struct rte_ether_hdr *eth; 1590 struct rte_vlan_hdr *vlan; 1591 struct rte_ipv4_hdr *ipv4; 1592 struct rte_ipv6_hdr *ipv6; 1593 struct rte_tcp_hdr *tcp; 1594 uint8_t *hdr; 1595 } h = { 1596 .hdr = padd, 1597 }; 1598 uint16_t proto = h.eth->ether_type; 1599 uint32_t phcsum; 1600 1601 h.eth++; 1602 while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) || 1603 proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) { 1604 proto = h.vlan->eth_proto; 1605 h.vlan++; 1606 } 1607 if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) { 1608 h.ipv4->time_to_live = cqe->lro_min_ttl; 1609 h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd)); 1610 h.ipv4->hdr_checksum = 0; 1611 h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4); 1612 phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0); 1613 h.ipv4++; 1614 } else { 1615 h.ipv6->hop_limits = cqe->lro_min_ttl; 1616 h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) - 1617 sizeof(*h.ipv6)); 1618 phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0); 1619 h.ipv6++; 1620 } 1621 mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum); 1622 } 1623 1624 void 1625 mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque) 1626 { 1627 struct mlx5_mprq_buf *buf = opaque; 1628 1629 if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) == 1) { 1630 rte_mempool_put(buf->mp, buf); 1631 } else if (unlikely(__atomic_sub_fetch(&buf->refcnt, 1, 1632 __ATOMIC_RELAXED) == 0)) { 1633 __atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED); 1634 rte_mempool_put(buf->mp, buf); 1635 } 1636 } 1637 1638 void 1639 mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf) 1640 { 1641 mlx5_mprq_buf_free_cb(NULL, buf); 1642 } 1643 1644 static inline void 1645 mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx, 1646 const unsigned int strd_n) 1647 { 1648 struct mlx5_mprq_buf *rep = rxq->mprq_repl; 1649 volatile struct mlx5_wqe_data_seg *wqe = 1650 &((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg; 1651 void *addr; 1652 1653 MLX5_ASSERT(rep != NULL); 1654 /* Replace MPRQ buf. */ 1655 (*rxq->mprq_bufs)[rq_idx] = rep; 1656 /* Replace WQE. */ 1657 addr = mlx5_mprq_buf_addr(rep, strd_n); 1658 wqe->addr = rte_cpu_to_be_64((uintptr_t)addr); 1659 /* If there's only one MR, no need to replace LKey in WQE. */ 1660 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) 1661 wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr); 1662 /* Stash a mbuf for next replacement. */ 1663 if (likely(!rte_mempool_get(rxq->mprq_mp, (void **)&rep))) 1664 rxq->mprq_repl = rep; 1665 else 1666 rxq->mprq_repl = NULL; 1667 } 1668 1669 /** 1670 * DPDK callback for RX with Multi-Packet RQ support. 1671 * 1672 * @param dpdk_rxq 1673 * Generic pointer to RX queue structure. 1674 * @param[out] pkts 1675 * Array to store received packets. 1676 * @param pkts_n 1677 * Maximum number of packets in array. 1678 * 1679 * @return 1680 * Number of packets successfully received (<= pkts_n). 
1681 */ 1682 uint16_t 1683 mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1684 { 1685 struct mlx5_rxq_data *rxq = dpdk_rxq; 1686 const unsigned int strd_n = 1 << rxq->strd_num_n; 1687 const unsigned int strd_sz = 1 << rxq->strd_sz_n; 1688 const unsigned int strd_shift = 1689 MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en; 1690 const unsigned int cq_mask = (1 << rxq->cqe_n) - 1; 1691 const unsigned int wq_mask = (1 << rxq->elts_n) - 1; 1692 volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1693 unsigned int i = 0; 1694 uint32_t rq_ci = rxq->rq_ci; 1695 uint16_t consumed_strd = rxq->consumed_strd; 1696 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1697 1698 while (i < pkts_n) { 1699 struct rte_mbuf *pkt; 1700 void *addr; 1701 int ret; 1702 uint32_t len; 1703 uint16_t strd_cnt; 1704 uint16_t strd_idx; 1705 uint32_t offset; 1706 uint32_t byte_cnt; 1707 int32_t hdrm_overlap; 1708 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1709 uint32_t rss_hash_res = 0; 1710 1711 if (consumed_strd == strd_n) { 1712 /* Replace WQE only if the buffer is still in use. */ 1713 if (__atomic_load_n(&buf->refcnt, 1714 __ATOMIC_RELAXED) > 1) { 1715 mprq_buf_replace(rxq, rq_ci & wq_mask, strd_n); 1716 /* Release the old buffer. */ 1717 mlx5_mprq_buf_free(buf); 1718 } else if (unlikely(rxq->mprq_repl == NULL)) { 1719 struct mlx5_mprq_buf *rep; 1720 1721 /* 1722 * Currently, the MPRQ mempool is out of buffer 1723 * and doing memcpy regardless of the size of Rx 1724 * packet. Retry allocation to get back to 1725 * normal. 1726 */ 1727 if (!rte_mempool_get(rxq->mprq_mp, 1728 (void **)&rep)) 1729 rxq->mprq_repl = rep; 1730 } 1731 /* Advance to the next WQE. */ 1732 consumed_strd = 0; 1733 ++rq_ci; 1734 buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1735 } 1736 cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1737 ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe); 1738 if (!ret) 1739 break; 1740 byte_cnt = ret; 1741 strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >> 1742 MLX5_MPRQ_STRIDE_NUM_SHIFT; 1743 MLX5_ASSERT(strd_cnt); 1744 consumed_strd += strd_cnt; 1745 if (byte_cnt & MLX5_MPRQ_FILLER_MASK) 1746 continue; 1747 if (mcqe == NULL) { 1748 rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res); 1749 strd_idx = rte_be_to_cpu_16(cqe->wqe_counter); 1750 } else { 1751 /* mini-CQE for MPRQ doesn't have hash result. */ 1752 strd_idx = rte_be_to_cpu_16(mcqe->stride_idx); 1753 } 1754 MLX5_ASSERT(strd_idx < strd_n); 1755 MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & 1756 wq_mask)); 1757 pkt = rte_pktmbuf_alloc(rxq->mp); 1758 if (unlikely(pkt == NULL)) { 1759 ++rxq->stats.rx_nombuf; 1760 break; 1761 } 1762 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 1763 MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); 1764 if (rxq->crc_present) 1765 len -= RTE_ETHER_CRC_LEN; 1766 offset = strd_idx * strd_sz + strd_shift; 1767 addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset); 1768 hdrm_overlap = len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz; 1769 /* 1770 * Memcpy packets to the target mbuf if: 1771 * - The size of packet is smaller than mprq_max_memcpy_len. 1772 * - Out of buffer in the Mempool for Multi-Packet RQ. 1773 * - The packet's stride overlaps a headroom and scatter is off. 
1774 */ 1775 if (len <= rxq->mprq_max_memcpy_len || 1776 rxq->mprq_repl == NULL || 1777 (hdrm_overlap > 0 && !rxq->strd_scatter_en)) { 1778 if (likely(rte_pktmbuf_tailroom(pkt) >= len)) { 1779 rte_memcpy(rte_pktmbuf_mtod(pkt, void *), 1780 addr, len); 1781 DATA_LEN(pkt) = len; 1782 } else if (rxq->strd_scatter_en) { 1783 struct rte_mbuf *prev = pkt; 1784 uint32_t seg_len = 1785 RTE_MIN(rte_pktmbuf_tailroom(pkt), len); 1786 uint32_t rem_len = len - seg_len; 1787 1788 rte_memcpy(rte_pktmbuf_mtod(pkt, void *), 1789 addr, seg_len); 1790 DATA_LEN(pkt) = seg_len; 1791 while (rem_len) { 1792 struct rte_mbuf *next = 1793 rte_pktmbuf_alloc(rxq->mp); 1794 1795 if (unlikely(next == NULL)) { 1796 rte_pktmbuf_free(pkt); 1797 ++rxq->stats.rx_nombuf; 1798 goto out; 1799 } 1800 NEXT(prev) = next; 1801 SET_DATA_OFF(next, 0); 1802 addr = RTE_PTR_ADD(addr, seg_len); 1803 seg_len = RTE_MIN 1804 (rte_pktmbuf_tailroom(next), 1805 rem_len); 1806 rte_memcpy 1807 (rte_pktmbuf_mtod(next, void *), 1808 addr, seg_len); 1809 DATA_LEN(next) = seg_len; 1810 rem_len -= seg_len; 1811 prev = next; 1812 ++NB_SEGS(pkt); 1813 } 1814 } else { 1815 rte_pktmbuf_free_seg(pkt); 1816 ++rxq->stats.idropped; 1817 continue; 1818 } 1819 } else { 1820 rte_iova_t buf_iova; 1821 struct rte_mbuf_ext_shared_info *shinfo; 1822 uint16_t buf_len = strd_cnt * strd_sz; 1823 void *buf_addr; 1824 1825 /* Increment the refcnt of the whole chunk. */ 1826 __atomic_add_fetch(&buf->refcnt, 1, __ATOMIC_RELAXED); 1827 MLX5_ASSERT(__atomic_load_n(&buf->refcnt, 1828 __ATOMIC_RELAXED) <= strd_n + 1); 1829 buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM); 1830 /* 1831 * MLX5 device doesn't use iova but it is necessary in a 1832 * case where the Rx packet is transmitted via a 1833 * different PMD. 1834 */ 1835 buf_iova = rte_mempool_virt2iova(buf) + 1836 RTE_PTR_DIFF(buf_addr, buf); 1837 shinfo = &buf->shinfos[strd_idx]; 1838 rte_mbuf_ext_refcnt_set(shinfo, 1); 1839 /* 1840 * EXT_ATTACHED_MBUF will be set to pkt->ol_flags when 1841 * attaching the stride to mbuf and more offload flags 1842 * will be added below by calling rxq_cq_to_mbuf(). 1843 * Other fields will be overwritten. 1844 */ 1845 rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova, 1846 buf_len, shinfo); 1847 /* Set mbuf head-room. */ 1848 SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM); 1849 MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF); 1850 MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) >= 1851 len - (hdrm_overlap > 0 ? hdrm_overlap : 0)); 1852 DATA_LEN(pkt) = len; 1853 /* 1854 * Copy the last fragment of a packet (up to headroom 1855 * size bytes) in case there is a stride overlap with 1856 * a next packet's headroom. Allocate a separate mbuf 1857 * to store this fragment and link it. Scatter is on. 
1858 */
1859 if (hdrm_overlap > 0) {
1860 MLX5_ASSERT(rxq->strd_scatter_en);
1861 struct rte_mbuf *seg =
1862 rte_pktmbuf_alloc(rxq->mp);
1863
1864 if (unlikely(seg == NULL)) {
1865 rte_pktmbuf_free_seg(pkt);
1866 ++rxq->stats.rx_nombuf;
1867 break;
1868 }
1869 SET_DATA_OFF(seg, 0);
1870 rte_memcpy(rte_pktmbuf_mtod(seg, void *),
1871 RTE_PTR_ADD(addr, len - hdrm_overlap),
1872 hdrm_overlap);
1873 DATA_LEN(seg) = hdrm_overlap;
1874 DATA_LEN(pkt) = len - hdrm_overlap;
1875 NEXT(pkt) = seg;
1876 NB_SEGS(pkt) = 2;
1877 }
1878 }
1879 rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
1880 if (cqe->lro_num_seg > 1) {
1881 mlx5_lro_update_hdr(addr, cqe, len);
1882 pkt->ol_flags |= PKT_RX_LRO;
1883 pkt->tso_segsz = len / cqe->lro_num_seg;
1884 }
1885 PKT_LEN(pkt) = len;
1886 PORT(pkt) = rxq->port_id;
1887 #ifdef MLX5_PMD_SOFT_COUNTERS
1888 /* Increment bytes counter. */
1889 rxq->stats.ibytes += PKT_LEN(pkt);
1890 #endif
1891 /* Return packet. */
1892 *(pkts++) = pkt;
1893 ++i;
1894 }
1895 out:
1896 /* Update the consumer indexes. */
1897 rxq->consumed_strd = consumed_strd;
1898 rte_io_wmb();
1899 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
1900 if (rq_ci != rxq->rq_ci) {
1901 rxq->rq_ci = rq_ci;
1902 rte_io_wmb();
1903 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
1904 }
1905 #ifdef MLX5_PMD_SOFT_COUNTERS
1906 /* Increment packets counter. */
1907 rxq->stats.ipackets += i;
1908 #endif
1909 return i;
1910 }
1911
1912 /**
1913 * Dummy DPDK callback for TX.
1914 *
1915 * This function is used to temporarily replace the real callback during
1916 * unsafe control operations on the queue, or in case of error.
1917 *
1918 * @param dpdk_txq
1919 * Generic pointer to TX queue structure.
1920 * @param[in] pkts
1921 * Packets to transmit.
1922 * @param pkts_n
1923 * Number of packets in array.
1924 *
1925 * @return
1926 * Number of packets successfully transmitted (<= pkts_n).
1927 */
1928 uint16_t
1929 removed_tx_burst(void *dpdk_txq __rte_unused,
1930 struct rte_mbuf **pkts __rte_unused,
1931 uint16_t pkts_n __rte_unused)
1932 {
1933 rte_mb();
1934 return 0;
1935 }
1936
1937 /**
1938 * Dummy DPDK callback for RX.
1939 *
1940 * This function is used to temporarily replace the real callback during
1941 * unsafe control operations on the queue, or in case of error.
1942 *
1943 * @param dpdk_rxq
1944 * Generic pointer to RX queue structure.
1945 * @param[out] pkts
1946 * Array to store received packets.
1947 * @param pkts_n
1948 * Maximum number of packets in array.
1949 *
1950 * @return
1951 * Number of packets successfully received (<= pkts_n).
1952 */
1953 uint16_t
1954 removed_rx_burst(void *dpdk_rxq __rte_unused,
1955 struct rte_mbuf **pkts __rte_unused,
1956 uint16_t pkts_n __rte_unused)
1957 {
1958 rte_mb();
1959 return 0;
1960 }
1961
1962 /*
1963 * Vectorized Rx/Tx routines are not compiled in when the required vector
1964 * instructions are not supported on a target architecture. The following
1965 * null stubs are needed for linkage when the vectorized implementations
1966 * are not built (e.g. mlx5_rxtx_vec_sse.c for x86).
1967 */ 1968 1969 __rte_weak uint16_t 1970 mlx5_rx_burst_vec(void *dpdk_txq __rte_unused, 1971 struct rte_mbuf **pkts __rte_unused, 1972 uint16_t pkts_n __rte_unused) 1973 { 1974 return 0; 1975 } 1976 1977 __rte_weak int 1978 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused) 1979 { 1980 return -ENOTSUP; 1981 } 1982 1983 __rte_weak int 1984 mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused) 1985 { 1986 return -ENOTSUP; 1987 } 1988 1989 /** 1990 * Free the mbufs from the linear array of pointers. 1991 * 1992 * @param pkts 1993 * Pointer to array of packets to be free. 1994 * @param pkts_n 1995 * Number of packets to be freed. 1996 * @param olx 1997 * Configured Tx offloads mask. It is fully defined at 1998 * compile time and may be used for optimization. 1999 */ 2000 static __rte_always_inline void 2001 mlx5_tx_free_mbuf(struct rte_mbuf **__rte_restrict pkts, 2002 unsigned int pkts_n, 2003 unsigned int olx __rte_unused) 2004 { 2005 struct rte_mempool *pool = NULL; 2006 struct rte_mbuf **p_free = NULL; 2007 struct rte_mbuf *mbuf; 2008 unsigned int n_free = 0; 2009 2010 /* 2011 * The implemented algorithm eliminates 2012 * copying pointers to temporary array 2013 * for rte_mempool_put_bulk() calls. 2014 */ 2015 MLX5_ASSERT(pkts); 2016 MLX5_ASSERT(pkts_n); 2017 for (;;) { 2018 for (;;) { 2019 /* 2020 * Decrement mbuf reference counter, detach 2021 * indirect and external buffers if needed. 2022 */ 2023 mbuf = rte_pktmbuf_prefree_seg(*pkts); 2024 if (likely(mbuf != NULL)) { 2025 MLX5_ASSERT(mbuf == *pkts); 2026 if (likely(n_free != 0)) { 2027 if (unlikely(pool != mbuf->pool)) 2028 /* From different pool. */ 2029 break; 2030 } else { 2031 /* Start new scan array. */ 2032 pool = mbuf->pool; 2033 p_free = pkts; 2034 } 2035 ++n_free; 2036 ++pkts; 2037 --pkts_n; 2038 if (unlikely(pkts_n == 0)) { 2039 mbuf = NULL; 2040 break; 2041 } 2042 } else { 2043 /* 2044 * This happens if mbuf is still referenced. 2045 * We can't put it back to the pool, skip. 2046 */ 2047 ++pkts; 2048 --pkts_n; 2049 if (unlikely(n_free != 0)) 2050 /* There is some array to free.*/ 2051 break; 2052 if (unlikely(pkts_n == 0)) 2053 /* Last mbuf, nothing to free. */ 2054 return; 2055 } 2056 } 2057 for (;;) { 2058 /* 2059 * This loop is implemented to avoid multiple 2060 * inlining of rte_mempool_put_bulk(). 2061 */ 2062 MLX5_ASSERT(pool); 2063 MLX5_ASSERT(p_free); 2064 MLX5_ASSERT(n_free); 2065 /* 2066 * Free the array of pre-freed mbufs 2067 * belonging to the same memory pool. 2068 */ 2069 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 2070 if (unlikely(mbuf != NULL)) { 2071 /* There is the request to start new scan. */ 2072 pool = mbuf->pool; 2073 p_free = pkts++; 2074 n_free = 1; 2075 --pkts_n; 2076 if (likely(pkts_n != 0)) 2077 break; 2078 /* 2079 * This is the last mbuf to be freed. 2080 * Do one more loop iteration to complete. 2081 * This is rare case of the last unique mbuf. 2082 */ 2083 mbuf = NULL; 2084 continue; 2085 } 2086 if (likely(pkts_n == 0)) 2087 return; 2088 n_free = 0; 2089 break; 2090 } 2091 } 2092 } 2093 2094 /** 2095 * Free the mbuf from the elts ring buffer till new tail. 2096 * 2097 * @param txq 2098 * Pointer to Tx queue structure. 2099 * @param tail 2100 * Index in elts to free up to, becomes new elts tail. 2101 * @param olx 2102 * Configured Tx offloads mask. It is fully defined at 2103 * compile time and may be used for optimization. 
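 *
 * A rough illustration of the wraparound handling below (the ring size
 * is hypothetical): with elts_s = 256, a tail offset (elts_tail & elts_m)
 * of 250 and 10 mbufs to free, the first pass frees part = 6 entries up
 * to the end of the ring and the second pass frees the remaining 4
 * starting from index 0.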
2104 */
2105 static __rte_always_inline void
2106 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq,
2107 uint16_t tail,
2108 unsigned int olx __rte_unused)
2109 {
2110 uint16_t n_elts = tail - txq->elts_tail;
2111
2112 MLX5_ASSERT(n_elts);
2113 MLX5_ASSERT(n_elts <= txq->elts_s);
2114 /*
2115 * Implement a loop to support ring buffer wraparound
2116 * with single inlining of mlx5_tx_free_mbuf().
2117 */
2118 do {
2119 unsigned int part;
2120
2121 part = txq->elts_s - (txq->elts_tail & txq->elts_m);
2122 part = RTE_MIN(part, n_elts);
2123 MLX5_ASSERT(part);
2124 MLX5_ASSERT(part <= txq->elts_s);
2125 mlx5_tx_free_mbuf(&txq->elts[txq->elts_tail & txq->elts_m],
2126 part, olx);
2127 txq->elts_tail += part;
2128 n_elts -= part;
2129 } while (n_elts);
2130 }
2131
2132 /**
2133 * Store the mbufs being sent into the elts ring buffer.
2134 * On Tx completion these mbufs will be freed.
2135 *
2136 * @param txq
2137 * Pointer to Tx queue structure.
2138 * @param pkts
2139 * Pointer to array of packets to be stored.
2140 * @param pkts_n
2141 * Number of packets to be stored.
2142 * @param olx
2143 * Configured Tx offloads mask. It is fully defined at
2144 * compile time and may be used for optimization.
2145 */
2146 static __rte_always_inline void
2147 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq,
2148 struct rte_mbuf **__rte_restrict pkts,
2149 unsigned int pkts_n,
2150 unsigned int olx __rte_unused)
2151 {
2152 unsigned int part;
2153 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts;
2154
2155 MLX5_ASSERT(pkts);
2156 MLX5_ASSERT(pkts_n);
2157 part = txq->elts_s - (txq->elts_head & txq->elts_m);
2158 MLX5_ASSERT(part);
2159 MLX5_ASSERT(part <= txq->elts_s);
2160 /* This code is a good candidate for vectorizing with SIMD. */
2161 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)),
2162 (void *)pkts,
2163 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *));
2164 txq->elts_head += pkts_n;
2165 if (unlikely(part < pkts_n))
2166 /* The copy is wrapping around the elts array. */
2167 rte_memcpy((void *)elts, (void *)(pkts + part),
2168 (pkts_n - part) * sizeof(struct rte_mbuf *));
2169 }
2170
2171 /**
2172 * Update completion queue consuming index via doorbell
2173 * and flush the completed data buffers.
2174 *
2175 * @param txq
2176 * Pointer to TX queue structure.
2177 * @param last_cqe
2178 * Pointer to the last valid CQE; if not NULL, update txq->wqe_pi and flush the buffers.
2179 * @param olx
2180 * Configured Tx offloads mask. It is fully defined at
2181 * compile time and may be used for optimization.
2182 */
2183 static __rte_always_inline void
2184 mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq,
2185 volatile struct mlx5_cqe *last_cqe,
2186 unsigned int olx __rte_unused)
2187 {
2188 if (likely(last_cqe != NULL)) {
2189 uint16_t tail;
2190
2191 txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter);
2192 tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m];
2193 if (likely(tail != txq->elts_tail)) {
2194 mlx5_tx_free_elts(txq, tail, olx);
2195 MLX5_ASSERT(tail == txq->elts_tail);
2196 }
2197 }
2198 }
2199
2200 /**
2201 * Manage TX completions. This routine checks the CQ for
2202 * arrived CQEs, deduces the last accomplished WQE in the SQ,
2203 * updates the SQ producing index and frees all completed mbufs.
2204 *
2205 * @param txq
2206 * Pointer to TX queue structure.
2207 * @param olx
2208 * Configured Tx offloads mask. It is fully defined at
2209 * compile time and may be used for optimization.
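 *
 * At most MLX5_TX_COMP_MAX_CQE completions are processed per call,
 * which bounds the time spent freeing buffers inside one tx_burst
 * invocation (see the count variable in the loop below).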
2210 * 2211 * NOTE: not inlined intentionally, it makes tx_burst 2212 * routine smaller, simple and faster - from experiments. 2213 */ 2214 static void 2215 mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, 2216 unsigned int olx __rte_unused) 2217 { 2218 unsigned int count = MLX5_TX_COMP_MAX_CQE; 2219 volatile struct mlx5_cqe *last_cqe = NULL; 2220 bool ring_doorbell = false; 2221 int ret; 2222 2223 static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value"); 2224 static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be negative value"); 2225 do { 2226 volatile struct mlx5_cqe *cqe; 2227 2228 cqe = &txq->cqes[txq->cq_ci & txq->cqe_m]; 2229 ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci); 2230 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 2231 if (likely(ret != MLX5_CQE_STATUS_ERR)) { 2232 /* No new CQEs in completion queue. */ 2233 MLX5_ASSERT(ret == MLX5_CQE_STATUS_HW_OWN); 2234 break; 2235 } 2236 /* 2237 * Some error occurred, try to restart. 2238 * We have no barrier after WQE related Doorbell 2239 * written, make sure all writes are completed 2240 * here, before we might perform SQ reset. 2241 */ 2242 rte_wmb(); 2243 ret = mlx5_tx_error_cqe_handle 2244 (txq, (volatile struct mlx5_err_cqe *)cqe); 2245 if (unlikely(ret < 0)) { 2246 /* 2247 * Some error occurred on queue error 2248 * handling, we do not advance the index 2249 * here, allowing to retry on next call. 2250 */ 2251 return; 2252 } 2253 /* 2254 * We are going to fetch all entries with 2255 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status. 2256 * The send queue is supposed to be empty. 2257 */ 2258 ring_doorbell = true; 2259 ++txq->cq_ci; 2260 txq->cq_pi = txq->cq_ci; 2261 last_cqe = NULL; 2262 continue; 2263 } 2264 /* Normal transmit completion. */ 2265 MLX5_ASSERT(txq->cq_ci != txq->cq_pi); 2266 MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) == 2267 cqe->wqe_counter); 2268 ring_doorbell = true; 2269 ++txq->cq_ci; 2270 last_cqe = cqe; 2271 /* 2272 * We have to restrict the amount of processed CQEs 2273 * in one tx_burst routine call. The CQ may be large 2274 * and many CQEs may be updated by the NIC in one 2275 * transaction. Buffers freeing is time consuming, 2276 * multiple iterations may introduce significant 2277 * latency. 2278 */ 2279 if (likely(--count == 0)) 2280 break; 2281 } while (true); 2282 if (likely(ring_doorbell)) { 2283 /* Ring doorbell to notify hardware. */ 2284 rte_compiler_barrier(); 2285 *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); 2286 mlx5_tx_comp_flush(txq, last_cqe, olx); 2287 } 2288 } 2289 2290 /** 2291 * Check if the completion request flag should be set in the last WQE. 2292 * Both pushed mbufs and WQEs are monitored and the completion request 2293 * flag is set if any of thresholds is reached. 2294 * 2295 * @param txq 2296 * Pointer to TX queue structure. 2297 * @param loc 2298 * Pointer to burst routine local context. 2299 * @param olx 2300 * Configured Tx offloads mask. It is fully defined at 2301 * compile time and may be used for optimization. 2302 */ 2303 static __rte_always_inline void 2304 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 2305 struct mlx5_txq_local *__rte_restrict loc, 2306 unsigned int olx) 2307 { 2308 uint16_t head = txq->elts_head; 2309 unsigned int part; 2310 2311 part = MLX5_TXOFF_CONFIG(INLINE) ? 
2312 0 : loc->pkts_sent - loc->pkts_copy; 2313 head += part; 2314 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 2315 (MLX5_TXOFF_CONFIG(INLINE) && 2316 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 2317 volatile struct mlx5_wqe *last = loc->wqe_last; 2318 2319 MLX5_ASSERT(last); 2320 txq->elts_comp = head; 2321 if (MLX5_TXOFF_CONFIG(INLINE)) 2322 txq->wqe_comp = txq->wqe_ci; 2323 /* Request unconditional completion on last WQE. */ 2324 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 2325 MLX5_COMP_MODE_OFFSET); 2326 /* Save elts_head in dedicated free on completion queue. */ 2327 #ifdef RTE_LIBRTE_MLX5_DEBUG 2328 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 2329 (last->cseg.opcode >> 8) << 16; 2330 #else 2331 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 2332 #endif 2333 /* A CQE slot must always be available. */ 2334 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 2335 } 2336 } 2337 2338 /** 2339 * DPDK callback to check the status of a tx descriptor. 2340 * 2341 * @param tx_queue 2342 * The tx queue. 2343 * @param[in] offset 2344 * The index of the descriptor in the ring. 2345 * 2346 * @return 2347 * The status of the tx descriptor. 2348 */ 2349 int 2350 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) 2351 { 2352 struct mlx5_txq_data *__rte_restrict txq = tx_queue; 2353 uint16_t used; 2354 2355 mlx5_tx_handle_completion(txq, 0); 2356 used = txq->elts_head - txq->elts_tail; 2357 if (offset < used) 2358 return RTE_ETH_TX_DESC_FULL; 2359 return RTE_ETH_TX_DESC_DONE; 2360 } 2361 2362 /** 2363 * Build the Control Segment with specified opcode: 2364 * - MLX5_OPCODE_SEND 2365 * - MLX5_OPCODE_ENHANCED_MPSW 2366 * - MLX5_OPCODE_TSO 2367 * 2368 * @param txq 2369 * Pointer to TX queue structure. 2370 * @param loc 2371 * Pointer to burst routine local context. 2372 * @param wqe 2373 * Pointer to WQE to fill with built Control Segment. 2374 * @param ds 2375 * Supposed length of WQE in segments. 2376 * @param opcode 2377 * SQ WQE opcode to put into Control Segment. 2378 * @param olx 2379 * Configured Tx offloads mask. It is fully defined at 2380 * compile time and may be used for optimization. 2381 */ 2382 static __rte_always_inline void 2383 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 2384 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2385 struct mlx5_wqe *__rte_restrict wqe, 2386 unsigned int ds, 2387 unsigned int opcode, 2388 unsigned int olx __rte_unused) 2389 { 2390 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 2391 2392 /* For legacy MPW replace the EMPW by TSO with modifier. */ 2393 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 2394 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 2395 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 2396 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2397 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 2398 MLX5_COMP_MODE_OFFSET); 2399 cs->misc = RTE_BE32(0); 2400 } 2401 2402 /** 2403 * Build the Synchronize Queue Segment with specified completion index. 2404 * 2405 * @param txq 2406 * Pointer to TX queue structure. 2407 * @param loc 2408 * Pointer to burst routine local context. 2409 * @param wqe 2410 * Pointer to WQE to fill with built Control Segment. 2411 * @param wci 2412 * Completion index in Clock Queue to wait. 2413 * @param olx 2414 * Configured Tx offloads mask. It is fully defined at 2415 * compile time and may be used for optimization. 
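 *
 * The segment is written right after the Control Segment, at
 * wqe + MLX5_WSEG_SIZE, and refers to the Clock Queue completion
 * queue, so a complete WAIT WQE built by the caller takes two
 * segments (see mlx5_tx_schedule_send() below).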
2416 */ 2417 static __rte_always_inline void 2418 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 2419 struct mlx5_txq_local *restrict loc __rte_unused, 2420 struct mlx5_wqe *restrict wqe, 2421 unsigned int wci, 2422 unsigned int olx __rte_unused) 2423 { 2424 struct mlx5_wqe_qseg *qs; 2425 2426 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 2427 qs->max_index = rte_cpu_to_be_32(wci); 2428 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq->id); 2429 qs->reserved0 = RTE_BE32(0); 2430 qs->reserved1 = RTE_BE32(0); 2431 } 2432 2433 /** 2434 * Build the Ethernet Segment without inlined data. 2435 * Supports Software Parser, Checksums and VLAN 2436 * insertion Tx offload features. 2437 * 2438 * @param txq 2439 * Pointer to TX queue structure. 2440 * @param loc 2441 * Pointer to burst routine local context. 2442 * @param wqe 2443 * Pointer to WQE to fill with built Ethernet Segment. 2444 * @param olx 2445 * Configured Tx offloads mask. It is fully defined at 2446 * compile time and may be used for optimization. 2447 */ 2448 static __rte_always_inline void 2449 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 2450 struct mlx5_txq_local *__rte_restrict loc, 2451 struct mlx5_wqe *__rte_restrict wqe, 2452 unsigned int olx) 2453 { 2454 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2455 uint32_t csum; 2456 2457 /* 2458 * Calculate and set check sum flags first, dword field 2459 * in segment may be shared with Software Parser flags. 2460 */ 2461 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2462 es->flags = rte_cpu_to_le_32(csum); 2463 /* 2464 * Calculate and set Software Parser offsets and flags. 2465 * These flags a set for custom UDP and IP tunnel packets. 2466 */ 2467 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2468 /* Fill metadata field if needed. */ 2469 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2470 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2471 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2472 /* Engage VLAN tag insertion feature if requested. */ 2473 if (MLX5_TXOFF_CONFIG(VLAN) && 2474 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 2475 /* 2476 * We should get here only if device support 2477 * this feature correctly. 2478 */ 2479 MLX5_ASSERT(txq->vlan_en); 2480 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 2481 loc->mbuf->vlan_tci); 2482 } else { 2483 es->inline_hdr = RTE_BE32(0); 2484 } 2485 } 2486 2487 /** 2488 * Build the Ethernet Segment with minimal inlined data 2489 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 2490 * used to fill the gap in single WQEBB WQEs. 2491 * Supports Software Parser, Checksums and VLAN 2492 * insertion Tx offload features. 2493 * 2494 * @param txq 2495 * Pointer to TX queue structure. 2496 * @param loc 2497 * Pointer to burst routine local context. 2498 * @param wqe 2499 * Pointer to WQE to fill with built Ethernet Segment. 2500 * @param vlan 2501 * Length of VLAN tag insertion if any. 2502 * @param olx 2503 * Configured Tx offloads mask. It is fully defined at 2504 * compile time and may be used for optimization. 
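 *
 * Per the static asserts in the body, MLX5_ESEG_MIN_INLINE_SIZE is
 * 2 + 16 = 18 bytes: the first two bytes of the packet are stored in
 * the Ethernet Segment itself and 16 more bytes follow it. With VLAN
 * insertion the same 18 bytes end up holding the two MAC addresses
 * (12 bytes), the inserted 4-byte VLAN header and the original 2-byte
 * Ethernet type.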
2505 */ 2506 static __rte_always_inline void 2507 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 2508 struct mlx5_txq_local *__rte_restrict loc, 2509 struct mlx5_wqe *__rte_restrict wqe, 2510 unsigned int vlan, 2511 unsigned int olx) 2512 { 2513 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2514 uint32_t csum; 2515 uint8_t *psrc, *pdst; 2516 2517 /* 2518 * Calculate and set check sum flags first, dword field 2519 * in segment may be shared with Software Parser flags. 2520 */ 2521 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2522 es->flags = rte_cpu_to_le_32(csum); 2523 /* 2524 * Calculate and set Software Parser offsets and flags. 2525 * These flags a set for custom UDP and IP tunnel packets. 2526 */ 2527 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2528 /* Fill metadata field if needed. */ 2529 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2530 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2531 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2532 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2533 (sizeof(uint16_t) + 2534 sizeof(rte_v128u32_t)), 2535 "invalid Ethernet Segment data size"); 2536 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2537 (sizeof(uint16_t) + 2538 sizeof(struct rte_vlan_hdr) + 2539 2 * RTE_ETHER_ADDR_LEN), 2540 "invalid Ethernet Segment data size"); 2541 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2542 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 2543 es->inline_data = *(unaligned_uint16_t *)psrc; 2544 psrc += sizeof(uint16_t); 2545 pdst = (uint8_t *)(es + 1); 2546 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2547 /* Implement VLAN tag insertion as part inline data. */ 2548 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2549 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2550 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2551 /* Insert VLAN ethertype + VLAN tag. */ 2552 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2553 ((RTE_ETHER_TYPE_VLAN << 16) | 2554 loc->mbuf->vlan_tci); 2555 pdst += sizeof(struct rte_vlan_hdr); 2556 /* Copy the rest two bytes from packet data. */ 2557 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2558 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2559 } else { 2560 /* Fill the gap in the title WQEBB with inline data. */ 2561 rte_mov16(pdst, psrc); 2562 } 2563 } 2564 2565 /** 2566 * Build the Ethernet Segment with entire packet 2567 * data inlining. Checks the boundary of WQEBB and 2568 * ring buffer wrapping, supports Software Parser, 2569 * Checksums and VLAN insertion Tx offload features. 2570 * 2571 * @param txq 2572 * Pointer to TX queue structure. 2573 * @param loc 2574 * Pointer to burst routine local context. 2575 * @param wqe 2576 * Pointer to WQE to fill with built Ethernet Segment. 2577 * @param vlan 2578 * Length of VLAN tag insertion if any. 2579 * @param inlen 2580 * Length of data to inline (VLAN included, if any). 2581 * @param tso 2582 * TSO flag, set mss field from the packet. 2583 * @param olx 2584 * Configured Tx offloads mask. It is fully defined at 2585 * compile time and may be used for optimization. 2586 * 2587 * @return 2588 * Pointer to the next Data Segment (aligned and wrapped around). 
2589 */ 2590 static __rte_always_inline struct mlx5_wqe_dseg * 2591 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 2592 struct mlx5_txq_local *__rte_restrict loc, 2593 struct mlx5_wqe *__rte_restrict wqe, 2594 unsigned int vlan, 2595 unsigned int inlen, 2596 unsigned int tso, 2597 unsigned int olx) 2598 { 2599 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2600 uint32_t csum; 2601 uint8_t *psrc, *pdst; 2602 unsigned int part; 2603 2604 /* 2605 * Calculate and set check sum flags first, dword field 2606 * in segment may be shared with Software Parser flags. 2607 */ 2608 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2609 if (tso) { 2610 csum <<= 24; 2611 csum |= loc->mbuf->tso_segsz; 2612 es->flags = rte_cpu_to_be_32(csum); 2613 } else { 2614 es->flags = rte_cpu_to_le_32(csum); 2615 } 2616 /* 2617 * Calculate and set Software Parser offsets and flags. 2618 * These flags a set for custom UDP and IP tunnel packets. 2619 */ 2620 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2621 /* Fill metadata field if needed. */ 2622 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2623 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2624 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2625 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2626 (sizeof(uint16_t) + 2627 sizeof(rte_v128u32_t)), 2628 "invalid Ethernet Segment data size"); 2629 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2630 (sizeof(uint16_t) + 2631 sizeof(struct rte_vlan_hdr) + 2632 2 * RTE_ETHER_ADDR_LEN), 2633 "invalid Ethernet Segment data size"); 2634 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2635 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 2636 es->inline_data = *(unaligned_uint16_t *)psrc; 2637 psrc += sizeof(uint16_t); 2638 pdst = (uint8_t *)(es + 1); 2639 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2640 /* Implement VLAN tag insertion as part inline data. */ 2641 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2642 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2643 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2644 /* Insert VLAN ethertype + VLAN tag. */ 2645 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2646 ((RTE_ETHER_TYPE_VLAN << 16) | 2647 loc->mbuf->vlan_tci); 2648 pdst += sizeof(struct rte_vlan_hdr); 2649 /* Copy the rest two bytes from packet data. */ 2650 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2651 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2652 psrc += sizeof(uint16_t); 2653 } else { 2654 /* Fill the gap in the title WQEBB with inline data. */ 2655 rte_mov16(pdst, psrc); 2656 psrc += sizeof(rte_v128u32_t); 2657 } 2658 pdst = (uint8_t *)(es + 2); 2659 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2660 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 2661 inlen -= MLX5_ESEG_MIN_INLINE_SIZE; 2662 if (!inlen) { 2663 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 2664 return (struct mlx5_wqe_dseg *)pdst; 2665 } 2666 /* 2667 * The WQEBB space availability is checked by caller. 2668 * Here we should be aware of WQE ring buffer wraparound only. 2669 */ 2670 part = (uint8_t *)txq->wqes_end - pdst; 2671 part = RTE_MIN(part, inlen); 2672 do { 2673 rte_memcpy(pdst, psrc, part); 2674 inlen -= part; 2675 if (likely(!inlen)) { 2676 /* 2677 * If return value is not used by the caller 2678 * the code below will be optimized out. 
2679 */ 2680 pdst += part; 2681 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2682 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 2683 pdst = (uint8_t *)txq->wqes; 2684 return (struct mlx5_wqe_dseg *)pdst; 2685 } 2686 pdst = (uint8_t *)txq->wqes; 2687 psrc += part; 2688 part = inlen; 2689 } while (true); 2690 } 2691 2692 /** 2693 * Copy data from chain of mbuf to the specified linear buffer. 2694 * Checksums and VLAN insertion Tx offload features. If data 2695 * from some mbuf copied completely this mbuf is freed. Local 2696 * structure is used to keep the byte stream state. 2697 * 2698 * @param pdst 2699 * Pointer to the destination linear buffer. 2700 * @param loc 2701 * Pointer to burst routine local context. 2702 * @param len 2703 * Length of data to be copied. 2704 * @param must 2705 * Length of data to be copied ignoring no inline hint. 2706 * @param olx 2707 * Configured Tx offloads mask. It is fully defined at 2708 * compile time and may be used for optimization. 2709 * 2710 * @return 2711 * Number of actual copied data bytes. This is always greater than or 2712 * equal to must parameter and might be lesser than len in no inline 2713 * hint flag is encountered. 2714 */ 2715 static __rte_always_inline unsigned int 2716 mlx5_tx_mseg_memcpy(uint8_t *pdst, 2717 struct mlx5_txq_local *__rte_restrict loc, 2718 unsigned int len, 2719 unsigned int must, 2720 unsigned int olx __rte_unused) 2721 { 2722 struct rte_mbuf *mbuf; 2723 unsigned int part, dlen, copy = 0; 2724 uint8_t *psrc; 2725 2726 MLX5_ASSERT(len); 2727 MLX5_ASSERT(must <= len); 2728 do { 2729 /* Allow zero length packets, must check first. */ 2730 dlen = rte_pktmbuf_data_len(loc->mbuf); 2731 if (dlen <= loc->mbuf_off) { 2732 /* Exhausted packet, just free. */ 2733 mbuf = loc->mbuf; 2734 loc->mbuf = mbuf->next; 2735 rte_pktmbuf_free_seg(mbuf); 2736 loc->mbuf_off = 0; 2737 MLX5_ASSERT(loc->mbuf_nseg > 1); 2738 MLX5_ASSERT(loc->mbuf); 2739 --loc->mbuf_nseg; 2740 if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 2741 unsigned int diff; 2742 2743 if (copy >= must) { 2744 /* 2745 * We already copied the minimal 2746 * requested amount of data. 2747 */ 2748 return copy; 2749 } 2750 diff = must - copy; 2751 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 2752 /* 2753 * Copy only the minimal required 2754 * part of the data buffer. 2755 */ 2756 len = diff; 2757 } 2758 } 2759 continue; 2760 } 2761 dlen -= loc->mbuf_off; 2762 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 2763 loc->mbuf_off); 2764 part = RTE_MIN(len, dlen); 2765 rte_memcpy(pdst, psrc, part); 2766 copy += part; 2767 loc->mbuf_off += part; 2768 len -= part; 2769 if (!len) { 2770 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 2771 loc->mbuf_off = 0; 2772 /* Exhausted packet, just free. */ 2773 mbuf = loc->mbuf; 2774 loc->mbuf = mbuf->next; 2775 rte_pktmbuf_free_seg(mbuf); 2776 loc->mbuf_off = 0; 2777 MLX5_ASSERT(loc->mbuf_nseg >= 1); 2778 --loc->mbuf_nseg; 2779 } 2780 return copy; 2781 } 2782 pdst += part; 2783 } while (true); 2784 } 2785 2786 /** 2787 * Build the Ethernet Segment with inlined data from 2788 * multi-segment packet. Checks the boundary of WQEBB 2789 * and ring buffer wrapping, supports Software Parser, 2790 * Checksums and VLAN insertion Tx offload features. 2791 * 2792 * @param txq 2793 * Pointer to TX queue structure. 2794 * @param loc 2795 * Pointer to burst routine local context. 2796 * @param wqe 2797 * Pointer to WQE to fill with built Ethernet Segment. 2798 * @param vlan 2799 * Length of VLAN tag insertion if any. 
2800 * @param inlen 2801 * Length of data to inline (VLAN included, if any). 2802 * @param tso 2803 * TSO flag, set mss field from the packet. 2804 * @param olx 2805 * Configured Tx offloads mask. It is fully defined at 2806 * compile time and may be used for optimization. 2807 * 2808 * @return 2809 * Pointer to the next Data Segment (aligned and 2810 * possible NOT wrapped around - caller should do 2811 * wrapping check on its own). 2812 */ 2813 static __rte_always_inline struct mlx5_wqe_dseg * 2814 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 2815 struct mlx5_txq_local *__rte_restrict loc, 2816 struct mlx5_wqe *__rte_restrict wqe, 2817 unsigned int vlan, 2818 unsigned int inlen, 2819 unsigned int tso, 2820 unsigned int olx) 2821 { 2822 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2823 uint32_t csum; 2824 uint8_t *pdst; 2825 unsigned int part, tlen = 0; 2826 2827 /* 2828 * Calculate and set check sum flags first, uint32_t field 2829 * in segment may be shared with Software Parser flags. 2830 */ 2831 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2832 if (tso) { 2833 csum <<= 24; 2834 csum |= loc->mbuf->tso_segsz; 2835 es->flags = rte_cpu_to_be_32(csum); 2836 } else { 2837 es->flags = rte_cpu_to_le_32(csum); 2838 } 2839 /* 2840 * Calculate and set Software Parser offsets and flags. 2841 * These flags a set for custom UDP and IP tunnel packets. 2842 */ 2843 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2844 /* Fill metadata field if needed. */ 2845 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2846 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2847 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2848 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2849 (sizeof(uint16_t) + 2850 sizeof(rte_v128u32_t)), 2851 "invalid Ethernet Segment data size"); 2852 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2853 (sizeof(uint16_t) + 2854 sizeof(struct rte_vlan_hdr) + 2855 2 * RTE_ETHER_ADDR_LEN), 2856 "invalid Ethernet Segment data size"); 2857 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2858 pdst = (uint8_t *)&es->inline_data; 2859 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2860 /* Implement VLAN tag insertion as part inline data. */ 2861 mlx5_tx_mseg_memcpy(pdst, loc, 2862 2 * RTE_ETHER_ADDR_LEN, 2863 2 * RTE_ETHER_ADDR_LEN, olx); 2864 pdst += 2 * RTE_ETHER_ADDR_LEN; 2865 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2866 ((RTE_ETHER_TYPE_VLAN << 16) | 2867 loc->mbuf->vlan_tci); 2868 pdst += sizeof(struct rte_vlan_hdr); 2869 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 2870 } 2871 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 2872 /* 2873 * The WQEBB space availability is checked by caller. 2874 * Here we should be aware of WQE ring buffer wraparound only. 2875 */ 2876 part = (uint8_t *)txq->wqes_end - pdst; 2877 part = RTE_MIN(part, inlen - tlen); 2878 MLX5_ASSERT(part); 2879 do { 2880 unsigned int copy; 2881 2882 /* 2883 * Copying may be interrupted inside the routine 2884 * if run into no inline hint flag. 2885 */ 2886 copy = tlen >= txq->inlen_mode ? 0 : (txq->inlen_mode - tlen); 2887 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 2888 tlen += copy; 2889 if (likely(inlen <= tlen) || copy < part) { 2890 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 2891 pdst += copy; 2892 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2893 return (struct mlx5_wqe_dseg *)pdst; 2894 } 2895 pdst = (uint8_t *)txq->wqes; 2896 part = inlen - tlen; 2897 } while (true); 2898 } 2899 2900 /** 2901 * Build the Data Segment of pointer type. 
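 * The segment occupies a single 16-byte WSEG holding the big-endian
 * byte count, the lkey resolved via mlx5_tx_mb2mr() for the mbuf being
 * sent and the buffer address; no packet data is copied into the WQE.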
2902 * 2903 * @param txq 2904 * Pointer to TX queue structure. 2905 * @param loc 2906 * Pointer to burst routine local context. 2907 * @param dseg 2908 * Pointer to WQE to fill with built Data Segment. 2909 * @param buf 2910 * Data buffer to point. 2911 * @param len 2912 * Data buffer length. 2913 * @param olx 2914 * Configured Tx offloads mask. It is fully defined at 2915 * compile time and may be used for optimization. 2916 */ 2917 static __rte_always_inline void 2918 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 2919 struct mlx5_txq_local *__rte_restrict loc, 2920 struct mlx5_wqe_dseg *__rte_restrict dseg, 2921 uint8_t *buf, 2922 unsigned int len, 2923 unsigned int olx __rte_unused) 2924 2925 { 2926 MLX5_ASSERT(len); 2927 dseg->bcount = rte_cpu_to_be_32(len); 2928 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2929 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2930 } 2931 2932 /** 2933 * Build the Data Segment of pointer type or inline 2934 * if data length is less than buffer in minimal 2935 * Data Segment size. 2936 * 2937 * @param txq 2938 * Pointer to TX queue structure. 2939 * @param loc 2940 * Pointer to burst routine local context. 2941 * @param dseg 2942 * Pointer to WQE to fill with built Data Segment. 2943 * @param buf 2944 * Data buffer to point. 2945 * @param len 2946 * Data buffer length. 2947 * @param olx 2948 * Configured Tx offloads mask. It is fully defined at 2949 * compile time and may be used for optimization. 2950 */ 2951 static __rte_always_inline void 2952 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 2953 struct mlx5_txq_local *__rte_restrict loc, 2954 struct mlx5_wqe_dseg *__rte_restrict dseg, 2955 uint8_t *buf, 2956 unsigned int len, 2957 unsigned int olx __rte_unused) 2958 2959 { 2960 uintptr_t dst, src; 2961 2962 MLX5_ASSERT(len); 2963 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 2964 dseg->bcount = rte_cpu_to_be_32(len); 2965 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2966 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2967 2968 return; 2969 } 2970 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2971 /* Unrolled implementation of generic rte_memcpy. */ 2972 dst = (uintptr_t)&dseg->inline_data[0]; 2973 src = (uintptr_t)buf; 2974 if (len & 0x08) { 2975 #ifdef RTE_ARCH_STRICT_ALIGN 2976 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 2977 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2978 dst += sizeof(uint32_t); 2979 src += sizeof(uint32_t); 2980 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2981 dst += sizeof(uint32_t); 2982 src += sizeof(uint32_t); 2983 #else 2984 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 2985 dst += sizeof(uint64_t); 2986 src += sizeof(uint64_t); 2987 #endif 2988 } 2989 if (len & 0x04) { 2990 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2991 dst += sizeof(uint32_t); 2992 src += sizeof(uint32_t); 2993 } 2994 if (len & 0x02) { 2995 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 2996 dst += sizeof(uint16_t); 2997 src += sizeof(uint16_t); 2998 } 2999 if (len & 0x01) 3000 *(uint8_t *)dst = *(uint8_t *)src; 3001 } 3002 3003 /** 3004 * Build the Data Segment of inlined data from single 3005 * segment packet, no VLAN insertion. 3006 * 3007 * @param txq 3008 * Pointer to TX queue structure. 3009 * @param loc 3010 * Pointer to burst routine local context. 3011 * @param dseg 3012 * Pointer to WQE to fill with built Data Segment. 3013 * @param buf 3014 * Data buffer to point. 3015 * @param len 3016 * Data buffer length. 3017 * @param olx 3018 * Configured Tx offloads mask. 
It is fully defined at 3019 * compile time and may be used for optimization. 3020 * 3021 * @return 3022 * Pointer to the next Data Segment after inlined data. 3023 * Ring buffer wraparound check is needed. We do not 3024 * do it here because it may not be needed for the 3025 * last packet in the eMPW session. 3026 */ 3027 static __rte_always_inline struct mlx5_wqe_dseg * 3028 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 3029 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 3030 struct mlx5_wqe_dseg *__rte_restrict dseg, 3031 uint8_t *buf, 3032 unsigned int len, 3033 unsigned int olx __rte_unused) 3034 { 3035 unsigned int part; 3036 uint8_t *pdst; 3037 3038 if (!MLX5_TXOFF_CONFIG(MPW)) { 3039 /* Store the descriptor byte counter for eMPW sessions. */ 3040 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 3041 pdst = &dseg->inline_data[0]; 3042 } else { 3043 /* The entire legacy MPW session counter is stored on close. */ 3044 pdst = (uint8_t *)dseg; 3045 } 3046 /* 3047 * The WQEBB space availability is checked by caller. 3048 * Here we should be aware of WQE ring buffer wraparound only. 3049 */ 3050 part = (uint8_t *)txq->wqes_end - pdst; 3051 part = RTE_MIN(part, len); 3052 do { 3053 rte_memcpy(pdst, buf, part); 3054 len -= part; 3055 if (likely(!len)) { 3056 pdst += part; 3057 if (!MLX5_TXOFF_CONFIG(MPW)) 3058 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 3059 /* Note: no final wraparound check here. */ 3060 return (struct mlx5_wqe_dseg *)pdst; 3061 } 3062 pdst = (uint8_t *)txq->wqes; 3063 buf += part; 3064 part = len; 3065 } while (true); 3066 } 3067 3068 /** 3069 * Build the Data Segment of inlined data from single 3070 * segment packet with VLAN insertion. 3071 * 3072 * @param txq 3073 * Pointer to TX queue structure. 3074 * @param loc 3075 * Pointer to burst routine local context. 3076 * @param dseg 3077 * Pointer to the dseg fill with built Data Segment. 3078 * @param buf 3079 * Data buffer to point. 3080 * @param len 3081 * Data buffer length. 3082 * @param olx 3083 * Configured Tx offloads mask. It is fully defined at 3084 * compile time and may be used for optimization. 3085 * 3086 * @return 3087 * Pointer to the next Data Segment after inlined data. 3088 * Ring buffer wraparound check is needed. 3089 */ 3090 static __rte_always_inline struct mlx5_wqe_dseg * 3091 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 3092 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 3093 struct mlx5_wqe_dseg *__rte_restrict dseg, 3094 uint8_t *buf, 3095 unsigned int len, 3096 unsigned int olx __rte_unused) 3097 3098 { 3099 unsigned int part; 3100 uint8_t *pdst; 3101 3102 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 3103 static_assert(MLX5_DSEG_MIN_INLINE_SIZE == 3104 (2 * RTE_ETHER_ADDR_LEN), 3105 "invalid Data Segment data size"); 3106 if (!MLX5_TXOFF_CONFIG(MPW)) { 3107 /* Store the descriptor byte counter for eMPW sessions. */ 3108 dseg->bcount = rte_cpu_to_be_32 3109 ((len + sizeof(struct rte_vlan_hdr)) | 3110 MLX5_ETH_WQE_DATA_INLINE); 3111 pdst = &dseg->inline_data[0]; 3112 } else { 3113 /* The entire legacy MPW session counter is stored on close. */ 3114 pdst = (uint8_t *)dseg; 3115 } 3116 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 3117 buf += MLX5_DSEG_MIN_INLINE_SIZE; 3118 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 3119 len -= MLX5_DSEG_MIN_INLINE_SIZE; 3120 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. 
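 * The inserted dword is (RTE_ETHER_TYPE_VLAN << 16) | vlan_tci
 * converted to big endian, e.g. a TCI of 0x0001 (an illustrative
 * value) appears in the WQE as the bytes 81 00 00 01.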
*/ 3121 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 3122 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 3123 pdst = (uint8_t *)txq->wqes; 3124 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 3125 loc->mbuf->vlan_tci); 3126 pdst += sizeof(struct rte_vlan_hdr); 3127 /* 3128 * The WQEBB space availability is checked by caller. 3129 * Here we should be aware of WQE ring buffer wraparound only. 3130 */ 3131 part = (uint8_t *)txq->wqes_end - pdst; 3132 part = RTE_MIN(part, len); 3133 do { 3134 rte_memcpy(pdst, buf, part); 3135 len -= part; 3136 if (likely(!len)) { 3137 pdst += part; 3138 if (!MLX5_TXOFF_CONFIG(MPW)) 3139 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 3140 /* Note: no final wraparound check here. */ 3141 return (struct mlx5_wqe_dseg *)pdst; 3142 } 3143 pdst = (uint8_t *)txq->wqes; 3144 buf += part; 3145 part = len; 3146 } while (true); 3147 } 3148 3149 /** 3150 * Build the Ethernet Segment with optionally inlined data with 3151 * VLAN insertion and following Data Segments (if any) from 3152 * multi-segment packet. Used by ordinary send and TSO. 3153 * 3154 * @param txq 3155 * Pointer to TX queue structure. 3156 * @param loc 3157 * Pointer to burst routine local context. 3158 * @param wqe 3159 * Pointer to WQE to fill with built Ethernet/Data Segments. 3160 * @param vlan 3161 * Length of VLAN header to insert, 0 means no VLAN insertion. 3162 * @param inlen 3163 * Data length to inline. For TSO this parameter specifies 3164 * exact value, for ordinary send routine can be aligned by 3165 * caller to provide better WQE space saving and data buffer 3166 * start address alignment. This length includes VLAN header 3167 * being inserted. 3168 * @param tso 3169 * Zero means ordinary send, inlined data can be extended, 3170 * otherwise this is TSO, inlined data length is fixed. 3171 * @param olx 3172 * Configured Tx offloads mask. It is fully defined at 3173 * compile time and may be used for optimization. 3174 * 3175 * @return 3176 * Actual size of built WQE in segments. 3177 */ 3178 static __rte_always_inline unsigned int 3179 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 3180 struct mlx5_txq_local *__rte_restrict loc, 3181 struct mlx5_wqe *__rte_restrict wqe, 3182 unsigned int vlan, 3183 unsigned int inlen, 3184 unsigned int tso, 3185 unsigned int olx __rte_unused) 3186 { 3187 struct mlx5_wqe_dseg *__rte_restrict dseg; 3188 unsigned int ds; 3189 3190 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 3191 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 3192 loc->mbuf_off = 0; 3193 3194 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 3195 if (!loc->mbuf_nseg) 3196 goto dseg_done; 3197 /* 3198 * There are still some mbuf remaining, not inlined. 3199 * The first mbuf may be partially inlined and we 3200 * must process the possible non-zero data offset. 3201 */ 3202 if (loc->mbuf_off) { 3203 unsigned int dlen; 3204 uint8_t *dptr; 3205 3206 /* 3207 * Exhausted packets must be dropped before. 3208 * Non-zero offset means there are some data 3209 * remained in the packet. 3210 */ 3211 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 3212 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 3213 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 3214 loc->mbuf_off); 3215 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 3216 /* 3217 * Build the pointer/minimal data Data Segment. 3218 * Do ring buffer wrapping check in advance. 
3219 */ 3220 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3221 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3222 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 3223 /* Store the mbuf to be freed on completion. */ 3224 MLX5_ASSERT(loc->elts_free); 3225 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3226 --loc->elts_free; 3227 ++dseg; 3228 if (--loc->mbuf_nseg == 0) 3229 goto dseg_done; 3230 loc->mbuf = loc->mbuf->next; 3231 loc->mbuf_off = 0; 3232 } 3233 do { 3234 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3235 struct rte_mbuf *mbuf; 3236 3237 /* Zero length segment found, just skip. */ 3238 mbuf = loc->mbuf; 3239 loc->mbuf = loc->mbuf->next; 3240 rte_pktmbuf_free_seg(mbuf); 3241 if (--loc->mbuf_nseg == 0) 3242 break; 3243 } else { 3244 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3245 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3246 mlx5_tx_dseg_iptr 3247 (txq, loc, dseg, 3248 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3249 rte_pktmbuf_data_len(loc->mbuf), olx); 3250 MLX5_ASSERT(loc->elts_free); 3251 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3252 --loc->elts_free; 3253 ++dseg; 3254 if (--loc->mbuf_nseg == 0) 3255 break; 3256 loc->mbuf = loc->mbuf->next; 3257 } 3258 } while (true); 3259 3260 dseg_done: 3261 /* Calculate actual segments used from the dseg pointer. */ 3262 if ((uintptr_t)wqe < (uintptr_t)dseg) 3263 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 3264 else 3265 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 3266 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 3267 return ds; 3268 } 3269 3270 /** 3271 * The routine checks timestamp flag in the current packet, 3272 * and push WAIT WQE into the queue if scheduling is required. 3273 * 3274 * @param txq 3275 * Pointer to TX queue structure. 3276 * @param loc 3277 * Pointer to burst routine local context. 3278 * @param olx 3279 * Configured Tx offloads mask. It is fully defined at 3280 * compile time and may be used for optimization. 3281 * 3282 * @return 3283 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3284 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 3285 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 3286 * Local context variables partially updated. 3287 */ 3288 static __rte_always_inline enum mlx5_txcmp_code 3289 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 3290 struct mlx5_txq_local *restrict loc, 3291 unsigned int olx) 3292 { 3293 if (MLX5_TXOFF_CONFIG(TXPP) && 3294 loc->mbuf->ol_flags & txq->ts_mask) { 3295 struct mlx5_wqe *wqe; 3296 uint64_t ts; 3297 int32_t wci; 3298 3299 /* 3300 * Estimate the required space quickly and roughly. 3301 * We would like to ensure the packet can be pushed 3302 * to the queue and we won't get the orphan WAIT WQE. 3303 */ 3304 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 3305 loc->elts_free < NB_SEGS(loc->mbuf)) 3306 return MLX5_TXCMP_CODE_EXIT; 3307 /* Convert the timestamp into completion to wait. */ 3308 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 3309 wci = mlx5_txpp_convert_tx_ts(txq->sh, ts); 3310 if (unlikely(wci < 0)) 3311 return MLX5_TXCMP_CODE_SINGLE; 3312 /* Build the WAIT WQE with specified completion. 
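 * The WAIT WQE is two segments long: the Control Segment with the
 * MLX5_OPCODE_WAIT opcode (hence ds = 2 below) followed by the
 * queue segment built by mlx5_tx_wseg_init().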
*/ 3313 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3314 mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx); 3315 mlx5_tx_wseg_init(txq, loc, wqe, wci, olx); 3316 ++txq->wqe_ci; 3317 --loc->wqe_free; 3318 return MLX5_TXCMP_CODE_MULTI; 3319 } 3320 return MLX5_TXCMP_CODE_SINGLE; 3321 } 3322 3323 /** 3324 * Tx one packet function for multi-segment TSO. Supports all 3325 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 3326 * sends one packet per WQE. 3327 * 3328 * This routine is responsible for storing processed mbuf 3329 * into elts ring buffer and update elts_head. 3330 * 3331 * @param txq 3332 * Pointer to TX queue structure. 3333 * @param loc 3334 * Pointer to burst routine local context. 3335 * @param olx 3336 * Configured Tx offloads mask. It is fully defined at 3337 * compile time and may be used for optimization. 3338 * 3339 * @return 3340 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3341 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3342 * Local context variables partially updated. 3343 */ 3344 static __rte_always_inline enum mlx5_txcmp_code 3345 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 3346 struct mlx5_txq_local *__rte_restrict loc, 3347 unsigned int olx) 3348 { 3349 struct mlx5_wqe *__rte_restrict wqe; 3350 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 3351 3352 if (MLX5_TXOFF_CONFIG(TXPP)) { 3353 enum mlx5_txcmp_code wret; 3354 3355 /* Generate WAIT for scheduling if requested. */ 3356 wret = mlx5_tx_schedule_send(txq, loc, olx); 3357 if (wret == MLX5_TXCMP_CODE_EXIT) 3358 return MLX5_TXCMP_CODE_EXIT; 3359 if (wret == MLX5_TXCMP_CODE_ERROR) 3360 return MLX5_TXCMP_CODE_ERROR; 3361 } 3362 /* 3363 * Calculate data length to be inlined to estimate 3364 * the required space in WQE ring buffer. 3365 */ 3366 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3367 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3368 vlan = sizeof(struct rte_vlan_hdr); 3369 inlen = loc->mbuf->l2_len + vlan + 3370 loc->mbuf->l3_len + loc->mbuf->l4_len; 3371 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 3372 return MLX5_TXCMP_CODE_ERROR; 3373 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3374 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 3375 /* Packet must contain all TSO headers. */ 3376 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 3377 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3378 inlen > (dlen + vlan))) 3379 return MLX5_TXCMP_CODE_ERROR; 3380 MLX5_ASSERT(inlen >= txq->inlen_mode); 3381 /* 3382 * Check whether there are enough free WQEBBs: 3383 * - Control Segment 3384 * - Ethernet Segment 3385 * - First Segment of inlined Ethernet data 3386 * - ... data continued ... 3387 * - Data Segments of pointer/min inline type 3388 */ 3389 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3390 MLX5_ESEG_MIN_INLINE_SIZE + 3391 MLX5_WSEG_SIZE + 3392 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3393 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3394 return MLX5_TXCMP_CODE_EXIT; 3395 /* Check for maximal WQE size. */ 3396 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3397 return MLX5_TXCMP_CODE_ERROR; 3398 #ifdef MLX5_PMD_SOFT_COUNTERS 3399 /* Update sent data bytes/packets counters. */ 3400 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 3401 loc->mbuf->tso_segsz; 3402 /* 3403 * One will be added for mbuf itself 3404 * at the end of the mlx5_tx_burst from 3405 * loc->pkts_sent field. 
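 * A rough example with illustrative numbers: dlen = 9000 bytes,
 * headers of inlen - vlan = 54 bytes and tso_segsz = 1460 give
 * ntcp = (9000 - 54 + 1459) / 1460 = 7 segments on the wire;
 * after the decrement below, 6 extra header replications of
 * inlen bytes each are added to obytes on top of dlen + vlan.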
3406 */ 3407 --ntcp; 3408 txq->stats.opackets += ntcp; 3409 txq->stats.obytes += dlen + vlan + ntcp * inlen; 3410 #endif 3411 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3412 loc->wqe_last = wqe; 3413 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 3414 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 3415 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3416 txq->wqe_ci += (ds + 3) / 4; 3417 loc->wqe_free -= (ds + 3) / 4; 3418 return MLX5_TXCMP_CODE_MULTI; 3419 } 3420 3421 /** 3422 * Tx one packet function for multi-segment SEND. Supports all 3423 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3424 * sends one packet per WQE, without any data inlining in 3425 * Ethernet Segment. 3426 * 3427 * This routine is responsible for storing processed mbuf 3428 * into elts ring buffer and update elts_head. 3429 * 3430 * @param txq 3431 * Pointer to TX queue structure. 3432 * @param loc 3433 * Pointer to burst routine local context. 3434 * @param olx 3435 * Configured Tx offloads mask. It is fully defined at 3436 * compile time and may be used for optimization. 3437 * 3438 * @return 3439 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3440 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3441 * Local context variables partially updated. 3442 */ 3443 static __rte_always_inline enum mlx5_txcmp_code 3444 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 3445 struct mlx5_txq_local *__rte_restrict loc, 3446 unsigned int olx) 3447 { 3448 struct mlx5_wqe_dseg *__rte_restrict dseg; 3449 struct mlx5_wqe *__rte_restrict wqe; 3450 unsigned int ds, nseg; 3451 3452 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3453 if (MLX5_TXOFF_CONFIG(TXPP)) { 3454 enum mlx5_txcmp_code wret; 3455 3456 /* Generate WAIT for scheduling if requested. */ 3457 wret = mlx5_tx_schedule_send(txq, loc, olx); 3458 if (wret == MLX5_TXCMP_CODE_EXIT) 3459 return MLX5_TXCMP_CODE_EXIT; 3460 if (wret == MLX5_TXCMP_CODE_ERROR) 3461 return MLX5_TXCMP_CODE_ERROR; 3462 } 3463 /* 3464 * No inline at all, it means the CPU cycles saving 3465 * is prioritized at configuration, we should not 3466 * copy any packet data to WQE. 3467 */ 3468 nseg = NB_SEGS(loc->mbuf); 3469 ds = 2 + nseg; 3470 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3471 return MLX5_TXCMP_CODE_EXIT; 3472 /* Check for maximal WQE size. */ 3473 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3474 return MLX5_TXCMP_CODE_ERROR; 3475 /* 3476 * Some Tx offloads may cause an error if 3477 * packet is not long enough, check against 3478 * assumed minimal length. 3479 */ 3480 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 3481 return MLX5_TXCMP_CODE_ERROR; 3482 #ifdef MLX5_PMD_SOFT_COUNTERS 3483 /* Update sent data bytes counter. 
*/ 3484 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 3485 if (MLX5_TXOFF_CONFIG(VLAN) && 3486 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3487 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 3488 #endif 3489 /* 3490 * SEND WQE, one WQEBB: 3491 * - Control Segment, SEND opcode 3492 * - Ethernet Segment, optional VLAN, no inline 3493 * - Data Segments, pointer only type 3494 */ 3495 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3496 loc->wqe_last = wqe; 3497 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 3498 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3499 dseg = &wqe->dseg[0]; 3500 do { 3501 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3502 struct rte_mbuf *mbuf; 3503 3504 /* 3505 * Zero length segment found, have to 3506 * correct total size of WQE in segments. 3507 * It is supposed to be rare occasion, so 3508 * in normal case (no zero length segments) 3509 * we avoid extra writing to the Control 3510 * Segment. 3511 */ 3512 --ds; 3513 wqe->cseg.sq_ds -= RTE_BE32(1); 3514 mbuf = loc->mbuf; 3515 loc->mbuf = mbuf->next; 3516 rte_pktmbuf_free_seg(mbuf); 3517 if (--nseg == 0) 3518 break; 3519 } else { 3520 mlx5_tx_dseg_ptr 3521 (txq, loc, dseg, 3522 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3523 rte_pktmbuf_data_len(loc->mbuf), olx); 3524 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3525 --loc->elts_free; 3526 if (--nseg == 0) 3527 break; 3528 ++dseg; 3529 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3530 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3531 loc->mbuf = loc->mbuf->next; 3532 } 3533 } while (true); 3534 txq->wqe_ci += (ds + 3) / 4; 3535 loc->wqe_free -= (ds + 3) / 4; 3536 return MLX5_TXCMP_CODE_MULTI; 3537 } 3538 3539 /** 3540 * Tx one packet function for multi-segment SEND. Supports all 3541 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3542 * sends one packet per WQE, with data inlining in 3543 * Ethernet Segment and minimal Data Segments. 3544 * 3545 * This routine is responsible for storing processed mbuf 3546 * into elts ring buffer and update elts_head. 3547 * 3548 * @param txq 3549 * Pointer to TX queue structure. 3550 * @param loc 3551 * Pointer to burst routine local context. 3552 * @param olx 3553 * Configured Tx offloads mask. It is fully defined at 3554 * compile time and may be used for optimization. 3555 * 3556 * @return 3557 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3558 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3559 * Local context variables partially updated. 3560 */ 3561 static __rte_always_inline enum mlx5_txcmp_code 3562 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 3563 struct mlx5_txq_local *__rte_restrict loc, 3564 unsigned int olx) 3565 { 3566 struct mlx5_wqe *__rte_restrict wqe; 3567 unsigned int ds, inlen, dlen, vlan = 0; 3568 3569 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3570 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3571 if (MLX5_TXOFF_CONFIG(TXPP)) { 3572 enum mlx5_txcmp_code wret; 3573 3574 /* Generate WAIT for scheduling if requested. */ 3575 wret = mlx5_tx_schedule_send(txq, loc, olx); 3576 if (wret == MLX5_TXCMP_CODE_EXIT) 3577 return MLX5_TXCMP_CODE_EXIT; 3578 if (wret == MLX5_TXCMP_CODE_ERROR) 3579 return MLX5_TXCMP_CODE_ERROR; 3580 } 3581 /* 3582 * First calculate data length to be inlined 3583 * to estimate the required space for WQE. 
3584 */ 3585 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3586 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3587 vlan = sizeof(struct rte_vlan_hdr); 3588 inlen = dlen + vlan; 3589 /* Check against minimal length. */ 3590 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3591 return MLX5_TXCMP_CODE_ERROR; 3592 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 3593 if (inlen > txq->inlen_send || 3594 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 3595 struct rte_mbuf *mbuf; 3596 unsigned int nxlen; 3597 uintptr_t start; 3598 3599 /* 3600 * Packet length exceeds the allowed inline 3601 * data length, check whether the minimal 3602 * inlining is required. 3603 */ 3604 if (txq->inlen_mode) { 3605 MLX5_ASSERT(txq->inlen_mode >= 3606 MLX5_ESEG_MIN_INLINE_SIZE); 3607 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 3608 inlen = txq->inlen_mode; 3609 } else { 3610 if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE || 3611 !vlan || txq->vlan_en) { 3612 /* 3613 * VLAN insertion will be done inside by HW. 3614 * It is not utmost effective - VLAN flag is 3615 * checked twice, but we should proceed the 3616 * inlining length correctly and take into 3617 * account the VLAN header being inserted. 3618 */ 3619 return mlx5_tx_packet_multi_send 3620 (txq, loc, olx); 3621 } 3622 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 3623 } 3624 /* 3625 * Now we know the minimal amount of data is requested 3626 * to inline. Check whether we should inline the buffers 3627 * from the chain beginning to eliminate some mbufs. 3628 */ 3629 mbuf = loc->mbuf; 3630 nxlen = rte_pktmbuf_data_len(mbuf); 3631 if (unlikely(nxlen <= txq->inlen_send)) { 3632 /* We can inline first mbuf at least. */ 3633 if (nxlen < inlen) { 3634 unsigned int smlen; 3635 3636 /* Scan mbufs till inlen filled. */ 3637 do { 3638 smlen = nxlen; 3639 mbuf = NEXT(mbuf); 3640 MLX5_ASSERT(mbuf); 3641 nxlen = rte_pktmbuf_data_len(mbuf); 3642 nxlen += smlen; 3643 } while (unlikely(nxlen < inlen)); 3644 if (unlikely(nxlen > txq->inlen_send)) { 3645 /* We cannot inline entire mbuf. */ 3646 smlen = inlen - smlen; 3647 start = rte_pktmbuf_mtod_offset 3648 (mbuf, uintptr_t, smlen); 3649 goto do_align; 3650 } 3651 } 3652 do { 3653 inlen = nxlen; 3654 mbuf = NEXT(mbuf); 3655 /* There should be not end of packet. */ 3656 MLX5_ASSERT(mbuf); 3657 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 3658 } while (unlikely(nxlen < txq->inlen_send)); 3659 } 3660 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 3661 /* 3662 * Check whether we can do inline to align start 3663 * address of data buffer to cacheline. 3664 */ 3665 do_align: 3666 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 3667 if (unlikely(start)) { 3668 start += inlen; 3669 if (start <= txq->inlen_send) 3670 inlen = start; 3671 } 3672 } 3673 /* 3674 * Check whether there are enough free WQEBBs: 3675 * - Control Segment 3676 * - Ethernet Segment 3677 * - First Segment of inlined Ethernet data 3678 * - ... data continued ... 3679 * - Data Segments of pointer/min inline type 3680 * 3681 * Estimate the number of Data Segments conservatively, 3682 * supposing no any mbufs is being freed during inlining. 3683 */ 3684 MLX5_ASSERT(inlen <= txq->inlen_send); 3685 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3686 MLX5_ESEG_MIN_INLINE_SIZE + 3687 MLX5_WSEG_SIZE + 3688 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3689 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3690 return MLX5_TXCMP_CODE_EXIT; 3691 /* Check for maximal WQE size. 
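 * A WQE is built of 16-byte segments packed four per 64-byte
 * WQEBB, which is where the (ds + 3) / 4 conversions in this
 * routine come from; e.g. ds = 6 segments round up to two WQEBBs
 * (illustrative numbers).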
*/ 3692 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3693 return MLX5_TXCMP_CODE_ERROR; 3694 #ifdef MLX5_PMD_SOFT_COUNTERS 3695 /* Update sent data bytes/packets counters. */ 3696 txq->stats.obytes += dlen + vlan; 3697 #endif 3698 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3699 loc->wqe_last = wqe; 3700 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 3701 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 3702 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3703 txq->wqe_ci += (ds + 3) / 4; 3704 loc->wqe_free -= (ds + 3) / 4; 3705 return MLX5_TXCMP_CODE_MULTI; 3706 } 3707 3708 /** 3709 * Tx burst function for multi-segment packets. Supports all 3710 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 3711 * sends one packet per WQE. Function stops sending if it 3712 * encounters the single-segment packet. 3713 * 3714 * This routine is responsible for storing processed mbuf 3715 * into elts ring buffer and update elts_head. 3716 * 3717 * @param txq 3718 * Pointer to TX queue structure. 3719 * @param[in] pkts 3720 * Packets to transmit. 3721 * @param pkts_n 3722 * Number of packets in array. 3723 * @param loc 3724 * Pointer to burst routine local context. 3725 * @param olx 3726 * Configured Tx offloads mask. It is fully defined at 3727 * compile time and may be used for optimization. 3728 * 3729 * @return 3730 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3731 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3732 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3733 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 3734 * Local context variables updated. 3735 */ 3736 static __rte_always_inline enum mlx5_txcmp_code 3737 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 3738 struct rte_mbuf **__rte_restrict pkts, 3739 unsigned int pkts_n, 3740 struct mlx5_txq_local *__rte_restrict loc, 3741 unsigned int olx) 3742 { 3743 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3744 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3745 pkts += loc->pkts_sent + 1; 3746 pkts_n -= loc->pkts_sent; 3747 for (;;) { 3748 enum mlx5_txcmp_code ret; 3749 3750 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3751 /* 3752 * Estimate the number of free elts quickly but 3753 * conservatively. Some segment may be fully inlined 3754 * and freed, ignore this here - precise estimation 3755 * is costly. 3756 */ 3757 if (loc->elts_free < NB_SEGS(loc->mbuf)) 3758 return MLX5_TXCMP_CODE_EXIT; 3759 if (MLX5_TXOFF_CONFIG(TSO) && 3760 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) { 3761 /* Proceed with multi-segment TSO. */ 3762 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 3763 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 3764 /* Proceed with multi-segment SEND with inlining. */ 3765 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 3766 } else { 3767 /* Proceed with multi-segment SEND w/o inlining. */ 3768 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 3769 } 3770 if (ret == MLX5_TXCMP_CODE_EXIT) 3771 return MLX5_TXCMP_CODE_EXIT; 3772 if (ret == MLX5_TXCMP_CODE_ERROR) 3773 return MLX5_TXCMP_CODE_ERROR; 3774 /* WQE is built, go to the next packet. */ 3775 ++loc->pkts_sent; 3776 --pkts_n; 3777 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3778 return MLX5_TXCMP_CODE_EXIT; 3779 loc->mbuf = *pkts++; 3780 if (pkts_n > 1) 3781 rte_prefetch0(*pkts); 3782 if (likely(NB_SEGS(loc->mbuf) > 1)) 3783 continue; 3784 /* Here ends the series of multi-segment packets. 
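* Report to the caller which dedicated routine should handle the
* next single-segment packet: TSO or the ordinary SEND path.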
*/ 3785 if (MLX5_TXOFF_CONFIG(TSO) && 3786 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3787 return MLX5_TXCMP_CODE_TSO; 3788 return MLX5_TXCMP_CODE_SINGLE; 3789 } 3790 MLX5_ASSERT(false); 3791 } 3792 3793 /** 3794 * Tx burst function for single-segment packets with TSO. 3795 * Supports all types of Tx offloads, except multi-packets. 3796 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 3797 * Function stops sending if it encounters the multi-segment 3798 * packet or packet without TSO requested. 3799 * 3800 * The routine is responsible for storing processed mbuf 3801 * into elts ring buffer and update elts_head if inline 3802 * offloads is requested due to possible early freeing 3803 * of the inlined mbufs (can not store pkts array in elts 3804 * as a batch). 3805 * 3806 * @param txq 3807 * Pointer to TX queue structure. 3808 * @param[in] pkts 3809 * Packets to transmit. 3810 * @param pkts_n 3811 * Number of packets in array. 3812 * @param loc 3813 * Pointer to burst routine local context. 3814 * @param olx 3815 * Configured Tx offloads mask. It is fully defined at 3816 * compile time and may be used for optimization. 3817 * 3818 * @return 3819 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3820 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3821 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3822 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3823 * Local context variables updated. 3824 */ 3825 static __rte_always_inline enum mlx5_txcmp_code 3826 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 3827 struct rte_mbuf **__rte_restrict pkts, 3828 unsigned int pkts_n, 3829 struct mlx5_txq_local *__rte_restrict loc, 3830 unsigned int olx) 3831 { 3832 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3833 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3834 pkts += loc->pkts_sent + 1; 3835 pkts_n -= loc->pkts_sent; 3836 for (;;) { 3837 struct mlx5_wqe_dseg *__rte_restrict dseg; 3838 struct mlx5_wqe *__rte_restrict wqe; 3839 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 3840 uint8_t *dptr; 3841 3842 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3843 if (MLX5_TXOFF_CONFIG(TXPP)) { 3844 enum mlx5_txcmp_code wret; 3845 3846 /* Generate WAIT for scheduling if requested. */ 3847 wret = mlx5_tx_schedule_send(txq, loc, olx); 3848 if (wret == MLX5_TXCMP_CODE_EXIT) 3849 return MLX5_TXCMP_CODE_EXIT; 3850 if (wret == MLX5_TXCMP_CODE_ERROR) 3851 return MLX5_TXCMP_CODE_ERROR; 3852 } 3853 dlen = rte_pktmbuf_data_len(loc->mbuf); 3854 if (MLX5_TXOFF_CONFIG(VLAN) && 3855 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 3856 vlan = sizeof(struct rte_vlan_hdr); 3857 } 3858 /* 3859 * First calculate the WQE size to check 3860 * whether we have enough space in ring buffer. 3861 */ 3862 hlen = loc->mbuf->l2_len + vlan + 3863 loc->mbuf->l3_len + loc->mbuf->l4_len; 3864 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 3865 return MLX5_TXCMP_CODE_ERROR; 3866 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3867 hlen += loc->mbuf->outer_l2_len + 3868 loc->mbuf->outer_l3_len; 3869 /* Segment must contain all TSO headers. */ 3870 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 3871 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3872 hlen > (dlen + vlan))) 3873 return MLX5_TXCMP_CODE_ERROR; 3874 /* 3875 * Check whether there are enough free WQEBBs: 3876 * - Control Segment 3877 * - Ethernet Segment 3878 * - First Segment of inlined Ethernet data 3879 * - ... data continued ... 
3880 * - Finishing Data Segment of pointer type 3881 */ 3882 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 3883 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3884 if (loc->wqe_free < ((ds + 3) / 4)) 3885 return MLX5_TXCMP_CODE_EXIT; 3886 #ifdef MLX5_PMD_SOFT_COUNTERS 3887 /* Update sent data bytes/packets counters. */ 3888 ntcp = (dlen + vlan - hlen + 3889 loc->mbuf->tso_segsz - 1) / 3890 loc->mbuf->tso_segsz; 3891 /* 3892 * One will be added for mbuf itself at the end 3893 * of the mlx5_tx_burst from loc->pkts_sent field. 3894 */ 3895 --ntcp; 3896 txq->stats.opackets += ntcp; 3897 txq->stats.obytes += dlen + vlan + ntcp * hlen; 3898 #endif 3899 /* 3900 * Build the TSO WQE: 3901 * - Control Segment 3902 * - Ethernet Segment with hlen bytes inlined 3903 * - Data Segment of pointer type 3904 */ 3905 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3906 loc->wqe_last = wqe; 3907 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3908 MLX5_OPCODE_TSO, olx); 3909 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 3910 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 3911 dlen -= hlen - vlan; 3912 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 3913 /* 3914 * WQE is built, update the loop parameters 3915 * and go to the next packet. 3916 */ 3917 txq->wqe_ci += (ds + 3) / 4; 3918 loc->wqe_free -= (ds + 3) / 4; 3919 if (MLX5_TXOFF_CONFIG(INLINE)) 3920 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3921 --loc->elts_free; 3922 ++loc->pkts_sent; 3923 --pkts_n; 3924 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3925 return MLX5_TXCMP_CODE_EXIT; 3926 loc->mbuf = *pkts++; 3927 if (pkts_n > 1) 3928 rte_prefetch0(*pkts); 3929 if (MLX5_TXOFF_CONFIG(MULTI) && 3930 unlikely(NB_SEGS(loc->mbuf) > 1)) 3931 return MLX5_TXCMP_CODE_MULTI; 3932 if (likely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) 3933 return MLX5_TXCMP_CODE_SINGLE; 3934 /* Continue with the next TSO packet. */ 3935 } 3936 MLX5_ASSERT(false); 3937 } 3938 3939 /** 3940 * Analyze the packet and select the best method to send. 3941 * 3942 * @param txq 3943 * Pointer to TX queue structure. 3944 * @param loc 3945 * Pointer to burst routine local context. 3946 * @param olx 3947 * Configured Tx offloads mask. It is fully defined at 3948 * compile time and may be used for optimization. 3949 * @param newp 3950 * The predefined flag whether do complete check for 3951 * multi-segment packets and TSO. 3952 * 3953 * @return 3954 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3955 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 3956 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 3957 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 3958 */ 3959 static __rte_always_inline enum mlx5_txcmp_code 3960 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 3961 struct mlx5_txq_local *__rte_restrict loc, 3962 unsigned int olx, 3963 bool newp) 3964 { 3965 /* Check for multi-segment packet. */ 3966 if (newp && 3967 MLX5_TXOFF_CONFIG(MULTI) && 3968 unlikely(NB_SEGS(loc->mbuf) > 1)) 3969 return MLX5_TXCMP_CODE_MULTI; 3970 /* Check for TSO packet. */ 3971 if (newp && 3972 MLX5_TXOFF_CONFIG(TSO) && 3973 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3974 return MLX5_TXCMP_CODE_TSO; 3975 /* Check if eMPW is enabled at all. */ 3976 if (!MLX5_TXOFF_CONFIG(EMPW)) 3977 return MLX5_TXCMP_CODE_SINGLE; 3978 /* Check if eMPW can be engaged. 
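* eMPW shares a single Ethernet Segment among the batched packets,
* so VLAN insertion cannot be offloaded there; a VLAN packet may
* join the batch only if it can be inlined entirely, including
* the VLAN header.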
*/
3979 if (MLX5_TXOFF_CONFIG(VLAN) &&
3980 unlikely(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) &&
3981 (!MLX5_TXOFF_CONFIG(INLINE) ||
3982 unlikely((rte_pktmbuf_data_len(loc->mbuf) +
3983 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) {
3984 /*
3985 * eMPW does not support VLAN insertion offload,
3986 * we have to inline the entire packet but
3987 * packet is too long for inlining.
3988 */
3989 return MLX5_TXCMP_CODE_SINGLE;
3990 }
3991 return MLX5_TXCMP_CODE_EMPW;
3992 }
3993
3994 /**
3995 * Check whether the next packet attributes match those of the eMPW batch.
3996 * In addition, for legacy MPW the packet length is checked as well.
3997 *
3998 * @param txq
3999 * Pointer to TX queue structure.
4000 * @param es
4001 * Pointer to Ethernet Segment of eMPW batch.
4002 * @param loc
4003 * Pointer to burst routine local context.
4004 * @param dlen
4005 * Length of previous packet in MPW descriptor.
4006 * @param olx
4007 * Configured Tx offloads mask. It is fully defined at
4008 * compile time and may be used for optimization.
4009 *
4010 * @return
4011 * true - packet matches the eMPW batch attributes.
4012 * false - no match, eMPW should be restarted.
4013 */
4014 static __rte_always_inline bool
4015 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq,
4016 struct mlx5_wqe_eseg *__rte_restrict es,
4017 struct mlx5_txq_local *__rte_restrict loc,
4018 uint32_t dlen,
4019 unsigned int olx)
4020 {
4021 uint8_t swp_flags = 0;
4022
4023 /* Compare the checksum flags, if any. */
4024 if (MLX5_TXOFF_CONFIG(CSUM) &&
4025 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags)
4026 return false;
4027 /* Compare the Software Parser offsets and flags. */
4028 if (MLX5_TXOFF_CONFIG(SWP) &&
4029 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) ||
4030 es->swp_flags != swp_flags))
4031 return false;
4032 /* Fill metadata field if needed. */
4033 if (MLX5_TXOFF_CONFIG(METADATA) &&
4034 es->metadata != (loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ?
4035 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0))
4036 return false;
4037 /* Legacy MPW can send packets with the same length only. */
4038 if (MLX5_TXOFF_CONFIG(MPW) &&
4039 dlen != rte_pktmbuf_data_len(loc->mbuf))
4040 return false;
4041 /* There must be no VLAN packets in eMPW loop. */
4042 if (MLX5_TXOFF_CONFIG(VLAN))
4043 MLX5_ASSERT(!(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT));
4044 /* Check if the scheduling is requested. */
4045 if (MLX5_TXOFF_CONFIG(TXPP) &&
4046 loc->mbuf->ol_flags & txq->ts_mask)
4047 return false;
4048 return true;
4049 }
4050
4051 /*
4052 * Update send loop variables and WQE for eMPW loop
4053 * without data inlining. Number of Data Segments is
4054 * equal to the number of sent packets.
4055 *
4056 * @param txq
4057 * Pointer to TX queue structure.
4058 * @param loc
4059 * Pointer to burst routine local context.
4060 * @param ds
4061 * Number of packets/Data Segments sent (one Data Segment per packet).
4062 * @param slen
4063 * Accumulated statistics, bytes sent.
4064 * @param olx
4065 * Configured Tx offloads mask. It is fully defined at
4066 * compile time and may be used for optimization.
4067 *
4068 * The routine returns no value, it only updates the local context,
4069 * the queue WQE index and the Control Segment of the built
4070 * eMPW WQE.
4071 */ 4072 static __rte_always_inline void 4073 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, 4074 struct mlx5_txq_local *__rte_restrict loc, 4075 unsigned int ds, 4076 unsigned int slen, 4077 unsigned int olx __rte_unused) 4078 { 4079 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4080 #ifdef MLX5_PMD_SOFT_COUNTERS 4081 /* Update sent data bytes counter. */ 4082 txq->stats.obytes += slen; 4083 #else 4084 (void)slen; 4085 #endif 4086 loc->elts_free -= ds; 4087 loc->pkts_sent += ds; 4088 ds += 2; 4089 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 4090 txq->wqe_ci += (ds + 3) / 4; 4091 loc->wqe_free -= (ds + 3) / 4; 4092 } 4093 4094 /* 4095 * Update send loop variables and WQE for eMPW loop 4096 * with data inlining. Gets the size of pushed descriptors 4097 * and data to the WQE. 4098 * 4099 * @param txq 4100 * Pointer to TX queue structure. 4101 * @param loc 4102 * Pointer to burst routine local context. 4103 * @param len 4104 * Total size of descriptor/data in bytes. 4105 * @param slen 4106 * Accumulated statistics, data bytes sent. 4107 * @param wqem 4108 * The base WQE for the eMPW/MPW descriptor. 4109 * @param olx 4110 * Configured Tx offloads mask. It is fully defined at 4111 * compile time and may be used for optimization. 4112 * 4113 * @return 4114 * true - packet match with eMPW batch attributes. 4115 * false - no match, eMPW should be restarted. 4116 */ 4117 static __rte_always_inline void 4118 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 4119 struct mlx5_txq_local *__rte_restrict loc, 4120 unsigned int len, 4121 unsigned int slen, 4122 struct mlx5_wqe *__rte_restrict wqem, 4123 unsigned int olx __rte_unused) 4124 { 4125 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 4126 4127 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4128 #ifdef MLX5_PMD_SOFT_COUNTERS 4129 /* Update sent data bytes counter. */ 4130 txq->stats.obytes += slen; 4131 #else 4132 (void)slen; 4133 #endif 4134 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 4135 /* 4136 * If the legacy MPW session contains the inline packets 4137 * we should set the only inline data segment length 4138 * and align the total length to the segment size. 4139 */ 4140 MLX5_ASSERT(len > sizeof(dseg->bcount)); 4141 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 4142 MLX5_ETH_WQE_DATA_INLINE); 4143 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 4144 } else { 4145 /* 4146 * The session is not legacy MPW or contains the 4147 * data buffer pointer segments. 4148 */ 4149 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 4150 len = len / MLX5_WSEG_SIZE + 2; 4151 } 4152 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 4153 txq->wqe_ci += (len + 3) / 4; 4154 loc->wqe_free -= (len + 3) / 4; 4155 loc->wqe_last = wqem; 4156 } 4157 4158 /** 4159 * The set of Tx burst functions for single-segment packets 4160 * without TSO and with Multi-Packet Writing feature support. 4161 * Supports all types of Tx offloads, except multi-packets 4162 * and TSO. 4163 * 4164 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends 4165 * as many packet per WQE as it can. If eMPW is not configured 4166 * or packet can not be sent with eMPW (VLAN insertion) the 4167 * ordinary SEND opcode is used and only one packet placed 4168 * in WQE. 4169 * 4170 * Functions stop sending if it encounters the multi-segment 4171 * packet or packet with TSO requested. 
4172 * 4173 * The routines are responsible for storing processed mbuf 4174 * into elts ring buffer and update elts_head if inlining 4175 * offload is requested. Otherwise the copying mbufs to elts 4176 * can be postponed and completed at the end of burst routine. 4177 * 4178 * @param txq 4179 * Pointer to TX queue structure. 4180 * @param[in] pkts 4181 * Packets to transmit. 4182 * @param pkts_n 4183 * Number of packets in array. 4184 * @param loc 4185 * Pointer to burst routine local context. 4186 * @param olx 4187 * Configured Tx offloads mask. It is fully defined at 4188 * compile time and may be used for optimization. 4189 * 4190 * @return 4191 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 4192 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 4193 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 4194 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 4195 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 4196 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 4197 * 4198 * Local context variables updated. 4199 * 4200 * 4201 * The routine sends packets with MLX5_OPCODE_EMPW 4202 * without inlining, this is dedicated optimized branch. 4203 * No VLAN insertion is supported. 4204 */ 4205 static __rte_always_inline enum mlx5_txcmp_code 4206 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 4207 struct rte_mbuf **__rte_restrict pkts, 4208 unsigned int pkts_n, 4209 struct mlx5_txq_local *__rte_restrict loc, 4210 unsigned int olx) 4211 { 4212 /* 4213 * Subroutine is the part of mlx5_tx_burst_single() 4214 * and sends single-segment packet with eMPW opcode 4215 * without data inlining. 4216 */ 4217 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4218 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 4219 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4220 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4221 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 4222 pkts += loc->pkts_sent + 1; 4223 pkts_n -= loc->pkts_sent; 4224 for (;;) { 4225 struct mlx5_wqe_dseg *__rte_restrict dseg; 4226 struct mlx5_wqe_eseg *__rte_restrict eseg; 4227 enum mlx5_txcmp_code ret; 4228 unsigned int part, loop; 4229 unsigned int slen = 0; 4230 4231 next_empw: 4232 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4233 if (MLX5_TXOFF_CONFIG(TXPP)) { 4234 enum mlx5_txcmp_code wret; 4235 4236 /* Generate WAIT for scheduling if requested. */ 4237 wret = mlx5_tx_schedule_send(txq, loc, olx); 4238 if (wret == MLX5_TXCMP_CODE_EXIT) 4239 return MLX5_TXCMP_CODE_EXIT; 4240 if (wret == MLX5_TXCMP_CODE_ERROR) 4241 return MLX5_TXCMP_CODE_ERROR; 4242 } 4243 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4244 MLX5_MPW_MAX_PACKETS : 4245 MLX5_EMPW_MAX_PACKETS); 4246 if (unlikely(loc->elts_free < part)) { 4247 /* We have no enough elts to save all mbufs. */ 4248 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 4249 return MLX5_TXCMP_CODE_EXIT; 4250 /* But we still able to send at least minimal eMPW. 
*/ 4251 part = loc->elts_free; 4252 } 4253 /* Check whether we have enough WQEs */ 4254 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 4255 if (unlikely(loc->wqe_free < 4256 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4257 return MLX5_TXCMP_CODE_EXIT; 4258 part = (loc->wqe_free * 4) - 2; 4259 } 4260 if (likely(part > 1)) 4261 rte_prefetch0(*pkts); 4262 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4263 /* 4264 * Build eMPW title WQEBB: 4265 * - Control Segment, eMPW opcode 4266 * - Ethernet Segment, no inline 4267 */ 4268 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 4269 MLX5_OPCODE_ENHANCED_MPSW, olx); 4270 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 4271 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4272 eseg = &loc->wqe_last->eseg; 4273 dseg = &loc->wqe_last->dseg[0]; 4274 loop = part; 4275 /* Store the packet length for legacy MPW. */ 4276 if (MLX5_TXOFF_CONFIG(MPW)) 4277 eseg->mss = rte_cpu_to_be_16 4278 (rte_pktmbuf_data_len(loc->mbuf)); 4279 for (;;) { 4280 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4281 #ifdef MLX5_PMD_SOFT_COUNTERS 4282 /* Update sent data bytes counter. */ 4283 slen += dlen; 4284 #endif 4285 mlx5_tx_dseg_ptr 4286 (txq, loc, dseg, 4287 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4288 dlen, olx); 4289 if (unlikely(--loop == 0)) 4290 break; 4291 loc->mbuf = *pkts++; 4292 if (likely(loop > 1)) 4293 rte_prefetch0(*pkts); 4294 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4295 /* 4296 * Unroll the completion code to avoid 4297 * returning variable value - it results in 4298 * unoptimized sequent checking in caller. 4299 */ 4300 if (ret == MLX5_TXCMP_CODE_MULTI) { 4301 part -= loop; 4302 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4303 if (unlikely(!loc->elts_free || 4304 !loc->wqe_free)) 4305 return MLX5_TXCMP_CODE_EXIT; 4306 return MLX5_TXCMP_CODE_MULTI; 4307 } 4308 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4309 if (ret == MLX5_TXCMP_CODE_TSO) { 4310 part -= loop; 4311 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4312 if (unlikely(!loc->elts_free || 4313 !loc->wqe_free)) 4314 return MLX5_TXCMP_CODE_EXIT; 4315 return MLX5_TXCMP_CODE_TSO; 4316 } 4317 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4318 part -= loop; 4319 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4320 if (unlikely(!loc->elts_free || 4321 !loc->wqe_free)) 4322 return MLX5_TXCMP_CODE_EXIT; 4323 return MLX5_TXCMP_CODE_SINGLE; 4324 } 4325 if (ret != MLX5_TXCMP_CODE_EMPW) { 4326 MLX5_ASSERT(false); 4327 part -= loop; 4328 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4329 return MLX5_TXCMP_CODE_ERROR; 4330 } 4331 /* 4332 * Check whether packet parameters coincide 4333 * within assumed eMPW batch: 4334 * - check sum settings 4335 * - metadata value 4336 * - software parser settings 4337 * - packets length (legacy MPW only) 4338 * - scheduling is not required 4339 */ 4340 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 4341 MLX5_ASSERT(loop); 4342 part -= loop; 4343 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4344 if (unlikely(!loc->elts_free || 4345 !loc->wqe_free)) 4346 return MLX5_TXCMP_CODE_EXIT; 4347 pkts_n -= part; 4348 goto next_empw; 4349 } 4350 /* Packet attributes match, continue the same eMPW. */ 4351 ++dseg; 4352 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4353 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4354 } 4355 /* eMPW is built successfully, update loop parameters. */ 4356 MLX5_ASSERT(!loop); 4357 MLX5_ASSERT(pkts_n >= part); 4358 #ifdef MLX5_PMD_SOFT_COUNTERS 4359 /* Update sent data bytes counter. 
*/ 4360 txq->stats.obytes += slen; 4361 #endif 4362 loc->elts_free -= part; 4363 loc->pkts_sent += part; 4364 txq->wqe_ci += (2 + part + 3) / 4; 4365 loc->wqe_free -= (2 + part + 3) / 4; 4366 pkts_n -= part; 4367 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4368 return MLX5_TXCMP_CODE_EXIT; 4369 loc->mbuf = *pkts++; 4370 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4371 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 4372 return ret; 4373 /* Continue sending eMPW batches. */ 4374 } 4375 MLX5_ASSERT(false); 4376 } 4377 4378 /** 4379 * The routine sends packets with MLX5_OPCODE_EMPW 4380 * with inlining, optionally supports VLAN insertion. 4381 */ 4382 static __rte_always_inline enum mlx5_txcmp_code 4383 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 4384 struct rte_mbuf **__rte_restrict pkts, 4385 unsigned int pkts_n, 4386 struct mlx5_txq_local *__rte_restrict loc, 4387 unsigned int olx) 4388 { 4389 /* 4390 * Subroutine is the part of mlx5_tx_burst_single() 4391 * and sends single-segment packet with eMPW opcode 4392 * with data inlining. 4393 */ 4394 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4395 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 4396 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4397 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4398 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 4399 pkts += loc->pkts_sent + 1; 4400 pkts_n -= loc->pkts_sent; 4401 for (;;) { 4402 struct mlx5_wqe_dseg *__rte_restrict dseg; 4403 struct mlx5_wqe *__rte_restrict wqem; 4404 enum mlx5_txcmp_code ret; 4405 unsigned int room, part, nlim; 4406 unsigned int slen = 0; 4407 4408 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4409 if (MLX5_TXOFF_CONFIG(TXPP)) { 4410 enum mlx5_txcmp_code wret; 4411 4412 /* Generate WAIT for scheduling if requested. */ 4413 wret = mlx5_tx_schedule_send(txq, loc, olx); 4414 if (wret == MLX5_TXCMP_CODE_EXIT) 4415 return MLX5_TXCMP_CODE_EXIT; 4416 if (wret == MLX5_TXCMP_CODE_ERROR) 4417 return MLX5_TXCMP_CODE_ERROR; 4418 } 4419 /* 4420 * Limits the amount of packets in one WQE 4421 * to improve CQE latency generation. 4422 */ 4423 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4424 MLX5_MPW_INLINE_MAX_PACKETS : 4425 MLX5_EMPW_MAX_PACKETS); 4426 /* Check whether we have minimal amount WQEs */ 4427 if (unlikely(loc->wqe_free < 4428 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4429 return MLX5_TXCMP_CODE_EXIT; 4430 if (likely(pkts_n > 1)) 4431 rte_prefetch0(*pkts); 4432 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4433 /* 4434 * Build eMPW title WQEBB: 4435 * - Control Segment, eMPW opcode, zero DS 4436 * - Ethernet Segment, no inline 4437 */ 4438 mlx5_tx_cseg_init(txq, loc, wqem, 0, 4439 MLX5_OPCODE_ENHANCED_MPSW, olx); 4440 mlx5_tx_eseg_none(txq, loc, wqem, 4441 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4442 dseg = &wqem->dseg[0]; 4443 /* Store the packet length for legacy MPW. */ 4444 if (MLX5_TXOFF_CONFIG(MPW)) 4445 wqem->eseg.mss = rte_cpu_to_be_16 4446 (rte_pktmbuf_data_len(loc->mbuf)); 4447 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 4448 loc->wqe_free) * MLX5_WQE_SIZE - 4449 MLX5_WQE_CSEG_SIZE - 4450 MLX5_WQE_ESEG_SIZE; 4451 /* Limit the room for legacy MPW sessions for performance. */ 4452 if (MLX5_TXOFF_CONFIG(MPW)) 4453 room = RTE_MIN(room, 4454 RTE_MAX(txq->inlen_empw + 4455 sizeof(dseg->bcount) + 4456 (MLX5_TXOFF_CONFIG(VLAN) ? 4457 sizeof(struct rte_vlan_hdr) : 0), 4458 MLX5_MPW_INLINE_MAX_PACKETS * 4459 MLX5_WQE_DSEG_SIZE)); 4460 /* Build WQE till we have space, packets and resources. 
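* 'room' tracks the bytes still available for Data Segments and
* inline data in this session, while 'part' keeps the initial
* value, so 'part - room' gives the size consumed by the session.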
*/ 4461 part = room; 4462 for (;;) { 4463 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4464 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 4465 unsigned int tlen; 4466 4467 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 4468 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 4469 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 4470 /* 4471 * Some Tx offloads may cause an error if 4472 * packet is not long enough, check against 4473 * assumed minimal length. 4474 */ 4475 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 4476 part -= room; 4477 if (unlikely(!part)) 4478 return MLX5_TXCMP_CODE_ERROR; 4479 /* 4480 * We have some successfully built 4481 * packet Data Segments to send. 4482 */ 4483 mlx5_tx_idone_empw(txq, loc, part, 4484 slen, wqem, olx); 4485 return MLX5_TXCMP_CODE_ERROR; 4486 } 4487 /* Inline or not inline - that's the Question. */ 4488 if (dlen > txq->inlen_empw || 4489 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) 4490 goto pointer_empw; 4491 if (MLX5_TXOFF_CONFIG(MPW)) { 4492 if (dlen > txq->inlen_send) 4493 goto pointer_empw; 4494 tlen = dlen; 4495 if (part == room) { 4496 /* Open new inline MPW session. */ 4497 tlen += sizeof(dseg->bcount); 4498 dseg->bcount = RTE_BE32(0); 4499 dseg = RTE_PTR_ADD 4500 (dseg, sizeof(dseg->bcount)); 4501 } else { 4502 /* 4503 * No pointer and inline descriptor 4504 * intermix for legacy MPW sessions. 4505 */ 4506 if (wqem->dseg[0].bcount) 4507 break; 4508 } 4509 } else { 4510 tlen = sizeof(dseg->bcount) + dlen; 4511 } 4512 /* Inline entire packet, optional VLAN insertion. */ 4513 if (MLX5_TXOFF_CONFIG(VLAN) && 4514 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4515 /* 4516 * The packet length must be checked in 4517 * mlx5_tx_able_to_empw() and packet 4518 * fits into inline length guaranteed. 4519 */ 4520 MLX5_ASSERT((dlen + 4521 sizeof(struct rte_vlan_hdr)) <= 4522 txq->inlen_empw); 4523 tlen += sizeof(struct rte_vlan_hdr); 4524 if (room < tlen) 4525 break; 4526 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 4527 dptr, dlen, olx); 4528 #ifdef MLX5_PMD_SOFT_COUNTERS 4529 /* Update sent data bytes counter. */ 4530 slen += sizeof(struct rte_vlan_hdr); 4531 #endif 4532 } else { 4533 if (room < tlen) 4534 break; 4535 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 4536 dptr, dlen, olx); 4537 } 4538 if (!MLX5_TXOFF_CONFIG(MPW)) 4539 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 4540 MLX5_ASSERT(room >= tlen); 4541 room -= tlen; 4542 /* 4543 * Packet data are completely inlined, 4544 * free the packet immediately. 4545 */ 4546 rte_pktmbuf_free_seg(loc->mbuf); 4547 goto next_mbuf; 4548 pointer_empw: 4549 /* 4550 * No pointer and inline descriptor 4551 * intermix for legacy MPW sessions. 4552 */ 4553 if (MLX5_TXOFF_CONFIG(MPW) && 4554 part != room && 4555 wqem->dseg[0].bcount == RTE_BE32(0)) 4556 break; 4557 /* 4558 * Not inlinable VLAN packets are 4559 * proceeded outside of this routine. 4560 */ 4561 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 4562 if (MLX5_TXOFF_CONFIG(VLAN)) 4563 MLX5_ASSERT(!(loc->mbuf->ol_flags & 4564 PKT_TX_VLAN_PKT)); 4565 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 4566 /* We have to store mbuf in elts.*/ 4567 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 4568 room -= MLX5_WQE_DSEG_SIZE; 4569 /* Ring buffer wraparound is checked at the loop end.*/ 4570 ++dseg; 4571 next_mbuf: 4572 #ifdef MLX5_PMD_SOFT_COUNTERS 4573 /* Update sent data bytes counter. 
*/ 4574 slen += dlen; 4575 #endif 4576 loc->pkts_sent++; 4577 loc->elts_free--; 4578 pkts_n--; 4579 if (unlikely(!pkts_n || !loc->elts_free)) { 4580 /* 4581 * We have no resources/packets to 4582 * continue build descriptors. 4583 */ 4584 part -= room; 4585 mlx5_tx_idone_empw(txq, loc, part, 4586 slen, wqem, olx); 4587 return MLX5_TXCMP_CODE_EXIT; 4588 } 4589 loc->mbuf = *pkts++; 4590 if (likely(pkts_n > 1)) 4591 rte_prefetch0(*pkts); 4592 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4593 /* 4594 * Unroll the completion code to avoid 4595 * returning variable value - it results in 4596 * unoptimized sequent checking in caller. 4597 */ 4598 if (ret == MLX5_TXCMP_CODE_MULTI) { 4599 part -= room; 4600 mlx5_tx_idone_empw(txq, loc, part, 4601 slen, wqem, olx); 4602 if (unlikely(!loc->elts_free || 4603 !loc->wqe_free)) 4604 return MLX5_TXCMP_CODE_EXIT; 4605 return MLX5_TXCMP_CODE_MULTI; 4606 } 4607 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4608 if (ret == MLX5_TXCMP_CODE_TSO) { 4609 part -= room; 4610 mlx5_tx_idone_empw(txq, loc, part, 4611 slen, wqem, olx); 4612 if (unlikely(!loc->elts_free || 4613 !loc->wqe_free)) 4614 return MLX5_TXCMP_CODE_EXIT; 4615 return MLX5_TXCMP_CODE_TSO; 4616 } 4617 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4618 part -= room; 4619 mlx5_tx_idone_empw(txq, loc, part, 4620 slen, wqem, olx); 4621 if (unlikely(!loc->elts_free || 4622 !loc->wqe_free)) 4623 return MLX5_TXCMP_CODE_EXIT; 4624 return MLX5_TXCMP_CODE_SINGLE; 4625 } 4626 if (ret != MLX5_TXCMP_CODE_EMPW) { 4627 MLX5_ASSERT(false); 4628 part -= room; 4629 mlx5_tx_idone_empw(txq, loc, part, 4630 slen, wqem, olx); 4631 return MLX5_TXCMP_CODE_ERROR; 4632 } 4633 /* Check if we have minimal room left. */ 4634 nlim--; 4635 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 4636 break; 4637 /* 4638 * Check whether packet parameters coincide 4639 * within assumed eMPW batch: 4640 * - check sum settings 4641 * - metadata value 4642 * - software parser settings 4643 * - packets length (legacy MPW only) 4644 * - scheduling is not required 4645 */ 4646 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 4647 loc, dlen, olx)) 4648 break; 4649 /* Packet attributes match, continue the same eMPW. */ 4650 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4651 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4652 } 4653 /* 4654 * We get here to close an existing eMPW 4655 * session and start the new one. 4656 */ 4657 MLX5_ASSERT(pkts_n); 4658 part -= room; 4659 if (unlikely(!part)) 4660 return MLX5_TXCMP_CODE_EXIT; 4661 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 4662 if (unlikely(!loc->elts_free || 4663 !loc->wqe_free)) 4664 return MLX5_TXCMP_CODE_EXIT; 4665 /* Continue the loop with new eMPW session. */ 4666 } 4667 MLX5_ASSERT(false); 4668 } 4669 4670 /** 4671 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 4672 * Data inlining and VLAN insertion are supported. 4673 */ 4674 static __rte_always_inline enum mlx5_txcmp_code 4675 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 4676 struct rte_mbuf **__rte_restrict pkts, 4677 unsigned int pkts_n, 4678 struct mlx5_txq_local *__rte_restrict loc, 4679 unsigned int olx) 4680 { 4681 /* 4682 * Subroutine is the part of mlx5_tx_burst_single() 4683 * and sends single-segment packet with SEND opcode. 
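* Depending on the configured limits the packet data is inlined
* completely, inlined partially (inlen_mode or the minimal header
* only), or referenced by a pointer without any inlining.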
4684 */ 4685 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4686 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4687 pkts += loc->pkts_sent + 1; 4688 pkts_n -= loc->pkts_sent; 4689 for (;;) { 4690 struct mlx5_wqe *__rte_restrict wqe; 4691 enum mlx5_txcmp_code ret; 4692 4693 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4694 if (MLX5_TXOFF_CONFIG(TXPP)) { 4695 enum mlx5_txcmp_code wret; 4696 4697 /* Generate WAIT for scheduling if requested. */ 4698 wret = mlx5_tx_schedule_send(txq, loc, olx); 4699 if (wret == MLX5_TXCMP_CODE_EXIT) 4700 return MLX5_TXCMP_CODE_EXIT; 4701 if (wret == MLX5_TXCMP_CODE_ERROR) 4702 return MLX5_TXCMP_CODE_ERROR; 4703 } 4704 if (MLX5_TXOFF_CONFIG(INLINE)) { 4705 unsigned int inlen, vlan = 0; 4706 4707 inlen = rte_pktmbuf_data_len(loc->mbuf); 4708 if (MLX5_TXOFF_CONFIG(VLAN) && 4709 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4710 vlan = sizeof(struct rte_vlan_hdr); 4711 inlen += vlan; 4712 static_assert((sizeof(struct rte_vlan_hdr) + 4713 sizeof(struct rte_ether_hdr)) == 4714 MLX5_ESEG_MIN_INLINE_SIZE, 4715 "invalid min inline data size"); 4716 } 4717 /* 4718 * If inlining is enabled at configuration time 4719 * the limit must be not less than minimal size. 4720 * Otherwise we would do extra check for data 4721 * size to avoid crashes due to length overflow. 4722 */ 4723 MLX5_ASSERT(txq->inlen_send >= 4724 MLX5_ESEG_MIN_INLINE_SIZE); 4725 if (inlen <= txq->inlen_send) { 4726 unsigned int seg_n, wqe_n; 4727 4728 rte_prefetch0(rte_pktmbuf_mtod 4729 (loc->mbuf, uint8_t *)); 4730 /* Check against minimal length. */ 4731 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 4732 return MLX5_TXCMP_CODE_ERROR; 4733 if (loc->mbuf->ol_flags & 4734 PKT_TX_DYNF_NOINLINE) { 4735 /* 4736 * The hint flag not to inline packet 4737 * data is set. Check whether we can 4738 * follow the hint. 4739 */ 4740 if ((!MLX5_TXOFF_CONFIG(EMPW) && 4741 txq->inlen_mode) || 4742 (MLX5_TXOFF_CONFIG(MPW) && 4743 txq->inlen_mode)) { 4744 if (inlen <= txq->inlen_send) 4745 goto single_inline; 4746 /* 4747 * The hardware requires the 4748 * minimal inline data header. 4749 */ 4750 goto single_min_inline; 4751 } 4752 if (MLX5_TXOFF_CONFIG(VLAN) && 4753 vlan && !txq->vlan_en) { 4754 /* 4755 * We must insert VLAN tag 4756 * by software means. 4757 */ 4758 goto single_part_inline; 4759 } 4760 goto single_no_inline; 4761 } 4762 single_inline: 4763 /* 4764 * Completely inlined packet data WQE: 4765 * - Control Segment, SEND opcode 4766 * - Ethernet Segment, no VLAN insertion 4767 * - Data inlined, VLAN optionally inserted 4768 * - Alignment to MLX5_WSEG_SIZE 4769 * Have to estimate amount of WQEBBs 4770 */ 4771 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 4772 MLX5_ESEG_MIN_INLINE_SIZE + 4773 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4774 /* Check if there are enough WQEBBs. */ 4775 wqe_n = (seg_n + 3) / 4; 4776 if (wqe_n > loc->wqe_free) 4777 return MLX5_TXCMP_CODE_EXIT; 4778 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4779 loc->wqe_last = wqe; 4780 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 4781 MLX5_OPCODE_SEND, olx); 4782 mlx5_tx_eseg_data(txq, loc, wqe, 4783 vlan, inlen, 0, olx); 4784 txq->wqe_ci += wqe_n; 4785 loc->wqe_free -= wqe_n; 4786 /* 4787 * Packet data are completely inlined, 4788 * free the packet immediately. 
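* No mbuf pointer is stored into elts for a fully inlined packet.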
4789 */ 4790 rte_pktmbuf_free_seg(loc->mbuf); 4791 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 4792 MLX5_TXOFF_CONFIG(MPW)) && 4793 txq->inlen_mode) { 4794 /* 4795 * If minimal inlining is requested the eMPW 4796 * feature should be disabled due to data is 4797 * inlined into Ethernet Segment, which can 4798 * not contain inlined data for eMPW due to 4799 * segment shared for all packets. 4800 */ 4801 struct mlx5_wqe_dseg *__rte_restrict dseg; 4802 unsigned int ds; 4803 uint8_t *dptr; 4804 4805 /* 4806 * The inline-mode settings require 4807 * to inline the specified amount of 4808 * data bytes to the Ethernet Segment. 4809 * We should check the free space in 4810 * WQE ring buffer to inline partially. 4811 */ 4812 single_min_inline: 4813 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 4814 MLX5_ASSERT(inlen > txq->inlen_mode); 4815 MLX5_ASSERT(txq->inlen_mode >= 4816 MLX5_ESEG_MIN_INLINE_SIZE); 4817 /* 4818 * Check whether there are enough free WQEBBs: 4819 * - Control Segment 4820 * - Ethernet Segment 4821 * - First Segment of inlined Ethernet data 4822 * - ... data continued ... 4823 * - Finishing Data Segment of pointer type 4824 */ 4825 ds = (MLX5_WQE_CSEG_SIZE + 4826 MLX5_WQE_ESEG_SIZE + 4827 MLX5_WQE_DSEG_SIZE + 4828 txq->inlen_mode - 4829 MLX5_ESEG_MIN_INLINE_SIZE + 4830 MLX5_WQE_DSEG_SIZE + 4831 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4832 if (loc->wqe_free < ((ds + 3) / 4)) 4833 return MLX5_TXCMP_CODE_EXIT; 4834 /* 4835 * Build the ordinary SEND WQE: 4836 * - Control Segment 4837 * - Ethernet Segment, inline inlen_mode bytes 4838 * - Data Segment of pointer type 4839 */ 4840 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4841 loc->wqe_last = wqe; 4842 mlx5_tx_cseg_init(txq, loc, wqe, ds, 4843 MLX5_OPCODE_SEND, olx); 4844 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 4845 txq->inlen_mode, 4846 0, olx); 4847 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4848 txq->inlen_mode - vlan; 4849 inlen -= txq->inlen_mode; 4850 mlx5_tx_dseg_ptr(txq, loc, dseg, 4851 dptr, inlen, olx); 4852 /* 4853 * WQE is built, update the loop parameters 4854 * and got to the next packet. 4855 */ 4856 txq->wqe_ci += (ds + 3) / 4; 4857 loc->wqe_free -= (ds + 3) / 4; 4858 /* We have to store mbuf in elts.*/ 4859 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4860 txq->elts[txq->elts_head++ & txq->elts_m] = 4861 loc->mbuf; 4862 --loc->elts_free; 4863 } else { 4864 uint8_t *dptr; 4865 unsigned int dlen; 4866 4867 /* 4868 * Partially inlined packet data WQE, we have 4869 * some space in title WQEBB, we can fill it 4870 * with some packet data. It takes one WQEBB, 4871 * it is available, no extra space check: 4872 * - Control Segment, SEND opcode 4873 * - Ethernet Segment, no VLAN insertion 4874 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 4875 * - Data Segment, pointer type 4876 * 4877 * We also get here if VLAN insertion is not 4878 * supported by HW, the inline is enabled. 4879 */ 4880 single_part_inline: 4881 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4882 loc->wqe_last = wqe; 4883 mlx5_tx_cseg_init(txq, loc, wqe, 4, 4884 MLX5_OPCODE_SEND, olx); 4885 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 4886 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4887 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 4888 /* 4889 * The length check is performed above, by 4890 * comparing with txq->inlen_send. We should 4891 * not get overflow here. 
4892 */ 4893 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 4894 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 4895 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 4896 dptr, dlen, olx); 4897 ++txq->wqe_ci; 4898 --loc->wqe_free; 4899 /* We have to store mbuf in elts.*/ 4900 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4901 txq->elts[txq->elts_head++ & txq->elts_m] = 4902 loc->mbuf; 4903 --loc->elts_free; 4904 } 4905 #ifdef MLX5_PMD_SOFT_COUNTERS 4906 /* Update sent data bytes counter. */ 4907 txq->stats.obytes += vlan + 4908 rte_pktmbuf_data_len(loc->mbuf); 4909 #endif 4910 } else { 4911 /* 4912 * No inline at all, it means the CPU cycles saving 4913 * is prioritized at configuration, we should not 4914 * copy any packet data to WQE. 4915 * 4916 * SEND WQE, one WQEBB: 4917 * - Control Segment, SEND opcode 4918 * - Ethernet Segment, optional VLAN, no inline 4919 * - Data Segment, pointer type 4920 */ 4921 single_no_inline: 4922 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4923 loc->wqe_last = wqe; 4924 mlx5_tx_cseg_init(txq, loc, wqe, 3, 4925 MLX5_OPCODE_SEND, olx); 4926 mlx5_tx_eseg_none(txq, loc, wqe, olx); 4927 mlx5_tx_dseg_ptr 4928 (txq, loc, &wqe->dseg[0], 4929 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4930 rte_pktmbuf_data_len(loc->mbuf), olx); 4931 ++txq->wqe_ci; 4932 --loc->wqe_free; 4933 /* 4934 * We should not store mbuf pointer in elts 4935 * if no inlining is configured, this is done 4936 * by calling routine in a batch copy. 4937 */ 4938 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4939 --loc->elts_free; 4940 #ifdef MLX5_PMD_SOFT_COUNTERS 4941 /* Update sent data bytes counter. */ 4942 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 4943 if (MLX5_TXOFF_CONFIG(VLAN) && 4944 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 4945 txq->stats.obytes += 4946 sizeof(struct rte_vlan_hdr); 4947 #endif 4948 } 4949 ++loc->pkts_sent; 4950 --pkts_n; 4951 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4952 return MLX5_TXCMP_CODE_EXIT; 4953 loc->mbuf = *pkts++; 4954 if (pkts_n > 1) 4955 rte_prefetch0(*pkts); 4956 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4957 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 4958 return ret; 4959 } 4960 MLX5_ASSERT(false); 4961 } 4962 4963 static __rte_always_inline enum mlx5_txcmp_code 4964 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 4965 struct rte_mbuf **__rte_restrict pkts, 4966 unsigned int pkts_n, 4967 struct mlx5_txq_local *__rte_restrict loc, 4968 unsigned int olx) 4969 { 4970 enum mlx5_txcmp_code ret; 4971 4972 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 4973 if (ret == MLX5_TXCMP_CODE_SINGLE) 4974 goto ordinary_send; 4975 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 4976 for (;;) { 4977 /* Optimize for inline/no inline eMPW send. */ 4978 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 4979 mlx5_tx_burst_empw_inline 4980 (txq, pkts, pkts_n, loc, olx) : 4981 mlx5_tx_burst_empw_simple 4982 (txq, pkts, pkts_n, loc, olx); 4983 if (ret != MLX5_TXCMP_CODE_SINGLE) 4984 return ret; 4985 /* The resources to send one packet should remain. */ 4986 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4987 ordinary_send: 4988 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 4989 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 4990 if (ret != MLX5_TXCMP_CODE_EMPW) 4991 return ret; 4992 /* The resources to send one packet should remain. */ 4993 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4994 } 4995 } 4996 4997 /** 4998 * DPDK Tx callback template. This is configured template 4999 * used to generate routines optimized for specified offload setup. 
5000 * One of this generated functions is chosen at SQ configuration 5001 * time. 5002 * 5003 * @param txq 5004 * Generic pointer to TX queue structure. 5005 * @param[in] pkts 5006 * Packets to transmit. 5007 * @param pkts_n 5008 * Number of packets in array. 5009 * @param olx 5010 * Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx 5011 * values. Should be static to take compile time static configuration 5012 * advantages. 5013 * 5014 * @return 5015 * Number of packets successfully transmitted (<= pkts_n). 5016 */ 5017 static __rte_always_inline uint16_t 5018 mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, 5019 struct rte_mbuf **__rte_restrict pkts, 5020 uint16_t pkts_n, 5021 unsigned int olx) 5022 { 5023 struct mlx5_txq_local loc; 5024 enum mlx5_txcmp_code ret; 5025 unsigned int part; 5026 5027 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 5028 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 5029 if (unlikely(!pkts_n)) 5030 return 0; 5031 loc.pkts_sent = 0; 5032 loc.pkts_copy = 0; 5033 loc.wqe_last = NULL; 5034 5035 send_loop: 5036 loc.pkts_loop = loc.pkts_sent; 5037 /* 5038 * Check if there are some CQEs, if any: 5039 * - process an encountered errors 5040 * - process the completed WQEs 5041 * - free related mbufs 5042 * - doorbell the NIC about processed CQEs 5043 */ 5044 rte_prefetch0(*(pkts + loc.pkts_sent)); 5045 mlx5_tx_handle_completion(txq, olx); 5046 /* 5047 * Calculate the number of available resources - elts and WQEs. 5048 * There are two possible different scenarios: 5049 * - no data inlining into WQEs, one WQEBB may contains up to 5050 * four packets, in this case elts become scarce resource 5051 * - data inlining into WQEs, one packet may require multiple 5052 * WQEBBs, the WQEs become the limiting factor. 5053 */ 5054 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 5055 loc.elts_free = txq->elts_s - 5056 (uint16_t)(txq->elts_head - txq->elts_tail); 5057 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 5058 loc.wqe_free = txq->wqe_s - 5059 (uint16_t)(txq->wqe_ci - txq->wqe_pi); 5060 if (unlikely(!loc.elts_free || !loc.wqe_free)) 5061 goto burst_exit; 5062 for (;;) { 5063 /* 5064 * Fetch the packet from array. Usually this is 5065 * the first packet in series of multi/single 5066 * segment packets. 5067 */ 5068 loc.mbuf = *(pkts + loc.pkts_sent); 5069 /* Dedicated branch for multi-segment packets. */ 5070 if (MLX5_TXOFF_CONFIG(MULTI) && 5071 unlikely(NB_SEGS(loc.mbuf) > 1)) { 5072 /* 5073 * Multi-segment packet encountered. 5074 * Hardware is able to process it only 5075 * with SEND/TSO opcodes, one packet 5076 * per WQE, do it in dedicated routine. 5077 */ 5078 enter_send_multi: 5079 MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy); 5080 part = loc.pkts_sent - loc.pkts_copy; 5081 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 5082 /* 5083 * There are some single-segment mbufs not 5084 * stored in elts. The mbufs must be in the 5085 * same order as WQEs, so we must copy the 5086 * mbufs to elts here, before the coming 5087 * multi-segment packet mbufs is appended. 5088 */ 5089 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, 5090 part, olx); 5091 loc.pkts_copy = loc.pkts_sent; 5092 } 5093 MLX5_ASSERT(pkts_n > loc.pkts_sent); 5094 ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx); 5095 if (!MLX5_TXOFF_CONFIG(INLINE)) 5096 loc.pkts_copy = loc.pkts_sent; 5097 /* 5098 * These returned code checks are supposed 5099 * to be optimized out due to routine inlining. 
5100 */ 5101 if (ret == MLX5_TXCMP_CODE_EXIT) { 5102 /* 5103 * The routine returns this code when 5104 * all packets are sent or there is no 5105 * enough resources to complete request. 5106 */ 5107 break; 5108 } 5109 if (ret == MLX5_TXCMP_CODE_ERROR) { 5110 /* 5111 * The routine returns this code when 5112 * some error in the incoming packets 5113 * format occurred. 5114 */ 5115 txq->stats.oerrors++; 5116 break; 5117 } 5118 if (ret == MLX5_TXCMP_CODE_SINGLE) { 5119 /* 5120 * The single-segment packet was encountered 5121 * in the array, try to send it with the 5122 * best optimized way, possible engaging eMPW. 5123 */ 5124 goto enter_send_single; 5125 } 5126 if (MLX5_TXOFF_CONFIG(TSO) && 5127 ret == MLX5_TXCMP_CODE_TSO) { 5128 /* 5129 * The single-segment TSO packet was 5130 * encountered in the array. 5131 */ 5132 goto enter_send_tso; 5133 } 5134 /* We must not get here. Something is going wrong. */ 5135 MLX5_ASSERT(false); 5136 txq->stats.oerrors++; 5137 break; 5138 } 5139 /* Dedicated branch for single-segment TSO packets. */ 5140 if (MLX5_TXOFF_CONFIG(TSO) && 5141 unlikely(loc.mbuf->ol_flags & PKT_TX_TCP_SEG)) { 5142 /* 5143 * TSO might require special way for inlining 5144 * (dedicated parameters) and is sent with 5145 * MLX5_OPCODE_TSO opcode only, provide this 5146 * in dedicated branch. 5147 */ 5148 enter_send_tso: 5149 MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1); 5150 MLX5_ASSERT(pkts_n > loc.pkts_sent); 5151 ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx); 5152 /* 5153 * These returned code checks are supposed 5154 * to be optimized out due to routine inlining. 5155 */ 5156 if (ret == MLX5_TXCMP_CODE_EXIT) 5157 break; 5158 if (ret == MLX5_TXCMP_CODE_ERROR) { 5159 txq->stats.oerrors++; 5160 break; 5161 } 5162 if (ret == MLX5_TXCMP_CODE_SINGLE) 5163 goto enter_send_single; 5164 if (MLX5_TXOFF_CONFIG(MULTI) && 5165 ret == MLX5_TXCMP_CODE_MULTI) { 5166 /* 5167 * The multi-segment packet was 5168 * encountered in the array. 5169 */ 5170 goto enter_send_multi; 5171 } 5172 /* We must not get here. Something is going wrong. */ 5173 MLX5_ASSERT(false); 5174 txq->stats.oerrors++; 5175 break; 5176 } 5177 /* 5178 * The dedicated branch for the single-segment packets 5179 * without TSO. Often these ones can be sent using 5180 * MLX5_OPCODE_EMPW with multiple packets in one WQE. 5181 * The routine builds the WQEs till it encounters 5182 * the TSO or multi-segment packet (in case if these 5183 * offloads are requested at SQ configuration time). 5184 */ 5185 enter_send_single: 5186 MLX5_ASSERT(pkts_n > loc.pkts_sent); 5187 ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx); 5188 /* 5189 * These returned code checks are supposed 5190 * to be optimized out due to routine inlining. 5191 */ 5192 if (ret == MLX5_TXCMP_CODE_EXIT) 5193 break; 5194 if (ret == MLX5_TXCMP_CODE_ERROR) { 5195 txq->stats.oerrors++; 5196 break; 5197 } 5198 if (MLX5_TXOFF_CONFIG(MULTI) && 5199 ret == MLX5_TXCMP_CODE_MULTI) { 5200 /* 5201 * The multi-segment packet was 5202 * encountered in the array. 5203 */ 5204 goto enter_send_multi; 5205 } 5206 if (MLX5_TXOFF_CONFIG(TSO) && 5207 ret == MLX5_TXCMP_CODE_TSO) { 5208 /* 5209 * The single-segment TSO packet was 5210 * encountered in the array. 5211 */ 5212 goto enter_send_tso; 5213 } 5214 /* We must not get here. Something is going wrong. 
*/
5215 MLX5_ASSERT(false);
5216 txq->stats.oerrors++;
5217 break;
5218 }
5219 /*
5220 * The main Tx loop is completed, do the rest:
5221 * - set completion request if thresholds are reached
5222 * - doorbell the hardware
5223 * - copy the rest of mbufs to elts (if any)
5224 */
5225 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) ||
5226 loc.pkts_sent >= loc.pkts_copy);
5227 /* Take a shortcut if nothing is sent. */
5228 if (unlikely(loc.pkts_sent == loc.pkts_loop))
5229 goto burst_exit;
5230 /* Request CQE generation if limits are reached. */
5231 mlx5_tx_request_completion(txq, &loc, olx);
5232 /*
5233 * Ring the QP doorbell immediately after WQE building completion
5234 * to improve latencies. The purely software-related data treatment
5235 * can be completed after the doorbell. Tx CQEs for this SQ are
5236 * processed in this thread only, by polling.
5237 *
5238 * The rdma core library can map the doorbell register in two ways,
5239 * depending on the environment variable "MLX5_SHUT_UP_BF":
5240 *
5241 * - as regular cached memory, if the variable is either missing or
5242 * set to zero. This type of mapping may cause significant
5243 * doorbell register write latency and requires an explicit
5244 * memory write barrier to mitigate this issue and prevent
5245 * write combining.
5246 *
5247 * - as non-cached memory, if the variable is present and set to
5248 * a non-zero value. This type of mapping may cause a performance
5249 * impact under heavy load conditions but no explicit write
5250 * memory barrier is required, which may improve core
5251 * performance.
5252 *
5253 * - the legacy behaviour (prior to the 19.08 release) was to use
5254 * some heuristics to decide whether the write memory barrier
5255 * should be performed. This behaviour is selected by specifying
5256 * tx_db_nc=2: the write barrier is skipped if the application
5257 * provides the full recommended burst of packets, assuming
5258 * that the next packets are coming and the write barrier
5259 * will be issued on the next burst (after descriptor writing,
5260 * at least).
5261 */
5262 mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc &&
5263 (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
5264 /* Not all of the mbufs may be stored into elts yet. */
5265 part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
5266 if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
5267 /*
5268 * There are some single-segment mbufs not stored in elts.
5269 * This can only be the case if the last packet was single-segment.
5270 * The copying is gathered into one place because it is
5271 * a good opportunity to optimize it with SIMD.
5272 * Unfortunately, if inlining is enabled, gaps in the
5273 * pointer array may appear due to early freeing of the
5274 * inlined mbufs.
5275 */
5276 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx);
5277 loc.pkts_copy = loc.pkts_sent;
5278 }
5279 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
5280 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
5281 if (pkts_n > loc.pkts_sent) {
5282 /*
5283 * If the burst size is large there might not be enough CQEs
5284 * fetched from the completion queue and not enough resources
5285 * freed to send all the packets.
5286 */
5287 goto send_loop;
5288 }
5289 burst_exit:
5290 #ifdef MLX5_PMD_SOFT_COUNTERS
5291 /* Increment sent packets counter. */
5292 txq->stats.opackets += loc.pkts_sent;
5293 #endif
5294 return loc.pkts_sent;
5295 }
5296
5297 /* Generate routines with Enhanced Multi-Packet Write support.
*/ 5298 MLX5_TXOFF_DECL(full_empw, 5299 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW) 5300 5301 MLX5_TXOFF_DECL(none_empw, 5302 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 5303 5304 MLX5_TXOFF_DECL(md_empw, 5305 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5306 5307 MLX5_TXOFF_DECL(mt_empw, 5308 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5309 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5310 5311 MLX5_TXOFF_DECL(mtsc_empw, 5312 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5313 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5314 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5315 5316 MLX5_TXOFF_DECL(mti_empw, 5317 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5318 MLX5_TXOFF_CONFIG_INLINE | 5319 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5320 5321 MLX5_TXOFF_DECL(mtv_empw, 5322 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5323 MLX5_TXOFF_CONFIG_VLAN | 5324 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5325 5326 MLX5_TXOFF_DECL(mtiv_empw, 5327 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5328 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5329 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5330 5331 MLX5_TXOFF_DECL(sc_empw, 5332 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5333 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5334 5335 MLX5_TXOFF_DECL(sci_empw, 5336 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5337 MLX5_TXOFF_CONFIG_INLINE | 5338 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5339 5340 MLX5_TXOFF_DECL(scv_empw, 5341 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5342 MLX5_TXOFF_CONFIG_VLAN | 5343 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5344 5345 MLX5_TXOFF_DECL(sciv_empw, 5346 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5347 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5348 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5349 5350 MLX5_TXOFF_DECL(i_empw, 5351 MLX5_TXOFF_CONFIG_INLINE | 5352 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5353 5354 MLX5_TXOFF_DECL(v_empw, 5355 MLX5_TXOFF_CONFIG_VLAN | 5356 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5357 5358 MLX5_TXOFF_DECL(iv_empw, 5359 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5360 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5361 5362 /* Generate routines without Enhanced Multi-Packet Write support. 
*/ 5363 MLX5_TXOFF_DECL(full, 5364 MLX5_TXOFF_CONFIG_FULL) 5365 5366 MLX5_TXOFF_DECL(none, 5367 MLX5_TXOFF_CONFIG_NONE) 5368 5369 MLX5_TXOFF_DECL(md, 5370 MLX5_TXOFF_CONFIG_METADATA) 5371 5372 MLX5_TXOFF_DECL(mt, 5373 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5374 MLX5_TXOFF_CONFIG_METADATA) 5375 5376 MLX5_TXOFF_DECL(mtsc, 5377 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5378 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5379 MLX5_TXOFF_CONFIG_METADATA) 5380 5381 MLX5_TXOFF_DECL(mti, 5382 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5383 MLX5_TXOFF_CONFIG_INLINE | 5384 MLX5_TXOFF_CONFIG_METADATA) 5385 5386 5387 MLX5_TXOFF_DECL(mtv, 5388 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5389 MLX5_TXOFF_CONFIG_VLAN | 5390 MLX5_TXOFF_CONFIG_METADATA) 5391 5392 5393 MLX5_TXOFF_DECL(mtiv, 5394 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5395 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5396 MLX5_TXOFF_CONFIG_METADATA) 5397 5398 MLX5_TXOFF_DECL(sc, 5399 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5400 MLX5_TXOFF_CONFIG_METADATA) 5401 5402 MLX5_TXOFF_DECL(sci, 5403 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5404 MLX5_TXOFF_CONFIG_INLINE | 5405 MLX5_TXOFF_CONFIG_METADATA) 5406 5407 5408 MLX5_TXOFF_DECL(scv, 5409 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5410 MLX5_TXOFF_CONFIG_VLAN | 5411 MLX5_TXOFF_CONFIG_METADATA) 5412 5413 5414 MLX5_TXOFF_DECL(sciv, 5415 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5416 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5417 MLX5_TXOFF_CONFIG_METADATA) 5418 5419 MLX5_TXOFF_DECL(i, 5420 MLX5_TXOFF_CONFIG_INLINE | 5421 MLX5_TXOFF_CONFIG_METADATA) 5422 5423 MLX5_TXOFF_DECL(v, 5424 MLX5_TXOFF_CONFIG_VLAN | 5425 MLX5_TXOFF_CONFIG_METADATA) 5426 5427 MLX5_TXOFF_DECL(iv, 5428 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5429 MLX5_TXOFF_CONFIG_METADATA) 5430 5431 /* Generate routines with timestamp scheduling. */ 5432 MLX5_TXOFF_DECL(full_ts_nompw, 5433 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP) 5434 5435 MLX5_TXOFF_DECL(full_ts_nompwi, 5436 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5437 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5438 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5439 MLX5_TXOFF_CONFIG_TXPP) 5440 5441 MLX5_TXOFF_DECL(full_ts, 5442 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP | 5443 MLX5_TXOFF_CONFIG_EMPW) 5444 5445 MLX5_TXOFF_DECL(full_ts_noi, 5446 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5447 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5448 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5449 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5450 5451 MLX5_TXOFF_DECL(none_ts, 5452 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP | 5453 MLX5_TXOFF_CONFIG_EMPW) 5454 5455 MLX5_TXOFF_DECL(mdi_ts, 5456 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5457 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5458 5459 MLX5_TXOFF_DECL(mti_ts, 5460 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5461 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5462 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5463 5464 MLX5_TXOFF_DECL(mtiv_ts, 5465 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5466 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5467 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP | 5468 MLX5_TXOFF_CONFIG_EMPW) 5469 5470 /* 5471 * Generate routines with Legacy Multi-Packet Write support. 
 * This mode is supported by ConnectX-4 Lx only and imposes
 * limitations on the offload set; the following are not supported:
 *   - ACL/Flows (metadata becomes meaningless)
 *   - WQE Inline headers
 *   - SRIOV (E-Switch offloads)
 *   - VLAN insertion
 *   - tunnel encapsulation/decapsulation
 *   - TSO
 */
MLX5_TXOFF_DECL(none_mpw,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_DECL(mci_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_DECL(mc_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_DECL(i_mpw,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

/*
 * Array of declared and compiled Tx burst functions and the
 * corresponding supported offload sets. The array is used to select
 * the Tx burst function for the offload set requested at Tx queue
 * configuration time.
 */
const struct {
	eth_tx_burst_t func;
	unsigned int olx;
} txoff_func[] = {
MLX5_TXOFF_INFO(full_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(none_empw,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(md_empw,
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mt_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtsc_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mti_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtv_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtiv_empw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sc_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sci_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(scv_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(sciv_empw,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(i_empw,
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(v_empw,
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(iv_empw,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full_ts_nompw,
		MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP)

MLX5_TXOFF_INFO(full_ts_nompwi,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP)

MLX5_TXOFF_INFO(full_ts,
		MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full_ts_noi,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(none_ts,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mdi_ts,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mti_ts,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA |
		MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(mtiv_ts,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP |
		MLX5_TXOFF_CONFIG_EMPW)

MLX5_TXOFF_INFO(full,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(none,
		MLX5_TXOFF_CONFIG_NONE)

MLX5_TXOFF_INFO(md,
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mt,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtsc,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mti,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtv,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(mtiv,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sc,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sci,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(scv,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(sciv,
		MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(i,
		MLX5_TXOFF_CONFIG_INLINE |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(v,
		MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(iv,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN |
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(none_mpw,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(mci_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(mc_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(i_mpw,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)
};
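
/*
 * Illustrative sketch (editorial addition, not part of the driver): the
 * table above is a plain array pairing each compiled burst routine with
 * the bit set of offloads it was built for, so it can be walked with
 * RTE_DIM() like any other static table. The helper name
 * mlx5_txoff_table_dump() is hypothetical.
 */
static void __rte_unused
mlx5_txoff_table_dump(void)
{
	unsigned int i;

	for (i = 0; i < RTE_DIM(txoff_func); i++)
		DRV_LOG(DEBUG, "Tx burst routine #%u supports olx %04X",
			i, txoff_func[i].olx);
}
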
/**
 * Configure the Tx function to use. The routine checks configured
 * Tx offloads for the device and selects the appropriate Tx burst
 * routine. There are multiple Tx burst routines compiled from
 * the same template, each optimized for a dedicated Tx offload set.
 *
 * @param dev
 *   Pointer to private data structure.
 *
 * @return
 *   Pointer to selected Tx burst function.
 */
eth_tx_burst_t
mlx5_select_tx_function(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
	unsigned int diff = 0, olx = 0, i, m;

	static_assert(MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE <=
		      MLX5_DSEG_MAX, "invalid WQE max size");
	static_assert(MLX5_WQE_CSEG_SIZE == MLX5_WSEG_SIZE,
		      "invalid WQE Control Segment size");
	static_assert(MLX5_WQE_ESEG_SIZE == MLX5_WSEG_SIZE,
		      "invalid WQE Ethernet Segment size");
	static_assert(MLX5_WQE_DSEG_SIZE == MLX5_WSEG_SIZE,
		      "invalid WQE Data Segment size");
	static_assert(MLX5_WQE_SIZE == 4 * MLX5_WSEG_SIZE,
		      "invalid WQE size");
	MLX5_ASSERT(priv);
	if (tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) {
		/* We should support Multi-Segment Packets. */
		olx |= MLX5_TXOFF_CONFIG_MULTI;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO |
			   DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
			   DEV_TX_OFFLOAD_GRE_TNL_TSO |
			   DEV_TX_OFFLOAD_IP_TNL_TSO |
			   DEV_TX_OFFLOAD_UDP_TNL_TSO)) {
		/* We should support TCP Send Offload. */
		olx |= MLX5_TXOFF_CONFIG_TSO;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO |
			   DEV_TX_OFFLOAD_UDP_TNL_TSO |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
		/* We should support Software Parser for Tunnels. */
		olx |= MLX5_TXOFF_CONFIG_SWP;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
			   DEV_TX_OFFLOAD_UDP_CKSUM |
			   DEV_TX_OFFLOAD_TCP_CKSUM |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
		/* We should support IP/TCP/UDP Checksums. */
		olx |= MLX5_TXOFF_CONFIG_CSUM;
	}
	if (tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT) {
		/* We should support VLAN insertion. */
		olx |= MLX5_TXOFF_CONFIG_VLAN;
	}
	if (tx_offloads & DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP &&
	    rte_mbuf_dynflag_lookup
			(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL) > 0 &&
	    rte_mbuf_dynfield_lookup
			(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL) > 0) {
		/* Offload configured, dynamic entities registered. */
		olx |= MLX5_TXOFF_CONFIG_TXPP;
	}
	if (priv->txqs_n && (*priv->txqs)[0]) {
		struct mlx5_txq_data *txd = (*priv->txqs)[0];

		if (txd->inlen_send) {
			/*
			 * Check the data inline requirements. Data inlining
			 * is enabled on a per-device basis, so checking
			 * the first Tx queue only is sufficient.
5774 * 5775 * If device does not support VLAN insertion in WQE 5776 * and some queues are requested to perform VLAN 5777 * insertion offload than inline must be enabled. 5778 */ 5779 olx |= MLX5_TXOFF_CONFIG_INLINE; 5780 } 5781 } 5782 if (config->mps == MLX5_MPW_ENHANCED && 5783 config->txq_inline_min <= 0) { 5784 /* 5785 * The NIC supports Enhanced Multi-Packet Write 5786 * and does not require minimal inline data. 5787 */ 5788 olx |= MLX5_TXOFF_CONFIG_EMPW; 5789 } 5790 if (rte_flow_dynf_metadata_avail()) { 5791 /* We should support Flow metadata. */ 5792 olx |= MLX5_TXOFF_CONFIG_METADATA; 5793 } 5794 if (config->mps == MLX5_MPW) { 5795 /* 5796 * The NIC supports Legacy Multi-Packet Write. 5797 * The MLX5_TXOFF_CONFIG_MPW controls the 5798 * descriptor building method in combination 5799 * with MLX5_TXOFF_CONFIG_EMPW. 5800 */ 5801 if (!(olx & (MLX5_TXOFF_CONFIG_TSO | 5802 MLX5_TXOFF_CONFIG_SWP | 5803 MLX5_TXOFF_CONFIG_VLAN | 5804 MLX5_TXOFF_CONFIG_METADATA))) 5805 olx |= MLX5_TXOFF_CONFIG_EMPW | 5806 MLX5_TXOFF_CONFIG_MPW; 5807 } 5808 /* 5809 * Scan the routines table to find the minimal 5810 * satisfying routine with requested offloads. 5811 */ 5812 m = RTE_DIM(txoff_func); 5813 for (i = 0; i < RTE_DIM(txoff_func); i++) { 5814 unsigned int tmp; 5815 5816 tmp = txoff_func[i].olx; 5817 if (tmp == olx) { 5818 /* Meets requested offloads exactly.*/ 5819 m = i; 5820 break; 5821 } 5822 if ((tmp & olx) != olx) { 5823 /* Does not meet requested offloads at all. */ 5824 continue; 5825 } 5826 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_MPW) 5827 /* Do not enable legacy MPW if not configured. */ 5828 continue; 5829 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_EMPW) 5830 /* Do not enable eMPW if not configured. */ 5831 continue; 5832 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_INLINE) 5833 /* Do not enable inlining if not configured. */ 5834 continue; 5835 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_TXPP) 5836 /* Do not enable scheduling if not configured. */ 5837 continue; 5838 /* 5839 * Some routine meets the requirements. 5840 * Check whether it has minimal amount 5841 * of not requested offloads. 5842 */ 5843 tmp = __builtin_popcountl(tmp & ~olx); 5844 if (m >= RTE_DIM(txoff_func) || tmp < diff) { 5845 /* First or better match, save and continue. */ 5846 m = i; 5847 diff = tmp; 5848 continue; 5849 } 5850 if (tmp == diff) { 5851 tmp = txoff_func[i].olx ^ txoff_func[m].olx; 5852 if (__builtin_ffsl(txoff_func[i].olx & ~tmp) < 5853 __builtin_ffsl(txoff_func[m].olx & ~tmp)) { 5854 /* Lighter not requested offload. 
/**
 * DPDK callback to get the Tx queue information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param tx_queue_id
 *   Tx queue identifier.
 *
 * @param qinfo
 *   Pointer to the Tx queue information structure.
 *
 * @return
 *   None.
 */
void
mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		  struct rte_eth_txq_info *qinfo)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[tx_queue_id];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);

	if (!txq)
		return;
	qinfo->nb_desc = txq->elts_s;
	qinfo->conf.tx_thresh.pthresh = 0;
	qinfo->conf.tx_thresh.hthresh = 0;
	qinfo->conf.tx_thresh.wthresh = 0;
	qinfo->conf.tx_rs_thresh = 0;
	qinfo->conf.tx_free_thresh = 0;
	qinfo->conf.tx_deferred_start = txq_ctrl ? 0 : 1;
	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
}

/**
 * DPDK callback to get the Tx packet burst mode information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param tx_queue_id
 *   Tx queue identifier.
 *
 * @param mode
 *   Pointer to the burst mode information.
 *
 * @return
 *   0 on success, -EINVAL on failure.
 */
int
mlx5_tx_burst_mode_get(struct rte_eth_dev *dev,
		       uint16_t tx_queue_id __rte_unused,
		       struct rte_eth_burst_mode *mode)
{
	eth_tx_burst_t pkt_burst = dev->tx_pkt_burst;
	unsigned int i, olx;

	for (i = 0; i < RTE_DIM(txoff_func); i++) {
		if (pkt_burst == txoff_func[i].func) {
			olx = txoff_func[i].olx;
			snprintf(mode->info, sizeof(mode->info),
				 "%s%s%s%s%s%s%s%s%s",
				 (olx & MLX5_TXOFF_CONFIG_EMPW) ?
				 ((olx & MLX5_TXOFF_CONFIG_MPW) ?
				 "Legacy MPW" : "Enhanced MPW") : "No MPW",
				 (olx & MLX5_TXOFF_CONFIG_MULTI) ?
				 " + MULTI" : "",
				 (olx & MLX5_TXOFF_CONFIG_TSO) ?
				 " + TSO" : "",
				 (olx & MLX5_TXOFF_CONFIG_SWP) ?
				 " + SWP" : "",
				 (olx & MLX5_TXOFF_CONFIG_CSUM) ?
				 " + CSUM" : "",
				 (olx & MLX5_TXOFF_CONFIG_INLINE) ?
				 " + INLINE" : "",
				 (olx & MLX5_TXOFF_CONFIG_VLAN) ?
				 " + VLAN" : "",
				 (olx & MLX5_TXOFF_CONFIG_METADATA) ?
				 " + METADATA" : "",
				 (olx & MLX5_TXOFF_CONFIG_TXPP) ?
				 " + TXPP" : "");
			return 0;
		}
	}
	return -EINVAL;
}
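
/*
 * Illustrative usage sketch (application side, editorial addition, not
 * part of the driver): the two callbacks above are reached through the
 * generic ethdev API, so an application can print the queue setup and
 * the offload string composed by mlx5_tx_burst_mode_get(). The function
 * name print_txq_summary() and its port/queue arguments are hypothetical;
 * the port is assumed to be already configured and started.
 */
#include <stdio.h>
#include <rte_ethdev.h>

static void
print_txq_summary(uint16_t port_id, uint16_t queue_id)
{
	struct rte_eth_txq_info qinfo;
	struct rte_eth_burst_mode mode;

	/* Dispatches to the PMD txq_info_get callback (mlx5_txq_info_get). */
	if (rte_eth_tx_queue_info_get(port_id, queue_id, &qinfo) == 0)
		printf("port %u txq %u: %u descriptors, offloads 0x%llx\n",
		       port_id, queue_id, (unsigned int)qinfo.nb_desc,
		       (unsigned long long)qinfo.conf.offloads);
	/* Dispatches to the PMD tx_burst_mode_get callback. */
	if (rte_eth_tx_burst_mode_get(port_id, queue_id, &mode) == 0)
		printf("port %u txq %u burst mode: %s\n",
		       port_id, queue_id, mode.info);
}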