/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015-2019 Mellanox Technologies, Ltd
 */

#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_cycles.h>

#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
	MLX5_TXCMP_CODE_EXIT = 0,
	MLX5_TXCMP_CODE_ERROR,
	MLX5_TXCMP_CODE_SINGLE,
	MLX5_TXCMP_CODE_MULTI,
	MLX5_TXCMP_CODE_TSO,
	MLX5_TXCMP_CODE_EMPW,
};

/*
 * These defines are used to configure the set of Tx burst routine options
 * supported at compile time. Options that are not specified are optimized
 * out, because the related "if" conditions can be evaluated at compile
 * time. Offloads with a bigger runtime check overhead (requiring more CPU
 * cycles to skip) should have a bigger index - this is needed to select
 * the best matching routine when there is no exact match and some
 * offloads are not actually requested.
 */
#define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets. */
#define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported. */
#define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads. */
#define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Checksums offloaded. */
#define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported. */
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported. */

/* The most common offloads groups.
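 * NONE requests no optional Tx offloads; FULL combines all individual
 * offload bits defined above except MLX5_TXOFF_CONFIG_EMPW.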
*/ 66 #define MLX5_TXOFF_CONFIG_NONE 0 67 #define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \ 68 MLX5_TXOFF_CONFIG_TSO | \ 69 MLX5_TXOFF_CONFIG_SWP | \ 70 MLX5_TXOFF_CONFIG_CSUM | \ 71 MLX5_TXOFF_CONFIG_INLINE | \ 72 MLX5_TXOFF_CONFIG_VLAN | \ 73 MLX5_TXOFF_CONFIG_METADATA) 74 75 #define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask) 76 77 #define MLX5_TXOFF_DECL(func, olx) \ 78 static uint16_t mlx5_tx_burst_##func(void *txq, \ 79 struct rte_mbuf **pkts, \ 80 uint16_t pkts_n) \ 81 { \ 82 return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \ 83 pkts, pkts_n, (olx)); \ 84 } 85 86 #define MLX5_TXOFF_INFO(func, olx) {mlx5_tx_burst_##func, olx}, 87 88 static __rte_always_inline uint32_t 89 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe); 90 91 static __rte_always_inline int 92 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 93 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe); 94 95 static __rte_always_inline uint32_t 96 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe); 97 98 static __rte_always_inline void 99 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 100 volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res); 101 102 static __rte_always_inline void 103 mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx, 104 const unsigned int strd_n); 105 106 static int 107 mlx5_queue_state_modify(struct rte_eth_dev *dev, 108 struct mlx5_mp_arg_queue_state_modify *sm); 109 110 static inline void 111 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *restrict tcp, 112 volatile struct mlx5_cqe *restrict cqe, 113 uint32_t phcsum); 114 115 static inline void 116 mlx5_lro_update_hdr(uint8_t *restrict padd, 117 volatile struct mlx5_cqe *restrict cqe, 118 uint32_t len); 119 120 uint32_t mlx5_ptype_table[] __rte_cache_aligned = { 121 [0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */ 122 }; 123 124 uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned; 125 uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned; 126 127 /** 128 * Build a table to translate Rx completion flags to packet type. 129 * 130 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 131 */ 132 void 133 mlx5_set_ptype_table(void) 134 { 135 unsigned int i; 136 uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table; 137 138 /* Last entry must not be overwritten, reserved for errored packet. 
*/ 139 for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i) 140 (*p)[i] = RTE_PTYPE_UNKNOWN; 141 /* 142 * The index to the array should have: 143 * bit[1:0] = l3_hdr_type 144 * bit[4:2] = l4_hdr_type 145 * bit[5] = ip_frag 146 * bit[6] = tunneled 147 * bit[7] = outer_l3_type 148 */ 149 /* L2 */ 150 (*p)[0x00] = RTE_PTYPE_L2_ETHER; 151 /* L3 */ 152 (*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 153 RTE_PTYPE_L4_NONFRAG; 154 (*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 155 RTE_PTYPE_L4_NONFRAG; 156 /* Fragmented */ 157 (*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 158 RTE_PTYPE_L4_FRAG; 159 (*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 160 RTE_PTYPE_L4_FRAG; 161 /* TCP */ 162 (*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 163 RTE_PTYPE_L4_TCP; 164 (*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 165 RTE_PTYPE_L4_TCP; 166 (*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 167 RTE_PTYPE_L4_TCP; 168 (*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 169 RTE_PTYPE_L4_TCP; 170 (*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 171 RTE_PTYPE_L4_TCP; 172 (*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 173 RTE_PTYPE_L4_TCP; 174 /* UDP */ 175 (*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 176 RTE_PTYPE_L4_UDP; 177 (*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 178 RTE_PTYPE_L4_UDP; 179 /* Repeat with outer_l3_type being set. Just in case. */ 180 (*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 181 RTE_PTYPE_L4_NONFRAG; 182 (*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 183 RTE_PTYPE_L4_NONFRAG; 184 (*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 185 RTE_PTYPE_L4_FRAG; 186 (*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 187 RTE_PTYPE_L4_FRAG; 188 (*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 189 RTE_PTYPE_L4_TCP; 190 (*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 191 RTE_PTYPE_L4_TCP; 192 (*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 193 RTE_PTYPE_L4_TCP; 194 (*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 195 RTE_PTYPE_L4_TCP; 196 (*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 197 RTE_PTYPE_L4_TCP; 198 (*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 199 RTE_PTYPE_L4_TCP; 200 (*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 201 RTE_PTYPE_L4_UDP; 202 (*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 203 RTE_PTYPE_L4_UDP; 204 /* Tunneled - L3 */ 205 (*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 206 (*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 207 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 208 RTE_PTYPE_INNER_L4_NONFRAG; 209 (*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 210 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 211 RTE_PTYPE_INNER_L4_NONFRAG; 212 (*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 213 (*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 214 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 215 RTE_PTYPE_INNER_L4_NONFRAG; 216 (*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 217 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 218 RTE_PTYPE_INNER_L4_NONFRAG; 219 /* Tunneled - Fragmented */ 220 (*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 221 
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 222 RTE_PTYPE_INNER_L4_FRAG; 223 (*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 224 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 225 RTE_PTYPE_INNER_L4_FRAG; 226 (*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 227 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 228 RTE_PTYPE_INNER_L4_FRAG; 229 (*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 230 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 231 RTE_PTYPE_INNER_L4_FRAG; 232 /* Tunneled - TCP */ 233 (*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 234 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 235 RTE_PTYPE_INNER_L4_TCP; 236 (*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 237 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 238 RTE_PTYPE_INNER_L4_TCP; 239 (*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 240 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 241 RTE_PTYPE_INNER_L4_TCP; 242 (*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 243 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 244 RTE_PTYPE_INNER_L4_TCP; 245 (*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 246 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 247 RTE_PTYPE_INNER_L4_TCP; 248 (*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 249 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 250 RTE_PTYPE_INNER_L4_TCP; 251 (*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 252 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 253 RTE_PTYPE_INNER_L4_TCP; 254 (*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 255 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 256 RTE_PTYPE_INNER_L4_TCP; 257 (*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 258 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 259 RTE_PTYPE_INNER_L4_TCP; 260 (*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 261 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 262 RTE_PTYPE_INNER_L4_TCP; 263 (*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 264 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 265 RTE_PTYPE_INNER_L4_TCP; 266 (*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 267 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 268 RTE_PTYPE_INNER_L4_TCP; 269 /* Tunneled - UDP */ 270 (*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 271 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 272 RTE_PTYPE_INNER_L4_UDP; 273 (*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 274 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 275 RTE_PTYPE_INNER_L4_UDP; 276 (*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 277 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 278 RTE_PTYPE_INNER_L4_UDP; 279 (*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 280 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 281 RTE_PTYPE_INNER_L4_UDP; 282 } 283 284 /** 285 * Build a table to translate packet to checksum type of Verbs. 286 */ 287 void 288 mlx5_set_cksum_table(void) 289 { 290 unsigned int i; 291 uint8_t v; 292 293 /* 294 * The index should have: 295 * bit[0] = PKT_TX_TCP_SEG 296 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 297 * bit[4] = PKT_TX_IP_CKSUM 298 * bit[8] = PKT_TX_OUTER_IP_CKSUM 299 * bit[9] = tunnel 300 */ 301 for (i = 0; i < RTE_DIM(mlx5_cksum_table); ++i) { 302 v = 0; 303 if (i & (1 << 9)) { 304 /* Tunneled packet. */ 305 if (i & (1 << 8)) /* Outer IP. */ 306 v |= MLX5_ETH_WQE_L3_CSUM; 307 if (i & (1 << 4)) /* Inner IP. */ 308 v |= MLX5_ETH_WQE_L3_INNER_CSUM; 309 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 310 v |= MLX5_ETH_WQE_L4_INNER_CSUM; 311 } else { 312 /* No tunnel. 
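		 * For example, an index with bit[4] (PKT_TX_IP_CKSUM) and one
		 * of the L4/TSO bits set resolves below to
		 * MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM.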
*/ 313 if (i & (1 << 4)) /* IP. */ 314 v |= MLX5_ETH_WQE_L3_CSUM; 315 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 316 v |= MLX5_ETH_WQE_L4_CSUM; 317 } 318 mlx5_cksum_table[i] = v; 319 } 320 } 321 322 /** 323 * Build a table to translate packet type of mbuf to SWP type of Verbs. 324 */ 325 void 326 mlx5_set_swp_types_table(void) 327 { 328 unsigned int i; 329 uint8_t v; 330 331 /* 332 * The index should have: 333 * bit[0:1] = PKT_TX_L4_MASK 334 * bit[4] = PKT_TX_IPV6 335 * bit[8] = PKT_TX_OUTER_IPV6 336 * bit[9] = PKT_TX_OUTER_UDP 337 */ 338 for (i = 0; i < RTE_DIM(mlx5_swp_types_table); ++i) { 339 v = 0; 340 if (i & (1 << 8)) 341 v |= MLX5_ETH_WQE_L3_OUTER_IPV6; 342 if (i & (1 << 9)) 343 v |= MLX5_ETH_WQE_L4_OUTER_UDP; 344 if (i & (1 << 4)) 345 v |= MLX5_ETH_WQE_L3_INNER_IPV6; 346 if ((i & 3) == (PKT_TX_UDP_CKSUM >> 52)) 347 v |= MLX5_ETH_WQE_L4_INNER_UDP; 348 mlx5_swp_types_table[i] = v; 349 } 350 } 351 352 /** 353 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 354 * Flags must be preliminary initialized to zero. 355 * 356 * @param loc 357 * Pointer to burst routine local context. 358 * @param swp_flags 359 * Pointer to store Software Parser flags 360 * @param olx 361 * Configured Tx offloads mask. It is fully defined at 362 * compile time and may be used for optimization. 363 * 364 * @return 365 * Software Parser offsets packed in dword. 366 * Software Parser flags are set by pointer. 367 */ 368 static __rte_always_inline uint32_t 369 txq_mbuf_to_swp(struct mlx5_txq_local *restrict loc, 370 uint8_t *swp_flags, 371 unsigned int olx) 372 { 373 uint64_t ol, tunnel; 374 unsigned int idx, off; 375 uint32_t set; 376 377 if (!MLX5_TXOFF_CONFIG(SWP)) 378 return 0; 379 ol = loc->mbuf->ol_flags; 380 tunnel = ol & PKT_TX_TUNNEL_MASK; 381 /* 382 * Check whether Software Parser is required. 383 * Only customized tunnels may ask for. 384 */ 385 if (likely(tunnel != PKT_TX_TUNNEL_UDP && tunnel != PKT_TX_TUNNEL_IP)) 386 return 0; 387 /* 388 * The index should have: 389 * bit[0:1] = PKT_TX_L4_MASK 390 * bit[4] = PKT_TX_IPV6 391 * bit[8] = PKT_TX_OUTER_IPV6 392 * bit[9] = PKT_TX_OUTER_UDP 393 */ 394 idx = (ol & (PKT_TX_L4_MASK | PKT_TX_IPV6 | PKT_TX_OUTER_IPV6)) >> 52; 395 idx |= (tunnel == PKT_TX_TUNNEL_UDP) ? (1 << 9) : 0; 396 *swp_flags = mlx5_swp_types_table[idx]; 397 /* 398 * Set offsets for SW parser. Since ConnectX-5, SW parser just 399 * complements HW parser. SW parser starts to engage only if HW parser 400 * can't reach a header. For the older devices, HW parser will not kick 401 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 402 * should be set regardless of HW offload. 403 */ 404 off = loc->mbuf->outer_l2_len; 405 if (MLX5_TXOFF_CONFIG(VLAN) && ol & PKT_TX_VLAN_PKT) 406 off += sizeof(struct rte_vlan_hdr); 407 set = (off >> 1) << 8; /* Outer L3 offset. */ 408 off += loc->mbuf->outer_l3_len; 409 if (tunnel == PKT_TX_TUNNEL_UDP) 410 set |= off >> 1; /* Outer L4 offset. */ 411 if (ol & (PKT_TX_IPV4 | PKT_TX_IPV6)) { /* Inner IP. */ 412 const uint64_t csum = ol & PKT_TX_L4_MASK; 413 off += loc->mbuf->l2_len; 414 set |= (off >> 1) << 24; /* Inner L3 offset. */ 415 if (csum == PKT_TX_TCP_CKSUM || 416 csum == PKT_TX_UDP_CKSUM || 417 (MLX5_TXOFF_CONFIG(TSO) && ol & PKT_TX_TCP_SEG)) { 418 off += loc->mbuf->l3_len; 419 set |= (off >> 1) << 16; /* Inner L4 offset. */ 420 } 421 } 422 set = rte_cpu_to_le_32(set); 423 return set; 424 } 425 426 /** 427 * Convert the Checksum offloads to Verbs. 428 * 429 * @param buf 430 * Pointer to the mbuf. 
 *
 * @return
 *   Converted checksum flags.
 */
static __rte_always_inline uint8_t
txq_ol_cksum_to_cs(struct rte_mbuf *buf)
{
	uint32_t idx;
	uint8_t is_tunnel = !!(buf->ol_flags & PKT_TX_TUNNEL_MASK);
	const uint64_t ol_flags_mask = PKT_TX_TCP_SEG | PKT_TX_L4_MASK |
				       PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM;

	/*
	 * The index should have:
	 * bit[0] = PKT_TX_TCP_SEG
	 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM
	 * bit[4] = PKT_TX_IP_CKSUM
	 * bit[8] = PKT_TX_OUTER_IP_CKSUM
	 * bit[9] = tunnel
	 */
	idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9);
	return mlx5_cksum_table[idx];
}

/**
 * Internal function to compute the number of used descriptors in an Rx queue.
 *
 * @param rxq
 *   The Rx queue.
 *
 * @return
 *   The number of used Rx descriptors.
 */
static uint32_t
rx_queue_count(struct mlx5_rxq_data *rxq)
{
	struct rxq_zip *zip = &rxq->zip;
	volatile struct mlx5_cqe *cqe;
	const unsigned int cqe_n = (1 << rxq->cqe_n);
	const unsigned int cqe_cnt = cqe_n - 1;
	unsigned int cq_ci;
	unsigned int used;

	/* If we are processing a compressed CQE. */
	if (zip->ai) {
		used = zip->cqe_cnt - zip->ca;
		cq_ci = zip->cq_ci;
	} else {
		used = 0;
		cq_ci = rxq->cq_ci;
	}
	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) {
		int8_t op_own;
		unsigned int n;

		op_own = cqe->op_own;
		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
			n = rte_be_to_cpu_32(cqe->byte_cnt);
		else
			n = 1;
		cq_ci += n;
		used += n;
		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	}
	used = RTE_MIN(used, (1U << rxq->elts_n) - 1);
	return used;
}

/**
 * DPDK callback to check the status of a Rx descriptor.
 *
 * @param rx_queue
 *   The Rx queue.
 * @param[in] offset
 *   The index of the descriptor in the ring.
 *
 * @return
 *   The status of the Rx descriptor.
 */
int
mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
{
	struct mlx5_rxq_data *rxq = rx_queue;
	struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv);

	if (dev->rx_pkt_burst != mlx5_rx_burst) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	if (offset >= (1 << rxq->elts_n)) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (offset < rx_queue_count(rxq))
		return RTE_ETH_RX_DESC_DONE;
	return RTE_ETH_RX_DESC_AVAIL;
}

/**
 * DPDK callback to get the number of used descriptors in a Rx queue.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   The Rx queue.
 *
 * @return
 *   The number of used Rx descriptors.
 *   -EINVAL if the queue is invalid.
 */
uint32_t
mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_data *rxq;

	if (dev->rx_pkt_burst != mlx5_rx_burst) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	rxq = (*priv->rxqs)[rx_queue_id];
	if (!rxq) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	return rx_queue_count(rxq);
}

#define MLX5_SYSTEM_LOG_DIR "/var/log"
/**
 * Dump debug information to a log file.
 *
 * @param fname
 *   The file name.
569 * @param hex_title 570 * If not NULL this string is printed as a header to the output 571 * and the output will be in hexadecimal view. 572 * @param buf 573 * This is the buffer address to print out. 574 * @param len 575 * The number of bytes to dump out. 576 */ 577 void 578 mlx5_dump_debug_information(const char *fname, const char *hex_title, 579 const void *buf, unsigned int hex_len) 580 { 581 FILE *fd; 582 583 MKSTR(path, "%s/%s", MLX5_SYSTEM_LOG_DIR, fname); 584 fd = fopen(path, "a+"); 585 if (!fd) { 586 DRV_LOG(WARNING, "cannot open %s for debug dump\n", 587 path); 588 MKSTR(path2, "./%s", fname); 589 fd = fopen(path2, "a+"); 590 if (!fd) { 591 DRV_LOG(ERR, "cannot open %s for debug dump\n", 592 path2); 593 return; 594 } 595 DRV_LOG(INFO, "New debug dump in file %s\n", path2); 596 } else { 597 DRV_LOG(INFO, "New debug dump in file %s\n", path); 598 } 599 if (hex_title) 600 rte_hexdump(fd, hex_title, buf, hex_len); 601 else 602 fprintf(fd, "%s", (const char *)buf); 603 fprintf(fd, "\n\n\n"); 604 fclose(fd); 605 } 606 607 /** 608 * Move QP from error state to running state and initialize indexes. 609 * 610 * @param txq_ctrl 611 * Pointer to TX queue control structure. 612 * 613 * @return 614 * 0 on success, else -1. 615 */ 616 static int 617 tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl) 618 { 619 struct mlx5_mp_arg_queue_state_modify sm = { 620 .is_wq = 0, 621 .queue_id = txq_ctrl->txq.idx, 622 }; 623 624 if (mlx5_queue_state_modify(ETH_DEV(txq_ctrl->priv), &sm)) 625 return -1; 626 txq_ctrl->txq.wqe_ci = 0; 627 txq_ctrl->txq.wqe_pi = 0; 628 txq_ctrl->txq.elts_comp = 0; 629 return 0; 630 } 631 632 /* Return 1 if the error CQE is signed otherwise, sign it and return 0. */ 633 static int 634 check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe) 635 { 636 static const uint8_t magic[] = "seen"; 637 int ret = 1; 638 unsigned int i; 639 640 for (i = 0; i < sizeof(magic); ++i) 641 if (!ret || err_cqe->rsvd1[i] != magic[i]) { 642 ret = 0; 643 err_cqe->rsvd1[i] = magic[i]; 644 } 645 return ret; 646 } 647 648 /** 649 * Handle error CQE. 650 * 651 * @param txq 652 * Pointer to TX queue structure. 653 * @param error_cqe 654 * Pointer to the error CQE. 655 * 656 * @return 657 * The last Tx buffer element to free. 
658 */ 659 uint16_t 660 mlx5_tx_error_cqe_handle(struct mlx5_txq_data *restrict txq, 661 volatile struct mlx5_err_cqe *err_cqe) 662 { 663 if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) { 664 const uint16_t wqe_m = ((1 << txq->wqe_n) - 1); 665 struct mlx5_txq_ctrl *txq_ctrl = 666 container_of(txq, struct mlx5_txq_ctrl, txq); 667 uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter); 668 int seen = check_err_cqe_seen(err_cqe); 669 670 if (!seen && txq_ctrl->dump_file_n < 671 txq_ctrl->priv->config.max_dump_files_num) { 672 MKSTR(err_str, "Unexpected CQE error syndrome " 673 "0x%02x CQN = %u SQN = %u wqe_counter = %u " 674 "wq_ci = %u cq_ci = %u", err_cqe->syndrome, 675 txq->cqe_s, txq->qp_num_8s >> 8, 676 rte_be_to_cpu_16(err_cqe->wqe_counter), 677 txq->wqe_ci, txq->cq_ci); 678 MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u", 679 PORT_ID(txq_ctrl->priv), txq->idx, 680 txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc()); 681 mlx5_dump_debug_information(name, NULL, err_str, 0); 682 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 683 (const void *)((uintptr_t) 684 txq->cqes), 685 sizeof(*err_cqe) * 686 (1 << txq->cqe_n)); 687 mlx5_dump_debug_information(name, "MLX5 Error SQ:", 688 (const void *)((uintptr_t) 689 txq->wqes), 690 MLX5_WQE_SIZE * 691 (1 << txq->wqe_n)); 692 txq_ctrl->dump_file_n++; 693 } 694 if (!seen) 695 /* 696 * Count errors in WQEs units. 697 * Later it can be improved to count error packets, 698 * for example, by SQ parsing to find how much packets 699 * should be counted for each WQE. 700 */ 701 txq->stats.oerrors += ((txq->wqe_ci & wqe_m) - 702 new_wqe_pi) & wqe_m; 703 if (tx_recover_qp(txq_ctrl) == 0) { 704 txq->cq_ci++; 705 /* Release all the remaining buffers. */ 706 return txq->elts_head; 707 } 708 /* Recovering failed - try again later on the same WQE. */ 709 } else { 710 txq->cq_ci++; 711 } 712 /* Do not release buffers. */ 713 return txq->elts_tail; 714 } 715 716 /** 717 * Translate RX completion flags to packet type. 718 * 719 * @param[in] rxq 720 * Pointer to RX queue structure. 721 * @param[in] cqe 722 * Pointer to CQE. 723 * 724 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 725 * 726 * @return 727 * Packet type for struct rte_mbuf. 728 */ 729 static inline uint32_t 730 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe) 731 { 732 uint8_t idx; 733 uint8_t pinfo = cqe->pkt_info; 734 uint16_t ptype = cqe->hdr_type_etc; 735 736 /* 737 * The index to the array should have: 738 * bit[1:0] = l3_hdr_type 739 * bit[4:2] = l4_hdr_type 740 * bit[5] = ip_frag 741 * bit[6] = tunneled 742 * bit[7] = outer_l3_type 743 */ 744 idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10); 745 return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6)); 746 } 747 748 /** 749 * Initialize Rx WQ and indexes. 750 * 751 * @param[in] rxq 752 * Pointer to RX queue structure. 
753 */ 754 void 755 mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) 756 { 757 const unsigned int wqe_n = 1 << rxq->elts_n; 758 unsigned int i; 759 760 for (i = 0; (i != wqe_n); ++i) { 761 volatile struct mlx5_wqe_data_seg *scat; 762 uintptr_t addr; 763 uint32_t byte_count; 764 765 if (mlx5_rxq_mprq_enabled(rxq)) { 766 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i]; 767 768 scat = &((volatile struct mlx5_wqe_mprq *) 769 rxq->wqes)[i].dseg; 770 addr = (uintptr_t)mlx5_mprq_buf_addr(buf, 771 1 << rxq->strd_num_n); 772 byte_count = (1 << rxq->strd_sz_n) * 773 (1 << rxq->strd_num_n); 774 } else { 775 struct rte_mbuf *buf = (*rxq->elts)[i]; 776 777 scat = &((volatile struct mlx5_wqe_data_seg *) 778 rxq->wqes)[i]; 779 addr = rte_pktmbuf_mtod(buf, uintptr_t); 780 byte_count = DATA_LEN(buf); 781 } 782 /* scat->addr must be able to store a pointer. */ 783 assert(sizeof(scat->addr) >= sizeof(uintptr_t)); 784 *scat = (struct mlx5_wqe_data_seg){ 785 .addr = rte_cpu_to_be_64(addr), 786 .byte_count = rte_cpu_to_be_32(byte_count), 787 .lkey = mlx5_rx_addr2mr(rxq, addr), 788 }; 789 } 790 rxq->consumed_strd = 0; 791 rxq->decompressed = 0; 792 rxq->rq_pi = 0; 793 rxq->zip = (struct rxq_zip){ 794 .ai = 0, 795 }; 796 /* Update doorbell counter. */ 797 rxq->rq_ci = wqe_n >> rxq->sges_n; 798 rte_cio_wmb(); 799 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 800 } 801 802 /** 803 * Modify a Verbs/DevX queue state. 804 * This must be called from the primary process. 805 * 806 * @param dev 807 * Pointer to Ethernet device. 808 * @param sm 809 * State modify request parameters. 810 * 811 * @return 812 * 0 in case of success else non-zero value and rte_errno is set. 813 */ 814 int 815 mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, 816 const struct mlx5_mp_arg_queue_state_modify *sm) 817 { 818 int ret; 819 struct mlx5_priv *priv = dev->data->dev_private; 820 821 if (sm->is_wq) { 822 struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id]; 823 struct mlx5_rxq_ctrl *rxq_ctrl = 824 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 825 826 if (rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_IBV) { 827 struct ibv_wq_attr mod = { 828 .attr_mask = IBV_WQ_ATTR_STATE, 829 .wq_state = sm->state, 830 }; 831 832 ret = mlx5_glue->modify_wq(rxq_ctrl->obj->wq, &mod); 833 } else { /* rxq_ctrl->obj->type == MLX5_RXQ_OBJ_TYPE_DEVX_RQ. 
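			 * The DevX path below emulates the Verbs WQ state
			 * machine: each IBV_WQS_* request is translated into
			 * a current/target RQ state pair.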
*/ 834 struct mlx5_devx_modify_rq_attr rq_attr; 835 836 memset(&rq_attr, 0, sizeof(rq_attr)); 837 if (sm->state == IBV_WQS_RESET) { 838 rq_attr.rq_state = MLX5_RQC_STATE_ERR; 839 rq_attr.state = MLX5_RQC_STATE_RST; 840 } else if (sm->state == IBV_WQS_RDY) { 841 rq_attr.rq_state = MLX5_RQC_STATE_RST; 842 rq_attr.state = MLX5_RQC_STATE_RDY; 843 } else if (sm->state == IBV_WQS_ERR) { 844 rq_attr.rq_state = MLX5_RQC_STATE_RDY; 845 rq_attr.state = MLX5_RQC_STATE_ERR; 846 } 847 ret = mlx5_devx_cmd_modify_rq(rxq_ctrl->obj->rq, 848 &rq_attr); 849 } 850 if (ret) { 851 DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s\n", 852 sm->state, strerror(errno)); 853 rte_errno = errno; 854 return ret; 855 } 856 } else { 857 struct mlx5_txq_data *txq = (*priv->txqs)[sm->queue_id]; 858 struct mlx5_txq_ctrl *txq_ctrl = 859 container_of(txq, struct mlx5_txq_ctrl, txq); 860 struct ibv_qp_attr mod = { 861 .qp_state = IBV_QPS_RESET, 862 .port_num = (uint8_t)priv->ibv_port, 863 }; 864 struct ibv_qp *qp = txq_ctrl->ibv->qp; 865 866 ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); 867 if (ret) { 868 DRV_LOG(ERR, "Cannot change the Tx QP state to RESET " 869 "%s\n", strerror(errno)); 870 rte_errno = errno; 871 return ret; 872 } 873 mod.qp_state = IBV_QPS_INIT; 874 ret = mlx5_glue->modify_qp(qp, &mod, 875 (IBV_QP_STATE | IBV_QP_PORT)); 876 if (ret) { 877 DRV_LOG(ERR, "Cannot change Tx QP state to INIT %s\n", 878 strerror(errno)); 879 rte_errno = errno; 880 return ret; 881 } 882 mod.qp_state = IBV_QPS_RTR; 883 ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); 884 if (ret) { 885 DRV_LOG(ERR, "Cannot change Tx QP state to RTR %s\n", 886 strerror(errno)); 887 rte_errno = errno; 888 return ret; 889 } 890 mod.qp_state = IBV_QPS_RTS; 891 ret = mlx5_glue->modify_qp(qp, &mod, IBV_QP_STATE); 892 if (ret) { 893 DRV_LOG(ERR, "Cannot change Tx QP state to RTS %s\n", 894 strerror(errno)); 895 rte_errno = errno; 896 return ret; 897 } 898 } 899 return 0; 900 } 901 902 /** 903 * Modify a Verbs queue state. 904 * 905 * @param dev 906 * Pointer to Ethernet device. 907 * @param sm 908 * State modify request parameters. 909 * 910 * @return 911 * 0 in case of success else non-zero value. 912 */ 913 static int 914 mlx5_queue_state_modify(struct rte_eth_dev *dev, 915 struct mlx5_mp_arg_queue_state_modify *sm) 916 { 917 int ret = 0; 918 919 switch (rte_eal_process_type()) { 920 case RTE_PROC_PRIMARY: 921 ret = mlx5_queue_state_modify_primary(dev, sm); 922 break; 923 case RTE_PROC_SECONDARY: 924 ret = mlx5_mp_req_queue_state_modify(dev, sm); 925 break; 926 default: 927 break; 928 } 929 return ret; 930 } 931 932 /** 933 * Handle a Rx error. 934 * The function inserts the RQ state to reset when the first error CQE is 935 * shown, then drains the CQ by the caller function loop. When the CQ is empty, 936 * it moves the RQ state to ready and initializes the RQ. 937 * Next CQE identification and error counting are in the caller responsibility. 938 * 939 * @param[in] rxq 940 * Pointer to RX queue structure. 941 * @param[in] mbuf_prepare 942 * Whether to prepare mbufs for the RQ. 943 * 944 * @return 945 * -1 in case of recovery error, otherwise the CQE status. 
946 */ 947 int 948 mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t mbuf_prepare) 949 { 950 const uint16_t cqe_n = 1 << rxq->cqe_n; 951 const uint16_t cqe_mask = cqe_n - 1; 952 const unsigned int wqe_n = 1 << rxq->elts_n; 953 struct mlx5_rxq_ctrl *rxq_ctrl = 954 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 955 union { 956 volatile struct mlx5_cqe *cqe; 957 volatile struct mlx5_err_cqe *err_cqe; 958 } u = { 959 .cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask], 960 }; 961 struct mlx5_mp_arg_queue_state_modify sm; 962 int ret; 963 964 switch (rxq->err_state) { 965 case MLX5_RXQ_ERR_STATE_NO_ERROR: 966 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET; 967 /* Fall-through */ 968 case MLX5_RXQ_ERR_STATE_NEED_RESET: 969 sm.is_wq = 1; 970 sm.queue_id = rxq->idx; 971 sm.state = IBV_WQS_RESET; 972 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm)) 973 return -1; 974 if (rxq_ctrl->dump_file_n < 975 rxq_ctrl->priv->config.max_dump_files_num) { 976 MKSTR(err_str, "Unexpected CQE error syndrome " 977 "0x%02x CQN = %u RQN = %u wqe_counter = %u" 978 " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome, 979 rxq->cqn, rxq_ctrl->wqn, 980 rte_be_to_cpu_16(u.err_cqe->wqe_counter), 981 rxq->rq_ci << rxq->sges_n, rxq->cq_ci); 982 MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u", 983 rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc()); 984 mlx5_dump_debug_information(name, NULL, err_str, 0); 985 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 986 (const void *)((uintptr_t) 987 rxq->cqes), 988 sizeof(*u.cqe) * cqe_n); 989 mlx5_dump_debug_information(name, "MLX5 Error RQ:", 990 (const void *)((uintptr_t) 991 rxq->wqes), 992 16 * wqe_n); 993 rxq_ctrl->dump_file_n++; 994 } 995 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY; 996 /* Fall-through */ 997 case MLX5_RXQ_ERR_STATE_NEED_READY: 998 ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci); 999 if (ret == MLX5_CQE_STATUS_HW_OWN) { 1000 rte_cio_wmb(); 1001 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1002 rte_cio_wmb(); 1003 /* 1004 * The RQ consumer index must be zeroed while moving 1005 * from RESET state to RDY state. 1006 */ 1007 *rxq->rq_db = rte_cpu_to_be_32(0); 1008 rte_cio_wmb(); 1009 sm.is_wq = 1; 1010 sm.queue_id = rxq->idx; 1011 sm.state = IBV_WQS_RDY; 1012 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), 1013 &sm)) 1014 return -1; 1015 if (mbuf_prepare) { 1016 const uint16_t q_mask = wqe_n - 1; 1017 uint16_t elt_idx; 1018 struct rte_mbuf **elt; 1019 int i; 1020 unsigned int n = wqe_n - (rxq->rq_ci - 1021 rxq->rq_pi); 1022 1023 for (i = 0; i < (int)n; ++i) { 1024 elt_idx = (rxq->rq_ci + i) & q_mask; 1025 elt = &(*rxq->elts)[elt_idx]; 1026 *elt = rte_mbuf_raw_alloc(rxq->mp); 1027 if (!*elt) { 1028 for (i--; i >= 0; --i) { 1029 elt_idx = (rxq->rq_ci + 1030 i) & q_mask; 1031 elt = &(*rxq->elts) 1032 [elt_idx]; 1033 rte_pktmbuf_free_seg 1034 (*elt); 1035 } 1036 return -1; 1037 } 1038 } 1039 } 1040 mlx5_rxq_initialize(rxq); 1041 rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; 1042 } 1043 return ret; 1044 default: 1045 return -1; 1046 } 1047 } 1048 1049 /** 1050 * Get size of the next packet for a given CQE. For compressed CQEs, the 1051 * consumer index is updated only once all packets of the current one have 1052 * been processed. 1053 * 1054 * @param rxq 1055 * Pointer to RX queue. 1056 * @param cqe 1057 * CQE to process. 1058 * @param[out] mcqe 1059 * Store pointer to mini-CQE if compressed. Otherwise, the pointer is not 1060 * written. 1061 * 1062 * @return 1063 * 0 in case of empty CQE, otherwise the packet size in bytes. 
1064 */ 1065 static inline int 1066 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 1067 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe) 1068 { 1069 struct rxq_zip *zip = &rxq->zip; 1070 uint16_t cqe_n = cqe_cnt + 1; 1071 int len; 1072 uint16_t idx, end; 1073 1074 do { 1075 len = 0; 1076 /* Process compressed data in the CQE and mini arrays. */ 1077 if (zip->ai) { 1078 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1079 (volatile struct mlx5_mini_cqe8 (*)[8]) 1080 (uintptr_t)(&(*rxq->cqes)[zip->ca & 1081 cqe_cnt].pkt_info); 1082 1083 len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt); 1084 *mcqe = &(*mc)[zip->ai & 7]; 1085 if ((++zip->ai & 7) == 0) { 1086 /* Invalidate consumed CQEs */ 1087 idx = zip->ca; 1088 end = zip->na; 1089 while (idx != end) { 1090 (*rxq->cqes)[idx & cqe_cnt].op_own = 1091 MLX5_CQE_INVALIDATE; 1092 ++idx; 1093 } 1094 /* 1095 * Increment consumer index to skip the number 1096 * of CQEs consumed. Hardware leaves holes in 1097 * the CQ ring for software use. 1098 */ 1099 zip->ca = zip->na; 1100 zip->na += 8; 1101 } 1102 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { 1103 /* Invalidate the rest */ 1104 idx = zip->ca; 1105 end = zip->cq_ci; 1106 1107 while (idx != end) { 1108 (*rxq->cqes)[idx & cqe_cnt].op_own = 1109 MLX5_CQE_INVALIDATE; 1110 ++idx; 1111 } 1112 rxq->cq_ci = zip->cq_ci; 1113 zip->ai = 0; 1114 } 1115 /* 1116 * No compressed data, get next CQE and verify if it is 1117 * compressed. 1118 */ 1119 } else { 1120 int ret; 1121 int8_t op_own; 1122 1123 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 1124 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 1125 if (unlikely(ret == MLX5_CQE_STATUS_ERR || 1126 rxq->err_state)) { 1127 ret = mlx5_rx_err_handle(rxq, 0); 1128 if (ret == MLX5_CQE_STATUS_HW_OWN || 1129 ret == -1) 1130 return 0; 1131 } else { 1132 return 0; 1133 } 1134 } 1135 ++rxq->cq_ci; 1136 op_own = cqe->op_own; 1137 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { 1138 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1139 (volatile struct mlx5_mini_cqe8 (*)[8]) 1140 (uintptr_t)(&(*rxq->cqes) 1141 [rxq->cq_ci & 1142 cqe_cnt].pkt_info); 1143 1144 /* Fix endianness. */ 1145 zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt); 1146 /* 1147 * Current mini array position is the one 1148 * returned by check_cqe64(). 1149 * 1150 * If completion comprises several mini arrays, 1151 * as a special case the second one is located 1152 * 7 CQEs after the initial CQE instead of 8 1153 * for subsequent ones. 1154 */ 1155 zip->ca = rxq->cq_ci; 1156 zip->na = zip->ca + 7; 1157 /* Compute the next non compressed CQE. */ 1158 --rxq->cq_ci; 1159 zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; 1160 /* Get packet size to return. */ 1161 len = rte_be_to_cpu_32((*mc)[0].byte_cnt); 1162 *mcqe = &(*mc)[0]; 1163 zip->ai = 1; 1164 /* Prefetch all to be invalidated */ 1165 idx = zip->ca; 1166 end = zip->cq_ci; 1167 while (idx != end) { 1168 rte_prefetch0(&(*rxq->cqes)[(idx) & 1169 cqe_cnt]); 1170 ++idx; 1171 } 1172 } else { 1173 len = rte_be_to_cpu_32(cqe->byte_cnt); 1174 } 1175 } 1176 if (unlikely(rxq->err_state)) { 1177 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1178 ++rxq->stats.idropped; 1179 } else { 1180 return len; 1181 } 1182 } while (1); 1183 } 1184 1185 /** 1186 * Translate RX completion flags to offload flags. 1187 * 1188 * @param[in] cqe 1189 * Pointer to CQE. 1190 * 1191 * @return 1192 * Offload flags (ol_flags) for struct rte_mbuf. 
1193 */ 1194 static inline uint32_t 1195 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe) 1196 { 1197 uint32_t ol_flags = 0; 1198 uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc); 1199 1200 ol_flags = 1201 TRANSPOSE(flags, 1202 MLX5_CQE_RX_L3_HDR_VALID, 1203 PKT_RX_IP_CKSUM_GOOD) | 1204 TRANSPOSE(flags, 1205 MLX5_CQE_RX_L4_HDR_VALID, 1206 PKT_RX_L4_CKSUM_GOOD); 1207 return ol_flags; 1208 } 1209 1210 /** 1211 * Fill in mbuf fields from RX completion flags. 1212 * Note that pkt->ol_flags should be initialized outside of this function. 1213 * 1214 * @param rxq 1215 * Pointer to RX queue. 1216 * @param pkt 1217 * mbuf to fill. 1218 * @param cqe 1219 * CQE to process. 1220 * @param rss_hash_res 1221 * Packet RSS Hash result. 1222 */ 1223 static inline void 1224 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 1225 volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res) 1226 { 1227 /* Update packet information. */ 1228 pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe); 1229 if (rss_hash_res && rxq->rss_hash) { 1230 pkt->hash.rss = rss_hash_res; 1231 pkt->ol_flags |= PKT_RX_RSS_HASH; 1232 } 1233 if (rxq->mark && MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) { 1234 pkt->ol_flags |= PKT_RX_FDIR; 1235 if (cqe->sop_drop_qpn != 1236 rte_cpu_to_be_32(MLX5_FLOW_MARK_DEFAULT)) { 1237 uint32_t mark = cqe->sop_drop_qpn; 1238 1239 pkt->ol_flags |= PKT_RX_FDIR_ID; 1240 pkt->hash.fdir.hi = mlx5_flow_mark_get(mark); 1241 } 1242 } 1243 if (rxq->csum) 1244 pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); 1245 if (rxq->vlan_strip && 1246 (cqe->hdr_type_etc & rte_cpu_to_be_16(MLX5_CQE_VLAN_STRIPPED))) { 1247 pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; 1248 pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info); 1249 } 1250 if (rxq->hw_timestamp) { 1251 pkt->timestamp = rte_be_to_cpu_64(cqe->timestamp); 1252 pkt->ol_flags |= PKT_RX_TIMESTAMP; 1253 } 1254 } 1255 1256 /** 1257 * DPDK callback for RX. 1258 * 1259 * @param dpdk_rxq 1260 * Generic pointer to RX queue structure. 1261 * @param[out] pkts 1262 * Array to store received packets. 1263 * @param pkts_n 1264 * Maximum number of packets in array. 1265 * 1266 * @return 1267 * Number of packets successfully received (<= pkts_n). 1268 */ 1269 uint16_t 1270 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1271 { 1272 struct mlx5_rxq_data *rxq = dpdk_rxq; 1273 const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; 1274 const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1; 1275 const unsigned int sges_n = rxq->sges_n; 1276 struct rte_mbuf *pkt = NULL; 1277 struct rte_mbuf *seg = NULL; 1278 volatile struct mlx5_cqe *cqe = 1279 &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1280 unsigned int i = 0; 1281 unsigned int rq_ci = rxq->rq_ci << sges_n; 1282 int len = 0; /* keep its value across iterations. */ 1283 1284 while (pkts_n) { 1285 unsigned int idx = rq_ci & wqe_cnt; 1286 volatile struct mlx5_wqe_data_seg *wqe = 1287 &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx]; 1288 struct rte_mbuf *rep = (*rxq->elts)[idx]; 1289 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1290 uint32_t rss_hash_res; 1291 1292 if (pkt) 1293 NEXT(seg) = rep; 1294 seg = rep; 1295 rte_prefetch0(seg); 1296 rte_prefetch0(cqe); 1297 rte_prefetch0(wqe); 1298 rep = rte_mbuf_raw_alloc(rxq->mp); 1299 if (unlikely(rep == NULL)) { 1300 ++rxq->stats.rx_nombuf; 1301 if (!pkt) { 1302 /* 1303 * no buffers before we even started, 1304 * bail out silently. 
1305 */ 1306 break; 1307 } 1308 while (pkt != seg) { 1309 assert(pkt != (*rxq->elts)[idx]); 1310 rep = NEXT(pkt); 1311 NEXT(pkt) = NULL; 1312 NB_SEGS(pkt) = 1; 1313 rte_mbuf_raw_free(pkt); 1314 pkt = rep; 1315 } 1316 break; 1317 } 1318 if (!pkt) { 1319 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1320 len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe); 1321 if (!len) { 1322 rte_mbuf_raw_free(rep); 1323 break; 1324 } 1325 pkt = seg; 1326 assert(len >= (rxq->crc_present << 2)); 1327 pkt->ol_flags = 0; 1328 /* If compressed, take hash result from mini-CQE. */ 1329 rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ? 1330 cqe->rx_hash_res : 1331 mcqe->rx_hash_result); 1332 rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res); 1333 if (rxq->crc_present) 1334 len -= RTE_ETHER_CRC_LEN; 1335 PKT_LEN(pkt) = len; 1336 if (cqe->lro_num_seg > 1) { 1337 mlx5_lro_update_hdr 1338 (rte_pktmbuf_mtod(pkt, uint8_t *), cqe, 1339 len); 1340 pkt->ol_flags |= PKT_RX_LRO; 1341 pkt->tso_segsz = len / cqe->lro_num_seg; 1342 } 1343 } 1344 DATA_LEN(rep) = DATA_LEN(seg); 1345 PKT_LEN(rep) = PKT_LEN(seg); 1346 SET_DATA_OFF(rep, DATA_OFF(seg)); 1347 PORT(rep) = PORT(seg); 1348 (*rxq->elts)[idx] = rep; 1349 /* 1350 * Fill NIC descriptor with the new buffer. The lkey and size 1351 * of the buffers are already known, only the buffer address 1352 * changes. 1353 */ 1354 wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t)); 1355 /* If there's only one MR, no need to replace LKey in WQE. */ 1356 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) 1357 wqe->lkey = mlx5_rx_mb2mr(rxq, rep); 1358 if (len > DATA_LEN(seg)) { 1359 len -= DATA_LEN(seg); 1360 ++NB_SEGS(pkt); 1361 ++rq_ci; 1362 continue; 1363 } 1364 DATA_LEN(seg) = len; 1365 #ifdef MLX5_PMD_SOFT_COUNTERS 1366 /* Increment bytes counter. */ 1367 rxq->stats.ibytes += PKT_LEN(pkt); 1368 #endif 1369 /* Return packet. */ 1370 *(pkts++) = pkt; 1371 pkt = NULL; 1372 --pkts_n; 1373 ++i; 1374 /* Align consumer index to the next stride. */ 1375 rq_ci >>= sges_n; 1376 ++rq_ci; 1377 rq_ci <<= sges_n; 1378 } 1379 if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci))) 1380 return 0; 1381 /* Update the consumer index. */ 1382 rxq->rq_ci = rq_ci >> sges_n; 1383 rte_cio_wmb(); 1384 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1385 rte_cio_wmb(); 1386 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1387 #ifdef MLX5_PMD_SOFT_COUNTERS 1388 /* Increment packets counter. */ 1389 rxq->stats.ipackets += i; 1390 #endif 1391 return i; 1392 } 1393 1394 /** 1395 * Update LRO packet TCP header. 1396 * The HW LRO feature doesn't update the TCP header after coalescing the 1397 * TCP segments but supplies information in CQE to fill it by SW. 1398 * 1399 * @param tcp 1400 * Pointer to the TCP header. 1401 * @param cqe 1402 * Pointer to the completion entry.. 1403 * @param phcsum 1404 * The L3 pseudo-header checksum. 1405 */ 1406 static inline void 1407 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *restrict tcp, 1408 volatile struct mlx5_cqe *restrict cqe, 1409 uint32_t phcsum) 1410 { 1411 uint8_t l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) & 1412 MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; 1413 /* 1414 * The HW calculates only the TCP payload checksum, need to complete 1415 * the TCP header checksum and the L3 pseudo-header checksum. 
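	 * The accumulated sum is folded to 16 bits and inverted below; a
	 * zero result is stored as 0xffff.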
1416 */ 1417 uint32_t csum = phcsum + cqe->csum; 1418 1419 if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK || 1420 l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) { 1421 tcp->tcp_flags |= RTE_TCP_ACK_FLAG; 1422 tcp->recv_ack = cqe->lro_ack_seq_num; 1423 tcp->rx_win = cqe->lro_tcp_win; 1424 } 1425 if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK) 1426 tcp->tcp_flags |= RTE_TCP_PSH_FLAG; 1427 tcp->cksum = 0; 1428 csum += rte_raw_cksum(tcp, (tcp->data_off & 0xF) * 4); 1429 csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff); 1430 csum = (~csum) & 0xffff; 1431 if (csum == 0) 1432 csum = 0xffff; 1433 tcp->cksum = csum; 1434 } 1435 1436 /** 1437 * Update LRO packet headers. 1438 * The HW LRO feature doesn't update the L3/TCP headers after coalescing the 1439 * TCP segments but supply information in CQE to fill it by SW. 1440 * 1441 * @param padd 1442 * The packet address. 1443 * @param cqe 1444 * Pointer to the completion entry.. 1445 * @param len 1446 * The packet length. 1447 */ 1448 static inline void 1449 mlx5_lro_update_hdr(uint8_t *restrict padd, 1450 volatile struct mlx5_cqe *restrict cqe, 1451 uint32_t len) 1452 { 1453 union { 1454 struct rte_ether_hdr *eth; 1455 struct rte_vlan_hdr *vlan; 1456 struct rte_ipv4_hdr *ipv4; 1457 struct rte_ipv6_hdr *ipv6; 1458 struct rte_tcp_hdr *tcp; 1459 uint8_t *hdr; 1460 } h = { 1461 .hdr = padd, 1462 }; 1463 uint16_t proto = h.eth->ether_type; 1464 uint32_t phcsum; 1465 1466 h.eth++; 1467 while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) || 1468 proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) { 1469 proto = h.vlan->eth_proto; 1470 h.vlan++; 1471 } 1472 if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) { 1473 h.ipv4->time_to_live = cqe->lro_min_ttl; 1474 h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd)); 1475 h.ipv4->hdr_checksum = 0; 1476 h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4); 1477 phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0); 1478 h.ipv4++; 1479 } else { 1480 h.ipv6->hop_limits = cqe->lro_min_ttl; 1481 h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) - 1482 sizeof(*h.ipv6)); 1483 phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0); 1484 h.ipv6++; 1485 } 1486 mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum); 1487 } 1488 1489 void 1490 mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque) 1491 { 1492 struct mlx5_mprq_buf *buf = opaque; 1493 1494 if (rte_atomic16_read(&buf->refcnt) == 1) { 1495 rte_mempool_put(buf->mp, buf); 1496 } else if (rte_atomic16_add_return(&buf->refcnt, -1) == 0) { 1497 rte_atomic16_set(&buf->refcnt, 1); 1498 rte_mempool_put(buf->mp, buf); 1499 } 1500 } 1501 1502 void 1503 mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf) 1504 { 1505 mlx5_mprq_buf_free_cb(NULL, buf); 1506 } 1507 1508 static inline void 1509 mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx, 1510 const unsigned int strd_n) 1511 { 1512 struct mlx5_mprq_buf *rep = rxq->mprq_repl; 1513 volatile struct mlx5_wqe_data_seg *wqe = 1514 &((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg; 1515 void *addr; 1516 1517 assert(rep != NULL); 1518 /* Replace MPRQ buf. */ 1519 (*rxq->mprq_bufs)[rq_idx] = rep; 1520 /* Replace WQE. */ 1521 addr = mlx5_mprq_buf_addr(rep, strd_n); 1522 wqe->addr = rte_cpu_to_be_64((uintptr_t)addr); 1523 /* If there's only one MR, no need to replace LKey in WQE. */ 1524 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) 1525 wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr); 1526 /* Stash a mbuf for next replacement. 
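	 * If the mempool is exhausted, mprq_repl stays NULL and the Rx
	 * burst falls back to memcpy until a buffer can be allocated again.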
*/ 1527 if (likely(!rte_mempool_get(rxq->mprq_mp, (void **)&rep))) 1528 rxq->mprq_repl = rep; 1529 else 1530 rxq->mprq_repl = NULL; 1531 } 1532 1533 /** 1534 * DPDK callback for RX with Multi-Packet RQ support. 1535 * 1536 * @param dpdk_rxq 1537 * Generic pointer to RX queue structure. 1538 * @param[out] pkts 1539 * Array to store received packets. 1540 * @param pkts_n 1541 * Maximum number of packets in array. 1542 * 1543 * @return 1544 * Number of packets successfully received (<= pkts_n). 1545 */ 1546 uint16_t 1547 mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1548 { 1549 struct mlx5_rxq_data *rxq = dpdk_rxq; 1550 const unsigned int strd_n = 1 << rxq->strd_num_n; 1551 const unsigned int strd_sz = 1 << rxq->strd_sz_n; 1552 const unsigned int strd_shift = 1553 MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en; 1554 const unsigned int cq_mask = (1 << rxq->cqe_n) - 1; 1555 const unsigned int wq_mask = (1 << rxq->elts_n) - 1; 1556 volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1557 unsigned int i = 0; 1558 uint32_t rq_ci = rxq->rq_ci; 1559 uint16_t consumed_strd = rxq->consumed_strd; 1560 uint16_t headroom_sz = rxq->strd_headroom_en * RTE_PKTMBUF_HEADROOM; 1561 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1562 1563 while (i < pkts_n) { 1564 struct rte_mbuf *pkt; 1565 void *addr; 1566 int ret; 1567 unsigned int len; 1568 uint16_t strd_cnt; 1569 uint16_t strd_idx; 1570 uint32_t offset; 1571 uint32_t byte_cnt; 1572 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1573 uint32_t rss_hash_res = 0; 1574 uint8_t lro_num_seg; 1575 1576 if (consumed_strd == strd_n) { 1577 /* Replace WQE only if the buffer is still in use. */ 1578 if (rte_atomic16_read(&buf->refcnt) > 1) { 1579 mprq_buf_replace(rxq, rq_ci & wq_mask, strd_n); 1580 /* Release the old buffer. */ 1581 mlx5_mprq_buf_free(buf); 1582 } else if (unlikely(rxq->mprq_repl == NULL)) { 1583 struct mlx5_mprq_buf *rep; 1584 1585 /* 1586 * Currently, the MPRQ mempool is out of buffer 1587 * and doing memcpy regardless of the size of Rx 1588 * packet. Retry allocation to get back to 1589 * normal. 1590 */ 1591 if (!rte_mempool_get(rxq->mprq_mp, 1592 (void **)&rep)) 1593 rxq->mprq_repl = rep; 1594 } 1595 /* Advance to the next WQE. */ 1596 consumed_strd = 0; 1597 ++rq_ci; 1598 buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1599 } 1600 cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1601 ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe); 1602 if (!ret) 1603 break; 1604 byte_cnt = ret; 1605 strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >> 1606 MLX5_MPRQ_STRIDE_NUM_SHIFT; 1607 assert(strd_cnt); 1608 consumed_strd += strd_cnt; 1609 if (byte_cnt & MLX5_MPRQ_FILLER_MASK) 1610 continue; 1611 if (mcqe == NULL) { 1612 rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res); 1613 strd_idx = rte_be_to_cpu_16(cqe->wqe_counter); 1614 } else { 1615 /* mini-CQE for MPRQ doesn't have hash result. */ 1616 strd_idx = rte_be_to_cpu_16(mcqe->stride_idx); 1617 } 1618 assert(strd_idx < strd_n); 1619 assert(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & wq_mask)); 1620 lro_num_seg = cqe->lro_num_seg; 1621 /* 1622 * Currently configured to receive a packet per a stride. But if 1623 * MTU is adjusted through kernel interface, device could 1624 * consume multiple strides without raising an error. In this 1625 * case, the packet should be dropped because it is bigger than 1626 * the max_rx_pkt_len. 
1627 */ 1628 if (unlikely(!lro_num_seg && strd_cnt > 1)) { 1629 ++rxq->stats.idropped; 1630 continue; 1631 } 1632 pkt = rte_pktmbuf_alloc(rxq->mp); 1633 if (unlikely(pkt == NULL)) { 1634 ++rxq->stats.rx_nombuf; 1635 break; 1636 } 1637 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 1638 assert((int)len >= (rxq->crc_present << 2)); 1639 if (rxq->crc_present) 1640 len -= RTE_ETHER_CRC_LEN; 1641 offset = strd_idx * strd_sz + strd_shift; 1642 addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset); 1643 /* 1644 * Memcpy packets to the target mbuf if: 1645 * - The size of packet is smaller than mprq_max_memcpy_len. 1646 * - Out of buffer in the Mempool for Multi-Packet RQ. 1647 */ 1648 if (len <= rxq->mprq_max_memcpy_len || rxq->mprq_repl == NULL) { 1649 /* 1650 * When memcpy'ing packet due to out-of-buffer, the 1651 * packet must be smaller than the target mbuf. 1652 */ 1653 if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) { 1654 rte_pktmbuf_free_seg(pkt); 1655 ++rxq->stats.idropped; 1656 continue; 1657 } 1658 rte_memcpy(rte_pktmbuf_mtod(pkt, void *), addr, len); 1659 DATA_LEN(pkt) = len; 1660 } else { 1661 rte_iova_t buf_iova; 1662 struct rte_mbuf_ext_shared_info *shinfo; 1663 uint16_t buf_len = strd_cnt * strd_sz; 1664 void *buf_addr; 1665 1666 /* Increment the refcnt of the whole chunk. */ 1667 rte_atomic16_add_return(&buf->refcnt, 1); 1668 assert((uint16_t)rte_atomic16_read(&buf->refcnt) <= 1669 strd_n + 1); 1670 buf_addr = RTE_PTR_SUB(addr, headroom_sz); 1671 /* 1672 * MLX5 device doesn't use iova but it is necessary in a 1673 * case where the Rx packet is transmitted via a 1674 * different PMD. 1675 */ 1676 buf_iova = rte_mempool_virt2iova(buf) + 1677 RTE_PTR_DIFF(buf_addr, buf); 1678 shinfo = &buf->shinfos[strd_idx]; 1679 rte_mbuf_ext_refcnt_set(shinfo, 1); 1680 /* 1681 * EXT_ATTACHED_MBUF will be set to pkt->ol_flags when 1682 * attaching the stride to mbuf and more offload flags 1683 * will be added below by calling rxq_cq_to_mbuf(). 1684 * Other fields will be overwritten. 1685 */ 1686 rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova, 1687 buf_len, shinfo); 1688 /* Set mbuf head-room. */ 1689 pkt->data_off = headroom_sz; 1690 assert(pkt->ol_flags == EXT_ATTACHED_MBUF); 1691 /* 1692 * Prevent potential overflow due to MTU change through 1693 * kernel interface. 1694 */ 1695 if (unlikely(rte_pktmbuf_tailroom(pkt) < len)) { 1696 rte_pktmbuf_free_seg(pkt); 1697 ++rxq->stats.idropped; 1698 continue; 1699 } 1700 DATA_LEN(pkt) = len; 1701 /* 1702 * LRO packet may consume all the stride memory, in this 1703 * case packet head-room space is not guaranteed so must 1704 * to add an empty mbuf for the head-room. 1705 */ 1706 if (!rxq->strd_headroom_en) { 1707 struct rte_mbuf *headroom_mbuf = 1708 rte_pktmbuf_alloc(rxq->mp); 1709 1710 if (unlikely(headroom_mbuf == NULL)) { 1711 rte_pktmbuf_free_seg(pkt); 1712 ++rxq->stats.rx_nombuf; 1713 break; 1714 } 1715 PORT(pkt) = rxq->port_id; 1716 NEXT(headroom_mbuf) = pkt; 1717 pkt = headroom_mbuf; 1718 NB_SEGS(pkt) = 2; 1719 } 1720 } 1721 rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res); 1722 if (lro_num_seg > 1) { 1723 mlx5_lro_update_hdr(addr, cqe, len); 1724 pkt->ol_flags |= PKT_RX_LRO; 1725 pkt->tso_segsz = strd_sz; 1726 } 1727 PKT_LEN(pkt) = len; 1728 PORT(pkt) = rxq->port_id; 1729 #ifdef MLX5_PMD_SOFT_COUNTERS 1730 /* Increment bytes counter. */ 1731 rxq->stats.ibytes += PKT_LEN(pkt); 1732 #endif 1733 /* Return packet. */ 1734 *(pkts++) = pkt; 1735 ++i; 1736 } 1737 /* Update the consumer indexes. 
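	 * The CQ doorbell is always updated; the RQ doorbell only when new
	 * WQEs were consumed, each write preceded by rte_cio_wmb().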
*/ 1738 rxq->consumed_strd = consumed_strd; 1739 rte_cio_wmb(); 1740 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1741 if (rq_ci != rxq->rq_ci) { 1742 rxq->rq_ci = rq_ci; 1743 rte_cio_wmb(); 1744 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1745 } 1746 #ifdef MLX5_PMD_SOFT_COUNTERS 1747 /* Increment packets counter. */ 1748 rxq->stats.ipackets += i; 1749 #endif 1750 return i; 1751 } 1752 1753 /** 1754 * Dummy DPDK callback for TX. 1755 * 1756 * This function is used to temporarily replace the real callback during 1757 * unsafe control operations on the queue, or in case of error. 1758 * 1759 * @param dpdk_txq 1760 * Generic pointer to TX queue structure. 1761 * @param[in] pkts 1762 * Packets to transmit. 1763 * @param pkts_n 1764 * Number of packets in array. 1765 * 1766 * @return 1767 * Number of packets successfully transmitted (<= pkts_n). 1768 */ 1769 uint16_t 1770 removed_tx_burst(void *dpdk_txq __rte_unused, 1771 struct rte_mbuf **pkts __rte_unused, 1772 uint16_t pkts_n __rte_unused) 1773 { 1774 rte_mb(); 1775 return 0; 1776 } 1777 1778 /** 1779 * Dummy DPDK callback for RX. 1780 * 1781 * This function is used to temporarily replace the real callback during 1782 * unsafe control operations on the queue, or in case of error. 1783 * 1784 * @param dpdk_rxq 1785 * Generic pointer to RX queue structure. 1786 * @param[out] pkts 1787 * Array to store received packets. 1788 * @param pkts_n 1789 * Maximum number of packets in array. 1790 * 1791 * @return 1792 * Number of packets successfully received (<= pkts_n). 1793 */ 1794 uint16_t 1795 removed_rx_burst(void *dpdk_txq __rte_unused, 1796 struct rte_mbuf **pkts __rte_unused, 1797 uint16_t pkts_n __rte_unused) 1798 { 1799 rte_mb(); 1800 return 0; 1801 } 1802 1803 /* 1804 * Vectorized Rx/Tx routines are not compiled in when required vector 1805 * instructions are not supported on a target architecture. The following null 1806 * stubs are needed for linkage when those are not included outside of this file 1807 * (e.g. mlx5_rxtx_vec_sse.c for x86). 1808 */ 1809 1810 __rte_weak uint16_t 1811 mlx5_rx_burst_vec(void *dpdk_txq __rte_unused, 1812 struct rte_mbuf **pkts __rte_unused, 1813 uint16_t pkts_n __rte_unused) 1814 { 1815 return 0; 1816 } 1817 1818 __rte_weak int 1819 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused) 1820 { 1821 return -ENOTSUP; 1822 } 1823 1824 __rte_weak int 1825 mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused) 1826 { 1827 return -ENOTSUP; 1828 } 1829 1830 /** 1831 * Free the mbufs from the linear array of pointers. 1832 * 1833 * @param pkts 1834 * Pointer to array of packets to be free. 1835 * @param pkts_n 1836 * Number of packets to be freed. 1837 * @param olx 1838 * Configured Tx offloads mask. It is fully defined at 1839 * compile time and may be used for optimization. 1840 */ 1841 static __rte_always_inline void 1842 mlx5_tx_free_mbuf(struct rte_mbuf **restrict pkts, 1843 unsigned int pkts_n, 1844 unsigned int olx __rte_unused) 1845 { 1846 struct rte_mempool *pool = NULL; 1847 struct rte_mbuf **p_free = NULL; 1848 struct rte_mbuf *mbuf; 1849 unsigned int n_free = 0; 1850 1851 /* 1852 * The implemented algorithm eliminates 1853 * copying pointers to temporary array 1854 * for rte_mempool_put_bulk() calls. 1855 */ 1856 assert(pkts); 1857 assert(pkts_n); 1858 for (;;) { 1859 for (;;) { 1860 /* 1861 * Decrement mbuf reference counter, detach 1862 * indirect and external buffers if needed. 
1863 */ 1864 mbuf = rte_pktmbuf_prefree_seg(*pkts); 1865 if (likely(mbuf != NULL)) { 1866 assert(mbuf == *pkts); 1867 if (likely(n_free != 0)) { 1868 if (unlikely(pool != mbuf->pool)) 1869 /* From different pool. */ 1870 break; 1871 } else { 1872 /* Start new scan array. */ 1873 pool = mbuf->pool; 1874 p_free = pkts; 1875 } 1876 ++n_free; 1877 ++pkts; 1878 --pkts_n; 1879 if (unlikely(pkts_n == 0)) { 1880 mbuf = NULL; 1881 break; 1882 } 1883 } else { 1884 /* 1885 * This happens if mbuf is still referenced. 1886 * We can't put it back to the pool, skip. 1887 */ 1888 ++pkts; 1889 --pkts_n; 1890 if (unlikely(n_free != 0)) 1891 /* There is some array to free.*/ 1892 break; 1893 if (unlikely(pkts_n == 0)) 1894 /* Last mbuf, nothing to free. */ 1895 return; 1896 } 1897 } 1898 for (;;) { 1899 /* 1900 * This loop is implemented to avoid multiple 1901 * inlining of rte_mempool_put_bulk(). 1902 */ 1903 assert(pool); 1904 assert(p_free); 1905 assert(n_free); 1906 /* 1907 * Free the array of pre-freed mbufs 1908 * belonging to the same memory pool. 1909 */ 1910 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 1911 if (unlikely(mbuf != NULL)) { 1912 /* There is the request to start new scan. */ 1913 pool = mbuf->pool; 1914 p_free = pkts++; 1915 n_free = 1; 1916 --pkts_n; 1917 if (likely(pkts_n != 0)) 1918 break; 1919 /* 1920 * This is the last mbuf to be freed. 1921 * Do one more loop iteration to complete. 1922 * This is rare case of the last unique mbuf. 1923 */ 1924 mbuf = NULL; 1925 continue; 1926 } 1927 if (likely(pkts_n == 0)) 1928 return; 1929 n_free = 0; 1930 break; 1931 } 1932 } 1933 } 1934 1935 /** 1936 * Free the mbuf from the elts ring buffer till new tail. 1937 * 1938 * @param txq 1939 * Pointer to Tx queue structure. 1940 * @param tail 1941 * Index in elts to free up to, becomes new elts tail. 1942 * @param olx 1943 * Configured Tx offloads mask. It is fully defined at 1944 * compile time and may be used for optimization. 1945 */ 1946 static __rte_always_inline void 1947 mlx5_tx_free_elts(struct mlx5_txq_data *restrict txq, 1948 uint16_t tail, 1949 unsigned int olx __rte_unused) 1950 { 1951 uint16_t n_elts = tail - txq->elts_tail; 1952 1953 assert(n_elts); 1954 assert(n_elts <= txq->elts_s); 1955 /* 1956 * Implement a loop to support ring buffer wraparound 1957 * with single inlining of mlx5_tx_free_mbuf(). 1958 */ 1959 do { 1960 unsigned int part; 1961 1962 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 1963 part = RTE_MIN(part, n_elts); 1964 assert(part); 1965 assert(part <= txq->elts_s); 1966 mlx5_tx_free_mbuf(&txq->elts[txq->elts_tail & txq->elts_m], 1967 part, olx); 1968 txq->elts_tail += part; 1969 n_elts -= part; 1970 } while (n_elts); 1971 } 1972 1973 /** 1974 * Store the mbuf being sent into elts ring buffer. 1975 * On Tx completion these mbufs will be freed. 1976 * 1977 * @param txq 1978 * Pointer to Tx queue structure. 1979 * @param pkts 1980 * Pointer to array of packets to be stored. 1981 * @param pkts_n 1982 * Number of packets to be stored. 1983 * @param olx 1984 * Configured Tx offloads mask. It is fully defined at 1985 * compile time and may be used for optimization. 
1986 */ 1987 static __rte_always_inline void 1988 mlx5_tx_copy_elts(struct mlx5_txq_data *restrict txq, 1989 struct rte_mbuf **restrict pkts, 1990 unsigned int pkts_n, 1991 unsigned int olx __rte_unused) 1992 { 1993 unsigned int part; 1994 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 1995 1996 assert(pkts); 1997 assert(pkts_n); 1998 part = txq->elts_s - (txq->elts_head & txq->elts_m); 1999 assert(part); 2000 assert(part <= txq->elts_s); 2001 /* This code is a good candidate for vectorizing with SIMD. */ 2002 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 2003 (void *)pkts, 2004 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 2005 txq->elts_head += pkts_n; 2006 if (unlikely(part < pkts_n)) 2007 /* The copy is wrapping around the elts array. */ 2008 rte_memcpy((void *)elts, (void *)(pkts + part), 2009 (pkts_n - part) * sizeof(struct rte_mbuf *)); 2010 } 2011 2012 /** 2013 * Manage TX completions. This routine checks the CQ for 2014 * arrived CQEs, deduces the last accomplished WQE in SQ, 2015 * updates SQ producing index and frees all completed mbufs. 2016 * 2017 * @param txq 2018 * Pointer to TX queue structure. 2019 * @param olx 2020 * Configured Tx offloads mask. It is fully defined at 2021 * compile time and may be used for optimization. 2022 * 2023 * NOTE: not inlined intentionally, it makes tx_burst 2024 * routine smaller, simple and faster - from experiments. 2025 */ 2026 static void 2027 mlx5_tx_handle_completion(struct mlx5_txq_data *restrict txq, 2028 unsigned int olx __rte_unused) 2029 { 2030 unsigned int count = MLX5_TX_COMP_MAX_CQE; 2031 bool update = false; 2032 uint16_t tail = txq->elts_tail; 2033 int ret; 2034 2035 do { 2036 volatile struct mlx5_cqe *cqe; 2037 2038 cqe = &txq->cqes[txq->cq_ci & txq->cqe_m]; 2039 ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci); 2040 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 2041 if (likely(ret != MLX5_CQE_STATUS_ERR)) { 2042 /* No new CQEs in completion queue. */ 2043 assert(ret == MLX5_CQE_STATUS_HW_OWN); 2044 break; 2045 } 2046 /* Some error occurred, try to restart. */ 2047 rte_wmb(); 2048 tail = mlx5_tx_error_cqe_handle 2049 (txq, (volatile struct mlx5_err_cqe *)cqe); 2050 if (likely(tail != txq->elts_tail)) { 2051 mlx5_tx_free_elts(txq, tail, olx); 2052 assert(tail == txq->elts_tail); 2053 } 2054 /* Allow flushing all CQEs from the queue. */ 2055 count = txq->cqe_s; 2056 } else { 2057 volatile struct mlx5_wqe_cseg *cseg; 2058 2059 /* Normal transmit completion. */ 2060 ++txq->cq_ci; 2061 rte_cio_rmb(); 2062 txq->wqe_pi = rte_be_to_cpu_16(cqe->wqe_counter); 2063 cseg = (volatile struct mlx5_wqe_cseg *) 2064 (txq->wqes + (txq->wqe_pi & txq->wqe_m)); 2065 tail = cseg->misc; 2066 } 2067 #ifndef NDEBUG 2068 if (txq->cq_pi) 2069 --txq->cq_pi; 2070 #endif 2071 update = true; 2072 /* 2073 * We have to restrict the amount of processed CQEs 2074 * in one tx_burst routine call. The CQ may be large 2075 * and many CQEs may be updated by the NIC in one 2076 * transaction. Buffers freeing is time consuming, 2077 * multiple iterations may introduce significant 2078 * latency. 2079 */ 2080 } while (--count); 2081 if (likely(tail != txq->elts_tail)) { 2082 /* Free data buffers from elts. */ 2083 mlx5_tx_free_elts(txq, tail, olx); 2084 assert(tail == txq->elts_tail); 2085 } 2086 if (likely(update)) { 2087 /* Update the consumer index. */ 2088 rte_compiler_barrier(); 2089 *txq->cq_db = 2090 rte_cpu_to_be_32(txq->cq_ci); 2091 } 2092 } 2093 2094 /** 2095 * Check if the completion request flag should be set in the last WQE. 
2096 * Both pushed mbufs and WQEs are monitored and the completion request 2097 * flag is set if any of thresholds is reached. 2098 * 2099 * @param txq 2100 * Pointer to TX queue structure. 2101 * @param loc 2102 * Pointer to burst routine local context. 2103 * @param olx 2104 * Configured Tx offloads mask. It is fully defined at 2105 * compile time and may be used for optimization. 2106 */ 2107 static __rte_always_inline void 2108 mlx5_tx_request_completion(struct mlx5_txq_data *restrict txq, 2109 struct mlx5_txq_local *restrict loc, 2110 unsigned int olx) 2111 { 2112 uint16_t head = txq->elts_head; 2113 unsigned int part; 2114 2115 part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc->pkts_sent - 2116 (MLX5_TXOFF_CONFIG(MULTI) ? loc->pkts_copy : 0); 2117 head += part; 2118 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 2119 (MLX5_TXOFF_CONFIG(INLINE) && 2120 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 2121 volatile struct mlx5_wqe *last = loc->wqe_last; 2122 2123 txq->elts_comp = head; 2124 if (MLX5_TXOFF_CONFIG(INLINE)) 2125 txq->wqe_comp = txq->wqe_ci; 2126 /* Request unconditional completion on last WQE. */ 2127 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 2128 MLX5_COMP_MODE_OFFSET); 2129 /* Save elts_head in unused "immediate" field of WQE. */ 2130 last->cseg.misc = head; 2131 /* 2132 * A CQE slot must always be available. Count the 2133 * issued CEQ "always" request instead of production 2134 * index due to here can be CQE with errors and 2135 * difference with ci may become inconsistent. 2136 */ 2137 assert(txq->cqe_s > ++txq->cq_pi); 2138 } 2139 } 2140 2141 /** 2142 * DPDK callback to check the status of a tx descriptor. 2143 * 2144 * @param tx_queue 2145 * The tx queue. 2146 * @param[in] offset 2147 * The index of the descriptor in the ring. 2148 * 2149 * @return 2150 * The status of the tx descriptor. 2151 */ 2152 int 2153 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) 2154 { 2155 struct mlx5_txq_data *restrict txq = tx_queue; 2156 uint16_t used; 2157 2158 mlx5_tx_handle_completion(txq, 0); 2159 used = txq->elts_head - txq->elts_tail; 2160 if (offset < used) 2161 return RTE_ETH_TX_DESC_FULL; 2162 return RTE_ETH_TX_DESC_DONE; 2163 } 2164 2165 /** 2166 * Build the Control Segment with specified opcode: 2167 * - MLX5_OPCODE_SEND 2168 * - MLX5_OPCODE_ENHANCED_MPSW 2169 * - MLX5_OPCODE_TSO 2170 * 2171 * @param txq 2172 * Pointer to TX queue structure. 2173 * @param loc 2174 * Pointer to burst routine local context. 2175 * @param wqe 2176 * Pointer to WQE to fill with built Control Segment. 2177 * @param ds 2178 * Supposed length of WQE in segments. 2179 * @param opcode 2180 * SQ WQE opcode to put into Control Segment. 2181 * @param olx 2182 * Configured Tx offloads mask. It is fully defined at 2183 * compile time and may be used for optimization. 2184 */ 2185 static __rte_always_inline void 2186 mlx5_tx_cseg_init(struct mlx5_txq_data *restrict txq, 2187 struct mlx5_txq_local *restrict loc __rte_unused, 2188 struct mlx5_wqe *restrict wqe, 2189 unsigned int ds, 2190 unsigned int opcode, 2191 unsigned int olx __rte_unused) 2192 { 2193 struct mlx5_wqe_cseg *restrict cs = &wqe->cseg; 2194 2195 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 2196 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2197 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 2198 MLX5_COMP_MODE_OFFSET); 2199 cs->misc = RTE_BE32(0); 2200 } 2201 2202 /** 2203 * Build the Ethernet Segment without inlined data. 
2204 * Supports Software Parser, Checksums and VLAN 2205 * insertion Tx offload features. 2206 * 2207 * @param txq 2208 * Pointer to TX queue structure. 2209 * @param loc 2210 * Pointer to burst routine local context. 2211 * @param wqe 2212 * Pointer to WQE to fill with built Ethernet Segment. 2213 * @param olx 2214 * Configured Tx offloads mask. It is fully defined at 2215 * compile time and may be used for optimization. 2216 */ 2217 static __rte_always_inline void 2218 mlx5_tx_eseg_none(struct mlx5_txq_data *restrict txq __rte_unused, 2219 struct mlx5_txq_local *restrict loc, 2220 struct mlx5_wqe *restrict wqe, 2221 unsigned int olx) 2222 { 2223 struct mlx5_wqe_eseg *restrict es = &wqe->eseg; 2224 uint32_t csum; 2225 2226 /* 2227 * Calculate and set check sum flags first, dword field 2228 * in segment may be shared with Software Parser flags. 2229 */ 2230 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2231 es->flags = rte_cpu_to_le_32(csum); 2232 /* 2233 * Calculate and set Software Parser offsets and flags. 2234 * These flags a set for custom UDP and IP tunnel packets. 2235 */ 2236 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2237 /* Fill metadata field if needed. */ 2238 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2239 loc->mbuf->ol_flags & PKT_TX_METADATA ? 2240 loc->mbuf->tx_metadata : 0 : 0; 2241 /* Engage VLAN tag insertion feature if requested. */ 2242 if (MLX5_TXOFF_CONFIG(VLAN) && 2243 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 2244 /* 2245 * We should get here only if device support 2246 * this feature correctly. 2247 */ 2248 assert(txq->vlan_en); 2249 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 2250 loc->mbuf->vlan_tci); 2251 } else { 2252 es->inline_hdr = RTE_BE32(0); 2253 } 2254 } 2255 2256 /** 2257 * Build the Ethernet Segment with minimal inlined data 2258 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 2259 * used to fill the gap in single WQEBB WQEs. 2260 * Supports Software Parser, Checksums and VLAN 2261 * insertion Tx offload features. 2262 * 2263 * @param txq 2264 * Pointer to TX queue structure. 2265 * @param loc 2266 * Pointer to burst routine local context. 2267 * @param wqe 2268 * Pointer to WQE to fill with built Ethernet Segment. 2269 * @param vlan 2270 * Length of VLAN tag insertion if any. 2271 * @param olx 2272 * Configured Tx offloads mask. It is fully defined at 2273 * compile time and may be used for optimization. 2274 */ 2275 static __rte_always_inline void 2276 mlx5_tx_eseg_dmin(struct mlx5_txq_data *restrict txq __rte_unused, 2277 struct mlx5_txq_local *restrict loc, 2278 struct mlx5_wqe *restrict wqe, 2279 unsigned int vlan, 2280 unsigned int olx) 2281 { 2282 struct mlx5_wqe_eseg *restrict es = &wqe->eseg; 2283 uint32_t csum; 2284 uint8_t *psrc, *pdst; 2285 2286 /* 2287 * Calculate and set check sum flags first, dword field 2288 * in segment may be shared with Software Parser flags. 2289 */ 2290 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2291 es->flags = rte_cpu_to_le_32(csum); 2292 /* 2293 * Calculate and set Software Parser offsets and flags. 2294 * These flags a set for custom UDP and IP tunnel packets. 2295 */ 2296 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2297 /* Fill metadata field if needed. */ 2298 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2299 loc->mbuf->ol_flags & PKT_TX_METADATA ? 
2300 loc->mbuf->tx_metadata : 0 : 0; 2301 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2302 (sizeof(uint16_t) + 2303 sizeof(rte_v128u32_t)), 2304 "invalid Ethernet Segment data size"); 2305 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2306 (sizeof(uint16_t) + 2307 sizeof(struct rte_vlan_hdr) + 2308 2 * RTE_ETHER_ADDR_LEN), 2309 "invalid Ethernet Segment data size"); 2310 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2311 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 2312 es->inline_data = *(unaligned_uint16_t *)psrc; 2313 psrc += sizeof(uint16_t); 2314 pdst = (uint8_t *)(es + 1); 2315 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2316 /* Implement VLAN tag insertion as part inline data. */ 2317 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2318 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2319 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2320 /* Insert VLAN ethertype + VLAN tag. */ 2321 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2322 ((RTE_ETHER_TYPE_VLAN << 16) | 2323 loc->mbuf->vlan_tci); 2324 pdst += sizeof(struct rte_vlan_hdr); 2325 /* Copy the rest two bytes from packet data. */ 2326 assert(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2327 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2328 } else { 2329 /* Fill the gap in the title WQEBB with inline data. */ 2330 rte_mov16(pdst, psrc); 2331 } 2332 } 2333 2334 /** 2335 * Build the Ethernet Segment with entire packet 2336 * data inlining. Checks the boundary of WQEBB and 2337 * ring buffer wrapping, supports Software Parser, 2338 * Checksums and VLAN insertion Tx offload features. 2339 * 2340 * @param txq 2341 * Pointer to TX queue structure. 2342 * @param loc 2343 * Pointer to burst routine local context. 2344 * @param wqe 2345 * Pointer to WQE to fill with built Ethernet Segment. 2346 * @param vlan 2347 * Length of VLAN tag insertion if any. 2348 * @param inlen 2349 * Length of data to inline (VLAN included, if any). 2350 * @param tso 2351 * TSO flag, set mss field from the packet. 2352 * @param olx 2353 * Configured Tx offloads mask. It is fully defined at 2354 * compile time and may be used for optimization. 2355 * 2356 * @return 2357 * Pointer to the next Data Segment (aligned and wrapped around). 2358 */ 2359 static __rte_always_inline struct mlx5_wqe_dseg * 2360 mlx5_tx_eseg_data(struct mlx5_txq_data *restrict txq, 2361 struct mlx5_txq_local *restrict loc, 2362 struct mlx5_wqe *restrict wqe, 2363 unsigned int vlan, 2364 unsigned int inlen, 2365 unsigned int tso, 2366 unsigned int olx) 2367 { 2368 struct mlx5_wqe_eseg *restrict es = &wqe->eseg; 2369 uint32_t csum; 2370 uint8_t *psrc, *pdst; 2371 unsigned int part; 2372 2373 /* 2374 * Calculate and set check sum flags first, dword field 2375 * in segment may be shared with Software Parser flags. 2376 */ 2377 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2378 if (tso) { 2379 csum <<= 24; 2380 csum |= loc->mbuf->tso_segsz; 2381 es->flags = rte_cpu_to_be_32(csum); 2382 } else { 2383 es->flags = rte_cpu_to_le_32(csum); 2384 } 2385 /* 2386 * Calculate and set Software Parser offsets and flags. 2387 * These flags a set for custom UDP and IP tunnel packets. 2388 */ 2389 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2390 /* Fill metadata field if needed. */ 2391 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2392 loc->mbuf->ol_flags & PKT_TX_METADATA ? 
2393 loc->mbuf->tx_metadata : 0 : 0; 2394 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2395 (sizeof(uint16_t) + 2396 sizeof(rte_v128u32_t)), 2397 "invalid Ethernet Segment data size"); 2398 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2399 (sizeof(uint16_t) + 2400 sizeof(struct rte_vlan_hdr) + 2401 2 * RTE_ETHER_ADDR_LEN), 2402 "invalid Ethernet Segment data size"); 2403 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2404 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 2405 es->inline_data = *(unaligned_uint16_t *)psrc; 2406 psrc += sizeof(uint16_t); 2407 pdst = (uint8_t *)(es + 1); 2408 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2409 /* Implement VLAN tag insertion as part inline data. */ 2410 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2411 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2412 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2413 /* Insert VLAN ethertype + VLAN tag. */ 2414 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2415 ((RTE_ETHER_TYPE_VLAN << 16) | 2416 loc->mbuf->vlan_tci); 2417 pdst += sizeof(struct rte_vlan_hdr); 2418 /* Copy the rest two bytes from packet data. */ 2419 assert(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2420 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2421 psrc += sizeof(uint16_t); 2422 } else { 2423 /* Fill the gap in the title WQEBB with inline data. */ 2424 rte_mov16(pdst, psrc); 2425 psrc += sizeof(rte_v128u32_t); 2426 } 2427 pdst = (uint8_t *)(es + 2); 2428 assert(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2429 assert(pdst < (uint8_t *)txq->wqes_end); 2430 inlen -= MLX5_ESEG_MIN_INLINE_SIZE; 2431 if (!inlen) { 2432 assert(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 2433 return (struct mlx5_wqe_dseg *)pdst; 2434 } 2435 /* 2436 * The WQEBB space availability is checked by caller. 2437 * Here we should be aware of WQE ring buffer wraparound only. 2438 */ 2439 part = (uint8_t *)txq->wqes_end - pdst; 2440 part = RTE_MIN(part, inlen); 2441 do { 2442 rte_memcpy(pdst, psrc, part); 2443 inlen -= part; 2444 if (likely(!inlen)) { 2445 /* 2446 * If return value is not used by the caller 2447 * the code below will be optimized out. 2448 */ 2449 pdst += part; 2450 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2451 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 2452 pdst = (uint8_t *)txq->wqes; 2453 return (struct mlx5_wqe_dseg *)pdst; 2454 } 2455 pdst = (uint8_t *)txq->wqes; 2456 psrc += part; 2457 part = inlen; 2458 } while (true); 2459 } 2460 2461 /** 2462 * Copy data from chain of mbuf to the specified linear buffer. 2463 * Checksums and VLAN insertion Tx offload features. If data 2464 * from some mbuf copied completely this mbuf is freed. Local 2465 * structure is used to keep the byte stream state. 2466 * 2467 * @param pdst 2468 * Pointer to the destination linear buffer. 2469 * @param loc 2470 * Pointer to burst routine local context. 2471 * @param len 2472 * Length of data to be copied. 2473 * @param olx 2474 * Configured Tx offloads mask. It is fully defined at 2475 * compile time and may be used for optimization. 2476 */ 2477 static __rte_always_inline void 2478 mlx5_tx_mseg_memcpy(uint8_t *pdst, 2479 struct mlx5_txq_local *restrict loc, 2480 unsigned int len, 2481 unsigned int olx __rte_unused) 2482 { 2483 struct rte_mbuf *mbuf; 2484 unsigned int part, dlen; 2485 uint8_t *psrc; 2486 2487 assert(len); 2488 do { 2489 /* Allow zero length packets, must check first. */ 2490 dlen = rte_pktmbuf_data_len(loc->mbuf); 2491 if (dlen <= loc->mbuf_off) { 2492 /* Exhausted packet, just free. 
*/ 2493 mbuf = loc->mbuf; 2494 loc->mbuf = mbuf->next; 2495 rte_pktmbuf_free_seg(mbuf); 2496 loc->mbuf_off = 0; 2497 assert(loc->mbuf_nseg > 1); 2498 assert(loc->mbuf); 2499 --loc->mbuf_nseg; 2500 continue; 2501 } 2502 dlen -= loc->mbuf_off; 2503 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 2504 loc->mbuf_off); 2505 part = RTE_MIN(len, dlen); 2506 rte_memcpy(pdst, psrc, part); 2507 loc->mbuf_off += part; 2508 len -= part; 2509 if (!len) { 2510 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 2511 loc->mbuf_off = 0; 2512 /* Exhausted packet, just free. */ 2513 mbuf = loc->mbuf; 2514 loc->mbuf = mbuf->next; 2515 rte_pktmbuf_free_seg(mbuf); 2516 loc->mbuf_off = 0; 2517 assert(loc->mbuf_nseg >= 1); 2518 --loc->mbuf_nseg; 2519 } 2520 return; 2521 } 2522 pdst += part; 2523 } while (true); 2524 } 2525 2526 /** 2527 * Build the Ethernet Segment with inlined data from 2528 * multi-segment packet. Checks the boundary of WQEBB 2529 * and ring buffer wrapping, supports Software Parser, 2530 * Checksums and VLAN insertion Tx offload features. 2531 * 2532 * @param txq 2533 * Pointer to TX queue structure. 2534 * @param loc 2535 * Pointer to burst routine local context. 2536 * @param wqe 2537 * Pointer to WQE to fill with built Ethernet Segment. 2538 * @param vlan 2539 * Length of VLAN tag insertion if any. 2540 * @param inlen 2541 * Length of data to inline (VLAN included, if any). 2542 * @param tso 2543 * TSO flag, set mss field from the packet. 2544 * @param olx 2545 * Configured Tx offloads mask. It is fully defined at 2546 * compile time and may be used for optimization. 2547 * 2548 * @return 2549 * Pointer to the next Data Segment (aligned and 2550 * possible NOT wrapped around - caller should do 2551 * wrapping check on its own). 2552 */ 2553 static __rte_always_inline struct mlx5_wqe_dseg * 2554 mlx5_tx_eseg_mdat(struct mlx5_txq_data *restrict txq, 2555 struct mlx5_txq_local *restrict loc, 2556 struct mlx5_wqe *restrict wqe, 2557 unsigned int vlan, 2558 unsigned int inlen, 2559 unsigned int tso, 2560 unsigned int olx) 2561 { 2562 struct mlx5_wqe_eseg *restrict es = &wqe->eseg; 2563 uint32_t csum; 2564 uint8_t *pdst; 2565 unsigned int part; 2566 2567 /* 2568 * Calculate and set check sum flags first, uint32_t field 2569 * in segment may be shared with Software Parser flags. 2570 */ 2571 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2572 if (tso) { 2573 csum <<= 24; 2574 csum |= loc->mbuf->tso_segsz; 2575 es->flags = rte_cpu_to_be_32(csum); 2576 } else { 2577 es->flags = rte_cpu_to_le_32(csum); 2578 } 2579 /* 2580 * Calculate and set Software Parser offsets and flags. 2581 * These flags a set for custom UDP and IP tunnel packets. 2582 */ 2583 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2584 /* Fill metadata field if needed. */ 2585 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2586 loc->mbuf->ol_flags & PKT_TX_METADATA ? 2587 loc->mbuf->tx_metadata : 0 : 0; 2588 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2589 (sizeof(uint16_t) + 2590 sizeof(rte_v128u32_t)), 2591 "invalid Ethernet Segment data size"); 2592 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2593 (sizeof(uint16_t) + 2594 sizeof(struct rte_vlan_hdr) + 2595 2 * RTE_ETHER_ADDR_LEN), 2596 "invalid Ethernet Segment data size"); 2597 assert(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 2598 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 2599 pdst = (uint8_t *)&es->inline_data; 2600 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2601 /* Implement VLAN tag insertion as part inline data. 
*/ 2602 mlx5_tx_mseg_memcpy(pdst, loc, 2 * RTE_ETHER_ADDR_LEN, olx); 2603 pdst += 2 * RTE_ETHER_ADDR_LEN; 2604 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2605 ((RTE_ETHER_TYPE_VLAN << 16) | 2606 loc->mbuf->vlan_tci); 2607 pdst += sizeof(struct rte_vlan_hdr); 2608 inlen -= 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 2609 } 2610 assert(pdst < (uint8_t *)txq->wqes_end); 2611 /* 2612 * The WQEBB space availability is checked by caller. 2613 * Here we should be aware of WQE ring buffer wraparound only. 2614 */ 2615 part = (uint8_t *)txq->wqes_end - pdst; 2616 part = RTE_MIN(part, inlen); 2617 assert(part); 2618 do { 2619 mlx5_tx_mseg_memcpy(pdst, loc, part, olx); 2620 inlen -= part; 2621 if (likely(!inlen)) { 2622 pdst += part; 2623 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2624 return (struct mlx5_wqe_dseg *)pdst; 2625 } 2626 pdst = (uint8_t *)txq->wqes; 2627 part = inlen; 2628 } while (true); 2629 } 2630 2631 /** 2632 * Build the Data Segment of pointer type. 2633 * 2634 * @param txq 2635 * Pointer to TX queue structure. 2636 * @param loc 2637 * Pointer to burst routine local context. 2638 * @param dseg 2639 * Pointer to WQE to fill with built Data Segment. 2640 * @param buf 2641 * Data buffer to point. 2642 * @param len 2643 * Data buffer length. 2644 * @param olx 2645 * Configured Tx offloads mask. It is fully defined at 2646 * compile time and may be used for optimization. 2647 */ 2648 static __rte_always_inline void 2649 mlx5_tx_dseg_ptr(struct mlx5_txq_data *restrict txq, 2650 struct mlx5_txq_local *restrict loc, 2651 struct mlx5_wqe_dseg *restrict dseg, 2652 uint8_t *buf, 2653 unsigned int len, 2654 unsigned int olx __rte_unused) 2655 2656 { 2657 assert(len); 2658 dseg->bcount = rte_cpu_to_be_32(len); 2659 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2660 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2661 } 2662 2663 /** 2664 * Build the Data Segment of pointer type or inline 2665 * if data length is less than buffer in minimal 2666 * Data Segment size. 2667 * 2668 * @param txq 2669 * Pointer to TX queue structure. 2670 * @param loc 2671 * Pointer to burst routine local context. 2672 * @param dseg 2673 * Pointer to WQE to fill with built Data Segment. 2674 * @param buf 2675 * Data buffer to point. 2676 * @param len 2677 * Data buffer length. 2678 * @param olx 2679 * Configured Tx offloads mask. It is fully defined at 2680 * compile time and may be used for optimization. 2681 */ 2682 static __rte_always_inline void 2683 mlx5_tx_dseg_iptr(struct mlx5_txq_data *restrict txq, 2684 struct mlx5_txq_local *restrict loc, 2685 struct mlx5_wqe_dseg *restrict dseg, 2686 uint8_t *buf, 2687 unsigned int len, 2688 unsigned int olx __rte_unused) 2689 2690 { 2691 uintptr_t dst, src; 2692 2693 assert(len); 2694 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 2695 dseg->bcount = rte_cpu_to_be_32(len); 2696 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2697 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2698 2699 return; 2700 } 2701 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2702 /* Unrolled implementation of generic rte_memcpy. 
*/ 2703 dst = (uintptr_t)&dseg->inline_data[0]; 2704 src = (uintptr_t)buf; 2705 #ifdef RTE_ARCH_STRICT_ALIGN 2706 memcpy(dst, src, len); 2707 #else 2708 if (len & 0x08) { 2709 *(uint64_t *)dst = *(uint64_t *)src; 2710 dst += sizeof(uint64_t); 2711 src += sizeof(uint64_t); 2712 } 2713 if (len & 0x04) { 2714 *(uint32_t *)dst = *(uint32_t *)src; 2715 dst += sizeof(uint32_t); 2716 src += sizeof(uint32_t); 2717 } 2718 if (len & 0x02) { 2719 *(uint16_t *)dst = *(uint16_t *)src; 2720 dst += sizeof(uint16_t); 2721 src += sizeof(uint16_t); 2722 } 2723 if (len & 0x01) 2724 *(uint8_t *)dst = *(uint8_t *)src; 2725 #endif 2726 } 2727 2728 /** 2729 * Build the Data Segment of inlined data from single 2730 * segment packet, no VLAN insertion. 2731 * 2732 * @param txq 2733 * Pointer to TX queue structure. 2734 * @param loc 2735 * Pointer to burst routine local context. 2736 * @param dseg 2737 * Pointer to WQE to fill with built Data Segment. 2738 * @param buf 2739 * Data buffer to point. 2740 * @param len 2741 * Data buffer length. 2742 * @param olx 2743 * Configured Tx offloads mask. It is fully defined at 2744 * compile time and may be used for optimization. 2745 * 2746 * @return 2747 * Pointer to the next Data Segment after inlined data. 2748 * Ring buffer wraparound check is needed. We do not 2749 * do it here because it may not be needed for the 2750 * last packet in the eMPW session. 2751 */ 2752 static __rte_always_inline struct mlx5_wqe_dseg * 2753 mlx5_tx_dseg_empw(struct mlx5_txq_data *restrict txq, 2754 struct mlx5_txq_local *restrict loc __rte_unused, 2755 struct mlx5_wqe_dseg *restrict dseg, 2756 uint8_t *buf, 2757 unsigned int len, 2758 unsigned int olx __rte_unused) 2759 { 2760 unsigned int part; 2761 uint8_t *pdst; 2762 2763 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2764 pdst = &dseg->inline_data[0]; 2765 /* 2766 * The WQEBB space availability is checked by caller. 2767 * Here we should be aware of WQE ring buffer wraparound only. 2768 */ 2769 part = (uint8_t *)txq->wqes_end - pdst; 2770 part = RTE_MIN(part, len); 2771 do { 2772 rte_memcpy(pdst, buf, part); 2773 len -= part; 2774 if (likely(!len)) { 2775 pdst += part; 2776 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2777 /* Note: no final wraparound check here. */ 2778 return (struct mlx5_wqe_dseg *)pdst; 2779 } 2780 pdst = (uint8_t *)txq->wqes; 2781 buf += part; 2782 part = len; 2783 } while (true); 2784 } 2785 2786 /** 2787 * Build the Data Segment of inlined data from single 2788 * segment packet with VLAN insertion. 2789 * 2790 * @param txq 2791 * Pointer to TX queue structure. 2792 * @param loc 2793 * Pointer to burst routine local context. 2794 * @param dseg 2795 * Pointer to the dseg fill with built Data Segment. 2796 * @param buf 2797 * Data buffer to point. 2798 * @param len 2799 * Data buffer length. 2800 * @param olx 2801 * Configured Tx offloads mask. It is fully defined at 2802 * compile time and may be used for optimization. 2803 * 2804 * @return 2805 * Pointer to the next Data Segment after inlined data. 2806 * Ring buffer wraparound check is needed. 
2807 */ 2808 static __rte_always_inline struct mlx5_wqe_dseg * 2809 mlx5_tx_dseg_vlan(struct mlx5_txq_data *restrict txq, 2810 struct mlx5_txq_local *restrict loc __rte_unused, 2811 struct mlx5_wqe_dseg *restrict dseg, 2812 uint8_t *buf, 2813 unsigned int len, 2814 unsigned int olx __rte_unused) 2815 2816 { 2817 unsigned int part; 2818 uint8_t *pdst; 2819 2820 assert(len > MLX5_ESEG_MIN_INLINE_SIZE); 2821 static_assert(MLX5_DSEG_MIN_INLINE_SIZE == 2822 (2 * RTE_ETHER_ADDR_LEN), 2823 "invalid Data Segment data size"); 2824 dseg->bcount = rte_cpu_to_be_32((len + sizeof(struct rte_vlan_hdr)) | 2825 MLX5_ETH_WQE_DATA_INLINE); 2826 pdst = &dseg->inline_data[0]; 2827 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 2828 buf += MLX5_DSEG_MIN_INLINE_SIZE; 2829 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 2830 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ 2831 assert(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 2832 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 2833 loc->mbuf->vlan_tci); 2834 pdst += sizeof(struct rte_vlan_hdr); 2835 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 2836 pdst = (uint8_t *)txq->wqes; 2837 /* 2838 * The WQEBB space availability is checked by caller. 2839 * Here we should be aware of WQE ring buffer wraparound only. 2840 */ 2841 part = (uint8_t *)txq->wqes_end - pdst; 2842 part = RTE_MIN(part, len); 2843 do { 2844 rte_memcpy(pdst, buf, part); 2845 len -= part; 2846 if (likely(!len)) { 2847 pdst += part; 2848 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2849 /* Note: no final wraparound check here. */ 2850 return (struct mlx5_wqe_dseg *)pdst; 2851 } 2852 pdst = (uint8_t *)txq->wqes; 2853 buf += part; 2854 part = len; 2855 } while (true); 2856 } 2857 2858 /** 2859 * Build the Ethernet Segment with optionally inlined data with 2860 * VLAN insertion and following Data Segments (if any) from 2861 * multi-segment packet. Used by ordinary send and TSO. 2862 * 2863 * @param txq 2864 * Pointer to TX queue structure. 2865 * @param loc 2866 * Pointer to burst routine local context. 2867 * @param wqe 2868 * Pointer to WQE to fill with built Ethernet/Data Segments. 2869 * @param vlan 2870 * Length of VLAN header to insert, 0 means no VLAN insertion. 2871 * @param inlen 2872 * Data length to inline. For TSO this parameter specifies 2873 * exact value, for ordinary send routine can be aligned by 2874 * caller to provide better WQE space saving and data buffer 2875 * start address alignment. This length includes VLAN header 2876 * being inserted. 2877 * @param tso 2878 * Zero means ordinary send, inlined data can be extended, 2879 * otherwise this is TSO, inlined data length is fixed. 2880 * @param olx 2881 * Configured Tx offloads mask. It is fully defined at 2882 * compile time and may be used for optimization. 2883 * 2884 * @return 2885 * Actual size of built WQE in segments. 2886 */ 2887 static __rte_always_inline unsigned int 2888 mlx5_tx_mseg_build(struct mlx5_txq_data *restrict txq, 2889 struct mlx5_txq_local *restrict loc, 2890 struct mlx5_wqe *restrict wqe, 2891 unsigned int vlan, 2892 unsigned int inlen, 2893 unsigned int tso, 2894 unsigned int olx __rte_unused) 2895 { 2896 struct mlx5_wqe_dseg *restrict dseg; 2897 unsigned int ds; 2898 2899 assert((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 2900 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 2901 loc->mbuf_off = 0; 2902 2903 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 2904 if (!loc->mbuf_nseg) 2905 goto dseg_done; 2906 /* 2907 * There are still some mbuf remaining, not inlined. 
2908 * The first mbuf may be partially inlined and we 2909 * must process the possible non-zero data offset. 2910 */ 2911 if (loc->mbuf_off) { 2912 unsigned int dlen; 2913 uint8_t *dptr; 2914 2915 /* 2916 * Exhausted packets must be dropped before. 2917 * Non-zero offset means there are some data 2918 * remained in the packet. 2919 */ 2920 assert(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 2921 assert(rte_pktmbuf_data_len(loc->mbuf)); 2922 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 2923 loc->mbuf_off); 2924 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 2925 /* 2926 * Build the pointer/minimal data Data Segment. 2927 * Do ring buffer wrapping check in advance. 2928 */ 2929 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2930 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2931 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 2932 /* Store the mbuf to be freed on completion. */ 2933 assert(loc->elts_free); 2934 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2935 --loc->elts_free; 2936 ++dseg; 2937 if (--loc->mbuf_nseg == 0) 2938 goto dseg_done; 2939 loc->mbuf = loc->mbuf->next; 2940 loc->mbuf_off = 0; 2941 } 2942 do { 2943 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 2944 struct rte_mbuf *mbuf; 2945 2946 /* Zero length segment found, just skip. */ 2947 mbuf = loc->mbuf; 2948 loc->mbuf = loc->mbuf->next; 2949 rte_pktmbuf_free_seg(mbuf); 2950 if (--loc->mbuf_nseg == 0) 2951 break; 2952 } else { 2953 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2954 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2955 mlx5_tx_dseg_iptr 2956 (txq, loc, dseg, 2957 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 2958 rte_pktmbuf_data_len(loc->mbuf), olx); 2959 assert(loc->elts_free); 2960 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2961 --loc->elts_free; 2962 ++dseg; 2963 if (--loc->mbuf_nseg == 0) 2964 break; 2965 loc->mbuf = loc->mbuf->next; 2966 } 2967 } while (true); 2968 2969 dseg_done: 2970 /* Calculate actual segments used from the dseg pointer. */ 2971 if ((uintptr_t)wqe < (uintptr_t)dseg) 2972 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 2973 else 2974 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 2975 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 2976 return ds; 2977 } 2978 2979 /** 2980 * Tx one packet function for multi-segment TSO. Supports all 2981 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 2982 * sends one packet per WQE. 2983 * 2984 * This routine is responsible for storing processed mbuf 2985 * into elts ring buffer and update elts_head. 2986 * 2987 * @param txq 2988 * Pointer to TX queue structure. 2989 * @param loc 2990 * Pointer to burst routine local context. 2991 * @param olx 2992 * Configured Tx offloads mask. It is fully defined at 2993 * compile time and may be used for optimization. 2994 * 2995 * @return 2996 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2997 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2998 * Local context variables partially updated. 2999 */ 3000 static __rte_always_inline enum mlx5_txcmp_code 3001 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *restrict txq, 3002 struct mlx5_txq_local *restrict loc, 3003 unsigned int olx) 3004 { 3005 struct mlx5_wqe *restrict wqe; 3006 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 3007 3008 /* 3009 * Calculate data length to be inlined to estimate 3010 * the required space in WQE ring buffer. 
3011 */ 3012 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3013 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3014 vlan = sizeof(struct rte_vlan_hdr); 3015 inlen = loc->mbuf->l2_len + vlan + 3016 loc->mbuf->l3_len + loc->mbuf->l4_len; 3017 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 3018 return MLX5_TXCMP_CODE_ERROR; 3019 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3020 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 3021 /* Packet must contain all TSO headers. */ 3022 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 3023 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3024 inlen > (dlen + vlan))) 3025 return MLX5_TXCMP_CODE_ERROR; 3026 assert(inlen >= txq->inlen_mode); 3027 /* 3028 * Check whether there are enough free WQEBBs: 3029 * - Control Segment 3030 * - Ethernet Segment 3031 * - First Segment of inlined Ethernet data 3032 * - ... data continued ... 3033 * - Data Segments of pointer/min inline type 3034 */ 3035 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3036 MLX5_ESEG_MIN_INLINE_SIZE + 3037 MLX5_WSEG_SIZE + 3038 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3039 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3040 return MLX5_TXCMP_CODE_EXIT; 3041 /* Check for maximal WQE size. */ 3042 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3043 return MLX5_TXCMP_CODE_ERROR; 3044 #ifdef MLX5_PMD_SOFT_COUNTERS 3045 /* Update sent data bytes/packets counters. */ 3046 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 3047 loc->mbuf->tso_segsz; 3048 /* 3049 * One will be added for mbuf itself 3050 * at the end of the mlx5_tx_burst from 3051 * loc->pkts_sent field. 3052 */ 3053 --ntcp; 3054 txq->stats.opackets += ntcp; 3055 txq->stats.obytes += dlen + vlan + ntcp * inlen; 3056 #endif 3057 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3058 loc->wqe_last = wqe; 3059 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 3060 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 3061 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3062 txq->wqe_ci += (ds + 3) / 4; 3063 loc->wqe_free -= (ds + 3) / 4; 3064 /* Request CQE generation if limits are reached. */ 3065 mlx5_tx_request_completion(txq, loc, olx); 3066 return MLX5_TXCMP_CODE_MULTI; 3067 } 3068 3069 /** 3070 * Tx one packet function for multi-segment SEND. Supports all 3071 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3072 * sends one packet per WQE, without any data inlining in 3073 * Ethernet Segment. 3074 * 3075 * This routine is responsible for storing processed mbuf 3076 * into elts ring buffer and update elts_head. 3077 * 3078 * @param txq 3079 * Pointer to TX queue structure. 3080 * @param loc 3081 * Pointer to burst routine local context. 3082 * @param olx 3083 * Configured Tx offloads mask. It is fully defined at 3084 * compile time and may be used for optimization. 3085 * 3086 * @return 3087 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3088 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3089 * Local context variables partially updated. 3090 */ 3091 static __rte_always_inline enum mlx5_txcmp_code 3092 mlx5_tx_packet_multi_send(struct mlx5_txq_data *restrict txq, 3093 struct mlx5_txq_local *restrict loc, 3094 unsigned int olx) 3095 { 3096 struct mlx5_wqe_dseg *restrict dseg; 3097 struct mlx5_wqe *restrict wqe; 3098 unsigned int ds, nseg; 3099 3100 assert(NB_SEGS(loc->mbuf) > 1); 3101 /* 3102 * No inline at all, it means the CPU cycles saving 3103 * is prioritized at configuration, we should not 3104 * copy any packet data to WQE. 
3105 */ 3106 nseg = NB_SEGS(loc->mbuf); 3107 ds = 2 + nseg; 3108 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3109 return MLX5_TXCMP_CODE_EXIT; 3110 /* Check for maximal WQE size. */ 3111 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3112 return MLX5_TXCMP_CODE_ERROR; 3113 /* 3114 * Some Tx offloads may cause an error if 3115 * packet is not long enough, check against 3116 * assumed minimal length. 3117 */ 3118 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 3119 return MLX5_TXCMP_CODE_ERROR; 3120 #ifdef MLX5_PMD_SOFT_COUNTERS 3121 /* Update sent data bytes counter. */ 3122 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 3123 if (MLX5_TXOFF_CONFIG(VLAN) && 3124 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3125 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 3126 #endif 3127 /* 3128 * SEND WQE, one WQEBB: 3129 * - Control Segment, SEND opcode 3130 * - Ethernet Segment, optional VLAN, no inline 3131 * - Data Segments, pointer only type 3132 */ 3133 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3134 loc->wqe_last = wqe; 3135 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 3136 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3137 dseg = &wqe->dseg[0]; 3138 do { 3139 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3140 struct rte_mbuf *mbuf; 3141 3142 /* 3143 * Zero length segment found, have to 3144 * correct total size of WQE in segments. 3145 * It is supposed to be rare occasion, so 3146 * in normal case (no zero length segments) 3147 * we avoid extra writing to the Control 3148 * Segment. 3149 */ 3150 --ds; 3151 wqe->cseg.sq_ds -= RTE_BE32(1); 3152 mbuf = loc->mbuf; 3153 loc->mbuf = mbuf->next; 3154 rte_pktmbuf_free_seg(mbuf); 3155 if (--nseg == 0) 3156 break; 3157 } else { 3158 mlx5_tx_dseg_ptr 3159 (txq, loc, dseg, 3160 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3161 rte_pktmbuf_data_len(loc->mbuf), olx); 3162 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3163 --loc->elts_free; 3164 if (--nseg == 0) 3165 break; 3166 ++dseg; 3167 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3168 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3169 loc->mbuf = loc->mbuf->next; 3170 } 3171 } while (true); 3172 txq->wqe_ci += (ds + 3) / 4; 3173 loc->wqe_free -= (ds + 3) / 4; 3174 /* Request CQE generation if limits are reached. */ 3175 mlx5_tx_request_completion(txq, loc, olx); 3176 return MLX5_TXCMP_CODE_MULTI; 3177 } 3178 3179 /** 3180 * Tx one packet function for multi-segment SEND. Supports all 3181 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3182 * sends one packet per WQE, with data inlining in 3183 * Ethernet Segment and minimal Data Segments. 3184 * 3185 * This routine is responsible for storing processed mbuf 3186 * into elts ring buffer and update elts_head. 3187 * 3188 * @param txq 3189 * Pointer to TX queue structure. 3190 * @param loc 3191 * Pointer to burst routine local context. 3192 * @param olx 3193 * Configured Tx offloads mask. It is fully defined at 3194 * compile time and may be used for optimization. 3195 * 3196 * @return 3197 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3198 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3199 * Local context variables partially updated. 
3200 */ 3201 static __rte_always_inline enum mlx5_txcmp_code 3202 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *restrict txq, 3203 struct mlx5_txq_local *restrict loc, 3204 unsigned int olx) 3205 { 3206 struct mlx5_wqe *restrict wqe; 3207 unsigned int ds, inlen, dlen, vlan = 0; 3208 3209 assert(MLX5_TXOFF_CONFIG(INLINE)); 3210 assert(NB_SEGS(loc->mbuf) > 1); 3211 /* 3212 * First calculate data length to be inlined 3213 * to estimate the required space for WQE. 3214 */ 3215 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3216 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3217 vlan = sizeof(struct rte_vlan_hdr); 3218 inlen = dlen + vlan; 3219 /* Check against minimal length. */ 3220 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3221 return MLX5_TXCMP_CODE_ERROR; 3222 assert(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 3223 if (inlen > txq->inlen_send) { 3224 struct rte_mbuf *mbuf; 3225 unsigned int nxlen; 3226 uintptr_t start; 3227 3228 /* 3229 * Packet length exceeds the allowed inline 3230 * data length, check whether the minimal 3231 * inlining is required. 3232 */ 3233 if (txq->inlen_mode) { 3234 assert(txq->inlen_mode >= MLX5_ESEG_MIN_INLINE_SIZE); 3235 assert(txq->inlen_mode <= txq->inlen_send); 3236 inlen = txq->inlen_mode; 3237 } else { 3238 if (!vlan || txq->vlan_en) { 3239 /* 3240 * VLAN insertion will be done inside by HW. 3241 * It is not utmost effective - VLAN flag is 3242 * checked twice, but we should proceed the 3243 * inlining length correctly and take into 3244 * account the VLAN header being inserted. 3245 */ 3246 return mlx5_tx_packet_multi_send 3247 (txq, loc, olx); 3248 } 3249 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 3250 } 3251 /* 3252 * Now we know the minimal amount of data is requested 3253 * to inline. Check whether we should inline the buffers 3254 * from the chain beginning to eliminate some mbufs. 3255 */ 3256 mbuf = loc->mbuf; 3257 nxlen = rte_pktmbuf_data_len(mbuf); 3258 if (unlikely(nxlen <= txq->inlen_send)) { 3259 /* We can inline first mbuf at least. */ 3260 if (nxlen < inlen) { 3261 unsigned int smlen; 3262 3263 /* Scan mbufs till inlen filled. */ 3264 do { 3265 smlen = nxlen; 3266 mbuf = NEXT(mbuf); 3267 assert(mbuf); 3268 nxlen = rte_pktmbuf_data_len(mbuf); 3269 nxlen += smlen; 3270 } while (unlikely(nxlen < inlen)); 3271 if (unlikely(nxlen > txq->inlen_send)) { 3272 /* We cannot inline entire mbuf. */ 3273 smlen = inlen - smlen; 3274 start = rte_pktmbuf_mtod_offset 3275 (mbuf, uintptr_t, smlen); 3276 goto do_align; 3277 } 3278 } 3279 do { 3280 inlen = nxlen; 3281 mbuf = NEXT(mbuf); 3282 /* There should be not end of packet. */ 3283 assert(mbuf); 3284 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 3285 } while (unlikely(nxlen < txq->inlen_send)); 3286 } 3287 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 3288 /* 3289 * Check whether we can do inline to align start 3290 * address of data buffer to cacheline. 3291 */ 3292 do_align: 3293 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 3294 if (unlikely(start)) { 3295 start += inlen; 3296 if (start <= txq->inlen_send) 3297 inlen = start; 3298 } 3299 } 3300 /* 3301 * Check whether there are enough free WQEBBs: 3302 * - Control Segment 3303 * - Ethernet Segment 3304 * - First Segment of inlined Ethernet data 3305 * - ... data continued ... 3306 * - Data Segments of pointer/min inline type 3307 * 3308 * Estimate the number of Data Segments conservatively, 3309 * supposing no any mbufs is being freed during inlining. 
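* For illustration, with the 16-byte MLX5_WSEG_SIZE and the
* 18-byte minimal Ethernet Segment inline size, inlen = 50 and
* NB_SEGS = 3 give ds = 3 + 2 + (50 - 18 + 16 + 15) / 16 = 8
* segments, i.e. two WQEBBs (hypothetical numbers, not a real packet).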
3310 */ 3311 assert(inlen <= txq->inlen_send); 3312 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3313 MLX5_ESEG_MIN_INLINE_SIZE + 3314 MLX5_WSEG_SIZE + 3315 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3316 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3317 return MLX5_TXCMP_CODE_EXIT; 3318 /* Check for maximal WQE size. */ 3319 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3320 return MLX5_TXCMP_CODE_ERROR; 3321 #ifdef MLX5_PMD_SOFT_COUNTERS 3322 /* Update sent data bytes/packets counters. */ 3323 txq->stats.obytes += dlen + vlan; 3324 #endif 3325 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3326 loc->wqe_last = wqe; 3327 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 3328 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 3329 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3330 txq->wqe_ci += (ds + 3) / 4; 3331 loc->wqe_free -= (ds + 3) / 4; 3332 /* Request CQE generation if limits are reached. */ 3333 mlx5_tx_request_completion(txq, loc, olx); 3334 return MLX5_TXCMP_CODE_MULTI; 3335 } 3336 3337 /** 3338 * Tx burst function for multi-segment packets. Supports all 3339 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 3340 * sends one packet per WQE. Function stops sending if it 3341 * encounters the single-segment packet. 3342 * 3343 * This routine is responsible for storing processed mbuf 3344 * into elts ring buffer and update elts_head. 3345 * 3346 * @param txq 3347 * Pointer to TX queue structure. 3348 * @param[in] pkts 3349 * Packets to transmit. 3350 * @param pkts_n 3351 * Number of packets in array. 3352 * @param loc 3353 * Pointer to burst routine local context. 3354 * @param olx 3355 * Configured Tx offloads mask. It is fully defined at 3356 * compile time and may be used for optimization. 3357 * 3358 * @return 3359 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3360 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3361 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3362 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 3363 * Local context variables updated. 3364 */ 3365 static __rte_always_inline enum mlx5_txcmp_code 3366 mlx5_tx_burst_mseg(struct mlx5_txq_data *restrict txq, 3367 struct rte_mbuf **restrict pkts, 3368 unsigned int pkts_n, 3369 struct mlx5_txq_local *restrict loc, 3370 unsigned int olx) 3371 { 3372 assert(loc->elts_free && loc->wqe_free); 3373 assert(pkts_n > loc->pkts_sent); 3374 pkts += loc->pkts_sent + 1; 3375 pkts_n -= loc->pkts_sent; 3376 for (;;) { 3377 enum mlx5_txcmp_code ret; 3378 3379 assert(NB_SEGS(loc->mbuf) > 1); 3380 /* 3381 * Estimate the number of free elts quickly but 3382 * conservatively. Some segment may be fully inlined 3383 * and freed, ignore this here - precise estimation 3384 * is costly. 3385 */ 3386 if (loc->elts_free < NB_SEGS(loc->mbuf)) 3387 return MLX5_TXCMP_CODE_EXIT; 3388 if (MLX5_TXOFF_CONFIG(TSO) && 3389 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) { 3390 /* Proceed with multi-segment TSO. */ 3391 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 3392 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 3393 /* Proceed with multi-segment SEND with inlining. */ 3394 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 3395 } else { 3396 /* Proceed with multi-segment SEND w/o inlining. */ 3397 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 3398 } 3399 if (ret == MLX5_TXCMP_CODE_EXIT) 3400 return MLX5_TXCMP_CODE_EXIT; 3401 if (ret == MLX5_TXCMP_CODE_ERROR) 3402 return MLX5_TXCMP_CODE_ERROR; 3403 /* WQE is built, go to the next packet. 
*/ 3404 ++loc->pkts_sent; 3405 --pkts_n; 3406 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3407 return MLX5_TXCMP_CODE_EXIT; 3408 loc->mbuf = *pkts++; 3409 if (pkts_n > 1) 3410 rte_prefetch0(*pkts); 3411 if (likely(NB_SEGS(loc->mbuf) > 1)) 3412 continue; 3413 /* Here ends the series of multi-segment packets. */ 3414 if (MLX5_TXOFF_CONFIG(TSO) && 3415 unlikely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) 3416 return MLX5_TXCMP_CODE_TSO; 3417 return MLX5_TXCMP_CODE_SINGLE; 3418 } 3419 assert(false); 3420 } 3421 3422 /** 3423 * Tx burst function for single-segment packets with TSO. 3424 * Supports all types of Tx offloads, except multi-packets. 3425 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 3426 * Function stops sending if it encounters the multi-segment 3427 * packet or packet without TSO requested. 3428 * 3429 * The routine is responsible for storing processed mbuf 3430 * into elts ring buffer and update elts_head if inline 3431 * offloads is requested due to possible early freeing 3432 * of the inlined mbufs (can not store pkts array in elts 3433 * as a batch). 3434 * 3435 * @param txq 3436 * Pointer to TX queue structure. 3437 * @param[in] pkts 3438 * Packets to transmit. 3439 * @param pkts_n 3440 * Number of packets in array. 3441 * @param loc 3442 * Pointer to burst routine local context. 3443 * @param olx 3444 * Configured Tx offloads mask. It is fully defined at 3445 * compile time and may be used for optimization. 3446 * 3447 * @return 3448 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3449 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3450 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3451 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3452 * Local context variables updated. 3453 */ 3454 static __rte_always_inline enum mlx5_txcmp_code 3455 mlx5_tx_burst_tso(struct mlx5_txq_data *restrict txq, 3456 struct rte_mbuf **restrict pkts, 3457 unsigned int pkts_n, 3458 struct mlx5_txq_local *restrict loc, 3459 unsigned int olx) 3460 { 3461 assert(loc->elts_free && loc->wqe_free); 3462 assert(pkts_n > loc->pkts_sent); 3463 pkts += loc->pkts_sent + 1; 3464 pkts_n -= loc->pkts_sent; 3465 for (;;) { 3466 struct mlx5_wqe_dseg *restrict dseg; 3467 struct mlx5_wqe *restrict wqe; 3468 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 3469 uint8_t *dptr; 3470 3471 assert(NB_SEGS(loc->mbuf) == 1); 3472 dlen = rte_pktmbuf_data_len(loc->mbuf); 3473 if (MLX5_TXOFF_CONFIG(VLAN) && 3474 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 3475 vlan = sizeof(struct rte_vlan_hdr); 3476 } 3477 /* 3478 * First calculate the WQE size to check 3479 * whether we have enough space in ring buffer. 3480 */ 3481 hlen = loc->mbuf->l2_len + vlan + 3482 loc->mbuf->l3_len + loc->mbuf->l4_len; 3483 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 3484 return MLX5_TXCMP_CODE_ERROR; 3485 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3486 hlen += loc->mbuf->outer_l2_len + 3487 loc->mbuf->outer_l3_len; 3488 /* Segment must contain all TSO headers. */ 3489 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 3490 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3491 hlen > (dlen + vlan))) 3492 return MLX5_TXCMP_CODE_ERROR; 3493 /* 3494 * Check whether there are enough free WQEBBs: 3495 * - Control Segment 3496 * - Ethernet Segment 3497 * - First Segment of inlined Ethernet data 3498 * - ... data continued ... 
3499 * - Finishing Data Segment of pointer type 3500 */ 3501 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 3502 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3503 if (loc->wqe_free < ((ds + 3) / 4)) 3504 return MLX5_TXCMP_CODE_EXIT; 3505 #ifdef MLX5_PMD_SOFT_COUNTERS 3506 /* Update sent data bytes/packets counters. */ 3507 ntcp = (dlen + vlan - hlen + 3508 loc->mbuf->tso_segsz - 1) / 3509 loc->mbuf->tso_segsz; 3510 /* 3511 * One will be added for mbuf itself at the end 3512 * of the mlx5_tx_burst from loc->pkts_sent field. 3513 */ 3514 --ntcp; 3515 txq->stats.opackets += ntcp; 3516 txq->stats.obytes += dlen + vlan + ntcp * hlen; 3517 #endif 3518 /* 3519 * Build the TSO WQE: 3520 * - Control Segment 3521 * - Ethernet Segment with hlen bytes inlined 3522 * - Data Segment of pointer type 3523 */ 3524 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3525 loc->wqe_last = wqe; 3526 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3527 MLX5_OPCODE_TSO, olx); 3528 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 3529 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 3530 dlen -= hlen - vlan; 3531 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 3532 /* 3533 * WQE is built, update the loop parameters 3534 * and go to the next packet. 3535 */ 3536 txq->wqe_ci += (ds + 3) / 4; 3537 loc->wqe_free -= (ds + 3) / 4; 3538 if (MLX5_TXOFF_CONFIG(INLINE)) 3539 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3540 --loc->elts_free; 3541 ++loc->pkts_sent; 3542 --pkts_n; 3543 /* Request CQE generation if limits are reached. */ 3544 mlx5_tx_request_completion(txq, loc, olx); 3545 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3546 return MLX5_TXCMP_CODE_EXIT; 3547 loc->mbuf = *pkts++; 3548 if (pkts_n > 1) 3549 rte_prefetch0(*pkts); 3550 if (MLX5_TXOFF_CONFIG(MULTI) && 3551 unlikely(NB_SEGS(loc->mbuf) > 1)) 3552 return MLX5_TXCMP_CODE_MULTI; 3553 if (unlikely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) 3554 return MLX5_TXCMP_CODE_SINGLE; 3555 /* Continue with the next TSO packet. */ 3556 } 3557 assert(false); 3558 } 3559 3560 /** 3561 * Analyze the packet and select the best method to send. 3562 * 3563 * @param txq 3564 * Pointer to TX queue structure. 3565 * @param loc 3566 * Pointer to burst routine local context. 3567 * @param olx 3568 * Configured Tx offloads mask. It is fully defined at 3569 * compile time and may be used for optimization. 3570 * @param newp 3571 * The predefined flag whether do complete check for 3572 * multi-segment packets and TSO. 3573 * 3574 * @return 3575 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3576 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 3577 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 3578 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 3579 */ 3580 static __rte_always_inline enum mlx5_txcmp_code 3581 mlx5_tx_able_to_empw(struct mlx5_txq_data *restrict txq, 3582 struct mlx5_txq_local *restrict loc, 3583 unsigned int olx, 3584 bool newp) 3585 { 3586 /* Check for multi-segment packet. */ 3587 if (newp && 3588 MLX5_TXOFF_CONFIG(MULTI) && 3589 unlikely(NB_SEGS(loc->mbuf) > 1)) 3590 return MLX5_TXCMP_CODE_MULTI; 3591 /* Check for TSO packet. */ 3592 if (newp && 3593 MLX5_TXOFF_CONFIG(TSO) && 3594 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3595 return MLX5_TXCMP_CODE_TSO; 3596 /* Check if eMPW is enabled at all. */ 3597 if (!MLX5_TXOFF_CONFIG(EMPW)) 3598 return MLX5_TXCMP_CODE_SINGLE; 3599 /* Check if eMPW can be engaged. 
*/
3600 if (MLX5_TXOFF_CONFIG(VLAN) &&
3601 unlikely(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) &&
3602 (!MLX5_TXOFF_CONFIG(INLINE) ||
3603 unlikely((rte_pktmbuf_data_len(loc->mbuf) +
3604 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) {
3605 /*
3606 * eMPW does not support VLAN insertion offload,
3607 * we have to inline the entire packet but
3608 * the packet is too long for inlining.
3609 */
3610 return MLX5_TXCMP_CODE_SINGLE;
3611 }
3612 return MLX5_TXCMP_CODE_EMPW;
3613 }
3614
3615 /**
3616 * Check whether the next packet attributes match the eMPW batch ones.
3617 *
3618 * @param txq
3619 * Pointer to TX queue structure.
3620 * @param es
3621 * Pointer to Ethernet Segment of eMPW batch.
3622 * @param loc
3623 * Pointer to burst routine local context.
3624 * @param olx
3625 * Configured Tx offloads mask. It is fully defined at
3626 * compile time and may be used for optimization.
3627 *
3628 * @return
3629 * true - packet matches the eMPW batch attributes.
3630 * false - no match, eMPW should be restarted.
3631 */
3632 static __rte_always_inline bool
3633 mlx5_tx_match_empw(struct mlx5_txq_data *restrict txq __rte_unused,
3634 struct mlx5_wqe_eseg *restrict es,
3635 struct mlx5_txq_local *restrict loc,
3636 unsigned int olx)
3637 {
3638 uint8_t swp_flags = 0;
3639
3640 /* Compare the checksum flags, if any. */
3641 if (MLX5_TXOFF_CONFIG(CSUM) &&
3642 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags)
3643 return false;
3644 /* Compare the Software Parser offsets and flags. */
3645 if (MLX5_TXOFF_CONFIG(SWP) &&
3646 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) ||
3647 es->swp_flags != swp_flags))
3648 return false;
3649 /* Compare the metadata field, if any. */
3650 if (MLX5_TXOFF_CONFIG(METADATA) &&
3651 es->metadata != (loc->mbuf->ol_flags & PKT_TX_METADATA ?
3652 loc->mbuf->tx_metadata : 0))
3653 return false;
3654 /* There must be no VLAN packets in the eMPW loop. */
3655 if (MLX5_TXOFF_CONFIG(VLAN))
3656 assert(!(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT));
3657 return true;
3658 }
3659
3660 /*
3661 * Update send loop variables and WQE for eMPW loop
3662 * without data inlining. The number of Data Segments is
3663 * equal to the number of sent packets.
3664 *
3665 * @param txq
3666 * Pointer to TX queue structure.
3667 * @param loc
3668 * Pointer to burst routine local context.
3669 * @param ds
3670 * Number of packets/Data Segments (one Data Segment per packet).
3671 * @param slen
3672 * Accumulated statistics, bytes sent.
3673 * @param olx
3674 * Configured Tx offloads mask. It is fully defined at
3675 * compile time and may be used for optimization.
3680 */
3681 static __rte_always_inline void
3682 mlx5_tx_sdone_empw(struct mlx5_txq_data *restrict txq,
3683 struct mlx5_txq_local *restrict loc,
3684 unsigned int ds,
3685 unsigned int slen,
3686 unsigned int olx)
3687 {
3688 assert(!MLX5_TXOFF_CONFIG(INLINE));
3689 #ifdef MLX5_PMD_SOFT_COUNTERS
3690 /* Update sent data bytes counter. */
3691 txq->stats.obytes += slen;
3692 #else
3693 (void)slen;
3694 #endif
3695 loc->elts_free -= ds;
3696 loc->pkts_sent += ds;
3697 ds += 2;
3698 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
3699 txq->wqe_ci += (ds + 3) / 4;
3700 loc->wqe_free -= (ds + 3) / 4;
3701 /* Request CQE generation if limits are reached. */
3702 mlx5_tx_request_completion(txq, loc, olx);
3703 }
3704
3705 /*
3706 * Update send loop variables and WQE for eMPW loop
3707 * with data inlining.
The routine is given the total size of the 3708 * descriptors and data pushed to the WQE. 3709 * 3710 * @param txq 3711 * Pointer to TX queue structure. 3712 * @param loc 3713 * Pointer to burst routine local context. 3714 * @param len 3715 * Total size of descriptors/data in bytes. 3716 * @param slen 3717 * Accumulated statistics, data bytes sent. 3718 * @param olx 3719 * Configured Tx offloads mask. It is fully defined at 3720 * compile time and may be used for optimization. 3721 * 3722 * 3723 * The routine returns nothing, it only updates the local 3724 * context and the Control Segment of the last WQE. 3725 */ 3726 static __rte_always_inline void 3727 mlx5_tx_idone_empw(struct mlx5_txq_data *restrict txq, 3728 struct mlx5_txq_local *restrict loc, 3729 unsigned int len, 3730 unsigned int slen, 3731 unsigned int olx) 3732 { 3733 assert(MLX5_TXOFF_CONFIG(INLINE)); 3734 assert((len % MLX5_WSEG_SIZE) == 0); 3735 #ifdef MLX5_PMD_SOFT_COUNTERS 3736 /* Update sent data bytes counter. */ 3737 txq->stats.obytes += slen; 3738 #else 3739 (void)slen; 3740 #endif 3741 len = len / MLX5_WSEG_SIZE + 2; 3742 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 3743 txq->wqe_ci += (len + 3) / 4; 3744 loc->wqe_free -= (len + 3) / 4; 3745 /* Request CQE generation if limits are reached. */ 3746 mlx5_tx_request_completion(txq, loc, olx); 3747 } 3748 3749 /** 3750 * The set of Tx burst functions for single-segment packets 3751 * without TSO and with Multi-Packet Writing feature support. 3752 * Supports all types of Tx offloads except multi-segment 3753 * packets and TSO. 3754 * 3755 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends 3756 * as many packets per WQE as it can. If eMPW is not configured 3757 * or a packet cannot be sent with eMPW (VLAN insertion) the 3758 * ordinary SEND opcode is used and only one packet is placed 3759 * in the WQE. 3760 * 3761 * The functions stop sending if a multi-segment packet or 3762 * a packet with TSO requested is encountered. 3763 * 3764 * The routines are responsible for storing processed mbufs 3765 * into the elts ring buffer and updating elts_head if the 3766 * inlining offload is requested. Otherwise, copying mbufs to 3767 * elts can be postponed and completed at the end of the burst routine. 3768 * 3769 * @param txq 3770 * Pointer to TX queue structure. 3771 * @param[in] pkts 3772 * Packets to transmit. 3773 * @param pkts_n 3774 * Number of packets in array. 3775 * @param loc 3776 * Pointer to burst routine local context. 3777 * @param olx 3778 * Configured Tx offloads mask. It is fully defined at 3779 * compile time and may be used for optimization. 3780 * 3781 * @return 3782 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3783 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3784 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3785 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 3786 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 3787 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 3788 * 3789 * Local context variables are updated. 3790 * 3791 * 3792 * The routine sends packets with MLX5_OPCODE_EMPW 3793 * without inlining, this is a dedicated optimized branch. 3794 * No VLAN insertion is supported.
3795 */ 3796 static __rte_always_inline enum mlx5_txcmp_code 3797 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *restrict txq, 3798 struct rte_mbuf **restrict pkts, 3799 unsigned int pkts_n, 3800 struct mlx5_txq_local *restrict loc, 3801 unsigned int olx) 3802 { 3803 /* 3804 * The subroutine is part of mlx5_tx_burst_single() 3805 * and sends single-segment packets with the eMPW opcode 3806 * without data inlining. 3807 */ 3808 assert(!MLX5_TXOFF_CONFIG(INLINE)); 3809 assert(MLX5_TXOFF_CONFIG(EMPW)); 3810 assert(loc->elts_free && loc->wqe_free); 3811 assert(pkts_n > loc->pkts_sent); 3812 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 3813 pkts += loc->pkts_sent + 1; 3814 pkts_n -= loc->pkts_sent; 3815 for (;;) { 3816 struct mlx5_wqe_dseg *restrict dseg; 3817 struct mlx5_wqe_eseg *restrict eseg; 3818 enum mlx5_txcmp_code ret; 3819 unsigned int part, loop; 3820 unsigned int slen = 0; 3821 3822 next_empw: 3823 part = RTE_MIN(pkts_n, MLX5_EMPW_MAX_PACKETS); 3824 if (unlikely(loc->elts_free < part)) { 3825 /* Not enough elts to store all the mbufs. */ 3826 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 3827 return MLX5_TXCMP_CODE_EXIT; 3828 /* But we are still able to send at least a minimal eMPW. */ 3829 part = loc->elts_free; 3830 } 3831 /* Check whether we have enough WQEs. */ 3832 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 3833 if (unlikely(loc->wqe_free < 3834 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 3835 return MLX5_TXCMP_CODE_EXIT; 3836 part = (loc->wqe_free * 4) - 2; 3837 } 3838 if (likely(part > 1)) 3839 rte_prefetch0(*pkts); 3840 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3841 /* 3842 * Build eMPW title WQEBB: 3843 * - Control Segment, eMPW opcode 3844 * - Ethernet Segment, no inline 3845 */ 3846 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 3847 MLX5_OPCODE_ENHANCED_MPSW, olx); 3848 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 3849 olx & ~MLX5_TXOFF_CONFIG_VLAN); 3850 eseg = &loc->wqe_last->eseg; 3851 dseg = &loc->wqe_last->dseg[0]; 3852 loop = part; 3853 for (;;) { 3854 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 3855 #ifdef MLX5_PMD_SOFT_COUNTERS 3856 /* Update sent data bytes counter. */ 3857 slen += dlen; 3858 #endif 3859 mlx5_tx_dseg_ptr 3860 (txq, loc, dseg, 3861 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3862 dlen, olx); 3863 if (unlikely(--loop == 0)) 3864 break; 3865 loc->mbuf = *pkts++; 3866 if (likely(loop > 1)) 3867 rte_prefetch0(*pkts); 3868 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3869 /* 3870 * Unroll the completion code to avoid 3871 * returning a variable value - it results in 3872 * unoptimized subsequent checking in the caller.
3873 */ 3874 if (ret == MLX5_TXCMP_CODE_MULTI) { 3875 part -= loop; 3876 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 3877 if (unlikely(!loc->elts_free || 3878 !loc->wqe_free)) 3879 return MLX5_TXCMP_CODE_EXIT; 3880 return MLX5_TXCMP_CODE_MULTI; 3881 } 3882 if (ret == MLX5_TXCMP_CODE_TSO) { 3883 part -= loop; 3884 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 3885 if (unlikely(!loc->elts_free || 3886 !loc->wqe_free)) 3887 return MLX5_TXCMP_CODE_EXIT; 3888 return MLX5_TXCMP_CODE_TSO; 3889 } 3890 if (ret == MLX5_TXCMP_CODE_SINGLE) { 3891 part -= loop; 3892 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 3893 if (unlikely(!loc->elts_free || 3894 !loc->wqe_free)) 3895 return MLX5_TXCMP_CODE_EXIT; 3896 return MLX5_TXCMP_CODE_SINGLE; 3897 } 3898 if (ret != MLX5_TXCMP_CODE_EMPW) { 3899 assert(false); 3900 part -= loop; 3901 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 3902 return MLX5_TXCMP_CODE_ERROR; 3903 } 3904 /* 3905 * Check whether packet parameters coincide 3906 * within assumed eMPW batch: 3907 * - check sum settings 3908 * - metadata value 3909 * - software parser settings 3910 */ 3911 if (!mlx5_tx_match_empw(txq, eseg, loc, olx)) { 3912 assert(loop); 3913 part -= loop; 3914 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 3915 if (unlikely(!loc->elts_free || 3916 !loc->wqe_free)) 3917 return MLX5_TXCMP_CODE_EXIT; 3918 pkts_n -= part; 3919 goto next_empw; 3920 } 3921 /* Packet attributes match, continue the same eMPW. */ 3922 ++dseg; 3923 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3924 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3925 } 3926 /* eMPW is built successfully, update loop parameters. */ 3927 assert(!loop); 3928 assert(pkts_n >= part); 3929 #ifdef MLX5_PMD_SOFT_COUNTERS 3930 /* Update sent data bytes counter. */ 3931 txq->stats.obytes += slen; 3932 #endif 3933 loc->elts_free -= part; 3934 loc->pkts_sent += part; 3935 txq->wqe_ci += (2 + part + 3) / 4; 3936 loc->wqe_free -= (2 + part + 3) / 4; 3937 pkts_n -= part; 3938 /* Request CQE generation if limits are reached. */ 3939 mlx5_tx_request_completion(txq, loc, olx); 3940 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3941 return MLX5_TXCMP_CODE_EXIT; 3942 loc->mbuf = *pkts++; 3943 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3944 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 3945 return ret; 3946 /* Continue sending eMPW batches. */ 3947 } 3948 assert(false); 3949 } 3950 3951 /** 3952 * The routine sends packets with MLX5_OPCODE_EMPW 3953 * with inlining, optionally supports VLAN insertion. 3954 */ 3955 static __rte_always_inline enum mlx5_txcmp_code 3956 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *restrict txq, 3957 struct rte_mbuf **restrict pkts, 3958 unsigned int pkts_n, 3959 struct mlx5_txq_local *restrict loc, 3960 unsigned int olx) 3961 { 3962 /* 3963 * Subroutine is the part of mlx5_tx_burst_single() 3964 * and sends single-segment packet with eMPW opcode 3965 * with data inlining. 3966 */ 3967 assert(MLX5_TXOFF_CONFIG(INLINE)); 3968 assert(MLX5_TXOFF_CONFIG(EMPW)); 3969 assert(loc->elts_free && loc->wqe_free); 3970 assert(pkts_n > loc->pkts_sent); 3971 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 3972 pkts += loc->pkts_sent + 1; 3973 pkts_n -= loc->pkts_sent; 3974 for (;;) { 3975 struct mlx5_wqe_dseg *restrict dseg; 3976 struct mlx5_wqe_eseg *restrict eseg; 3977 enum mlx5_txcmp_code ret; 3978 unsigned int room, part, nlim; 3979 unsigned int slen = 0; 3980 3981 /* 3982 * Limits the amount of packets in one WQE 3983 * to improve CQE latency generation. 
3984 */ 3985 nlim = RTE_MIN(pkts_n, MLX5_EMPW_MAX_PACKETS); 3986 /* Check whether we have minimal amount WQEs */ 3987 if (unlikely(loc->wqe_free < 3988 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 3989 return MLX5_TXCMP_CODE_EXIT; 3990 if (likely(pkts_n > 1)) 3991 rte_prefetch0(*pkts); 3992 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3993 /* 3994 * Build eMPW title WQEBB: 3995 * - Control Segment, eMPW opcode, zero DS 3996 * - Ethernet Segment, no inline 3997 */ 3998 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, 0, 3999 MLX5_OPCODE_ENHANCED_MPSW, olx); 4000 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 4001 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4002 eseg = &loc->wqe_last->eseg; 4003 dseg = &loc->wqe_last->dseg[0]; 4004 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 4005 loc->wqe_free) * MLX5_WQE_SIZE - 4006 MLX5_WQE_CSEG_SIZE - 4007 MLX5_WQE_ESEG_SIZE; 4008 /* Build WQE till we have space, packets and resources. */ 4009 part = room; 4010 for (;;) { 4011 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4012 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 4013 unsigned int tlen; 4014 4015 assert(room >= MLX5_WQE_DSEG_SIZE); 4016 assert((room % MLX5_WQE_DSEG_SIZE) == 0); 4017 assert((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 4018 /* 4019 * Some Tx offloads may cause an error if 4020 * packet is not long enough, check against 4021 * assumed minimal length. 4022 */ 4023 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 4024 part -= room; 4025 if (unlikely(!part)) 4026 return MLX5_TXCMP_CODE_ERROR; 4027 /* 4028 * We have some successfully built 4029 * packet Data Segments to send. 4030 */ 4031 mlx5_tx_idone_empw(txq, loc, part, slen, olx); 4032 return MLX5_TXCMP_CODE_ERROR; 4033 } 4034 /* Inline or not inline - that's the Question. */ 4035 if (dlen > txq->inlen_empw) 4036 goto pointer_empw; 4037 /* Inline entire packet, optional VLAN insertion. */ 4038 tlen = sizeof(dseg->bcount) + dlen; 4039 if (MLX5_TXOFF_CONFIG(VLAN) && 4040 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4041 /* 4042 * The packet length must be checked in 4043 * mlx5_tx_able_to_empw() and packet 4044 * fits into inline length guaranteed. 4045 */ 4046 assert((dlen + sizeof(struct rte_vlan_hdr)) <= 4047 txq->inlen_empw); 4048 tlen += sizeof(struct rte_vlan_hdr); 4049 if (room < tlen) 4050 break; 4051 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 4052 dptr, dlen, olx); 4053 #ifdef MLX5_PMD_SOFT_COUNTERS 4054 /* Update sent data bytes counter. */ 4055 slen += sizeof(struct rte_vlan_hdr); 4056 #endif 4057 } else { 4058 if (room < tlen) 4059 break; 4060 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 4061 dptr, dlen, olx); 4062 } 4063 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 4064 assert(room >= tlen); 4065 room -= tlen; 4066 /* 4067 * Packet data are completely inlined, 4068 * free the packet immediately. 4069 */ 4070 rte_pktmbuf_free_seg(loc->mbuf); 4071 goto next_mbuf; 4072 pointer_empw: 4073 /* 4074 * Not inlinable VLAN packets are 4075 * proceeded outside of this routine. 4076 */ 4077 assert(room >= MLX5_WQE_DSEG_SIZE); 4078 if (MLX5_TXOFF_CONFIG(VLAN)) 4079 assert(!(loc->mbuf->ol_flags & 4080 PKT_TX_VLAN_PKT)); 4081 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 4082 /* We have to store mbuf in elts.*/ 4083 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 4084 room -= MLX5_WQE_DSEG_SIZE; 4085 /* Ring buffer wraparound is checked at the loop end.*/ 4086 ++dseg; 4087 next_mbuf: 4088 #ifdef MLX5_PMD_SOFT_COUNTERS 4089 /* Update sent data bytes counter. 
*/ 4090 slen += dlen; 4091 #endif 4092 loc->pkts_sent++; 4093 loc->elts_free--; 4094 pkts_n--; 4095 if (unlikely(!pkts_n || !loc->elts_free)) { 4096 /* 4097 * We have no resources/packets to 4098 * continue build descriptors. 4099 */ 4100 part -= room; 4101 mlx5_tx_idone_empw(txq, loc, part, slen, olx); 4102 return MLX5_TXCMP_CODE_EXIT; 4103 } 4104 loc->mbuf = *pkts++; 4105 if (likely(pkts_n > 1)) 4106 rte_prefetch0(*pkts); 4107 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4108 /* 4109 * Unroll the completion code to avoid 4110 * returning variable value - it results in 4111 * unoptimized sequent checking in caller. 4112 */ 4113 if (ret == MLX5_TXCMP_CODE_MULTI) { 4114 part -= room; 4115 mlx5_tx_idone_empw(txq, loc, part, slen, olx); 4116 if (unlikely(!loc->elts_free || 4117 !loc->wqe_free)) 4118 return MLX5_TXCMP_CODE_EXIT; 4119 return MLX5_TXCMP_CODE_MULTI; 4120 } 4121 if (ret == MLX5_TXCMP_CODE_TSO) { 4122 part -= room; 4123 mlx5_tx_idone_empw(txq, loc, part, slen, olx); 4124 if (unlikely(!loc->elts_free || 4125 !loc->wqe_free)) 4126 return MLX5_TXCMP_CODE_EXIT; 4127 return MLX5_TXCMP_CODE_TSO; 4128 } 4129 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4130 part -= room; 4131 mlx5_tx_idone_empw(txq, loc, part, slen, olx); 4132 if (unlikely(!loc->elts_free || 4133 !loc->wqe_free)) 4134 return MLX5_TXCMP_CODE_EXIT; 4135 return MLX5_TXCMP_CODE_SINGLE; 4136 } 4137 if (ret != MLX5_TXCMP_CODE_EMPW) { 4138 assert(false); 4139 part -= room; 4140 mlx5_tx_idone_empw(txq, loc, part, slen, olx); 4141 return MLX5_TXCMP_CODE_ERROR; 4142 } 4143 /* Check if we have minimal room left. */ 4144 nlim--; 4145 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 4146 break; 4147 /* 4148 * Check whether packet parameters coincide 4149 * within assumed eMPW batch: 4150 * - check sum settings 4151 * - metadata value 4152 * - software parser settings 4153 */ 4154 if (!mlx5_tx_match_empw(txq, eseg, loc, olx)) 4155 break; 4156 /* Packet attributes match, continue the same eMPW. */ 4157 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4158 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4159 } 4160 /* 4161 * We get here to close an existing eMPW 4162 * session and start the new one. 4163 */ 4164 assert(pkts_n); 4165 part -= room; 4166 if (unlikely(!part)) 4167 return MLX5_TXCMP_CODE_EXIT; 4168 mlx5_tx_idone_empw(txq, loc, part, slen, olx); 4169 if (unlikely(!loc->elts_free || 4170 !loc->wqe_free)) 4171 return MLX5_TXCMP_CODE_EXIT; 4172 /* Continue the loop with new eMPW session. */ 4173 } 4174 assert(false); 4175 } 4176 4177 /** 4178 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 4179 * Data inlining and VLAN insertion are supported. 4180 */ 4181 static __rte_always_inline enum mlx5_txcmp_code 4182 mlx5_tx_burst_single_send(struct mlx5_txq_data *restrict txq, 4183 struct rte_mbuf **restrict pkts, 4184 unsigned int pkts_n, 4185 struct mlx5_txq_local *restrict loc, 4186 unsigned int olx) 4187 { 4188 /* 4189 * Subroutine is the part of mlx5_tx_burst_single() 4190 * and sends single-segment packet with SEND opcode. 
4191 */ 4192 assert(loc->elts_free && loc->wqe_free); 4193 assert(pkts_n > loc->pkts_sent); 4194 pkts += loc->pkts_sent + 1; 4195 pkts_n -= loc->pkts_sent; 4196 for (;;) { 4197 struct mlx5_wqe *restrict wqe; 4198 enum mlx5_txcmp_code ret; 4199 4200 assert(NB_SEGS(loc->mbuf) == 1); 4201 if (MLX5_TXOFF_CONFIG(INLINE)) { 4202 unsigned int inlen, vlan = 0; 4203 4204 inlen = rte_pktmbuf_data_len(loc->mbuf); 4205 if (MLX5_TXOFF_CONFIG(VLAN) && 4206 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4207 vlan = sizeof(struct rte_vlan_hdr); 4208 inlen += vlan; 4209 static_assert((sizeof(struct rte_vlan_hdr) + 4210 sizeof(struct rte_ether_hdr)) == 4211 MLX5_ESEG_MIN_INLINE_SIZE, 4212 "invalid min inline data size"); 4213 } 4214 /* 4215 * If inlining is enabled at configuration time 4216 * the limit must be not less than minimal size. 4217 * Otherwise we would do extra check for data 4218 * size to avoid crashes due to length overflow. 4219 */ 4220 assert(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 4221 if (inlen <= txq->inlen_send) { 4222 unsigned int seg_n, wqe_n; 4223 4224 rte_prefetch0(rte_pktmbuf_mtod 4225 (loc->mbuf, uint8_t *)); 4226 /* Check against minimal length. */ 4227 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 4228 return MLX5_TXCMP_CODE_ERROR; 4229 /* 4230 * Completely inlined packet data WQE: 4231 * - Control Segment, SEND opcode 4232 * - Ethernet Segment, no VLAN insertion 4233 * - Data inlined, VLAN optionally inserted 4234 * - Alignment to MLX5_WSEG_SIZE 4235 * Have to estimate amount of WQEBBs 4236 */ 4237 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 4238 MLX5_ESEG_MIN_INLINE_SIZE + 4239 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4240 /* Check if there are enough WQEBBs. */ 4241 wqe_n = (seg_n + 3) / 4; 4242 if (wqe_n > loc->wqe_free) 4243 return MLX5_TXCMP_CODE_EXIT; 4244 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4245 loc->wqe_last = wqe; 4246 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 4247 MLX5_OPCODE_SEND, olx); 4248 mlx5_tx_eseg_data(txq, loc, wqe, 4249 vlan, inlen, 0, olx); 4250 txq->wqe_ci += wqe_n; 4251 loc->wqe_free -= wqe_n; 4252 /* 4253 * Packet data are completely inlined, 4254 * free the packet immediately. 4255 */ 4256 rte_pktmbuf_free_seg(loc->mbuf); 4257 } else if (!MLX5_TXOFF_CONFIG(EMPW) && 4258 txq->inlen_mode) { 4259 /* 4260 * If minimal inlining is requested the eMPW 4261 * feature should be disabled due to data is 4262 * inlined into Ethernet Segment, which can 4263 * not contain inlined data for eMPW due to 4264 * segment shared for all packets. 4265 */ 4266 struct mlx5_wqe_dseg *restrict dseg; 4267 unsigned int ds; 4268 uint8_t *dptr; 4269 4270 /* 4271 * The inline-mode settings require 4272 * to inline the specified amount of 4273 * data bytes to the Ethernet Segment. 4274 * We should check the free space in 4275 * WQE ring buffer to inline partially. 4276 */ 4277 assert(txq->inlen_send >= txq->inlen_mode); 4278 assert(inlen > txq->inlen_mode); 4279 assert(txq->inlen_mode >= 4280 MLX5_ESEG_MIN_INLINE_SIZE); 4281 /* 4282 * Check whether there are enough free WQEBBs: 4283 * - Control Segment 4284 * - Ethernet Segment 4285 * - First Segment of inlined Ethernet data 4286 * - ... data continued ... 
4287 * - Finishing Data Segment of pointer type 4288 */ 4289 ds = (MLX5_WQE_CSEG_SIZE + 4290 MLX5_WQE_ESEG_SIZE + 4291 MLX5_WQE_DSEG_SIZE + 4292 txq->inlen_mode - 4293 MLX5_ESEG_MIN_INLINE_SIZE + 4294 MLX5_WQE_DSEG_SIZE + 4295 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4296 if (loc->wqe_free < ((ds + 3) / 4)) 4297 return MLX5_TXCMP_CODE_EXIT; 4298 /* 4299 * Build the ordinary SEND WQE: 4300 * - Control Segment 4301 * - Ethernet Segment, inline inlen_mode bytes 4302 * - Data Segment of pointer type 4303 */ 4304 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4305 loc->wqe_last = wqe; 4306 mlx5_tx_cseg_init(txq, loc, wqe, ds, 4307 MLX5_OPCODE_SEND, olx); 4308 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 4309 txq->inlen_mode, 4310 0, olx); 4311 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4312 txq->inlen_mode - vlan; 4313 inlen -= txq->inlen_mode; 4314 mlx5_tx_dseg_ptr(txq, loc, dseg, 4315 dptr, inlen, olx); 4316 /* 4317 * WQE is built, update the loop parameters 4318 * and got to the next packet. 4319 */ 4320 txq->wqe_ci += (ds + 3) / 4; 4321 loc->wqe_free -= (ds + 3) / 4; 4322 /* We have to store mbuf in elts.*/ 4323 assert(MLX5_TXOFF_CONFIG(INLINE)); 4324 txq->elts[txq->elts_head++ & txq->elts_m] = 4325 loc->mbuf; 4326 --loc->elts_free; 4327 } else { 4328 uint8_t *dptr; 4329 unsigned int dlen; 4330 4331 /* 4332 * Partially inlined packet data WQE, we have 4333 * some space in title WQEBB, we can fill it 4334 * with some packet data. It takes one WQEBB, 4335 * it is available, no extra space check: 4336 * - Control Segment, SEND opcode 4337 * - Ethernet Segment, no VLAN insertion 4338 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 4339 * - Data Segment, pointer type 4340 * 4341 * We also get here if VLAN insertion is not 4342 * supported by HW, the inline is enabled. 4343 */ 4344 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4345 loc->wqe_last = wqe; 4346 mlx5_tx_cseg_init(txq, loc, wqe, 4, 4347 MLX5_OPCODE_SEND, olx); 4348 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 4349 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4350 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 4351 /* 4352 * The length check is performed above, by 4353 * comparing with txq->inlen_send. We should 4354 * not get overflow here. 4355 */ 4356 assert(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 4357 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 4358 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 4359 dptr, dlen, olx); 4360 ++txq->wqe_ci; 4361 --loc->wqe_free; 4362 /* We have to store mbuf in elts.*/ 4363 assert(MLX5_TXOFF_CONFIG(INLINE)); 4364 txq->elts[txq->elts_head++ & txq->elts_m] = 4365 loc->mbuf; 4366 --loc->elts_free; 4367 } 4368 #ifdef MLX5_PMD_SOFT_COUNTERS 4369 /* Update sent data bytes counter. */ 4370 txq->stats.obytes += vlan + 4371 rte_pktmbuf_data_len(loc->mbuf); 4372 #endif 4373 } else { 4374 /* 4375 * No inline at all, it means the CPU cycles saving 4376 * is prioritized at configuration, we should not 4377 * copy any packet data to WQE. 
4378 * 4379 * SEND WQE, one WQEBB: 4380 * - Control Segment, SEND opcode 4381 * - Ethernet Segment, optional VLAN, no inline 4382 * - Data Segment, pointer type 4383 */ 4384 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4385 loc->wqe_last = wqe; 4386 mlx5_tx_cseg_init(txq, loc, wqe, 3, 4387 MLX5_OPCODE_SEND, olx); 4388 mlx5_tx_eseg_none(txq, loc, wqe, olx); 4389 mlx5_tx_dseg_ptr 4390 (txq, loc, &wqe->dseg[0], 4391 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4392 rte_pktmbuf_data_len(loc->mbuf), olx); 4393 ++txq->wqe_ci; 4394 --loc->wqe_free; 4395 /* 4396 * We should not store mbuf pointer in elts 4397 * if no inlining is configured, this is done 4398 * by calling routine in a batch copy. 4399 */ 4400 assert(!MLX5_TXOFF_CONFIG(INLINE)); 4401 --loc->elts_free; 4402 #ifdef MLX5_PMD_SOFT_COUNTERS 4403 /* Update sent data bytes counter. */ 4404 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 4405 if (MLX5_TXOFF_CONFIG(VLAN) && 4406 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 4407 txq->stats.obytes += 4408 sizeof(struct rte_vlan_hdr); 4409 #endif 4410 } 4411 ++loc->pkts_sent; 4412 --pkts_n; 4413 /* Request CQE generation if limits are reached. */ 4414 mlx5_tx_request_completion(txq, loc, olx); 4415 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4416 return MLX5_TXCMP_CODE_EXIT; 4417 loc->mbuf = *pkts++; 4418 if (pkts_n > 1) 4419 rte_prefetch0(*pkts); 4420 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4421 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 4422 return ret; 4423 } 4424 assert(false); 4425 } 4426 4427 static __rte_always_inline enum mlx5_txcmp_code 4428 mlx5_tx_burst_single(struct mlx5_txq_data *restrict txq, 4429 struct rte_mbuf **restrict pkts, 4430 unsigned int pkts_n, 4431 struct mlx5_txq_local *restrict loc, 4432 unsigned int olx) 4433 { 4434 enum mlx5_txcmp_code ret; 4435 4436 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 4437 if (ret == MLX5_TXCMP_CODE_SINGLE) 4438 goto ordinary_send; 4439 assert(ret == MLX5_TXCMP_CODE_EMPW); 4440 for (;;) { 4441 /* Optimize for inline/no inline eMPW send. */ 4442 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 4443 mlx5_tx_burst_empw_inline 4444 (txq, pkts, pkts_n, loc, olx) : 4445 mlx5_tx_burst_empw_simple 4446 (txq, pkts, pkts_n, loc, olx); 4447 if (ret != MLX5_TXCMP_CODE_SINGLE) 4448 return ret; 4449 /* The resources to send one packet should remain. */ 4450 assert(loc->elts_free && loc->wqe_free); 4451 ordinary_send: 4452 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 4453 assert(ret != MLX5_TXCMP_CODE_SINGLE); 4454 if (ret != MLX5_TXCMP_CODE_EMPW) 4455 return ret; 4456 /* The resources to send one packet should remain. */ 4457 assert(loc->elts_free && loc->wqe_free); 4458 } 4459 } 4460 4461 /** 4462 * DPDK Tx callback template. This is configured template 4463 * used to generate routines optimized for specified offload setup. 4464 * One of this generated functions is chosen at SQ configuration 4465 * time. 4466 * 4467 * @param txq 4468 * Generic pointer to TX queue structure. 4469 * @param[in] pkts 4470 * Packets to transmit. 4471 * @param pkts_n 4472 * Number of packets in array. 4473 * @param olx 4474 * Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx 4475 * values. Should be static to take compile time static configuration 4476 * advantages. 4477 * 4478 * @return 4479 * Number of packets successfully transmitted (<= pkts_n). 
4480 */ 4481 static __rte_always_inline uint16_t 4482 mlx5_tx_burst_tmpl(struct mlx5_txq_data *restrict txq, 4483 struct rte_mbuf **restrict pkts, 4484 uint16_t pkts_n, 4485 unsigned int olx) 4486 { 4487 struct mlx5_txq_local loc; 4488 enum mlx5_txcmp_code ret; 4489 unsigned int part; 4490 4491 assert(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4492 assert(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4493 /* 4494 * Check if there are some CQEs, if any: 4495 * - process an encountered errors 4496 * - process the completed WQEs 4497 * - free related mbufs 4498 * - doorbell the NIC about processed CQEs 4499 */ 4500 if (unlikely(!pkts_n)) 4501 return 0; 4502 rte_prefetch0(*pkts); 4503 mlx5_tx_handle_completion(txq, olx); 4504 /* 4505 * Calculate the number of available resources - elts and WQEs. 4506 * There are two possible different scenarios: 4507 * - no data inlining into WQEs, one WQEBB may contains upto 4508 * four packets, in this case elts become scarce resource 4509 * - data inlining into WQEs, one packet may require multiple 4510 * WQEBBs, the WQEs become the limiting factor. 4511 */ 4512 assert(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4513 loc.elts_free = txq->elts_s - 4514 (uint16_t)(txq->elts_head - txq->elts_tail); 4515 assert(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4516 loc.wqe_free = txq->wqe_s - 4517 (uint16_t)(txq->wqe_ci - txq->wqe_pi); 4518 if (unlikely(!loc.elts_free || !loc.wqe_free)) 4519 return 0; 4520 loc.pkts_sent = 0; 4521 loc.pkts_copy = 0; 4522 loc.wqe_last = NULL; 4523 for (;;) { 4524 /* 4525 * Fetch the packet from array. Usually this is 4526 * the first packet in series of multi/single 4527 * segment packets. 4528 */ 4529 loc.mbuf = *(pkts + loc.pkts_sent); 4530 /* Dedicated branch for multi-segment packets. */ 4531 if (MLX5_TXOFF_CONFIG(MULTI) && 4532 unlikely(NB_SEGS(loc.mbuf) > 1)) { 4533 /* 4534 * Multi-segment packet encountered. 4535 * Hardware is able to process it only 4536 * with SEND/TSO opcodes, one packet 4537 * per WQE, do it in dedicated routine. 4538 */ 4539 enter_send_multi: 4540 assert(loc.pkts_sent >= loc.pkts_copy); 4541 part = loc.pkts_sent - loc.pkts_copy; 4542 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 4543 /* 4544 * There are some single-segment mbufs not 4545 * stored in elts. The mbufs must be in the 4546 * same order as WQEs, so we must copy the 4547 * mbufs to elts here, before the coming 4548 * multi-segment packet mbufs is appended. 4549 */ 4550 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, 4551 part, olx); 4552 loc.pkts_copy = loc.pkts_sent; 4553 } 4554 assert(pkts_n > loc.pkts_sent); 4555 ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx); 4556 if (!MLX5_TXOFF_CONFIG(INLINE)) 4557 loc.pkts_copy = loc.pkts_sent; 4558 /* 4559 * These returned code checks are supposed 4560 * to be optimized out due to routine inlining. 4561 */ 4562 if (ret == MLX5_TXCMP_CODE_EXIT) { 4563 /* 4564 * The routine returns this code when 4565 * all packets are sent or there is no 4566 * enough resources to complete request. 4567 */ 4568 break; 4569 } 4570 if (ret == MLX5_TXCMP_CODE_ERROR) { 4571 /* 4572 * The routine returns this code when 4573 * some error in the incoming packets 4574 * format occurred. 4575 */ 4576 txq->stats.oerrors++; 4577 break; 4578 } 4579 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4580 /* 4581 * The single-segment packet was encountered 4582 * in the array, try to send it with the 4583 * best optimized way, possible engaging eMPW. 
4584 */ 4585 goto enter_send_single; 4586 } 4587 if (MLX5_TXOFF_CONFIG(TSO) && 4588 ret == MLX5_TXCMP_CODE_TSO) { 4589 /* 4590 * The single-segment TSO packet was 4591 * encountered in the array. 4592 */ 4593 goto enter_send_tso; 4594 } 4595 /* We must not get here. Something is going wrong. */ 4596 assert(false); 4597 txq->stats.oerrors++; 4598 break; 4599 } 4600 /* Dedicated branch for single-segment TSO packets. */ 4601 if (MLX5_TXOFF_CONFIG(TSO) && 4602 unlikely(loc.mbuf->ol_flags & PKT_TX_TCP_SEG)) { 4603 /* 4604 * TSO might require special way for inlining 4605 * (dedicated parameters) and is sent with 4606 * MLX5_OPCODE_TSO opcode only, provide this 4607 * in dedicated branch. 4608 */ 4609 enter_send_tso: 4610 assert(NB_SEGS(loc.mbuf) == 1); 4611 assert(pkts_n > loc.pkts_sent); 4612 ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx); 4613 /* 4614 * These returned code checks are supposed 4615 * to be optimized out due to routine inlining. 4616 */ 4617 if (ret == MLX5_TXCMP_CODE_EXIT) 4618 break; 4619 if (ret == MLX5_TXCMP_CODE_ERROR) { 4620 txq->stats.oerrors++; 4621 break; 4622 } 4623 if (ret == MLX5_TXCMP_CODE_SINGLE) 4624 goto enter_send_single; 4625 if (MLX5_TXOFF_CONFIG(MULTI) && 4626 ret == MLX5_TXCMP_CODE_MULTI) { 4627 /* 4628 * The multi-segment packet was 4629 * encountered in the array. 4630 */ 4631 goto enter_send_multi; 4632 } 4633 /* We must not get here. Something is going wrong. */ 4634 assert(false); 4635 txq->stats.oerrors++; 4636 break; 4637 } 4638 /* 4639 * The dedicated branch for the single-segment packets 4640 * without TSO. Often these ones can be sent using 4641 * MLX5_OPCODE_EMPW with multiple packets in one WQE. 4642 * The routine builds the WQEs till it encounters 4643 * the TSO or multi-segment packet (in case if these 4644 * offloads are requested at SQ configuration time). 4645 */ 4646 enter_send_single: 4647 assert(pkts_n > loc.pkts_sent); 4648 ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx); 4649 /* 4650 * These returned code checks are supposed 4651 * to be optimized out due to routine inlining. 4652 */ 4653 if (ret == MLX5_TXCMP_CODE_EXIT) 4654 break; 4655 if (ret == MLX5_TXCMP_CODE_ERROR) { 4656 txq->stats.oerrors++; 4657 break; 4658 } 4659 if (MLX5_TXOFF_CONFIG(MULTI) && 4660 ret == MLX5_TXCMP_CODE_MULTI) { 4661 /* 4662 * The multi-segment packet was 4663 * encountered in the array. 4664 */ 4665 goto enter_send_multi; 4666 } 4667 if (MLX5_TXOFF_CONFIG(TSO) && 4668 ret == MLX5_TXCMP_CODE_TSO) { 4669 /* 4670 * The single-segment TSO packet was 4671 * encountered in the array. 4672 */ 4673 goto enter_send_tso; 4674 } 4675 /* We must not get here. Something is going wrong. */ 4676 assert(false); 4677 txq->stats.oerrors++; 4678 break; 4679 } 4680 /* 4681 * Main Tx loop is completed, do the rest: 4682 * - set completion request if thresholds are reached 4683 * - doorbell the hardware 4684 * - copy the rest of mbufs to elts (if any) 4685 */ 4686 assert(MLX5_TXOFF_CONFIG(INLINE) || loc.pkts_sent >= loc.pkts_copy); 4687 /* Take a shortcut if nothing is sent. */ 4688 if (unlikely(loc.pkts_sent == 0)) 4689 return 0; 4690 /* 4691 * Ring QP doorbell immediately after WQE building completion 4692 * to improve latencies. The pure software related data treatment 4693 * can be completed after doorbell. Tx CQEs for this SQ are 4694 * processed in this thread only by the polling. 4695 */ 4696 mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, 0); 4697 /* Not all of the mbufs may be stored into elts yet. */ 4698 part = MLX5_TXOFF_CONFIG(INLINE) ? 
0 : loc.pkts_sent - 4699 (MLX5_TXOFF_CONFIG(MULTI) ? loc.pkts_copy : 0); 4700 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 4701 /* 4702 * There are some single-segment mbufs not stored in elts. 4703 * It can be only if the last packet was single-segment. 4704 * The copying is gathered into one place due to it is 4705 * a good opportunity to optimize that with SIMD. 4706 * Unfortunately if inlining is enabled the gaps in 4707 * pointer array may happen due to early freeing of the 4708 * inlined mbufs. 4709 */ 4710 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx); 4711 } 4712 #ifdef MLX5_PMD_SOFT_COUNTERS 4713 /* Increment sent packets counter. */ 4714 txq->stats.opackets += loc.pkts_sent; 4715 #endif 4716 assert(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4717 assert(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4718 return loc.pkts_sent; 4719 } 4720 4721 /* Generate routines with Enhanced Multi-Packet Write support. */ 4722 MLX5_TXOFF_DECL(full_empw, 4723 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW) 4724 4725 MLX5_TXOFF_DECL(none_empw, 4726 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 4727 4728 MLX5_TXOFF_DECL(md_empw, 4729 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4730 4731 MLX5_TXOFF_DECL(mt_empw, 4732 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4733 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4734 4735 MLX5_TXOFF_DECL(mtsc_empw, 4736 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4737 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4738 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4739 4740 MLX5_TXOFF_DECL(mti_empw, 4741 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4742 MLX5_TXOFF_CONFIG_INLINE | 4743 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4744 4745 MLX5_TXOFF_DECL(mtv_empw, 4746 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4747 MLX5_TXOFF_CONFIG_VLAN | 4748 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4749 4750 MLX5_TXOFF_DECL(mtiv_empw, 4751 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4752 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4753 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4754 4755 MLX5_TXOFF_DECL(sc_empw, 4756 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4757 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4758 4759 MLX5_TXOFF_DECL(sci_empw, 4760 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4761 MLX5_TXOFF_CONFIG_INLINE | 4762 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4763 4764 MLX5_TXOFF_DECL(scv_empw, 4765 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4766 MLX5_TXOFF_CONFIG_VLAN | 4767 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4768 4769 MLX5_TXOFF_DECL(sciv_empw, 4770 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4771 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4772 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4773 4774 MLX5_TXOFF_DECL(i_empw, 4775 MLX5_TXOFF_CONFIG_INLINE | 4776 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4777 4778 MLX5_TXOFF_DECL(v_empw, 4779 MLX5_TXOFF_CONFIG_VLAN | 4780 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4781 4782 MLX5_TXOFF_DECL(iv_empw, 4783 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4784 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4785 4786 /* Generate routines without Enhanced Multi-Packet Write support. 
*/ 4787 MLX5_TXOFF_DECL(full, 4788 MLX5_TXOFF_CONFIG_FULL) 4789 4790 MLX5_TXOFF_DECL(none, 4791 MLX5_TXOFF_CONFIG_NONE) 4792 4793 MLX5_TXOFF_DECL(md, 4794 MLX5_TXOFF_CONFIG_METADATA) 4795 4796 MLX5_TXOFF_DECL(mt, 4797 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4798 MLX5_TXOFF_CONFIG_METADATA) 4799 4800 MLX5_TXOFF_DECL(mtsc, 4801 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4802 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4803 MLX5_TXOFF_CONFIG_METADATA) 4804 4805 MLX5_TXOFF_DECL(mti, 4806 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4807 MLX5_TXOFF_CONFIG_INLINE | 4808 MLX5_TXOFF_CONFIG_METADATA) 4809 4810 4811 MLX5_TXOFF_DECL(mtv, 4812 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4813 MLX5_TXOFF_CONFIG_VLAN | 4814 MLX5_TXOFF_CONFIG_METADATA) 4815 4816 4817 MLX5_TXOFF_DECL(mtiv, 4818 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4819 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4820 MLX5_TXOFF_CONFIG_METADATA) 4821 4822 MLX5_TXOFF_DECL(sc, 4823 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4824 MLX5_TXOFF_CONFIG_METADATA) 4825 4826 MLX5_TXOFF_DECL(sci, 4827 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4828 MLX5_TXOFF_CONFIG_INLINE | 4829 MLX5_TXOFF_CONFIG_METADATA) 4830 4831 4832 MLX5_TXOFF_DECL(scv, 4833 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4834 MLX5_TXOFF_CONFIG_VLAN | 4835 MLX5_TXOFF_CONFIG_METADATA) 4836 4837 4838 MLX5_TXOFF_DECL(sciv, 4839 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4840 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4841 MLX5_TXOFF_CONFIG_METADATA) 4842 4843 MLX5_TXOFF_DECL(i, 4844 MLX5_TXOFF_CONFIG_INLINE | 4845 MLX5_TXOFF_CONFIG_METADATA) 4846 4847 MLX5_TXOFF_DECL(v, 4848 MLX5_TXOFF_CONFIG_VLAN | 4849 MLX5_TXOFF_CONFIG_METADATA) 4850 4851 MLX5_TXOFF_DECL(iv, 4852 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4853 MLX5_TXOFF_CONFIG_METADATA) 4854 4855 /* 4856 * Array of declared and compiled Tx burst function and corresponding 4857 * supported offloads set. The array is used to select the Tx burst 4858 * function for specified offloads set at Tx queue configuration time. 
4859 */ 4860 const struct { 4861 eth_tx_burst_t func; 4862 unsigned int olx; 4863 } txoff_func[] = { 4864 MLX5_TXOFF_INFO(full_empw, 4865 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4866 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4867 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4868 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4869 4870 MLX5_TXOFF_INFO(none_empw, 4871 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 4872 4873 MLX5_TXOFF_INFO(md_empw, 4874 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4875 4876 MLX5_TXOFF_INFO(mt_empw, 4877 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4878 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4879 4880 MLX5_TXOFF_INFO(mtsc_empw, 4881 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4882 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4883 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4884 4885 MLX5_TXOFF_INFO(mti_empw, 4886 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4887 MLX5_TXOFF_CONFIG_INLINE | 4888 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4889 4890 MLX5_TXOFF_INFO(mtv_empw, 4891 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4892 MLX5_TXOFF_CONFIG_VLAN | 4893 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4894 4895 MLX5_TXOFF_INFO(mtiv_empw, 4896 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4897 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4898 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4899 4900 MLX5_TXOFF_INFO(sc_empw, 4901 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4902 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4903 4904 MLX5_TXOFF_INFO(sci_empw, 4905 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4906 MLX5_TXOFF_CONFIG_INLINE | 4907 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4908 4909 MLX5_TXOFF_INFO(scv_empw, 4910 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4911 MLX5_TXOFF_CONFIG_VLAN | 4912 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4913 4914 MLX5_TXOFF_INFO(sciv_empw, 4915 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4916 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4917 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4918 4919 MLX5_TXOFF_INFO(i_empw, 4920 MLX5_TXOFF_CONFIG_INLINE | 4921 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4922 4923 MLX5_TXOFF_INFO(v_empw, 4924 MLX5_TXOFF_CONFIG_VLAN | 4925 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4926 4927 MLX5_TXOFF_INFO(iv_empw, 4928 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4929 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 4930 4931 MLX5_TXOFF_INFO(full, 4932 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4933 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4934 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4935 MLX5_TXOFF_CONFIG_METADATA) 4936 4937 MLX5_TXOFF_INFO(none, 4938 MLX5_TXOFF_CONFIG_NONE) 4939 4940 MLX5_TXOFF_INFO(md, 4941 MLX5_TXOFF_CONFIG_METADATA) 4942 4943 MLX5_TXOFF_INFO(mt, 4944 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4945 MLX5_TXOFF_CONFIG_METADATA) 4946 4947 MLX5_TXOFF_INFO(mtsc, 4948 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4949 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4950 MLX5_TXOFF_CONFIG_METADATA) 4951 4952 MLX5_TXOFF_INFO(mti, 4953 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4954 MLX5_TXOFF_CONFIG_INLINE | 4955 MLX5_TXOFF_CONFIG_METADATA) 4956 4957 4958 MLX5_TXOFF_INFO(mtv, 4959 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4960 MLX5_TXOFF_CONFIG_VLAN | 4961 MLX5_TXOFF_CONFIG_METADATA) 4962 4963 MLX5_TXOFF_INFO(mtiv, 
4964 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 4965 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4966 MLX5_TXOFF_CONFIG_METADATA) 4967 4968 MLX5_TXOFF_INFO(sc, 4969 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4970 MLX5_TXOFF_CONFIG_METADATA) 4971 4972 MLX5_TXOFF_INFO(sci, 4973 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4974 MLX5_TXOFF_CONFIG_INLINE | 4975 MLX5_TXOFF_CONFIG_METADATA) 4976 4977 MLX5_TXOFF_INFO(scv, 4978 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4979 MLX5_TXOFF_CONFIG_VLAN | 4980 MLX5_TXOFF_CONFIG_METADATA) 4981 4982 MLX5_TXOFF_INFO(sciv, 4983 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 4984 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4985 MLX5_TXOFF_CONFIG_METADATA) 4986 4987 MLX5_TXOFF_INFO(i, 4988 MLX5_TXOFF_CONFIG_INLINE | 4989 MLX5_TXOFF_CONFIG_METADATA) 4990 4991 MLX5_TXOFF_INFO(v, 4992 MLX5_TXOFF_CONFIG_VLAN | 4993 MLX5_TXOFF_CONFIG_METADATA) 4994 4995 MLX5_TXOFF_INFO(iv, 4996 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 4997 MLX5_TXOFF_CONFIG_METADATA) 4998 }; 4999 5000 /** 5001 * Configure the Tx function to use. The routine checks configured 5002 * Tx offloads for the device and selects appropriate Tx burst 5003 * routine. There are multiple Tx burst routines compiled from 5004 * the same template in the most optimal way for the dedicated 5005 * Tx offloads set. 5006 * 5007 * @param dev 5008 * Pointer to private data structure. 5009 * 5010 * @return 5011 * Pointer to selected Tx burst function. 5012 */ 5013 eth_tx_burst_t 5014 mlx5_select_tx_function(struct rte_eth_dev *dev) 5015 { 5016 struct mlx5_priv *priv = dev->data->dev_private; 5017 struct mlx5_dev_config *config = &priv->config; 5018 uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads; 5019 unsigned int diff = 0, olx = 0, i, m; 5020 5021 static_assert(MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE <= 5022 MLX5_DSEG_MAX, "invalid WQE max size"); 5023 static_assert(MLX5_WQE_CSEG_SIZE == MLX5_WSEG_SIZE, 5024 "invalid WQE Control Segment size"); 5025 static_assert(MLX5_WQE_ESEG_SIZE == MLX5_WSEG_SIZE, 5026 "invalid WQE Ethernet Segment size"); 5027 static_assert(MLX5_WQE_DSEG_SIZE == MLX5_WSEG_SIZE, 5028 "invalid WQE Data Segment size"); 5029 static_assert(MLX5_WQE_SIZE == 4 * MLX5_WSEG_SIZE, 5030 "invalid WQE size"); 5031 assert(priv); 5032 if (tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) { 5033 /* We should support Multi-Segment Packets. */ 5034 olx |= MLX5_TXOFF_CONFIG_MULTI; 5035 } 5036 if (tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO | 5037 DEV_TX_OFFLOAD_VXLAN_TNL_TSO | 5038 DEV_TX_OFFLOAD_GRE_TNL_TSO | 5039 DEV_TX_OFFLOAD_IP_TNL_TSO | 5040 DEV_TX_OFFLOAD_UDP_TNL_TSO)) { 5041 /* We should support TCP Send Offload. */ 5042 olx |= MLX5_TXOFF_CONFIG_TSO; 5043 } 5044 if (tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO | 5045 DEV_TX_OFFLOAD_UDP_TNL_TSO | 5046 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) { 5047 /* We should support Software Parser for Tunnels. */ 5048 olx |= MLX5_TXOFF_CONFIG_SWP; 5049 } 5050 if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM | 5051 DEV_TX_OFFLOAD_UDP_CKSUM | 5052 DEV_TX_OFFLOAD_TCP_CKSUM | 5053 DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) { 5054 /* We should support IP/TCP/UDP Checksums. */ 5055 olx |= MLX5_TXOFF_CONFIG_CSUM; 5056 } 5057 if (tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT) { 5058 /* We should support VLAN insertion. */ 5059 olx |= MLX5_TXOFF_CONFIG_VLAN; 5060 } 5061 if (priv->txqs_n && (*priv->txqs)[0]) { 5062 struct mlx5_txq_data *txd = (*priv->txqs)[0]; 5063 5064 if (txd->inlen_send) { 5065 /* 5066 * Check the data inline requirements. 
Data inline 5067 * is enabled on a per-device basis, so we can check 5068 * the first Tx queue only. 5069 * 5070 * If the device does not support VLAN insertion in WQE 5071 * and some queues are requested to perform VLAN 5072 * insertion offload, then inlining must be enabled. 5073 */ 5074 olx |= MLX5_TXOFF_CONFIG_INLINE; 5075 } 5076 } 5077 if (config->mps == MLX5_MPW_ENHANCED && 5078 config->txq_inline_min <= 0) { 5079 /* 5080 * The NIC supports Enhanced Multi-Packet Write. 5081 * We do not support legacy MPW due to its 5082 * hardware-related problems, so we just ignore 5083 * legacy MLX5_MPW settings. There should be no 5084 * minimal required inline data. 5085 */ 5086 olx |= MLX5_TXOFF_CONFIG_EMPW; 5087 } 5088 if (tx_offloads & DEV_TX_OFFLOAD_MATCH_METADATA) { 5089 /* We should support Flow metadata. */ 5090 olx |= MLX5_TXOFF_CONFIG_METADATA; 5091 } 5092 /* 5093 * Scan the routine table to find the minimal 5094 * routine satisfying the requested offloads. 5095 */ 5096 m = RTE_DIM(txoff_func); 5097 for (i = 0; i < RTE_DIM(txoff_func); i++) { 5098 unsigned int tmp; 5099 5100 tmp = txoff_func[i].olx; 5101 if (tmp == olx) { 5102 /* Meets requested offloads exactly. */ 5103 m = i; 5104 break; 5105 } 5106 if ((tmp & olx) != olx) { 5107 /* Does not meet requested offloads at all. */ 5108 continue; 5109 } 5110 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_EMPW) 5111 /* Do not enable eMPW if not configured. */ 5112 continue; 5113 if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_INLINE) 5114 /* Do not enable inlining if not configured. */ 5115 continue; 5116 /* 5117 * Some routine meets the requirements. 5118 * Check whether it has the minimal amount 5119 * of non-requested offloads. 5120 */ 5121 tmp = __builtin_popcountl(tmp & ~olx); 5122 if (m >= RTE_DIM(txoff_func) || tmp < diff) { 5123 /* First or better match, save and continue. */ 5124 m = i; 5125 diff = tmp; 5126 continue; 5127 } 5128 if (tmp == diff) { 5129 tmp = txoff_func[i].olx ^ txoff_func[m].olx; 5130 if (__builtin_ffsl(txoff_func[i].olx & ~tmp) < 5131 __builtin_ffsl(txoff_func[m].olx & ~tmp)) { 5132 /* The lighter non-requested offload wins. */ 5133 m = i; 5134 } 5135 } 5136 } 5137 if (m >= RTE_DIM(txoff_func)) { 5138 DRV_LOG(DEBUG, "port %u has no selected Tx function" 5139 " for requested offloads %04X", 5140 dev->data->port_id, olx); 5141 return NULL; 5142 } 5143 DRV_LOG(DEBUG, "port %u has selected Tx function" 5144 " supporting offloads %04X/%04X", 5145 dev->data->port_id, olx, txoff_func[m].olx); 5146 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MULTI) 5147 DRV_LOG(DEBUG, "\tMULTI (multi segment)"); 5148 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TSO) 5149 DRV_LOG(DEBUG, "\tTSO (TCP send offload)"); 5150 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_SWP) 5151 DRV_LOG(DEBUG, "\tSWP (software parser)"); 5152 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_CSUM) 5153 DRV_LOG(DEBUG, "\tCSUM (checksum offload)"); 5154 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_INLINE) 5155 DRV_LOG(DEBUG, "\tINLIN (inline data)"); 5156 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_VLAN) 5157 DRV_LOG(DEBUG, "\tVLANI (VLAN insertion)"); 5158 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_METADATA) 5159 DRV_LOG(DEBUG, "\tMETAD (tx Flow metadata)"); 5160 if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_EMPW) 5161 DRV_LOG(DEBUG, "\tEMPW (Enhanced MPW)"); 5162 return txoff_func[m].func; 5163 } 5164 5165 5166
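/*
 * Illustrative sketch of the WQE size accounting used by the burst routines
 * above. It is guarded by the hypothetical MLX5_TX_DOC_EXAMPLE macro and is
 * not built by default; the mlx5_tx_doc_* helper names are assumptions added
 * for documentation only, not driver APIs. The first helper restates the
 * (ds + 3) / 4 rounding seen throughout the file: WQE sizes are counted in
 * 16-byte segments (MLX5_WSEG_SIZE) and one 64-byte WQEBB holds four of them.
 * The second helper mirrors the segment estimation that
 * mlx5_tx_burst_single_send() performs for a completely inlined packet.
 */
#ifdef MLX5_TX_DOC_EXAMPLE
static unsigned int
mlx5_tx_doc_wqebb_of_ds(unsigned int ds)
{
	/* Round the 16-byte segment count up to whole 64-byte WQEBBs. */
	return (ds + 3) / 4;
}

static unsigned int
mlx5_tx_doc_send_inline_ds(unsigned int inlen)
{
	/*
	 * Control Segment plus the Ethernet Segment carrying the first
	 * MLX5_ESEG_MIN_INLINE_SIZE bytes, plus the remaining inlined data
	 * rounded up to whole 16-byte segments (same formula as above).
	 */
	return (inlen + 3 * MLX5_WSEG_SIZE -
		MLX5_ESEG_MIN_INLINE_SIZE +
		MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE;
}
#endif /* MLX5_TX_DOC_EXAMPLE */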
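/*
 * Illustrative usage sketch, also guarded by the hypothetical
 * MLX5_TX_DOC_EXAMPLE macro. The routine returned by
 * mlx5_select_tx_function() is installed as the device Tx burst callback at
 * device start, so applications reach the generated mlx5_tx_burst_* routines
 * only through the generic rte_eth_tx_burst() API. The mlx5_tx_doc_send()
 * wrapper below is an example caller, not a driver routine.
 */
#ifdef MLX5_TX_DOC_EXAMPLE
#include <rte_ethdev.h>

static uint16_t
mlx5_tx_doc_send(uint16_t port_id, uint16_t queue_id,
		 struct rte_mbuf **pkts, uint16_t pkts_n)
{
	/*
	 * Dispatches to the routine selected for the configured offloads
	 * (one of the MLX5_TXOFF_DECL() generated functions above).
	 * Packets not accepted by the queue remain owned by the caller.
	 */
	return rte_eth_tx_burst(port_id, queue_id, pkts, pkts_n);
}
#endif /* MLX5_TX_DOC_EXAMPLE */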