/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015-2019 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_cycles.h>
#include <rte_flow.h>

#include <mlx5_prm.h>
#include <mlx5_common.h>

#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
	MLX5_TXCMP_CODE_EXIT = 0,
	MLX5_TXCMP_CODE_ERROR,
	MLX5_TXCMP_CODE_SINGLE,
	MLX5_TXCMP_CODE_MULTI,
	MLX5_TXCMP_CODE_TSO,
	MLX5_TXCMP_CODE_EMPW,
};

/*
 * These defines configure the set of Tx burst routine options supported at
 * compile time. Options that are not specified are optimized out, because
 * the related "if" conditions can be evaluated at compile time. Offloads
 * whose runtime check is more expensive (takes more CPU cycles to skip)
 * should have a bigger index - this is needed to select the best matching
 * routine when there is no exact match and some offloads are not actually
 * requested.
 */
#define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets. */
#define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported. */
#define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads. */
#define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Check Sums offloaded. */
#define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported. */
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported. */
#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported. */
#define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp. */

/* The most common offloads groups.
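 */

/*
 * Illustrative sketch (not part of the driver): because every burst routine
 * template is instantiated with a compile-time constant offload mask "olx",
 * each "olx & MLX5_TXOFF_CONFIG_xxx" test folds to a constant and the
 * compiler removes the branches of offloads that were not configured. The
 * hypothetical helper below only demonstrates this dead-branch elimination.
 */
static __rte_always_inline unsigned int
txoff_example_branches(uint64_t ol_flags, unsigned int olx)
{
	unsigned int work = 0;

	if (olx & MLX5_TXOFF_CONFIG_TSO) {
		/* Emitted only when the TSO option is configured. */
		if (ol_flags & PKT_TX_TCP_SEG)
			work |= 1u;
	}
	if (olx & MLX5_TXOFF_CONFIG_VLAN) {
		/* Emitted only when VLAN insertion is configured. */
		if (ol_flags & PKT_TX_VLAN_PKT)
			work |= 2u;
	}
	return work;
}

/*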
*/ 60 #define MLX5_TXOFF_CONFIG_NONE 0 61 #define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \ 62 MLX5_TXOFF_CONFIG_TSO | \ 63 MLX5_TXOFF_CONFIG_SWP | \ 64 MLX5_TXOFF_CONFIG_CSUM | \ 65 MLX5_TXOFF_CONFIG_INLINE | \ 66 MLX5_TXOFF_CONFIG_VLAN | \ 67 MLX5_TXOFF_CONFIG_METADATA) 68 69 #define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask) 70 71 #define MLX5_TXOFF_DECL(func, olx) \ 72 static uint16_t mlx5_tx_burst_##func(void *txq, \ 73 struct rte_mbuf **pkts, \ 74 uint16_t pkts_n) \ 75 { \ 76 return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \ 77 pkts, pkts_n, (olx)); \ 78 } 79 80 #define MLX5_TXOFF_INFO(func, olx) {mlx5_tx_burst_##func, olx}, 81 82 static __rte_always_inline uint32_t 83 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe); 84 85 static __rte_always_inline int 86 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 87 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe); 88 89 static __rte_always_inline uint32_t 90 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe); 91 92 static __rte_always_inline void 93 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 94 volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res); 95 96 static int 97 mlx5_queue_state_modify(struct rte_eth_dev *dev, 98 struct mlx5_mp_arg_queue_state_modify *sm); 99 100 static inline void 101 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 102 volatile struct mlx5_cqe *__rte_restrict cqe, 103 uint32_t phcsum); 104 105 static inline void 106 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 107 volatile struct mlx5_cqe *__rte_restrict cqe, 108 uint32_t len); 109 110 uint32_t mlx5_ptype_table[] __rte_cache_aligned = { 111 [0xff] = RTE_PTYPE_ALL_MASK, /* Last entry for errored packet. */ 112 }; 113 114 uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned; 115 uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned; 116 117 uint64_t rte_net_mlx5_dynf_inline_mask; 118 #define PKT_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask 119 120 /** 121 * Build a table to translate Rx completion flags to packet type. 122 * 123 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 124 */ 125 void 126 mlx5_set_ptype_table(void) 127 { 128 unsigned int i; 129 uint32_t (*p)[RTE_DIM(mlx5_ptype_table)] = &mlx5_ptype_table; 130 131 /* Last entry must not be overwritten, reserved for errored packet. 
*/ 132 for (i = 0; i < RTE_DIM(mlx5_ptype_table) - 1; ++i) 133 (*p)[i] = RTE_PTYPE_UNKNOWN; 134 /* 135 * The index to the array should have: 136 * bit[1:0] = l3_hdr_type 137 * bit[4:2] = l4_hdr_type 138 * bit[5] = ip_frag 139 * bit[6] = tunneled 140 * bit[7] = outer_l3_type 141 */ 142 /* L2 */ 143 (*p)[0x00] = RTE_PTYPE_L2_ETHER; 144 /* L3 */ 145 (*p)[0x01] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 146 RTE_PTYPE_L4_NONFRAG; 147 (*p)[0x02] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 148 RTE_PTYPE_L4_NONFRAG; 149 /* Fragmented */ 150 (*p)[0x21] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 151 RTE_PTYPE_L4_FRAG; 152 (*p)[0x22] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 153 RTE_PTYPE_L4_FRAG; 154 /* TCP */ 155 (*p)[0x05] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 156 RTE_PTYPE_L4_TCP; 157 (*p)[0x06] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 158 RTE_PTYPE_L4_TCP; 159 (*p)[0x0d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 160 RTE_PTYPE_L4_TCP; 161 (*p)[0x0e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 162 RTE_PTYPE_L4_TCP; 163 (*p)[0x11] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 164 RTE_PTYPE_L4_TCP; 165 (*p)[0x12] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 166 RTE_PTYPE_L4_TCP; 167 /* UDP */ 168 (*p)[0x09] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 169 RTE_PTYPE_L4_UDP; 170 (*p)[0x0a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 171 RTE_PTYPE_L4_UDP; 172 /* Repeat with outer_l3_type being set. Just in case. */ 173 (*p)[0x81] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 174 RTE_PTYPE_L4_NONFRAG; 175 (*p)[0x82] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 176 RTE_PTYPE_L4_NONFRAG; 177 (*p)[0xa1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 178 RTE_PTYPE_L4_FRAG; 179 (*p)[0xa2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 180 RTE_PTYPE_L4_FRAG; 181 (*p)[0x85] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 182 RTE_PTYPE_L4_TCP; 183 (*p)[0x86] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 184 RTE_PTYPE_L4_TCP; 185 (*p)[0x8d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 186 RTE_PTYPE_L4_TCP; 187 (*p)[0x8e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 188 RTE_PTYPE_L4_TCP; 189 (*p)[0x91] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 190 RTE_PTYPE_L4_TCP; 191 (*p)[0x92] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 192 RTE_PTYPE_L4_TCP; 193 (*p)[0x89] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 194 RTE_PTYPE_L4_UDP; 195 (*p)[0x8a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 196 RTE_PTYPE_L4_UDP; 197 /* Tunneled - L3 */ 198 (*p)[0x40] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN; 199 (*p)[0x41] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 200 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 201 RTE_PTYPE_INNER_L4_NONFRAG; 202 (*p)[0x42] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 203 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 204 RTE_PTYPE_INNER_L4_NONFRAG; 205 (*p)[0xc0] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN; 206 (*p)[0xc1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 207 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 208 RTE_PTYPE_INNER_L4_NONFRAG; 209 (*p)[0xc2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 210 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 211 RTE_PTYPE_INNER_L4_NONFRAG; 212 /* Tunneled - Fragmented */ 213 (*p)[0x61] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 214 
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 215 RTE_PTYPE_INNER_L4_FRAG; 216 (*p)[0x62] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 217 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 218 RTE_PTYPE_INNER_L4_FRAG; 219 (*p)[0xe1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 220 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 221 RTE_PTYPE_INNER_L4_FRAG; 222 (*p)[0xe2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 223 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 224 RTE_PTYPE_INNER_L4_FRAG; 225 /* Tunneled - TCP */ 226 (*p)[0x45] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 227 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 228 RTE_PTYPE_INNER_L4_TCP; 229 (*p)[0x46] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 230 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 231 RTE_PTYPE_INNER_L4_TCP; 232 (*p)[0x4d] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 233 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 234 RTE_PTYPE_INNER_L4_TCP; 235 (*p)[0x4e] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 236 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 237 RTE_PTYPE_INNER_L4_TCP; 238 (*p)[0x51] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 239 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 240 RTE_PTYPE_INNER_L4_TCP; 241 (*p)[0x52] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 242 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 243 RTE_PTYPE_INNER_L4_TCP; 244 (*p)[0xc5] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 245 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 246 RTE_PTYPE_INNER_L4_TCP; 247 (*p)[0xc6] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 248 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 249 RTE_PTYPE_INNER_L4_TCP; 250 (*p)[0xcd] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 251 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 252 RTE_PTYPE_INNER_L4_TCP; 253 (*p)[0xce] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 254 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 255 RTE_PTYPE_INNER_L4_TCP; 256 (*p)[0xd1] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 257 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 258 RTE_PTYPE_INNER_L4_TCP; 259 (*p)[0xd2] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 260 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 261 RTE_PTYPE_INNER_L4_TCP; 262 /* Tunneled - UDP */ 263 (*p)[0x49] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 264 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 265 RTE_PTYPE_INNER_L4_UDP; 266 (*p)[0x4a] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | 267 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 268 RTE_PTYPE_INNER_L4_UDP; 269 (*p)[0xc9] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 270 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | 271 RTE_PTYPE_INNER_L4_UDP; 272 (*p)[0xca] = RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | 273 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN | 274 RTE_PTYPE_INNER_L4_UDP; 275 } 276 277 /** 278 * Build a table to translate packet to checksum type of Verbs. 279 */ 280 void 281 mlx5_set_cksum_table(void) 282 { 283 unsigned int i; 284 uint8_t v; 285 286 /* 287 * The index should have: 288 * bit[0] = PKT_TX_TCP_SEG 289 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 290 * bit[4] = PKT_TX_IP_CKSUM 291 * bit[8] = PKT_TX_OUTER_IP_CKSUM 292 * bit[9] = tunnel 293 */ 294 for (i = 0; i < RTE_DIM(mlx5_cksum_table); ++i) { 295 v = 0; 296 if (i & (1 << 9)) { 297 /* Tunneled packet. */ 298 if (i & (1 << 8)) /* Outer IP. */ 299 v |= MLX5_ETH_WQE_L3_CSUM; 300 if (i & (1 << 4)) /* Inner IP. */ 301 v |= MLX5_ETH_WQE_L3_INNER_CSUM; 302 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 303 v |= MLX5_ETH_WQE_L4_INNER_CSUM; 304 } else { 305 /* No tunnel. 
*/ 306 if (i & (1 << 4)) /* IP. */ 307 v |= MLX5_ETH_WQE_L3_CSUM; 308 if (i & (3 << 2 | 1 << 0)) /* L4 or TSO. */ 309 v |= MLX5_ETH_WQE_L4_CSUM; 310 } 311 mlx5_cksum_table[i] = v; 312 } 313 } 314 315 /** 316 * Build a table to translate packet type of mbuf to SWP type of Verbs. 317 */ 318 void 319 mlx5_set_swp_types_table(void) 320 { 321 unsigned int i; 322 uint8_t v; 323 324 /* 325 * The index should have: 326 * bit[0:1] = PKT_TX_L4_MASK 327 * bit[4] = PKT_TX_IPV6 328 * bit[8] = PKT_TX_OUTER_IPV6 329 * bit[9] = PKT_TX_OUTER_UDP 330 */ 331 for (i = 0; i < RTE_DIM(mlx5_swp_types_table); ++i) { 332 v = 0; 333 if (i & (1 << 8)) 334 v |= MLX5_ETH_WQE_L3_OUTER_IPV6; 335 if (i & (1 << 9)) 336 v |= MLX5_ETH_WQE_L4_OUTER_UDP; 337 if (i & (1 << 4)) 338 v |= MLX5_ETH_WQE_L3_INNER_IPV6; 339 if ((i & 3) == (PKT_TX_UDP_CKSUM >> 52)) 340 v |= MLX5_ETH_WQE_L4_INNER_UDP; 341 mlx5_swp_types_table[i] = v; 342 } 343 } 344 345 /** 346 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 347 * Flags must be preliminary initialized to zero. 348 * 349 * @param loc 350 * Pointer to burst routine local context. 351 * @param swp_flags 352 * Pointer to store Software Parser flags 353 * @param olx 354 * Configured Tx offloads mask. It is fully defined at 355 * compile time and may be used for optimization. 356 * 357 * @return 358 * Software Parser offsets packed in dword. 359 * Software Parser flags are set by pointer. 360 */ 361 static __rte_always_inline uint32_t 362 txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, 363 uint8_t *swp_flags, 364 unsigned int olx) 365 { 366 uint64_t ol, tunnel; 367 unsigned int idx, off; 368 uint32_t set; 369 370 if (!MLX5_TXOFF_CONFIG(SWP)) 371 return 0; 372 ol = loc->mbuf->ol_flags; 373 tunnel = ol & PKT_TX_TUNNEL_MASK; 374 /* 375 * Check whether Software Parser is required. 376 * Only customized tunnels may ask for. 377 */ 378 if (likely(tunnel != PKT_TX_TUNNEL_UDP && tunnel != PKT_TX_TUNNEL_IP)) 379 return 0; 380 /* 381 * The index should have: 382 * bit[0:1] = PKT_TX_L4_MASK 383 * bit[4] = PKT_TX_IPV6 384 * bit[8] = PKT_TX_OUTER_IPV6 385 * bit[9] = PKT_TX_OUTER_UDP 386 */ 387 idx = (ol & (PKT_TX_L4_MASK | PKT_TX_IPV6 | PKT_TX_OUTER_IPV6)) >> 52; 388 idx |= (tunnel == PKT_TX_TUNNEL_UDP) ? (1 << 9) : 0; 389 *swp_flags = mlx5_swp_types_table[idx]; 390 /* 391 * Set offsets for SW parser. Since ConnectX-5, SW parser just 392 * complements HW parser. SW parser starts to engage only if HW parser 393 * can't reach a header. For the older devices, HW parser will not kick 394 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 395 * should be set regardless of HW offload. 396 */ 397 off = loc->mbuf->outer_l2_len; 398 if (MLX5_TXOFF_CONFIG(VLAN) && ol & PKT_TX_VLAN_PKT) 399 off += sizeof(struct rte_vlan_hdr); 400 set = (off >> 1) << 8; /* Outer L3 offset. */ 401 off += loc->mbuf->outer_l3_len; 402 if (tunnel == PKT_TX_TUNNEL_UDP) 403 set |= off >> 1; /* Outer L4 offset. */ 404 if (ol & (PKT_TX_IPV4 | PKT_TX_IPV6)) { /* Inner IP. */ 405 const uint64_t csum = ol & PKT_TX_L4_MASK; 406 off += loc->mbuf->l2_len; 407 set |= (off >> 1) << 24; /* Inner L3 offset. */ 408 if (csum == PKT_TX_TCP_CKSUM || 409 csum == PKT_TX_UDP_CKSUM || 410 (MLX5_TXOFF_CONFIG(TSO) && ol & PKT_TX_TCP_SEG)) { 411 off += loc->mbuf->l3_len; 412 set |= (off >> 1) << 16; /* Inner L4 offset. */ 413 } 414 } 415 set = rte_cpu_to_le_32(set); 416 return set; 417 } 418 419 /** 420 * Convert the Checksum offloads to Verbs. 421 * 422 * @param buf 423 * Pointer to the mbuf. 
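 */

/*
 * Worked example (illustrative only, derived from mlx5_set_cksum_table()
 * above): PKT_TX_IP_CKSUM is bit 54 and PKT_TX_TCP_CKSUM is value 1 in the
 * PKT_TX_L4_MASK field (bits 53:52), so for a non-tunneled packet the shift
 * by 50 used in txq_ol_cksum_to_cs() below produces index 0x14, whose table
 * entry is MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM.
 */
static inline uint8_t
txq_ol_cksum_example(void)
{
	const uint64_t ol_flags = PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM;
	/* bit[4] = IP checksum, bit[3:2] = L4 checksum type, bit[9] = 0. */
	const uint32_t idx = (uint32_t)(ol_flags >> 50);

	return mlx5_cksum_table[idx];
}

/*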
424 * 425 * @return 426 * Converted checksum flags. 427 */ 428 static __rte_always_inline uint8_t 429 txq_ol_cksum_to_cs(struct rte_mbuf *buf) 430 { 431 uint32_t idx; 432 uint8_t is_tunnel = !!(buf->ol_flags & PKT_TX_TUNNEL_MASK); 433 const uint64_t ol_flags_mask = PKT_TX_TCP_SEG | PKT_TX_L4_MASK | 434 PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM; 435 436 /* 437 * The index should have: 438 * bit[0] = PKT_TX_TCP_SEG 439 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 440 * bit[4] = PKT_TX_IP_CKSUM 441 * bit[8] = PKT_TX_OUTER_IP_CKSUM 442 * bit[9] = tunnel 443 */ 444 idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9); 445 return mlx5_cksum_table[idx]; 446 } 447 448 /** 449 * Internal function to compute the number of used descriptors in an RX queue 450 * 451 * @param rxq 452 * The Rx queue. 453 * 454 * @return 455 * The number of used rx descriptor. 456 */ 457 static uint32_t 458 rx_queue_count(struct mlx5_rxq_data *rxq) 459 { 460 struct rxq_zip *zip = &rxq->zip; 461 volatile struct mlx5_cqe *cqe; 462 unsigned int cq_ci = rxq->cq_ci; 463 const unsigned int cqe_n = (1 << rxq->cqe_n); 464 const unsigned int cqe_cnt = cqe_n - 1; 465 unsigned int used = 0; 466 467 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; 468 while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) { 469 int8_t op_own; 470 unsigned int n; 471 472 op_own = cqe->op_own; 473 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) 474 if (unlikely(zip->ai)) 475 n = zip->cqe_cnt - zip->ai; 476 else 477 n = rte_be_to_cpu_32(cqe->byte_cnt); 478 else 479 n = 1; 480 cq_ci += n; 481 used += n; 482 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; 483 } 484 used = RTE_MIN(used, cqe_n); 485 return used; 486 } 487 488 /** 489 * DPDK callback to check the status of a rx descriptor. 490 * 491 * @param rx_queue 492 * The Rx queue. 493 * @param[in] offset 494 * The index of the descriptor in the ring. 495 * 496 * @return 497 * The status of the tx descriptor. 498 */ 499 int 500 mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset) 501 { 502 struct mlx5_rxq_data *rxq = rx_queue; 503 struct mlx5_rxq_ctrl *rxq_ctrl = 504 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 505 struct rte_eth_dev *dev = ETH_DEV(rxq_ctrl->priv); 506 507 if (dev->rx_pkt_burst == NULL || 508 dev->rx_pkt_burst == removed_rx_burst) { 509 rte_errno = ENOTSUP; 510 return -rte_errno; 511 } 512 if (offset >= (1 << rxq->cqe_n)) { 513 rte_errno = EINVAL; 514 return -rte_errno; 515 } 516 if (offset < rx_queue_count(rxq)) 517 return RTE_ETH_RX_DESC_DONE; 518 return RTE_ETH_RX_DESC_AVAIL; 519 } 520 521 /** 522 * DPDK callback to get the RX queue information 523 * 524 * @param dev 525 * Pointer to the device structure. 526 * 527 * @param rx_queue_id 528 * Rx queue identificator. 529 * 530 * @param qinfo 531 * Pointer to the RX queue information structure. 532 * 533 * @return 534 * None. 535 */ 536 537 void 538 mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id, 539 struct rte_eth_rxq_info *qinfo) 540 { 541 struct mlx5_priv *priv = dev->data->dev_private; 542 struct mlx5_rxq_data *rxq = (*priv->rxqs)[rx_queue_id]; 543 struct mlx5_rxq_ctrl *rxq_ctrl = 544 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 545 546 if (!rxq) 547 return; 548 qinfo->mp = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ? 549 rxq->mprq_mp : rxq->mp; 550 qinfo->conf.rx_thresh.pthresh = 0; 551 qinfo->conf.rx_thresh.hthresh = 0; 552 qinfo->conf.rx_thresh.wthresh = 0; 553 qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh; 554 qinfo->conf.rx_drop_en = 1; 555 qinfo->conf.rx_deferred_start = rxq_ctrl ? 
0 : 1; 556 qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads; 557 qinfo->scattered_rx = dev->data->scattered_rx; 558 qinfo->nb_desc = 1 << rxq->elts_n; 559 } 560 561 /** 562 * DPDK callback to get the RX packet burst mode information 563 * 564 * @param dev 565 * Pointer to the device structure. 566 * 567 * @param rx_queue_id 568 * Rx queue identificatior. 569 * 570 * @param mode 571 * Pointer to the burts mode information. 572 * 573 * @return 574 * 0 as success, -EINVAL as failure. 575 */ 576 577 int 578 mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, 579 uint16_t rx_queue_id __rte_unused, 580 struct rte_eth_burst_mode *mode) 581 { 582 eth_rx_burst_t pkt_burst = dev->rx_pkt_burst; 583 struct mlx5_priv *priv = dev->data->dev_private; 584 struct mlx5_rxq_data *rxq; 585 586 rxq = (*priv->rxqs)[rx_queue_id]; 587 if (!rxq) { 588 rte_errno = EINVAL; 589 return -rte_errno; 590 } 591 if (pkt_burst == mlx5_rx_burst) { 592 snprintf(mode->info, sizeof(mode->info), "%s", "Scalar"); 593 } else if (pkt_burst == mlx5_rx_burst_mprq) { 594 snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ"); 595 } else if (pkt_burst == mlx5_rx_burst_vec) { 596 #if defined RTE_ARCH_X86_64 597 snprintf(mode->info, sizeof(mode->info), "%s", "Vector SSE"); 598 #elif defined RTE_ARCH_ARM64 599 snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon"); 600 #elif defined RTE_ARCH_PPC_64 601 snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec"); 602 #else 603 return -EINVAL; 604 #endif 605 } else if (pkt_burst == mlx5_rx_burst_mprq_vec) { 606 #if defined RTE_ARCH_X86_64 607 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector SSE"); 608 #elif defined RTE_ARCH_ARM64 609 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon"); 610 #elif defined RTE_ARCH_PPC_64 611 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec"); 612 #else 613 return -EINVAL; 614 #endif 615 } else { 616 return -EINVAL; 617 } 618 return 0; 619 } 620 621 /** 622 * DPDK callback to get the number of used descriptors in a RX queue 623 * 624 * @param dev 625 * Pointer to the device structure. 626 * 627 * @param rx_queue_id 628 * The Rx queue. 629 * 630 * @return 631 * The number of used rx descriptor. 632 * -EINVAL if the queue is invalid 633 */ 634 uint32_t 635 mlx5_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id) 636 { 637 struct mlx5_priv *priv = dev->data->dev_private; 638 struct mlx5_rxq_data *rxq; 639 640 if (dev->rx_pkt_burst == NULL || 641 dev->rx_pkt_burst == removed_rx_burst) { 642 rte_errno = ENOTSUP; 643 return -rte_errno; 644 } 645 rxq = (*priv->rxqs)[rx_queue_id]; 646 if (!rxq) { 647 rte_errno = EINVAL; 648 return -rte_errno; 649 } 650 return rx_queue_count(rxq); 651 } 652 653 #define MLX5_SYSTEM_LOG_DIR "/var/log" 654 /** 655 * Dump debug information to log file. 656 * 657 * @param fname 658 * The file name. 659 * @param hex_title 660 * If not NULL this string is printed as a header to the output 661 * and the output will be in hexadecimal view. 662 * @param buf 663 * This is the buffer address to print out. 664 * @param len 665 * The number of bytes to dump out. 
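 */

/*
 * Usage sketch (hypothetical application code, assuming the generic ethdev
 * API from rte_ethdev.h): the callbacks above are reached through
 * rte_eth_rx_queue_count() and rte_eth_rx_descriptor_status(), e.g. to
 * estimate the Rx backlog of a queue.
 */
static inline int
example_rxq_backlog(uint16_t port_id, uint16_t queue_id)
{
	/* Resolves to mlx5_rx_queue_count() for mlx5 ports. */
	int used = rte_eth_rx_queue_count(port_id, queue_id);

	if (used < 0)
		return used; /* Negative errno, e.g. -EINVAL. */
	/* Resolves to mlx5_rx_descriptor_status() for mlx5 ports. */
	if (rte_eth_rx_descriptor_status(port_id, queue_id, 0) ==
	    RTE_ETH_RX_DESC_DONE)
		return used;
	return 0;
}

/*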
666 */ 667 void 668 mlx5_dump_debug_information(const char *fname, const char *hex_title, 669 const void *buf, unsigned int hex_len) 670 { 671 FILE *fd; 672 673 MKSTR(path, "%s/%s", MLX5_SYSTEM_LOG_DIR, fname); 674 fd = fopen(path, "a+"); 675 if (!fd) { 676 DRV_LOG(WARNING, "cannot open %s for debug dump", path); 677 MKSTR(path2, "./%s", fname); 678 fd = fopen(path2, "a+"); 679 if (!fd) { 680 DRV_LOG(ERR, "cannot open %s for debug dump", path2); 681 return; 682 } 683 DRV_LOG(INFO, "New debug dump in file %s", path2); 684 } else { 685 DRV_LOG(INFO, "New debug dump in file %s", path); 686 } 687 if (hex_title) 688 rte_hexdump(fd, hex_title, buf, hex_len); 689 else 690 fprintf(fd, "%s", (const char *)buf); 691 fprintf(fd, "\n\n\n"); 692 fclose(fd); 693 } 694 695 /** 696 * Move QP from error state to running state and initialize indexes. 697 * 698 * @param txq_ctrl 699 * Pointer to TX queue control structure. 700 * 701 * @return 702 * 0 on success, else -1. 703 */ 704 static int 705 tx_recover_qp(struct mlx5_txq_ctrl *txq_ctrl) 706 { 707 struct mlx5_mp_arg_queue_state_modify sm = { 708 .is_wq = 0, 709 .queue_id = txq_ctrl->txq.idx, 710 }; 711 712 if (mlx5_queue_state_modify(ETH_DEV(txq_ctrl->priv), &sm)) 713 return -1; 714 txq_ctrl->txq.wqe_ci = 0; 715 txq_ctrl->txq.wqe_pi = 0; 716 txq_ctrl->txq.elts_comp = 0; 717 return 0; 718 } 719 720 /* Return 1 if the error CQE is signed otherwise, sign it and return 0. */ 721 static int 722 check_err_cqe_seen(volatile struct mlx5_err_cqe *err_cqe) 723 { 724 static const uint8_t magic[] = "seen"; 725 int ret = 1; 726 unsigned int i; 727 728 for (i = 0; i < sizeof(magic); ++i) 729 if (!ret || err_cqe->rsvd1[i] != magic[i]) { 730 ret = 0; 731 err_cqe->rsvd1[i] = magic[i]; 732 } 733 return ret; 734 } 735 736 /** 737 * Handle error CQE. 738 * 739 * @param txq 740 * Pointer to TX queue structure. 741 * @param error_cqe 742 * Pointer to the error CQE. 743 * 744 * @return 745 * Negative value if queue recovery failed, otherwise 746 * the error completion entry is handled successfully. 747 */ 748 static int 749 mlx5_tx_error_cqe_handle(struct mlx5_txq_data *__rte_restrict txq, 750 volatile struct mlx5_err_cqe *err_cqe) 751 { 752 if (err_cqe->syndrome != MLX5_CQE_SYNDROME_WR_FLUSH_ERR) { 753 const uint16_t wqe_m = ((1 << txq->wqe_n) - 1); 754 struct mlx5_txq_ctrl *txq_ctrl = 755 container_of(txq, struct mlx5_txq_ctrl, txq); 756 uint16_t new_wqe_pi = rte_be_to_cpu_16(err_cqe->wqe_counter); 757 int seen = check_err_cqe_seen(err_cqe); 758 759 if (!seen && txq_ctrl->dump_file_n < 760 txq_ctrl->priv->config.max_dump_files_num) { 761 MKSTR(err_str, "Unexpected CQE error syndrome " 762 "0x%02x CQN = %u SQN = %u wqe_counter = %u " 763 "wq_ci = %u cq_ci = %u", err_cqe->syndrome, 764 txq->cqe_s, txq->qp_num_8s >> 8, 765 rte_be_to_cpu_16(err_cqe->wqe_counter), 766 txq->wqe_ci, txq->cq_ci); 767 MKSTR(name, "dpdk_mlx5_port_%u_txq_%u_index_%u_%u", 768 PORT_ID(txq_ctrl->priv), txq->idx, 769 txq_ctrl->dump_file_n, (uint32_t)rte_rdtsc()); 770 mlx5_dump_debug_information(name, NULL, err_str, 0); 771 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 772 (const void *)((uintptr_t) 773 txq->cqes), 774 sizeof(*err_cqe) * 775 (1 << txq->cqe_n)); 776 mlx5_dump_debug_information(name, "MLX5 Error SQ:", 777 (const void *)((uintptr_t) 778 txq->wqes), 779 MLX5_WQE_SIZE * 780 (1 << txq->wqe_n)); 781 txq_ctrl->dump_file_n++; 782 } 783 if (!seen) 784 /* 785 * Count errors in WQEs units. 
786 * Later it can be improved to count error packets, 787 * for example, by SQ parsing to find how much packets 788 * should be counted for each WQE. 789 */ 790 txq->stats.oerrors += ((txq->wqe_ci & wqe_m) - 791 new_wqe_pi) & wqe_m; 792 if (tx_recover_qp(txq_ctrl)) { 793 /* Recovering failed - retry later on the same WQE. */ 794 return -1; 795 } 796 /* Release all the remaining buffers. */ 797 txq_free_elts(txq_ctrl); 798 } 799 return 0; 800 } 801 802 /** 803 * Translate RX completion flags to packet type. 804 * 805 * @param[in] rxq 806 * Pointer to RX queue structure. 807 * @param[in] cqe 808 * Pointer to CQE. 809 * 810 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 811 * 812 * @return 813 * Packet type for struct rte_mbuf. 814 */ 815 static inline uint32_t 816 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe) 817 { 818 uint8_t idx; 819 uint8_t pinfo = cqe->pkt_info; 820 uint16_t ptype = cqe->hdr_type_etc; 821 822 /* 823 * The index to the array should have: 824 * bit[1:0] = l3_hdr_type 825 * bit[4:2] = l4_hdr_type 826 * bit[5] = ip_frag 827 * bit[6] = tunneled 828 * bit[7] = outer_l3_type 829 */ 830 idx = ((pinfo & 0x3) << 6) | ((ptype & 0xfc00) >> 10); 831 return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6)); 832 } 833 834 /** 835 * Initialize Rx WQ and indexes. 836 * 837 * @param[in] rxq 838 * Pointer to RX queue structure. 839 */ 840 void 841 mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) 842 { 843 const unsigned int wqe_n = 1 << rxq->elts_n; 844 unsigned int i; 845 846 for (i = 0; (i != wqe_n); ++i) { 847 volatile struct mlx5_wqe_data_seg *scat; 848 uintptr_t addr; 849 uint32_t byte_count; 850 851 if (mlx5_rxq_mprq_enabled(rxq)) { 852 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i]; 853 854 scat = &((volatile struct mlx5_wqe_mprq *) 855 rxq->wqes)[i].dseg; 856 addr = (uintptr_t)mlx5_mprq_buf_addr(buf, 857 1 << rxq->strd_num_n); 858 byte_count = (1 << rxq->strd_sz_n) * 859 (1 << rxq->strd_num_n); 860 } else { 861 struct rte_mbuf *buf = (*rxq->elts)[i]; 862 863 scat = &((volatile struct mlx5_wqe_data_seg *) 864 rxq->wqes)[i]; 865 addr = rte_pktmbuf_mtod(buf, uintptr_t); 866 byte_count = DATA_LEN(buf); 867 } 868 /* scat->addr must be able to store a pointer. */ 869 MLX5_ASSERT(sizeof(scat->addr) >= sizeof(uintptr_t)); 870 *scat = (struct mlx5_wqe_data_seg){ 871 .addr = rte_cpu_to_be_64(addr), 872 .byte_count = rte_cpu_to_be_32(byte_count), 873 .lkey = mlx5_rx_addr2mr(rxq, addr), 874 }; 875 } 876 rxq->consumed_strd = 0; 877 rxq->decompressed = 0; 878 rxq->rq_pi = 0; 879 rxq->zip = (struct rxq_zip){ 880 .ai = 0, 881 }; 882 rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ? 883 (wqe_n >> rxq->sges_n) * (1 << rxq->strd_num_n) : 0; 884 /* Update doorbell counter. */ 885 rxq->rq_ci = wqe_n >> rxq->sges_n; 886 rte_io_wmb(); 887 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 888 } 889 890 /** 891 * Modify a Verbs/DevX queue state. 892 * This must be called from the primary process. 893 * 894 * @param dev 895 * Pointer to Ethernet device. 896 * @param sm 897 * State modify request parameters. 898 * 899 * @return 900 * 0 in case of success else non-zero value and rte_errno is set. 
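 */

/*
 * Worked example for rxq_cq_to_pkt_type() above (illustrative only): a CQE
 * describing a plain IPv4/TCP frame yields index 0x06 (bit[1:0] =
 * l3_hdr_type, bit[4:2] = l4_hdr_type, no fragment/tunnel/outer bits), so
 * the lookup returns the entry written by mlx5_set_ptype_table():
 * RTE_PTYPE_L2_ETHER | RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | RTE_PTYPE_L4_TCP.
 */
static inline uint32_t
rxq_ptype_example(void)
{
	const uint8_t idx = 0x06;

	return mlx5_ptype_table[idx];
}

/*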
901 */ 902 int 903 mlx5_queue_state_modify_primary(struct rte_eth_dev *dev, 904 const struct mlx5_mp_arg_queue_state_modify *sm) 905 { 906 int ret; 907 struct mlx5_priv *priv = dev->data->dev_private; 908 909 if (sm->is_wq) { 910 struct mlx5_rxq_data *rxq = (*priv->rxqs)[sm->queue_id]; 911 struct mlx5_rxq_ctrl *rxq_ctrl = 912 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 913 914 ret = priv->obj_ops.rxq_obj_modify(rxq_ctrl->obj, sm->state); 915 if (ret) { 916 DRV_LOG(ERR, "Cannot change Rx WQ state to %u - %s", 917 sm->state, strerror(errno)); 918 rte_errno = errno; 919 return ret; 920 } 921 } else { 922 struct mlx5_txq_data *txq = (*priv->txqs)[sm->queue_id]; 923 struct mlx5_txq_ctrl *txq_ctrl = 924 container_of(txq, struct mlx5_txq_ctrl, txq); 925 926 ret = priv->obj_ops.txq_obj_modify(txq_ctrl->obj, 927 MLX5_TXQ_MOD_ERR2RDY, 928 (uint8_t)priv->dev_port); 929 if (ret) 930 return ret; 931 } 932 return 0; 933 } 934 935 /** 936 * Modify a Verbs queue state. 937 * 938 * @param dev 939 * Pointer to Ethernet device. 940 * @param sm 941 * State modify request parameters. 942 * 943 * @return 944 * 0 in case of success else non-zero value. 945 */ 946 static int 947 mlx5_queue_state_modify(struct rte_eth_dev *dev, 948 struct mlx5_mp_arg_queue_state_modify *sm) 949 { 950 struct mlx5_priv *priv = dev->data->dev_private; 951 int ret = 0; 952 953 switch (rte_eal_process_type()) { 954 case RTE_PROC_PRIMARY: 955 ret = mlx5_queue_state_modify_primary(dev, sm); 956 break; 957 case RTE_PROC_SECONDARY: 958 ret = mlx5_mp_req_queue_state_modify(&priv->mp_id, sm); 959 break; 960 default: 961 break; 962 } 963 return ret; 964 } 965 966 /** 967 * Handle a Rx error. 968 * The function inserts the RQ state to reset when the first error CQE is 969 * shown, then drains the CQ by the caller function loop. When the CQ is empty, 970 * it moves the RQ state to ready and initializes the RQ. 971 * Next CQE identification and error counting are in the caller responsibility. 972 * 973 * @param[in] rxq 974 * Pointer to RX queue structure. 975 * @param[in] vec 976 * 1 when called from vectorized Rx burst, need to prepare mbufs for the RQ. 977 * 0 when called from non-vectorized Rx burst. 978 * 979 * @return 980 * -1 in case of recovery error, otherwise the CQE status. 
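 */

/*
 * Recovery flow of mlx5_rx_err_handle() below, summarized (no new driver
 * logic, just an illustration):
 *
 *   NO_ERROR --error CQE--> NEED_RESET --WQ set to RESET--> NEED_READY
 *   NEED_READY --CQ drained, WQ set to RDY, RQ re-initialized--> NO_ERROR
 *
 * The hypothetical helper below only mirrors that transition order.
 */
static inline int
rxq_err_next_state_example(int err_state)
{
	switch (err_state) {
	case MLX5_RXQ_ERR_STATE_NO_ERROR:
		return MLX5_RXQ_ERR_STATE_NEED_RESET;
	case MLX5_RXQ_ERR_STATE_NEED_RESET:
		return MLX5_RXQ_ERR_STATE_NEED_READY;
	default:
		return MLX5_RXQ_ERR_STATE_NO_ERROR;
	}
}

/*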
981 */ 982 int 983 mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) 984 { 985 const uint16_t cqe_n = 1 << rxq->cqe_n; 986 const uint16_t cqe_mask = cqe_n - 1; 987 const uint16_t wqe_n = 1 << rxq->elts_n; 988 const uint16_t strd_n = 1 << rxq->strd_num_n; 989 struct mlx5_rxq_ctrl *rxq_ctrl = 990 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 991 union { 992 volatile struct mlx5_cqe *cqe; 993 volatile struct mlx5_err_cqe *err_cqe; 994 } u = { 995 .cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask], 996 }; 997 struct mlx5_mp_arg_queue_state_modify sm; 998 int ret; 999 1000 switch (rxq->err_state) { 1001 case MLX5_RXQ_ERR_STATE_NO_ERROR: 1002 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET; 1003 /* Fall-through */ 1004 case MLX5_RXQ_ERR_STATE_NEED_RESET: 1005 sm.is_wq = 1; 1006 sm.queue_id = rxq->idx; 1007 sm.state = IBV_WQS_RESET; 1008 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), &sm)) 1009 return -1; 1010 if (rxq_ctrl->dump_file_n < 1011 rxq_ctrl->priv->config.max_dump_files_num) { 1012 MKSTR(err_str, "Unexpected CQE error syndrome " 1013 "0x%02x CQN = %u RQN = %u wqe_counter = %u" 1014 " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome, 1015 rxq->cqn, rxq_ctrl->wqn, 1016 rte_be_to_cpu_16(u.err_cqe->wqe_counter), 1017 rxq->rq_ci << rxq->sges_n, rxq->cq_ci); 1018 MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u", 1019 rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc()); 1020 mlx5_dump_debug_information(name, NULL, err_str, 0); 1021 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 1022 (const void *)((uintptr_t) 1023 rxq->cqes), 1024 sizeof(*u.cqe) * cqe_n); 1025 mlx5_dump_debug_information(name, "MLX5 Error RQ:", 1026 (const void *)((uintptr_t) 1027 rxq->wqes), 1028 16 * wqe_n); 1029 rxq_ctrl->dump_file_n++; 1030 } 1031 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY; 1032 /* Fall-through */ 1033 case MLX5_RXQ_ERR_STATE_NEED_READY: 1034 ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci); 1035 if (ret == MLX5_CQE_STATUS_HW_OWN) { 1036 rte_io_wmb(); 1037 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1038 rte_io_wmb(); 1039 /* 1040 * The RQ consumer index must be zeroed while moving 1041 * from RESET state to RDY state. 1042 */ 1043 *rxq->rq_db = rte_cpu_to_be_32(0); 1044 rte_io_wmb(); 1045 sm.is_wq = 1; 1046 sm.queue_id = rxq->idx; 1047 sm.state = IBV_WQS_RDY; 1048 if (mlx5_queue_state_modify(ETH_DEV(rxq_ctrl->priv), 1049 &sm)) 1050 return -1; 1051 if (vec) { 1052 const uint32_t elts_n = 1053 mlx5_rxq_mprq_enabled(rxq) ? 1054 wqe_n * strd_n : wqe_n; 1055 const uint32_t e_mask = elts_n - 1; 1056 uint32_t elts_ci = 1057 mlx5_rxq_mprq_enabled(rxq) ? 1058 rxq->elts_ci : rxq->rq_ci; 1059 uint32_t elt_idx; 1060 struct rte_mbuf **elt; 1061 int i; 1062 unsigned int n = elts_n - (elts_ci - 1063 rxq->rq_pi); 1064 1065 for (i = 0; i < (int)n; ++i) { 1066 elt_idx = (elts_ci + i) & e_mask; 1067 elt = &(*rxq->elts)[elt_idx]; 1068 *elt = rte_mbuf_raw_alloc(rxq->mp); 1069 if (!*elt) { 1070 for (i--; i >= 0; --i) { 1071 elt_idx = (elts_ci + 1072 i) & elts_n; 1073 elt = &(*rxq->elts) 1074 [elt_idx]; 1075 rte_pktmbuf_free_seg 1076 (*elt); 1077 } 1078 return -1; 1079 } 1080 } 1081 for (i = 0; i < (int)elts_n; ++i) { 1082 elt = &(*rxq->elts)[i]; 1083 DATA_LEN(*elt) = 1084 (uint16_t)((*elt)->buf_len - 1085 rte_pktmbuf_headroom(*elt)); 1086 } 1087 /* Padding with a fake mbuf for vec Rx. 
*/ 1088 for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i) 1089 (*rxq->elts)[elts_n + i] = 1090 &rxq->fake_mbuf; 1091 } 1092 mlx5_rxq_initialize(rxq); 1093 rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; 1094 } 1095 return ret; 1096 default: 1097 return -1; 1098 } 1099 } 1100 1101 /** 1102 * Get size of the next packet for a given CQE. For compressed CQEs, the 1103 * consumer index is updated only once all packets of the current one have 1104 * been processed. 1105 * 1106 * @param rxq 1107 * Pointer to RX queue. 1108 * @param cqe 1109 * CQE to process. 1110 * @param[out] mcqe 1111 * Store pointer to mini-CQE if compressed. Otherwise, the pointer is not 1112 * written. 1113 * 1114 * @return 1115 * 0 in case of empty CQE, otherwise the packet size in bytes. 1116 */ 1117 static inline int 1118 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 1119 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe) 1120 { 1121 struct rxq_zip *zip = &rxq->zip; 1122 uint16_t cqe_n = cqe_cnt + 1; 1123 int len; 1124 uint16_t idx, end; 1125 1126 do { 1127 len = 0; 1128 /* Process compressed data in the CQE and mini arrays. */ 1129 if (zip->ai) { 1130 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1131 (volatile struct mlx5_mini_cqe8 (*)[8]) 1132 (uintptr_t)(&(*rxq->cqes)[zip->ca & 1133 cqe_cnt].pkt_info); 1134 1135 len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt); 1136 *mcqe = &(*mc)[zip->ai & 7]; 1137 if ((++zip->ai & 7) == 0) { 1138 /* Invalidate consumed CQEs */ 1139 idx = zip->ca; 1140 end = zip->na; 1141 while (idx != end) { 1142 (*rxq->cqes)[idx & cqe_cnt].op_own = 1143 MLX5_CQE_INVALIDATE; 1144 ++idx; 1145 } 1146 /* 1147 * Increment consumer index to skip the number 1148 * of CQEs consumed. Hardware leaves holes in 1149 * the CQ ring for software use. 1150 */ 1151 zip->ca = zip->na; 1152 zip->na += 8; 1153 } 1154 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { 1155 /* Invalidate the rest */ 1156 idx = zip->ca; 1157 end = zip->cq_ci; 1158 1159 while (idx != end) { 1160 (*rxq->cqes)[idx & cqe_cnt].op_own = 1161 MLX5_CQE_INVALIDATE; 1162 ++idx; 1163 } 1164 rxq->cq_ci = zip->cq_ci; 1165 zip->ai = 0; 1166 } 1167 /* 1168 * No compressed data, get next CQE and verify if it is 1169 * compressed. 1170 */ 1171 } else { 1172 int ret; 1173 int8_t op_own; 1174 1175 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 1176 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 1177 if (unlikely(ret == MLX5_CQE_STATUS_ERR || 1178 rxq->err_state)) { 1179 ret = mlx5_rx_err_handle(rxq, 0); 1180 if (ret == MLX5_CQE_STATUS_HW_OWN || 1181 ret == -1) 1182 return 0; 1183 } else { 1184 return 0; 1185 } 1186 } 1187 ++rxq->cq_ci; 1188 op_own = cqe->op_own; 1189 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { 1190 volatile struct mlx5_mini_cqe8 (*mc)[8] = 1191 (volatile struct mlx5_mini_cqe8 (*)[8]) 1192 (uintptr_t)(&(*rxq->cqes) 1193 [rxq->cq_ci & 1194 cqe_cnt].pkt_info); 1195 1196 /* Fix endianness. */ 1197 zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt); 1198 /* 1199 * Current mini array position is the one 1200 * returned by check_cqe64(). 1201 * 1202 * If completion comprises several mini arrays, 1203 * as a special case the second one is located 1204 * 7 CQEs after the initial CQE instead of 8 1205 * for subsequent ones. 1206 */ 1207 zip->ca = rxq->cq_ci; 1208 zip->na = zip->ca + 7; 1209 /* Compute the next non compressed CQE. */ 1210 --rxq->cq_ci; 1211 zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; 1212 /* Get packet size to return. 
*/ 1213 len = rte_be_to_cpu_32((*mc)[0].byte_cnt); 1214 *mcqe = &(*mc)[0]; 1215 zip->ai = 1; 1216 /* Prefetch all to be invalidated */ 1217 idx = zip->ca; 1218 end = zip->cq_ci; 1219 while (idx != end) { 1220 rte_prefetch0(&(*rxq->cqes)[(idx) & 1221 cqe_cnt]); 1222 ++idx; 1223 } 1224 } else { 1225 len = rte_be_to_cpu_32(cqe->byte_cnt); 1226 } 1227 } 1228 if (unlikely(rxq->err_state)) { 1229 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1230 ++rxq->stats.idropped; 1231 } else { 1232 return len; 1233 } 1234 } while (1); 1235 } 1236 1237 /** 1238 * Translate RX completion flags to offload flags. 1239 * 1240 * @param[in] cqe 1241 * Pointer to CQE. 1242 * 1243 * @return 1244 * Offload flags (ol_flags) for struct rte_mbuf. 1245 */ 1246 static inline uint32_t 1247 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe) 1248 { 1249 uint32_t ol_flags = 0; 1250 uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc); 1251 1252 ol_flags = 1253 TRANSPOSE(flags, 1254 MLX5_CQE_RX_L3_HDR_VALID, 1255 PKT_RX_IP_CKSUM_GOOD) | 1256 TRANSPOSE(flags, 1257 MLX5_CQE_RX_L4_HDR_VALID, 1258 PKT_RX_L4_CKSUM_GOOD); 1259 return ol_flags; 1260 } 1261 1262 /** 1263 * Fill in mbuf fields from RX completion flags. 1264 * Note that pkt->ol_flags should be initialized outside of this function. 1265 * 1266 * @param rxq 1267 * Pointer to RX queue. 1268 * @param pkt 1269 * mbuf to fill. 1270 * @param cqe 1271 * CQE to process. 1272 * @param rss_hash_res 1273 * Packet RSS Hash result. 1274 */ 1275 static inline void 1276 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 1277 volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res) 1278 { 1279 /* Update packet information. */ 1280 pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe); 1281 if (rss_hash_res && rxq->rss_hash) { 1282 pkt->hash.rss = rss_hash_res; 1283 pkt->ol_flags |= PKT_RX_RSS_HASH; 1284 } 1285 if (rxq->mark && MLX5_FLOW_MARK_IS_VALID(cqe->sop_drop_qpn)) { 1286 pkt->ol_flags |= PKT_RX_FDIR; 1287 if (cqe->sop_drop_qpn != 1288 rte_cpu_to_be_32(MLX5_FLOW_MARK_DEFAULT)) { 1289 uint32_t mark = cqe->sop_drop_qpn; 1290 1291 pkt->ol_flags |= PKT_RX_FDIR_ID; 1292 pkt->hash.fdir.hi = mlx5_flow_mark_get(mark); 1293 } 1294 } 1295 if (rxq->dynf_meta && cqe->flow_table_metadata) { 1296 pkt->ol_flags |= rxq->flow_meta_mask; 1297 *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, uint32_t *) = 1298 cqe->flow_table_metadata; 1299 } 1300 if (rxq->csum) 1301 pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); 1302 if (rxq->vlan_strip && 1303 (cqe->hdr_type_etc & rte_cpu_to_be_16(MLX5_CQE_VLAN_STRIPPED))) { 1304 pkt->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; 1305 pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info); 1306 } 1307 if (rxq->hw_timestamp) { 1308 uint64_t ts = rte_be_to_cpu_64(cqe->timestamp); 1309 1310 if (rxq->rt_timestamp) 1311 ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts); 1312 mlx5_timestamp_set(pkt, rxq->timestamp_offset, ts); 1313 pkt->ol_flags |= rxq->timestamp_rx_flag; 1314 } 1315 } 1316 1317 /** 1318 * DPDK callback for RX. 1319 * 1320 * @param dpdk_rxq 1321 * Generic pointer to RX queue structure. 1322 * @param[out] pkts 1323 * Array to store received packets. 1324 * @param pkts_n 1325 * Maximum number of packets in array. 1326 * 1327 * @return 1328 * Number of packets successfully received (<= pkts_n). 
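 */

/*
 * Usage sketch (hypothetical application code): mlx5_rx_burst() is never
 * called directly; applications reach it through rte_eth_rx_burst(), which
 * dispatches via dev->rx_pkt_burst.
 */
static inline void
example_rx_poll(uint16_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *pkts[32];
	uint16_t i, n;

	n = rte_eth_rx_burst(port_id, queue_id, pkts, RTE_DIM(pkts));
	for (i = 0; i < n; ++i) {
		/* Consume pkts[i]->packet_type, ol_flags, hash.rss, ... */
		rte_pktmbuf_free(pkts[i]);
	}
}

/*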
1329 */ 1330 uint16_t 1331 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1332 { 1333 struct mlx5_rxq_data *rxq = dpdk_rxq; 1334 const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; 1335 const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1; 1336 const unsigned int sges_n = rxq->sges_n; 1337 struct rte_mbuf *pkt = NULL; 1338 struct rte_mbuf *seg = NULL; 1339 volatile struct mlx5_cqe *cqe = 1340 &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1341 unsigned int i = 0; 1342 unsigned int rq_ci = rxq->rq_ci << sges_n; 1343 int len = 0; /* keep its value across iterations. */ 1344 1345 while (pkts_n) { 1346 unsigned int idx = rq_ci & wqe_cnt; 1347 volatile struct mlx5_wqe_data_seg *wqe = 1348 &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx]; 1349 struct rte_mbuf *rep = (*rxq->elts)[idx]; 1350 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1351 uint32_t rss_hash_res; 1352 1353 if (pkt) 1354 NEXT(seg) = rep; 1355 seg = rep; 1356 rte_prefetch0(seg); 1357 rte_prefetch0(cqe); 1358 rte_prefetch0(wqe); 1359 /* Allocate the buf from the same pool. */ 1360 rep = rte_mbuf_raw_alloc(seg->pool); 1361 if (unlikely(rep == NULL)) { 1362 ++rxq->stats.rx_nombuf; 1363 if (!pkt) { 1364 /* 1365 * no buffers before we even started, 1366 * bail out silently. 1367 */ 1368 break; 1369 } 1370 while (pkt != seg) { 1371 MLX5_ASSERT(pkt != (*rxq->elts)[idx]); 1372 rep = NEXT(pkt); 1373 NEXT(pkt) = NULL; 1374 NB_SEGS(pkt) = 1; 1375 rte_mbuf_raw_free(pkt); 1376 pkt = rep; 1377 } 1378 break; 1379 } 1380 if (!pkt) { 1381 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 1382 len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe); 1383 if (!len) { 1384 rte_mbuf_raw_free(rep); 1385 break; 1386 } 1387 pkt = seg; 1388 MLX5_ASSERT(len >= (rxq->crc_present << 2)); 1389 pkt->ol_flags &= EXT_ATTACHED_MBUF; 1390 /* If compressed, take hash result from mini-CQE. */ 1391 rss_hash_res = rte_be_to_cpu_32(mcqe == NULL ? 1392 cqe->rx_hash_res : 1393 mcqe->rx_hash_result); 1394 rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res); 1395 if (rxq->crc_present) 1396 len -= RTE_ETHER_CRC_LEN; 1397 PKT_LEN(pkt) = len; 1398 if (cqe->lro_num_seg > 1) { 1399 mlx5_lro_update_hdr 1400 (rte_pktmbuf_mtod(pkt, uint8_t *), cqe, 1401 len); 1402 pkt->ol_flags |= PKT_RX_LRO; 1403 pkt->tso_segsz = len / cqe->lro_num_seg; 1404 } 1405 } 1406 DATA_LEN(rep) = DATA_LEN(seg); 1407 PKT_LEN(rep) = PKT_LEN(seg); 1408 SET_DATA_OFF(rep, DATA_OFF(seg)); 1409 PORT(rep) = PORT(seg); 1410 (*rxq->elts)[idx] = rep; 1411 /* 1412 * Fill NIC descriptor with the new buffer. The lkey and size 1413 * of the buffers are already known, only the buffer address 1414 * changes. 1415 */ 1416 wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t)); 1417 /* If there's only one MR, no need to replace LKey in WQE. */ 1418 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) 1419 wqe->lkey = mlx5_rx_mb2mr(rxq, rep); 1420 if (len > DATA_LEN(seg)) { 1421 len -= DATA_LEN(seg); 1422 ++NB_SEGS(pkt); 1423 ++rq_ci; 1424 continue; 1425 } 1426 DATA_LEN(seg) = len; 1427 #ifdef MLX5_PMD_SOFT_COUNTERS 1428 /* Increment bytes counter. */ 1429 rxq->stats.ibytes += PKT_LEN(pkt); 1430 #endif 1431 /* Return packet. */ 1432 *(pkts++) = pkt; 1433 pkt = NULL; 1434 --pkts_n; 1435 ++i; 1436 /* Align consumer index to the next stride. */ 1437 rq_ci >>= sges_n; 1438 ++rq_ci; 1439 rq_ci <<= sges_n; 1440 } 1441 if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci))) 1442 return 0; 1443 /* Update the consumer index. 
*/ 1444 rxq->rq_ci = rq_ci >> sges_n; 1445 rte_io_wmb(); 1446 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1447 rte_io_wmb(); 1448 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1449 #ifdef MLX5_PMD_SOFT_COUNTERS 1450 /* Increment packets counter. */ 1451 rxq->stats.ipackets += i; 1452 #endif 1453 return i; 1454 } 1455 1456 /** 1457 * Update LRO packet TCP header. 1458 * The HW LRO feature doesn't update the TCP header after coalescing the 1459 * TCP segments but supplies information in CQE to fill it by SW. 1460 * 1461 * @param tcp 1462 * Pointer to the TCP header. 1463 * @param cqe 1464 * Pointer to the completion entry.. 1465 * @param phcsum 1466 * The L3 pseudo-header checksum. 1467 */ 1468 static inline void 1469 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 1470 volatile struct mlx5_cqe *__rte_restrict cqe, 1471 uint32_t phcsum) 1472 { 1473 uint8_t l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) & 1474 MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; 1475 /* 1476 * The HW calculates only the TCP payload checksum, need to complete 1477 * the TCP header checksum and the L3 pseudo-header checksum. 1478 */ 1479 uint32_t csum = phcsum + cqe->csum; 1480 1481 if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK || 1482 l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) { 1483 tcp->tcp_flags |= RTE_TCP_ACK_FLAG; 1484 tcp->recv_ack = cqe->lro_ack_seq_num; 1485 tcp->rx_win = cqe->lro_tcp_win; 1486 } 1487 if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK) 1488 tcp->tcp_flags |= RTE_TCP_PSH_FLAG; 1489 tcp->cksum = 0; 1490 csum += rte_raw_cksum(tcp, (tcp->data_off >> 4) * 4); 1491 csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff); 1492 csum = (~csum) & 0xffff; 1493 if (csum == 0) 1494 csum = 0xffff; 1495 tcp->cksum = csum; 1496 } 1497 1498 /** 1499 * Update LRO packet headers. 1500 * The HW LRO feature doesn't update the L3/TCP headers after coalescing the 1501 * TCP segments but supply information in CQE to fill it by SW. 1502 * 1503 * @param padd 1504 * The packet address. 1505 * @param cqe 1506 * Pointer to the completion entry.. 1507 * @param len 1508 * The packet length. 
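 */

/*
 * Arithmetic sketch (mirrors the fixup in mlx5_lro_update_tcp_hdr() above,
 * shown standalone for clarity): the 32-bit one's-complement accumulator is
 * folded to 16 bits, complemented, and a zero result is mapped to 0xffff so
 * the TCP checksum field is never transmitted as zero.
 */
static inline uint16_t
example_csum_fold(uint32_t csum)
{
	csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff);
	csum = (~csum) & 0xffff;
	return (uint16_t)(csum == 0 ? 0xffff : csum);
}

/*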
1509 */ 1510 static inline void 1511 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 1512 volatile struct mlx5_cqe *__rte_restrict cqe, 1513 uint32_t len) 1514 { 1515 union { 1516 struct rte_ether_hdr *eth; 1517 struct rte_vlan_hdr *vlan; 1518 struct rte_ipv4_hdr *ipv4; 1519 struct rte_ipv6_hdr *ipv6; 1520 struct rte_tcp_hdr *tcp; 1521 uint8_t *hdr; 1522 } h = { 1523 .hdr = padd, 1524 }; 1525 uint16_t proto = h.eth->ether_type; 1526 uint32_t phcsum; 1527 1528 h.eth++; 1529 while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) || 1530 proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) { 1531 proto = h.vlan->eth_proto; 1532 h.vlan++; 1533 } 1534 if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) { 1535 h.ipv4->time_to_live = cqe->lro_min_ttl; 1536 h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd)); 1537 h.ipv4->hdr_checksum = 0; 1538 h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4); 1539 phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0); 1540 h.ipv4++; 1541 } else { 1542 h.ipv6->hop_limits = cqe->lro_min_ttl; 1543 h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) - 1544 sizeof(*h.ipv6)); 1545 phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0); 1546 h.ipv6++; 1547 } 1548 mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum); 1549 } 1550 1551 void 1552 mlx5_mprq_buf_free_cb(void *addr __rte_unused, void *opaque) 1553 { 1554 struct mlx5_mprq_buf *buf = opaque; 1555 1556 if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) == 1) { 1557 rte_mempool_put(buf->mp, buf); 1558 } else if (unlikely(__atomic_sub_fetch(&buf->refcnt, 1, 1559 __ATOMIC_RELAXED) == 0)) { 1560 __atomic_store_n(&buf->refcnt, 1, __ATOMIC_RELAXED); 1561 rte_mempool_put(buf->mp, buf); 1562 } 1563 } 1564 1565 void 1566 mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf) 1567 { 1568 mlx5_mprq_buf_free_cb(NULL, buf); 1569 } 1570 1571 /** 1572 * DPDK callback for RX with Multi-Packet RQ support. 1573 * 1574 * @param dpdk_rxq 1575 * Generic pointer to RX queue structure. 1576 * @param[out] pkts 1577 * Array to store received packets. 1578 * @param pkts_n 1579 * Maximum number of packets in array. 1580 * 1581 * @return 1582 * Number of packets successfully received (<= pkts_n). 1583 */ 1584 uint16_t 1585 mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1586 { 1587 struct mlx5_rxq_data *rxq = dpdk_rxq; 1588 const uint32_t strd_n = 1 << rxq->strd_num_n; 1589 const uint32_t cq_mask = (1 << rxq->cqe_n) - 1; 1590 const uint32_t wq_mask = (1 << rxq->elts_n) - 1; 1591 volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1592 unsigned int i = 0; 1593 uint32_t rq_ci = rxq->rq_ci; 1594 uint16_t consumed_strd = rxq->consumed_strd; 1595 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1596 1597 while (i < pkts_n) { 1598 struct rte_mbuf *pkt; 1599 int ret; 1600 uint32_t len; 1601 uint16_t strd_cnt; 1602 uint16_t strd_idx; 1603 uint32_t byte_cnt; 1604 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1605 uint32_t rss_hash_res = 0; 1606 enum mlx5_rqx_code rxq_code; 1607 1608 if (consumed_strd == strd_n) { 1609 /* Replace WQE if the buffer is still in use. */ 1610 mprq_buf_replace(rxq, rq_ci & wq_mask); 1611 /* Advance to the next WQE. 
*/ 1612 consumed_strd = 0; 1613 ++rq_ci; 1614 buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1615 } 1616 cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1617 ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe); 1618 if (!ret) 1619 break; 1620 byte_cnt = ret; 1621 strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >> 1622 MLX5_MPRQ_STRIDE_NUM_SHIFT; 1623 MLX5_ASSERT(strd_cnt); 1624 consumed_strd += strd_cnt; 1625 if (byte_cnt & MLX5_MPRQ_FILLER_MASK) 1626 continue; 1627 if (mcqe == NULL) { 1628 rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res); 1629 strd_idx = rte_be_to_cpu_16(cqe->wqe_counter); 1630 } else { 1631 /* mini-CQE for MPRQ doesn't have hash result. */ 1632 strd_idx = rte_be_to_cpu_16(mcqe->stride_idx); 1633 } 1634 MLX5_ASSERT(strd_idx < strd_n); 1635 MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & 1636 wq_mask)); 1637 pkt = rte_pktmbuf_alloc(rxq->mp); 1638 if (unlikely(pkt == NULL)) { 1639 ++rxq->stats.rx_nombuf; 1640 break; 1641 } 1642 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 1643 MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); 1644 if (rxq->crc_present) 1645 len -= RTE_ETHER_CRC_LEN; 1646 rxq_code = mprq_buf_to_pkt(rxq, pkt, len, buf, 1647 strd_idx, strd_cnt); 1648 if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) { 1649 rte_pktmbuf_free_seg(pkt); 1650 if (rxq_code == MLX5_RXQ_CODE_DROPPED) { 1651 ++rxq->stats.idropped; 1652 continue; 1653 } 1654 if (rxq_code == MLX5_RXQ_CODE_NOMBUF) { 1655 ++rxq->stats.rx_nombuf; 1656 break; 1657 } 1658 } 1659 rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res); 1660 if (cqe->lro_num_seg > 1) { 1661 mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt, uint8_t *), 1662 cqe, len); 1663 pkt->ol_flags |= PKT_RX_LRO; 1664 pkt->tso_segsz = len / cqe->lro_num_seg; 1665 } 1666 PKT_LEN(pkt) = len; 1667 PORT(pkt) = rxq->port_id; 1668 #ifdef MLX5_PMD_SOFT_COUNTERS 1669 /* Increment bytes counter. */ 1670 rxq->stats.ibytes += PKT_LEN(pkt); 1671 #endif 1672 /* Return packet. */ 1673 *(pkts++) = pkt; 1674 ++i; 1675 } 1676 /* Update the consumer indexes. */ 1677 rxq->consumed_strd = consumed_strd; 1678 rte_io_wmb(); 1679 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1680 if (rq_ci != rxq->rq_ci) { 1681 rxq->rq_ci = rq_ci; 1682 rte_io_wmb(); 1683 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1684 } 1685 #ifdef MLX5_PMD_SOFT_COUNTERS 1686 /* Increment packets counter. */ 1687 rxq->stats.ipackets += i; 1688 #endif 1689 return i; 1690 } 1691 1692 /** 1693 * Dummy DPDK callback for TX. 1694 * 1695 * This function is used to temporarily replace the real callback during 1696 * unsafe control operations on the queue, or in case of error. 1697 * 1698 * @param dpdk_txq 1699 * Generic pointer to TX queue structure. 1700 * @param[in] pkts 1701 * Packets to transmit. 1702 * @param pkts_n 1703 * Number of packets in array. 1704 * 1705 * @return 1706 * Number of packets successfully transmitted (<= pkts_n). 1707 */ 1708 uint16_t 1709 removed_tx_burst(void *dpdk_txq __rte_unused, 1710 struct rte_mbuf **pkts __rte_unused, 1711 uint16_t pkts_n __rte_unused) 1712 { 1713 rte_mb(); 1714 return 0; 1715 } 1716 1717 /** 1718 * Dummy DPDK callback for RX. 1719 * 1720 * This function is used to temporarily replace the real callback during 1721 * unsafe control operations on the queue, or in case of error. 1722 * 1723 * @param dpdk_rxq 1724 * Generic pointer to RX queue structure. 1725 * @param[out] pkts 1726 * Array to store received packets. 1727 * @param pkts_n 1728 * Maximum number of packets in array. 1729 * 1730 * @return 1731 * Number of packets successfully received (<= pkts_n). 
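 */

/*
 * Control-path sketch (hypothetical and simplified, not taken from this
 * file): during unsafe queue reconfiguration the driver can park the data
 * path on the dummy callbacks so that concurrent rte_eth_rx/tx_burst()
 * calls simply return 0.
 */
static inline void
example_park_fast_path(struct rte_eth_dev *dev)
{
	dev->rx_pkt_burst = removed_rx_burst;
	dev->tx_pkt_burst = removed_tx_burst;
	/* Make the new pointers visible before touching queue resources. */
	rte_mb();
}

/*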
1732 */ 1733 uint16_t 1734 removed_rx_burst(void *dpdk_txq __rte_unused, 1735 struct rte_mbuf **pkts __rte_unused, 1736 uint16_t pkts_n __rte_unused) 1737 { 1738 rte_mb(); 1739 return 0; 1740 } 1741 1742 /* 1743 * Vectorized Rx/Tx routines are not compiled in when required vector 1744 * instructions are not supported on a target architecture. The following null 1745 * stubs are needed for linkage when those are not included outside of this file 1746 * (e.g. mlx5_rxtx_vec_sse.c for x86). 1747 */ 1748 1749 __rte_weak uint16_t 1750 mlx5_rx_burst_vec(void *dpdk_txq __rte_unused, 1751 struct rte_mbuf **pkts __rte_unused, 1752 uint16_t pkts_n __rte_unused) 1753 { 1754 return 0; 1755 } 1756 1757 __rte_weak uint16_t 1758 mlx5_rx_burst_mprq_vec(void *dpdk_txq __rte_unused, 1759 struct rte_mbuf **pkts __rte_unused, 1760 uint16_t pkts_n __rte_unused) 1761 { 1762 return 0; 1763 } 1764 1765 __rte_weak int 1766 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused) 1767 { 1768 return -ENOTSUP; 1769 } 1770 1771 __rte_weak int 1772 mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused) 1773 { 1774 return -ENOTSUP; 1775 } 1776 1777 /** 1778 * Free the mbufs from the linear array of pointers. 1779 * 1780 * @param pkts 1781 * Pointer to array of packets to be free. 1782 * @param pkts_n 1783 * Number of packets to be freed. 1784 * @param olx 1785 * Configured Tx offloads mask. It is fully defined at 1786 * compile time and may be used for optimization. 1787 */ 1788 static __rte_always_inline void 1789 mlx5_tx_free_mbuf(struct rte_mbuf **__rte_restrict pkts, 1790 unsigned int pkts_n, 1791 unsigned int olx __rte_unused) 1792 { 1793 struct rte_mempool *pool = NULL; 1794 struct rte_mbuf **p_free = NULL; 1795 struct rte_mbuf *mbuf; 1796 unsigned int n_free = 0; 1797 1798 /* 1799 * The implemented algorithm eliminates 1800 * copying pointers to temporary array 1801 * for rte_mempool_put_bulk() calls. 1802 */ 1803 MLX5_ASSERT(pkts); 1804 MLX5_ASSERT(pkts_n); 1805 for (;;) { 1806 for (;;) { 1807 /* 1808 * Decrement mbuf reference counter, detach 1809 * indirect and external buffers if needed. 1810 */ 1811 mbuf = rte_pktmbuf_prefree_seg(*pkts); 1812 if (likely(mbuf != NULL)) { 1813 MLX5_ASSERT(mbuf == *pkts); 1814 if (likely(n_free != 0)) { 1815 if (unlikely(pool != mbuf->pool)) 1816 /* From different pool. */ 1817 break; 1818 } else { 1819 /* Start new scan array. */ 1820 pool = mbuf->pool; 1821 p_free = pkts; 1822 } 1823 ++n_free; 1824 ++pkts; 1825 --pkts_n; 1826 if (unlikely(pkts_n == 0)) { 1827 mbuf = NULL; 1828 break; 1829 } 1830 } else { 1831 /* 1832 * This happens if mbuf is still referenced. 1833 * We can't put it back to the pool, skip. 1834 */ 1835 ++pkts; 1836 --pkts_n; 1837 if (unlikely(n_free != 0)) 1838 /* There is some array to free.*/ 1839 break; 1840 if (unlikely(pkts_n == 0)) 1841 /* Last mbuf, nothing to free. */ 1842 return; 1843 } 1844 } 1845 for (;;) { 1846 /* 1847 * This loop is implemented to avoid multiple 1848 * inlining of rte_mempool_put_bulk(). 1849 */ 1850 MLX5_ASSERT(pool); 1851 MLX5_ASSERT(p_free); 1852 MLX5_ASSERT(n_free); 1853 /* 1854 * Free the array of pre-freed mbufs 1855 * belonging to the same memory pool. 1856 */ 1857 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 1858 if (unlikely(mbuf != NULL)) { 1859 /* There is the request to start new scan. */ 1860 pool = mbuf->pool; 1861 p_free = pkts++; 1862 n_free = 1; 1863 --pkts_n; 1864 if (likely(pkts_n != 0)) 1865 break; 1866 /* 1867 * This is the last mbuf to be freed. 
1868 * Do one more loop iteration to complete. 1869 * This is rare case of the last unique mbuf. 1870 */ 1871 mbuf = NULL; 1872 continue; 1873 } 1874 if (likely(pkts_n == 0)) 1875 return; 1876 n_free = 0; 1877 break; 1878 } 1879 } 1880 } 1881 1882 /** 1883 * Free the mbuf from the elts ring buffer till new tail. 1884 * 1885 * @param txq 1886 * Pointer to Tx queue structure. 1887 * @param tail 1888 * Index in elts to free up to, becomes new elts tail. 1889 * @param olx 1890 * Configured Tx offloads mask. It is fully defined at 1891 * compile time and may be used for optimization. 1892 */ 1893 static __rte_always_inline void 1894 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 1895 uint16_t tail, 1896 unsigned int olx __rte_unused) 1897 { 1898 uint16_t n_elts = tail - txq->elts_tail; 1899 1900 MLX5_ASSERT(n_elts); 1901 MLX5_ASSERT(n_elts <= txq->elts_s); 1902 /* 1903 * Implement a loop to support ring buffer wraparound 1904 * with single inlining of mlx5_tx_free_mbuf(). 1905 */ 1906 do { 1907 unsigned int part; 1908 1909 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 1910 part = RTE_MIN(part, n_elts); 1911 MLX5_ASSERT(part); 1912 MLX5_ASSERT(part <= txq->elts_s); 1913 mlx5_tx_free_mbuf(&txq->elts[txq->elts_tail & txq->elts_m], 1914 part, olx); 1915 txq->elts_tail += part; 1916 n_elts -= part; 1917 } while (n_elts); 1918 } 1919 1920 /** 1921 * Store the mbuf being sent into elts ring buffer. 1922 * On Tx completion these mbufs will be freed. 1923 * 1924 * @param txq 1925 * Pointer to Tx queue structure. 1926 * @param pkts 1927 * Pointer to array of packets to be stored. 1928 * @param pkts_n 1929 * Number of packets to be stored. 1930 * @param olx 1931 * Configured Tx offloads mask. It is fully defined at 1932 * compile time and may be used for optimization. 1933 */ 1934 static __rte_always_inline void 1935 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 1936 struct rte_mbuf **__rte_restrict pkts, 1937 unsigned int pkts_n, 1938 unsigned int olx __rte_unused) 1939 { 1940 unsigned int part; 1941 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 1942 1943 MLX5_ASSERT(pkts); 1944 MLX5_ASSERT(pkts_n); 1945 part = txq->elts_s - (txq->elts_head & txq->elts_m); 1946 MLX5_ASSERT(part); 1947 MLX5_ASSERT(part <= txq->elts_s); 1948 /* This code is a good candidate for vectorizing with SIMD. */ 1949 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 1950 (void *)pkts, 1951 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 1952 txq->elts_head += pkts_n; 1953 if (unlikely(part < pkts_n)) 1954 /* The copy is wrapping around the elts array. */ 1955 rte_memcpy((void *)elts, (void *)(pkts + part), 1956 (pkts_n - part) * sizeof(struct rte_mbuf *)); 1957 } 1958 1959 /** 1960 * Update completion queue consuming index via doorbell 1961 * and flush the completed data buffers. 1962 * 1963 * @param txq 1964 * Pointer to TX queue structure. 1965 * @param valid CQE pointer 1966 * if not NULL update txq->wqe_pi and flush the buffers 1967 * @param olx 1968 * Configured Tx offloads mask. It is fully defined at 1969 * compile time and may be used for optimization. 
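 */

/*
 * Simplified sketch of the idea behind mlx5_tx_free_mbuf() above
 * (illustrative only, assuming every mbuf has refcnt == 1 so
 * rte_pktmbuf_prefree_seg() never returns NULL): runs of consecutive mbufs
 * coming from the same mempool are returned with a single
 * rte_mempool_put_bulk() call instead of one put per mbuf.
 */
static inline void
example_bulk_free(struct rte_mbuf **pkts, unsigned int n)
{
	unsigned int i, start = 0;

	for (i = 0; i < n; ++i) {
		pkts[i] = rte_pktmbuf_prefree_seg(pkts[i]);
		MLX5_ASSERT(pkts[i] != NULL);
		if (i + 1 == n || pkts[i + 1]->pool != pkts[i]->pool) {
			rte_mempool_put_bulk(pkts[i]->pool,
					     (void **)&pkts[start],
					     i + 1 - start);
			start = i + 1;
		}
	}
}

/*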
1970 */ 1971 static __rte_always_inline void 1972 mlx5_tx_comp_flush(struct mlx5_txq_data *__rte_restrict txq, 1973 volatile struct mlx5_cqe *last_cqe, 1974 unsigned int olx __rte_unused) 1975 { 1976 if (likely(last_cqe != NULL)) { 1977 uint16_t tail; 1978 1979 txq->wqe_pi = rte_be_to_cpu_16(last_cqe->wqe_counter); 1980 tail = txq->fcqs[(txq->cq_ci - 1) & txq->cqe_m]; 1981 if (likely(tail != txq->elts_tail)) { 1982 mlx5_tx_free_elts(txq, tail, olx); 1983 MLX5_ASSERT(tail == txq->elts_tail); 1984 } 1985 } 1986 } 1987 1988 /** 1989 * Manage TX completions. This routine checks the CQ for 1990 * arrived CQEs, deduces the last accomplished WQE in SQ, 1991 * updates SQ producing index and frees all completed mbufs. 1992 * 1993 * @param txq 1994 * Pointer to TX queue structure. 1995 * @param olx 1996 * Configured Tx offloads mask. It is fully defined at 1997 * compile time and may be used for optimization. 1998 * 1999 * NOTE: not inlined intentionally, it makes tx_burst 2000 * routine smaller, simple and faster - from experiments. 2001 */ 2002 static void 2003 mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, 2004 unsigned int olx __rte_unused) 2005 { 2006 unsigned int count = MLX5_TX_COMP_MAX_CQE; 2007 volatile struct mlx5_cqe *last_cqe = NULL; 2008 bool ring_doorbell = false; 2009 int ret; 2010 2011 static_assert(MLX5_CQE_STATUS_HW_OWN < 0, "Must be negative value"); 2012 static_assert(MLX5_CQE_STATUS_SW_OWN < 0, "Must be negative value"); 2013 do { 2014 volatile struct mlx5_cqe *cqe; 2015 2016 cqe = &txq->cqes[txq->cq_ci & txq->cqe_m]; 2017 ret = check_cqe(cqe, txq->cqe_s, txq->cq_ci); 2018 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 2019 if (likely(ret != MLX5_CQE_STATUS_ERR)) { 2020 /* No new CQEs in completion queue. */ 2021 MLX5_ASSERT(ret == MLX5_CQE_STATUS_HW_OWN); 2022 break; 2023 } 2024 /* 2025 * Some error occurred, try to restart. 2026 * We have no barrier after WQE related Doorbell 2027 * written, make sure all writes are completed 2028 * here, before we might perform SQ reset. 2029 */ 2030 rte_wmb(); 2031 ret = mlx5_tx_error_cqe_handle 2032 (txq, (volatile struct mlx5_err_cqe *)cqe); 2033 if (unlikely(ret < 0)) { 2034 /* 2035 * Some error occurred on queue error 2036 * handling, we do not advance the index 2037 * here, allowing to retry on next call. 2038 */ 2039 return; 2040 } 2041 /* 2042 * We are going to fetch all entries with 2043 * MLX5_CQE_SYNDROME_WR_FLUSH_ERR status. 2044 * The send queue is supposed to be empty. 2045 */ 2046 ring_doorbell = true; 2047 ++txq->cq_ci; 2048 txq->cq_pi = txq->cq_ci; 2049 last_cqe = NULL; 2050 continue; 2051 } 2052 /* Normal transmit completion. */ 2053 MLX5_ASSERT(txq->cq_ci != txq->cq_pi); 2054 MLX5_ASSERT((txq->fcqs[txq->cq_ci & txq->cqe_m] >> 16) == 2055 cqe->wqe_counter); 2056 ring_doorbell = true; 2057 ++txq->cq_ci; 2058 last_cqe = cqe; 2059 /* 2060 * We have to restrict the amount of processed CQEs 2061 * in one tx_burst routine call. The CQ may be large 2062 * and many CQEs may be updated by the NIC in one 2063 * transaction. Buffers freeing is time consuming, 2064 * multiple iterations may introduce significant 2065 * latency. 2066 */ 2067 if (likely(--count == 0)) 2068 break; 2069 } while (true); 2070 if (likely(ring_doorbell)) { 2071 /* Ring doorbell to notify hardware. */ 2072 rte_compiler_barrier(); 2073 *txq->cq_db = rte_cpu_to_be_32(txq->cq_ci); 2074 mlx5_tx_comp_flush(txq, last_cqe, olx); 2075 } 2076 } 2077 2078 /** 2079 * Check if the completion request flag should be set in the last WQE. 
2080 * Both pushed mbufs and WQEs are monitored and the completion request 2081 * flag is set if any of thresholds is reached. 2082 * 2083 * @param txq 2084 * Pointer to TX queue structure. 2085 * @param loc 2086 * Pointer to burst routine local context. 2087 * @param olx 2088 * Configured Tx offloads mask. It is fully defined at 2089 * compile time and may be used for optimization. 2090 */ 2091 static __rte_always_inline void 2092 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 2093 struct mlx5_txq_local *__rte_restrict loc, 2094 unsigned int olx) 2095 { 2096 uint16_t head = txq->elts_head; 2097 unsigned int part; 2098 2099 part = MLX5_TXOFF_CONFIG(INLINE) ? 2100 0 : loc->pkts_sent - loc->pkts_copy; 2101 head += part; 2102 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 2103 (MLX5_TXOFF_CONFIG(INLINE) && 2104 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 2105 volatile struct mlx5_wqe *last = loc->wqe_last; 2106 2107 MLX5_ASSERT(last); 2108 txq->elts_comp = head; 2109 if (MLX5_TXOFF_CONFIG(INLINE)) 2110 txq->wqe_comp = txq->wqe_ci; 2111 /* Request unconditional completion on last WQE. */ 2112 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 2113 MLX5_COMP_MODE_OFFSET); 2114 /* Save elts_head in dedicated free on completion queue. */ 2115 #ifdef RTE_LIBRTE_MLX5_DEBUG 2116 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 2117 (last->cseg.opcode >> 8) << 16; 2118 #else 2119 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 2120 #endif 2121 /* A CQE slot must always be available. */ 2122 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 2123 } 2124 } 2125 2126 /** 2127 * DPDK callback to check the status of a tx descriptor. 2128 * 2129 * @param tx_queue 2130 * The tx queue. 2131 * @param[in] offset 2132 * The index of the descriptor in the ring. 2133 * 2134 * @return 2135 * The status of the tx descriptor. 2136 */ 2137 int 2138 mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset) 2139 { 2140 struct mlx5_txq_data *__rte_restrict txq = tx_queue; 2141 uint16_t used; 2142 2143 mlx5_tx_handle_completion(txq, 0); 2144 used = txq->elts_head - txq->elts_tail; 2145 if (offset < used) 2146 return RTE_ETH_TX_DESC_FULL; 2147 return RTE_ETH_TX_DESC_DONE; 2148 } 2149 2150 /** 2151 * Build the Control Segment with specified opcode: 2152 * - MLX5_OPCODE_SEND 2153 * - MLX5_OPCODE_ENHANCED_MPSW 2154 * - MLX5_OPCODE_TSO 2155 * 2156 * @param txq 2157 * Pointer to TX queue structure. 2158 * @param loc 2159 * Pointer to burst routine local context. 2160 * @param wqe 2161 * Pointer to WQE to fill with built Control Segment. 2162 * @param ds 2163 * Supposed length of WQE in segments. 2164 * @param opcode 2165 * SQ WQE opcode to put into Control Segment. 2166 * @param olx 2167 * Configured Tx offloads mask. It is fully defined at 2168 * compile time and may be used for optimization. 2169 */ 2170 static __rte_always_inline void 2171 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 2172 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2173 struct mlx5_wqe *__rte_restrict wqe, 2174 unsigned int ds, 2175 unsigned int opcode, 2176 unsigned int olx __rte_unused) 2177 { 2178 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 2179 2180 /* For legacy MPW replace the EMPW by TSO with modifier. 
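/*
 * Illustrative sketch of the threshold check used by
 * mlx5_tx_request_completion() above: elts_head and elts_comp are
 * free-running 16-bit counters, so the number of descriptors sent since the
 * last requested completion is simply their wrapping difference. The helper
 * name and the threshold value below are made up for the example only.
 */
#define EXAMPLE_COMP_THRESH 64u

static int
example_completion_needed(uint16_t head, uint16_t last_comp)
{
	/* Unsigned subtraction wraps modulo 2^16, no explicit modulo needed. */
	return (uint16_t)(head - last_comp) >= EXAMPLE_COMP_THRESH;
}

/* E.g. head == 0x0010, last_comp == 0xffd0: distance is 0x40 == 64, request. */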
*/ 2181 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 2182 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 2183 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 2184 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2185 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 2186 MLX5_COMP_MODE_OFFSET); 2187 cs->misc = RTE_BE32(0); 2188 } 2189 2190 /** 2191 * Build the Synchronize Queue Segment with specified completion index. 2192 * 2193 * @param txq 2194 * Pointer to TX queue structure. 2195 * @param loc 2196 * Pointer to burst routine local context. 2197 * @param wqe 2198 * Pointer to WQE to fill with built Control Segment. 2199 * @param wci 2200 * Completion index in Clock Queue to wait. 2201 * @param olx 2202 * Configured Tx offloads mask. It is fully defined at 2203 * compile time and may be used for optimization. 2204 */ 2205 static __rte_always_inline void 2206 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 2207 struct mlx5_txq_local *restrict loc __rte_unused, 2208 struct mlx5_wqe *restrict wqe, 2209 unsigned int wci, 2210 unsigned int olx __rte_unused) 2211 { 2212 struct mlx5_wqe_qseg *qs; 2213 2214 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 2215 qs->max_index = rte_cpu_to_be_32(wci); 2216 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq->id); 2217 qs->reserved0 = RTE_BE32(0); 2218 qs->reserved1 = RTE_BE32(0); 2219 } 2220 2221 /** 2222 * Build the Ethernet Segment without inlined data. 2223 * Supports Software Parser, Checksums and VLAN 2224 * insertion Tx offload features. 2225 * 2226 * @param txq 2227 * Pointer to TX queue structure. 2228 * @param loc 2229 * Pointer to burst routine local context. 2230 * @param wqe 2231 * Pointer to WQE to fill with built Ethernet Segment. 2232 * @param olx 2233 * Configured Tx offloads mask. It is fully defined at 2234 * compile time and may be used for optimization. 2235 */ 2236 static __rte_always_inline void 2237 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 2238 struct mlx5_txq_local *__rte_restrict loc, 2239 struct mlx5_wqe *__rte_restrict wqe, 2240 unsigned int olx) 2241 { 2242 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2243 uint32_t csum; 2244 2245 /* 2246 * Calculate and set check sum flags first, dword field 2247 * in segment may be shared with Software Parser flags. 2248 */ 2249 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2250 es->flags = rte_cpu_to_le_32(csum); 2251 /* 2252 * Calculate and set Software Parser offsets and flags. 2253 * These flags a set for custom UDP and IP tunnel packets. 2254 */ 2255 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2256 /* Fill metadata field if needed. */ 2257 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2258 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2259 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2260 /* Engage VLAN tag insertion feature if requested. */ 2261 if (MLX5_TXOFF_CONFIG(VLAN) && 2262 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 2263 /* 2264 * We should get here only if device support 2265 * this feature correctly. 2266 */ 2267 MLX5_ASSERT(txq->vlan_en); 2268 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 2269 loc->mbuf->vlan_tci); 2270 } else { 2271 es->inline_hdr = RTE_BE32(0); 2272 } 2273 } 2274 2275 /** 2276 * Build the Ethernet Segment with minimal inlined data 2277 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 2278 * used to fill the gap in single WQEBB WQEs. 
2279 * Supports Software Parser, Checksums and VLAN 2280 * insertion Tx offload features. 2281 * 2282 * @param txq 2283 * Pointer to TX queue structure. 2284 * @param loc 2285 * Pointer to burst routine local context. 2286 * @param wqe 2287 * Pointer to WQE to fill with built Ethernet Segment. 2288 * @param vlan 2289 * Length of VLAN tag insertion if any. 2290 * @param olx 2291 * Configured Tx offloads mask. It is fully defined at 2292 * compile time and may be used for optimization. 2293 */ 2294 static __rte_always_inline void 2295 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 2296 struct mlx5_txq_local *__rte_restrict loc, 2297 struct mlx5_wqe *__rte_restrict wqe, 2298 unsigned int vlan, 2299 unsigned int olx) 2300 { 2301 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2302 uint32_t csum; 2303 uint8_t *psrc, *pdst; 2304 2305 /* 2306 * Calculate and set check sum flags first, dword field 2307 * in segment may be shared with Software Parser flags. 2308 */ 2309 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2310 es->flags = rte_cpu_to_le_32(csum); 2311 /* 2312 * Calculate and set Software Parser offsets and flags. 2313 * These flags a set for custom UDP and IP tunnel packets. 2314 */ 2315 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2316 /* Fill metadata field if needed. */ 2317 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2318 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2319 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2320 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2321 (sizeof(uint16_t) + 2322 sizeof(rte_v128u32_t)), 2323 "invalid Ethernet Segment data size"); 2324 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2325 (sizeof(uint16_t) + 2326 sizeof(struct rte_vlan_hdr) + 2327 2 * RTE_ETHER_ADDR_LEN), 2328 "invalid Ethernet Segment data size"); 2329 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2330 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 2331 es->inline_data = *(unaligned_uint16_t *)psrc; 2332 psrc += sizeof(uint16_t); 2333 pdst = (uint8_t *)(es + 1); 2334 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2335 /* Implement VLAN tag insertion as part inline data. */ 2336 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2337 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2338 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2339 /* Insert VLAN ethertype + VLAN tag. */ 2340 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2341 ((RTE_ETHER_TYPE_VLAN << 16) | 2342 loc->mbuf->vlan_tci); 2343 pdst += sizeof(struct rte_vlan_hdr); 2344 /* Copy the rest two bytes from packet data. */ 2345 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2346 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2347 } else { 2348 /* Fill the gap in the title WQEBB with inline data. */ 2349 rte_mov16(pdst, psrc); 2350 } 2351 } 2352 2353 /** 2354 * Build the Ethernet Segment with entire packet 2355 * data inlining. Checks the boundary of WQEBB and 2356 * ring buffer wrapping, supports Software Parser, 2357 * Checksums and VLAN insertion Tx offload features. 2358 * 2359 * @param txq 2360 * Pointer to TX queue structure. 2361 * @param loc 2362 * Pointer to burst routine local context. 2363 * @param wqe 2364 * Pointer to WQE to fill with built Ethernet Segment. 2365 * @param vlan 2366 * Length of VLAN tag insertion if any. 2367 * @param inlen 2368 * Length of data to inline (VLAN included, if any). 2369 * @param tso 2370 * TSO flag, set mss field from the packet. 2371 * @param olx 2372 * Configured Tx offloads mask. 
It is fully defined at 2373 * compile time and may be used for optimization. 2374 * 2375 * @return 2376 * Pointer to the next Data Segment (aligned and wrapped around). 2377 */ 2378 static __rte_always_inline struct mlx5_wqe_dseg * 2379 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 2380 struct mlx5_txq_local *__rte_restrict loc, 2381 struct mlx5_wqe *__rte_restrict wqe, 2382 unsigned int vlan, 2383 unsigned int inlen, 2384 unsigned int tso, 2385 unsigned int olx) 2386 { 2387 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2388 uint32_t csum; 2389 uint8_t *psrc, *pdst; 2390 unsigned int part; 2391 2392 /* 2393 * Calculate and set check sum flags first, dword field 2394 * in segment may be shared with Software Parser flags. 2395 */ 2396 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2397 if (tso) { 2398 csum <<= 24; 2399 csum |= loc->mbuf->tso_segsz; 2400 es->flags = rte_cpu_to_be_32(csum); 2401 } else { 2402 es->flags = rte_cpu_to_le_32(csum); 2403 } 2404 /* 2405 * Calculate and set Software Parser offsets and flags. 2406 * These flags a set for custom UDP and IP tunnel packets. 2407 */ 2408 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2409 /* Fill metadata field if needed. */ 2410 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2411 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2412 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2413 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2414 (sizeof(uint16_t) + 2415 sizeof(rte_v128u32_t)), 2416 "invalid Ethernet Segment data size"); 2417 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2418 (sizeof(uint16_t) + 2419 sizeof(struct rte_vlan_hdr) + 2420 2 * RTE_ETHER_ADDR_LEN), 2421 "invalid Ethernet Segment data size"); 2422 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2423 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 2424 es->inline_data = *(unaligned_uint16_t *)psrc; 2425 psrc += sizeof(uint16_t); 2426 pdst = (uint8_t *)(es + 1); 2427 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2428 /* Implement VLAN tag insertion as part inline data. */ 2429 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 2430 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2431 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 2432 /* Insert VLAN ethertype + VLAN tag. */ 2433 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2434 ((RTE_ETHER_TYPE_VLAN << 16) | 2435 loc->mbuf->vlan_tci); 2436 pdst += sizeof(struct rte_vlan_hdr); 2437 /* Copy the rest two bytes from packet data. */ 2438 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 2439 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 2440 psrc += sizeof(uint16_t); 2441 } else { 2442 /* Fill the gap in the title WQEBB with inline data. */ 2443 rte_mov16(pdst, psrc); 2444 psrc += sizeof(rte_v128u32_t); 2445 } 2446 pdst = (uint8_t *)(es + 2); 2447 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2448 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 2449 inlen -= MLX5_ESEG_MIN_INLINE_SIZE; 2450 if (!inlen) { 2451 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 2452 return (struct mlx5_wqe_dseg *)pdst; 2453 } 2454 /* 2455 * The WQEBB space availability is checked by caller. 2456 * Here we should be aware of WQE ring buffer wraparound only. 2457 */ 2458 part = (uint8_t *)txq->wqes_end - pdst; 2459 part = RTE_MIN(part, inlen); 2460 do { 2461 rte_memcpy(pdst, psrc, part); 2462 inlen -= part; 2463 if (likely(!inlen)) { 2464 /* 2465 * If return value is not used by the caller 2466 * the code below will be optimized out. 
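/*
 * Illustrative sketch of the VLAN insertion performed while inlining the
 * packet header above (mlx5_tx_eseg_dmin()/mlx5_tx_eseg_data()): the tag is
 * placed right after the MAC addresses, in front of the original EtherType.
 * Standalone example with a made-up name, not the driver code;
 * RTE_ETHER_ADDR_LEN and RTE_ETHER_TYPE_VLAN come from <rte_ether.h>, and
 * 'dst' must have room for frame_len + sizeof(struct rte_vlan_hdr) bytes.
 */
static unsigned int
example_insert_vlan(uint8_t *dst, const uint8_t *frame,
		    unsigned int frame_len, uint16_t vlan_tci)
{
	unsigned int hdr = 2 * RTE_ETHER_ADDR_LEN;

	memcpy(dst, frame, hdr);			/* DMAC + SMAC. */
	dst[hdr + 0] = RTE_ETHER_TYPE_VLAN >> 8;	/* TPID, big endian. */
	dst[hdr + 1] = RTE_ETHER_TYPE_VLAN & 0xff;
	dst[hdr + 2] = vlan_tci >> 8;			/* PCP/DEI/VID. */
	dst[hdr + 3] = vlan_tci & 0xff;
	/* The original EtherType and payload follow the inserted tag. */
	memcpy(dst + hdr + sizeof(struct rte_vlan_hdr), frame + hdr,
	       frame_len - hdr);
	return frame_len + sizeof(struct rte_vlan_hdr);
}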
2467 */ 2468 pdst += part; 2469 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2470 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 2471 pdst = (uint8_t *)txq->wqes; 2472 return (struct mlx5_wqe_dseg *)pdst; 2473 } 2474 pdst = (uint8_t *)txq->wqes; 2475 psrc += part; 2476 part = inlen; 2477 } while (true); 2478 } 2479 2480 /** 2481 * Copy data from chain of mbuf to the specified linear buffer. 2482 * Checksums and VLAN insertion Tx offload features. If data 2483 * from some mbuf copied completely this mbuf is freed. Local 2484 * structure is used to keep the byte stream state. 2485 * 2486 * @param pdst 2487 * Pointer to the destination linear buffer. 2488 * @param loc 2489 * Pointer to burst routine local context. 2490 * @param len 2491 * Length of data to be copied. 2492 * @param must 2493 * Length of data to be copied ignoring no inline hint. 2494 * @param olx 2495 * Configured Tx offloads mask. It is fully defined at 2496 * compile time and may be used for optimization. 2497 * 2498 * @return 2499 * Number of actual copied data bytes. This is always greater than or 2500 * equal to must parameter and might be lesser than len in no inline 2501 * hint flag is encountered. 2502 */ 2503 static __rte_always_inline unsigned int 2504 mlx5_tx_mseg_memcpy(uint8_t *pdst, 2505 struct mlx5_txq_local *__rte_restrict loc, 2506 unsigned int len, 2507 unsigned int must, 2508 unsigned int olx __rte_unused) 2509 { 2510 struct rte_mbuf *mbuf; 2511 unsigned int part, dlen, copy = 0; 2512 uint8_t *psrc; 2513 2514 MLX5_ASSERT(len); 2515 MLX5_ASSERT(must <= len); 2516 do { 2517 /* Allow zero length packets, must check first. */ 2518 dlen = rte_pktmbuf_data_len(loc->mbuf); 2519 if (dlen <= loc->mbuf_off) { 2520 /* Exhausted packet, just free. */ 2521 mbuf = loc->mbuf; 2522 loc->mbuf = mbuf->next; 2523 rte_pktmbuf_free_seg(mbuf); 2524 loc->mbuf_off = 0; 2525 MLX5_ASSERT(loc->mbuf_nseg > 1); 2526 MLX5_ASSERT(loc->mbuf); 2527 --loc->mbuf_nseg; 2528 if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 2529 unsigned int diff; 2530 2531 if (copy >= must) { 2532 /* 2533 * We already copied the minimal 2534 * requested amount of data. 2535 */ 2536 return copy; 2537 } 2538 diff = must - copy; 2539 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 2540 /* 2541 * Copy only the minimal required 2542 * part of the data buffer. 2543 */ 2544 len = diff; 2545 } 2546 } 2547 continue; 2548 } 2549 dlen -= loc->mbuf_off; 2550 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 2551 loc->mbuf_off); 2552 part = RTE_MIN(len, dlen); 2553 rte_memcpy(pdst, psrc, part); 2554 copy += part; 2555 loc->mbuf_off += part; 2556 len -= part; 2557 if (!len) { 2558 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 2559 loc->mbuf_off = 0; 2560 /* Exhausted packet, just free. */ 2561 mbuf = loc->mbuf; 2562 loc->mbuf = mbuf->next; 2563 rte_pktmbuf_free_seg(mbuf); 2564 loc->mbuf_off = 0; 2565 MLX5_ASSERT(loc->mbuf_nseg >= 1); 2566 --loc->mbuf_nseg; 2567 } 2568 return copy; 2569 } 2570 pdst += part; 2571 } while (true); 2572 } 2573 2574 /** 2575 * Build the Ethernet Segment with inlined data from 2576 * multi-segment packet. Checks the boundary of WQEBB 2577 * and ring buffer wrapping, supports Software Parser, 2578 * Checksums and VLAN insertion Tx offload features. 2579 * 2580 * @param txq 2581 * Pointer to TX queue structure. 2582 * @param loc 2583 * Pointer to burst routine local context. 2584 * @param wqe 2585 * Pointer to WQE to fill with built Ethernet Segment. 2586 * @param vlan 2587 * Length of VLAN tag insertion if any. 
2588 * @param inlen 2589 * Length of data to inline (VLAN included, if any). 2590 * @param tso 2591 * TSO flag, set mss field from the packet. 2592 * @param olx 2593 * Configured Tx offloads mask. It is fully defined at 2594 * compile time and may be used for optimization. 2595 * 2596 * @return 2597 * Pointer to the next Data Segment (aligned and 2598 * possible NOT wrapped around - caller should do 2599 * wrapping check on its own). 2600 */ 2601 static __rte_always_inline struct mlx5_wqe_dseg * 2602 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 2603 struct mlx5_txq_local *__rte_restrict loc, 2604 struct mlx5_wqe *__rte_restrict wqe, 2605 unsigned int vlan, 2606 unsigned int inlen, 2607 unsigned int tso, 2608 unsigned int olx) 2609 { 2610 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 2611 uint32_t csum; 2612 uint8_t *pdst; 2613 unsigned int part, tlen = 0; 2614 2615 /* 2616 * Calculate and set check sum flags first, uint32_t field 2617 * in segment may be shared with Software Parser flags. 2618 */ 2619 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 2620 if (tso) { 2621 csum <<= 24; 2622 csum |= loc->mbuf->tso_segsz; 2623 es->flags = rte_cpu_to_be_32(csum); 2624 } else { 2625 es->flags = rte_cpu_to_le_32(csum); 2626 } 2627 /* 2628 * Calculate and set Software Parser offsets and flags. 2629 * These flags a set for custom UDP and IP tunnel packets. 2630 */ 2631 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 2632 /* Fill metadata field if needed. */ 2633 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 2634 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2635 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0 : 0; 2636 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2637 (sizeof(uint16_t) + 2638 sizeof(rte_v128u32_t)), 2639 "invalid Ethernet Segment data size"); 2640 static_assert(MLX5_ESEG_MIN_INLINE_SIZE == 2641 (sizeof(uint16_t) + 2642 sizeof(struct rte_vlan_hdr) + 2643 2 * RTE_ETHER_ADDR_LEN), 2644 "invalid Ethernet Segment data size"); 2645 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 2646 pdst = (uint8_t *)&es->inline_data; 2647 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 2648 /* Implement VLAN tag insertion as part inline data. */ 2649 mlx5_tx_mseg_memcpy(pdst, loc, 2650 2 * RTE_ETHER_ADDR_LEN, 2651 2 * RTE_ETHER_ADDR_LEN, olx); 2652 pdst += 2 * RTE_ETHER_ADDR_LEN; 2653 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 2654 ((RTE_ETHER_TYPE_VLAN << 16) | 2655 loc->mbuf->vlan_tci); 2656 pdst += sizeof(struct rte_vlan_hdr); 2657 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 2658 } 2659 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 2660 /* 2661 * The WQEBB space availability is checked by caller. 2662 * Here we should be aware of WQE ring buffer wraparound only. 2663 */ 2664 part = (uint8_t *)txq->wqes_end - pdst; 2665 part = RTE_MIN(part, inlen - tlen); 2666 MLX5_ASSERT(part); 2667 do { 2668 unsigned int copy; 2669 2670 /* 2671 * Copying may be interrupted inside the routine 2672 * if run into no inline hint flag. 2673 */ 2674 copy = tlen >= txq->inlen_mode ? 0 : (txq->inlen_mode - tlen); 2675 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 2676 tlen += copy; 2677 if (likely(inlen <= tlen) || copy < part) { 2678 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 2679 pdst += copy; 2680 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2681 return (struct mlx5_wqe_dseg *)pdst; 2682 } 2683 pdst = (uint8_t *)txq->wqes; 2684 part = inlen - tlen; 2685 } while (true); 2686 } 2687 2688 /** 2689 * Build the Data Segment of pointer type. 
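/*
 * Illustrative sketch of the chain linearization done by
 * mlx5_tx_mseg_memcpy() above: data is copied segment by segment into a
 * linear destination, every fully consumed segment is freed, and the current
 * segment pointer plus intra-segment offset keep the stream state between
 * calls. Simplified standalone example with a made-up name; the no-inline
 * hint handling of the driver is omitted here.
 */
static unsigned int
example_linearize(uint8_t *dst, struct rte_mbuf **m_inout,
		  uint32_t *off_inout, unsigned int len)
{
	struct rte_mbuf *m = *m_inout;
	uint32_t off = *off_inout;
	unsigned int copied = 0;
	uint32_t part, seg;

	while (m != NULL && copied < len) {
		seg = rte_pktmbuf_data_len(m);
		if (off >= seg) {
			/* Segment fully consumed: unlink and free it. */
			struct rte_mbuf *next = m->next;

			rte_pktmbuf_free_seg(m);
			m = next;
			off = 0;
			continue;
		}
		part = RTE_MIN(seg - off, (uint32_t)(len - copied));
		memcpy(dst + copied,
		       rte_pktmbuf_mtod_offset(m, uint8_t *, off), part);
		copied += part;
		off += part;
	}
	*m_inout = m;
	*off_inout = off;
	return copied;
}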
2690 * 2691 * @param txq 2692 * Pointer to TX queue structure. 2693 * @param loc 2694 * Pointer to burst routine local context. 2695 * @param dseg 2696 * Pointer to WQE to fill with built Data Segment. 2697 * @param buf 2698 * Data buffer to point. 2699 * @param len 2700 * Data buffer length. 2701 * @param olx 2702 * Configured Tx offloads mask. It is fully defined at 2703 * compile time and may be used for optimization. 2704 */ 2705 static __rte_always_inline void 2706 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 2707 struct mlx5_txq_local *__rte_restrict loc, 2708 struct mlx5_wqe_dseg *__rte_restrict dseg, 2709 uint8_t *buf, 2710 unsigned int len, 2711 unsigned int olx __rte_unused) 2712 2713 { 2714 MLX5_ASSERT(len); 2715 dseg->bcount = rte_cpu_to_be_32(len); 2716 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2717 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2718 } 2719 2720 /** 2721 * Build the Data Segment of pointer type or inline 2722 * if data length is less than buffer in minimal 2723 * Data Segment size. 2724 * 2725 * @param txq 2726 * Pointer to TX queue structure. 2727 * @param loc 2728 * Pointer to burst routine local context. 2729 * @param dseg 2730 * Pointer to WQE to fill with built Data Segment. 2731 * @param buf 2732 * Data buffer to point. 2733 * @param len 2734 * Data buffer length. 2735 * @param olx 2736 * Configured Tx offloads mask. It is fully defined at 2737 * compile time and may be used for optimization. 2738 */ 2739 static __rte_always_inline void 2740 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 2741 struct mlx5_txq_local *__rte_restrict loc, 2742 struct mlx5_wqe_dseg *__rte_restrict dseg, 2743 uint8_t *buf, 2744 unsigned int len, 2745 unsigned int olx __rte_unused) 2746 2747 { 2748 uintptr_t dst, src; 2749 2750 MLX5_ASSERT(len); 2751 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 2752 dseg->bcount = rte_cpu_to_be_32(len); 2753 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 2754 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 2755 2756 return; 2757 } 2758 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2759 /* Unrolled implementation of generic rte_memcpy. */ 2760 dst = (uintptr_t)&dseg->inline_data[0]; 2761 src = (uintptr_t)buf; 2762 if (len & 0x08) { 2763 #ifdef RTE_ARCH_STRICT_ALIGN 2764 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 2765 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2766 dst += sizeof(uint32_t); 2767 src += sizeof(uint32_t); 2768 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2769 dst += sizeof(uint32_t); 2770 src += sizeof(uint32_t); 2771 #else 2772 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 2773 dst += sizeof(uint64_t); 2774 src += sizeof(uint64_t); 2775 #endif 2776 } 2777 if (len & 0x04) { 2778 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 2779 dst += sizeof(uint32_t); 2780 src += sizeof(uint32_t); 2781 } 2782 if (len & 0x02) { 2783 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 2784 dst += sizeof(uint16_t); 2785 src += sizeof(uint16_t); 2786 } 2787 if (len & 0x01) 2788 *(uint8_t *)dst = *(uint8_t *)src; 2789 } 2790 2791 /** 2792 * Build the Data Segment of inlined data from single 2793 * segment packet, no VLAN insertion. 2794 * 2795 * @param txq 2796 * Pointer to TX queue structure. 2797 * @param loc 2798 * Pointer to burst routine local context. 2799 * @param dseg 2800 * Pointer to WQE to fill with built Data Segment. 2801 * @param buf 2802 * Data buffer to point. 2803 * @param len 2804 * Data buffer length. 2805 * @param olx 2806 * Configured Tx offloads mask. 
It is fully defined at 2807 * compile time and may be used for optimization. 2808 * 2809 * @return 2810 * Pointer to the next Data Segment after inlined data. 2811 * Ring buffer wraparound check is needed. We do not 2812 * do it here because it may not be needed for the 2813 * last packet in the eMPW session. 2814 */ 2815 static __rte_always_inline struct mlx5_wqe_dseg * 2816 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 2817 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2818 struct mlx5_wqe_dseg *__rte_restrict dseg, 2819 uint8_t *buf, 2820 unsigned int len, 2821 unsigned int olx __rte_unused) 2822 { 2823 unsigned int part; 2824 uint8_t *pdst; 2825 2826 if (!MLX5_TXOFF_CONFIG(MPW)) { 2827 /* Store the descriptor byte counter for eMPW sessions. */ 2828 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 2829 pdst = &dseg->inline_data[0]; 2830 } else { 2831 /* The entire legacy MPW session counter is stored on close. */ 2832 pdst = (uint8_t *)dseg; 2833 } 2834 /* 2835 * The WQEBB space availability is checked by caller. 2836 * Here we should be aware of WQE ring buffer wraparound only. 2837 */ 2838 part = (uint8_t *)txq->wqes_end - pdst; 2839 part = RTE_MIN(part, len); 2840 do { 2841 rte_memcpy(pdst, buf, part); 2842 len -= part; 2843 if (likely(!len)) { 2844 pdst += part; 2845 if (!MLX5_TXOFF_CONFIG(MPW)) 2846 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2847 /* Note: no final wraparound check here. */ 2848 return (struct mlx5_wqe_dseg *)pdst; 2849 } 2850 pdst = (uint8_t *)txq->wqes; 2851 buf += part; 2852 part = len; 2853 } while (true); 2854 } 2855 2856 /** 2857 * Build the Data Segment of inlined data from single 2858 * segment packet with VLAN insertion. 2859 * 2860 * @param txq 2861 * Pointer to TX queue structure. 2862 * @param loc 2863 * Pointer to burst routine local context. 2864 * @param dseg 2865 * Pointer to the dseg fill with built Data Segment. 2866 * @param buf 2867 * Data buffer to point. 2868 * @param len 2869 * Data buffer length. 2870 * @param olx 2871 * Configured Tx offloads mask. It is fully defined at 2872 * compile time and may be used for optimization. 2873 * 2874 * @return 2875 * Pointer to the next Data Segment after inlined data. 2876 * Ring buffer wraparound check is needed. 2877 */ 2878 static __rte_always_inline struct mlx5_wqe_dseg * 2879 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 2880 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 2881 struct mlx5_wqe_dseg *__rte_restrict dseg, 2882 uint8_t *buf, 2883 unsigned int len, 2884 unsigned int olx __rte_unused) 2885 2886 { 2887 unsigned int part; 2888 uint8_t *pdst; 2889 2890 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 2891 static_assert(MLX5_DSEG_MIN_INLINE_SIZE == 2892 (2 * RTE_ETHER_ADDR_LEN), 2893 "invalid Data Segment data size"); 2894 if (!MLX5_TXOFF_CONFIG(MPW)) { 2895 /* Store the descriptor byte counter for eMPW sessions. */ 2896 dseg->bcount = rte_cpu_to_be_32 2897 ((len + sizeof(struct rte_vlan_hdr)) | 2898 MLX5_ETH_WQE_DATA_INLINE); 2899 pdst = &dseg->inline_data[0]; 2900 } else { 2901 /* The entire legacy MPW session counter is stored on close. */ 2902 pdst = (uint8_t *)dseg; 2903 } 2904 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 2905 buf += MLX5_DSEG_MIN_INLINE_SIZE; 2906 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 2907 len -= MLX5_DSEG_MIN_INLINE_SIZE; 2908 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. 
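/*
 * Illustrative sketch of the branch-unrolled small copy used by
 * mlx5_tx_dseg_iptr() above: for lengths up to 15 bytes the set bits of
 * 'len' select at most one 8-, 4-, 2- and 1-byte chunk, so no loop is
 * needed. Plain memcpy() keeps this example alignment-safe; the driver uses
 * wider unaligned loads/stores instead. Made-up name, not part of the driver.
 */
static void
example_small_copy(uint8_t *dst, const uint8_t *src, unsigned int len)
{
	if (len & 0x08) {
		memcpy(dst, src, 8);
		dst += 8;
		src += 8;
	}
	if (len & 0x04) {
		memcpy(dst, src, 4);
		dst += 4;
		src += 4;
	}
	if (len & 0x02) {
		memcpy(dst, src, 2);
		dst += 2;
		src += 2;
	}
	if (len & 0x01)
		*dst = *src;
}

/* E.g. example_small_copy(d, s, 11) copies 8 + 2 + 1 bytes. */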
*/ 2909 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 2910 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 2911 pdst = (uint8_t *)txq->wqes; 2912 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 2913 loc->mbuf->vlan_tci); 2914 pdst += sizeof(struct rte_vlan_hdr); 2915 /* 2916 * The WQEBB space availability is checked by caller. 2917 * Here we should be aware of WQE ring buffer wraparound only. 2918 */ 2919 part = (uint8_t *)txq->wqes_end - pdst; 2920 part = RTE_MIN(part, len); 2921 do { 2922 rte_memcpy(pdst, buf, part); 2923 len -= part; 2924 if (likely(!len)) { 2925 pdst += part; 2926 if (!MLX5_TXOFF_CONFIG(MPW)) 2927 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 2928 /* Note: no final wraparound check here. */ 2929 return (struct mlx5_wqe_dseg *)pdst; 2930 } 2931 pdst = (uint8_t *)txq->wqes; 2932 buf += part; 2933 part = len; 2934 } while (true); 2935 } 2936 2937 /** 2938 * Build the Ethernet Segment with optionally inlined data with 2939 * VLAN insertion and following Data Segments (if any) from 2940 * multi-segment packet. Used by ordinary send and TSO. 2941 * 2942 * @param txq 2943 * Pointer to TX queue structure. 2944 * @param loc 2945 * Pointer to burst routine local context. 2946 * @param wqe 2947 * Pointer to WQE to fill with built Ethernet/Data Segments. 2948 * @param vlan 2949 * Length of VLAN header to insert, 0 means no VLAN insertion. 2950 * @param inlen 2951 * Data length to inline. For TSO this parameter specifies 2952 * exact value, for ordinary send routine can be aligned by 2953 * caller to provide better WQE space saving and data buffer 2954 * start address alignment. This length includes VLAN header 2955 * being inserted. 2956 * @param tso 2957 * Zero means ordinary send, inlined data can be extended, 2958 * otherwise this is TSO, inlined data length is fixed. 2959 * @param olx 2960 * Configured Tx offloads mask. It is fully defined at 2961 * compile time and may be used for optimization. 2962 * 2963 * @return 2964 * Actual size of built WQE in segments. 2965 */ 2966 static __rte_always_inline unsigned int 2967 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 2968 struct mlx5_txq_local *__rte_restrict loc, 2969 struct mlx5_wqe *__rte_restrict wqe, 2970 unsigned int vlan, 2971 unsigned int inlen, 2972 unsigned int tso, 2973 unsigned int olx __rte_unused) 2974 { 2975 struct mlx5_wqe_dseg *__rte_restrict dseg; 2976 unsigned int ds; 2977 2978 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 2979 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 2980 loc->mbuf_off = 0; 2981 2982 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 2983 if (!loc->mbuf_nseg) 2984 goto dseg_done; 2985 /* 2986 * There are still some mbuf remaining, not inlined. 2987 * The first mbuf may be partially inlined and we 2988 * must process the possible non-zero data offset. 2989 */ 2990 if (loc->mbuf_off) { 2991 unsigned int dlen; 2992 uint8_t *dptr; 2993 2994 /* 2995 * Exhausted packets must be dropped before. 2996 * Non-zero offset means there are some data 2997 * remained in the packet. 2998 */ 2999 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 3000 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 3001 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 3002 loc->mbuf_off); 3003 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 3004 /* 3005 * Build the pointer/minimal data Data Segment. 3006 * Do ring buffer wrapping check in advance. 
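/*
 * Illustrative sketch of the wrap-aware copy pattern used by the inlining
 * and elts helpers above: a write that crosses the end of the ring storage
 * is split into at most two memcpy() calls, the second one restarting from
 * the ring base. Standalone example with made-up names; the caller is
 * assumed to guarantee len <= size and offset < size.
 */
static void
example_ring_write(uint8_t *ring, unsigned int size, unsigned int offset,
		   const uint8_t *src, unsigned int len)
{
	unsigned int first = size - offset;

	if (first > len)
		first = len;
	memcpy(ring + offset, src, first);	/* Up to the end of the ring. */
	if (len > first)
		memcpy(ring, src + first, len - first);	/* Wrapped tail. */
}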
3007 */ 3008 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3009 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3010 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 3011 /* Store the mbuf to be freed on completion. */ 3012 MLX5_ASSERT(loc->elts_free); 3013 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3014 --loc->elts_free; 3015 ++dseg; 3016 if (--loc->mbuf_nseg == 0) 3017 goto dseg_done; 3018 loc->mbuf = loc->mbuf->next; 3019 loc->mbuf_off = 0; 3020 } 3021 do { 3022 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3023 struct rte_mbuf *mbuf; 3024 3025 /* Zero length segment found, just skip. */ 3026 mbuf = loc->mbuf; 3027 loc->mbuf = loc->mbuf->next; 3028 rte_pktmbuf_free_seg(mbuf); 3029 if (--loc->mbuf_nseg == 0) 3030 break; 3031 } else { 3032 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3033 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3034 mlx5_tx_dseg_iptr 3035 (txq, loc, dseg, 3036 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3037 rte_pktmbuf_data_len(loc->mbuf), olx); 3038 MLX5_ASSERT(loc->elts_free); 3039 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3040 --loc->elts_free; 3041 ++dseg; 3042 if (--loc->mbuf_nseg == 0) 3043 break; 3044 loc->mbuf = loc->mbuf->next; 3045 } 3046 } while (true); 3047 3048 dseg_done: 3049 /* Calculate actual segments used from the dseg pointer. */ 3050 if ((uintptr_t)wqe < (uintptr_t)dseg) 3051 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 3052 else 3053 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 3054 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 3055 return ds; 3056 } 3057 3058 /** 3059 * The routine checks timestamp flag in the current packet, 3060 * and push WAIT WQE into the queue if scheduling is required. 3061 * 3062 * @param txq 3063 * Pointer to TX queue structure. 3064 * @param loc 3065 * Pointer to burst routine local context. 3066 * @param olx 3067 * Configured Tx offloads mask. It is fully defined at 3068 * compile time and may be used for optimization. 3069 * 3070 * @return 3071 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3072 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 3073 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 3074 * Local context variables partially updated. 3075 */ 3076 static __rte_always_inline enum mlx5_txcmp_code 3077 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 3078 struct mlx5_txq_local *restrict loc, 3079 unsigned int olx) 3080 { 3081 if (MLX5_TXOFF_CONFIG(TXPP) && 3082 loc->mbuf->ol_flags & txq->ts_mask) { 3083 struct mlx5_wqe *wqe; 3084 uint64_t ts; 3085 int32_t wci; 3086 3087 /* 3088 * Estimate the required space quickly and roughly. 3089 * We would like to ensure the packet can be pushed 3090 * to the queue and we won't get the orphan WAIT WQE. 3091 */ 3092 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 3093 loc->elts_free < NB_SEGS(loc->mbuf)) 3094 return MLX5_TXCMP_CODE_EXIT; 3095 /* Convert the timestamp into completion to wait. */ 3096 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 3097 wci = mlx5_txpp_convert_tx_ts(txq->sh, ts); 3098 if (unlikely(wci < 0)) 3099 return MLX5_TXCMP_CODE_SINGLE; 3100 /* Build the WAIT WQE with specified completion. 
*/ 3101 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3102 mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx); 3103 mlx5_tx_wseg_init(txq, loc, wqe, wci, olx); 3104 ++txq->wqe_ci; 3105 --loc->wqe_free; 3106 return MLX5_TXCMP_CODE_MULTI; 3107 } 3108 return MLX5_TXCMP_CODE_SINGLE; 3109 } 3110 3111 /** 3112 * Tx one packet function for multi-segment TSO. Supports all 3113 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 3114 * sends one packet per WQE. 3115 * 3116 * This routine is responsible for storing processed mbuf 3117 * into elts ring buffer and update elts_head. 3118 * 3119 * @param txq 3120 * Pointer to TX queue structure. 3121 * @param loc 3122 * Pointer to burst routine local context. 3123 * @param olx 3124 * Configured Tx offloads mask. It is fully defined at 3125 * compile time and may be used for optimization. 3126 * 3127 * @return 3128 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3129 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3130 * Local context variables partially updated. 3131 */ 3132 static __rte_always_inline enum mlx5_txcmp_code 3133 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 3134 struct mlx5_txq_local *__rte_restrict loc, 3135 unsigned int olx) 3136 { 3137 struct mlx5_wqe *__rte_restrict wqe; 3138 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 3139 3140 if (MLX5_TXOFF_CONFIG(TXPP)) { 3141 enum mlx5_txcmp_code wret; 3142 3143 /* Generate WAIT for scheduling if requested. */ 3144 wret = mlx5_tx_schedule_send(txq, loc, olx); 3145 if (wret == MLX5_TXCMP_CODE_EXIT) 3146 return MLX5_TXCMP_CODE_EXIT; 3147 if (wret == MLX5_TXCMP_CODE_ERROR) 3148 return MLX5_TXCMP_CODE_ERROR; 3149 } 3150 /* 3151 * Calculate data length to be inlined to estimate 3152 * the required space in WQE ring buffer. 3153 */ 3154 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3155 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3156 vlan = sizeof(struct rte_vlan_hdr); 3157 inlen = loc->mbuf->l2_len + vlan + 3158 loc->mbuf->l3_len + loc->mbuf->l4_len; 3159 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 3160 return MLX5_TXCMP_CODE_ERROR; 3161 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3162 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 3163 /* Packet must contain all TSO headers. */ 3164 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 3165 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3166 inlen > (dlen + vlan))) 3167 return MLX5_TXCMP_CODE_ERROR; 3168 MLX5_ASSERT(inlen >= txq->inlen_mode); 3169 /* 3170 * Check whether there are enough free WQEBBs: 3171 * - Control Segment 3172 * - Ethernet Segment 3173 * - First Segment of inlined Ethernet data 3174 * - ... data continued ... 3175 * - Data Segments of pointer/min inline type 3176 */ 3177 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3178 MLX5_ESEG_MIN_INLINE_SIZE + 3179 MLX5_WSEG_SIZE + 3180 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3181 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3182 return MLX5_TXCMP_CODE_EXIT; 3183 /* Check for maximal WQE size. */ 3184 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3185 return MLX5_TXCMP_CODE_ERROR; 3186 #ifdef MLX5_PMD_SOFT_COUNTERS 3187 /* Update sent data bytes/packets counters. */ 3188 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 3189 loc->mbuf->tso_segsz; 3190 /* 3191 * One will be added for mbuf itself 3192 * at the end of the mlx5_tx_burst from 3193 * loc->pkts_sent field. 
3194 */ 3195 --ntcp; 3196 txq->stats.opackets += ntcp; 3197 txq->stats.obytes += dlen + vlan + ntcp * inlen; 3198 #endif 3199 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3200 loc->wqe_last = wqe; 3201 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 3202 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 3203 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3204 txq->wqe_ci += (ds + 3) / 4; 3205 loc->wqe_free -= (ds + 3) / 4; 3206 return MLX5_TXCMP_CODE_MULTI; 3207 } 3208 3209 /** 3210 * Tx one packet function for multi-segment SEND. Supports all 3211 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3212 * sends one packet per WQE, without any data inlining in 3213 * Ethernet Segment. 3214 * 3215 * This routine is responsible for storing processed mbuf 3216 * into elts ring buffer and update elts_head. 3217 * 3218 * @param txq 3219 * Pointer to TX queue structure. 3220 * @param loc 3221 * Pointer to burst routine local context. 3222 * @param olx 3223 * Configured Tx offloads mask. It is fully defined at 3224 * compile time and may be used for optimization. 3225 * 3226 * @return 3227 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3228 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3229 * Local context variables partially updated. 3230 */ 3231 static __rte_always_inline enum mlx5_txcmp_code 3232 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 3233 struct mlx5_txq_local *__rte_restrict loc, 3234 unsigned int olx) 3235 { 3236 struct mlx5_wqe_dseg *__rte_restrict dseg; 3237 struct mlx5_wqe *__rte_restrict wqe; 3238 unsigned int ds, nseg; 3239 3240 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3241 if (MLX5_TXOFF_CONFIG(TXPP)) { 3242 enum mlx5_txcmp_code wret; 3243 3244 /* Generate WAIT for scheduling if requested. */ 3245 wret = mlx5_tx_schedule_send(txq, loc, olx); 3246 if (wret == MLX5_TXCMP_CODE_EXIT) 3247 return MLX5_TXCMP_CODE_EXIT; 3248 if (wret == MLX5_TXCMP_CODE_ERROR) 3249 return MLX5_TXCMP_CODE_ERROR; 3250 } 3251 /* 3252 * No inline at all, it means the CPU cycles saving 3253 * is prioritized at configuration, we should not 3254 * copy any packet data to WQE. 3255 */ 3256 nseg = NB_SEGS(loc->mbuf); 3257 ds = 2 + nseg; 3258 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3259 return MLX5_TXCMP_CODE_EXIT; 3260 /* Check for maximal WQE size. */ 3261 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3262 return MLX5_TXCMP_CODE_ERROR; 3263 /* 3264 * Some Tx offloads may cause an error if 3265 * packet is not long enough, check against 3266 * assumed minimal length. 3267 */ 3268 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 3269 return MLX5_TXCMP_CODE_ERROR; 3270 #ifdef MLX5_PMD_SOFT_COUNTERS 3271 /* Update sent data bytes counter. 
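/*
 * Illustrative sketch of the TSO software counters math above: the payload
 * that follows the replicated headers is cut into ceil(payload / mss)
 * segments and the 'hlen' header bytes are resent once per segment. Made-up
 * helper for the example only; the driver additionally excludes the one
 * packet that is accounted via loc->pkts_sent.
 */
static void
example_tso_account(unsigned int payload, unsigned int mss, unsigned int hlen,
		    unsigned int *segments, unsigned int *wire_bytes)
{
	*segments = (payload + mss - 1) / mss;		/* Round up. */
	*wire_bytes = payload + *segments * hlen;
}

/* E.g. payload = 3000, mss = 1448, hlen = 54: 3 segments, 3162 wire bytes. */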
*/ 3272 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 3273 if (MLX5_TXOFF_CONFIG(VLAN) && 3274 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3275 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 3276 #endif 3277 /* 3278 * SEND WQE, one WQEBB: 3279 * - Control Segment, SEND opcode 3280 * - Ethernet Segment, optional VLAN, no inline 3281 * - Data Segments, pointer only type 3282 */ 3283 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3284 loc->wqe_last = wqe; 3285 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 3286 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3287 dseg = &wqe->dseg[0]; 3288 do { 3289 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 3290 struct rte_mbuf *mbuf; 3291 3292 /* 3293 * Zero length segment found, have to 3294 * correct total size of WQE in segments. 3295 * It is supposed to be rare occasion, so 3296 * in normal case (no zero length segments) 3297 * we avoid extra writing to the Control 3298 * Segment. 3299 */ 3300 --ds; 3301 wqe->cseg.sq_ds -= RTE_BE32(1); 3302 mbuf = loc->mbuf; 3303 loc->mbuf = mbuf->next; 3304 rte_pktmbuf_free_seg(mbuf); 3305 if (--nseg == 0) 3306 break; 3307 } else { 3308 mlx5_tx_dseg_ptr 3309 (txq, loc, dseg, 3310 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3311 rte_pktmbuf_data_len(loc->mbuf), olx); 3312 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3313 --loc->elts_free; 3314 if (--nseg == 0) 3315 break; 3316 ++dseg; 3317 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3318 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3319 loc->mbuf = loc->mbuf->next; 3320 } 3321 } while (true); 3322 txq->wqe_ci += (ds + 3) / 4; 3323 loc->wqe_free -= (ds + 3) / 4; 3324 return MLX5_TXCMP_CODE_MULTI; 3325 } 3326 3327 /** 3328 * Tx one packet function for multi-segment SEND. Supports all 3329 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 3330 * sends one packet per WQE, with data inlining in 3331 * Ethernet Segment and minimal Data Segments. 3332 * 3333 * This routine is responsible for storing processed mbuf 3334 * into elts ring buffer and update elts_head. 3335 * 3336 * @param txq 3337 * Pointer to TX queue structure. 3338 * @param loc 3339 * Pointer to burst routine local context. 3340 * @param olx 3341 * Configured Tx offloads mask. It is fully defined at 3342 * compile time and may be used for optimization. 3343 * 3344 * @return 3345 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3346 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3347 * Local context variables partially updated. 3348 */ 3349 static __rte_always_inline enum mlx5_txcmp_code 3350 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 3351 struct mlx5_txq_local *__rte_restrict loc, 3352 unsigned int olx) 3353 { 3354 struct mlx5_wqe *__rte_restrict wqe; 3355 unsigned int ds, inlen, dlen, vlan = 0; 3356 3357 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3358 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3359 if (MLX5_TXOFF_CONFIG(TXPP)) { 3360 enum mlx5_txcmp_code wret; 3361 3362 /* Generate WAIT for scheduling if requested. */ 3363 wret = mlx5_tx_schedule_send(txq, loc, olx); 3364 if (wret == MLX5_TXCMP_CODE_EXIT) 3365 return MLX5_TXCMP_CODE_EXIT; 3366 if (wret == MLX5_TXCMP_CODE_ERROR) 3367 return MLX5_TXCMP_CODE_ERROR; 3368 } 3369 /* 3370 * First calculate data length to be inlined 3371 * to estimate the required space for WQE. 
3372 */ 3373 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 3374 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3375 vlan = sizeof(struct rte_vlan_hdr); 3376 inlen = dlen + vlan; 3377 /* Check against minimal length. */ 3378 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3379 return MLX5_TXCMP_CODE_ERROR; 3380 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 3381 if (inlen > txq->inlen_send || 3382 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 3383 struct rte_mbuf *mbuf; 3384 unsigned int nxlen; 3385 uintptr_t start; 3386 3387 /* 3388 * Packet length exceeds the allowed inline 3389 * data length, check whether the minimal 3390 * inlining is required. 3391 */ 3392 if (txq->inlen_mode) { 3393 MLX5_ASSERT(txq->inlen_mode >= 3394 MLX5_ESEG_MIN_INLINE_SIZE); 3395 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 3396 inlen = txq->inlen_mode; 3397 } else { 3398 if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE || 3399 !vlan || txq->vlan_en) { 3400 /* 3401 * VLAN insertion will be done inside by HW. 3402 * It is not utmost effective - VLAN flag is 3403 * checked twice, but we should proceed the 3404 * inlining length correctly and take into 3405 * account the VLAN header being inserted. 3406 */ 3407 return mlx5_tx_packet_multi_send 3408 (txq, loc, olx); 3409 } 3410 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 3411 } 3412 /* 3413 * Now we know the minimal amount of data is requested 3414 * to inline. Check whether we should inline the buffers 3415 * from the chain beginning to eliminate some mbufs. 3416 */ 3417 mbuf = loc->mbuf; 3418 nxlen = rte_pktmbuf_data_len(mbuf); 3419 if (unlikely(nxlen <= txq->inlen_send)) { 3420 /* We can inline first mbuf at least. */ 3421 if (nxlen < inlen) { 3422 unsigned int smlen; 3423 3424 /* Scan mbufs till inlen filled. */ 3425 do { 3426 smlen = nxlen; 3427 mbuf = NEXT(mbuf); 3428 MLX5_ASSERT(mbuf); 3429 nxlen = rte_pktmbuf_data_len(mbuf); 3430 nxlen += smlen; 3431 } while (unlikely(nxlen < inlen)); 3432 if (unlikely(nxlen > txq->inlen_send)) { 3433 /* We cannot inline entire mbuf. */ 3434 smlen = inlen - smlen; 3435 start = rte_pktmbuf_mtod_offset 3436 (mbuf, uintptr_t, smlen); 3437 goto do_align; 3438 } 3439 } 3440 do { 3441 inlen = nxlen; 3442 mbuf = NEXT(mbuf); 3443 /* There should be not end of packet. */ 3444 MLX5_ASSERT(mbuf); 3445 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 3446 } while (unlikely(nxlen < txq->inlen_send)); 3447 } 3448 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 3449 /* 3450 * Check whether we can do inline to align start 3451 * address of data buffer to cacheline. 3452 */ 3453 do_align: 3454 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 3455 if (unlikely(start)) { 3456 start += inlen; 3457 if (start <= txq->inlen_send) 3458 inlen = start; 3459 } 3460 } 3461 /* 3462 * Check whether there are enough free WQEBBs: 3463 * - Control Segment 3464 * - Ethernet Segment 3465 * - First Segment of inlined Ethernet data 3466 * - ... data continued ... 3467 * - Data Segments of pointer/min inline type 3468 * 3469 * Estimate the number of Data Segments conservatively, 3470 * supposing no any mbufs is being freed during inlining. 3471 */ 3472 MLX5_ASSERT(inlen <= txq->inlen_send); 3473 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 3474 MLX5_ESEG_MIN_INLINE_SIZE + 3475 MLX5_WSEG_SIZE + 3476 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3477 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 3478 return MLX5_TXCMP_CODE_EXIT; 3479 /* Check for maximal WQE size. 
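/*
 * Illustrative sketch of the 'do_align' adjustment above: given the address
 * of the first byte that would be left non-inlined, a few extra bytes are
 * inlined so that the remaining data starts on a cache-line boundary,
 * provided the configured inline limit still allows it. Standalone example
 * with a made-up name; 64 stands in for RTE_CACHE_LINE_SIZE.
 */
static unsigned int
example_align_inline_len(uintptr_t first_left, unsigned int inlen,
			 unsigned int inlen_limit)
{
	unsigned int pad = (unsigned int)((~first_left + 1) & 63u);

	if (pad != 0 && inlen + pad <= inlen_limit)
		inlen += pad;	/* Remaining data now starts cache-aligned. */
	return inlen;
}

/* E.g. first_left ending in 0x06: pad = 58, inline 58 extra bytes if allowed. */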
*/ 3480 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 3481 return MLX5_TXCMP_CODE_ERROR; 3482 #ifdef MLX5_PMD_SOFT_COUNTERS 3483 /* Update sent data bytes/packets counters. */ 3484 txq->stats.obytes += dlen + vlan; 3485 #endif 3486 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3487 loc->wqe_last = wqe; 3488 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 3489 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 3490 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3491 txq->wqe_ci += (ds + 3) / 4; 3492 loc->wqe_free -= (ds + 3) / 4; 3493 return MLX5_TXCMP_CODE_MULTI; 3494 } 3495 3496 /** 3497 * Tx burst function for multi-segment packets. Supports all 3498 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 3499 * sends one packet per WQE. Function stops sending if it 3500 * encounters the single-segment packet. 3501 * 3502 * This routine is responsible for storing processed mbuf 3503 * into elts ring buffer and update elts_head. 3504 * 3505 * @param txq 3506 * Pointer to TX queue structure. 3507 * @param[in] pkts 3508 * Packets to transmit. 3509 * @param pkts_n 3510 * Number of packets in array. 3511 * @param loc 3512 * Pointer to burst routine local context. 3513 * @param olx 3514 * Configured Tx offloads mask. It is fully defined at 3515 * compile time and may be used for optimization. 3516 * 3517 * @return 3518 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3519 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3520 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3521 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 3522 * Local context variables updated. 3523 */ 3524 static __rte_always_inline enum mlx5_txcmp_code 3525 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 3526 struct rte_mbuf **__rte_restrict pkts, 3527 unsigned int pkts_n, 3528 struct mlx5_txq_local *__rte_restrict loc, 3529 unsigned int olx) 3530 { 3531 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3532 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3533 pkts += loc->pkts_sent + 1; 3534 pkts_n -= loc->pkts_sent; 3535 for (;;) { 3536 enum mlx5_txcmp_code ret; 3537 3538 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 3539 /* 3540 * Estimate the number of free elts quickly but 3541 * conservatively. Some segment may be fully inlined 3542 * and freed, ignore this here - precise estimation 3543 * is costly. 3544 */ 3545 if (loc->elts_free < NB_SEGS(loc->mbuf)) 3546 return MLX5_TXCMP_CODE_EXIT; 3547 if (MLX5_TXOFF_CONFIG(TSO) && 3548 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) { 3549 /* Proceed with multi-segment TSO. */ 3550 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 3551 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 3552 /* Proceed with multi-segment SEND with inlining. */ 3553 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 3554 } else { 3555 /* Proceed with multi-segment SEND w/o inlining. */ 3556 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 3557 } 3558 if (ret == MLX5_TXCMP_CODE_EXIT) 3559 return MLX5_TXCMP_CODE_EXIT; 3560 if (ret == MLX5_TXCMP_CODE_ERROR) 3561 return MLX5_TXCMP_CODE_ERROR; 3562 /* WQE is built, go to the next packet. */ 3563 ++loc->pkts_sent; 3564 --pkts_n; 3565 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3566 return MLX5_TXCMP_CODE_EXIT; 3567 loc->mbuf = *pkts++; 3568 if (pkts_n > 1) 3569 rte_prefetch0(*pkts); 3570 if (likely(NB_SEGS(loc->mbuf) > 1)) 3571 continue; 3572 /* Here ends the series of multi-segment packets. 
*/ 3573 if (MLX5_TXOFF_CONFIG(TSO) && 3574 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3575 return MLX5_TXCMP_CODE_TSO; 3576 return MLX5_TXCMP_CODE_SINGLE; 3577 } 3578 MLX5_ASSERT(false); 3579 } 3580 3581 /** 3582 * Tx burst function for single-segment packets with TSO. 3583 * Supports all types of Tx offloads, except multi-packets. 3584 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 3585 * Function stops sending if it encounters the multi-segment 3586 * packet or packet without TSO requested. 3587 * 3588 * The routine is responsible for storing processed mbuf 3589 * into elts ring buffer and update elts_head if inline 3590 * offloads is requested due to possible early freeing 3591 * of the inlined mbufs (can not store pkts array in elts 3592 * as a batch). 3593 * 3594 * @param txq 3595 * Pointer to TX queue structure. 3596 * @param[in] pkts 3597 * Packets to transmit. 3598 * @param pkts_n 3599 * Number of packets in array. 3600 * @param loc 3601 * Pointer to burst routine local context. 3602 * @param olx 3603 * Configured Tx offloads mask. It is fully defined at 3604 * compile time and may be used for optimization. 3605 * 3606 * @return 3607 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3608 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3609 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 3610 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3611 * Local context variables updated. 3612 */ 3613 static __rte_always_inline enum mlx5_txcmp_code 3614 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 3615 struct rte_mbuf **__rte_restrict pkts, 3616 unsigned int pkts_n, 3617 struct mlx5_txq_local *__rte_restrict loc, 3618 unsigned int olx) 3619 { 3620 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3621 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3622 pkts += loc->pkts_sent + 1; 3623 pkts_n -= loc->pkts_sent; 3624 for (;;) { 3625 struct mlx5_wqe_dseg *__rte_restrict dseg; 3626 struct mlx5_wqe *__rte_restrict wqe; 3627 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 3628 uint8_t *dptr; 3629 3630 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3631 if (MLX5_TXOFF_CONFIG(TXPP)) { 3632 enum mlx5_txcmp_code wret; 3633 3634 /* Generate WAIT for scheduling if requested. */ 3635 wret = mlx5_tx_schedule_send(txq, loc, olx); 3636 if (wret == MLX5_TXCMP_CODE_EXIT) 3637 return MLX5_TXCMP_CODE_EXIT; 3638 if (wret == MLX5_TXCMP_CODE_ERROR) 3639 return MLX5_TXCMP_CODE_ERROR; 3640 } 3641 dlen = rte_pktmbuf_data_len(loc->mbuf); 3642 if (MLX5_TXOFF_CONFIG(VLAN) && 3643 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 3644 vlan = sizeof(struct rte_vlan_hdr); 3645 } 3646 /* 3647 * First calculate the WQE size to check 3648 * whether we have enough space in ring buffer. 3649 */ 3650 hlen = loc->mbuf->l2_len + vlan + 3651 loc->mbuf->l3_len + loc->mbuf->l4_len; 3652 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 3653 return MLX5_TXCMP_CODE_ERROR; 3654 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 3655 hlen += loc->mbuf->outer_l2_len + 3656 loc->mbuf->outer_l3_len; 3657 /* Segment must contain all TSO headers. */ 3658 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 3659 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 3660 hlen > (dlen + vlan))) 3661 return MLX5_TXCMP_CODE_ERROR; 3662 /* 3663 * Check whether there are enough free WQEBBs: 3664 * - Control Segment 3665 * - Ethernet Segment 3666 * - First Segment of inlined Ethernet data 3667 * - ... data continued ... 
3668 * - Finishing Data Segment of pointer type 3669 */ 3670 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 3671 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3672 if (loc->wqe_free < ((ds + 3) / 4)) 3673 return MLX5_TXCMP_CODE_EXIT; 3674 #ifdef MLX5_PMD_SOFT_COUNTERS 3675 /* Update sent data bytes/packets counters. */ 3676 ntcp = (dlen + vlan - hlen + 3677 loc->mbuf->tso_segsz - 1) / 3678 loc->mbuf->tso_segsz; 3679 /* 3680 * One will be added for mbuf itself at the end 3681 * of the mlx5_tx_burst from loc->pkts_sent field. 3682 */ 3683 --ntcp; 3684 txq->stats.opackets += ntcp; 3685 txq->stats.obytes += dlen + vlan + ntcp * hlen; 3686 #endif 3687 /* 3688 * Build the TSO WQE: 3689 * - Control Segment 3690 * - Ethernet Segment with hlen bytes inlined 3691 * - Data Segment of pointer type 3692 */ 3693 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3694 loc->wqe_last = wqe; 3695 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3696 MLX5_OPCODE_TSO, olx); 3697 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 3698 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 3699 dlen -= hlen - vlan; 3700 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 3701 /* 3702 * WQE is built, update the loop parameters 3703 * and go to the next packet. 3704 */ 3705 txq->wqe_ci += (ds + 3) / 4; 3706 loc->wqe_free -= (ds + 3) / 4; 3707 if (MLX5_TXOFF_CONFIG(INLINE)) 3708 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3709 --loc->elts_free; 3710 ++loc->pkts_sent; 3711 --pkts_n; 3712 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3713 return MLX5_TXCMP_CODE_EXIT; 3714 loc->mbuf = *pkts++; 3715 if (pkts_n > 1) 3716 rte_prefetch0(*pkts); 3717 if (MLX5_TXOFF_CONFIG(MULTI) && 3718 unlikely(NB_SEGS(loc->mbuf) > 1)) 3719 return MLX5_TXCMP_CODE_MULTI; 3720 if (likely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) 3721 return MLX5_TXCMP_CODE_SINGLE; 3722 /* Continue with the next TSO packet. */ 3723 } 3724 MLX5_ASSERT(false); 3725 } 3726 3727 /** 3728 * Analyze the packet and select the best method to send. 3729 * 3730 * @param txq 3731 * Pointer to TX queue structure. 3732 * @param loc 3733 * Pointer to burst routine local context. 3734 * @param olx 3735 * Configured Tx offloads mask. It is fully defined at 3736 * compile time and may be used for optimization. 3737 * @param newp 3738 * The predefined flag whether do complete check for 3739 * multi-segment packets and TSO. 3740 * 3741 * @return 3742 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3743 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 3744 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 3745 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 3746 */ 3747 static __rte_always_inline enum mlx5_txcmp_code 3748 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 3749 struct mlx5_txq_local *__rte_restrict loc, 3750 unsigned int olx, 3751 bool newp) 3752 { 3753 /* Check for multi-segment packet. */ 3754 if (newp && 3755 MLX5_TXOFF_CONFIG(MULTI) && 3756 unlikely(NB_SEGS(loc->mbuf) > 1)) 3757 return MLX5_TXCMP_CODE_MULTI; 3758 /* Check for TSO packet. */ 3759 if (newp && 3760 MLX5_TXOFF_CONFIG(TSO) && 3761 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 3762 return MLX5_TXCMP_CODE_TSO; 3763 /* Check if eMPW is enabled at all. */ 3764 if (!MLX5_TXOFF_CONFIG(EMPW)) 3765 return MLX5_TXCMP_CODE_SINGLE; 3766 /* Check if eMPW can be engaged. 
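eMPW shares a single Ethernet Segment for the whole batch, so the hardware cannot insert a VLAN tag per packet; a VLAN-tagged mbuf may only go through eMPW if the entire frame including the 4-byte tag fits into the inline limit, otherwise it falls back to the ordinary SEND path.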
*/ 3767 if (MLX5_TXOFF_CONFIG(VLAN) && 3768 unlikely(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) && 3769 (!MLX5_TXOFF_CONFIG(INLINE) || 3770 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 3771 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 3772 /* 3773 * eMPW does not support VLAN insertion offload, 3774 * so the entire packet would have to be inlined, but 3775 * the packet is too long for inlining. 3776 */ 3777 return MLX5_TXCMP_CODE_SINGLE; 3778 } 3779 return MLX5_TXCMP_CODE_EMPW; 3780 } 3781 3782 /** 3783 * Check the next packet attributes to match with the eMPW batch ones. 3784 * In addition, for legacy MPW the packet length is checked as well. 3785 * 3786 * @param txq 3787 * Pointer to TX queue structure. 3788 * @param es 3789 * Pointer to Ethernet Segment of eMPW batch. 3790 * @param loc 3791 * Pointer to burst routine local context. 3792 * @param dlen 3793 * Length of previous packet in MPW descriptor. 3794 * @param olx 3795 * Configured Tx offloads mask. It is fully defined at 3796 * compile time and may be used for optimization. 3797 * 3798 * @return 3799 * true - packet match with eMPW batch attributes. 3800 * false - no match, eMPW should be restarted. 3801 */ 3802 static __rte_always_inline bool 3803 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, 3804 struct mlx5_wqe_eseg *__rte_restrict es, 3805 struct mlx5_txq_local *__rte_restrict loc, 3806 uint32_t dlen, 3807 unsigned int olx) 3808 { 3809 uint8_t swp_flags = 0; 3810 3811 /* Compare the checksum flags, if any. */ 3812 if (MLX5_TXOFF_CONFIG(CSUM) && 3813 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags) 3814 return false; 3815 /* Compare the Software Parser offsets and flags. */ 3816 if (MLX5_TXOFF_CONFIG(SWP) && 3817 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) || 3818 es->swp_flags != swp_flags)) 3819 return false; 3820 /* Fill metadata field if needed. */ 3821 if (MLX5_TXOFF_CONFIG(METADATA) && 3822 es->metadata != (loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 3823 *RTE_FLOW_DYNF_METADATA(loc->mbuf) : 0)) 3824 return false; 3825 /* Legacy MPW can send packets with the same length only. */ 3826 if (MLX5_TXOFF_CONFIG(MPW) && 3827 dlen != rte_pktmbuf_data_len(loc->mbuf)) 3828 return false; 3829 /* There must be no VLAN packets in eMPW loop. */ 3830 if (MLX5_TXOFF_CONFIG(VLAN)) 3831 MLX5_ASSERT(!(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT)); 3832 /* Check if the scheduling is requested. */ 3833 if (MLX5_TXOFF_CONFIG(TXPP) && 3834 loc->mbuf->ol_flags & txq->ts_mask) 3835 return false; 3836 return true; 3837 } 3838 3839 /** 3840 * Update send loop variables and WQE for eMPW loop 3841 * without data inlining. Number of Data Segments is 3842 * equal to the number of sent packets. 3843 * 3844 * @param txq 3845 * Pointer to TX queue structure. 3846 * @param loc 3847 * Pointer to burst routine local context. 3848 * @param ds 3849 * Number of packets (one Data Segment is built per packet). 3850 * @param slen 3851 * Accumulated statistics, bytes sent. 3852 * @param olx 3853 * Configured Tx offloads mask. It is fully defined at 3854 * compile time and may be used for optimization.
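 *
 * For illustration only: a session that carried five packets without
 * inlining takes five pointer Data Segments; adding the shared Control
 * and Ethernet Segments gives ds = 7, so (7 + 3) / 4 = 2 WQEBBs are
 * consumed from the send ring.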
3859 */ 3860 static __rte_always_inline void 3861 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, 3862 struct mlx5_txq_local *__rte_restrict loc, 3863 unsigned int ds, 3864 unsigned int slen, 3865 unsigned int olx __rte_unused) 3866 { 3867 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 3868 #ifdef MLX5_PMD_SOFT_COUNTERS 3869 /* Update sent data bytes counter. */ 3870 txq->stats.obytes += slen; 3871 #else 3872 (void)slen; 3873 #endif 3874 loc->elts_free -= ds; 3875 loc->pkts_sent += ds; 3876 ds += 2; 3877 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 3878 txq->wqe_ci += (ds + 3) / 4; 3879 loc->wqe_free -= (ds + 3) / 4; 3880 } 3881 3882 /** 3883 * Update send loop variables and WQE for eMPW loop 3884 * with data inlining. Takes the total size of the descriptors 3885 * and data pushed to the WQE. 3886 * 3887 * @param txq 3888 * Pointer to TX queue structure. 3889 * @param loc 3890 * Pointer to burst routine local context. 3891 * @param len 3892 * Total size of descriptor/data in bytes. 3893 * @param slen 3894 * Accumulated statistics, data bytes sent. 3895 * @param wqem 3896 * The base WQE for the eMPW/MPW descriptor. 3897 * @param olx 3898 * Configured Tx offloads mask. It is fully defined at 3899 * compile time and may be used for optimization. 3904 */ 3905 static __rte_always_inline void 3906 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 3907 struct mlx5_txq_local *__rte_restrict loc, 3908 unsigned int len, 3909 unsigned int slen, 3910 struct mlx5_wqe *__rte_restrict wqem, 3911 unsigned int olx __rte_unused) 3912 { 3913 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 3914 3915 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3916 #ifdef MLX5_PMD_SOFT_COUNTERS 3917 /* Update sent data bytes counter. */ 3918 txq->stats.obytes += slen; 3919 #else 3920 (void)slen; 3921 #endif 3922 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 3923 /* 3924 * If the legacy MPW session contains inline packets, 3925 * set the length of the single inline Data Segment 3926 * and align the total length to the segment size. 3927 */ 3928 MLX5_ASSERT(len > sizeof(dseg->bcount)); 3929 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 3930 MLX5_ETH_WQE_DATA_INLINE); 3931 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 3932 } else { 3933 /* 3934 * The session is not legacy MPW, or it contains 3935 * data buffer pointer segments. 3936 */ 3937 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 3938 len = len / MLX5_WSEG_SIZE + 2; 3939 } 3940 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 3941 txq->wqe_ci += (len + 3) / 4; 3942 loc->wqe_free -= (len + 3) / 4; 3943 loc->wqe_last = wqem; 3944 } 3945 3946 /** 3947 * The set of Tx burst functions for single-segment packets 3948 * without TSO and with Multi-Packet Writing feature support. 3949 * Supports all types of Tx offloads, except multi-segment packets 3950 * and TSO. 3951 * 3952 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends 3953 * as many packets per WQE as it can. If eMPW is not configured 3954 * or the packet cannot be sent with eMPW (VLAN insertion required) the 3955 * ordinary SEND opcode is used and only one packet is placed 3956 * in the WQE. 3957 * 3958 * The functions stop sending if a multi-segment packet or a 3959 * packet with TSO requested is encountered.
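 *
 * There are two flavors below: mlx5_tx_burst_empw_simple() posts pointer
 * Data Segments only, while mlx5_tx_burst_empw_inline() may also copy the
 * packet data into the WQEs and free such mbufs immediately.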
3960 * 3961 * The routines are responsible for storing processed mbuf 3962 * into elts ring buffer and update elts_head if inlining 3963 * offload is requested. Otherwise the copying mbufs to elts 3964 * can be postponed and completed at the end of burst routine. 3965 * 3966 * @param txq 3967 * Pointer to TX queue structure. 3968 * @param[in] pkts 3969 * Packets to transmit. 3970 * @param pkts_n 3971 * Number of packets in array. 3972 * @param loc 3973 * Pointer to burst routine local context. 3974 * @param olx 3975 * Configured Tx offloads mask. It is fully defined at 3976 * compile time and may be used for optimization. 3977 * 3978 * @return 3979 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 3980 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 3981 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 3982 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 3983 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 3984 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 3985 * 3986 * Local context variables updated. 3987 * 3988 * 3989 * The routine sends packets with MLX5_OPCODE_EMPW 3990 * without inlining, this is dedicated optimized branch. 3991 * No VLAN insertion is supported. 3992 */ 3993 static __rte_always_inline enum mlx5_txcmp_code 3994 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 3995 struct rte_mbuf **__rte_restrict pkts, 3996 unsigned int pkts_n, 3997 struct mlx5_txq_local *__rte_restrict loc, 3998 unsigned int olx) 3999 { 4000 /* 4001 * Subroutine is the part of mlx5_tx_burst_single() 4002 * and sends single-segment packet with eMPW opcode 4003 * without data inlining. 4004 */ 4005 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4006 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 4007 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4008 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4009 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 4010 pkts += loc->pkts_sent + 1; 4011 pkts_n -= loc->pkts_sent; 4012 for (;;) { 4013 struct mlx5_wqe_dseg *__rte_restrict dseg; 4014 struct mlx5_wqe_eseg *__rte_restrict eseg; 4015 enum mlx5_txcmp_code ret; 4016 unsigned int part, loop; 4017 unsigned int slen = 0; 4018 4019 next_empw: 4020 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4021 if (MLX5_TXOFF_CONFIG(TXPP)) { 4022 enum mlx5_txcmp_code wret; 4023 4024 /* Generate WAIT for scheduling if requested. */ 4025 wret = mlx5_tx_schedule_send(txq, loc, olx); 4026 if (wret == MLX5_TXCMP_CODE_EXIT) 4027 return MLX5_TXCMP_CODE_EXIT; 4028 if (wret == MLX5_TXCMP_CODE_ERROR) 4029 return MLX5_TXCMP_CODE_ERROR; 4030 } 4031 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4032 MLX5_MPW_MAX_PACKETS : 4033 MLX5_EMPW_MAX_PACKETS); 4034 if (unlikely(loc->elts_free < part)) { 4035 /* We have no enough elts to save all mbufs. */ 4036 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 4037 return MLX5_TXCMP_CODE_EXIT; 4038 /* But we still able to send at least minimal eMPW. 
*/ 4039 part = loc->elts_free; 4040 } 4041 /* Check whether we have enough WQEs */ 4042 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 4043 if (unlikely(loc->wqe_free < 4044 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4045 return MLX5_TXCMP_CODE_EXIT; 4046 part = (loc->wqe_free * 4) - 2; 4047 } 4048 if (likely(part > 1)) 4049 rte_prefetch0(*pkts); 4050 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4051 /* 4052 * Build eMPW title WQEBB: 4053 * - Control Segment, eMPW opcode 4054 * - Ethernet Segment, no inline 4055 */ 4056 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 4057 MLX5_OPCODE_ENHANCED_MPSW, olx); 4058 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 4059 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4060 eseg = &loc->wqe_last->eseg; 4061 dseg = &loc->wqe_last->dseg[0]; 4062 loop = part; 4063 /* Store the packet length for legacy MPW. */ 4064 if (MLX5_TXOFF_CONFIG(MPW)) 4065 eseg->mss = rte_cpu_to_be_16 4066 (rte_pktmbuf_data_len(loc->mbuf)); 4067 for (;;) { 4068 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4069 #ifdef MLX5_PMD_SOFT_COUNTERS 4070 /* Update sent data bytes counter. */ 4071 slen += dlen; 4072 #endif 4073 mlx5_tx_dseg_ptr 4074 (txq, loc, dseg, 4075 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4076 dlen, olx); 4077 if (unlikely(--loop == 0)) 4078 break; 4079 loc->mbuf = *pkts++; 4080 if (likely(loop > 1)) 4081 rte_prefetch0(*pkts); 4082 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4083 /* 4084 * Unroll the completion code to avoid 4085 * returning variable value - it results in 4086 * unoptimized sequent checking in caller. 4087 */ 4088 if (ret == MLX5_TXCMP_CODE_MULTI) { 4089 part -= loop; 4090 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4091 if (unlikely(!loc->elts_free || 4092 !loc->wqe_free)) 4093 return MLX5_TXCMP_CODE_EXIT; 4094 return MLX5_TXCMP_CODE_MULTI; 4095 } 4096 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4097 if (ret == MLX5_TXCMP_CODE_TSO) { 4098 part -= loop; 4099 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4100 if (unlikely(!loc->elts_free || 4101 !loc->wqe_free)) 4102 return MLX5_TXCMP_CODE_EXIT; 4103 return MLX5_TXCMP_CODE_TSO; 4104 } 4105 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4106 part -= loop; 4107 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4108 if (unlikely(!loc->elts_free || 4109 !loc->wqe_free)) 4110 return MLX5_TXCMP_CODE_EXIT; 4111 return MLX5_TXCMP_CODE_SINGLE; 4112 } 4113 if (ret != MLX5_TXCMP_CODE_EMPW) { 4114 MLX5_ASSERT(false); 4115 part -= loop; 4116 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4117 return MLX5_TXCMP_CODE_ERROR; 4118 } 4119 /* 4120 * Check whether packet parameters coincide 4121 * within assumed eMPW batch: 4122 * - check sum settings 4123 * - metadata value 4124 * - software parser settings 4125 * - packets length (legacy MPW only) 4126 * - scheduling is not required 4127 */ 4128 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 4129 MLX5_ASSERT(loop); 4130 part -= loop; 4131 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 4132 if (unlikely(!loc->elts_free || 4133 !loc->wqe_free)) 4134 return MLX5_TXCMP_CODE_EXIT; 4135 pkts_n -= part; 4136 goto next_empw; 4137 } 4138 /* Packet attributes match, continue the same eMPW. */ 4139 ++dseg; 4140 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4141 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4142 } 4143 /* eMPW is built successfully, update loop parameters. */ 4144 MLX5_ASSERT(!loop); 4145 MLX5_ASSERT(pkts_n >= part); 4146 #ifdef MLX5_PMD_SOFT_COUNTERS 4147 /* Update sent data bytes counter. 
*/ 4148 txq->stats.obytes += slen; 4149 #endif 4150 loc->elts_free -= part; 4151 loc->pkts_sent += part; 4152 txq->wqe_ci += (2 + part + 3) / 4; 4153 loc->wqe_free -= (2 + part + 3) / 4; 4154 pkts_n -= part; 4155 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4156 return MLX5_TXCMP_CODE_EXIT; 4157 loc->mbuf = *pkts++; 4158 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4159 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 4160 return ret; 4161 /* Continue sending eMPW batches. */ 4162 } 4163 MLX5_ASSERT(false); 4164 } 4165 4166 /** 4167 * The routine sends packets with MLX5_OPCODE_EMPW 4168 * with inlining, optionally supports VLAN insertion. 4169 */ 4170 static __rte_always_inline enum mlx5_txcmp_code 4171 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 4172 struct rte_mbuf **__rte_restrict pkts, 4173 unsigned int pkts_n, 4174 struct mlx5_txq_local *__rte_restrict loc, 4175 unsigned int olx) 4176 { 4177 /* 4178 * Subroutine is the part of mlx5_tx_burst_single() 4179 * and sends single-segment packet with eMPW opcode 4180 * with data inlining. 4181 */ 4182 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4183 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 4184 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4185 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4186 static_assert(MLX5_EMPW_MIN_PACKETS >= 2, "invalid min size"); 4187 pkts += loc->pkts_sent + 1; 4188 pkts_n -= loc->pkts_sent; 4189 for (;;) { 4190 struct mlx5_wqe_dseg *__rte_restrict dseg; 4191 struct mlx5_wqe *__rte_restrict wqem; 4192 enum mlx5_txcmp_code ret; 4193 unsigned int room, part, nlim; 4194 unsigned int slen = 0; 4195 4196 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4197 if (MLX5_TXOFF_CONFIG(TXPP)) { 4198 enum mlx5_txcmp_code wret; 4199 4200 /* Generate WAIT for scheduling if requested. */ 4201 wret = mlx5_tx_schedule_send(txq, loc, olx); 4202 if (wret == MLX5_TXCMP_CODE_EXIT) 4203 return MLX5_TXCMP_CODE_EXIT; 4204 if (wret == MLX5_TXCMP_CODE_ERROR) 4205 return MLX5_TXCMP_CODE_ERROR; 4206 } 4207 /* 4208 * Limits the amount of packets in one WQE 4209 * to improve CQE latency generation. 4210 */ 4211 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 4212 MLX5_MPW_INLINE_MAX_PACKETS : 4213 MLX5_EMPW_MAX_PACKETS); 4214 /* Check whether we have minimal amount WQEs */ 4215 if (unlikely(loc->wqe_free < 4216 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 4217 return MLX5_TXCMP_CODE_EXIT; 4218 if (likely(pkts_n > 1)) 4219 rte_prefetch0(*pkts); 4220 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4221 /* 4222 * Build eMPW title WQEBB: 4223 * - Control Segment, eMPW opcode, zero DS 4224 * - Ethernet Segment, no inline 4225 */ 4226 mlx5_tx_cseg_init(txq, loc, wqem, 0, 4227 MLX5_OPCODE_ENHANCED_MPSW, olx); 4228 mlx5_tx_eseg_none(txq, loc, wqem, 4229 olx & ~MLX5_TXOFF_CONFIG_VLAN); 4230 dseg = &wqem->dseg[0]; 4231 /* Store the packet length for legacy MPW. */ 4232 if (MLX5_TXOFF_CONFIG(MPW)) 4233 wqem->eseg.mss = rte_cpu_to_be_16 4234 (rte_pktmbuf_data_len(loc->mbuf)); 4235 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 4236 loc->wqe_free) * MLX5_WQE_SIZE - 4237 MLX5_WQE_CSEG_SIZE - 4238 MLX5_WQE_ESEG_SIZE; 4239 /* Limit the room for legacy MPW sessions for performance. */ 4240 if (MLX5_TXOFF_CONFIG(MPW)) 4241 room = RTE_MIN(room, 4242 RTE_MAX(txq->inlen_empw + 4243 sizeof(dseg->bcount) + 4244 (MLX5_TXOFF_CONFIG(VLAN) ? 4245 sizeof(struct rte_vlan_hdr) : 0), 4246 MLX5_MPW_INLINE_MAX_PACKETS * 4247 MLX5_WQE_DSEG_SIZE)); 4248 /* Build WQE till we have space, packets and resources. 
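The local variable 'room' counts the bytes still available in the opened eMPW session and 'part' keeps its initial value, so 'part - room' later yields the size actually consumed by the built descriptors. For legacy MPW the first inlined packet also adds a 4-byte bcount header, which mlx5_tx_idone_empw() fills in when the session is closed.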
*/ 4249 part = room; 4250 for (;;) { 4251 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 4252 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 4253 unsigned int tlen; 4254 4255 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 4256 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 4257 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 4258 /* 4259 * Some Tx offloads may cause an error if 4260 * packet is not long enough, check against 4261 * assumed minimal length. 4262 */ 4263 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 4264 part -= room; 4265 if (unlikely(!part)) 4266 return MLX5_TXCMP_CODE_ERROR; 4267 /* 4268 * We have some successfully built 4269 * packet Data Segments to send. 4270 */ 4271 mlx5_tx_idone_empw(txq, loc, part, 4272 slen, wqem, olx); 4273 return MLX5_TXCMP_CODE_ERROR; 4274 } 4275 /* Inline or not inline - that's the Question. */ 4276 if (dlen > txq->inlen_empw || 4277 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) 4278 goto pointer_empw; 4279 if (MLX5_TXOFF_CONFIG(MPW)) { 4280 if (dlen > txq->inlen_send) 4281 goto pointer_empw; 4282 tlen = dlen; 4283 if (part == room) { 4284 /* Open new inline MPW session. */ 4285 tlen += sizeof(dseg->bcount); 4286 dseg->bcount = RTE_BE32(0); 4287 dseg = RTE_PTR_ADD 4288 (dseg, sizeof(dseg->bcount)); 4289 } else { 4290 /* 4291 * No pointer and inline descriptor 4292 * intermix for legacy MPW sessions. 4293 */ 4294 if (wqem->dseg[0].bcount) 4295 break; 4296 } 4297 } else { 4298 tlen = sizeof(dseg->bcount) + dlen; 4299 } 4300 /* Inline entire packet, optional VLAN insertion. */ 4301 if (MLX5_TXOFF_CONFIG(VLAN) && 4302 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4303 /* 4304 * The packet length must be checked in 4305 * mlx5_tx_able_to_empw() and packet 4306 * fits into inline length guaranteed. 4307 */ 4308 MLX5_ASSERT((dlen + 4309 sizeof(struct rte_vlan_hdr)) <= 4310 txq->inlen_empw); 4311 tlen += sizeof(struct rte_vlan_hdr); 4312 if (room < tlen) 4313 break; 4314 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 4315 dptr, dlen, olx); 4316 #ifdef MLX5_PMD_SOFT_COUNTERS 4317 /* Update sent data bytes counter. */ 4318 slen += sizeof(struct rte_vlan_hdr); 4319 #endif 4320 } else { 4321 if (room < tlen) 4322 break; 4323 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 4324 dptr, dlen, olx); 4325 } 4326 if (!MLX5_TXOFF_CONFIG(MPW)) 4327 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 4328 MLX5_ASSERT(room >= tlen); 4329 room -= tlen; 4330 /* 4331 * Packet data are completely inlined, 4332 * free the packet immediately. 4333 */ 4334 rte_pktmbuf_free_seg(loc->mbuf); 4335 goto next_mbuf; 4336 pointer_empw: 4337 /* 4338 * No pointer and inline descriptor 4339 * intermix for legacy MPW sessions. 4340 */ 4341 if (MLX5_TXOFF_CONFIG(MPW) && 4342 part != room && 4343 wqem->dseg[0].bcount == RTE_BE32(0)) 4344 break; 4345 /* 4346 * Not inlinable VLAN packets are 4347 * proceeded outside of this routine. 4348 */ 4349 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 4350 if (MLX5_TXOFF_CONFIG(VLAN)) 4351 MLX5_ASSERT(!(loc->mbuf->ol_flags & 4352 PKT_TX_VLAN_PKT)); 4353 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 4354 /* We have to store mbuf in elts.*/ 4355 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 4356 room -= MLX5_WQE_DSEG_SIZE; 4357 /* Ring buffer wraparound is checked at the loop end.*/ 4358 ++dseg; 4359 next_mbuf: 4360 #ifdef MLX5_PMD_SOFT_COUNTERS 4361 /* Update sent data bytes counter. 
*/ 4362 slen += dlen; 4363 #endif 4364 loc->pkts_sent++; 4365 loc->elts_free--; 4366 pkts_n--; 4367 if (unlikely(!pkts_n || !loc->elts_free)) { 4368 /* 4369 * We have no resources/packets to 4370 * continue build descriptors. 4371 */ 4372 part -= room; 4373 mlx5_tx_idone_empw(txq, loc, part, 4374 slen, wqem, olx); 4375 return MLX5_TXCMP_CODE_EXIT; 4376 } 4377 loc->mbuf = *pkts++; 4378 if (likely(pkts_n > 1)) 4379 rte_prefetch0(*pkts); 4380 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4381 /* 4382 * Unroll the completion code to avoid 4383 * returning variable value - it results in 4384 * unoptimized sequent checking in caller. 4385 */ 4386 if (ret == MLX5_TXCMP_CODE_MULTI) { 4387 part -= room; 4388 mlx5_tx_idone_empw(txq, loc, part, 4389 slen, wqem, olx); 4390 if (unlikely(!loc->elts_free || 4391 !loc->wqe_free)) 4392 return MLX5_TXCMP_CODE_EXIT; 4393 return MLX5_TXCMP_CODE_MULTI; 4394 } 4395 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4396 if (ret == MLX5_TXCMP_CODE_TSO) { 4397 part -= room; 4398 mlx5_tx_idone_empw(txq, loc, part, 4399 slen, wqem, olx); 4400 if (unlikely(!loc->elts_free || 4401 !loc->wqe_free)) 4402 return MLX5_TXCMP_CODE_EXIT; 4403 return MLX5_TXCMP_CODE_TSO; 4404 } 4405 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4406 part -= room; 4407 mlx5_tx_idone_empw(txq, loc, part, 4408 slen, wqem, olx); 4409 if (unlikely(!loc->elts_free || 4410 !loc->wqe_free)) 4411 return MLX5_TXCMP_CODE_EXIT; 4412 return MLX5_TXCMP_CODE_SINGLE; 4413 } 4414 if (ret != MLX5_TXCMP_CODE_EMPW) { 4415 MLX5_ASSERT(false); 4416 part -= room; 4417 mlx5_tx_idone_empw(txq, loc, part, 4418 slen, wqem, olx); 4419 return MLX5_TXCMP_CODE_ERROR; 4420 } 4421 /* Check if we have minimal room left. */ 4422 nlim--; 4423 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 4424 break; 4425 /* 4426 * Check whether packet parameters coincide 4427 * within assumed eMPW batch: 4428 * - check sum settings 4429 * - metadata value 4430 * - software parser settings 4431 * - packets length (legacy MPW only) 4432 * - scheduling is not required 4433 */ 4434 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 4435 loc, dlen, olx)) 4436 break; 4437 /* Packet attributes match, continue the same eMPW. */ 4438 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 4439 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 4440 } 4441 /* 4442 * We get here to close an existing eMPW 4443 * session and start the new one. 4444 */ 4445 MLX5_ASSERT(pkts_n); 4446 part -= room; 4447 if (unlikely(!part)) 4448 return MLX5_TXCMP_CODE_EXIT; 4449 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 4450 if (unlikely(!loc->elts_free || 4451 !loc->wqe_free)) 4452 return MLX5_TXCMP_CODE_EXIT; 4453 /* Continue the loop with new eMPW session. */ 4454 } 4455 MLX5_ASSERT(false); 4456 } 4457 4458 /** 4459 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 4460 * Data inlining and VLAN insertion are supported. 4461 */ 4462 static __rte_always_inline enum mlx5_txcmp_code 4463 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 4464 struct rte_mbuf **__rte_restrict pkts, 4465 unsigned int pkts_n, 4466 struct mlx5_txq_local *__rte_restrict loc, 4467 unsigned int olx) 4468 { 4469 /* 4470 * Subroutine is the part of mlx5_tx_burst_single() 4471 * and sends single-segment packet with SEND opcode. 
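 * Depending on the configured limits the packet data is inlined
 * completely, partially (inlen_mode bytes), minimally
 * (MLX5_ESEG_MIN_INLINE_SIZE bytes plus a pointer Data Segment), or not
 * at all; see the single_inline, single_min_inline, single_part_inline
 * and single_no_inline branches below.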
4472 */ 4473 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4474 MLX5_ASSERT(pkts_n > loc->pkts_sent); 4475 pkts += loc->pkts_sent + 1; 4476 pkts_n -= loc->pkts_sent; 4477 for (;;) { 4478 struct mlx5_wqe *__rte_restrict wqe; 4479 enum mlx5_txcmp_code ret; 4480 4481 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 4482 if (MLX5_TXOFF_CONFIG(TXPP)) { 4483 enum mlx5_txcmp_code wret; 4484 4485 /* Generate WAIT for scheduling if requested. */ 4486 wret = mlx5_tx_schedule_send(txq, loc, olx); 4487 if (wret == MLX5_TXCMP_CODE_EXIT) 4488 return MLX5_TXCMP_CODE_EXIT; 4489 if (wret == MLX5_TXCMP_CODE_ERROR) 4490 return MLX5_TXCMP_CODE_ERROR; 4491 } 4492 if (MLX5_TXOFF_CONFIG(INLINE)) { 4493 unsigned int inlen, vlan = 0; 4494 4495 inlen = rte_pktmbuf_data_len(loc->mbuf); 4496 if (MLX5_TXOFF_CONFIG(VLAN) && 4497 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 4498 vlan = sizeof(struct rte_vlan_hdr); 4499 inlen += vlan; 4500 static_assert((sizeof(struct rte_vlan_hdr) + 4501 sizeof(struct rte_ether_hdr)) == 4502 MLX5_ESEG_MIN_INLINE_SIZE, 4503 "invalid min inline data size"); 4504 } 4505 /* 4506 * If inlining is enabled at configuration time 4507 * the limit must be not less than minimal size. 4508 * Otherwise we would do extra check for data 4509 * size to avoid crashes due to length overflow. 4510 */ 4511 MLX5_ASSERT(txq->inlen_send >= 4512 MLX5_ESEG_MIN_INLINE_SIZE); 4513 if (inlen <= txq->inlen_send) { 4514 unsigned int seg_n, wqe_n; 4515 4516 rte_prefetch0(rte_pktmbuf_mtod 4517 (loc->mbuf, uint8_t *)); 4518 /* Check against minimal length. */ 4519 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 4520 return MLX5_TXCMP_CODE_ERROR; 4521 if (loc->mbuf->ol_flags & 4522 PKT_TX_DYNF_NOINLINE) { 4523 /* 4524 * The hint flag not to inline packet 4525 * data is set. Check whether we can 4526 * follow the hint. 4527 */ 4528 if ((!MLX5_TXOFF_CONFIG(EMPW) && 4529 txq->inlen_mode) || 4530 (MLX5_TXOFF_CONFIG(MPW) && 4531 txq->inlen_mode)) { 4532 if (inlen <= txq->inlen_send) 4533 goto single_inline; 4534 /* 4535 * The hardware requires the 4536 * minimal inline data header. 4537 */ 4538 goto single_min_inline; 4539 } 4540 if (MLX5_TXOFF_CONFIG(VLAN) && 4541 vlan && !txq->vlan_en) { 4542 /* 4543 * We must insert VLAN tag 4544 * by software means. 4545 */ 4546 goto single_part_inline; 4547 } 4548 goto single_no_inline; 4549 } 4550 single_inline: 4551 /* 4552 * Completely inlined packet data WQE: 4553 * - Control Segment, SEND opcode 4554 * - Ethernet Segment, no VLAN insertion 4555 * - Data inlined, VLAN optionally inserted 4556 * - Alignment to MLX5_WSEG_SIZE 4557 * Have to estimate amount of WQEBBs 4558 */ 4559 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 4560 MLX5_ESEG_MIN_INLINE_SIZE + 4561 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4562 /* Check if there are enough WQEBBs. */ 4563 wqe_n = (seg_n + 3) / 4; 4564 if (wqe_n > loc->wqe_free) 4565 return MLX5_TXCMP_CODE_EXIT; 4566 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4567 loc->wqe_last = wqe; 4568 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 4569 MLX5_OPCODE_SEND, olx); 4570 mlx5_tx_eseg_data(txq, loc, wqe, 4571 vlan, inlen, 0, olx); 4572 txq->wqe_ci += wqe_n; 4573 loc->wqe_free -= wqe_n; 4574 /* 4575 * Packet data are completely inlined, 4576 * free the packet immediately. 
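 * No reference to the mbuf is kept once its data has been copied into
 * the WQE ring.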
4577 */ 4578 rte_pktmbuf_free_seg(loc->mbuf); 4579 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 4580 MLX5_TXOFF_CONFIG(MPW)) && 4581 txq->inlen_mode) { 4582 /* 4583 * If minimal inlining is requested the eMPW 4584 * feature should be disabled due to data is 4585 * inlined into Ethernet Segment, which can 4586 * not contain inlined data for eMPW due to 4587 * segment shared for all packets. 4588 */ 4589 struct mlx5_wqe_dseg *__rte_restrict dseg; 4590 unsigned int ds; 4591 uint8_t *dptr; 4592 4593 /* 4594 * The inline-mode settings require 4595 * to inline the specified amount of 4596 * data bytes to the Ethernet Segment. 4597 * We should check the free space in 4598 * WQE ring buffer to inline partially. 4599 */ 4600 single_min_inline: 4601 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 4602 MLX5_ASSERT(inlen > txq->inlen_mode); 4603 MLX5_ASSERT(txq->inlen_mode >= 4604 MLX5_ESEG_MIN_INLINE_SIZE); 4605 /* 4606 * Check whether there are enough free WQEBBs: 4607 * - Control Segment 4608 * - Ethernet Segment 4609 * - First Segment of inlined Ethernet data 4610 * - ... data continued ... 4611 * - Finishing Data Segment of pointer type 4612 */ 4613 ds = (MLX5_WQE_CSEG_SIZE + 4614 MLX5_WQE_ESEG_SIZE + 4615 MLX5_WQE_DSEG_SIZE + 4616 txq->inlen_mode - 4617 MLX5_ESEG_MIN_INLINE_SIZE + 4618 MLX5_WQE_DSEG_SIZE + 4619 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 4620 if (loc->wqe_free < ((ds + 3) / 4)) 4621 return MLX5_TXCMP_CODE_EXIT; 4622 /* 4623 * Build the ordinary SEND WQE: 4624 * - Control Segment 4625 * - Ethernet Segment, inline inlen_mode bytes 4626 * - Data Segment of pointer type 4627 */ 4628 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4629 loc->wqe_last = wqe; 4630 mlx5_tx_cseg_init(txq, loc, wqe, ds, 4631 MLX5_OPCODE_SEND, olx); 4632 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 4633 txq->inlen_mode, 4634 0, olx); 4635 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4636 txq->inlen_mode - vlan; 4637 inlen -= txq->inlen_mode; 4638 mlx5_tx_dseg_ptr(txq, loc, dseg, 4639 dptr, inlen, olx); 4640 /* 4641 * WQE is built, update the loop parameters 4642 * and got to the next packet. 4643 */ 4644 txq->wqe_ci += (ds + 3) / 4; 4645 loc->wqe_free -= (ds + 3) / 4; 4646 /* We have to store mbuf in elts.*/ 4647 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4648 txq->elts[txq->elts_head++ & txq->elts_m] = 4649 loc->mbuf; 4650 --loc->elts_free; 4651 } else { 4652 uint8_t *dptr; 4653 unsigned int dlen; 4654 4655 /* 4656 * Partially inlined packet data WQE, we have 4657 * some space in title WQEBB, we can fill it 4658 * with some packet data. It takes one WQEBB, 4659 * it is available, no extra space check: 4660 * - Control Segment, SEND opcode 4661 * - Ethernet Segment, no VLAN insertion 4662 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 4663 * - Data Segment, pointer type 4664 * 4665 * We also get here if VLAN insertion is not 4666 * supported by HW, the inline is enabled. 4667 */ 4668 single_part_inline: 4669 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4670 loc->wqe_last = wqe; 4671 mlx5_tx_cseg_init(txq, loc, wqe, 4, 4672 MLX5_OPCODE_SEND, olx); 4673 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 4674 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 4675 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 4676 /* 4677 * The length check is performed above, by 4678 * comparing with txq->inlen_send. We should 4679 * not get overflow here. 
4680 */ 4681 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 4682 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 4683 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 4684 dptr, dlen, olx); 4685 ++txq->wqe_ci; 4686 --loc->wqe_free; 4687 /* We have to store mbuf in elts.*/ 4688 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 4689 txq->elts[txq->elts_head++ & txq->elts_m] = 4690 loc->mbuf; 4691 --loc->elts_free; 4692 } 4693 #ifdef MLX5_PMD_SOFT_COUNTERS 4694 /* Update sent data bytes counter. */ 4695 txq->stats.obytes += vlan + 4696 rte_pktmbuf_data_len(loc->mbuf); 4697 #endif 4698 } else { 4699 /* 4700 * No inline at all, it means the CPU cycles saving 4701 * is prioritized at configuration, we should not 4702 * copy any packet data to WQE. 4703 * 4704 * SEND WQE, one WQEBB: 4705 * - Control Segment, SEND opcode 4706 * - Ethernet Segment, optional VLAN, no inline 4707 * - Data Segment, pointer type 4708 */ 4709 single_no_inline: 4710 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 4711 loc->wqe_last = wqe; 4712 mlx5_tx_cseg_init(txq, loc, wqe, 3, 4713 MLX5_OPCODE_SEND, olx); 4714 mlx5_tx_eseg_none(txq, loc, wqe, olx); 4715 mlx5_tx_dseg_ptr 4716 (txq, loc, &wqe->dseg[0], 4717 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 4718 rte_pktmbuf_data_len(loc->mbuf), olx); 4719 ++txq->wqe_ci; 4720 --loc->wqe_free; 4721 /* 4722 * We should not store mbuf pointer in elts 4723 * if no inlining is configured, this is done 4724 * by calling routine in a batch copy. 4725 */ 4726 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 4727 --loc->elts_free; 4728 #ifdef MLX5_PMD_SOFT_COUNTERS 4729 /* Update sent data bytes counter. */ 4730 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 4731 if (MLX5_TXOFF_CONFIG(VLAN) && 4732 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 4733 txq->stats.obytes += 4734 sizeof(struct rte_vlan_hdr); 4735 #endif 4736 } 4737 ++loc->pkts_sent; 4738 --pkts_n; 4739 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 4740 return MLX5_TXCMP_CODE_EXIT; 4741 loc->mbuf = *pkts++; 4742 if (pkts_n > 1) 4743 rte_prefetch0(*pkts); 4744 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 4745 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 4746 return ret; 4747 } 4748 MLX5_ASSERT(false); 4749 } 4750 4751 static __rte_always_inline enum mlx5_txcmp_code 4752 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 4753 struct rte_mbuf **__rte_restrict pkts, 4754 unsigned int pkts_n, 4755 struct mlx5_txq_local *__rte_restrict loc, 4756 unsigned int olx) 4757 { 4758 enum mlx5_txcmp_code ret; 4759 4760 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 4761 if (ret == MLX5_TXCMP_CODE_SINGLE) 4762 goto ordinary_send; 4763 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 4764 for (;;) { 4765 /* Optimize for inline/no inline eMPW send. */ 4766 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 4767 mlx5_tx_burst_empw_inline 4768 (txq, pkts, pkts_n, loc, olx) : 4769 mlx5_tx_burst_empw_simple 4770 (txq, pkts, pkts_n, loc, olx); 4771 if (ret != MLX5_TXCMP_CODE_SINGLE) 4772 return ret; 4773 /* The resources to send one packet should remain. */ 4774 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4775 ordinary_send: 4776 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 4777 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 4778 if (ret != MLX5_TXCMP_CODE_EMPW) 4779 return ret; 4780 /* The resources to send one packet should remain. */ 4781 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 4782 } 4783 } 4784 4785 /** 4786 * DPDK Tx callback template. This is configured template 4787 * used to generate routines optimized for specified offload setup. 
4788 * One of this generated functions is chosen at SQ configuration 4789 * time. 4790 * 4791 * @param txq 4792 * Generic pointer to TX queue structure. 4793 * @param[in] pkts 4794 * Packets to transmit. 4795 * @param pkts_n 4796 * Number of packets in array. 4797 * @param olx 4798 * Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx 4799 * values. Should be static to take compile time static configuration 4800 * advantages. 4801 * 4802 * @return 4803 * Number of packets successfully transmitted (<= pkts_n). 4804 */ 4805 static __rte_always_inline uint16_t 4806 mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, 4807 struct rte_mbuf **__rte_restrict pkts, 4808 uint16_t pkts_n, 4809 unsigned int olx) 4810 { 4811 struct mlx5_txq_local loc; 4812 enum mlx5_txcmp_code ret; 4813 unsigned int part; 4814 4815 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4816 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4817 if (unlikely(!pkts_n)) 4818 return 0; 4819 loc.pkts_sent = 0; 4820 loc.pkts_copy = 0; 4821 loc.wqe_last = NULL; 4822 4823 send_loop: 4824 loc.pkts_loop = loc.pkts_sent; 4825 /* 4826 * Check if there are some CQEs, if any: 4827 * - process an encountered errors 4828 * - process the completed WQEs 4829 * - free related mbufs 4830 * - doorbell the NIC about processed CQEs 4831 */ 4832 rte_prefetch0(*(pkts + loc.pkts_sent)); 4833 mlx5_tx_handle_completion(txq, olx); 4834 /* 4835 * Calculate the number of available resources - elts and WQEs. 4836 * There are two possible different scenarios: 4837 * - no data inlining into WQEs, one WQEBB may contains up to 4838 * four packets, in this case elts become scarce resource 4839 * - data inlining into WQEs, one packet may require multiple 4840 * WQEBBs, the WQEs become the limiting factor. 4841 */ 4842 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 4843 loc.elts_free = txq->elts_s - 4844 (uint16_t)(txq->elts_head - txq->elts_tail); 4845 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 4846 loc.wqe_free = txq->wqe_s - 4847 (uint16_t)(txq->wqe_ci - txq->wqe_pi); 4848 if (unlikely(!loc.elts_free || !loc.wqe_free)) 4849 goto burst_exit; 4850 for (;;) { 4851 /* 4852 * Fetch the packet from array. Usually this is 4853 * the first packet in series of multi/single 4854 * segment packets. 4855 */ 4856 loc.mbuf = *(pkts + loc.pkts_sent); 4857 /* Dedicated branch for multi-segment packets. */ 4858 if (MLX5_TXOFF_CONFIG(MULTI) && 4859 unlikely(NB_SEGS(loc.mbuf) > 1)) { 4860 /* 4861 * Multi-segment packet encountered. 4862 * Hardware is able to process it only 4863 * with SEND/TSO opcodes, one packet 4864 * per WQE, do it in dedicated routine. 4865 */ 4866 enter_send_multi: 4867 MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy); 4868 part = loc.pkts_sent - loc.pkts_copy; 4869 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 4870 /* 4871 * There are some single-segment mbufs not 4872 * stored in elts. The mbufs must be in the 4873 * same order as WQEs, so we must copy the 4874 * mbufs to elts here, before the coming 4875 * multi-segment packet mbufs is appended. 4876 */ 4877 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, 4878 part, olx); 4879 loc.pkts_copy = loc.pkts_sent; 4880 } 4881 MLX5_ASSERT(pkts_n > loc.pkts_sent); 4882 ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx); 4883 if (!MLX5_TXOFF_CONFIG(INLINE)) 4884 loc.pkts_copy = loc.pkts_sent; 4885 /* 4886 * These returned code checks are supposed 4887 * to be optimized out due to routine inlining. 
4888 */ 4889 if (ret == MLX5_TXCMP_CODE_EXIT) { 4890 /* 4891 * The routine returns this code when 4892 * all packets are sent or there is no 4893 * enough resources to complete request. 4894 */ 4895 break; 4896 } 4897 if (ret == MLX5_TXCMP_CODE_ERROR) { 4898 /* 4899 * The routine returns this code when 4900 * some error in the incoming packets 4901 * format occurred. 4902 */ 4903 txq->stats.oerrors++; 4904 break; 4905 } 4906 if (ret == MLX5_TXCMP_CODE_SINGLE) { 4907 /* 4908 * The single-segment packet was encountered 4909 * in the array, try to send it with the 4910 * best optimized way, possible engaging eMPW. 4911 */ 4912 goto enter_send_single; 4913 } 4914 if (MLX5_TXOFF_CONFIG(TSO) && 4915 ret == MLX5_TXCMP_CODE_TSO) { 4916 /* 4917 * The single-segment TSO packet was 4918 * encountered in the array. 4919 */ 4920 goto enter_send_tso; 4921 } 4922 /* We must not get here. Something is going wrong. */ 4923 MLX5_ASSERT(false); 4924 txq->stats.oerrors++; 4925 break; 4926 } 4927 /* Dedicated branch for single-segment TSO packets. */ 4928 if (MLX5_TXOFF_CONFIG(TSO) && 4929 unlikely(loc.mbuf->ol_flags & PKT_TX_TCP_SEG)) { 4930 /* 4931 * TSO might require special way for inlining 4932 * (dedicated parameters) and is sent with 4933 * MLX5_OPCODE_TSO opcode only, provide this 4934 * in dedicated branch. 4935 */ 4936 enter_send_tso: 4937 MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1); 4938 MLX5_ASSERT(pkts_n > loc.pkts_sent); 4939 ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx); 4940 /* 4941 * These returned code checks are supposed 4942 * to be optimized out due to routine inlining. 4943 */ 4944 if (ret == MLX5_TXCMP_CODE_EXIT) 4945 break; 4946 if (ret == MLX5_TXCMP_CODE_ERROR) { 4947 txq->stats.oerrors++; 4948 break; 4949 } 4950 if (ret == MLX5_TXCMP_CODE_SINGLE) 4951 goto enter_send_single; 4952 if (MLX5_TXOFF_CONFIG(MULTI) && 4953 ret == MLX5_TXCMP_CODE_MULTI) { 4954 /* 4955 * The multi-segment packet was 4956 * encountered in the array. 4957 */ 4958 goto enter_send_multi; 4959 } 4960 /* We must not get here. Something is going wrong. */ 4961 MLX5_ASSERT(false); 4962 txq->stats.oerrors++; 4963 break; 4964 } 4965 /* 4966 * The dedicated branch for the single-segment packets 4967 * without TSO. Often these ones can be sent using 4968 * MLX5_OPCODE_EMPW with multiple packets in one WQE. 4969 * The routine builds the WQEs till it encounters 4970 * the TSO or multi-segment packet (in case if these 4971 * offloads are requested at SQ configuration time). 4972 */ 4973 enter_send_single: 4974 MLX5_ASSERT(pkts_n > loc.pkts_sent); 4975 ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx); 4976 /* 4977 * These returned code checks are supposed 4978 * to be optimized out due to routine inlining. 4979 */ 4980 if (ret == MLX5_TXCMP_CODE_EXIT) 4981 break; 4982 if (ret == MLX5_TXCMP_CODE_ERROR) { 4983 txq->stats.oerrors++; 4984 break; 4985 } 4986 if (MLX5_TXOFF_CONFIG(MULTI) && 4987 ret == MLX5_TXCMP_CODE_MULTI) { 4988 /* 4989 * The multi-segment packet was 4990 * encountered in the array. 4991 */ 4992 goto enter_send_multi; 4993 } 4994 if (MLX5_TXOFF_CONFIG(TSO) && 4995 ret == MLX5_TXCMP_CODE_TSO) { 4996 /* 4997 * The single-segment TSO packet was 4998 * encountered in the array. 4999 */ 5000 goto enter_send_tso; 5001 } 5002 /* We must not get here. Something is going wrong. 
*/ 5003 MLX5_ASSERT(false); 5004 txq->stats.oerrors++; 5005 break; 5006 } 5007 /* 5008 * Main Tx loop is completed, do the rest: 5009 * - set completion request if thresholds are reached 5010 * - doorbell the hardware 5011 * - copy the rest of mbufs to elts (if any) 5012 */ 5013 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) || 5014 loc.pkts_sent >= loc.pkts_copy); 5015 /* Take a shortcut if nothing is sent. */ 5016 if (unlikely(loc.pkts_sent == loc.pkts_loop)) 5017 goto burst_exit; 5018 /* Request CQE generation if limits are reached. */ 5019 mlx5_tx_request_completion(txq, &loc, olx); 5020 /* 5021 * Ring the QP doorbell immediately after WQE building completion 5022 * to improve latency. The purely software-related data treatment 5023 * can be completed after the doorbell. Tx CQEs for this SQ are 5024 * processed in this thread only by polling. 5025 * 5026 * The rdma core library can map the doorbell register in two ways, 5027 * depending on the environment variable "MLX5_SHUT_UP_BF": 5028 * 5029 * - as regular cached memory, the variable is either missing or 5030 * set to zero. This type of mapping may cause significant 5031 * doorbell register writing latency and requires an explicit 5032 * memory write barrier to mitigate this issue and prevent 5033 * write combining. 5034 * 5035 * - as non-cached memory, the variable is present and set to 5036 * a non-zero value. This type of mapping may cause a performance 5037 * impact under heavy load, but the explicit write 5038 * memory barrier is not required, which may improve core 5039 * performance. 5040 * 5041 * - the legacy behaviour (prior to the 19.08 release) was to use 5042 * heuristics to decide whether the write memory barrier should 5043 * be performed. This behavior is enabled by specifying 5044 * tx_db_nc=2: the write barrier is skipped if the application 5045 * provides the full recommended burst of packets, on the 5046 * assumption that more packets are coming and the barrier 5047 * will be issued on the next burst (after descriptor writing, 5048 * at least). 5049 */ 5050 mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc && 5051 (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST)); 5052 /* Not all of the mbufs may be stored into elts yet. */ 5053 part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy; 5054 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 5055 /* 5056 * There are some single-segment mbufs not stored in elts. 5057 * This can only happen if the last packets were single-segment. 5058 * The copying is gathered into one place because it is 5059 * a good opportunity to optimize it with SIMD. 5060 * Unfortunately, if inlining is enabled, gaps in the 5061 * pointer array may appear due to early freeing of the 5062 * inlined mbufs. 5063 */ 5064 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx); 5065 loc.pkts_copy = loc.pkts_sent; 5066 } 5067 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 5068 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 5069 if (pkts_n > loc.pkts_sent) { 5070 /* 5071 * If the burst size is large there might not be enough CQEs 5072 * fetched from the completion queue and not enough resources 5073 * freed to send all the packets. 5074 */ 5075 goto send_loop; 5076 } 5077 burst_exit: 5078 #ifdef MLX5_PMD_SOFT_COUNTERS 5079 /* Increment sent packets counter. */ 5080 txq->stats.opackets += loc.pkts_sent; 5081 #endif 5082 return loc.pkts_sent; 5083 } 5084 5085 /* Generate routines with Enhanced Multi-Packet Write support.
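Each declaration below instantiates the mlx5_tx_burst_tmpl() template with a compile-time constant offload mask, so the branches for unused offloads are optimized out of the generated routine.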
*/ 5086 MLX5_TXOFF_DECL(full_empw, 5087 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW) 5088 5089 MLX5_TXOFF_DECL(none_empw, 5090 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 5091 5092 MLX5_TXOFF_DECL(md_empw, 5093 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5094 5095 MLX5_TXOFF_DECL(mt_empw, 5096 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5097 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5098 5099 MLX5_TXOFF_DECL(mtsc_empw, 5100 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5101 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5102 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5103 5104 MLX5_TXOFF_DECL(mti_empw, 5105 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5106 MLX5_TXOFF_CONFIG_INLINE | 5107 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5108 5109 MLX5_TXOFF_DECL(mtv_empw, 5110 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5111 MLX5_TXOFF_CONFIG_VLAN | 5112 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5113 5114 MLX5_TXOFF_DECL(mtiv_empw, 5115 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5116 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5117 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5118 5119 MLX5_TXOFF_DECL(sc_empw, 5120 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5121 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5122 5123 MLX5_TXOFF_DECL(sci_empw, 5124 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5125 MLX5_TXOFF_CONFIG_INLINE | 5126 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5127 5128 MLX5_TXOFF_DECL(scv_empw, 5129 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5130 MLX5_TXOFF_CONFIG_VLAN | 5131 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5132 5133 MLX5_TXOFF_DECL(sciv_empw, 5134 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5135 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5136 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5137 5138 MLX5_TXOFF_DECL(i_empw, 5139 MLX5_TXOFF_CONFIG_INLINE | 5140 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5141 5142 MLX5_TXOFF_DECL(v_empw, 5143 MLX5_TXOFF_CONFIG_VLAN | 5144 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5145 5146 MLX5_TXOFF_DECL(iv_empw, 5147 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5148 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5149 5150 /* Generate routines without Enhanced Multi-Packet Write support. 
*/ 5151 MLX5_TXOFF_DECL(full, 5152 MLX5_TXOFF_CONFIG_FULL) 5153 5154 MLX5_TXOFF_DECL(none, 5155 MLX5_TXOFF_CONFIG_NONE) 5156 5157 MLX5_TXOFF_DECL(md, 5158 MLX5_TXOFF_CONFIG_METADATA) 5159 5160 MLX5_TXOFF_DECL(mt, 5161 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5162 MLX5_TXOFF_CONFIG_METADATA) 5163 5164 MLX5_TXOFF_DECL(mtsc, 5165 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5166 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5167 MLX5_TXOFF_CONFIG_METADATA) 5168 5169 MLX5_TXOFF_DECL(mti, 5170 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5171 MLX5_TXOFF_CONFIG_INLINE | 5172 MLX5_TXOFF_CONFIG_METADATA) 5173 5174 5175 MLX5_TXOFF_DECL(mtv, 5176 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5177 MLX5_TXOFF_CONFIG_VLAN | 5178 MLX5_TXOFF_CONFIG_METADATA) 5179 5180 5181 MLX5_TXOFF_DECL(mtiv, 5182 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5183 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5184 MLX5_TXOFF_CONFIG_METADATA) 5185 5186 MLX5_TXOFF_DECL(sc, 5187 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5188 MLX5_TXOFF_CONFIG_METADATA) 5189 5190 MLX5_TXOFF_DECL(sci, 5191 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5192 MLX5_TXOFF_CONFIG_INLINE | 5193 MLX5_TXOFF_CONFIG_METADATA) 5194 5195 5196 MLX5_TXOFF_DECL(scv, 5197 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5198 MLX5_TXOFF_CONFIG_VLAN | 5199 MLX5_TXOFF_CONFIG_METADATA) 5200 5201 5202 MLX5_TXOFF_DECL(sciv, 5203 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5204 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5205 MLX5_TXOFF_CONFIG_METADATA) 5206 5207 MLX5_TXOFF_DECL(i, 5208 MLX5_TXOFF_CONFIG_INLINE | 5209 MLX5_TXOFF_CONFIG_METADATA) 5210 5211 MLX5_TXOFF_DECL(v, 5212 MLX5_TXOFF_CONFIG_VLAN | 5213 MLX5_TXOFF_CONFIG_METADATA) 5214 5215 MLX5_TXOFF_DECL(iv, 5216 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5217 MLX5_TXOFF_CONFIG_METADATA) 5218 5219 /* Generate routines with timestamp scheduling. */ 5220 MLX5_TXOFF_DECL(full_ts_nompw, 5221 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP) 5222 5223 MLX5_TXOFF_DECL(full_ts_nompwi, 5224 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5225 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5226 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5227 MLX5_TXOFF_CONFIG_TXPP) 5228 5229 MLX5_TXOFF_DECL(full_ts, 5230 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP | 5231 MLX5_TXOFF_CONFIG_EMPW) 5232 5233 MLX5_TXOFF_DECL(full_ts_noi, 5234 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5235 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5236 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5237 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5238 5239 MLX5_TXOFF_DECL(none_ts, 5240 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP | 5241 MLX5_TXOFF_CONFIG_EMPW) 5242 5243 MLX5_TXOFF_DECL(mdi_ts, 5244 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5245 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5246 5247 MLX5_TXOFF_DECL(mti_ts, 5248 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5249 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5250 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5251 5252 MLX5_TXOFF_DECL(mtiv_ts, 5253 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5254 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5255 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP | 5256 MLX5_TXOFF_CONFIG_EMPW) 5257 5258 /* 5259 * Generate routines with Legacy Multi-Packet Write support. 
5260 * This mode is supported by ConnectX-4 Lx only and imposes 5261 * offload limitations, not supported: 5262 * - ACL/Flows (metadata are becoming meaningless) 5263 * - WQE Inline headers 5264 * - SRIOV (E-Switch offloads) 5265 * - VLAN insertion 5266 * - tunnel encapsulation/decapsulation 5267 * - TSO 5268 */ 5269 MLX5_TXOFF_DECL(none_mpw, 5270 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW | 5271 MLX5_TXOFF_CONFIG_MPW) 5272 5273 MLX5_TXOFF_DECL(mci_mpw, 5274 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 5275 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 5276 MLX5_TXOFF_CONFIG_MPW) 5277 5278 MLX5_TXOFF_DECL(mc_mpw, 5279 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM | 5280 MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW) 5281 5282 MLX5_TXOFF_DECL(i_mpw, 5283 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW | 5284 MLX5_TXOFF_CONFIG_MPW) 5285 5286 /* 5287 * Array of declared and compiled Tx burst function and corresponding 5288 * supported offloads set. The array is used to select the Tx burst 5289 * function for specified offloads set at Tx queue configuration time. 5290 */ 5291 const struct { 5292 eth_tx_burst_t func; 5293 unsigned int olx; 5294 } txoff_func[] = { 5295 MLX5_TXOFF_INFO(full_empw, 5296 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5297 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5298 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5299 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5300 5301 MLX5_TXOFF_INFO(none_empw, 5302 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW) 5303 5304 MLX5_TXOFF_INFO(md_empw, 5305 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5306 5307 MLX5_TXOFF_INFO(mt_empw, 5308 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5309 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5310 5311 MLX5_TXOFF_INFO(mtsc_empw, 5312 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5313 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5314 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5315 5316 MLX5_TXOFF_INFO(mti_empw, 5317 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5318 MLX5_TXOFF_CONFIG_INLINE | 5319 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5320 5321 MLX5_TXOFF_INFO(mtv_empw, 5322 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5323 MLX5_TXOFF_CONFIG_VLAN | 5324 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5325 5326 MLX5_TXOFF_INFO(mtiv_empw, 5327 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5328 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5329 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5330 5331 MLX5_TXOFF_INFO(sc_empw, 5332 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5333 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5334 5335 MLX5_TXOFF_INFO(sci_empw, 5336 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5337 MLX5_TXOFF_CONFIG_INLINE | 5338 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5339 5340 MLX5_TXOFF_INFO(scv_empw, 5341 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5342 MLX5_TXOFF_CONFIG_VLAN | 5343 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5344 5345 MLX5_TXOFF_INFO(sciv_empw, 5346 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5347 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5348 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5349 5350 MLX5_TXOFF_INFO(i_empw, 5351 MLX5_TXOFF_CONFIG_INLINE | 5352 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5353 5354 MLX5_TXOFF_INFO(v_empw, 5355 MLX5_TXOFF_CONFIG_VLAN | 5356 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5357 5358 MLX5_TXOFF_INFO(iv_empw, 
5359 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5360 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW) 5361 5362 MLX5_TXOFF_INFO(full_ts_nompw, 5363 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP) 5364 5365 MLX5_TXOFF_INFO(full_ts_nompwi, 5366 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5367 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5368 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5369 MLX5_TXOFF_CONFIG_TXPP) 5370 5371 MLX5_TXOFF_INFO(full_ts, 5372 MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_TXPP | 5373 MLX5_TXOFF_CONFIG_EMPW) 5374 5375 MLX5_TXOFF_INFO(full_ts_noi, 5376 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5377 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5378 MLX5_TXOFF_CONFIG_VLAN | MLX5_TXOFF_CONFIG_METADATA | 5379 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5380 5381 MLX5_TXOFF_INFO(none_ts, 5382 MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_TXPP | 5383 MLX5_TXOFF_CONFIG_EMPW) 5384 5385 MLX5_TXOFF_INFO(mdi_ts, 5386 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5387 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5388 5389 MLX5_TXOFF_INFO(mti_ts, 5390 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5391 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_METADATA | 5392 MLX5_TXOFF_CONFIG_TXPP | MLX5_TXOFF_CONFIG_EMPW) 5393 5394 MLX5_TXOFF_INFO(mtiv_ts, 5395 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5396 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5397 MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_TXPP | 5398 MLX5_TXOFF_CONFIG_EMPW) 5399 5400 MLX5_TXOFF_INFO(full, 5401 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5402 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5403 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5404 MLX5_TXOFF_CONFIG_METADATA) 5405 5406 MLX5_TXOFF_INFO(none, 5407 MLX5_TXOFF_CONFIG_NONE) 5408 5409 MLX5_TXOFF_INFO(md, 5410 MLX5_TXOFF_CONFIG_METADATA) 5411 5412 MLX5_TXOFF_INFO(mt, 5413 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5414 MLX5_TXOFF_CONFIG_METADATA) 5415 5416 MLX5_TXOFF_INFO(mtsc, 5417 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5418 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5419 MLX5_TXOFF_CONFIG_METADATA) 5420 5421 MLX5_TXOFF_INFO(mti, 5422 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5423 MLX5_TXOFF_CONFIG_INLINE | 5424 MLX5_TXOFF_CONFIG_METADATA) 5425 5426 MLX5_TXOFF_INFO(mtv, 5427 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5428 MLX5_TXOFF_CONFIG_VLAN | 5429 MLX5_TXOFF_CONFIG_METADATA) 5430 5431 MLX5_TXOFF_INFO(mtiv, 5432 MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_TSO | 5433 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5434 MLX5_TXOFF_CONFIG_METADATA) 5435 5436 MLX5_TXOFF_INFO(sc, 5437 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5438 MLX5_TXOFF_CONFIG_METADATA) 5439 5440 MLX5_TXOFF_INFO(sci, 5441 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5442 MLX5_TXOFF_CONFIG_INLINE | 5443 MLX5_TXOFF_CONFIG_METADATA) 5444 5445 MLX5_TXOFF_INFO(scv, 5446 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5447 MLX5_TXOFF_CONFIG_VLAN | 5448 MLX5_TXOFF_CONFIG_METADATA) 5449 5450 MLX5_TXOFF_INFO(sciv, 5451 MLX5_TXOFF_CONFIG_SWP | MLX5_TXOFF_CONFIG_CSUM | 5452 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5453 MLX5_TXOFF_CONFIG_METADATA) 5454 5455 MLX5_TXOFF_INFO(i, 5456 MLX5_TXOFF_CONFIG_INLINE | 5457 MLX5_TXOFF_CONFIG_METADATA) 5458 5459 MLX5_TXOFF_INFO(v, 5460 MLX5_TXOFF_CONFIG_VLAN | 5461 MLX5_TXOFF_CONFIG_METADATA) 5462 5463 MLX5_TXOFF_INFO(iv, 5464 MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_VLAN | 5465 
		MLX5_TXOFF_CONFIG_METADATA)

MLX5_TXOFF_INFO(none_mpw,
		MLX5_TXOFF_CONFIG_NONE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(mci_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(mc_mpw,
		MLX5_TXOFF_CONFIG_MULTI | MLX5_TXOFF_CONFIG_CSUM |
		MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_MPW)

MLX5_TXOFF_INFO(i_mpw,
		MLX5_TXOFF_CONFIG_INLINE | MLX5_TXOFF_CONFIG_EMPW |
		MLX5_TXOFF_CONFIG_MPW)
};

/**
 * Configure the Tx function to use. The routine checks the configured
 * Tx offloads for the device and selects the appropriate Tx burst
 * routine. There are multiple Tx burst routines compiled from
 * the same template, each optimized for a dedicated Tx offloads set.
 *
 * @param dev
 *   Pointer to the Ethernet device structure.
 *
 * @return
 *   Pointer to selected Tx burst function.
 */
eth_tx_burst_t
mlx5_select_tx_function(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
	unsigned int diff = 0, olx = 0, i, m;

	static_assert(MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE <=
		      MLX5_DSEG_MAX, "invalid WQE max size");
	static_assert(MLX5_WQE_CSEG_SIZE == MLX5_WSEG_SIZE,
		      "invalid WQE Control Segment size");
	static_assert(MLX5_WQE_ESEG_SIZE == MLX5_WSEG_SIZE,
		      "invalid WQE Ethernet Segment size");
	static_assert(MLX5_WQE_DSEG_SIZE == MLX5_WSEG_SIZE,
		      "invalid WQE Data Segment size");
	static_assert(MLX5_WQE_SIZE == 4 * MLX5_WSEG_SIZE,
		      "invalid WQE size");
	MLX5_ASSERT(priv);
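	/*
	 * Translate the requested DEV_TX_OFFLOAD_* bits and the device
	 * configuration into the internal MLX5_TXOFF_CONFIG_* set that
	 * is matched against the txoff_func[] table below.
	 */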
	if (tx_offloads & DEV_TX_OFFLOAD_MULTI_SEGS) {
		/* We should support Multi-Segment Packets. */
		olx |= MLX5_TXOFF_CONFIG_MULTI;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO |
			   DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
			   DEV_TX_OFFLOAD_GRE_TNL_TSO |
			   DEV_TX_OFFLOAD_IP_TNL_TSO |
			   DEV_TX_OFFLOAD_UDP_TNL_TSO)) {
		/* We should support TCP Send Offload. */
		olx |= MLX5_TXOFF_CONFIG_TSO;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO |
			   DEV_TX_OFFLOAD_UDP_TNL_TSO |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
		/* We should support Software Parser for Tunnels. */
		olx |= MLX5_TXOFF_CONFIG_SWP;
	}
	if (tx_offloads & (DEV_TX_OFFLOAD_IPV4_CKSUM |
			   DEV_TX_OFFLOAD_UDP_CKSUM |
			   DEV_TX_OFFLOAD_TCP_CKSUM |
			   DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM)) {
		/* We should support IP/TCP/UDP Checksums. */
		olx |= MLX5_TXOFF_CONFIG_CSUM;
	}
	if (tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT) {
		/* We should support VLAN insertion. */
		olx |= MLX5_TXOFF_CONFIG_VLAN;
	}
	if (tx_offloads & DEV_TX_OFFLOAD_SEND_ON_TIMESTAMP &&
	    rte_mbuf_dynflag_lookup
			(RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL) >= 0 &&
	    rte_mbuf_dynfield_lookup
			(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL) >= 0) {
		/* Offload configured, dynamic entities registered. */
		olx |= MLX5_TXOFF_CONFIG_TXPP;
	}
	if (priv->txqs_n && (*priv->txqs)[0]) {
		struct mlx5_txq_data *txd = (*priv->txqs)[0];

		if (txd->inlen_send) {
			/*
			 * Check the data inline requirements. Data inline
			 * is enabled on a per-device basis, so checking
			 * the first Tx queue only is sufficient.
			 *
			 * If the device does not support VLAN insertion
			 * in WQE and some queues are requested to perform
			 * VLAN insertion offload, then inline must be
			 * enabled.
			 */
			olx |= MLX5_TXOFF_CONFIG_INLINE;
		}
	}
	if (config->mps == MLX5_MPW_ENHANCED &&
	    config->txq_inline_min <= 0) {
		/*
		 * The NIC supports Enhanced Multi-Packet Write
		 * and does not require minimal inline data.
		 */
		olx |= MLX5_TXOFF_CONFIG_EMPW;
	}
	if (rte_flow_dynf_metadata_avail()) {
		/* We should support Flow metadata. */
		olx |= MLX5_TXOFF_CONFIG_METADATA;
	}
	if (config->mps == MLX5_MPW) {
		/*
		 * The NIC supports Legacy Multi-Packet Write.
		 * The MLX5_TXOFF_CONFIG_MPW controls the
		 * descriptor building method in combination
		 * with MLX5_TXOFF_CONFIG_EMPW.
		 */
		if (!(olx & (MLX5_TXOFF_CONFIG_TSO |
			     MLX5_TXOFF_CONFIG_SWP |
			     MLX5_TXOFF_CONFIG_VLAN |
			     MLX5_TXOFF_CONFIG_METADATA)))
			olx |= MLX5_TXOFF_CONFIG_EMPW |
			       MLX5_TXOFF_CONFIG_MPW;
	}
	/*
	 * Scan the routines table to find the routine that satisfies
	 * the requested offloads set with the minimal amount of
	 * not requested offloads.
	 */
	m = RTE_DIM(txoff_func);
	for (i = 0; i < RTE_DIM(txoff_func); i++) {
		unsigned int tmp;

		tmp = txoff_func[i].olx;
		if (tmp == olx) {
			/* Meets requested offloads exactly. */
			m = i;
			break;
		}
		if ((tmp & olx) != olx) {
			/* Does not meet requested offloads at all. */
			continue;
		}
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_MPW)
			/* Do not enable legacy MPW if not configured. */
			continue;
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_EMPW)
			/* Do not enable eMPW if not configured. */
			continue;
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_INLINE)
			/* Do not enable inlining if not configured. */
			continue;
		if ((olx ^ tmp) & MLX5_TXOFF_CONFIG_TXPP)
			/* Do not enable scheduling if not configured. */
			continue;
		/*
		 * Some routine meets the requirements.
		 * Check whether it has the minimal amount
		 * of not requested offloads.
		 */
		tmp = __builtin_popcountl(tmp & ~olx);
		if (m >= RTE_DIM(txoff_func) || tmp < diff) {
			/* First or better match, save and continue. */
			m = i;
			diff = tmp;
			continue;
		}
		if (tmp == diff) {
			tmp = txoff_func[i].olx ^ txoff_func[m].olx;
			if (__builtin_ffsl(txoff_func[i].olx & ~tmp) <
			    __builtin_ffsl(txoff_func[m].olx & ~tmp)) {
				/* Lighter not requested offload. */
				m = i;
			}
		}
	}
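	/*
	 * As an illustration of the scan above: if the requested set were
	 * CSUM | METADATA | EMPW there would be no exact match in
	 * txoff_func[]; the matching supersets are mtsc_empw (three not
	 * requested offloads), scv_empw (two) and sc_empw (one), so
	 * sc_empw would be selected.
	 */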
	if (m >= RTE_DIM(txoff_func)) {
		DRV_LOG(DEBUG, "port %u has no selected Tx function"
			       " for requested offloads %04X",
			dev->data->port_id, olx);
		return NULL;
	}
	DRV_LOG(DEBUG, "port %u has selected Tx function"
		       " supporting offloads %04X/%04X",
		dev->data->port_id, olx, txoff_func[m].olx);
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MULTI)
		DRV_LOG(DEBUG, "\tMULTI (multi segment)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TSO)
		DRV_LOG(DEBUG, "\tTSO (TCP send offload)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_SWP)
		DRV_LOG(DEBUG, "\tSWP (software parser)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_CSUM)
		DRV_LOG(DEBUG, "\tCSUM (checksum offload)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_INLINE)
		DRV_LOG(DEBUG, "\tINLIN (inline data)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_VLAN)
		DRV_LOG(DEBUG, "\tVLANI (VLAN insertion)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_METADATA)
		DRV_LOG(DEBUG, "\tMETAD (tx Flow metadata)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_TXPP)
		DRV_LOG(DEBUG, "\tTSTMP (Tx scheduling)");
	if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_EMPW) {
		if (txoff_func[m].olx & MLX5_TXOFF_CONFIG_MPW)
			DRV_LOG(DEBUG, "\tMPW (Legacy MPW)");
		else
			DRV_LOG(DEBUG, "\tEMPW (Enhanced MPW)");
	}
	return txoff_func[m].func;
}

/**
 * DPDK callback to get the TX queue information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param tx_queue_id
 *   Tx queue identifier.
 *
 * @param qinfo
 *   Pointer to the TX queue information structure.
 *
 * @return
 *   None.
 */
void
mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
		  struct rte_eth_txq_info *qinfo)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_txq_data *txq = (*priv->txqs)[tx_queue_id];
	struct mlx5_txq_ctrl *txq_ctrl =
			container_of(txq, struct mlx5_txq_ctrl, txq);

	if (!txq)
		return;
	qinfo->nb_desc = txq->elts_s;
	qinfo->conf.tx_thresh.pthresh = 0;
	qinfo->conf.tx_thresh.hthresh = 0;
	qinfo->conf.tx_thresh.wthresh = 0;
	qinfo->conf.tx_rs_thresh = 0;
	qinfo->conf.tx_free_thresh = 0;
	qinfo->conf.tx_deferred_start = txq_ctrl ? 0 : 1;
	qinfo->conf.offloads = dev->data->dev_conf.txmode.offloads;
}
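/*
 * Usage sketch (illustrative only, not part of the driver): applications
 * reach the callback above through the generic ethdev API, with port_id
 * and queue_id as placeholders:
 *
 *	struct rte_eth_txq_info qinfo;
 *
 *	if (rte_eth_tx_queue_info_get(port_id, queue_id, &qinfo) == 0)
 *		printf("Tx ring size: %u descriptors\n", qinfo.nb_desc);
 */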
5755 " + SWP" : "", 5756 (olx & MLX5_TXOFF_CONFIG_CSUM) ? 5757 " + CSUM" : "", 5758 (olx & MLX5_TXOFF_CONFIG_INLINE) ? 5759 " + INLINE" : "", 5760 (olx & MLX5_TXOFF_CONFIG_VLAN) ? 5761 " + VLAN" : "", 5762 (olx & MLX5_TXOFF_CONFIG_METADATA) ? 5763 " + METADATA" : "", 5764 (olx & MLX5_TXOFF_CONFIG_TXPP) ? 5765 " + TXPP" : ""); 5766 return 0; 5767 } 5768 } 5769 return -EINVAL; 5770 } 5771