/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2021 6WIND S.A.
 * Copyright 2021 Mellanox Technologies, Ltd
 */

#ifndef RTE_PMD_MLX5_TX_H_
#define RTE_PMD_MLX5_TX_H_

#include <stdint.h>
#include <sys/queue.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_common.h>
#include <rte_spinlock.h>

#include <mlx5_common.h>
#include <mlx5_common_mr.h>

#include "mlx5.h"
#include "mlx5_autoconf.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
	MLX5_TXCMP_CODE_EXIT = 0,
	MLX5_TXCMP_CODE_ERROR,
	MLX5_TXCMP_CODE_SINGLE,
	MLX5_TXCMP_CODE_MULTI,
	MLX5_TXCMP_CODE_TSO,
	MLX5_TXCMP_CODE_EMPW,
};

/*
 * These defines are used to configure the set of Tx burst routine options
 * supported at compile time. Options that are not specified are optimized
 * out, because the related if conditions can be evaluated at compile time.
 * The offloads with the bigger runtime check overhead (requiring more CPU
 * cycles to skip) should have the bigger index - this is needed to select
 * the better matching routine if there is no exact match and some offloads
 * are not actually requested.
 */
#define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets.*/
#define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported.*/
#define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads.*/
#define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Check Sums offloaded. */
#define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported.*/
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported.*/
#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported.*/
#define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp.*/

/* The most common offloads groups. */
#define MLX5_TXOFF_CONFIG_NONE 0
#define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \
				MLX5_TXOFF_CONFIG_TSO | \
				MLX5_TXOFF_CONFIG_SWP | \
				MLX5_TXOFF_CONFIG_CSUM | \
				MLX5_TXOFF_CONFIG_INLINE | \
				MLX5_TXOFF_CONFIG_VLAN | \
				MLX5_TXOFF_CONFIG_METADATA)

#define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask)

#define MLX5_TXOFF_PRE_DECL(func) \
uint16_t mlx5_tx_burst_##func(void *txq, \
			      struct rte_mbuf **pkts, \
			      uint16_t pkts_n)

#define MLX5_TXOFF_DECL(func, olx) \
uint16_t mlx5_tx_burst_##func(void *txq, \
			      struct rte_mbuf **pkts, \
			      uint16_t pkts_n) \
{ \
	return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \
				  pkts, pkts_n, (olx)); \
}

/* Mbuf dynamic flag offset for inline. */
extern uint64_t rte_net_mlx5_dynf_inline_mask;
#define RTE_MBUF_F_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask

extern uint32_t mlx5_ptype_table[] __rte_cache_aligned;
extern uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned;
extern uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned;

struct mlx5_txq_stats {
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint64_t opackets; /**< Total of successfully sent packets. */
	uint64_t obytes; /**< Total of successfully sent bytes. */
#endif
	uint64_t oerrors; /**< Total number of failed transmitted packets. */
};

/* TX queue send local data. */
__extension__
struct mlx5_txq_local {
	struct mlx5_wqe *wqe_last; /* last sent WQE pointer. */
	struct rte_mbuf *mbuf; /* first mbuf to process.
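				* For multi-segment packets this pointer walks
				* along the segment chain as data is inlined;
				* mbuf_off and mbuf_nseg below track the
				* intra-segment offset and the remaining segment
				* count (see mlx5_tx_mseg_memcpy()).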
*/ 100 uint16_t pkts_copy; /* packets copied to elts. */ 101 uint16_t pkts_sent; /* packets sent. */ 102 uint16_t pkts_loop; /* packets sent on loop entry. */ 103 uint16_t elts_free; /* available elts remain. */ 104 uint16_t wqe_free; /* available wqe remain. */ 105 uint16_t mbuf_off; /* data offset in current mbuf. */ 106 uint16_t mbuf_nseg; /* number of remaining mbuf. */ 107 uint16_t mbuf_free; /* number of inline mbufs to free. */ 108 }; 109 110 /* TX queue descriptor. */ 111 __extension__ 112 struct mlx5_txq_data { 113 uint16_t elts_head; /* Current counter in (*elts)[]. */ 114 uint16_t elts_tail; /* Counter of first element awaiting completion. */ 115 uint16_t elts_comp; /* elts index since last completion request. */ 116 uint16_t elts_s; /* Number of mbuf elements. */ 117 uint16_t elts_m; /* Mask for mbuf elements indices. */ 118 /* Fields related to elts mbuf storage. */ 119 uint16_t wqe_ci; /* Consumer index for work queue. */ 120 uint16_t wqe_pi; /* Producer index for work queue. */ 121 uint16_t wqe_s; /* Number of WQ elements. */ 122 uint16_t wqe_m; /* Mask Number for WQ elements. */ 123 uint16_t wqe_comp; /* WQE index since last completion request. */ 124 uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */ 125 /* WQ related fields. */ 126 uint16_t cq_ci; /* Consumer index for completion queue. */ 127 uint16_t cq_pi; /* Production index for completion queue. */ 128 uint16_t cqe_s; /* Number of CQ elements. */ 129 uint16_t cqe_m; /* Mask for CQ indices. */ 130 /* CQ related fields. */ 131 uint16_t elts_n:4; /* elts[] length (in log2). */ 132 uint16_t cqe_n:4; /* Number of CQ elements (in log2). */ 133 uint16_t wqe_n:4; /* Number of WQ elements (in log2). */ 134 uint16_t tso_en:1; /* When set hardware TSO is enabled. */ 135 uint16_t tunnel_en:1; 136 /* When set TX offload for tunneled packets are supported. */ 137 uint16_t swp_en:1; /* Whether SW parser is enabled. */ 138 uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */ 139 uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */ 140 uint16_t db_heu:1; /* Doorbell heuristic write barrier. */ 141 uint16_t fast_free:1; /* mbuf fast free on Tx is enabled. */ 142 uint16_t inlen_send; /* Ordinary send data inline size. */ 143 uint16_t inlen_empw; /* eMPW max packet size to inline. */ 144 uint16_t inlen_mode; /* Minimal data length to inline. */ 145 uint32_t qp_num_8s; /* QP number shifted by 8. */ 146 uint64_t offloads; /* Offloads for Tx Queue. */ 147 struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */ 148 struct mlx5_wqe *wqes; /* Work queue. */ 149 struct mlx5_wqe *wqes_end; /* Work queue array limit. */ 150 #ifdef RTE_LIBRTE_MLX5_DEBUG 151 uint32_t *fcqs; /* Free completion queue (debug extended). */ 152 #else 153 uint16_t *fcqs; /* Free completion queue. */ 154 #endif 155 volatile struct mlx5_cqe *cqes; /* Completion queue. */ 156 volatile uint32_t *qp_db; /* Work queue doorbell. */ 157 volatile uint32_t *cq_db; /* Completion queue doorbell. */ 158 uint16_t port_id; /* Port ID of device. */ 159 uint16_t idx; /* Queue index. */ 160 uint64_t ts_mask; /* Timestamp flag dynamic mask. */ 161 int32_t ts_offset; /* Timestamp field dynamic offset. */ 162 struct mlx5_dev_ctx_shared *sh; /* Shared context. */ 163 struct mlx5_txq_stats stats; /* TX queue counters. */ 164 struct mlx5_uar_data uar_data; 165 struct rte_mbuf *elts[0]; 166 /* Storage for queued packets, must be the last field. */ 167 } __rte_cache_aligned; 168 169 enum mlx5_txq_type { 170 MLX5_TXQ_TYPE_STANDARD, /* Standard Tx queue. 
*/ 171 MLX5_TXQ_TYPE_HAIRPIN, /* Hairpin Tx queue. */ 172 }; 173 174 /* TX queue control descriptor. */ 175 struct mlx5_txq_ctrl { 176 LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */ 177 uint32_t refcnt; /* Reference counter. */ 178 unsigned int socket; /* CPU socket ID for allocations. */ 179 enum mlx5_txq_type type; /* The txq ctrl type. */ 180 unsigned int max_inline_data; /* Max inline data. */ 181 unsigned int max_tso_header; /* Max TSO header size. */ 182 struct mlx5_txq_obj *obj; /* Verbs/DevX queue object. */ 183 struct mlx5_priv *priv; /* Back pointer to private data. */ 184 off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */ 185 uint16_t dump_file_n; /* Number of dump files. */ 186 struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */ 187 uint32_t hairpin_status; /* Hairpin binding status. */ 188 struct mlx5_txq_data txq; /* Data path structure. */ 189 /* Must be the last field in the structure, contains elts[]. */ 190 }; 191 192 /* mlx5_txq.c */ 193 194 int mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id); 195 int mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id); 196 int mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t queue_id); 197 int mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t queue_id); 198 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 199 unsigned int socket, const struct rte_eth_txconf *conf); 200 int mlx5_tx_hairpin_queue_setup 201 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 202 const struct rte_eth_hairpin_conf *hairpin_conf); 203 void mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 204 int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd); 205 void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev); 206 int mlx5_txq_obj_verify(struct rte_eth_dev *dev); 207 struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, 208 uint16_t desc, unsigned int socket, 209 const struct rte_eth_txconf *conf); 210 struct mlx5_txq_ctrl *mlx5_txq_hairpin_new 211 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 212 const struct rte_eth_hairpin_conf *hairpin_conf); 213 struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx); 214 int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx); 215 int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx); 216 int mlx5_txq_verify(struct rte_eth_dev *dev); 217 void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl); 218 void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl); 219 uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev); 220 void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev); 221 222 /* mlx5_tx.c */ 223 224 void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, 225 unsigned int olx __rte_unused); 226 int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset); 227 void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, 228 struct rte_eth_txq_info *qinfo); 229 int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t tx_queue_id, 230 struct rte_eth_burst_mode *mode); 231 232 /* mlx5_tx_empw.c */ 233 234 MLX5_TXOFF_PRE_DECL(full_empw); 235 MLX5_TXOFF_PRE_DECL(none_empw); 236 MLX5_TXOFF_PRE_DECL(md_empw); 237 MLX5_TXOFF_PRE_DECL(mt_empw); 238 MLX5_TXOFF_PRE_DECL(mtsc_empw); 239 MLX5_TXOFF_PRE_DECL(mti_empw); 240 MLX5_TXOFF_PRE_DECL(mtv_empw); 241 MLX5_TXOFF_PRE_DECL(mtiv_empw); 242 MLX5_TXOFF_PRE_DECL(sc_empw); 243 MLX5_TXOFF_PRE_DECL(sci_empw); 244 
MLX5_TXOFF_PRE_DECL(scv_empw);
MLX5_TXOFF_PRE_DECL(sciv_empw);
MLX5_TXOFF_PRE_DECL(i_empw);
MLX5_TXOFF_PRE_DECL(v_empw);
MLX5_TXOFF_PRE_DECL(iv_empw);

/* mlx5_tx_nompw.c */

MLX5_TXOFF_PRE_DECL(full);
MLX5_TXOFF_PRE_DECL(none);
MLX5_TXOFF_PRE_DECL(md);
MLX5_TXOFF_PRE_DECL(mt);
MLX5_TXOFF_PRE_DECL(mtsc);
MLX5_TXOFF_PRE_DECL(mti);
MLX5_TXOFF_PRE_DECL(mtv);
MLX5_TXOFF_PRE_DECL(mtiv);
MLX5_TXOFF_PRE_DECL(sc);
MLX5_TXOFF_PRE_DECL(sci);
MLX5_TXOFF_PRE_DECL(scv);
MLX5_TXOFF_PRE_DECL(sciv);
MLX5_TXOFF_PRE_DECL(i);
MLX5_TXOFF_PRE_DECL(v);
MLX5_TXOFF_PRE_DECL(iv);

/* mlx5_tx_txpp.c */

MLX5_TXOFF_PRE_DECL(full_ts_nompw);
MLX5_TXOFF_PRE_DECL(full_ts_nompwi);
MLX5_TXOFF_PRE_DECL(full_ts);
MLX5_TXOFF_PRE_DECL(full_ts_noi);
MLX5_TXOFF_PRE_DECL(none_ts);
MLX5_TXOFF_PRE_DECL(mdi_ts);
MLX5_TXOFF_PRE_DECL(mti_ts);
MLX5_TXOFF_PRE_DECL(mtiv_ts);

/* mlx5_tx_mpw.c */

MLX5_TXOFF_PRE_DECL(none_mpw);
MLX5_TXOFF_PRE_DECL(mci_mpw);
MLX5_TXOFF_PRE_DECL(mc_mpw);
MLX5_TXOFF_PRE_DECL(i_mpw);

static __rte_always_inline struct mlx5_uar_data *
mlx5_tx_bfreg(struct mlx5_txq_data *txq)
{
	return &MLX5_PROC_PRIV(txq->port_id)->uar_table[txq->idx];
}

/**
 * Ring TX queue doorbell and flush the update by write memory barrier.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param wqe
 *   Pointer to the last WQE posted in the NIC.
 */
static __rte_always_inline void
mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
{
	mlx5_doorbell_ring(mlx5_tx_bfreg(txq), *(volatile uint64_t *)wqe,
			   txq->wqe_ci, txq->qp_db, 1);
}

/**
 * Convert timestamp from mbuf format to linear counter
 * of Clock Queue completions (24 bits).
 *
 * @param sh
 *   Pointer to the device shared context to fetch Tx
 *   packet pacing timestamp and parameters.
 * @param mts
 *   Timestamp from the mbuf to convert.
 * @return
 *   positive or zero value - completion ID to wait for.
 *   negative value - conversion error.
 */
static __rte_always_inline int32_t
mlx5_txpp_convert_tx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t mts)
{
	uint64_t ts, ci;
	uint32_t tick;

	do {
		/*
		 * Read atomically two uint64_t fields and compare lsb bits.
		 * If there is no match - the timestamp was updated in
		 * the service thread and the data should be re-read.
		 */
		rte_compiler_barrier();
		ci = __atomic_load_n(&sh->txpp.ts.ci_ts, __ATOMIC_RELAXED);
		ts = __atomic_load_n(&sh->txpp.ts.ts, __ATOMIC_RELAXED);
		rte_compiler_barrier();
		if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH)))
			break;
	} while (true);
	/* Perform the skew correction, a positive value means sending earlier. */
	mts -= sh->txpp.skew;
	mts -= ts;
	if (unlikely(mts >= UINT64_MAX / 2)) {
		/* The difference is negative, mts is in the past. */
		__atomic_fetch_add(&sh->txpp.err_ts_past,
				   1, __ATOMIC_RELAXED);
		return -1;
	}
	tick = sh->txpp.tick;
	MLX5_ASSERT(tick);
	/* Convert delta to completions, round up. */
	mts = (mts + tick - 1) / tick;
	if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) {
		/* The timestamp is too far in the future.
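		 * The limit is half of the Clock Queue index range,
		 * i.e. (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1 completions,
		 * about 8.4 million ticks for the 24-bit width declared
		 * above. For example, with an assumed Clock Queue tick of
		 * 1 usec this allows scheduling roughly 8 seconds ahead.
		 * Increment the "future" error counter and reject.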
*/ 354 __atomic_fetch_add(&sh->txpp.err_ts_future, 355 1, __ATOMIC_RELAXED); 356 return -1; 357 } 358 mts <<= 64 - MLX5_CQ_INDEX_WIDTH; 359 ci += mts; 360 ci >>= 64 - MLX5_CQ_INDEX_WIDTH; 361 return ci; 362 } 363 364 /** 365 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 366 * Flags must be preliminary initialized to zero. 367 * 368 * @param loc 369 * Pointer to burst routine local context. 370 * @param swp_flags 371 * Pointer to store Software Parser flags. 372 * @param olx 373 * Configured Tx offloads mask. It is fully defined at 374 * compile time and may be used for optimization. 375 * 376 * @return 377 * Software Parser offsets packed in dword. 378 * Software Parser flags are set by pointer. 379 */ 380 static __rte_always_inline uint32_t 381 txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, 382 uint8_t *swp_flags, 383 unsigned int olx) 384 { 385 uint64_t ol, tunnel; 386 unsigned int idx, off; 387 uint32_t set; 388 389 if (!MLX5_TXOFF_CONFIG(SWP)) 390 return 0; 391 ol = loc->mbuf->ol_flags; 392 tunnel = ol & RTE_MBUF_F_TX_TUNNEL_MASK; 393 /* 394 * Check whether Software Parser is required. 395 * Only customized tunnels may ask for. 396 */ 397 if (likely(tunnel != RTE_MBUF_F_TX_TUNNEL_UDP && tunnel != RTE_MBUF_F_TX_TUNNEL_IP)) 398 return 0; 399 /* 400 * The index should have: 401 * bit[0:1] = RTE_MBUF_F_TX_L4_MASK 402 * bit[4] = RTE_MBUF_F_TX_IPV6 403 * bit[8] = RTE_MBUF_F_TX_OUTER_IPV6 404 * bit[9] = RTE_MBUF_F_TX_OUTER_UDP 405 */ 406 idx = (ol & (RTE_MBUF_F_TX_L4_MASK | RTE_MBUF_F_TX_IPV6 | RTE_MBUF_F_TX_OUTER_IPV6)) >> 52; 407 idx |= (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) ? (1 << 9) : 0; 408 *swp_flags = mlx5_swp_types_table[idx]; 409 /* 410 * Set offsets for SW parser. Since ConnectX-5, SW parser just 411 * complements HW parser. SW parser starts to engage only if HW parser 412 * can't reach a header. For the older devices, HW parser will not kick 413 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 414 * should be set regardless of HW offload. 415 */ 416 off = loc->mbuf->outer_l2_len; 417 if (MLX5_TXOFF_CONFIG(VLAN) && ol & RTE_MBUF_F_TX_VLAN) 418 off += sizeof(struct rte_vlan_hdr); 419 set = (off >> 1) << 8; /* Outer L3 offset. */ 420 off += loc->mbuf->outer_l3_len; 421 if (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) 422 set |= off >> 1; /* Outer L4 offset. */ 423 if (ol & (RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IPV6)) { /* Inner IP. */ 424 const uint64_t csum = ol & RTE_MBUF_F_TX_L4_MASK; 425 off += loc->mbuf->l2_len; 426 set |= (off >> 1) << 24; /* Inner L3 offset. */ 427 if (csum == RTE_MBUF_F_TX_TCP_CKSUM || 428 csum == RTE_MBUF_F_TX_UDP_CKSUM || 429 (MLX5_TXOFF_CONFIG(TSO) && ol & RTE_MBUF_F_TX_TCP_SEG)) { 430 off += loc->mbuf->l3_len; 431 set |= (off >> 1) << 16; /* Inner L4 offset. */ 432 } 433 } 434 set = rte_cpu_to_le_32(set); 435 return set; 436 } 437 438 /** 439 * Convert the Checksum offloads to Verbs. 440 * 441 * @param buf 442 * Pointer to the mbuf. 443 * 444 * @return 445 * Converted checksum flags. 
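 *
 * For example, a non-tunneled packet requesting RTE_MBUF_F_TX_IP_CKSUM and
 * RTE_MBUF_F_TX_TCP_CKSUM maps to idx = 0x14 (bit[4] for the IP checksum,
 * bit[2:3] = 01 for the TCP checksum), which selects the precomputed
 * Checksum Segment flags from mlx5_cksum_table[]. The bit layout is
 * described in the function body below; the concrete value is given here
 * only as an illustration.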
446 */ 447 static __rte_always_inline uint8_t 448 txq_ol_cksum_to_cs(struct rte_mbuf *buf) 449 { 450 uint32_t idx; 451 uint8_t is_tunnel = !!(buf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK); 452 const uint64_t ol_flags_mask = RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_L4_MASK | 453 RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_OUTER_IP_CKSUM; 454 455 /* 456 * The index should have: 457 * bit[0] = RTE_MBUF_F_TX_TCP_SEG 458 * bit[2:3] = RTE_MBUF_F_TX_UDP_CKSUM, RTE_MBUF_F_TX_TCP_CKSUM 459 * bit[4] = RTE_MBUF_F_TX_IP_CKSUM 460 * bit[8] = RTE_MBUF_F_TX_OUTER_IP_CKSUM 461 * bit[9] = tunnel 462 */ 463 idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9); 464 return mlx5_cksum_table[idx]; 465 } 466 467 /** 468 * Free the mbufs from the linear array of pointers. 469 * 470 * @param txq 471 * Pointer to Tx queue structure. 472 * @param pkts 473 * Pointer to array of packets to be free. 474 * @param pkts_n 475 * Number of packets to be freed. 476 * @param olx 477 * Configured Tx offloads mask. It is fully defined at 478 * compile time and may be used for optimization. 479 */ 480 static __rte_always_inline void 481 mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 482 struct rte_mbuf **__rte_restrict pkts, 483 unsigned int pkts_n, 484 unsigned int olx __rte_unused) 485 { 486 struct rte_mempool *pool = NULL; 487 struct rte_mbuf **p_free = NULL; 488 struct rte_mbuf *mbuf; 489 unsigned int n_free = 0; 490 491 /* 492 * The implemented algorithm eliminates 493 * copying pointers to temporary array 494 * for rte_mempool_put_bulk() calls. 495 */ 496 MLX5_ASSERT(pkts); 497 MLX5_ASSERT(pkts_n); 498 /* 499 * Free mbufs directly to the pool in bulk 500 * if fast free offload is engaged 501 */ 502 if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) { 503 mbuf = *pkts; 504 pool = mbuf->pool; 505 rte_mempool_put_bulk(pool, (void *)pkts, pkts_n); 506 return; 507 } 508 for (;;) { 509 for (;;) { 510 /* 511 * Decrement mbuf reference counter, detach 512 * indirect and external buffers if needed. 513 */ 514 mbuf = rte_pktmbuf_prefree_seg(*pkts); 515 if (likely(mbuf != NULL)) { 516 MLX5_ASSERT(mbuf == *pkts); 517 if (likely(n_free != 0)) { 518 if (unlikely(pool != mbuf->pool)) 519 /* From different pool. */ 520 break; 521 } else { 522 /* Start new scan array. */ 523 pool = mbuf->pool; 524 p_free = pkts; 525 } 526 ++n_free; 527 ++pkts; 528 --pkts_n; 529 if (unlikely(pkts_n == 0)) { 530 mbuf = NULL; 531 break; 532 } 533 } else { 534 /* 535 * This happens if mbuf is still referenced. 536 * We can't put it back to the pool, skip. 537 */ 538 ++pkts; 539 --pkts_n; 540 if (unlikely(n_free != 0)) 541 /* There is some array to free.*/ 542 break; 543 if (unlikely(pkts_n == 0)) 544 /* Last mbuf, nothing to free. */ 545 return; 546 } 547 } 548 for (;;) { 549 /* 550 * This loop is implemented to avoid multiple 551 * inlining of rte_mempool_put_bulk(). 552 */ 553 MLX5_ASSERT(pool); 554 MLX5_ASSERT(p_free); 555 MLX5_ASSERT(n_free); 556 /* 557 * Free the array of pre-freed mbufs 558 * belonging to the same memory pool. 559 */ 560 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 561 if (unlikely(mbuf != NULL)) { 562 /* There is the request to start new scan. */ 563 pool = mbuf->pool; 564 p_free = pkts++; 565 n_free = 1; 566 --pkts_n; 567 if (likely(pkts_n != 0)) 568 break; 569 /* 570 * This is the last mbuf to be freed. 571 * Do one more loop iteration to complete. 572 * This is rare case of the last unique mbuf. 
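			 * As an illustration of the batching behaviour: for an
			 * array of mbufs taken from pools A, A, A, B, B the
			 * routine issues rte_mempool_put_bulk(A, .., 3) and
			 * then rte_mempool_put_bulk(B, .., 2), restarting the
			 * scan here whenever the pool changes or the array is
			 * exhausted.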
573 */ 574 mbuf = NULL; 575 continue; 576 } 577 if (likely(pkts_n == 0)) 578 return; 579 n_free = 0; 580 break; 581 } 582 } 583 } 584 585 /** 586 * No inline version to free buffers for optimal call 587 * on the tx_burst completion. 588 */ 589 static __rte_noinline void 590 __mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 591 struct rte_mbuf **__rte_restrict pkts, 592 unsigned int pkts_n, 593 unsigned int olx __rte_unused) 594 { 595 mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx); 596 } 597 598 /** 599 * Free the mbuf from the elts ring buffer till new tail. 600 * 601 * @param txq 602 * Pointer to Tx queue structure. 603 * @param tail 604 * Index in elts to free up to, becomes new elts tail. 605 * @param olx 606 * Configured Tx offloads mask. It is fully defined at 607 * compile time and may be used for optimization. 608 */ 609 static __rte_always_inline void 610 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 611 uint16_t tail, 612 unsigned int olx __rte_unused) 613 { 614 uint16_t n_elts = tail - txq->elts_tail; 615 616 MLX5_ASSERT(n_elts); 617 MLX5_ASSERT(n_elts <= txq->elts_s); 618 /* 619 * Implement a loop to support ring buffer wraparound 620 * with single inlining of mlx5_tx_free_mbuf(). 621 */ 622 do { 623 unsigned int part; 624 625 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 626 part = RTE_MIN(part, n_elts); 627 MLX5_ASSERT(part); 628 MLX5_ASSERT(part <= txq->elts_s); 629 mlx5_tx_free_mbuf(txq, 630 &txq->elts[txq->elts_tail & txq->elts_m], 631 part, olx); 632 txq->elts_tail += part; 633 n_elts -= part; 634 } while (n_elts); 635 } 636 637 /** 638 * Store the mbuf being sent into elts ring buffer. 639 * On Tx completion these mbufs will be freed. 640 * 641 * @param txq 642 * Pointer to Tx queue structure. 643 * @param pkts 644 * Pointer to array of packets to be stored. 645 * @param pkts_n 646 * Number of packets to be stored. 647 * @param olx 648 * Configured Tx offloads mask. It is fully defined at 649 * compile time and may be used for optimization. 650 */ 651 static __rte_always_inline void 652 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 653 struct rte_mbuf **__rte_restrict pkts, 654 unsigned int pkts_n, 655 unsigned int olx __rte_unused) 656 { 657 unsigned int part; 658 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 659 660 MLX5_ASSERT(pkts); 661 MLX5_ASSERT(pkts_n); 662 part = txq->elts_s - (txq->elts_head & txq->elts_m); 663 MLX5_ASSERT(part); 664 MLX5_ASSERT(part <= txq->elts_s); 665 /* This code is a good candidate for vectorizing with SIMD. */ 666 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 667 (void *)pkts, 668 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 669 txq->elts_head += pkts_n; 670 if (unlikely(part < pkts_n)) 671 /* The copy is wrapping around the elts array. */ 672 rte_memcpy((void *)elts, (void *)(pkts + part), 673 (pkts_n - part) * sizeof(struct rte_mbuf *)); 674 } 675 676 /** 677 * Check if the completion request flag should be set in the last WQE. 678 * Both pushed mbufs and WQEs are monitored and the completion request 679 * flag is set if any of thresholds is reached. 680 * 681 * @param txq 682 * Pointer to TX queue structure. 683 * @param loc 684 * Pointer to burst routine local context. 685 * @param olx 686 * Configured Tx offloads mask. It is fully defined at 687 * compile time and may be used for optimization. 
688 */ 689 static __rte_always_inline void 690 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 691 struct mlx5_txq_local *__rte_restrict loc, 692 unsigned int olx) 693 { 694 uint16_t head = txq->elts_head; 695 unsigned int part; 696 697 part = MLX5_TXOFF_CONFIG(INLINE) ? 698 0 : loc->pkts_sent - loc->pkts_copy; 699 head += part; 700 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 701 (MLX5_TXOFF_CONFIG(INLINE) && 702 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 703 volatile struct mlx5_wqe *last = loc->wqe_last; 704 705 MLX5_ASSERT(last); 706 txq->elts_comp = head; 707 if (MLX5_TXOFF_CONFIG(INLINE)) 708 txq->wqe_comp = txq->wqe_ci; 709 /* Request unconditional completion on last WQE. */ 710 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 711 MLX5_COMP_MODE_OFFSET); 712 /* Save elts_head in dedicated free on completion queue. */ 713 #ifdef RTE_LIBRTE_MLX5_DEBUG 714 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 715 (last->cseg.opcode >> 8) << 16; 716 #else 717 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 718 #endif 719 /* A CQE slot must always be available. */ 720 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 721 } 722 } 723 724 /** 725 * Build the Control Segment with specified opcode: 726 * - MLX5_OPCODE_SEND 727 * - MLX5_OPCODE_ENHANCED_MPSW 728 * - MLX5_OPCODE_TSO 729 * 730 * @param txq 731 * Pointer to TX queue structure. 732 * @param loc 733 * Pointer to burst routine local context. 734 * @param wqe 735 * Pointer to WQE to fill with built Control Segment. 736 * @param ds 737 * Supposed length of WQE in segments. 738 * @param opcode 739 * SQ WQE opcode to put into Control Segment. 740 * @param olx 741 * Configured Tx offloads mask. It is fully defined at 742 * compile time and may be used for optimization. 743 */ 744 static __rte_always_inline void 745 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 746 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 747 struct mlx5_wqe *__rte_restrict wqe, 748 unsigned int ds, 749 unsigned int opcode, 750 unsigned int olx __rte_unused) 751 { 752 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 753 754 /* For legacy MPW replace the EMPW by TSO with modifier. */ 755 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 756 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 757 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 758 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 759 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 760 MLX5_COMP_MODE_OFFSET); 761 cs->misc = RTE_BE32(0); 762 } 763 764 /** 765 * Build the Synchronize Queue Segment with specified completion index. 766 * 767 * @param txq 768 * Pointer to TX queue structure. 769 * @param loc 770 * Pointer to burst routine local context. 771 * @param wqe 772 * Pointer to WQE to fill with built Control Segment. 773 * @param wci 774 * Completion index in Clock Queue to wait. 775 * @param olx 776 * Configured Tx offloads mask. It is fully defined at 777 * compile time and may be used for optimization. 
778 */ 779 static __rte_always_inline void 780 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 781 struct mlx5_txq_local *restrict loc __rte_unused, 782 struct mlx5_wqe *restrict wqe, 783 unsigned int wci, 784 unsigned int olx __rte_unused) 785 { 786 struct mlx5_wqe_qseg *qs; 787 788 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 789 qs->max_index = rte_cpu_to_be_32(wci); 790 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq_obj.cq->id); 791 qs->reserved0 = RTE_BE32(0); 792 qs->reserved1 = RTE_BE32(0); 793 } 794 795 /** 796 * Build the Ethernet Segment without inlined data. 797 * Supports Software Parser, Checksums and VLAN insertion Tx offload features. 798 * 799 * @param txq 800 * Pointer to TX queue structure. 801 * @param loc 802 * Pointer to burst routine local context. 803 * @param wqe 804 * Pointer to WQE to fill with built Ethernet Segment. 805 * @param olx 806 * Configured Tx offloads mask. It is fully defined at 807 * compile time and may be used for optimization. 808 */ 809 static __rte_always_inline void 810 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 811 struct mlx5_txq_local *__rte_restrict loc, 812 struct mlx5_wqe *__rte_restrict wqe, 813 unsigned int olx) 814 { 815 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 816 uint32_t csum; 817 818 /* 819 * Calculate and set check sum flags first, dword field 820 * in segment may be shared with Software Parser flags. 821 */ 822 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 823 es->flags = rte_cpu_to_le_32(csum); 824 /* 825 * Calculate and set Software Parser offsets and flags. 826 * These flags a set for custom UDP and IP tunnel packets. 827 */ 828 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 829 /* Fill metadata field if needed. */ 830 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 831 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 832 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 833 0 : 0; 834 /* Engage VLAN tag insertion feature if requested. */ 835 if (MLX5_TXOFF_CONFIG(VLAN) && 836 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 837 /* 838 * We should get here only if device support 839 * this feature correctly. 840 */ 841 MLX5_ASSERT(txq->vlan_en); 842 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 843 loc->mbuf->vlan_tci); 844 } else { 845 es->inline_hdr = RTE_BE32(0); 846 } 847 } 848 849 /** 850 * Build the Ethernet Segment with minimal inlined data 851 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 852 * used to fill the gap in single WQEBB WQEs. 853 * Supports Software Parser, Checksums and VLAN 854 * insertion Tx offload features. 855 * 856 * @param txq 857 * Pointer to TX queue structure. 858 * @param loc 859 * Pointer to burst routine local context. 860 * @param wqe 861 * Pointer to WQE to fill with built Ethernet Segment. 862 * @param vlan 863 * Length of VLAN tag insertion if any. 864 * @param olx 865 * Configured Tx offloads mask. It is fully defined at 866 * compile time and may be used for optimization. 867 */ 868 static __rte_always_inline void 869 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 870 struct mlx5_txq_local *__rte_restrict loc, 871 struct mlx5_wqe *__rte_restrict wqe, 872 unsigned int vlan, 873 unsigned int olx) 874 { 875 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 876 uint32_t csum; 877 uint8_t *psrc, *pdst; 878 879 /* 880 * Calculate and set check sum flags first, dword field 881 * in segment may be shared with Software Parser flags. 
882 */ 883 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 884 es->flags = rte_cpu_to_le_32(csum); 885 /* 886 * Calculate and set Software Parser offsets and flags. 887 * These flags a set for custom UDP and IP tunnel packets. 888 */ 889 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 890 /* Fill metadata field if needed. */ 891 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 892 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 893 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 894 0 : 0; 895 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 896 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 897 es->inline_data = *(unaligned_uint16_t *)psrc; 898 psrc += sizeof(uint16_t); 899 pdst = (uint8_t *)(es + 1); 900 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 901 /* Implement VLAN tag insertion as part inline data. */ 902 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 903 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 904 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 905 /* Insert VLAN ethertype + VLAN tag. */ 906 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 907 ((RTE_ETHER_TYPE_VLAN << 16) | 908 loc->mbuf->vlan_tci); 909 pdst += sizeof(struct rte_vlan_hdr); 910 /* Copy the rest two bytes from packet data. */ 911 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 912 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 913 } else { 914 /* Fill the gap in the title WQEBB with inline data. */ 915 rte_mov16(pdst, psrc); 916 } 917 } 918 919 /** 920 * Build the Ethernet Segment with entire packet data inlining. Checks the 921 * boundary of WQEBB and ring buffer wrapping, supports Software Parser, 922 * Checksums and VLAN insertion Tx offload features. 923 * 924 * @param txq 925 * Pointer to TX queue structure. 926 * @param loc 927 * Pointer to burst routine local context. 928 * @param wqe 929 * Pointer to WQE to fill with built Ethernet Segment. 930 * @param vlan 931 * Length of VLAN tag insertion if any. 932 * @param inlen 933 * Length of data to inline (VLAN included, if any). 934 * @param tso 935 * TSO flag, set mss field from the packet. 936 * @param olx 937 * Configured Tx offloads mask. It is fully defined at 938 * compile time and may be used for optimization. 939 * 940 * @return 941 * Pointer to the next Data Segment (aligned and wrapped around). 942 */ 943 static __rte_always_inline struct mlx5_wqe_dseg * 944 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 945 struct mlx5_txq_local *__rte_restrict loc, 946 struct mlx5_wqe *__rte_restrict wqe, 947 unsigned int vlan, 948 unsigned int inlen, 949 unsigned int tso, 950 unsigned int olx) 951 { 952 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 953 uint32_t csum; 954 uint8_t *psrc, *pdst; 955 unsigned int part; 956 957 /* 958 * Calculate and set check sum flags first, dword field 959 * in segment may be shared with Software Parser flags. 960 */ 961 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 962 if (tso) { 963 csum <<= 24; 964 csum |= loc->mbuf->tso_segsz; 965 es->flags = rte_cpu_to_be_32(csum); 966 } else { 967 es->flags = rte_cpu_to_le_32(csum); 968 } 969 /* 970 * Calculate and set Software Parser offsets and flags. 971 * These flags a set for custom UDP and IP tunnel packets. 972 */ 973 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 974 /* Fill metadata field if needed. */ 975 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 976 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 
		       rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) :
		       0 : 0;
	psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *);
	es->inline_hdr_sz = rte_cpu_to_be_16(inlen);
	es->inline_data = *(unaligned_uint16_t *)psrc;
	psrc += sizeof(uint16_t);
	pdst = (uint8_t *)(es + 1);
	if (MLX5_TXOFF_CONFIG(VLAN) && vlan) {
		/* Implement VLAN tag insertion as part inline data. */
		memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t));
		pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		/* Insert VLAN ethertype + VLAN tag. */
		*(unaligned_uint32_t *)pdst = rte_cpu_to_be_32
						((RTE_ETHER_TYPE_VLAN << 16) |
						 loc->mbuf->vlan_tci);
		pdst += sizeof(struct rte_vlan_hdr);
		/* Copy the rest two bytes from packet data. */
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t)));
		*(uint16_t *)pdst = *(unaligned_uint16_t *)psrc;
		psrc += sizeof(uint16_t);
	} else {
		/* Fill the gap in the title WQEBB with inline data. */
		rte_mov16(pdst, psrc);
		psrc += sizeof(rte_v128u32_t);
	}
	pdst = (uint8_t *)(es + 2);
	MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE);
	MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end);
	inlen -= MLX5_ESEG_MIN_INLINE_SIZE;
	if (!inlen) {
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE));
		return (struct mlx5_wqe_dseg *)pdst;
	}
	/*
	 * The WQEBB space availability is checked by caller.
	 * Here we should be aware of WQE ring buffer wraparound only.
	 */
	part = (uint8_t *)txq->wqes_end - pdst;
	part = RTE_MIN(part, inlen);
	do {
		rte_memcpy(pdst, psrc, part);
		inlen -= part;
		if (likely(!inlen)) {
			/*
			 * If return value is not used by the caller
			 * the code below will be optimized out.
			 */
			pdst += part;
			pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
			if (unlikely(pdst >= (uint8_t *)txq->wqes_end))
				pdst = (uint8_t *)txq->wqes;
			return (struct mlx5_wqe_dseg *)pdst;
		}
		pdst = (uint8_t *)txq->wqes;
		psrc += part;
		part = inlen;
	} while (true);
}

/**
 * Copy data from a chain of mbufs to the specified linear buffer.
 * If the data from some mbuf is copied completely, this mbuf is freed.
 * The local structure is used to keep the byte stream state.
 *
 * @param pdst
 *   Pointer to the destination linear buffer.
 * @param loc
 *   Pointer to burst routine local context.
 * @param len
 *   Length of data to be copied.
 * @param must
 *   Length of data to be copied ignoring the no-inline hint.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Number of actually copied data bytes. This is always greater than or
 *   equal to the must parameter and might be less than len if the no-inline
 *   hint flag is encountered.
 */
static __rte_always_inline unsigned int
mlx5_tx_mseg_memcpy(uint8_t *pdst,
		    struct mlx5_txq_local *__rte_restrict loc,
		    unsigned int len,
		    unsigned int must,
		    unsigned int olx __rte_unused)
{
	struct rte_mbuf *mbuf;
	unsigned int part, dlen, copy = 0;
	uint8_t *psrc;

	MLX5_ASSERT(len);
	do {
		/* Allow zero length packets, must check first. */
		dlen = rte_pktmbuf_data_len(loc->mbuf);
		if (dlen <= loc->mbuf_off) {
			/* Exhausted packet, just free.
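			 * The segment data has already been copied out into
			 * the WQE on previous iterations, so the segment can
			 * be released right here and the scan advances to the
			 * next mbuf in the chain.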
*/ 1077 mbuf = loc->mbuf; 1078 loc->mbuf = mbuf->next; 1079 rte_pktmbuf_free_seg(mbuf); 1080 loc->mbuf_off = 0; 1081 MLX5_ASSERT(loc->mbuf_nseg > 1); 1082 MLX5_ASSERT(loc->mbuf); 1083 --loc->mbuf_nseg; 1084 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1085 unsigned int diff; 1086 1087 if (copy >= must) { 1088 /* 1089 * We already copied the minimal 1090 * requested amount of data. 1091 */ 1092 return copy; 1093 } 1094 diff = must - copy; 1095 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 1096 /* 1097 * Copy only the minimal required 1098 * part of the data buffer. Limit amount 1099 * of data to be copied to the length of 1100 * available space. 1101 */ 1102 len = RTE_MIN(len, diff); 1103 } 1104 } 1105 continue; 1106 } 1107 dlen -= loc->mbuf_off; 1108 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1109 loc->mbuf_off); 1110 part = RTE_MIN(len, dlen); 1111 rte_memcpy(pdst, psrc, part); 1112 copy += part; 1113 loc->mbuf_off += part; 1114 len -= part; 1115 if (!len) { 1116 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 1117 loc->mbuf_off = 0; 1118 /* Exhausted packet, just free. */ 1119 mbuf = loc->mbuf; 1120 loc->mbuf = mbuf->next; 1121 rte_pktmbuf_free_seg(mbuf); 1122 loc->mbuf_off = 0; 1123 MLX5_ASSERT(loc->mbuf_nseg >= 1); 1124 --loc->mbuf_nseg; 1125 } 1126 return copy; 1127 } 1128 pdst += part; 1129 } while (true); 1130 } 1131 1132 /** 1133 * Build the Ethernet Segment with inlined data from multi-segment packet. 1134 * Checks the boundary of WQEBB and ring buffer wrapping, supports Software 1135 * Parser, Checksums and VLAN insertion Tx offload features. 1136 * 1137 * @param txq 1138 * Pointer to TX queue structure. 1139 * @param loc 1140 * Pointer to burst routine local context. 1141 * @param wqe 1142 * Pointer to WQE to fill with built Ethernet Segment. 1143 * @param vlan 1144 * Length of VLAN tag insertion if any. 1145 * @param inlen 1146 * Length of data to inline (VLAN included, if any). 1147 * @param tso 1148 * TSO flag, set mss field from the packet. 1149 * @param olx 1150 * Configured Tx offloads mask. It is fully defined at 1151 * compile time and may be used for optimization. 1152 * 1153 * @return 1154 * Pointer to the next Data Segment (aligned and possible NOT wrapped 1155 * around - caller should do wrapping check on its own). 1156 */ 1157 static __rte_always_inline struct mlx5_wqe_dseg * 1158 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 1159 struct mlx5_txq_local *__rte_restrict loc, 1160 struct mlx5_wqe *__rte_restrict wqe, 1161 unsigned int vlan, 1162 unsigned int inlen, 1163 unsigned int tso, 1164 unsigned int olx) 1165 { 1166 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 1167 uint32_t csum; 1168 uint8_t *pdst; 1169 unsigned int part, tlen = 0; 1170 1171 /* 1172 * Calculate and set check sum flags first, uint32_t field 1173 * in segment may be shared with Software Parser flags. 1174 */ 1175 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 1176 if (tso) { 1177 csum <<= 24; 1178 csum |= loc->mbuf->tso_segsz; 1179 es->flags = rte_cpu_to_be_32(csum); 1180 } else { 1181 es->flags = rte_cpu_to_le_32(csum); 1182 } 1183 /* 1184 * Calculate and set Software Parser offsets and flags. 1185 * These flags a set for custom UDP and IP tunnel packets. 1186 */ 1187 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1188 /* Fill metadata field if needed. */ 1189 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1190 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 
1191 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1192 0 : 0; 1193 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 1194 pdst = (uint8_t *)&es->inline_data; 1195 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1196 /* Implement VLAN tag insertion as part inline data. */ 1197 mlx5_tx_mseg_memcpy(pdst, loc, 1198 2 * RTE_ETHER_ADDR_LEN, 1199 2 * RTE_ETHER_ADDR_LEN, olx); 1200 pdst += 2 * RTE_ETHER_ADDR_LEN; 1201 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1202 ((RTE_ETHER_TYPE_VLAN << 16) | 1203 loc->mbuf->vlan_tci); 1204 pdst += sizeof(struct rte_vlan_hdr); 1205 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 1206 } 1207 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 1208 /* 1209 * The WQEBB space availability is checked by caller. 1210 * Here we should be aware of WQE ring buffer wraparound only. 1211 */ 1212 part = (uint8_t *)txq->wqes_end - pdst; 1213 part = RTE_MIN(part, inlen - tlen); 1214 MLX5_ASSERT(part); 1215 do { 1216 unsigned int copy; 1217 1218 /* 1219 * Copying may be interrupted inside the routine 1220 * if run into no inline hint flag. 1221 */ 1222 copy = tso ? inlen : txq->inlen_mode; 1223 copy = tlen >= copy ? 0 : (copy - tlen); 1224 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 1225 tlen += copy; 1226 if (likely(inlen <= tlen) || copy < part) { 1227 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 1228 pdst += copy; 1229 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1230 return (struct mlx5_wqe_dseg *)pdst; 1231 } 1232 pdst = (uint8_t *)txq->wqes; 1233 part = inlen - tlen; 1234 } while (true); 1235 } 1236 1237 /** 1238 * Build the Data Segment of pointer type. 1239 * 1240 * @param txq 1241 * Pointer to TX queue structure. 1242 * @param loc 1243 * Pointer to burst routine local context. 1244 * @param dseg 1245 * Pointer to WQE to fill with built Data Segment. 1246 * @param buf 1247 * Data buffer to point. 1248 * @param len 1249 * Data buffer length. 1250 * @param olx 1251 * Configured Tx offloads mask. It is fully defined at 1252 * compile time and may be used for optimization. 1253 */ 1254 static __rte_always_inline void 1255 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 1256 struct mlx5_txq_local *__rte_restrict loc, 1257 struct mlx5_wqe_dseg *__rte_restrict dseg, 1258 uint8_t *buf, 1259 unsigned int len, 1260 unsigned int olx __rte_unused) 1261 1262 { 1263 MLX5_ASSERT(len); 1264 dseg->bcount = rte_cpu_to_be_32(len); 1265 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1266 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1267 } 1268 1269 /** 1270 * Build the Data Segment of pointer type or inline if data length is less than 1271 * buffer in minimal Data Segment size. 1272 * 1273 * @param txq 1274 * Pointer to TX queue structure. 1275 * @param loc 1276 * Pointer to burst routine local context. 1277 * @param dseg 1278 * Pointer to WQE to fill with built Data Segment. 1279 * @param buf 1280 * Data buffer to point. 1281 * @param len 1282 * Data buffer length. 1283 * @param olx 1284 * Configured Tx offloads mask. It is fully defined at 1285 * compile time and may be used for optimization. 
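 *
 * If len does not exceed MLX5_DSEG_MIN_INLINE_SIZE the data is copied into
 * the Data Segment itself (inline type) instead of being referenced by
 * pointer, so no memory key lookup is needed for such short buffers; see
 * the unrolled copy in the function body.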
1286 */ 1287 static __rte_always_inline void 1288 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 1289 struct mlx5_txq_local *__rte_restrict loc, 1290 struct mlx5_wqe_dseg *__rte_restrict dseg, 1291 uint8_t *buf, 1292 unsigned int len, 1293 unsigned int olx __rte_unused) 1294 1295 { 1296 uintptr_t dst, src; 1297 1298 MLX5_ASSERT(len); 1299 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 1300 dseg->bcount = rte_cpu_to_be_32(len); 1301 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1302 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1303 1304 return; 1305 } 1306 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1307 /* Unrolled implementation of generic rte_memcpy. */ 1308 dst = (uintptr_t)&dseg->inline_data[0]; 1309 src = (uintptr_t)buf; 1310 if (len & 0x08) { 1311 #ifdef RTE_ARCH_STRICT_ALIGN 1312 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 1313 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1314 dst += sizeof(uint32_t); 1315 src += sizeof(uint32_t); 1316 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1317 dst += sizeof(uint32_t); 1318 src += sizeof(uint32_t); 1319 #else 1320 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 1321 dst += sizeof(uint64_t); 1322 src += sizeof(uint64_t); 1323 #endif 1324 } 1325 if (len & 0x04) { 1326 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1327 dst += sizeof(uint32_t); 1328 src += sizeof(uint32_t); 1329 } 1330 if (len & 0x02) { 1331 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 1332 dst += sizeof(uint16_t); 1333 src += sizeof(uint16_t); 1334 } 1335 if (len & 0x01) 1336 *(uint8_t *)dst = *(uint8_t *)src; 1337 } 1338 1339 /** 1340 * Build the Data Segment of inlined data from single 1341 * segment packet, no VLAN insertion. 1342 * 1343 * @param txq 1344 * Pointer to TX queue structure. 1345 * @param loc 1346 * Pointer to burst routine local context. 1347 * @param dseg 1348 * Pointer to WQE to fill with built Data Segment. 1349 * @param buf 1350 * Data buffer to point. 1351 * @param len 1352 * Data buffer length. 1353 * @param olx 1354 * Configured Tx offloads mask. It is fully defined at 1355 * compile time and may be used for optimization. 1356 * 1357 * @return 1358 * Pointer to the next Data Segment after inlined data. 1359 * Ring buffer wraparound check is needed. We do not do it here because it 1360 * may not be needed for the last packet in the eMPW session. 1361 */ 1362 static __rte_always_inline struct mlx5_wqe_dseg * 1363 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 1364 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1365 struct mlx5_wqe_dseg *__rte_restrict dseg, 1366 uint8_t *buf, 1367 unsigned int len, 1368 unsigned int olx __rte_unused) 1369 { 1370 unsigned int part; 1371 uint8_t *pdst; 1372 1373 if (!MLX5_TXOFF_CONFIG(MPW)) { 1374 /* Store the descriptor byte counter for eMPW sessions. */ 1375 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1376 pdst = &dseg->inline_data[0]; 1377 } else { 1378 /* The entire legacy MPW session counter is stored on close. */ 1379 pdst = (uint8_t *)dseg; 1380 } 1381 /* 1382 * The WQEBB space availability is checked by caller. 1383 * Here we should be aware of WQE ring buffer wraparound only. 1384 */ 1385 part = (uint8_t *)txq->wqes_end - pdst; 1386 part = RTE_MIN(part, len); 1387 do { 1388 rte_memcpy(pdst, buf, part); 1389 len -= part; 1390 if (likely(!len)) { 1391 pdst += part; 1392 if (!MLX5_TXOFF_CONFIG(MPW)) 1393 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1394 /* Note: no final wraparound check here. 
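			 * The caller performs the ring wraparound check when
			 * it continues the eMPW session; for the last packet
			 * of the session the check is not needed at all, as
			 * explained in the function description above.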
*/ 1395 return (struct mlx5_wqe_dseg *)pdst; 1396 } 1397 pdst = (uint8_t *)txq->wqes; 1398 buf += part; 1399 part = len; 1400 } while (true); 1401 } 1402 1403 /** 1404 * Build the Data Segment of inlined data from single 1405 * segment packet with VLAN insertion. 1406 * 1407 * @param txq 1408 * Pointer to TX queue structure. 1409 * @param loc 1410 * Pointer to burst routine local context. 1411 * @param dseg 1412 * Pointer to the dseg fill with built Data Segment. 1413 * @param buf 1414 * Data buffer to point. 1415 * @param len 1416 * Data buffer length. 1417 * @param olx 1418 * Configured Tx offloads mask. It is fully defined at 1419 * compile time and may be used for optimization. 1420 * 1421 * @return 1422 * Pointer to the next Data Segment after inlined data. 1423 * Ring buffer wraparound check is needed. 1424 */ 1425 static __rte_always_inline struct mlx5_wqe_dseg * 1426 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 1427 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1428 struct mlx5_wqe_dseg *__rte_restrict dseg, 1429 uint8_t *buf, 1430 unsigned int len, 1431 unsigned int olx __rte_unused) 1432 1433 { 1434 unsigned int part; 1435 uint8_t *pdst; 1436 1437 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 1438 if (!MLX5_TXOFF_CONFIG(MPW)) { 1439 /* Store the descriptor byte counter for eMPW sessions. */ 1440 dseg->bcount = rte_cpu_to_be_32 1441 ((len + sizeof(struct rte_vlan_hdr)) | 1442 MLX5_ETH_WQE_DATA_INLINE); 1443 pdst = &dseg->inline_data[0]; 1444 } else { 1445 /* The entire legacy MPW session counter is stored on close. */ 1446 pdst = (uint8_t *)dseg; 1447 } 1448 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 1449 buf += MLX5_DSEG_MIN_INLINE_SIZE; 1450 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 1451 len -= MLX5_DSEG_MIN_INLINE_SIZE; 1452 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ 1453 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 1454 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 1455 pdst = (uint8_t *)txq->wqes; 1456 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 1457 loc->mbuf->vlan_tci); 1458 pdst += sizeof(struct rte_vlan_hdr); 1459 /* 1460 * The WQEBB space availability is checked by caller. 1461 * Here we should be aware of WQE ring buffer wraparound only. 1462 */ 1463 part = (uint8_t *)txq->wqes_end - pdst; 1464 part = RTE_MIN(part, len); 1465 do { 1466 rte_memcpy(pdst, buf, part); 1467 len -= part; 1468 if (likely(!len)) { 1469 pdst += part; 1470 if (!MLX5_TXOFF_CONFIG(MPW)) 1471 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1472 /* Note: no final wraparound check here. */ 1473 return (struct mlx5_wqe_dseg *)pdst; 1474 } 1475 pdst = (uint8_t *)txq->wqes; 1476 buf += part; 1477 part = len; 1478 } while (true); 1479 } 1480 1481 /** 1482 * Build the Ethernet Segment with optionally inlined data with 1483 * VLAN insertion and following Data Segments (if any) from 1484 * multi-segment packet. Used by ordinary send and TSO. 1485 * 1486 * @param txq 1487 * Pointer to TX queue structure. 1488 * @param loc 1489 * Pointer to burst routine local context. 1490 * @param wqe 1491 * Pointer to WQE to fill with built Ethernet/Data Segments. 1492 * @param vlan 1493 * Length of VLAN header to insert, 0 means no VLAN insertion. 1494 * @param inlen 1495 * Data length to inline. For TSO this parameter specifies exact value, 1496 * for ordinary send routine can be aligned by caller to provide better WQE 1497 * space saving and data buffer start address alignment. 1498 * This length includes VLAN header being inserted. 
1499 * @param tso 1500 * Zero means ordinary send, inlined data can be extended, 1501 * otherwise this is TSO, inlined data length is fixed. 1502 * @param olx 1503 * Configured Tx offloads mask. It is fully defined at 1504 * compile time and may be used for optimization. 1505 * 1506 * @return 1507 * Actual size of built WQE in segments. 1508 */ 1509 static __rte_always_inline unsigned int 1510 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 1511 struct mlx5_txq_local *__rte_restrict loc, 1512 struct mlx5_wqe *__rte_restrict wqe, 1513 unsigned int vlan, 1514 unsigned int inlen, 1515 unsigned int tso, 1516 unsigned int olx __rte_unused) 1517 { 1518 struct mlx5_wqe_dseg *__rte_restrict dseg; 1519 unsigned int ds; 1520 1521 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 1522 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 1523 loc->mbuf_off = 0; 1524 1525 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 1526 if (!loc->mbuf_nseg) 1527 goto dseg_done; 1528 /* 1529 * There are still some mbuf remaining, not inlined. 1530 * The first mbuf may be partially inlined and we 1531 * must process the possible non-zero data offset. 1532 */ 1533 if (loc->mbuf_off) { 1534 unsigned int dlen; 1535 uint8_t *dptr; 1536 1537 /* 1538 * Exhausted packets must be dropped before. 1539 * Non-zero offset means there are some data 1540 * remained in the packet. 1541 */ 1542 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 1543 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 1544 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1545 loc->mbuf_off); 1546 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 1547 /* 1548 * Build the pointer/minimal Data Segment. 1549 * Do ring buffer wrapping check in advance. 1550 */ 1551 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1552 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1553 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 1554 /* Store the mbuf to be freed on completion. */ 1555 MLX5_ASSERT(loc->elts_free); 1556 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1557 --loc->elts_free; 1558 ++dseg; 1559 if (--loc->mbuf_nseg == 0) 1560 goto dseg_done; 1561 loc->mbuf = loc->mbuf->next; 1562 loc->mbuf_off = 0; 1563 } 1564 do { 1565 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1566 struct rte_mbuf *mbuf; 1567 1568 /* Zero length segment found, just skip. */ 1569 mbuf = loc->mbuf; 1570 loc->mbuf = loc->mbuf->next; 1571 rte_pktmbuf_free_seg(mbuf); 1572 if (--loc->mbuf_nseg == 0) 1573 break; 1574 } else { 1575 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1576 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1577 mlx5_tx_dseg_iptr 1578 (txq, loc, dseg, 1579 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1580 rte_pktmbuf_data_len(loc->mbuf), olx); 1581 MLX5_ASSERT(loc->elts_free); 1582 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1583 --loc->elts_free; 1584 ++dseg; 1585 if (--loc->mbuf_nseg == 0) 1586 break; 1587 loc->mbuf = loc->mbuf->next; 1588 } 1589 } while (true); 1590 1591 dseg_done: 1592 /* Calculate actual segments used from the dseg pointer. */ 1593 if ((uintptr_t)wqe < (uintptr_t)dseg) 1594 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 1595 else 1596 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 1597 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 1598 return ds; 1599 } 1600 1601 /** 1602 * The routine checks timestamp flag in the current packet, 1603 * and push WAIT WQE into the queue if scheduling is required. 1604 * 1605 * @param txq 1606 * Pointer to TX queue structure. 
1607 * @param loc 1608 * Pointer to burst routine local context. 1609 * @param olx 1610 * Configured Tx offloads mask. It is fully defined at 1611 * compile time and may be used for optimization. 1612 * 1613 * @return 1614 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1615 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 1616 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 1617 * Local context variables partially updated. 1618 */ 1619 static __rte_always_inline enum mlx5_txcmp_code 1620 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 1621 struct mlx5_txq_local *restrict loc, 1622 unsigned int olx) 1623 { 1624 if (MLX5_TXOFF_CONFIG(TXPP) && 1625 loc->mbuf->ol_flags & txq->ts_mask) { 1626 struct mlx5_wqe *wqe; 1627 uint64_t ts; 1628 int32_t wci; 1629 1630 /* 1631 * Estimate the required space quickly and roughly. 1632 * We would like to ensure the packet can be pushed 1633 * to the queue and we won't get the orphan WAIT WQE. 1634 */ 1635 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 1636 loc->elts_free < NB_SEGS(loc->mbuf)) 1637 return MLX5_TXCMP_CODE_EXIT; 1638 /* Convert the timestamp into completion to wait. */ 1639 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 1640 wci = mlx5_txpp_convert_tx_ts(txq->sh, ts); 1641 if (unlikely(wci < 0)) 1642 return MLX5_TXCMP_CODE_SINGLE; 1643 /* Build the WAIT WQE with specified completion. */ 1644 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1645 mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx); 1646 mlx5_tx_wseg_init(txq, loc, wqe, wci, olx); 1647 ++txq->wqe_ci; 1648 --loc->wqe_free; 1649 return MLX5_TXCMP_CODE_MULTI; 1650 } 1651 return MLX5_TXCMP_CODE_SINGLE; 1652 } 1653 1654 /** 1655 * Tx one packet function for multi-segment TSO. Supports all 1656 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 1657 * sends one packet per WQE. 1658 * 1659 * This routine is responsible for storing processed mbuf 1660 * into elts ring buffer and update elts_head. 1661 * 1662 * @param txq 1663 * Pointer to TX queue structure. 1664 * @param loc 1665 * Pointer to burst routine local context. 1666 * @param olx 1667 * Configured Tx offloads mask. It is fully defined at 1668 * compile time and may be used for optimization. 1669 * 1670 * @return 1671 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1672 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1673 * Local context variables partially updated. 1674 */ 1675 static __rte_always_inline enum mlx5_txcmp_code 1676 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 1677 struct mlx5_txq_local *__rte_restrict loc, 1678 unsigned int olx) 1679 { 1680 struct mlx5_wqe *__rte_restrict wqe; 1681 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 1682 1683 if (MLX5_TXOFF_CONFIG(TXPP)) { 1684 enum mlx5_txcmp_code wret; 1685 1686 /* Generate WAIT for scheduling if requested. */ 1687 wret = mlx5_tx_schedule_send(txq, loc, olx); 1688 if (wret == MLX5_TXCMP_CODE_EXIT) 1689 return MLX5_TXCMP_CODE_EXIT; 1690 if (wret == MLX5_TXCMP_CODE_ERROR) 1691 return MLX5_TXCMP_CODE_ERROR; 1692 } 1693 /* 1694 * Calculate data length to be inlined to estimate 1695 * the required space in WQE ring buffer. 
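	 * For TSO only the protocol headers are inlined: L2 (plus the VLAN
	 * tag if it is inserted by the PMD), L3 and L4, and additionally the
	 * outer L2/L3 headers for tunneled packets, as computed just below.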
1696 */ 1697 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1698 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1699 vlan = sizeof(struct rte_vlan_hdr); 1700 inlen = loc->mbuf->l2_len + vlan + 1701 loc->mbuf->l3_len + loc->mbuf->l4_len; 1702 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 1703 return MLX5_TXCMP_CODE_ERROR; 1704 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 1705 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 1706 /* Packet must contain all TSO headers. */ 1707 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 1708 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 1709 inlen > (dlen + vlan))) 1710 return MLX5_TXCMP_CODE_ERROR; 1711 /* 1712 * Check whether there are enough free WQEBBs: 1713 * - Control Segment 1714 * - Ethernet Segment 1715 * - First Segment of inlined Ethernet data 1716 * - ... data continued ... 1717 * - Data Segments of pointer/min inline type 1718 */ 1719 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 1720 MLX5_ESEG_MIN_INLINE_SIZE + 1721 MLX5_WSEG_SIZE + 1722 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 1723 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1724 return MLX5_TXCMP_CODE_EXIT; 1725 /* Check for maximal WQE size. */ 1726 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 1727 return MLX5_TXCMP_CODE_ERROR; 1728 #ifdef MLX5_PMD_SOFT_COUNTERS 1729 /* Update sent data bytes/packets counters. */ 1730 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 1731 loc->mbuf->tso_segsz; 1732 /* 1733 * One will be added for mbuf itself at the end of the mlx5_tx_burst 1734 * from loc->pkts_sent field. 1735 */ 1736 --ntcp; 1737 txq->stats.opackets += ntcp; 1738 txq->stats.obytes += dlen + vlan + ntcp * inlen; 1739 #endif 1740 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1741 loc->wqe_last = wqe; 1742 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 1743 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 1744 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 1745 txq->wqe_ci += (ds + 3) / 4; 1746 loc->wqe_free -= (ds + 3) / 4; 1747 return MLX5_TXCMP_CODE_MULTI; 1748 } 1749 1750 /** 1751 * Tx one packet function for multi-segment SEND. Supports all types of Tx 1752 * offloads, uses MLX5_OPCODE_SEND to build WQEs, sends one packet per WQE, 1753 * without any data inlining in Ethernet Segment. 1754 * 1755 * This routine is responsible for storing processed mbuf 1756 * into elts ring buffer and update elts_head. 1757 * 1758 * @param txq 1759 * Pointer to TX queue structure. 1760 * @param loc 1761 * Pointer to burst routine local context. 1762 * @param olx 1763 * Configured Tx offloads mask. It is fully defined at 1764 * compile time and may be used for optimization. 1765 * 1766 * @return 1767 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1768 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1769 * Local context variables partially updated. 1770 */ 1771 static __rte_always_inline enum mlx5_txcmp_code 1772 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 1773 struct mlx5_txq_local *__rte_restrict loc, 1774 unsigned int olx) 1775 { 1776 struct mlx5_wqe_dseg *__rte_restrict dseg; 1777 struct mlx5_wqe *__rte_restrict wqe; 1778 unsigned int ds, nseg; 1779 1780 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1781 if (MLX5_TXOFF_CONFIG(TXPP)) { 1782 enum mlx5_txcmp_code wret; 1783 1784 /* Generate WAIT for scheduling if requested. 
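		 * The WAIT WQE built by mlx5_tx_schedule_send() takes one
		 * extra WQEBB ahead of the data WQE and is already
		 * accounted in loc->wqe_free by the callee.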
*/ 1785 wret = mlx5_tx_schedule_send(txq, loc, olx); 1786 if (wret == MLX5_TXCMP_CODE_EXIT) 1787 return MLX5_TXCMP_CODE_EXIT; 1788 if (wret == MLX5_TXCMP_CODE_ERROR) 1789 return MLX5_TXCMP_CODE_ERROR; 1790 } 1791 /* 1792 * No inline at all, it means the CPU cycles saving is prioritized at 1793 * configuration, we should not copy any packet data to WQE. 1794 */ 1795 nseg = NB_SEGS(loc->mbuf); 1796 ds = 2 + nseg; 1797 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1798 return MLX5_TXCMP_CODE_EXIT; 1799 /* Check for maximal WQE size. */ 1800 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 1801 return MLX5_TXCMP_CODE_ERROR; 1802 /* 1803 * Some Tx offloads may cause an error if packet is not long enough, 1804 * check against assumed minimal length. 1805 */ 1806 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 1807 return MLX5_TXCMP_CODE_ERROR; 1808 #ifdef MLX5_PMD_SOFT_COUNTERS 1809 /* Update sent data bytes counter. */ 1810 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 1811 if (MLX5_TXOFF_CONFIG(VLAN) && 1812 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1813 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 1814 #endif 1815 /* 1816 * SEND WQE, one WQEBB: 1817 * - Control Segment, SEND opcode 1818 * - Ethernet Segment, optional VLAN, no inline 1819 * - Data Segments, pointer only type 1820 */ 1821 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1822 loc->wqe_last = wqe; 1823 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 1824 mlx5_tx_eseg_none(txq, loc, wqe, olx); 1825 dseg = &wqe->dseg[0]; 1826 do { 1827 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1828 struct rte_mbuf *mbuf; 1829 1830 /* 1831 * Zero length segment found, have to correct total 1832 * size of WQE in segments. 1833 * It is supposed to be rare occasion, so in normal 1834 * case (no zero length segments) we avoid extra 1835 * writing to the Control Segment. 1836 */ 1837 --ds; 1838 wqe->cseg.sq_ds -= RTE_BE32(1); 1839 mbuf = loc->mbuf; 1840 loc->mbuf = mbuf->next; 1841 rte_pktmbuf_free_seg(mbuf); 1842 if (--nseg == 0) 1843 break; 1844 } else { 1845 mlx5_tx_dseg_ptr 1846 (txq, loc, dseg, 1847 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1848 rte_pktmbuf_data_len(loc->mbuf), olx); 1849 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1850 --loc->elts_free; 1851 if (--nseg == 0) 1852 break; 1853 ++dseg; 1854 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1855 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1856 loc->mbuf = loc->mbuf->next; 1857 } 1858 } while (true); 1859 txq->wqe_ci += (ds + 3) / 4; 1860 loc->wqe_free -= (ds + 3) / 4; 1861 return MLX5_TXCMP_CODE_MULTI; 1862 } 1863 1864 /** 1865 * Tx one packet function for multi-segment SEND. Supports all 1866 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 1867 * sends one packet per WQE, with data inlining in 1868 * Ethernet Segment and minimal Data Segments. 1869 * 1870 * This routine is responsible for storing processed mbuf 1871 * into elts ring buffer and update elts_head. 1872 * 1873 * @param txq 1874 * Pointer to TX queue structure. 1875 * @param loc 1876 * Pointer to burst routine local context. 1877 * @param olx 1878 * Configured Tx offloads mask. It is fully defined at 1879 * compile time and may be used for optimization. 1880 * 1881 * @return 1882 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1883 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1884 * Local context variables partially updated. 
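 *
 * If the NOINLINE hint is set on the mbuf or the data does not fit
 * into the configured inline limits, the routine may fall back to the
 * multi-segment SEND path without any data inlining.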
1885 */ 1886 static __rte_always_inline enum mlx5_txcmp_code 1887 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 1888 struct mlx5_txq_local *__rte_restrict loc, 1889 unsigned int olx) 1890 { 1891 struct mlx5_wqe *__rte_restrict wqe; 1892 unsigned int ds, inlen, dlen, vlan = 0; 1893 1894 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 1895 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1896 if (MLX5_TXOFF_CONFIG(TXPP)) { 1897 enum mlx5_txcmp_code wret; 1898 1899 /* Generate WAIT for scheduling if requested. */ 1900 wret = mlx5_tx_schedule_send(txq, loc, olx); 1901 if (wret == MLX5_TXCMP_CODE_EXIT) 1902 return MLX5_TXCMP_CODE_EXIT; 1903 if (wret == MLX5_TXCMP_CODE_ERROR) 1904 return MLX5_TXCMP_CODE_ERROR; 1905 } 1906 /* 1907 * First calculate data length to be inlined 1908 * to estimate the required space for WQE. 1909 */ 1910 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1911 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1912 vlan = sizeof(struct rte_vlan_hdr); 1913 inlen = dlen + vlan; 1914 /* Check against minimal length. */ 1915 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 1916 return MLX5_TXCMP_CODE_ERROR; 1917 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 1918 if (inlen > txq->inlen_send || 1919 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1920 struct rte_mbuf *mbuf; 1921 unsigned int nxlen; 1922 uintptr_t start; 1923 1924 mbuf = loc->mbuf; 1925 nxlen = rte_pktmbuf_data_len(mbuf); 1926 /* 1927 * Packet length exceeds the allowed inline data length, 1928 * check whether the minimal inlining is required. 1929 */ 1930 if (txq->inlen_mode) { 1931 MLX5_ASSERT(txq->inlen_mode >= 1932 MLX5_ESEG_MIN_INLINE_SIZE); 1933 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 1934 inlen = RTE_MIN(txq->inlen_mode, inlen); 1935 } else if (vlan && !txq->vlan_en) { 1936 /* 1937 * VLAN insertion is requested and hardware does not 1938 * support the offload, will do with software inline. 1939 */ 1940 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 1941 } else if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE || 1942 nxlen > txq->inlen_send) { 1943 return mlx5_tx_packet_multi_send(txq, loc, olx); 1944 } else { 1945 goto do_first; 1946 } 1947 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 1948 goto do_build; 1949 /* 1950 * Now we know the minimal amount of data is requested 1951 * to inline. Check whether we should inline the buffers 1952 * from the chain beginning to eliminate some mbufs. 1953 */ 1954 if (unlikely(nxlen <= txq->inlen_send)) { 1955 /* We can inline first mbuf at least. */ 1956 if (nxlen < inlen) { 1957 unsigned int smlen; 1958 1959 /* Scan mbufs till inlen filled. */ 1960 do { 1961 smlen = nxlen; 1962 mbuf = NEXT(mbuf); 1963 MLX5_ASSERT(mbuf); 1964 nxlen = rte_pktmbuf_data_len(mbuf); 1965 nxlen += smlen; 1966 } while (unlikely(nxlen < inlen)); 1967 if (unlikely(nxlen > txq->inlen_send)) { 1968 /* We cannot inline entire mbuf. */ 1969 smlen = inlen - smlen; 1970 start = rte_pktmbuf_mtod_offset 1971 (mbuf, uintptr_t, smlen); 1972 goto do_align; 1973 } 1974 } 1975 do_first: 1976 do { 1977 inlen = nxlen; 1978 mbuf = NEXT(mbuf); 1979 /* There should be not end of packet. */ 1980 MLX5_ASSERT(mbuf); 1981 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 1982 break; 1983 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 1984 } while (unlikely(nxlen < txq->inlen_send)); 1985 } 1986 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 1987 /* 1988 * Check whether we can do inline to align start 1989 * address of data buffer to cacheline. 
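		 * A few extra bytes are inlined so that the not inlined
		 * remainder starts at a cache line boundary, provided the
		 * extended length still fits into txq->inlen_send.
		 * Illustrative figures only (on a 64-byte cache line
		 * platform): if the first byte left not inlined lies 40
		 * bytes past a boundary, 24 extra bytes are inlined.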
1990 */ 1991 do_align: 1992 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 1993 if (unlikely(start)) { 1994 start += inlen; 1995 if (start <= txq->inlen_send) 1996 inlen = start; 1997 } 1998 } 1999 /* 2000 * Check whether there are enough free WQEBBs: 2001 * - Control Segment 2002 * - Ethernet Segment 2003 * - First Segment of inlined Ethernet data 2004 * - ... data continued ... 2005 * - Data Segments of pointer/min inline type 2006 * 2007 * Estimate the number of Data Segments conservatively, 2008 * supposing no any mbufs is being freed during inlining. 2009 */ 2010 do_build: 2011 MLX5_ASSERT(inlen <= txq->inlen_send); 2012 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 2013 MLX5_ESEG_MIN_INLINE_SIZE + 2014 MLX5_WSEG_SIZE + 2015 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2016 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 2017 return MLX5_TXCMP_CODE_EXIT; 2018 /* Check for maximal WQE size. */ 2019 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 2020 return MLX5_TXCMP_CODE_ERROR; 2021 #ifdef MLX5_PMD_SOFT_COUNTERS 2022 /* Update sent data bytes/packets counters. */ 2023 txq->stats.obytes += dlen + vlan; 2024 #endif 2025 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2026 loc->wqe_last = wqe; 2027 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 2028 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 2029 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2030 txq->wqe_ci += (ds + 3) / 4; 2031 loc->wqe_free -= (ds + 3) / 4; 2032 return MLX5_TXCMP_CODE_MULTI; 2033 } 2034 2035 /** 2036 * Tx burst function for multi-segment packets. Supports all 2037 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 2038 * sends one packet per WQE. Function stops sending if it 2039 * encounters the single-segment packet. 2040 * 2041 * This routine is responsible for storing processed mbuf 2042 * into elts ring buffer and update elts_head. 2043 * 2044 * @param txq 2045 * Pointer to TX queue structure. 2046 * @param[in] pkts 2047 * Packets to transmit. 2048 * @param pkts_n 2049 * Number of packets in array. 2050 * @param loc 2051 * Pointer to burst routine local context. 2052 * @param olx 2053 * Configured Tx offloads mask. It is fully defined at 2054 * compile time and may be used for optimization. 2055 * 2056 * @return 2057 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2058 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2059 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2060 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 2061 * Local context variables updated. 2062 */ 2063 static __rte_always_inline enum mlx5_txcmp_code 2064 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 2065 struct rte_mbuf **__rte_restrict pkts, 2066 unsigned int pkts_n, 2067 struct mlx5_txq_local *__rte_restrict loc, 2068 unsigned int olx) 2069 { 2070 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2071 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2072 pkts += loc->pkts_sent + 1; 2073 pkts_n -= loc->pkts_sent; 2074 for (;;) { 2075 enum mlx5_txcmp_code ret; 2076 2077 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 2078 /* 2079 * Estimate the number of free elts quickly but conservatively. 2080 * Some segment may be fully inlined and freed, 2081 * ignore this here - precise estimation is costly. 2082 */ 2083 if (loc->elts_free < NB_SEGS(loc->mbuf)) 2084 return MLX5_TXCMP_CODE_EXIT; 2085 if (MLX5_TXOFF_CONFIG(TSO) && 2086 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { 2087 /* Proceed with multi-segment TSO. 
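			 * One TSO WQE is built per packet, the protocol
			 * headers are inlined and the payload is attached
			 * with pointer Data Segments.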
*/ 2088 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 2089 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 2090 /* Proceed with multi-segment SEND with inlining. */ 2091 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 2092 } else { 2093 /* Proceed with multi-segment SEND w/o inlining. */ 2094 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 2095 } 2096 if (ret == MLX5_TXCMP_CODE_EXIT) 2097 return MLX5_TXCMP_CODE_EXIT; 2098 if (ret == MLX5_TXCMP_CODE_ERROR) 2099 return MLX5_TXCMP_CODE_ERROR; 2100 /* WQE is built, go to the next packet. */ 2101 ++loc->pkts_sent; 2102 --pkts_n; 2103 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2104 return MLX5_TXCMP_CODE_EXIT; 2105 loc->mbuf = *pkts++; 2106 if (pkts_n > 1) 2107 rte_prefetch0(*pkts); 2108 if (likely(NB_SEGS(loc->mbuf) > 1)) 2109 continue; 2110 /* Here ends the series of multi-segment packets. */ 2111 if (MLX5_TXOFF_CONFIG(TSO) && 2112 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2113 return MLX5_TXCMP_CODE_TSO; 2114 return MLX5_TXCMP_CODE_SINGLE; 2115 } 2116 MLX5_ASSERT(false); 2117 } 2118 2119 /** 2120 * Tx burst function for single-segment packets with TSO. 2121 * Supports all types of Tx offloads, except multi-packets. 2122 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 2123 * Function stops sending if it encounters the multi-segment 2124 * packet or packet without TSO requested. 2125 * 2126 * The routine is responsible for storing processed mbuf into elts ring buffer 2127 * and update elts_head if inline offloads is requested due to possible early 2128 * freeing of the inlined mbufs (can not store pkts array in elts as a batch). 2129 * 2130 * @param txq 2131 * Pointer to TX queue structure. 2132 * @param[in] pkts 2133 * Packets to transmit. 2134 * @param pkts_n 2135 * Number of packets in array. 2136 * @param loc 2137 * Pointer to burst routine local context. 2138 * @param olx 2139 * Configured Tx offloads mask. It is fully defined at 2140 * compile time and may be used for optimization. 2141 * 2142 * @return 2143 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2144 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2145 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2146 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2147 * Local context variables updated. 2148 */ 2149 static __rte_always_inline enum mlx5_txcmp_code 2150 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 2151 struct rte_mbuf **__rte_restrict pkts, 2152 unsigned int pkts_n, 2153 struct mlx5_txq_local *__rte_restrict loc, 2154 unsigned int olx) 2155 { 2156 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2157 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2158 pkts += loc->pkts_sent + 1; 2159 pkts_n -= loc->pkts_sent; 2160 for (;;) { 2161 struct mlx5_wqe_dseg *__rte_restrict dseg; 2162 struct mlx5_wqe *__rte_restrict wqe; 2163 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 2164 uint8_t *dptr; 2165 2166 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2167 if (MLX5_TXOFF_CONFIG(TXPP)) { 2168 enum mlx5_txcmp_code wret; 2169 2170 /* Generate WAIT for scheduling if requested. 
*/ 2171 wret = mlx5_tx_schedule_send(txq, loc, olx); 2172 if (wret == MLX5_TXCMP_CODE_EXIT) 2173 return MLX5_TXCMP_CODE_EXIT; 2174 if (wret == MLX5_TXCMP_CODE_ERROR) 2175 return MLX5_TXCMP_CODE_ERROR; 2176 } 2177 dlen = rte_pktmbuf_data_len(loc->mbuf); 2178 if (MLX5_TXOFF_CONFIG(VLAN) && 2179 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2180 vlan = sizeof(struct rte_vlan_hdr); 2181 } 2182 /* 2183 * First calculate the WQE size to check 2184 * whether we have enough space in ring buffer. 2185 */ 2186 hlen = loc->mbuf->l2_len + vlan + 2187 loc->mbuf->l3_len + loc->mbuf->l4_len; 2188 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 2189 return MLX5_TXCMP_CODE_ERROR; 2190 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 2191 hlen += loc->mbuf->outer_l2_len + 2192 loc->mbuf->outer_l3_len; 2193 /* Segment must contain all TSO headers. */ 2194 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 2195 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 2196 hlen > (dlen + vlan))) 2197 return MLX5_TXCMP_CODE_ERROR; 2198 /* 2199 * Check whether there are enough free WQEBBs: 2200 * - Control Segment 2201 * - Ethernet Segment 2202 * - First Segment of inlined Ethernet data 2203 * - ... data continued ... 2204 * - Finishing Data Segment of pointer type 2205 */ 2206 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 2207 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2208 if (loc->wqe_free < ((ds + 3) / 4)) 2209 return MLX5_TXCMP_CODE_EXIT; 2210 #ifdef MLX5_PMD_SOFT_COUNTERS 2211 /* Update sent data bytes/packets counters. */ 2212 ntcp = (dlen + vlan - hlen + 2213 loc->mbuf->tso_segsz - 1) / 2214 loc->mbuf->tso_segsz; 2215 /* 2216 * One will be added for mbuf itself at the end 2217 * of the mlx5_tx_burst from loc->pkts_sent field. 2218 */ 2219 --ntcp; 2220 txq->stats.opackets += ntcp; 2221 txq->stats.obytes += dlen + vlan + ntcp * hlen; 2222 #endif 2223 /* 2224 * Build the TSO WQE: 2225 * - Control Segment 2226 * - Ethernet Segment with hlen bytes inlined 2227 * - Data Segment of pointer type 2228 */ 2229 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2230 loc->wqe_last = wqe; 2231 mlx5_tx_cseg_init(txq, loc, wqe, ds, 2232 MLX5_OPCODE_TSO, olx); 2233 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 2234 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 2235 dlen -= hlen - vlan; 2236 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2237 /* 2238 * WQE is built, update the loop parameters 2239 * and go to the next packet. 2240 */ 2241 txq->wqe_ci += (ds + 3) / 4; 2242 loc->wqe_free -= (ds + 3) / 4; 2243 if (MLX5_TXOFF_CONFIG(INLINE)) 2244 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2245 --loc->elts_free; 2246 ++loc->pkts_sent; 2247 --pkts_n; 2248 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2249 return MLX5_TXCMP_CODE_EXIT; 2250 loc->mbuf = *pkts++; 2251 if (pkts_n > 1) 2252 rte_prefetch0(*pkts); 2253 if (MLX5_TXOFF_CONFIG(MULTI) && 2254 unlikely(NB_SEGS(loc->mbuf) > 1)) 2255 return MLX5_TXCMP_CODE_MULTI; 2256 if (likely(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG))) 2257 return MLX5_TXCMP_CODE_SINGLE; 2258 /* Continue with the next TSO packet. */ 2259 } 2260 MLX5_ASSERT(false); 2261 } 2262 2263 /** 2264 * Analyze the packet and select the best method to send. 2265 * 2266 * @param txq 2267 * Pointer to TX queue structure. 2268 * @param loc 2269 * Pointer to burst routine local context. 2270 * @param olx 2271 * Configured Tx offloads mask. It is fully defined at 2272 * compile time and may be used for optimization. 
2273 * @param newp 2274 * The predefined flag whether do complete check for 2275 * multi-segment packets and TSO. 2276 * 2277 * @return 2278 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2279 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 2280 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 2281 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 2282 */ 2283 static __rte_always_inline enum mlx5_txcmp_code 2284 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 2285 struct mlx5_txq_local *__rte_restrict loc, 2286 unsigned int olx, 2287 bool newp) 2288 { 2289 /* Check for multi-segment packet. */ 2290 if (newp && 2291 MLX5_TXOFF_CONFIG(MULTI) && 2292 unlikely(NB_SEGS(loc->mbuf) > 1)) 2293 return MLX5_TXCMP_CODE_MULTI; 2294 /* Check for TSO packet. */ 2295 if (newp && 2296 MLX5_TXOFF_CONFIG(TSO) && 2297 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2298 return MLX5_TXCMP_CODE_TSO; 2299 /* Check if eMPW is enabled at all. */ 2300 if (!MLX5_TXOFF_CONFIG(EMPW)) 2301 return MLX5_TXCMP_CODE_SINGLE; 2302 /* Check if eMPW can be engaged. */ 2303 if (MLX5_TXOFF_CONFIG(VLAN) && 2304 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) && 2305 (!MLX5_TXOFF_CONFIG(INLINE) || 2306 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 2307 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 2308 /* 2309 * eMPW does not support VLAN insertion offload, we have to 2310 * inline the entire packet but packet is too long for inlining. 2311 */ 2312 return MLX5_TXCMP_CODE_SINGLE; 2313 } 2314 return MLX5_TXCMP_CODE_EMPW; 2315 } 2316 2317 /** 2318 * Check the next packet attributes to match with the eMPW batch ones. 2319 * In addition, for legacy MPW the packet length is checked either. 2320 * 2321 * @param txq 2322 * Pointer to TX queue structure. 2323 * @param es 2324 * Pointer to Ethernet Segment of eMPW batch. 2325 * @param loc 2326 * Pointer to burst routine local context. 2327 * @param dlen 2328 * Length of previous packet in MPW descriptor. 2329 * @param olx 2330 * Configured Tx offloads mask. It is fully defined at 2331 * compile time and may be used for optimization. 2332 * 2333 * @return 2334 * true - packet match with eMPW batch attributes. 2335 * false - no match, eMPW should be restarted. 2336 */ 2337 static __rte_always_inline bool 2338 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, 2339 struct mlx5_wqe_eseg *__rte_restrict es, 2340 struct mlx5_txq_local *__rte_restrict loc, 2341 uint32_t dlen, 2342 unsigned int olx) 2343 { 2344 uint8_t swp_flags = 0; 2345 2346 /* Compare the checksum flags, if any. */ 2347 if (MLX5_TXOFF_CONFIG(CSUM) && 2348 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags) 2349 return false; 2350 /* Compare the Software Parser offsets and flags. */ 2351 if (MLX5_TXOFF_CONFIG(SWP) && 2352 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) || 2353 es->swp_flags != swp_flags)) 2354 return false; 2355 /* Fill metadata field if needed. */ 2356 if (MLX5_TXOFF_CONFIG(METADATA) && 2357 es->metadata != (loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 2358 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 0)) 2359 return false; 2360 /* Legacy MPW can send packets with the same length only. */ 2361 if (MLX5_TXOFF_CONFIG(MPW) && 2362 dlen != rte_pktmbuf_data_len(loc->mbuf)) 2363 return false; 2364 /* There must be no VLAN packets in eMPW loop. */ 2365 if (MLX5_TXOFF_CONFIG(VLAN)) 2366 MLX5_ASSERT(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN)); 2367 /* Check if the scheduling is requested. 
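	 * A packet with the send scheduling timestamp set needs its own
	 * WAIT WQE ahead of the data, hence it cannot be merged into an
	 * already opened eMPW session.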
*/ 2368 if (MLX5_TXOFF_CONFIG(TXPP) && 2369 loc->mbuf->ol_flags & txq->ts_mask) 2370 return false; 2371 return true; 2372 } 2373 2374 /** 2375 * Update send loop variables and WQE for eMPW loop without data inlining. 2376 * Number of Data Segments is equal to the number of sent packets. 2377 * 2378 * @param txq 2379 * Pointer to TX queue structure. 2380 * @param loc 2381 * Pointer to burst routine local context. 2382 * @param ds 2383 * Number of packets/Data Segments/Packets. 2384 * @param slen 2385 * Accumulated statistics, bytes sent. 2386 * @param olx 2387 * Configured Tx offloads mask. It is fully defined at 2388 * compile time and may be used for optimization. 2389 * 2390 * @return 2391 * true - packet match with eMPW batch attributes. 2392 * false - no match, eMPW should be restarted. 2393 */ 2394 static __rte_always_inline void 2395 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, 2396 struct mlx5_txq_local *__rte_restrict loc, 2397 unsigned int ds, 2398 unsigned int slen, 2399 unsigned int olx __rte_unused) 2400 { 2401 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2402 #ifdef MLX5_PMD_SOFT_COUNTERS 2403 /* Update sent data bytes counter. */ 2404 txq->stats.obytes += slen; 2405 #else 2406 (void)slen; 2407 #endif 2408 loc->elts_free -= ds; 2409 loc->pkts_sent += ds; 2410 ds += 2; 2411 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2412 txq->wqe_ci += (ds + 3) / 4; 2413 loc->wqe_free -= (ds + 3) / 4; 2414 } 2415 2416 /** 2417 * Update send loop variables and WQE for eMPW loop with data inlining. 2418 * Gets the size of pushed descriptors and data to the WQE. 2419 * 2420 * @param txq 2421 * Pointer to TX queue structure. 2422 * @param loc 2423 * Pointer to burst routine local context. 2424 * @param len 2425 * Total size of descriptor/data in bytes. 2426 * @param slen 2427 * Accumulated statistics, data bytes sent. 2428 * @param wqem 2429 * The base WQE for the eMPW/MPW descriptor. 2430 * @param olx 2431 * Configured Tx offloads mask. It is fully defined at 2432 * compile time and may be used for optimization. 2433 * 2434 * @return 2435 * true - packet match with eMPW batch attributes. 2436 * false - no match, eMPW should be restarted. 2437 */ 2438 static __rte_always_inline void 2439 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 2440 struct mlx5_txq_local *__rte_restrict loc, 2441 unsigned int len, 2442 unsigned int slen, 2443 struct mlx5_wqe *__rte_restrict wqem, 2444 unsigned int olx __rte_unused) 2445 { 2446 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 2447 2448 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2449 #ifdef MLX5_PMD_SOFT_COUNTERS 2450 /* Update sent data bytes counter. */ 2451 txq->stats.obytes += slen; 2452 #else 2453 (void)slen; 2454 #endif 2455 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 2456 /* 2457 * If the legacy MPW session contains the inline packets 2458 * we should set the only inline data segment length 2459 * and align the total length to the segment size. 2460 */ 2461 MLX5_ASSERT(len > sizeof(dseg->bcount)); 2462 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 2463 MLX5_ETH_WQE_DATA_INLINE); 2464 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 2465 } else { 2466 /* 2467 * The session is not legacy MPW or contains the 2468 * data buffer pointer segments. 
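		 * Here len counts the Data Segment bytes only, adding 2
		 * covers the Control and Ethernet Segments of the title
		 * WQEBB in the DS count written to cseg.sq_ds.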
2469 */ 2470 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 2471 len = len / MLX5_WSEG_SIZE + 2; 2472 } 2473 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 2474 txq->wqe_ci += (len + 3) / 4; 2475 loc->wqe_free -= (len + 3) / 4; 2476 loc->wqe_last = wqem; 2477 } 2478 2479 /** 2480 * The set of Tx burst functions for single-segment packets without TSO 2481 * and with Multi-Packet Writing feature support. 2482 * Supports all types of Tx offloads, except multi-packets and TSO. 2483 * 2484 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends as many packet 2485 * per WQE as it can. If eMPW is not configured or packet can not be sent with 2486 * eMPW (VLAN insertion) the ordinary SEND opcode is used and only one packet 2487 * placed in WQE. 2488 * 2489 * Functions stop sending if it encounters the multi-segment packet or packet 2490 * with TSO requested. 2491 * 2492 * The routines are responsible for storing processed mbuf into elts ring buffer 2493 * and update elts_head if inlining offload is requested. Otherwise the copying 2494 * mbufs to elts can be postponed and completed at the end of burst routine. 2495 * 2496 * @param txq 2497 * Pointer to TX queue structure. 2498 * @param[in] pkts 2499 * Packets to transmit. 2500 * @param pkts_n 2501 * Number of packets in array. 2502 * @param loc 2503 * Pointer to burst routine local context. 2504 * @param olx 2505 * Configured Tx offloads mask. It is fully defined at 2506 * compile time and may be used for optimization. 2507 * 2508 * @return 2509 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2510 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2511 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2512 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 2513 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 2514 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 2515 * 2516 * Local context variables updated. 2517 * 2518 * 2519 * The routine sends packets with MLX5_OPCODE_EMPW 2520 * without inlining, this is dedicated optimized branch. 2521 * No VLAN insertion is supported. 2522 */ 2523 static __rte_always_inline enum mlx5_txcmp_code 2524 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 2525 struct rte_mbuf **__rte_restrict pkts, 2526 unsigned int pkts_n, 2527 struct mlx5_txq_local *__rte_restrict loc, 2528 unsigned int olx) 2529 { 2530 /* 2531 * Subroutine is the part of mlx5_tx_burst_single() and sends 2532 * single-segment packet with eMPW opcode without data inlining. 2533 */ 2534 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2535 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2536 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2537 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2538 pkts += loc->pkts_sent + 1; 2539 pkts_n -= loc->pkts_sent; 2540 for (;;) { 2541 struct mlx5_wqe_dseg *__rte_restrict dseg; 2542 struct mlx5_wqe_eseg *__rte_restrict eseg; 2543 enum mlx5_txcmp_code ret; 2544 unsigned int part, loop; 2545 unsigned int slen = 0; 2546 2547 next_empw: 2548 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2549 if (MLX5_TXOFF_CONFIG(TXPP)) { 2550 enum mlx5_txcmp_code wret; 2551 2552 /* Generate WAIT for scheduling if requested. */ 2553 wret = mlx5_tx_schedule_send(txq, loc, olx); 2554 if (wret == MLX5_TXCMP_CODE_EXIT) 2555 return MLX5_TXCMP_CODE_EXIT; 2556 if (wret == MLX5_TXCMP_CODE_ERROR) 2557 return MLX5_TXCMP_CODE_ERROR; 2558 } 2559 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 
2560 MLX5_MPW_MAX_PACKETS : 2561 MLX5_EMPW_MAX_PACKETS); 2562 if (unlikely(loc->elts_free < part)) { 2563 /* We have no enough elts to save all mbufs. */ 2564 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 2565 return MLX5_TXCMP_CODE_EXIT; 2566 /* But we still able to send at least minimal eMPW. */ 2567 part = loc->elts_free; 2568 } 2569 /* Check whether we have enough WQEs */ 2570 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 2571 if (unlikely(loc->wqe_free < 2572 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2573 return MLX5_TXCMP_CODE_EXIT; 2574 part = (loc->wqe_free * 4) - 2; 2575 } 2576 if (likely(part > 1)) 2577 rte_prefetch0(*pkts); 2578 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2579 /* 2580 * Build eMPW title WQEBB: 2581 * - Control Segment, eMPW opcode 2582 * - Ethernet Segment, no inline 2583 */ 2584 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 2585 MLX5_OPCODE_ENHANCED_MPSW, olx); 2586 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 2587 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2588 eseg = &loc->wqe_last->eseg; 2589 dseg = &loc->wqe_last->dseg[0]; 2590 loop = part; 2591 /* Store the packet length for legacy MPW. */ 2592 if (MLX5_TXOFF_CONFIG(MPW)) 2593 eseg->mss = rte_cpu_to_be_16 2594 (rte_pktmbuf_data_len(loc->mbuf)); 2595 for (;;) { 2596 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2597 #ifdef MLX5_PMD_SOFT_COUNTERS 2598 /* Update sent data bytes counter. */ 2599 slen += dlen; 2600 #endif 2601 mlx5_tx_dseg_ptr 2602 (txq, loc, dseg, 2603 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 2604 dlen, olx); 2605 if (unlikely(--loop == 0)) 2606 break; 2607 loc->mbuf = *pkts++; 2608 if (likely(loop > 1)) 2609 rte_prefetch0(*pkts); 2610 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2611 /* 2612 * Unroll the completion code to avoid 2613 * returning variable value - it results in 2614 * unoptimized sequent checking in caller. 2615 */ 2616 if (ret == MLX5_TXCMP_CODE_MULTI) { 2617 part -= loop; 2618 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2619 if (unlikely(!loc->elts_free || 2620 !loc->wqe_free)) 2621 return MLX5_TXCMP_CODE_EXIT; 2622 return MLX5_TXCMP_CODE_MULTI; 2623 } 2624 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2625 if (ret == MLX5_TXCMP_CODE_TSO) { 2626 part -= loop; 2627 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2628 if (unlikely(!loc->elts_free || 2629 !loc->wqe_free)) 2630 return MLX5_TXCMP_CODE_EXIT; 2631 return MLX5_TXCMP_CODE_TSO; 2632 } 2633 if (ret == MLX5_TXCMP_CODE_SINGLE) { 2634 part -= loop; 2635 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2636 if (unlikely(!loc->elts_free || 2637 !loc->wqe_free)) 2638 return MLX5_TXCMP_CODE_EXIT; 2639 return MLX5_TXCMP_CODE_SINGLE; 2640 } 2641 if (ret != MLX5_TXCMP_CODE_EMPW) { 2642 MLX5_ASSERT(false); 2643 part -= loop; 2644 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2645 return MLX5_TXCMP_CODE_ERROR; 2646 } 2647 /* 2648 * Check whether packet parameters coincide 2649 * within assumed eMPW batch: 2650 * - check sum settings 2651 * - metadata value 2652 * - software parser settings 2653 * - packets length (legacy MPW only) 2654 * - scheduling is not required 2655 */ 2656 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 2657 MLX5_ASSERT(loop); 2658 part -= loop; 2659 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2660 if (unlikely(!loc->elts_free || 2661 !loc->wqe_free)) 2662 return MLX5_TXCMP_CODE_EXIT; 2663 pkts_n -= part; 2664 goto next_empw; 2665 } 2666 /* Packet attributes match, continue the same eMPW. 
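			 * Each matching packet adds one pointer Data
			 * Segment to the session, all of them share the
			 * single title Control and Ethernet Segments
			 * built above.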
*/ 2667 ++dseg; 2668 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2669 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2670 } 2671 /* eMPW is built successfully, update loop parameters. */ 2672 MLX5_ASSERT(!loop); 2673 MLX5_ASSERT(pkts_n >= part); 2674 #ifdef MLX5_PMD_SOFT_COUNTERS 2675 /* Update sent data bytes counter. */ 2676 txq->stats.obytes += slen; 2677 #endif 2678 loc->elts_free -= part; 2679 loc->pkts_sent += part; 2680 txq->wqe_ci += (2 + part + 3) / 4; 2681 loc->wqe_free -= (2 + part + 3) / 4; 2682 pkts_n -= part; 2683 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2684 return MLX5_TXCMP_CODE_EXIT; 2685 loc->mbuf = *pkts++; 2686 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2687 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 2688 return ret; 2689 /* Continue sending eMPW batches. */ 2690 } 2691 MLX5_ASSERT(false); 2692 } 2693 2694 /** 2695 * The routine sends packets with MLX5_OPCODE_EMPW 2696 * with inlining, optionally supports VLAN insertion. 2697 */ 2698 static __rte_always_inline enum mlx5_txcmp_code 2699 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 2700 struct rte_mbuf **__rte_restrict pkts, 2701 unsigned int pkts_n, 2702 struct mlx5_txq_local *__rte_restrict loc, 2703 unsigned int olx) 2704 { 2705 /* 2706 * Subroutine is the part of mlx5_tx_burst_single() and sends 2707 * single-segment packet with eMPW opcode with data inlining. 2708 */ 2709 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2710 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2711 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2712 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2713 pkts += loc->pkts_sent + 1; 2714 pkts_n -= loc->pkts_sent; 2715 for (;;) { 2716 struct mlx5_wqe_dseg *__rte_restrict dseg; 2717 struct mlx5_wqe *__rte_restrict wqem; 2718 enum mlx5_txcmp_code ret; 2719 unsigned int room, part, nlim; 2720 unsigned int slen = 0; 2721 2722 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2723 if (MLX5_TXOFF_CONFIG(TXPP)) { 2724 enum mlx5_txcmp_code wret; 2725 2726 /* Generate WAIT for scheduling if requested. */ 2727 wret = mlx5_tx_schedule_send(txq, loc, olx); 2728 if (wret == MLX5_TXCMP_CODE_EXIT) 2729 return MLX5_TXCMP_CODE_EXIT; 2730 if (wret == MLX5_TXCMP_CODE_ERROR) 2731 return MLX5_TXCMP_CODE_ERROR; 2732 } 2733 /* 2734 * Limits the amount of packets in one WQE 2735 * to improve CQE latency generation. 2736 */ 2737 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2738 MLX5_MPW_INLINE_MAX_PACKETS : 2739 MLX5_EMPW_MAX_PACKETS); 2740 /* Check whether we have minimal amount WQEs */ 2741 if (unlikely(loc->wqe_free < 2742 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2743 return MLX5_TXCMP_CODE_EXIT; 2744 if (likely(pkts_n > 1)) 2745 rte_prefetch0(*pkts); 2746 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2747 /* 2748 * Build eMPW title WQEBB: 2749 * - Control Segment, eMPW opcode, zero DS 2750 * - Ethernet Segment, no inline 2751 */ 2752 mlx5_tx_cseg_init(txq, loc, wqem, 0, 2753 MLX5_OPCODE_ENHANCED_MPSW, olx); 2754 mlx5_tx_eseg_none(txq, loc, wqem, 2755 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2756 dseg = &wqem->dseg[0]; 2757 /* Store the packet length for legacy MPW. */ 2758 if (MLX5_TXOFF_CONFIG(MPW)) 2759 wqem->eseg.mss = rte_cpu_to_be_16 2760 (rte_pktmbuf_data_len(loc->mbuf)); 2761 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 2762 loc->wqe_free) * MLX5_WQE_SIZE - 2763 MLX5_WQE_CSEG_SIZE - 2764 MLX5_WQE_ESEG_SIZE; 2765 /* Limit the room for legacy MPW sessions for performance. 
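		 * The room is capped to the larger of the space needed to
		 * inline one maximal eMPW packet (including the bcount
		 * field and the optional VLAN header) and
		 * MLX5_MPW_INLINE_MAX_PACKETS pointer Data Segments.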
*/ 2766 if (MLX5_TXOFF_CONFIG(MPW)) 2767 room = RTE_MIN(room, 2768 RTE_MAX(txq->inlen_empw + 2769 sizeof(dseg->bcount) + 2770 (MLX5_TXOFF_CONFIG(VLAN) ? 2771 sizeof(struct rte_vlan_hdr) : 0), 2772 MLX5_MPW_INLINE_MAX_PACKETS * 2773 MLX5_WQE_DSEG_SIZE)); 2774 /* Build WQE till we have space, packets and resources. */ 2775 part = room; 2776 for (;;) { 2777 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2778 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2779 unsigned int tlen; 2780 2781 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2782 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 2783 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 2784 /* 2785 * Some Tx offloads may cause an error if packet is not 2786 * long enough, check against assumed minimal length. 2787 */ 2788 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 2789 part -= room; 2790 if (unlikely(!part)) 2791 return MLX5_TXCMP_CODE_ERROR; 2792 /* 2793 * We have some successfully built 2794 * packet Data Segments to send. 2795 */ 2796 mlx5_tx_idone_empw(txq, loc, part, 2797 slen, wqem, olx); 2798 return MLX5_TXCMP_CODE_ERROR; 2799 } 2800 /* Inline or not inline - that's the Question. */ 2801 if (dlen > txq->inlen_empw || 2802 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2803 goto pointer_empw; 2804 if (MLX5_TXOFF_CONFIG(MPW)) { 2805 if (dlen > txq->inlen_send) 2806 goto pointer_empw; 2807 tlen = dlen; 2808 if (part == room) { 2809 /* Open new inline MPW session. */ 2810 tlen += sizeof(dseg->bcount); 2811 dseg->bcount = RTE_BE32(0); 2812 dseg = RTE_PTR_ADD 2813 (dseg, sizeof(dseg->bcount)); 2814 } else { 2815 /* 2816 * No pointer and inline descriptor 2817 * intermix for legacy MPW sessions. 2818 */ 2819 if (wqem->dseg[0].bcount) 2820 break; 2821 } 2822 } else { 2823 tlen = sizeof(dseg->bcount) + dlen; 2824 } 2825 /* Inline entire packet, optional VLAN insertion. */ 2826 if (MLX5_TXOFF_CONFIG(VLAN) && 2827 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2828 /* 2829 * The packet length must be checked in 2830 * mlx5_tx_able_to_empw() and packet 2831 * fits into inline length guaranteed. 2832 */ 2833 MLX5_ASSERT((dlen + 2834 sizeof(struct rte_vlan_hdr)) <= 2835 txq->inlen_empw); 2836 tlen += sizeof(struct rte_vlan_hdr); 2837 if (room < tlen) 2838 break; 2839 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 2840 dptr, dlen, olx); 2841 #ifdef MLX5_PMD_SOFT_COUNTERS 2842 /* Update sent data bytes counter. */ 2843 slen += sizeof(struct rte_vlan_hdr); 2844 #endif 2845 } else { 2846 if (room < tlen) 2847 break; 2848 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 2849 dptr, dlen, olx); 2850 } 2851 if (!MLX5_TXOFF_CONFIG(MPW)) 2852 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 2853 MLX5_ASSERT(room >= tlen); 2854 room -= tlen; 2855 /* 2856 * Packet data are completely inline, 2857 * we can try to free the packet. 2858 */ 2859 if (likely(loc->pkts_sent == loc->mbuf_free)) { 2860 /* 2861 * All the packets from the burst beginning 2862 * are inline, we can free mbufs directly 2863 * from the origin array on tx_burst exit(). 2864 */ 2865 loc->mbuf_free++; 2866 goto next_mbuf; 2867 } 2868 /* 2869 * In order no to call rte_pktmbuf_free_seg() here, 2870 * in the most inner loop (that might be very 2871 * expensive) we just save the mbuf in elts. 2872 */ 2873 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2874 loc->elts_free--; 2875 goto next_mbuf; 2876 pointer_empw: 2877 /* 2878 * No pointer and inline descriptor 2879 * intermix for legacy MPW sessions. 
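			 * If the opened legacy MPW session already carries
			 * inline data (the bcount placeholder is present),
			 * a pointer Data Segment cannot be appended, the
			 * session is closed and a new one is started.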
2880 */ 2881 if (MLX5_TXOFF_CONFIG(MPW) && 2882 part != room && 2883 wqem->dseg[0].bcount == RTE_BE32(0)) 2884 break; 2885 /* 2886 * Not inlinable VLAN packets are 2887 * proceeded outside of this routine. 2888 */ 2889 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2890 if (MLX5_TXOFF_CONFIG(VLAN)) 2891 MLX5_ASSERT(!(loc->mbuf->ol_flags & 2892 RTE_MBUF_F_TX_VLAN)); 2893 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2894 /* We have to store mbuf in elts.*/ 2895 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2896 loc->elts_free--; 2897 room -= MLX5_WQE_DSEG_SIZE; 2898 /* Ring buffer wraparound is checked at the loop end.*/ 2899 ++dseg; 2900 next_mbuf: 2901 #ifdef MLX5_PMD_SOFT_COUNTERS 2902 /* Update sent data bytes counter. */ 2903 slen += dlen; 2904 #endif 2905 loc->pkts_sent++; 2906 pkts_n--; 2907 if (unlikely(!pkts_n || !loc->elts_free)) { 2908 /* 2909 * We have no resources/packets to 2910 * continue build descriptors. 2911 */ 2912 part -= room; 2913 mlx5_tx_idone_empw(txq, loc, part, 2914 slen, wqem, olx); 2915 return MLX5_TXCMP_CODE_EXIT; 2916 } 2917 loc->mbuf = *pkts++; 2918 if (likely(pkts_n > 1)) 2919 rte_prefetch0(*pkts); 2920 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2921 /* 2922 * Unroll the completion code to avoid 2923 * returning variable value - it results in 2924 * unoptimized sequent checking in caller. 2925 */ 2926 if (ret == MLX5_TXCMP_CODE_MULTI) { 2927 part -= room; 2928 mlx5_tx_idone_empw(txq, loc, part, 2929 slen, wqem, olx); 2930 if (unlikely(!loc->elts_free || 2931 !loc->wqe_free)) 2932 return MLX5_TXCMP_CODE_EXIT; 2933 return MLX5_TXCMP_CODE_MULTI; 2934 } 2935 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2936 if (ret == MLX5_TXCMP_CODE_TSO) { 2937 part -= room; 2938 mlx5_tx_idone_empw(txq, loc, part, 2939 slen, wqem, olx); 2940 if (unlikely(!loc->elts_free || 2941 !loc->wqe_free)) 2942 return MLX5_TXCMP_CODE_EXIT; 2943 return MLX5_TXCMP_CODE_TSO; 2944 } 2945 if (ret == MLX5_TXCMP_CODE_SINGLE) { 2946 part -= room; 2947 mlx5_tx_idone_empw(txq, loc, part, 2948 slen, wqem, olx); 2949 if (unlikely(!loc->elts_free || 2950 !loc->wqe_free)) 2951 return MLX5_TXCMP_CODE_EXIT; 2952 return MLX5_TXCMP_CODE_SINGLE; 2953 } 2954 if (ret != MLX5_TXCMP_CODE_EMPW) { 2955 MLX5_ASSERT(false); 2956 part -= room; 2957 mlx5_tx_idone_empw(txq, loc, part, 2958 slen, wqem, olx); 2959 return MLX5_TXCMP_CODE_ERROR; 2960 } 2961 /* Check if we have minimal room left. */ 2962 nlim--; 2963 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 2964 break; 2965 /* 2966 * Check whether packet parameters coincide 2967 * within assumed eMPW batch: 2968 * - check sum settings 2969 * - metadata value 2970 * - software parser settings 2971 * - packets length (legacy MPW only) 2972 * - scheduling is not required 2973 */ 2974 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 2975 loc, dlen, olx)) 2976 break; 2977 /* Packet attributes match, continue the same eMPW. */ 2978 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2979 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2980 } 2981 /* 2982 * We get here to close an existing eMPW 2983 * session and start the new one. 2984 */ 2985 MLX5_ASSERT(pkts_n); 2986 part -= room; 2987 if (unlikely(!part)) 2988 return MLX5_TXCMP_CODE_EXIT; 2989 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 2990 if (unlikely(!loc->elts_free || 2991 !loc->wqe_free)) 2992 return MLX5_TXCMP_CODE_EXIT; 2993 /* Continue the loop with new eMPW session. */ 2994 } 2995 MLX5_ASSERT(false); 2996 } 2997 2998 /** 2999 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 
3000 * Data inlining and VLAN insertion are supported. 3001 */ 3002 static __rte_always_inline enum mlx5_txcmp_code 3003 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 3004 struct rte_mbuf **__rte_restrict pkts, 3005 unsigned int pkts_n, 3006 struct mlx5_txq_local *__rte_restrict loc, 3007 unsigned int olx) 3008 { 3009 /* 3010 * Subroutine is the part of mlx5_tx_burst_single() 3011 * and sends single-segment packet with SEND opcode. 3012 */ 3013 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3014 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3015 pkts += loc->pkts_sent + 1; 3016 pkts_n -= loc->pkts_sent; 3017 for (;;) { 3018 struct mlx5_wqe *__rte_restrict wqe; 3019 enum mlx5_txcmp_code ret; 3020 3021 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3022 if (MLX5_TXOFF_CONFIG(TXPP)) { 3023 enum mlx5_txcmp_code wret; 3024 3025 /* Generate WAIT for scheduling if requested. */ 3026 wret = mlx5_tx_schedule_send(txq, loc, olx); 3027 if (wret == MLX5_TXCMP_CODE_EXIT) 3028 return MLX5_TXCMP_CODE_EXIT; 3029 if (wret == MLX5_TXCMP_CODE_ERROR) 3030 return MLX5_TXCMP_CODE_ERROR; 3031 } 3032 if (MLX5_TXOFF_CONFIG(INLINE)) { 3033 unsigned int inlen, vlan = 0; 3034 3035 inlen = rte_pktmbuf_data_len(loc->mbuf); 3036 if (MLX5_TXOFF_CONFIG(VLAN) && 3037 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 3038 vlan = sizeof(struct rte_vlan_hdr); 3039 inlen += vlan; 3040 } 3041 /* 3042 * If inlining is enabled at configuration time 3043 * the limit must be not less than minimal size. 3044 * Otherwise we would do extra check for data 3045 * size to avoid crashes due to length overflow. 3046 */ 3047 MLX5_ASSERT(txq->inlen_send >= 3048 MLX5_ESEG_MIN_INLINE_SIZE); 3049 if (inlen <= txq->inlen_send) { 3050 unsigned int seg_n, wqe_n; 3051 3052 rte_prefetch0(rte_pktmbuf_mtod 3053 (loc->mbuf, uint8_t *)); 3054 /* Check against minimal length. */ 3055 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3056 return MLX5_TXCMP_CODE_ERROR; 3057 if (loc->mbuf->ol_flags & 3058 RTE_MBUF_F_TX_DYNF_NOINLINE) { 3059 /* 3060 * The hint flag not to inline packet 3061 * data is set. Check whether we can 3062 * follow the hint. 3063 */ 3064 if ((!MLX5_TXOFF_CONFIG(EMPW) && 3065 txq->inlen_mode) || 3066 (MLX5_TXOFF_CONFIG(MPW) && 3067 txq->inlen_mode)) { 3068 if (inlen <= txq->inlen_send) 3069 goto single_inline; 3070 /* 3071 * The hardware requires the 3072 * minimal inline data header. 3073 */ 3074 goto single_min_inline; 3075 } 3076 if (MLX5_TXOFF_CONFIG(VLAN) && 3077 vlan && !txq->vlan_en) { 3078 /* 3079 * We must insert VLAN tag 3080 * by software means. 3081 */ 3082 goto single_part_inline; 3083 } 3084 goto single_no_inline; 3085 } 3086 single_inline: 3087 /* 3088 * Completely inlined packet data WQE: 3089 * - Control Segment, SEND opcode 3090 * - Ethernet Segment, no VLAN insertion 3091 * - Data inlined, VLAN optionally inserted 3092 * - Alignment to MLX5_WSEG_SIZE 3093 * Have to estimate amount of WQEBBs 3094 */ 3095 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 3096 MLX5_ESEG_MIN_INLINE_SIZE + 3097 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3098 /* Check if there are enough WQEBBs. */ 3099 wqe_n = (seg_n + 3) / 4; 3100 if (wqe_n > loc->wqe_free) 3101 return MLX5_TXCMP_CODE_EXIT; 3102 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3103 loc->wqe_last = wqe; 3104 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 3105 MLX5_OPCODE_SEND, olx); 3106 mlx5_tx_eseg_data(txq, loc, wqe, 3107 vlan, inlen, 0, olx); 3108 txq->wqe_ci += wqe_n; 3109 loc->wqe_free -= wqe_n; 3110 /* 3111 * Packet data are completely inlined, 3112 * free the packet immediately. 
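				 * No elts entry is consumed for this mbuf,
				 * there is nothing left to free on completion.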
3113 */ 3114 rte_pktmbuf_free_seg(loc->mbuf); 3115 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 3116 MLX5_TXOFF_CONFIG(MPW)) && 3117 txq->inlen_mode) { 3118 /* 3119 * If minimal inlining is requested the eMPW 3120 * feature should be disabled due to data is 3121 * inlined into Ethernet Segment, which can 3122 * not contain inlined data for eMPW due to 3123 * segment shared for all packets. 3124 */ 3125 struct mlx5_wqe_dseg *__rte_restrict dseg; 3126 unsigned int ds; 3127 uint8_t *dptr; 3128 3129 /* 3130 * The inline-mode settings require 3131 * to inline the specified amount of 3132 * data bytes to the Ethernet Segment. 3133 * We should check the free space in 3134 * WQE ring buffer to inline partially. 3135 */ 3136 single_min_inline: 3137 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 3138 MLX5_ASSERT(inlen > txq->inlen_mode); 3139 MLX5_ASSERT(txq->inlen_mode >= 3140 MLX5_ESEG_MIN_INLINE_SIZE); 3141 /* 3142 * Check whether there are enough free WQEBBs: 3143 * - Control Segment 3144 * - Ethernet Segment 3145 * - First Segment of inlined Ethernet data 3146 * - ... data continued ... 3147 * - Finishing Data Segment of pointer type 3148 */ 3149 ds = (MLX5_WQE_CSEG_SIZE + 3150 MLX5_WQE_ESEG_SIZE + 3151 MLX5_WQE_DSEG_SIZE + 3152 txq->inlen_mode - 3153 MLX5_ESEG_MIN_INLINE_SIZE + 3154 MLX5_WQE_DSEG_SIZE + 3155 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3156 if (loc->wqe_free < ((ds + 3) / 4)) 3157 return MLX5_TXCMP_CODE_EXIT; 3158 /* 3159 * Build the ordinary SEND WQE: 3160 * - Control Segment 3161 * - Ethernet Segment, inline inlen_mode bytes 3162 * - Data Segment of pointer type 3163 */ 3164 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3165 loc->wqe_last = wqe; 3166 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3167 MLX5_OPCODE_SEND, olx); 3168 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 3169 txq->inlen_mode, 3170 0, olx); 3171 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3172 txq->inlen_mode - vlan; 3173 inlen -= txq->inlen_mode; 3174 mlx5_tx_dseg_ptr(txq, loc, dseg, 3175 dptr, inlen, olx); 3176 /* 3177 * WQE is built, update the loop parameters 3178 * and got to the next packet. 3179 */ 3180 txq->wqe_ci += (ds + 3) / 4; 3181 loc->wqe_free -= (ds + 3) / 4; 3182 /* We have to store mbuf in elts.*/ 3183 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3184 txq->elts[txq->elts_head++ & txq->elts_m] = 3185 loc->mbuf; 3186 --loc->elts_free; 3187 } else { 3188 uint8_t *dptr; 3189 unsigned int dlen; 3190 3191 /* 3192 * Partially inlined packet data WQE, we have 3193 * some space in title WQEBB, we can fill it 3194 * with some packet data. It takes one WQEBB, 3195 * it is available, no extra space check: 3196 * - Control Segment, SEND opcode 3197 * - Ethernet Segment, no VLAN insertion 3198 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 3199 * - Data Segment, pointer type 3200 * 3201 * We also get here if VLAN insertion is not 3202 * supported by HW, the inline is enabled. 3203 */ 3204 single_part_inline: 3205 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3206 loc->wqe_last = wqe; 3207 mlx5_tx_cseg_init(txq, loc, wqe, 4, 3208 MLX5_OPCODE_SEND, olx); 3209 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 3210 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3211 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 3212 /* 3213 * The length check is performed above, by 3214 * comparing with txq->inlen_send. We should 3215 * not get overflow here. 
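				 * Illustrative figures only (assuming
				 * MLX5_ESEG_MIN_INLINE_SIZE is 18, the value
				 * is not defined in this header): a 60-byte
				 * packet without VLAN insertion is split into
				 * 18 inlined bytes and a 42-byte pointer
				 * Data Segment.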
3216 */ 3217 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 3218 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 3219 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 3220 dptr, dlen, olx); 3221 ++txq->wqe_ci; 3222 --loc->wqe_free; 3223 /* We have to store mbuf in elts.*/ 3224 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3225 txq->elts[txq->elts_head++ & txq->elts_m] = 3226 loc->mbuf; 3227 --loc->elts_free; 3228 } 3229 #ifdef MLX5_PMD_SOFT_COUNTERS 3230 /* Update sent data bytes counter. */ 3231 txq->stats.obytes += vlan + 3232 rte_pktmbuf_data_len(loc->mbuf); 3233 #endif 3234 } else { 3235 /* 3236 * No inline at all, it means the CPU cycles saving 3237 * is prioritized at configuration, we should not 3238 * copy any packet data to WQE. 3239 * 3240 * SEND WQE, one WQEBB: 3241 * - Control Segment, SEND opcode 3242 * - Ethernet Segment, optional VLAN, no inline 3243 * - Data Segment, pointer type 3244 */ 3245 single_no_inline: 3246 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3247 loc->wqe_last = wqe; 3248 mlx5_tx_cseg_init(txq, loc, wqe, 3, 3249 MLX5_OPCODE_SEND, olx); 3250 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3251 mlx5_tx_dseg_ptr 3252 (txq, loc, &wqe->dseg[0], 3253 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3254 rte_pktmbuf_data_len(loc->mbuf), olx); 3255 ++txq->wqe_ci; 3256 --loc->wqe_free; 3257 /* 3258 * We should not store mbuf pointer in elts 3259 * if no inlining is configured, this is done 3260 * by calling routine in a batch copy. 3261 */ 3262 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 3263 --loc->elts_free; 3264 #ifdef MLX5_PMD_SOFT_COUNTERS 3265 /* Update sent data bytes counter. */ 3266 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 3267 if (MLX5_TXOFF_CONFIG(VLAN) && 3268 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 3269 txq->stats.obytes += 3270 sizeof(struct rte_vlan_hdr); 3271 #endif 3272 } 3273 ++loc->pkts_sent; 3274 --pkts_n; 3275 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3276 return MLX5_TXCMP_CODE_EXIT; 3277 loc->mbuf = *pkts++; 3278 if (pkts_n > 1) 3279 rte_prefetch0(*pkts); 3280 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3281 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 3282 return ret; 3283 } 3284 MLX5_ASSERT(false); 3285 } 3286 3287 static __rte_always_inline enum mlx5_txcmp_code 3288 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 3289 struct rte_mbuf **__rte_restrict pkts, 3290 unsigned int pkts_n, 3291 struct mlx5_txq_local *__rte_restrict loc, 3292 unsigned int olx) 3293 { 3294 enum mlx5_txcmp_code ret; 3295 3296 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 3297 if (ret == MLX5_TXCMP_CODE_SINGLE) 3298 goto ordinary_send; 3299 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 3300 for (;;) { 3301 /* Optimize for inline/no inline eMPW send. */ 3302 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 3303 mlx5_tx_burst_empw_inline 3304 (txq, pkts, pkts_n, loc, olx) : 3305 mlx5_tx_burst_empw_simple 3306 (txq, pkts, pkts_n, loc, olx); 3307 if (ret != MLX5_TXCMP_CODE_SINGLE) 3308 return ret; 3309 /* The resources to send one packet should remain. */ 3310 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3311 ordinary_send: 3312 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 3313 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 3314 if (ret != MLX5_TXCMP_CODE_EMPW) 3315 return ret; 3316 /* The resources to send one packet should remain. */ 3317 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3318 } 3319 } 3320 3321 /** 3322 * DPDK Tx callback template. This is configured template used to generate 3323 * routines optimized for specified offload setup. 
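 * The olx offload mask is a compile-time constant in every generated
 * routine, so the MLX5_TXOFF_CONFIG() checks below are resolved by the
 * compiler and the branches for not requested offloads are removed.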
3324 * One of this generated functions is chosen at SQ configuration time. 3325 * 3326 * @param txq 3327 * Generic pointer to TX queue structure. 3328 * @param[in] pkts 3329 * Packets to transmit. 3330 * @param pkts_n 3331 * Number of packets in array. 3332 * @param olx 3333 * Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx 3334 * values. Should be static to take compile time static configuration 3335 * advantages. 3336 * 3337 * @return 3338 * Number of packets successfully transmitted (<= pkts_n). 3339 */ 3340 static __rte_always_inline uint16_t 3341 mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, 3342 struct rte_mbuf **__rte_restrict pkts, 3343 uint16_t pkts_n, 3344 unsigned int olx) 3345 { 3346 struct mlx5_txq_local loc; 3347 enum mlx5_txcmp_code ret; 3348 unsigned int part; 3349 3350 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 3351 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 3352 if (unlikely(!pkts_n)) 3353 return 0; 3354 if (MLX5_TXOFF_CONFIG(INLINE)) 3355 loc.mbuf_free = 0; 3356 loc.pkts_sent = 0; 3357 loc.pkts_copy = 0; 3358 loc.wqe_last = NULL; 3359 3360 send_loop: 3361 loc.pkts_loop = loc.pkts_sent; 3362 /* 3363 * Check if there are some CQEs, if any: 3364 * - process an encountered errors 3365 * - process the completed WQEs 3366 * - free related mbufs 3367 * - doorbell the NIC about processed CQEs 3368 */ 3369 rte_prefetch0(*(pkts + loc.pkts_sent)); 3370 mlx5_tx_handle_completion(txq, olx); 3371 /* 3372 * Calculate the number of available resources - elts and WQEs. 3373 * There are two possible different scenarios: 3374 * - no data inlining into WQEs, one WQEBB may contains up to 3375 * four packets, in this case elts become scarce resource 3376 * - data inlining into WQEs, one packet may require multiple 3377 * WQEBBs, the WQEs become the limiting factor. 3378 */ 3379 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 3380 loc.elts_free = txq->elts_s - 3381 (uint16_t)(txq->elts_head - txq->elts_tail); 3382 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 3383 loc.wqe_free = txq->wqe_s - 3384 (uint16_t)(txq->wqe_ci - txq->wqe_pi); 3385 if (unlikely(!loc.elts_free || !loc.wqe_free)) 3386 goto burst_exit; 3387 for (;;) { 3388 /* 3389 * Fetch the packet from array. Usually this is the first 3390 * packet in series of multi/single segment packets. 3391 */ 3392 loc.mbuf = *(pkts + loc.pkts_sent); 3393 /* Dedicated branch for multi-segment packets. */ 3394 if (MLX5_TXOFF_CONFIG(MULTI) && 3395 unlikely(NB_SEGS(loc.mbuf) > 1)) { 3396 /* 3397 * Multi-segment packet encountered. 3398 * Hardware is able to process it only 3399 * with SEND/TSO opcodes, one packet 3400 * per WQE, do it in dedicated routine. 3401 */ 3402 enter_send_multi: 3403 MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy); 3404 part = loc.pkts_sent - loc.pkts_copy; 3405 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 3406 /* 3407 * There are some single-segment mbufs not 3408 * stored in elts. The mbufs must be in the 3409 * same order as WQEs, so we must copy the 3410 * mbufs to elts here, before the coming 3411 * multi-segment packet mbufs is appended. 
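				 * mlx5_tx_copy_elts() stores the pointers in
				 * bulk, the completion handling frees mbufs
				 * from elts strictly in posting order.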
	for (;;) {
		/*
		 * Fetch the packet from the array. Usually this is the first
		 * packet in a series of multi/single-segment packets.
		 */
		loc.mbuf = *(pkts + loc.pkts_sent);
		/* Dedicated branch for multi-segment packets. */
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    unlikely(NB_SEGS(loc.mbuf) > 1)) {
			/*
			 * Multi-segment packet encountered.
			 * Hardware is able to process it only
			 * with SEND/TSO opcodes, one packet
			 * per WQE, do it in a dedicated routine.
			 */
enter_send_multi:
			MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy);
			part = loc.pkts_sent - loc.pkts_copy;
			if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
				/*
				 * There are some single-segment mbufs not
				 * stored in elts. The mbufs must be in the
				 * same order as WQEs, so we must copy the
				 * mbufs to elts here, before the mbufs of
				 * the coming multi-segment packet are
				 * appended.
				 */
				mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy,
						  part, olx);
				loc.pkts_copy = loc.pkts_sent;
			}
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx);
			if (!MLX5_TXOFF_CONFIG(INLINE))
				loc.pkts_copy = loc.pkts_sent;
			/*
			 * These return code checks are expected to be
			 * optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT) {
				/*
				 * The routine returns this code when all
				 * packets are sent or there are not enough
				 * resources to complete the request.
				 */
				break;
			}
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				/*
				 * The routine returns this code when some
				 * error in the format of an incoming packet
				 * occurred.
				 */
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE) {
				/*
				 * A single-segment packet was encountered
				 * in the array, try to send it in the most
				 * optimized way, possibly engaging eMPW.
				 */
				goto enter_send_single;
			}
			if (MLX5_TXOFF_CONFIG(TSO) &&
			    ret == MLX5_TXCMP_CODE_TSO) {
				/*
				 * A single-segment TSO packet was
				 * encountered in the array.
				 */
				goto enter_send_tso;
			}
			/* We must not get here. Something has gone wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/* Dedicated branch for single-segment TSO packets. */
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    unlikely(loc.mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
			/*
			 * TSO might require a special way of inlining
			 * (dedicated parameters) and is sent with the
			 * MLX5_OPCODE_TSO opcode only, so handle this
			 * in a dedicated branch.
			 */
enter_send_tso:
			MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1);
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx);
			/*
			 * These return code checks are expected to be
			 * optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT)
				break;
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE)
				goto enter_send_single;
			if (MLX5_TXOFF_CONFIG(MULTI) &&
			    ret == MLX5_TXCMP_CODE_MULTI) {
				/*
				 * A multi-segment packet was
				 * encountered in the array.
				 */
				goto enter_send_multi;
			}
			/* We must not get here. Something has gone wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/*
		 * The dedicated branch for single-segment packets without
		 * TSO. Often these can be sent using MLX5_OPCODE_EMPW with
		 * multiple packets in one WQE. The routine builds the WQEs
		 * until it encounters a TSO or multi-segment packet (in case
		 * these offloads are requested at SQ configuration time).
		 */
enter_send_single:
		MLX5_ASSERT(pkts_n > loc.pkts_sent);
		ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx);
		/*
		 * These return code checks are expected to be
		 * optimized out due to routine inlining.
		 */
		if (ret == MLX5_TXCMP_CODE_EXIT)
			break;
		if (ret == MLX5_TXCMP_CODE_ERROR) {
			txq->stats.oerrors++;
			break;
		}
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    ret == MLX5_TXCMP_CODE_MULTI) {
			/*
			 * A multi-segment packet was
			 * encountered in the array.
			 */
			goto enter_send_multi;
		}
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    ret == MLX5_TXCMP_CODE_TSO) {
			/*
			 * A single-segment TSO packet was
			 * encountered in the array.
			 */
			goto enter_send_tso;
		}
		/* We must not get here. Something has gone wrong. */
		MLX5_ASSERT(false);
		txq->stats.oerrors++;
		break;
	}
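	/*
	 * A sketch of the dispatch above for a hypothetical burst of
	 * [single, single, TSO, multi-seg] packets, assuming the MULTI and
	 * TSO offloads are configured in olx: the loop enters
	 * enter_send_single, mlx5_tx_burst_single() sends the two
	 * single-segment packets and returns MLX5_TXCMP_CODE_TSO, the goto
	 * hands over to enter_send_tso, mlx5_tx_burst_tso() sends the TSO
	 * packet and returns MLX5_TXCMP_CODE_MULTI, and enter_send_multi
	 * finishes the burst, returning MLX5_TXCMP_CODE_EXIT which breaks
	 * out of the loop. Each dedicated routine thus reports the kind of
	 * the first packet it could not handle, so the array is never
	 * re-parsed.
	 */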
	/*
	 * Main Tx loop is completed, do the rest:
	 * - set completion request if thresholds are reached
	 * - doorbell the hardware
	 * - copy the remaining mbufs to elts (if any)
	 */
	MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) ||
		    loc.pkts_sent >= loc.pkts_copy);
	/* Take a shortcut if nothing was sent. */
	if (unlikely(loc.pkts_sent == loc.pkts_loop))
		goto burst_exit;
	/* Request CQE generation if limits are reached. */
	mlx5_tx_request_completion(txq, &loc, olx);
	/*
	 * Ring QP doorbell immediately after WQE building completion
	 * to improve latencies. The purely software-related data treatment
	 * can be completed after the doorbell. Tx CQEs for this SQ are
	 * processed by polling in this thread only.
	 *
	 * The rdma core library can map the doorbell register in two ways,
	 * depending on the environment variable "MLX5_SHUT_UP_BF":
	 *
	 * - as regular cached memory, if the variable is either missing or
	 *   set to zero. This type of mapping may cause significant
	 *   doorbell register write latency and requires an explicit memory
	 *   write barrier to mitigate this issue and prevent write
	 *   combining.
	 *
	 * - as non-cached memory, if the variable is present and set to a
	 *   non-zero value. This type of mapping may cause a performance
	 *   impact under heavy load conditions, but the explicit write
	 *   memory barrier is not required and it may improve core
	 *   performance.
	 *
	 * - the legacy behaviour (prior to the 19.08 release) was to use
	 *   some heuristics to decide whether the write memory barrier
	 *   should be performed. This behavior is supported by specifying
	 *   tx_db_nc=2; the write barrier is skipped if the application
	 *   provides the full recommended burst of packets, on the
	 *   assumption that more packets are coming and the write barrier
	 *   will be issued on the next burst (after descriptor writing, at
	 *   least).
	 */
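	/*
	 * A short decoding of the boolean expression passed as the last
	 * mlx5_doorbell_ring() argument below: it evaluates to true only
	 * when the doorbell is not mapped as non-cached memory
	 * (!txq->db_nc) and either the heuristic mode is disabled
	 * (!txq->db_heu) or the burst length is not an exact multiple of
	 * MLX5_TX_DEFAULT_BURST, which matches the write barrier policy
	 * described in the comment above.
	 */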
	mlx5_doorbell_ring(mlx5_tx_bfreg(txq),
			   *(volatile uint64_t *)loc.wqe_last, txq->wqe_ci,
			   txq->qp_db, !txq->db_nc &&
			   (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
	/* Not all of the mbufs may be stored in elts yet. */
	part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
	if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
		/*
		 * There are some single-segment mbufs not stored in elts.
		 * This can happen only if the last packet was
		 * single-segment. The copying is gathered into one place
		 * because it is a good opportunity to optimize it with
		 * SIMD. Unfortunately, if inlining is enabled, gaps may
		 * appear in the pointer array due to early freeing of the
		 * inlined mbufs.
		 */
		mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx);
		loc.pkts_copy = loc.pkts_sent;
	}
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (pkts_n > loc.pkts_sent) {
		/*
		 * If the burst size is large there might not be enough CQEs
		 * fetched from the completion queue and not enough resources
		 * freed to send all the packets.
		 */
		goto send_loop;
	}
burst_exit:
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += loc.pkts_sent;
#endif
	if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
		__mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
	return loc.pkts_sent;
}

#endif /* RTE_PMD_MLX5_TX_H_ */