/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2021 6WIND S.A.
 * Copyright 2021 Mellanox Technologies, Ltd
 */

#ifndef RTE_PMD_MLX5_TX_H_
#define RTE_PMD_MLX5_TX_H_

#include <stdint.h>
#include <sys/queue.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_common.h>
#include <rte_spinlock.h>
#include <rte_trace_point.h>

#include <mlx5_common.h>
#include <mlx5_common_mr.h>

#include "mlx5.h"
#include "mlx5_autoconf.h"
#include "mlx5_rxtx.h"
#include "mlx5_trace.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
	MLX5_TXCMP_CODE_EXIT = 0,
	MLX5_TXCMP_CODE_ERROR,
	MLX5_TXCMP_CODE_SINGLE,
	MLX5_TXCMP_CODE_MULTI,
	MLX5_TXCMP_CODE_TSO,
	MLX5_TXCMP_CODE_EMPW,
};

/*
 * These defines are used to configure the Tx burst routine option set
 * supported at compile time. Options that are not specified are optimized
 * out, because the related if conditions can be evaluated at compile time.
 * Offloads with a larger runtime check overhead (requiring more CPU cycles
 * to skip) should have a larger index - this is needed to select the best
 * matching routine when there is no exact match and some offloads are not
 * actually requested.
 */
#define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets. */
#define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported. */
#define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads. */
#define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Checksums offloaded. */
#define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported. */
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported. */
#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported. */
#define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp. */

/* The most common offloads groups. */
#define MLX5_TXOFF_CONFIG_NONE 0
#define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \
				MLX5_TXOFF_CONFIG_TSO | \
				MLX5_TXOFF_CONFIG_SWP | \
				MLX5_TXOFF_CONFIG_CSUM | \
				MLX5_TXOFF_CONFIG_INLINE | \
				MLX5_TXOFF_CONFIG_VLAN | \
				MLX5_TXOFF_CONFIG_METADATA)

#define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask)

#define MLX5_TXOFF_PRE_DECL(func) \
uint16_t mlx5_tx_burst_##func(void *txq, \
			      struct rte_mbuf **pkts, \
			      uint16_t pkts_n)

#define MLX5_TXOFF_DECL(func, olx) \
uint16_t mlx5_tx_burst_##func(void *txq, \
			      struct rte_mbuf **pkts, \
			      uint16_t pkts_n) \
{ \
	return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \
				  pkts, pkts_n, (olx)); \
}

/* Mbuf dynamic flag offset for inline. */
extern uint64_t rte_net_mlx5_dynf_inline_mask;
#define RTE_MBUF_F_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask

extern alignas(RTE_CACHE_LINE_SIZE) uint32_t mlx5_ptype_table[];
extern alignas(RTE_CACHE_LINE_SIZE) uint8_t mlx5_cksum_table[1 << 10];
extern alignas(RTE_CACHE_LINE_SIZE) uint8_t mlx5_swp_types_table[1 << 10];

struct mlx5_txq_stats {
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint64_t opackets; /**< Total of successfully sent packets. */
	uint64_t obytes; /**< Total of successfully sent bytes. */
#endif
	uint64_t oerrors; /**< Total number of failed transmitted packets. */
};

/* TX queue send local data.
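 * This is per-burst scratch state: it lives on the stack of a single
 * mlx5_tx_burst() invocation and is never shared between bursts or queues.
 * A rough usage sketch (illustrative only; the real burst template may
 * differ in details):
 *
 *	struct mlx5_txq_local loc;
 *
 *	loc.pkts_sent = 0;
 *	loc.pkts_copy = 0;
 *	loc.wqe_last = NULL;
 *	// The send loop consumes loc.elts_free/loc.wqe_free while building
 *	// WQEs; completion handling then uses pkts_sent/pkts_copy counters.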
*/ 99 __extension__ 100 struct mlx5_txq_local { 101 struct mlx5_wqe *wqe_last; /* last sent WQE pointer. */ 102 struct rte_mbuf *mbuf; /* first mbuf to process. */ 103 uint16_t pkts_copy; /* packets copied to elts. */ 104 uint16_t pkts_sent; /* packets sent. */ 105 uint16_t pkts_loop; /* packets sent on loop entry. */ 106 uint16_t elts_free; /* available elts remain. */ 107 uint16_t wqe_free; /* available wqe remain. */ 108 uint16_t mbuf_off; /* data offset in current mbuf. */ 109 uint16_t mbuf_nseg; /* number of remaining mbuf. */ 110 uint16_t mbuf_free; /* number of inline mbufs to free. */ 111 }; 112 113 /* TX queue descriptor. */ 114 __extension__ 115 struct __rte_cache_aligned mlx5_txq_data { 116 uint16_t elts_head; /* Current counter in (*elts)[]. */ 117 uint16_t elts_tail; /* Counter of first element awaiting completion. */ 118 uint16_t elts_comp; /* elts index since last completion request. */ 119 uint16_t elts_s; /* Number of mbuf elements. */ 120 uint16_t elts_m; /* Mask for mbuf elements indices. */ 121 /* Fields related to elts mbuf storage. */ 122 uint16_t wqe_ci; /* Consumer index for work queue. */ 123 uint16_t wqe_pi; /* Producer index for work queue. */ 124 uint16_t wqe_s; /* Number of WQ elements. */ 125 uint16_t wqe_m; /* Mask Number for WQ elements. */ 126 uint16_t wqe_comp; /* WQE index since last completion request. */ 127 uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */ 128 /* WQ related fields. */ 129 uint16_t cq_ci; /* Consumer index for completion queue. */ 130 uint16_t cq_pi; /* Production index for completion queue. */ 131 uint16_t cqe_s; /* Number of CQ elements. */ 132 uint16_t cqe_m; /* Mask for CQ indices. */ 133 /* CQ related fields. */ 134 uint16_t elts_n:4; /* elts[] length (in log2). */ 135 uint16_t cqe_n:4; /* Number of CQ elements (in log2). */ 136 uint16_t wqe_n:4; /* Number of WQ elements (in log2). */ 137 uint16_t tso_en:1; /* When set hardware TSO is enabled. */ 138 uint16_t tunnel_en:1; 139 /* When set TX offload for tunneled packets are supported. */ 140 uint16_t swp_en:1; /* Whether SW parser is enabled. */ 141 uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */ 142 uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */ 143 uint16_t db_heu:1; /* Doorbell heuristic write barrier. */ 144 uint16_t rt_timestamp:1; /* Realtime timestamp format. */ 145 uint16_t wait_on_time:1; /* WQE with timestamp is supported. */ 146 uint16_t fast_free:1; /* mbuf fast free on Tx is enabled. */ 147 uint16_t inlen_send; /* Ordinary send data inline size. */ 148 uint16_t inlen_empw; /* eMPW max packet size to inline. */ 149 uint16_t inlen_mode; /* Minimal data length to inline. */ 150 uint8_t tx_aggr_affinity; /* TxQ affinity configuration. */ 151 uint32_t qp_num_8s; /* QP number shifted by 8. */ 152 uint64_t offloads; /* Offloads for Tx Queue. */ 153 struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */ 154 struct mlx5_wqe *wqes; /* Work queue. */ 155 struct mlx5_wqe *wqes_end; /* Work queue array limit. */ 156 #ifdef RTE_LIBRTE_MLX5_DEBUG 157 uint32_t *fcqs; /* Free completion queue (debug extended). */ 158 #else 159 uint16_t *fcqs; /* Free completion queue. */ 160 #endif 161 volatile struct mlx5_cqe *cqes; /* Completion queue. */ 162 volatile uint32_t *qp_db; /* Work queue doorbell. */ 163 volatile uint32_t *cq_db; /* Completion queue doorbell. */ 164 uint16_t port_id; /* Port ID of device. */ 165 uint16_t idx; /* Queue index. */ 166 uint64_t rt_timemask; /* Scheduling timestamp mask. 
*/ 167 uint64_t ts_mask; /* Timestamp flag dynamic mask. */ 168 uint64_t ts_last; /* Last scheduled timestamp. */ 169 int32_t ts_offset; /* Timestamp field dynamic offset. */ 170 struct mlx5_dev_ctx_shared *sh; /* Shared context. */ 171 struct mlx5_txq_stats stats; /* TX queue counters. */ 172 struct mlx5_txq_stats stats_reset; /* stats on last reset. */ 173 struct mlx5_uar_data uar_data; 174 struct rte_mbuf *elts[]; 175 /* Storage for queued packets, must be the last field. */ 176 }; 177 178 /* TX queue control descriptor. */ 179 __extension__ 180 struct mlx5_txq_ctrl { 181 LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */ 182 RTE_ATOMIC(uint32_t) refcnt; /* Reference counter. */ 183 unsigned int socket; /* CPU socket ID for allocations. */ 184 bool is_hairpin; /* Whether TxQ type is Hairpin. */ 185 unsigned int max_inline_data; /* Max inline data. */ 186 unsigned int max_tso_header; /* Max TSO header size. */ 187 struct mlx5_txq_obj *obj; /* Verbs/DevX queue object. */ 188 struct mlx5_priv *priv; /* Back pointer to private data. */ 189 off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */ 190 uint16_t dump_file_n; /* Number of dump files. */ 191 struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */ 192 uint32_t hairpin_status; /* Hairpin binding status. */ 193 struct mlx5_txq_data txq; /* Data path structure. */ 194 /* Must be the last field in the structure, contains elts[]. */ 195 }; 196 197 /* mlx5_txq.c */ 198 199 int mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id); 200 int mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id); 201 int mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t queue_id); 202 int mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t queue_id); 203 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 204 unsigned int socket, const struct rte_eth_txconf *conf); 205 int mlx5_tx_hairpin_queue_setup 206 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 207 const struct rte_eth_hairpin_conf *hairpin_conf); 208 void mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 209 int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd); 210 void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev); 211 int mlx5_txq_obj_verify(struct rte_eth_dev *dev); 212 struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, 213 uint16_t desc, unsigned int socket, 214 const struct rte_eth_txconf *conf); 215 struct mlx5_txq_ctrl *mlx5_txq_hairpin_new 216 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 217 const struct rte_eth_hairpin_conf *hairpin_conf); 218 struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx); 219 int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx); 220 int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx); 221 int mlx5_txq_verify(struct rte_eth_dev *dev); 222 int mlx5_txq_get_sqn(struct mlx5_txq_ctrl *txq); 223 void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl); 224 void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl); 225 uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev); 226 void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev); 227 int mlx5_count_aggr_ports(struct rte_eth_dev *dev); 228 int mlx5_map_aggr_tx_affinity(struct rte_eth_dev *dev, uint16_t tx_queue_id, 229 uint8_t affinity); 230 int mlx5_ext_txq_verify(struct rte_eth_dev *dev); 231 struct mlx5_external_q *mlx5_ext_txq_get(struct rte_eth_dev *dev, uint16_t idx); 232 233 /* mlx5_tx.c */ 234 235 
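/*
 * The prototypes below are implemented in mlx5_tx.c and are mostly reached
 * through the generic ethdev API. A minimal application-level usage sketch
 * (port_id, queue_id and offset are assumed to be valid, error handling is
 * omitted):
 *
 *	struct rte_eth_burst_mode mode;
 *
 *	if (rte_eth_tx_burst_mode_get(port_id, queue_id, &mode) == 0)
 *		printf("Tx burst mode: %s\n", mode.info);
 *	// Completion state of the Tx descriptor at the given offset.
 *	int status = rte_eth_tx_descriptor_status(port_id, queue_id, offset);
 */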
void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, 236 unsigned int olx __rte_unused); 237 int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset); 238 void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, 239 struct rte_eth_txq_info *qinfo); 240 int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t tx_queue_id, 241 struct rte_eth_burst_mode *mode); 242 243 /* mlx5_tx_empw.c */ 244 245 MLX5_TXOFF_PRE_DECL(full_empw); 246 MLX5_TXOFF_PRE_DECL(none_empw); 247 MLX5_TXOFF_PRE_DECL(md_empw); 248 MLX5_TXOFF_PRE_DECL(mt_empw); 249 MLX5_TXOFF_PRE_DECL(mtsc_empw); 250 MLX5_TXOFF_PRE_DECL(mti_empw); 251 MLX5_TXOFF_PRE_DECL(mtv_empw); 252 MLX5_TXOFF_PRE_DECL(mtiv_empw); 253 MLX5_TXOFF_PRE_DECL(sc_empw); 254 MLX5_TXOFF_PRE_DECL(sci_empw); 255 MLX5_TXOFF_PRE_DECL(scv_empw); 256 MLX5_TXOFF_PRE_DECL(sciv_empw); 257 MLX5_TXOFF_PRE_DECL(i_empw); 258 MLX5_TXOFF_PRE_DECL(v_empw); 259 MLX5_TXOFF_PRE_DECL(iv_empw); 260 261 /* mlx5_tx_nompw.c */ 262 263 MLX5_TXOFF_PRE_DECL(full); 264 MLX5_TXOFF_PRE_DECL(none); 265 MLX5_TXOFF_PRE_DECL(md); 266 MLX5_TXOFF_PRE_DECL(mt); 267 MLX5_TXOFF_PRE_DECL(mtsc); 268 MLX5_TXOFF_PRE_DECL(mti); 269 MLX5_TXOFF_PRE_DECL(mtv); 270 MLX5_TXOFF_PRE_DECL(mtiv); 271 MLX5_TXOFF_PRE_DECL(sc); 272 MLX5_TXOFF_PRE_DECL(sci); 273 MLX5_TXOFF_PRE_DECL(scv); 274 MLX5_TXOFF_PRE_DECL(sciv); 275 MLX5_TXOFF_PRE_DECL(i); 276 MLX5_TXOFF_PRE_DECL(v); 277 MLX5_TXOFF_PRE_DECL(iv); 278 279 /* mlx5_tx_txpp.c */ 280 281 MLX5_TXOFF_PRE_DECL(full_ts_nompw); 282 MLX5_TXOFF_PRE_DECL(full_ts_nompwi); 283 MLX5_TXOFF_PRE_DECL(full_ts); 284 MLX5_TXOFF_PRE_DECL(full_ts_noi); 285 MLX5_TXOFF_PRE_DECL(none_ts); 286 MLX5_TXOFF_PRE_DECL(mdi_ts); 287 MLX5_TXOFF_PRE_DECL(mti_ts); 288 MLX5_TXOFF_PRE_DECL(mtiv_ts); 289 290 /* mlx5_tx_mpw.c */ 291 292 MLX5_TXOFF_PRE_DECL(none_mpw); 293 MLX5_TXOFF_PRE_DECL(mci_mpw); 294 MLX5_TXOFF_PRE_DECL(mc_mpw); 295 MLX5_TXOFF_PRE_DECL(i_mpw); 296 297 static __rte_always_inline struct mlx5_uar_data * 298 mlx5_tx_bfreg(struct mlx5_txq_data *txq) 299 { 300 return &MLX5_PROC_PRIV(txq->port_id)->uar_table[txq->idx]; 301 } 302 303 /** 304 * Ring TX queue doorbell and flush the update by write memory barrier. 305 * 306 * @param txq 307 * Pointer to TX queue structure. 308 * @param wqe 309 * Pointer to the last WQE posted in the NIC. 310 */ 311 static __rte_always_inline void 312 mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe) 313 { 314 mlx5_doorbell_ring(mlx5_tx_bfreg(txq), *(volatile uint64_t *)wqe, 315 txq->wqe_ci, txq->qp_db, 1); 316 } 317 318 /** 319 * Convert timestamp from mbuf format to linear counter 320 * of Clock Queue completions (24 bits). 321 * 322 * @param sh 323 * Pointer to the device shared context to fetch Tx 324 * packet pacing timestamp and parameters. 325 * @param ts 326 * Timestamp from mbuf to convert. 327 * @return 328 * positive or zero value - completion ID to wait. 329 * negative value - conversion error. 330 */ 331 static __rte_always_inline int32_t 332 mlx5_txpp_convert_tx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t mts) 333 { 334 uint64_t ts, ci; 335 uint32_t tick; 336 337 do { 338 /* 339 * Read atomically two uint64_t fields and compare lsb bits. 340 * It there is no match - the timestamp was updated in 341 * the service thread, data should be re-read. 
342 */ 343 rte_compiler_barrier(); 344 ci = rte_atomic_load_explicit(&sh->txpp.ts.ci_ts, rte_memory_order_relaxed); 345 ts = rte_atomic_load_explicit(&sh->txpp.ts.ts, rte_memory_order_relaxed); 346 rte_compiler_barrier(); 347 if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH))) 348 break; 349 } while (true); 350 /* Perform the skew correction, positive value to send earlier. */ 351 mts -= sh->txpp.skew; 352 mts -= ts; 353 if (unlikely(mts >= UINT64_MAX / 2)) { 354 /* We have negative integer, mts is in the past. */ 355 rte_atomic_fetch_add_explicit(&sh->txpp.err_ts_past, 356 1, rte_memory_order_relaxed); 357 return -1; 358 } 359 tick = sh->txpp.tick; 360 MLX5_ASSERT(tick); 361 /* Convert delta to completions, round up. */ 362 mts = (mts + tick - 1) / tick; 363 if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) { 364 /* We have mts is too distant future. */ 365 rte_atomic_fetch_add_explicit(&sh->txpp.err_ts_future, 366 1, rte_memory_order_relaxed); 367 return -1; 368 } 369 mts <<= 64 - MLX5_CQ_INDEX_WIDTH; 370 ci += mts; 371 ci >>= 64 - MLX5_CQ_INDEX_WIDTH; 372 return ci; 373 } 374 375 /** 376 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 377 * Flags must be preliminary initialized to zero. 378 * 379 * @param loc 380 * Pointer to burst routine local context. 381 * @param swp_flags 382 * Pointer to store Software Parser flags. 383 * @param olx 384 * Configured Tx offloads mask. It is fully defined at 385 * compile time and may be used for optimization. 386 * 387 * @return 388 * Software Parser offsets packed in dword. 389 * Software Parser flags are set by pointer. 390 */ 391 static __rte_always_inline uint32_t 392 txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, 393 uint8_t *swp_flags, 394 unsigned int olx) 395 { 396 uint64_t ol, tunnel; 397 unsigned int idx, off; 398 uint32_t set; 399 400 if (!MLX5_TXOFF_CONFIG(SWP)) 401 return 0; 402 ol = loc->mbuf->ol_flags; 403 tunnel = ol & RTE_MBUF_F_TX_TUNNEL_MASK; 404 /* 405 * Check whether Software Parser is required. 406 * Only customized tunnels may ask for. 407 */ 408 if (likely(tunnel != RTE_MBUF_F_TX_TUNNEL_UDP && tunnel != RTE_MBUF_F_TX_TUNNEL_IP)) 409 return 0; 410 /* 411 * The index should have: 412 * bit[0:1] = RTE_MBUF_F_TX_L4_MASK 413 * bit[4] = RTE_MBUF_F_TX_IPV6 414 * bit[8] = RTE_MBUF_F_TX_OUTER_IPV6 415 * bit[9] = RTE_MBUF_F_TX_OUTER_UDP 416 */ 417 idx = (ol & (RTE_MBUF_F_TX_L4_MASK | RTE_MBUF_F_TX_IPV6 | RTE_MBUF_F_TX_OUTER_IPV6)) >> 52; 418 idx |= (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) ? (1 << 9) : 0; 419 *swp_flags = mlx5_swp_types_table[idx]; 420 /* 421 * Set offsets for SW parser. Since ConnectX-5, SW parser just 422 * complements HW parser. SW parser starts to engage only if HW parser 423 * can't reach a header. For the older devices, HW parser will not kick 424 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 425 * should be set regardless of HW offload. 426 */ 427 off = loc->mbuf->outer_l2_len; 428 if (MLX5_TXOFF_CONFIG(VLAN) && ol & RTE_MBUF_F_TX_VLAN) 429 off += sizeof(struct rte_vlan_hdr); 430 set = (off >> 1) << 8; /* Outer L3 offset. */ 431 off += loc->mbuf->outer_l3_len; 432 if (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) 433 set |= off >> 1; /* Outer L4 offset. */ 434 if (ol & (RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IPV6)) { /* Inner IP. */ 435 const uint64_t csum = ol & RTE_MBUF_F_TX_L4_MASK; 436 off += loc->mbuf->l2_len; 437 set |= (off >> 1) << 24; /* Inner L3 offset. 
*/ 438 if (csum == RTE_MBUF_F_TX_TCP_CKSUM || 439 csum == RTE_MBUF_F_TX_UDP_CKSUM || 440 (MLX5_TXOFF_CONFIG(TSO) && ol & RTE_MBUF_F_TX_TCP_SEG)) { 441 off += loc->mbuf->l3_len; 442 set |= (off >> 1) << 16; /* Inner L4 offset. */ 443 } 444 } 445 set = rte_cpu_to_le_32(set); 446 return set; 447 } 448 449 /** 450 * Convert the Checksum offloads to Verbs. 451 * 452 * @param buf 453 * Pointer to the mbuf. 454 * 455 * @return 456 * Converted checksum flags. 457 */ 458 static __rte_always_inline uint8_t 459 txq_ol_cksum_to_cs(struct rte_mbuf *buf) 460 { 461 uint32_t idx; 462 uint8_t is_tunnel = !!(buf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK); 463 const uint64_t ol_flags_mask = RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_L4_MASK | 464 RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_OUTER_IP_CKSUM; 465 466 /* 467 * The index should have: 468 * bit[0] = RTE_MBUF_F_TX_TCP_SEG 469 * bit[2:3] = RTE_MBUF_F_TX_UDP_CKSUM, RTE_MBUF_F_TX_TCP_CKSUM 470 * bit[4] = RTE_MBUF_F_TX_IP_CKSUM 471 * bit[8] = RTE_MBUF_F_TX_OUTER_IP_CKSUM 472 * bit[9] = tunnel 473 */ 474 idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9); 475 return mlx5_cksum_table[idx]; 476 } 477 478 /** 479 * Free the mbufs from the linear array of pointers. 480 * 481 * @param txq 482 * Pointer to Tx queue structure. 483 * @param pkts 484 * Pointer to array of packets to be free. 485 * @param pkts_n 486 * Number of packets to be freed. 487 * @param olx 488 * Configured Tx offloads mask. It is fully defined at 489 * compile time and may be used for optimization. 490 */ 491 static __rte_always_inline void 492 mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 493 struct rte_mbuf **__rte_restrict pkts, 494 unsigned int pkts_n, 495 unsigned int olx __rte_unused) 496 { 497 struct rte_mempool *pool = NULL; 498 struct rte_mbuf **p_free = NULL; 499 struct rte_mbuf *mbuf; 500 unsigned int n_free = 0; 501 502 /* 503 * The implemented algorithm eliminates 504 * copying pointers to temporary array 505 * for rte_mempool_put_bulk() calls. 506 */ 507 MLX5_ASSERT(pkts); 508 MLX5_ASSERT(pkts_n); 509 /* 510 * Free mbufs directly to the pool in bulk 511 * if fast free offload is engaged 512 */ 513 if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) { 514 mbuf = *pkts; 515 pool = mbuf->pool; 516 rte_mempool_put_bulk(pool, (void *)pkts, pkts_n); 517 return; 518 } 519 for (;;) { 520 for (;;) { 521 /* 522 * Decrement mbuf reference counter, detach 523 * indirect and external buffers if needed. 524 */ 525 mbuf = rte_pktmbuf_prefree_seg(*pkts); 526 if (likely(mbuf != NULL)) { 527 MLX5_ASSERT(mbuf == *pkts); 528 if (likely(n_free != 0)) { 529 if (unlikely(pool != mbuf->pool)) 530 /* From different pool. */ 531 break; 532 } else { 533 /* Start new scan array. */ 534 pool = mbuf->pool; 535 p_free = pkts; 536 } 537 ++n_free; 538 ++pkts; 539 --pkts_n; 540 if (unlikely(pkts_n == 0)) { 541 mbuf = NULL; 542 break; 543 } 544 } else { 545 /* 546 * This happens if mbuf is still referenced. 547 * We can't put it back to the pool, skip. 548 */ 549 ++pkts; 550 --pkts_n; 551 if (unlikely(n_free != 0)) 552 /* There is some array to free.*/ 553 break; 554 if (unlikely(pkts_n == 0)) 555 /* Last mbuf, nothing to free. */ 556 return; 557 } 558 } 559 for (;;) { 560 /* 561 * This loop is implemented to avoid multiple 562 * inlining of rte_mempool_put_bulk(). 563 */ 564 MLX5_ASSERT(pool); 565 MLX5_ASSERT(p_free); 566 MLX5_ASSERT(n_free); 567 /* 568 * Free the array of pre-freed mbufs 569 * belonging to the same memory pool. 
570 */ 571 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 572 if (unlikely(mbuf != NULL)) { 573 /* There is the request to start new scan. */ 574 pool = mbuf->pool; 575 p_free = pkts++; 576 n_free = 1; 577 --pkts_n; 578 if (likely(pkts_n != 0)) 579 break; 580 /* 581 * This is the last mbuf to be freed. 582 * Do one more loop iteration to complete. 583 * This is rare case of the last unique mbuf. 584 */ 585 mbuf = NULL; 586 continue; 587 } 588 if (likely(pkts_n == 0)) 589 return; 590 n_free = 0; 591 break; 592 } 593 } 594 } 595 596 /** 597 * No inline version to free buffers for optimal call 598 * on the tx_burst completion. 599 */ 600 static __rte_noinline void 601 __mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 602 struct rte_mbuf **__rte_restrict pkts, 603 unsigned int pkts_n, 604 unsigned int olx __rte_unused) 605 { 606 mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx); 607 } 608 609 /** 610 * Free the mbuf from the elts ring buffer till new tail. 611 * 612 * @param txq 613 * Pointer to Tx queue structure. 614 * @param tail 615 * Index in elts to free up to, becomes new elts tail. 616 * @param olx 617 * Configured Tx offloads mask. It is fully defined at 618 * compile time and may be used for optimization. 619 */ 620 static __rte_always_inline void 621 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 622 uint16_t tail, 623 unsigned int olx __rte_unused) 624 { 625 uint16_t n_elts = tail - txq->elts_tail; 626 627 MLX5_ASSERT(n_elts); 628 MLX5_ASSERT(n_elts <= txq->elts_s); 629 /* 630 * Implement a loop to support ring buffer wraparound 631 * with single inlining of mlx5_tx_free_mbuf(). 632 */ 633 do { 634 unsigned int part; 635 636 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 637 part = RTE_MIN(part, n_elts); 638 MLX5_ASSERT(part); 639 MLX5_ASSERT(part <= txq->elts_s); 640 mlx5_tx_free_mbuf(txq, 641 &txq->elts[txq->elts_tail & txq->elts_m], 642 part, olx); 643 txq->elts_tail += part; 644 n_elts -= part; 645 } while (n_elts); 646 } 647 648 /** 649 * Store the mbuf being sent into elts ring buffer. 650 * On Tx completion these mbufs will be freed. 651 * 652 * @param txq 653 * Pointer to Tx queue structure. 654 * @param pkts 655 * Pointer to array of packets to be stored. 656 * @param pkts_n 657 * Number of packets to be stored. 658 * @param olx 659 * Configured Tx offloads mask. It is fully defined at 660 * compile time and may be used for optimization. 661 */ 662 static __rte_always_inline void 663 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 664 struct rte_mbuf **__rte_restrict pkts, 665 unsigned int pkts_n, 666 unsigned int olx __rte_unused) 667 { 668 unsigned int part; 669 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 670 671 MLX5_ASSERT(pkts); 672 MLX5_ASSERT(pkts_n); 673 part = txq->elts_s - (txq->elts_head & txq->elts_m); 674 MLX5_ASSERT(part); 675 MLX5_ASSERT(part <= txq->elts_s); 676 /* This code is a good candidate for vectorizing with SIMD. */ 677 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 678 (void *)pkts, 679 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 680 txq->elts_head += pkts_n; 681 if (unlikely(part < pkts_n)) 682 /* The copy is wrapping around the elts array. */ 683 rte_memcpy((void *)elts, (void *)(pkts + part), 684 (pkts_n - part) * sizeof(struct rte_mbuf *)); 685 } 686 687 /** 688 * Check if the completion request flag should be set in the last WQE. 689 * Both pushed mbufs and WQEs are monitored and the completion request 690 * flag is set if any of thresholds is reached. 
691 * 692 * @param txq 693 * Pointer to TX queue structure. 694 * @param loc 695 * Pointer to burst routine local context. 696 * @param olx 697 * Configured Tx offloads mask. It is fully defined at 698 * compile time and may be used for optimization. 699 */ 700 static __rte_always_inline void 701 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 702 struct mlx5_txq_local *__rte_restrict loc, 703 unsigned int olx) 704 { 705 uint16_t head = txq->elts_head; 706 unsigned int part; 707 708 part = MLX5_TXOFF_CONFIG(INLINE) ? 709 0 : loc->pkts_sent - loc->pkts_copy; 710 head += part; 711 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 712 (MLX5_TXOFF_CONFIG(INLINE) && 713 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 714 volatile struct mlx5_wqe *last = loc->wqe_last; 715 716 MLX5_ASSERT(last); 717 txq->elts_comp = head; 718 if (MLX5_TXOFF_CONFIG(INLINE)) 719 txq->wqe_comp = txq->wqe_ci; 720 /* Request unconditional completion on last WQE. */ 721 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 722 MLX5_COMP_MODE_OFFSET); 723 /* Save elts_head in dedicated free on completion queue. */ 724 #ifdef RTE_LIBRTE_MLX5_DEBUG 725 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 726 (last->cseg.opcode >> 8) << 16; 727 #else 728 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 729 #endif 730 /* A CQE slot must always be available. */ 731 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 732 } 733 } 734 735 /** 736 * Set completion request flag for all issued WQEs. 737 * This routine is intended to be used with enabled fast path tracing 738 * and send scheduling on time to provide the detailed report in trace 739 * for send completions on every WQE. 740 * 741 * @param txq 742 * Pointer to TX queue structure. 743 * @param loc 744 * Pointer to burst routine local context. 745 * @param olx 746 * Configured Tx offloads mask. It is fully defined at 747 * compile time and may be used for optimization. 748 */ 749 static __rte_always_inline void 750 mlx5_tx_request_completion_trace(struct mlx5_txq_data *__rte_restrict txq, 751 struct mlx5_txq_local *__rte_restrict loc, 752 unsigned int olx) 753 { 754 uint16_t head = txq->elts_comp; 755 756 while (txq->wqe_comp != txq->wqe_ci) { 757 volatile struct mlx5_wqe *wqe; 758 uint32_t wqe_n; 759 760 MLX5_ASSERT(loc->wqe_last); 761 wqe = txq->wqes + (txq->wqe_comp & txq->wqe_m); 762 if (wqe == loc->wqe_last) { 763 head = txq->elts_head; 764 head += MLX5_TXOFF_CONFIG(INLINE) ? 765 0 : loc->pkts_sent - loc->pkts_copy; 766 txq->elts_comp = head; 767 } 768 /* Completion request flag was set on cseg constructing. */ 769 #ifdef RTE_LIBRTE_MLX5_DEBUG 770 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 771 (wqe->cseg.opcode >> 8) << 16; 772 #else 773 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 774 #endif 775 /* A CQE slot must always be available. */ 776 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 777 /* Advance to the next WQE in the queue. */ 778 wqe_n = rte_be_to_cpu_32(wqe->cseg.sq_ds) & 0x3F; 779 txq->wqe_comp += RTE_ALIGN(wqe_n, 4) / 4; 780 } 781 } 782 783 /** 784 * Build the Control Segment with specified opcode: 785 * - MLX5_OPCODE_SEND 786 * - MLX5_OPCODE_ENHANCED_MPSW 787 * - MLX5_OPCODE_TSO 788 * 789 * @param txq 790 * Pointer to TX queue structure. 791 * @param loc 792 * Pointer to burst routine local context. 793 * @param wqe 794 * Pointer to WQE to fill with built Control Segment. 795 * @param ds 796 * Supposed length of WQE in segments. 797 * @param opcode 798 * SQ WQE opcode to put into Control Segment. 
799 * @param olx 800 * Configured Tx offloads mask. It is fully defined at 801 * compile time and may be used for optimization. 802 */ 803 static __rte_always_inline void 804 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 805 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 806 struct mlx5_wqe *__rte_restrict wqe, 807 unsigned int ds, 808 unsigned int opcode, 809 unsigned int olx) 810 { 811 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 812 813 /* For legacy MPW replace the EMPW by TSO with modifier. */ 814 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 815 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 816 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 817 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 818 if (MLX5_TXOFF_CONFIG(TXPP) && __rte_trace_point_fp_is_enabled()) 819 cs->flags = RTE_BE32(MLX5_COMP_ALWAYS << 820 MLX5_COMP_MODE_OFFSET); 821 else 822 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 823 MLX5_COMP_MODE_OFFSET); 824 cs->misc = RTE_BE32(0); 825 if (__rte_trace_point_fp_is_enabled() && !loc->pkts_sent) 826 rte_pmd_mlx5_trace_tx_entry(txq->port_id, txq->idx); 827 rte_pmd_mlx5_trace_tx_wqe((txq->wqe_ci << 8) | opcode); 828 } 829 830 /** 831 * Build the Synchronize Queue Segment with specified completion index. 832 * 833 * @param txq 834 * Pointer to TX queue structure. 835 * @param loc 836 * Pointer to burst routine local context. 837 * @param wqe 838 * Pointer to WQE to fill with built Control Segment. 839 * @param wci 840 * Completion index in Clock Queue to wait. 841 * @param olx 842 * Configured Tx offloads mask. It is fully defined at 843 * compile time and may be used for optimization. 844 */ 845 static __rte_always_inline void 846 mlx5_tx_qseg_init(struct mlx5_txq_data *restrict txq, 847 struct mlx5_txq_local *restrict loc __rte_unused, 848 struct mlx5_wqe *restrict wqe, 849 unsigned int wci, 850 unsigned int olx __rte_unused) 851 { 852 struct mlx5_wqe_qseg *qs; 853 854 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 855 qs->max_index = rte_cpu_to_be_32(wci); 856 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq_obj.cq->id); 857 qs->reserved0 = RTE_BE32(0); 858 qs->reserved1 = RTE_BE32(0); 859 } 860 861 /** 862 * Build the Wait on Time Segment with specified timestamp value. 863 * 864 * @param txq 865 * Pointer to TX queue structure. 866 * @param loc 867 * Pointer to burst routine local context. 868 * @param wqe 869 * Pointer to WQE to fill with built Control Segment. 870 * @param ts 871 * Timesatmp value to wait. 872 * @param olx 873 * Configured Tx offloads mask. It is fully defined at 874 * compile time and may be used for optimization. 875 */ 876 static __rte_always_inline void 877 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 878 struct mlx5_txq_local *restrict loc __rte_unused, 879 struct mlx5_wqe *restrict wqe, 880 uint64_t ts, 881 unsigned int olx __rte_unused) 882 { 883 struct mlx5_wqe_wseg *ws; 884 885 ws = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 886 ws->operation = rte_cpu_to_be_32(MLX5_WAIT_COND_CYCLIC_SMALLER); 887 ws->lkey = RTE_BE32(0); 888 ws->va_high = RTE_BE32(0); 889 ws->va_low = RTE_BE32(0); 890 if (txq->rt_timestamp) { 891 ts = ts % (uint64_t)NS_PER_S 892 | (ts / (uint64_t)NS_PER_S) << 32; 893 } 894 ws->value = rte_cpu_to_be_64(ts); 895 ws->mask = txq->rt_timemask; 896 } 897 898 /** 899 * Build the Ethernet Segment without inlined data. 900 * Supports Software Parser, Checksums and VLAN insertion Tx offload features. 901 * 902 * @param txq 903 * Pointer to TX queue structure. 
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet Segment.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe *__rte_restrict wqe,
		  unsigned int olx)
{
	struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg;
	uint32_t csum;

	/*
	 * Calculate and set check sum flags first, dword field
	 * in segment may be shared with Software Parser flags.
	 */
	csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0;
	es->flags = rte_cpu_to_le_32(csum);
	/*
	 * Calculate and set Software Parser offsets and flags.
	 * These flags are set for custom UDP and IP tunnel packets.
	 */
	es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx);
	/* Fill metadata field if needed. */
	es->metadata = MLX5_TXOFF_CONFIG(METADATA) ?
		       loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ?
		       rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) :
		       0 : 0;
	/* Engage VLAN tag insertion feature if requested. */
	if (MLX5_TXOFF_CONFIG(VLAN) &&
	    loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) {
		/*
		 * We should get here only if the device supports
		 * this feature correctly.
		 */
		MLX5_ASSERT(txq->vlan_en);
		es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT |
						  loc->mbuf->vlan_tci);
	} else {
		es->inline_hdr = RTE_BE32(0);
	}
}

/**
 * Build the Ethernet Segment with minimal inlined data
 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is
 * used to fill the gap in single WQEBB WQEs.
 * Supports Software Parser, Checksums and VLAN
 * insertion Tx offload features.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet Segment.
 * @param vlan
 *   Length of VLAN tag insertion if any.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe *__rte_restrict wqe,
		  unsigned int vlan,
		  unsigned int olx)
{
	struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg;
	uint32_t csum;
	uint8_t *psrc, *pdst;

	/*
	 * Calculate and set check sum flags first, dword field
	 * in segment may be shared with Software Parser flags.
	 */
	csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0;
	es->flags = rte_cpu_to_le_32(csum);
	/*
	 * Calculate and set Software Parser offsets and flags.
	 * These flags are set for custom UDP and IP tunnel packets.
	 */
	es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx);
	/* Fill metadata field if needed. */
	es->metadata = MLX5_TXOFF_CONFIG(METADATA) ?
		       loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ?
996 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 997 0 : 0; 998 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 999 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 1000 es->inline_data = *(unaligned_uint16_t *)psrc; 1001 psrc += sizeof(uint16_t); 1002 pdst = (uint8_t *)(es + 1); 1003 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1004 /* Implement VLAN tag insertion as part inline data. */ 1005 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 1006 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1007 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1008 /* Insert VLAN ethertype + VLAN tag. */ 1009 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1010 ((RTE_ETHER_TYPE_VLAN << 16) | 1011 loc->mbuf->vlan_tci); 1012 pdst += sizeof(struct rte_vlan_hdr); 1013 /* Copy the rest two bytes from packet data. */ 1014 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 1015 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 1016 } else { 1017 /* Fill the gap in the title WQEBB with inline data. */ 1018 rte_mov16(pdst, psrc); 1019 } 1020 } 1021 1022 /** 1023 * Build the Ethernet Segment with entire packet data inlining. Checks the 1024 * boundary of WQEBB and ring buffer wrapping, supports Software Parser, 1025 * Checksums and VLAN insertion Tx offload features. 1026 * 1027 * @param txq 1028 * Pointer to TX queue structure. 1029 * @param loc 1030 * Pointer to burst routine local context. 1031 * @param wqe 1032 * Pointer to WQE to fill with built Ethernet Segment. 1033 * @param vlan 1034 * Length of VLAN tag insertion if any. 1035 * @param inlen 1036 * Length of data to inline (VLAN included, if any). 1037 * @param tso 1038 * TSO flag, set mss field from the packet. 1039 * @param olx 1040 * Configured Tx offloads mask. It is fully defined at 1041 * compile time and may be used for optimization. 1042 * 1043 * @return 1044 * Pointer to the next Data Segment (aligned and wrapped around). 1045 */ 1046 static __rte_always_inline struct mlx5_wqe_dseg * 1047 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 1048 struct mlx5_txq_local *__rte_restrict loc, 1049 struct mlx5_wqe *__rte_restrict wqe, 1050 unsigned int vlan, 1051 unsigned int inlen, 1052 unsigned int tso, 1053 unsigned int olx) 1054 { 1055 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 1056 uint32_t csum; 1057 uint8_t *psrc, *pdst; 1058 unsigned int part; 1059 1060 /* 1061 * Calculate and set check sum flags first, dword field 1062 * in segment may be shared with Software Parser flags. 1063 */ 1064 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 1065 if (tso) { 1066 csum <<= 24; 1067 csum |= loc->mbuf->tso_segsz; 1068 es->flags = rte_cpu_to_be_32(csum); 1069 } else { 1070 es->flags = rte_cpu_to_le_32(csum); 1071 } 1072 /* 1073 * Calculate and set Software Parser offsets and flags. 1074 * These flags a set for custom UDP and IP tunnel packets. 1075 */ 1076 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1077 /* Fill metadata field if needed. */ 1078 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1079 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 1080 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1081 0 : 0; 1082 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 1083 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 1084 es->inline_data = *(unaligned_uint16_t *)psrc; 1085 psrc += sizeof(uint16_t); 1086 pdst = (uint8_t *)(es + 1); 1087 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1088 /* Implement VLAN tag insertion as part inline data. 
		 */
		memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t));
		pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		/* Insert VLAN ethertype + VLAN tag. */
		*(unaligned_uint32_t *)pdst = rte_cpu_to_be_32
						((RTE_ETHER_TYPE_VLAN << 16) |
						 loc->mbuf->vlan_tci);
		pdst += sizeof(struct rte_vlan_hdr);
		/* Copy the remaining two bytes from packet data. */
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t)));
		*(uint16_t *)pdst = *(unaligned_uint16_t *)psrc;
		psrc += sizeof(uint16_t);
	} else {
		/* Fill the gap in the title WQEBB with inline data. */
		rte_mov16(pdst, psrc);
		psrc += sizeof(rte_v128u32_t);
	}
	pdst = (uint8_t *)(es + 2);
	MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE);
	MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end);
	inlen -= MLX5_ESEG_MIN_INLINE_SIZE;
	if (!inlen) {
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE));
		return (struct mlx5_wqe_dseg *)pdst;
	}
	/*
	 * The WQEBB space availability is checked by caller.
	 * Here we should be aware of WQE ring buffer wraparound only.
	 */
	part = (uint8_t *)txq->wqes_end - pdst;
	part = RTE_MIN(part, inlen);
	do {
		rte_memcpy(pdst, psrc, part);
		inlen -= part;
		if (likely(!inlen)) {
			/*
			 * If return value is not used by the caller
			 * the code below will be optimized out.
			 */
			pdst += part;
			pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
			if (unlikely(pdst >= (uint8_t *)txq->wqes_end))
				pdst = (uint8_t *)txq->wqes;
			return (struct mlx5_wqe_dseg *)pdst;
		}
		pdst = (uint8_t *)txq->wqes;
		psrc += part;
		part = inlen;
	} while (true);
}

/**
 * Copy data from a chain of mbufs to the specified linear buffer.
 * Supports Checksums and VLAN insertion Tx offload features. If data
 * from some mbuf is copied completely, that mbuf is freed. The local
 * structure is used to keep the byte stream state.
 *
 * @param pdst
 *   Pointer to the destination linear buffer.
 * @param loc
 *   Pointer to burst routine local context.
 * @param len
 *   Length of data to be copied.
 * @param must
 *   Length of data to be copied ignoring no inline hint.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Number of actually copied data bytes. This is always greater than or
 *   equal to the must parameter and may be less than len if the no-inline
 *   hint flag is encountered.
 */
static __rte_always_inline unsigned int
mlx5_tx_mseg_memcpy(uint8_t *pdst,
		    struct mlx5_txq_local *__rte_restrict loc,
		    unsigned int len,
		    unsigned int must,
		    unsigned int olx __rte_unused)
{
	struct rte_mbuf *mbuf;
	unsigned int part, dlen, copy = 0;
	uint8_t *psrc;

	MLX5_ASSERT(len);
	do {
		/* Allow zero length packets, must check first. */
		dlen = rte_pktmbuf_data_len(loc->mbuf);
		if (dlen <= loc->mbuf_off) {
			/* Exhausted packet, just free.
*/ 1180 mbuf = loc->mbuf; 1181 loc->mbuf = mbuf->next; 1182 rte_pktmbuf_free_seg(mbuf); 1183 loc->mbuf_off = 0; 1184 MLX5_ASSERT(loc->mbuf_nseg > 1); 1185 MLX5_ASSERT(loc->mbuf); 1186 --loc->mbuf_nseg; 1187 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1188 unsigned int diff; 1189 1190 if (copy >= must) { 1191 /* 1192 * We already copied the minimal 1193 * requested amount of data. 1194 */ 1195 return copy; 1196 } 1197 diff = must - copy; 1198 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 1199 /* 1200 * Copy only the minimal required 1201 * part of the data buffer. Limit amount 1202 * of data to be copied to the length of 1203 * available space. 1204 */ 1205 len = RTE_MIN(len, diff); 1206 } 1207 } 1208 continue; 1209 } 1210 dlen -= loc->mbuf_off; 1211 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1212 loc->mbuf_off); 1213 part = RTE_MIN(len, dlen); 1214 rte_memcpy(pdst, psrc, part); 1215 copy += part; 1216 loc->mbuf_off += part; 1217 len -= part; 1218 if (!len) { 1219 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 1220 loc->mbuf_off = 0; 1221 /* Exhausted packet, just free. */ 1222 mbuf = loc->mbuf; 1223 loc->mbuf = mbuf->next; 1224 rte_pktmbuf_free_seg(mbuf); 1225 loc->mbuf_off = 0; 1226 MLX5_ASSERT(loc->mbuf_nseg >= 1); 1227 --loc->mbuf_nseg; 1228 } 1229 return copy; 1230 } 1231 pdst += part; 1232 } while (true); 1233 } 1234 1235 /** 1236 * Build the Ethernet Segment with inlined data from multi-segment packet. 1237 * Checks the boundary of WQEBB and ring buffer wrapping, supports Software 1238 * Parser, Checksums and VLAN insertion Tx offload features. 1239 * 1240 * @param txq 1241 * Pointer to TX queue structure. 1242 * @param loc 1243 * Pointer to burst routine local context. 1244 * @param wqe 1245 * Pointer to WQE to fill with built Ethernet Segment. 1246 * @param vlan 1247 * Length of VLAN tag insertion if any. 1248 * @param inlen 1249 * Length of data to inline (VLAN included, if any). 1250 * @param tso 1251 * TSO flag, set mss field from the packet. 1252 * @param olx 1253 * Configured Tx offloads mask. It is fully defined at 1254 * compile time and may be used for optimization. 1255 * 1256 * @return 1257 * Pointer to the next Data Segment (aligned and possible NOT wrapped 1258 * around - caller should do wrapping check on its own). 1259 */ 1260 static __rte_always_inline struct mlx5_wqe_dseg * 1261 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 1262 struct mlx5_txq_local *__rte_restrict loc, 1263 struct mlx5_wqe *__rte_restrict wqe, 1264 unsigned int vlan, 1265 unsigned int inlen, 1266 unsigned int tso, 1267 unsigned int olx) 1268 { 1269 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 1270 uint32_t csum; 1271 uint8_t *pdst; 1272 unsigned int part, tlen = 0; 1273 1274 /* 1275 * Calculate and set check sum flags first, uint32_t field 1276 * in segment may be shared with Software Parser flags. 1277 */ 1278 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 1279 if (tso) { 1280 csum <<= 24; 1281 csum |= loc->mbuf->tso_segsz; 1282 es->flags = rte_cpu_to_be_32(csum); 1283 } else { 1284 es->flags = rte_cpu_to_le_32(csum); 1285 } 1286 /* 1287 * Calculate and set Software Parser offsets and flags. 1288 * These flags a set for custom UDP and IP tunnel packets. 1289 */ 1290 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1291 /* Fill metadata field if needed. */ 1292 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1293 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 
1294 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1295 0 : 0; 1296 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 1297 pdst = (uint8_t *)&es->inline_data; 1298 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1299 /* Implement VLAN tag insertion as part inline data. */ 1300 mlx5_tx_mseg_memcpy(pdst, loc, 1301 2 * RTE_ETHER_ADDR_LEN, 1302 2 * RTE_ETHER_ADDR_LEN, olx); 1303 pdst += 2 * RTE_ETHER_ADDR_LEN; 1304 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1305 ((RTE_ETHER_TYPE_VLAN << 16) | 1306 loc->mbuf->vlan_tci); 1307 pdst += sizeof(struct rte_vlan_hdr); 1308 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 1309 } 1310 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 1311 /* 1312 * The WQEBB space availability is checked by caller. 1313 * Here we should be aware of WQE ring buffer wraparound only. 1314 */ 1315 part = (uint8_t *)txq->wqes_end - pdst; 1316 part = RTE_MIN(part, inlen - tlen); 1317 MLX5_ASSERT(part); 1318 do { 1319 unsigned int copy; 1320 1321 /* 1322 * Copying may be interrupted inside the routine 1323 * if run into no inline hint flag. 1324 */ 1325 copy = tso ? inlen : txq->inlen_mode; 1326 copy = tlen >= copy ? 0 : (copy - tlen); 1327 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 1328 tlen += copy; 1329 if (likely(inlen <= tlen) || copy < part) { 1330 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 1331 pdst += copy; 1332 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1333 return (struct mlx5_wqe_dseg *)pdst; 1334 } 1335 pdst = (uint8_t *)txq->wqes; 1336 part = inlen - tlen; 1337 } while (true); 1338 } 1339 1340 /** 1341 * Build the Data Segment of pointer type. 1342 * 1343 * @param txq 1344 * Pointer to TX queue structure. 1345 * @param loc 1346 * Pointer to burst routine local context. 1347 * @param dseg 1348 * Pointer to WQE to fill with built Data Segment. 1349 * @param buf 1350 * Data buffer to point. 1351 * @param len 1352 * Data buffer length. 1353 * @param olx 1354 * Configured Tx offloads mask. It is fully defined at 1355 * compile time and may be used for optimization. 1356 */ 1357 static __rte_always_inline void 1358 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 1359 struct mlx5_txq_local *__rte_restrict loc, 1360 struct mlx5_wqe_dseg *__rte_restrict dseg, 1361 uint8_t *buf, 1362 unsigned int len, 1363 unsigned int olx __rte_unused) 1364 1365 { 1366 MLX5_ASSERT(len); 1367 dseg->bcount = rte_cpu_to_be_32(len); 1368 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1369 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1370 } 1371 1372 /** 1373 * Build the Data Segment of pointer type or inline if data length is less than 1374 * buffer in minimal Data Segment size. 1375 * 1376 * @param txq 1377 * Pointer to TX queue structure. 1378 * @param loc 1379 * Pointer to burst routine local context. 1380 * @param dseg 1381 * Pointer to WQE to fill with built Data Segment. 1382 * @param buf 1383 * Data buffer to point. 1384 * @param len 1385 * Data buffer length. 1386 * @param olx 1387 * Configured Tx offloads mask. It is fully defined at 1388 * compile time and may be used for optimization. 
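 *
 * Illustrative behaviour (a sketch of the logic implemented below):
 *
 *	mlx5_tx_dseg_iptr(txq, loc, dseg, buf, len, olx);
 *	// len >  MLX5_DSEG_MIN_INLINE_SIZE: bcount/lkey/pbuf are set and
 *	//        the WQE references the mbuf data by address.
 *	// len <= MLX5_DSEG_MIN_INLINE_SIZE: the bytes are copied into
 *	//        dseg->inline_data and bcount is or-ed with
 *	//        MLX5_ETH_WQE_DATA_INLINE.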
1389 */ 1390 static __rte_always_inline void 1391 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 1392 struct mlx5_txq_local *__rte_restrict loc, 1393 struct mlx5_wqe_dseg *__rte_restrict dseg, 1394 uint8_t *buf, 1395 unsigned int len, 1396 unsigned int olx __rte_unused) 1397 1398 { 1399 uintptr_t dst, src; 1400 1401 MLX5_ASSERT(len); 1402 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 1403 dseg->bcount = rte_cpu_to_be_32(len); 1404 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1405 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1406 1407 return; 1408 } 1409 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1410 /* Unrolled implementation of generic rte_memcpy. */ 1411 dst = (uintptr_t)&dseg->inline_data[0]; 1412 src = (uintptr_t)buf; 1413 if (len & 0x08) { 1414 #ifdef RTE_ARCH_STRICT_ALIGN 1415 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 1416 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1417 dst += sizeof(uint32_t); 1418 src += sizeof(uint32_t); 1419 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1420 dst += sizeof(uint32_t); 1421 src += sizeof(uint32_t); 1422 #else 1423 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 1424 dst += sizeof(uint64_t); 1425 src += sizeof(uint64_t); 1426 #endif 1427 } 1428 if (len & 0x04) { 1429 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1430 dst += sizeof(uint32_t); 1431 src += sizeof(uint32_t); 1432 } 1433 if (len & 0x02) { 1434 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 1435 dst += sizeof(uint16_t); 1436 src += sizeof(uint16_t); 1437 } 1438 if (len & 0x01) 1439 *(uint8_t *)dst = *(uint8_t *)src; 1440 } 1441 1442 /** 1443 * Build the Data Segment of inlined data from single 1444 * segment packet, no VLAN insertion. 1445 * 1446 * @param txq 1447 * Pointer to TX queue structure. 1448 * @param loc 1449 * Pointer to burst routine local context. 1450 * @param dseg 1451 * Pointer to WQE to fill with built Data Segment. 1452 * @param buf 1453 * Data buffer to point. 1454 * @param len 1455 * Data buffer length. 1456 * @param olx 1457 * Configured Tx offloads mask. It is fully defined at 1458 * compile time and may be used for optimization. 1459 * 1460 * @return 1461 * Pointer to the next Data Segment after inlined data. 1462 * Ring buffer wraparound check is needed. We do not do it here because it 1463 * may not be needed for the last packet in the eMPW session. 1464 */ 1465 static __rte_always_inline struct mlx5_wqe_dseg * 1466 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 1467 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1468 struct mlx5_wqe_dseg *__rte_restrict dseg, 1469 uint8_t *buf, 1470 unsigned int len, 1471 unsigned int olx __rte_unused) 1472 { 1473 unsigned int part; 1474 uint8_t *pdst; 1475 1476 if (!MLX5_TXOFF_CONFIG(MPW)) { 1477 /* Store the descriptor byte counter for eMPW sessions. */ 1478 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1479 pdst = &dseg->inline_data[0]; 1480 } else { 1481 /* The entire legacy MPW session counter is stored on close. */ 1482 pdst = (uint8_t *)dseg; 1483 } 1484 /* 1485 * The WQEBB space availability is checked by caller. 1486 * Here we should be aware of WQE ring buffer wraparound only. 1487 */ 1488 part = (uint8_t *)txq->wqes_end - pdst; 1489 part = RTE_MIN(part, len); 1490 do { 1491 rte_memcpy(pdst, buf, part); 1492 len -= part; 1493 if (likely(!len)) { 1494 pdst += part; 1495 if (!MLX5_TXOFF_CONFIG(MPW)) 1496 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1497 /* Note: no final wraparound check here. 
*/ 1498 return (struct mlx5_wqe_dseg *)pdst; 1499 } 1500 pdst = (uint8_t *)txq->wqes; 1501 buf += part; 1502 part = len; 1503 } while (true); 1504 } 1505 1506 /** 1507 * Build the Data Segment of inlined data from single 1508 * segment packet with VLAN insertion. 1509 * 1510 * @param txq 1511 * Pointer to TX queue structure. 1512 * @param loc 1513 * Pointer to burst routine local context. 1514 * @param dseg 1515 * Pointer to the dseg fill with built Data Segment. 1516 * @param buf 1517 * Data buffer to point. 1518 * @param len 1519 * Data buffer length. 1520 * @param olx 1521 * Configured Tx offloads mask. It is fully defined at 1522 * compile time and may be used for optimization. 1523 * 1524 * @return 1525 * Pointer to the next Data Segment after inlined data. 1526 * Ring buffer wraparound check is needed. 1527 */ 1528 static __rte_always_inline struct mlx5_wqe_dseg * 1529 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 1530 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1531 struct mlx5_wqe_dseg *__rte_restrict dseg, 1532 uint8_t *buf, 1533 unsigned int len, 1534 unsigned int olx __rte_unused) 1535 1536 { 1537 unsigned int part; 1538 uint8_t *pdst; 1539 1540 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 1541 if (!MLX5_TXOFF_CONFIG(MPW)) { 1542 /* Store the descriptor byte counter for eMPW sessions. */ 1543 dseg->bcount = rte_cpu_to_be_32 1544 ((len + sizeof(struct rte_vlan_hdr)) | 1545 MLX5_ETH_WQE_DATA_INLINE); 1546 pdst = &dseg->inline_data[0]; 1547 } else { 1548 /* The entire legacy MPW session counter is stored on close. */ 1549 pdst = (uint8_t *)dseg; 1550 } 1551 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 1552 buf += MLX5_DSEG_MIN_INLINE_SIZE; 1553 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 1554 len -= MLX5_DSEG_MIN_INLINE_SIZE; 1555 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ 1556 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 1557 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 1558 pdst = (uint8_t *)txq->wqes; 1559 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 1560 loc->mbuf->vlan_tci); 1561 pdst += sizeof(struct rte_vlan_hdr); 1562 /* 1563 * The WQEBB space availability is checked by caller. 1564 * Here we should be aware of WQE ring buffer wraparound only. 1565 */ 1566 part = (uint8_t *)txq->wqes_end - pdst; 1567 part = RTE_MIN(part, len); 1568 do { 1569 rte_memcpy(pdst, buf, part); 1570 len -= part; 1571 if (likely(!len)) { 1572 pdst += part; 1573 if (!MLX5_TXOFF_CONFIG(MPW)) 1574 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1575 /* Note: no final wraparound check here. */ 1576 return (struct mlx5_wqe_dseg *)pdst; 1577 } 1578 pdst = (uint8_t *)txq->wqes; 1579 buf += part; 1580 part = len; 1581 } while (true); 1582 } 1583 1584 /** 1585 * Build the Ethernet Segment with optionally inlined data with 1586 * VLAN insertion and following Data Segments (if any) from 1587 * multi-segment packet. Used by ordinary send and TSO. 1588 * 1589 * @param txq 1590 * Pointer to TX queue structure. 1591 * @param loc 1592 * Pointer to burst routine local context. 1593 * @param wqe 1594 * Pointer to WQE to fill with built Ethernet/Data Segments. 1595 * @param vlan 1596 * Length of VLAN header to insert, 0 means no VLAN insertion. 1597 * @param inlen 1598 * Data length to inline. For TSO this parameter specifies exact value, 1599 * for ordinary send routine can be aligned by caller to provide better WQE 1600 * space saving and data buffer start address alignment. 1601 * This length includes VLAN header being inserted. 
 * @param tso
 *   Zero means ordinary send, inlined data can be extended,
 *   otherwise this is TSO, inlined data length is fixed.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Actual size of built WQE in segments.
 */
static __rte_always_inline unsigned int
mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq,
		   struct mlx5_txq_local *__rte_restrict loc,
		   struct mlx5_wqe *__rte_restrict wqe,
		   unsigned int vlan,
		   unsigned int inlen,
		   unsigned int tso,
		   unsigned int olx __rte_unused)
{
	struct mlx5_wqe_dseg *__rte_restrict dseg;
	unsigned int ds;

	MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen);
	loc->mbuf_nseg = NB_SEGS(loc->mbuf);
	loc->mbuf_off = 0;

	dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx);
	if (!loc->mbuf_nseg)
		goto dseg_done;
	/*
	 * There are still some mbufs remaining, not inlined.
	 * The first mbuf may be partially inlined and we
	 * must process the possible non-zero data offset.
	 */
	if (loc->mbuf_off) {
		unsigned int dlen;
		uint8_t *dptr;

		/*
		 * Exhausted packets must be dropped before.
		 * Non-zero offset means there is some data
		 * remaining in the packet.
		 */
		MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf));
		MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf));
		dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *,
					       loc->mbuf_off);
		dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off;
		/*
		 * Build the pointer/minimal Data Segment.
		 * Do ring buffer wrapping check in advance.
		 */
		if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end)
			dseg = (struct mlx5_wqe_dseg *)txq->wqes;
		mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx);
		/* Store the mbuf to be freed on completion. */
		MLX5_ASSERT(loc->elts_free);
		txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf;
		--loc->elts_free;
		++dseg;
		if (--loc->mbuf_nseg == 0)
			goto dseg_done;
		loc->mbuf = loc->mbuf->next;
		loc->mbuf_off = 0;
	}
	do {
		if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) {
			struct rte_mbuf *mbuf;

			/* Zero length segment found, just skip. */
			mbuf = loc->mbuf;
			loc->mbuf = loc->mbuf->next;
			rte_pktmbuf_free_seg(mbuf);
			if (--loc->mbuf_nseg == 0)
				break;
		} else {
			if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end)
				dseg = (struct mlx5_wqe_dseg *)txq->wqes;
			mlx5_tx_dseg_iptr
				(txq, loc, dseg,
				 rte_pktmbuf_mtod(loc->mbuf, uint8_t *),
				 rte_pktmbuf_data_len(loc->mbuf), olx);
			MLX5_ASSERT(loc->elts_free);
			txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf;
			--loc->elts_free;
			++dseg;
			if (--loc->mbuf_nseg == 0)
				break;
			loc->mbuf = loc->mbuf->next;
		}
	} while (true);

dseg_done:
	/* Calculate actual segments used from the dseg pointer. */
	if ((uintptr_t)wqe < (uintptr_t)dseg)
		ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE;
	else
		ds = (((uintptr_t)dseg - (uintptr_t)wqe) +
		      txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE;
	return ds;
}

/**
 * The routine checks the timestamp flag in the current packet
 * and pushes a WAIT WQE into the queue if scheduling is required.
 *
 * @param txq
 *   Pointer to TX queue structure.
1710 * @param loc 1711 * Pointer to burst routine local context. 1712 * @param elts 1713 * Number of free elements in elts buffer to be checked, for zero 1714 * value the check is optimized out by compiler. 1715 * @param olx 1716 * Configured Tx offloads mask. It is fully defined at 1717 * compile time and may be used for optimization. 1718 * 1719 * @return 1720 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1721 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 1722 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 1723 * Local context variables partially updated. 1724 */ 1725 static __rte_always_inline enum mlx5_txcmp_code 1726 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 1727 struct mlx5_txq_local *restrict loc, 1728 uint16_t elts, 1729 unsigned int olx) 1730 { 1731 if (MLX5_TXOFF_CONFIG(TXPP) && 1732 loc->mbuf->ol_flags & txq->ts_mask) { 1733 struct mlx5_dev_ctx_shared *sh; 1734 struct mlx5_wqe *wqe; 1735 uint64_t ts; 1736 1737 /* 1738 * Estimate the required space quickly and roughly. 1739 * We would like to ensure the packet can be pushed 1740 * to the queue and we won't get the orphan WAIT WQE. 1741 */ 1742 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 1743 loc->elts_free < elts) 1744 return MLX5_TXCMP_CODE_EXIT; 1745 /* Convert the timestamp into completion to wait. */ 1746 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 1747 if (txq->ts_last && ts < txq->ts_last) 1748 rte_atomic_fetch_add_explicit(&txq->sh->txpp.err_ts_order, 1749 1, rte_memory_order_relaxed); 1750 txq->ts_last = ts; 1751 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1752 sh = txq->sh; 1753 if (txq->wait_on_time) { 1754 /* The wait on time capability should be used. */ 1755 ts -= sh->txpp.skew; 1756 rte_pmd_mlx5_trace_tx_wait(ts); 1757 mlx5_tx_cseg_init(txq, loc, wqe, 1758 1 + sizeof(struct mlx5_wqe_wseg) / 1759 MLX5_WSEG_SIZE, 1760 MLX5_OPCODE_WAIT | 1761 MLX5_OPC_MOD_WAIT_TIME << 24, olx); 1762 mlx5_tx_wseg_init(txq, loc, wqe, ts, olx); 1763 } else { 1764 /* Legacy cross-channel operation should be used. */ 1765 int32_t wci; 1766 1767 wci = mlx5_txpp_convert_tx_ts(sh, ts); 1768 if (unlikely(wci < 0)) 1769 return MLX5_TXCMP_CODE_SINGLE; 1770 /* Build the WAIT WQE with specified completion. */ 1771 rte_pmd_mlx5_trace_tx_wait(ts - sh->txpp.skew); 1772 mlx5_tx_cseg_init(txq, loc, wqe, 1773 1 + sizeof(struct mlx5_wqe_qseg) / 1774 MLX5_WSEG_SIZE, 1775 MLX5_OPCODE_WAIT | 1776 MLX5_OPC_MOD_WAIT_CQ_PI << 24, olx); 1777 mlx5_tx_qseg_init(txq, loc, wqe, wci, olx); 1778 } 1779 ++txq->wqe_ci; 1780 --loc->wqe_free; 1781 return MLX5_TXCMP_CODE_MULTI; 1782 } 1783 return MLX5_TXCMP_CODE_SINGLE; 1784 } 1785 1786 /** 1787 * Tx one packet function for multi-segment TSO. Supports all 1788 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 1789 * sends one packet per WQE. 1790 * 1791 * This routine is responsible for storing processed mbuf 1792 * into elts ring buffer and update elts_head. 1793 * 1794 * @param txq 1795 * Pointer to TX queue structure. 1796 * @param loc 1797 * Pointer to burst routine local context. 1798 * @param olx 1799 * Configured Tx offloads mask. It is fully defined at 1800 * compile time and may be used for optimization. 1801 * 1802 * @return 1803 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1804 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1805 * Local context variables partially updated. 
1806 */ 1807 static __rte_always_inline enum mlx5_txcmp_code 1808 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 1809 struct mlx5_txq_local *__rte_restrict loc, 1810 unsigned int olx) 1811 { 1812 struct mlx5_wqe *__rte_restrict wqe; 1813 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 1814 1815 MLX5_ASSERT(loc->elts_free >= NB_SEGS(loc->mbuf)); 1816 if (MLX5_TXOFF_CONFIG(TXPP)) { 1817 enum mlx5_txcmp_code wret; 1818 1819 /* Generate WAIT for scheduling if requested. */ 1820 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 1821 if (wret == MLX5_TXCMP_CODE_EXIT) 1822 return MLX5_TXCMP_CODE_EXIT; 1823 if (wret == MLX5_TXCMP_CODE_ERROR) 1824 return MLX5_TXCMP_CODE_ERROR; 1825 } 1826 /* 1827 * Calculate data length to be inlined to estimate 1828 * the required space in WQE ring buffer. 1829 */ 1830 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1831 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1832 vlan = sizeof(struct rte_vlan_hdr); 1833 inlen = loc->mbuf->l2_len + vlan + 1834 loc->mbuf->l3_len + loc->mbuf->l4_len; 1835 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 1836 return MLX5_TXCMP_CODE_ERROR; 1837 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 1838 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 1839 /* Packet must contain all TSO headers. */ 1840 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 1841 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 1842 inlen > (dlen + vlan))) 1843 return MLX5_TXCMP_CODE_ERROR; 1844 /* 1845 * Check whether there are enough free WQEBBs: 1846 * - Control Segment 1847 * - Ethernet Segment 1848 * - First Segment of inlined Ethernet data 1849 * - ... data continued ... 1850 * - Data Segments of pointer/min inline type 1851 */ 1852 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 1853 MLX5_ESEG_MIN_INLINE_SIZE + 1854 MLX5_WSEG_SIZE + 1855 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 1856 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1857 return MLX5_TXCMP_CODE_EXIT; 1858 /* Check for maximal WQE size. */ 1859 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) 1860 return MLX5_TXCMP_CODE_ERROR; 1861 #ifdef MLX5_PMD_SOFT_COUNTERS 1862 /* Update sent data bytes/packets counters. */ 1863 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 1864 loc->mbuf->tso_segsz; 1865 /* 1866 * One will be added for mbuf itself at the end of the mlx5_tx_burst 1867 * from loc->pkts_sent field. 1868 */ 1869 --ntcp; 1870 txq->stats.opackets += ntcp; 1871 txq->stats.obytes += dlen + vlan + ntcp * inlen; 1872 #endif 1873 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1874 loc->wqe_last = wqe; 1875 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 1876 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 1877 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 1878 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 1879 txq->wqe_ci += (ds + 3) / 4; 1880 loc->wqe_free -= (ds + 3) / 4; 1881 return MLX5_TXCMP_CODE_MULTI; 1882 } 1883 1884 /** 1885 * Tx one packet function for multi-segment SEND. Supports all types of Tx 1886 * offloads, uses MLX5_OPCODE_SEND to build WQEs, sends one packet per WQE, 1887 * without any data inlining in Ethernet Segment. 1888 * 1889 * This routine is responsible for storing processed mbuf 1890 * into elts ring buffer and update elts_head. 1891 * 1892 * @param txq 1893 * Pointer to TX queue structure. 1894 * @param loc 1895 * Pointer to burst routine local context. 1896 * @param olx 1897 * Configured Tx offloads mask. It is fully defined at 1898 * compile time and may be used for optimization. 
1899 * 1900 * @return 1901 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1902 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1903 * Local context variables partially updated. 1904 */ 1905 static __rte_always_inline enum mlx5_txcmp_code 1906 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 1907 struct mlx5_txq_local *__rte_restrict loc, 1908 unsigned int olx) 1909 { 1910 struct mlx5_wqe_dseg *__rte_restrict dseg; 1911 struct mlx5_wqe *__rte_restrict wqe; 1912 unsigned int ds, nseg; 1913 1914 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1915 MLX5_ASSERT(loc->elts_free >= NB_SEGS(loc->mbuf)); 1916 if (MLX5_TXOFF_CONFIG(TXPP)) { 1917 enum mlx5_txcmp_code wret; 1918 1919 /* Generate WAIT for scheduling if requested. */ 1920 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 1921 if (wret == MLX5_TXCMP_CODE_EXIT) 1922 return MLX5_TXCMP_CODE_EXIT; 1923 if (wret == MLX5_TXCMP_CODE_ERROR) 1924 return MLX5_TXCMP_CODE_ERROR; 1925 } 1926 /* 1927 * No inline at all, it means the CPU cycles saving is prioritized at 1928 * configuration, we should not copy any packet data to WQE. 1929 */ 1930 nseg = NB_SEGS(loc->mbuf); 1931 ds = 2 + nseg; 1932 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1933 return MLX5_TXCMP_CODE_EXIT; 1934 /* Check for maximal WQE size. */ 1935 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) 1936 return MLX5_TXCMP_CODE_ERROR; 1937 /* 1938 * Some Tx offloads may cause an error if packet is not long enough, 1939 * check against assumed minimal length. 1940 */ 1941 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 1942 return MLX5_TXCMP_CODE_ERROR; 1943 #ifdef MLX5_PMD_SOFT_COUNTERS 1944 /* Update sent data bytes counter. */ 1945 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 1946 if (MLX5_TXOFF_CONFIG(VLAN) && 1947 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1948 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 1949 #endif 1950 /* 1951 * SEND WQE, one WQEBB: 1952 * - Control Segment, SEND opcode 1953 * - Ethernet Segment, optional VLAN, no inline 1954 * - Data Segments, pointer only type 1955 */ 1956 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1957 loc->wqe_last = wqe; 1958 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 1959 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 1960 mlx5_tx_eseg_none(txq, loc, wqe, olx); 1961 dseg = &wqe->dseg[0]; 1962 do { 1963 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1964 struct rte_mbuf *mbuf; 1965 1966 /* 1967 * Zero length segment found, have to correct total 1968 * size of WQE in segments. 1969 * It is supposed to be rare occasion, so in normal 1970 * case (no zero length segments) we avoid extra 1971 * writing to the Control Segment. 1972 */ 1973 --ds; 1974 wqe->cseg.sq_ds -= RTE_BE32(1); 1975 mbuf = loc->mbuf; 1976 loc->mbuf = mbuf->next; 1977 rte_pktmbuf_free_seg(mbuf); 1978 if (--nseg == 0) 1979 break; 1980 } else { 1981 mlx5_tx_dseg_ptr 1982 (txq, loc, dseg, 1983 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1984 rte_pktmbuf_data_len(loc->mbuf), olx); 1985 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1986 --loc->elts_free; 1987 if (--nseg == 0) 1988 break; 1989 ++dseg; 1990 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1991 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1992 loc->mbuf = loc->mbuf->next; 1993 } 1994 } while (true); 1995 txq->wqe_ci += (ds + 3) / 4; 1996 loc->wqe_free -= (ds + 3) / 4; 1997 return MLX5_TXCMP_CODE_MULTI; 1998 } 1999 2000 /** 2001 * Tx one packet function for multi-segment SEND. 
Supports all 2002 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 2003 * sends one packet per WQE, with data inlining in 2004 * Ethernet Segment and minimal Data Segments. 2005 * 2006 * This routine is responsible for storing processed mbuf 2007 * into elts ring buffer and update elts_head. 2008 * 2009 * @param txq 2010 * Pointer to TX queue structure. 2011 * @param loc 2012 * Pointer to burst routine local context. 2013 * @param olx 2014 * Configured Tx offloads mask. It is fully defined at 2015 * compile time and may be used for optimization. 2016 * 2017 * @return 2018 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2019 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2020 * Local context variables partially updated. 2021 */ 2022 static __rte_always_inline enum mlx5_txcmp_code 2023 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 2024 struct mlx5_txq_local *__rte_restrict loc, 2025 unsigned int olx) 2026 { 2027 struct mlx5_wqe *__rte_restrict wqe; 2028 unsigned int ds, inlen, dlen, vlan = 0; 2029 2030 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2031 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 2032 MLX5_ASSERT(loc->elts_free >= NB_SEGS(loc->mbuf)); 2033 /* 2034 * First calculate data length to be inlined 2035 * to estimate the required space for WQE. 2036 */ 2037 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 2038 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 2039 vlan = sizeof(struct rte_vlan_hdr); 2040 inlen = dlen + vlan; 2041 /* Check against minimal length. */ 2042 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 2043 return MLX5_TXCMP_CODE_ERROR; 2044 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 2045 if (inlen > txq->inlen_send || 2046 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 2047 struct rte_mbuf *mbuf; 2048 unsigned int nxlen; 2049 uintptr_t start; 2050 2051 mbuf = loc->mbuf; 2052 nxlen = rte_pktmbuf_data_len(mbuf) + vlan; 2053 /* 2054 * Packet length exceeds the allowed inline data length, 2055 * check whether the minimal inlining is required. 2056 */ 2057 if (txq->inlen_mode) { 2058 MLX5_ASSERT(txq->inlen_mode >= 2059 MLX5_ESEG_MIN_INLINE_SIZE); 2060 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 2061 inlen = RTE_MIN(txq->inlen_mode, inlen); 2062 } else if (vlan && !txq->vlan_en) { 2063 /* 2064 * VLAN insertion is requested and hardware does not 2065 * support the offload, will do with software inline. 2066 */ 2067 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 2068 } else if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE || 2069 nxlen > txq->inlen_send) { 2070 return mlx5_tx_packet_multi_send(txq, loc, olx); 2071 } else if (nxlen <= MLX5_ESEG_MIN_INLINE_SIZE) { 2072 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 2073 } else { 2074 goto do_first; 2075 } 2076 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2077 goto do_build; 2078 /* 2079 * Now we know the minimal amount of data is requested 2080 * to inline. Check whether we should inline the buffers 2081 * from the chain beginning to eliminate some mbufs. 2082 */ 2083 if (unlikely(nxlen <= txq->inlen_send)) { 2084 /* We can inline first mbuf at least. */ 2085 if (nxlen < inlen) { 2086 unsigned int smlen; 2087 2088 /* Scan mbufs till inlen filled. */ 2089 do { 2090 smlen = nxlen; 2091 mbuf = NEXT(mbuf); 2092 MLX5_ASSERT(mbuf); 2093 nxlen = rte_pktmbuf_data_len(mbuf); 2094 nxlen += smlen; 2095 } while (unlikely(nxlen < inlen)); 2096 if (unlikely(nxlen > txq->inlen_send)) { 2097 /* We cannot inline entire mbuf. 
*/ 2098 smlen = inlen - smlen; 2099 start = rte_pktmbuf_mtod_offset 2100 (mbuf, uintptr_t, smlen); 2101 goto do_align; 2102 } 2103 } 2104 do_first: 2105 do { 2106 inlen = nxlen; 2107 mbuf = NEXT(mbuf); 2108 /* There should be not end of packet. */ 2109 MLX5_ASSERT(mbuf); 2110 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2111 break; 2112 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 2113 } while (unlikely(nxlen < txq->inlen_send)); 2114 } 2115 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 2116 /* 2117 * Check whether we can do inline to align start 2118 * address of data buffer to cacheline. 2119 */ 2120 do_align: 2121 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 2122 if (unlikely(start)) { 2123 start += inlen; 2124 if (start <= txq->inlen_send) 2125 inlen = start; 2126 } 2127 } 2128 /* 2129 * Check whether there are enough free WQEBBs: 2130 * - Control Segment 2131 * - Ethernet Segment 2132 * - First Segment of inlined Ethernet data 2133 * - ... data continued ... 2134 * - Data Segments of pointer/min inline type 2135 * 2136 * Estimate the number of Data Segments conservatively, 2137 * supposing no any mbufs is being freed during inlining. 2138 */ 2139 do_build: 2140 if (MLX5_TXOFF_CONFIG(TXPP)) { 2141 enum mlx5_txcmp_code wret; 2142 2143 /* Generate WAIT for scheduling if requested. */ 2144 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 2145 if (wret == MLX5_TXCMP_CODE_EXIT) 2146 return MLX5_TXCMP_CODE_EXIT; 2147 if (wret == MLX5_TXCMP_CODE_ERROR) 2148 return MLX5_TXCMP_CODE_ERROR; 2149 } 2150 MLX5_ASSERT(inlen <= txq->inlen_send); 2151 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 2152 MLX5_ESEG_MIN_INLINE_SIZE + 2153 MLX5_WSEG_SIZE + 2154 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2155 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 2156 return MLX5_TXCMP_CODE_EXIT; 2157 /* Check for maximal WQE size. */ 2158 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) { 2159 /* Check if we can adjust the inline length. */ 2160 if (unlikely(txq->inlen_mode)) { 2161 ds = NB_SEGS(loc->mbuf) + 2 + 2162 (txq->inlen_mode - 2163 MLX5_ESEG_MIN_INLINE_SIZE + 2164 MLX5_WSEG_SIZE + 2165 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2166 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) 2167 return MLX5_TXCMP_CODE_ERROR; 2168 } 2169 /* We have lucky opportunity to adjust. */ 2170 inlen = RTE_MIN(inlen, MLX5_WQE_SIZE_MAX - 2171 MLX5_WSEG_SIZE * 2 - 2172 MLX5_WSEG_SIZE * NB_SEGS(loc->mbuf) - 2173 MLX5_WSEG_SIZE + 2174 MLX5_ESEG_MIN_INLINE_SIZE); 2175 } 2176 #ifdef MLX5_PMD_SOFT_COUNTERS 2177 /* Update sent data bytes/packets counters. */ 2178 txq->stats.obytes += dlen + vlan; 2179 #endif 2180 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2181 loc->wqe_last = wqe; 2182 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 2183 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 2184 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 2185 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2186 txq->wqe_ci += (ds + 3) / 4; 2187 loc->wqe_free -= (ds + 3) / 4; 2188 return MLX5_TXCMP_CODE_MULTI; 2189 } 2190 2191 /** 2192 * Tx burst function for multi-segment packets. Supports all 2193 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 2194 * sends one packet per WQE. Function stops sending if it 2195 * encounters the single-segment packet. 2196 * 2197 * This routine is responsible for storing processed mbuf 2198 * into elts ring buffer and update elts_head. 2199 * 2200 * @param txq 2201 * Pointer to TX queue structure. 2202 * @param[in] pkts 2203 * Packets to transmit. 
2204 * @param pkts_n 2205 * Number of packets in array. 2206 * @param loc 2207 * Pointer to burst routine local context. 2208 * @param olx 2209 * Configured Tx offloads mask. It is fully defined at 2210 * compile time and may be used for optimization. 2211 * 2212 * @return 2213 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2214 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2215 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2216 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 2217 * Local context variables updated. 2218 */ 2219 static __rte_always_inline enum mlx5_txcmp_code 2220 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 2221 struct rte_mbuf **__rte_restrict pkts, 2222 unsigned int pkts_n, 2223 struct mlx5_txq_local *__rte_restrict loc, 2224 unsigned int olx) 2225 { 2226 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2227 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2228 pkts += loc->pkts_sent + 1; 2229 pkts_n -= loc->pkts_sent; 2230 for (;;) { 2231 enum mlx5_txcmp_code ret; 2232 2233 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 2234 /* 2235 * Estimate the number of free elts quickly but conservatively. 2236 * Some segment may be fully inlined and freed, 2237 * ignore this here - precise estimation is costly. 2238 */ 2239 if (loc->elts_free < NB_SEGS(loc->mbuf)) 2240 return MLX5_TXCMP_CODE_EXIT; 2241 if (MLX5_TXOFF_CONFIG(TSO) && 2242 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { 2243 /* Proceed with multi-segment TSO. */ 2244 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 2245 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 2246 /* Proceed with multi-segment SEND with inlining. */ 2247 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 2248 } else { 2249 /* Proceed with multi-segment SEND w/o inlining. */ 2250 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 2251 } 2252 if (ret == MLX5_TXCMP_CODE_EXIT) 2253 return MLX5_TXCMP_CODE_EXIT; 2254 if (ret == MLX5_TXCMP_CODE_ERROR) 2255 return MLX5_TXCMP_CODE_ERROR; 2256 /* WQE is built, go to the next packet. */ 2257 ++loc->pkts_sent; 2258 --pkts_n; 2259 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2260 return MLX5_TXCMP_CODE_EXIT; 2261 loc->mbuf = *pkts++; 2262 if (pkts_n > 1) 2263 rte_prefetch0(*pkts); 2264 if (likely(NB_SEGS(loc->mbuf) > 1)) 2265 continue; 2266 /* Here ends the series of multi-segment packets. */ 2267 if (MLX5_TXOFF_CONFIG(TSO) && 2268 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2269 return MLX5_TXCMP_CODE_TSO; 2270 return MLX5_TXCMP_CODE_SINGLE; 2271 } 2272 MLX5_ASSERT(false); 2273 } 2274 2275 /** 2276 * Tx burst function for single-segment packets with TSO. 2277 * Supports all types of Tx offloads, except multi-packets. 2278 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 2279 * Function stops sending if it encounters the multi-segment 2280 * packet or packet without TSO requested. 2281 * 2282 * The routine is responsible for storing processed mbuf into elts ring buffer 2283 * and update elts_head if inline offloads is requested due to possible early 2284 * freeing of the inlined mbufs (can not store pkts array in elts as a batch). 2285 * 2286 * @param txq 2287 * Pointer to TX queue structure. 2288 * @param[in] pkts 2289 * Packets to transmit. 2290 * @param pkts_n 2291 * Number of packets in array. 2292 * @param loc 2293 * Pointer to burst routine local context. 2294 * @param olx 2295 * Configured Tx offloads mask. It is fully defined at 2296 * compile time and may be used for optimization. 
2297 * 2298 * @return 2299 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2300 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2301 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2302 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2303 * Local context variables updated. 2304 */ 2305 static __rte_always_inline enum mlx5_txcmp_code 2306 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 2307 struct rte_mbuf **__rte_restrict pkts, 2308 unsigned int pkts_n, 2309 struct mlx5_txq_local *__rte_restrict loc, 2310 unsigned int olx) 2311 { 2312 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2313 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2314 pkts += loc->pkts_sent + 1; 2315 pkts_n -= loc->pkts_sent; 2316 for (;;) { 2317 struct mlx5_wqe_dseg *__rte_restrict dseg; 2318 struct mlx5_wqe *__rte_restrict wqe; 2319 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 2320 uint8_t *dptr; 2321 2322 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2323 if (MLX5_TXOFF_CONFIG(TXPP)) { 2324 enum mlx5_txcmp_code wret; 2325 2326 /* Generate WAIT for scheduling if requested. */ 2327 wret = mlx5_tx_schedule_send(txq, loc, 1, olx); 2328 if (wret == MLX5_TXCMP_CODE_EXIT) 2329 return MLX5_TXCMP_CODE_EXIT; 2330 if (wret == MLX5_TXCMP_CODE_ERROR) 2331 return MLX5_TXCMP_CODE_ERROR; 2332 } 2333 dlen = rte_pktmbuf_data_len(loc->mbuf); 2334 if (MLX5_TXOFF_CONFIG(VLAN) && 2335 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2336 vlan = sizeof(struct rte_vlan_hdr); 2337 } 2338 /* 2339 * First calculate the WQE size to check 2340 * whether we have enough space in ring buffer. 2341 */ 2342 hlen = loc->mbuf->l2_len + vlan + 2343 loc->mbuf->l3_len + loc->mbuf->l4_len; 2344 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 2345 return MLX5_TXCMP_CODE_ERROR; 2346 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 2347 hlen += loc->mbuf->outer_l2_len + 2348 loc->mbuf->outer_l3_len; 2349 /* Segment must contain all TSO headers. */ 2350 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 2351 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 2352 hlen > (dlen + vlan))) 2353 return MLX5_TXCMP_CODE_ERROR; 2354 /* 2355 * Check whether there are enough free WQEBBs: 2356 * - Control Segment 2357 * - Ethernet Segment 2358 * - First Segment of inlined Ethernet data 2359 * - ... data continued ... 2360 * - Finishing Data Segment of pointer type 2361 */ 2362 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 2363 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2364 if (loc->wqe_free < ((ds + 3) / 4)) 2365 return MLX5_TXCMP_CODE_EXIT; 2366 #ifdef MLX5_PMD_SOFT_COUNTERS 2367 /* Update sent data bytes/packets counters. */ 2368 ntcp = (dlen + vlan - hlen + 2369 loc->mbuf->tso_segsz - 1) / 2370 loc->mbuf->tso_segsz; 2371 /* 2372 * One will be added for mbuf itself at the end 2373 * of the mlx5_tx_burst from loc->pkts_sent field. 
2374 */ 2375 --ntcp; 2376 txq->stats.opackets += ntcp; 2377 txq->stats.obytes += dlen + vlan + ntcp * hlen; 2378 #endif 2379 /* 2380 * Build the TSO WQE: 2381 * - Control Segment 2382 * - Ethernet Segment with hlen bytes inlined 2383 * - Data Segment of pointer type 2384 */ 2385 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2386 loc->wqe_last = wqe; 2387 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_TSO, olx); 2388 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 2389 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 2390 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 2391 dlen -= hlen - vlan; 2392 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2393 /* 2394 * WQE is built, update the loop parameters 2395 * and go to the next packet. 2396 */ 2397 txq->wqe_ci += (ds + 3) / 4; 2398 loc->wqe_free -= (ds + 3) / 4; 2399 if (MLX5_TXOFF_CONFIG(INLINE)) 2400 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2401 --loc->elts_free; 2402 ++loc->pkts_sent; 2403 --pkts_n; 2404 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2405 return MLX5_TXCMP_CODE_EXIT; 2406 loc->mbuf = *pkts++; 2407 if (pkts_n > 1) 2408 rte_prefetch0(*pkts); 2409 if (MLX5_TXOFF_CONFIG(MULTI) && 2410 unlikely(NB_SEGS(loc->mbuf) > 1)) 2411 return MLX5_TXCMP_CODE_MULTI; 2412 if (likely(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG))) 2413 return MLX5_TXCMP_CODE_SINGLE; 2414 /* Continue with the next TSO packet. */ 2415 } 2416 MLX5_ASSERT(false); 2417 } 2418 2419 /** 2420 * Analyze the packet and select the best method to send. 2421 * 2422 * @param txq 2423 * Pointer to TX queue structure. 2424 * @param loc 2425 * Pointer to burst routine local context. 2426 * @param olx 2427 * Configured Tx offloads mask. It is fully defined at 2428 * compile time and may be used for optimization. 2429 * @param newp 2430 * The predefined flag whether do complete check for 2431 * multi-segment packets and TSO. 2432 * 2433 * @return 2434 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2435 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 2436 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 2437 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 2438 */ 2439 static __rte_always_inline enum mlx5_txcmp_code 2440 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 2441 struct mlx5_txq_local *__rte_restrict loc, 2442 unsigned int olx, 2443 bool newp) 2444 { 2445 /* Check for multi-segment packet. */ 2446 if (newp && 2447 MLX5_TXOFF_CONFIG(MULTI) && 2448 unlikely(NB_SEGS(loc->mbuf) > 1)) 2449 return MLX5_TXCMP_CODE_MULTI; 2450 /* Check for TSO packet. */ 2451 if (newp && 2452 MLX5_TXOFF_CONFIG(TSO) && 2453 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2454 return MLX5_TXCMP_CODE_TSO; 2455 /* Check if eMPW is enabled at all. */ 2456 if (!MLX5_TXOFF_CONFIG(EMPW)) 2457 return MLX5_TXCMP_CODE_SINGLE; 2458 /* Check if eMPW can be engaged. */ 2459 if (MLX5_TXOFF_CONFIG(VLAN) && 2460 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) && 2461 (!MLX5_TXOFF_CONFIG(INLINE) || 2462 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 2463 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 2464 /* 2465 * eMPW does not support VLAN insertion offload, we have to 2466 * inline the entire packet but packet is too long for inlining. 2467 */ 2468 return MLX5_TXCMP_CODE_SINGLE; 2469 } 2470 return MLX5_TXCMP_CODE_EMPW; 2471 } 2472 2473 /** 2474 * Check the next packet attributes to match with the eMPW batch ones. 2475 * In addition, for legacy MPW the packet length is checked either. 
2476 * 2477 * @param txq 2478 * Pointer to TX queue structure. 2479 * @param es 2480 * Pointer to Ethernet Segment of eMPW batch. 2481 * @param loc 2482 * Pointer to burst routine local context. 2483 * @param dlen 2484 * Length of previous packet in MPW descriptor. 2485 * @param olx 2486 * Configured Tx offloads mask. It is fully defined at 2487 * compile time and may be used for optimization. 2488 * 2489 * @return 2490 * true - packet match with eMPW batch attributes. 2491 * false - no match, eMPW should be restarted. 2492 */ 2493 static __rte_always_inline bool 2494 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, 2495 struct mlx5_wqe_eseg *__rte_restrict es, 2496 struct mlx5_txq_local *__rte_restrict loc, 2497 uint32_t dlen, 2498 unsigned int olx) 2499 { 2500 uint8_t swp_flags = 0; 2501 2502 /* Compare the checksum flags, if any. */ 2503 if (MLX5_TXOFF_CONFIG(CSUM) && 2504 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags) 2505 return false; 2506 /* Compare the Software Parser offsets and flags. */ 2507 if (MLX5_TXOFF_CONFIG(SWP) && 2508 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) || 2509 es->swp_flags != swp_flags)) 2510 return false; 2511 /* Fill metadata field if needed. */ 2512 if (MLX5_TXOFF_CONFIG(METADATA) && 2513 es->metadata != (loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 2514 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 0)) 2515 return false; 2516 /* Legacy MPW can send packets with the same length only. */ 2517 if (MLX5_TXOFF_CONFIG(MPW) && 2518 dlen != rte_pktmbuf_data_len(loc->mbuf)) 2519 return false; 2520 /* There must be no VLAN packets in eMPW loop. */ 2521 if (MLX5_TXOFF_CONFIG(VLAN)) 2522 MLX5_ASSERT(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN)); 2523 /* Check if the scheduling is requested. */ 2524 if (MLX5_TXOFF_CONFIG(TXPP) && 2525 loc->mbuf->ol_flags & txq->ts_mask) 2526 return false; 2527 return true; 2528 } 2529 2530 /** 2531 * Update send loop variables and WQE for eMPW loop without data inlining. 2532 * Number of Data Segments is equal to the number of sent packets. 2533 * 2534 * @param txq 2535 * Pointer to TX queue structure. 2536 * @param loc 2537 * Pointer to burst routine local context. 2538 * @param ds 2539 * Number of packets/Data Segments/Packets. 2540 * @param slen 2541 * Accumulated statistics, bytes sent. 2542 * @param olx 2543 * Configured Tx offloads mask. It is fully defined at 2544 * compile time and may be used for optimization. 2545 * 2546 * @return 2547 * true - packet match with eMPW batch attributes. 2548 * false - no match, eMPW should be restarted. 2549 */ 2550 static __rte_always_inline void 2551 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, 2552 struct mlx5_txq_local *__rte_restrict loc, 2553 unsigned int ds, 2554 unsigned int slen, 2555 unsigned int olx __rte_unused) 2556 { 2557 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2558 #ifdef MLX5_PMD_SOFT_COUNTERS 2559 /* Update sent data bytes counter. */ 2560 txq->stats.obytes += slen; 2561 #else 2562 (void)slen; 2563 #endif 2564 loc->elts_free -= ds; 2565 loc->pkts_sent += ds; 2566 ds += 2; 2567 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2568 txq->wqe_ci += (ds + 3) / 4; 2569 loc->wqe_free -= (ds + 3) / 4; 2570 } 2571 2572 /** 2573 * Update send loop variables and WQE for eMPW loop with data inlining. 2574 * Gets the size of pushed descriptors and data to the WQE. 2575 * 2576 * @param txq 2577 * Pointer to TX queue structure. 2578 * @param loc 2579 * Pointer to burst routine local context. 
2580 * @param len 2581 * Total size of descriptor/data in bytes. 2582 * @param slen 2583 * Accumulated statistics, data bytes sent. 2584 * @param wqem 2585 * The base WQE for the eMPW/MPW descriptor. 2586 * @param olx 2587 * Configured Tx offloads mask. It is fully defined at 2588 * compile time and may be used for optimization. 2589 * 2590 * @return 2591 * true - packet match with eMPW batch attributes. 2592 * false - no match, eMPW should be restarted. 2593 */ 2594 static __rte_always_inline void 2595 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 2596 struct mlx5_txq_local *__rte_restrict loc, 2597 unsigned int len, 2598 unsigned int slen, 2599 struct mlx5_wqe *__rte_restrict wqem, 2600 unsigned int olx __rte_unused) 2601 { 2602 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 2603 2604 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2605 #ifdef MLX5_PMD_SOFT_COUNTERS 2606 /* Update sent data bytes counter. */ 2607 txq->stats.obytes += slen; 2608 #else 2609 (void)slen; 2610 #endif 2611 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 2612 /* 2613 * If the legacy MPW session contains the inline packets 2614 * we should set the only inline data segment length 2615 * and align the total length to the segment size. 2616 */ 2617 MLX5_ASSERT(len > sizeof(dseg->bcount)); 2618 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 2619 MLX5_ETH_WQE_DATA_INLINE); 2620 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 2621 } else { 2622 /* 2623 * The session is not legacy MPW or contains the 2624 * data buffer pointer segments. 2625 */ 2626 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 2627 len = len / MLX5_WSEG_SIZE + 2; 2628 } 2629 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 2630 txq->wqe_ci += (len + 3) / 4; 2631 loc->wqe_free -= (len + 3) / 4; 2632 loc->wqe_last = wqem; 2633 } 2634 2635 /** 2636 * The set of Tx burst functions for single-segment packets without TSO 2637 * and with Multi-Packet Writing feature support. 2638 * Supports all types of Tx offloads, except multi-packets and TSO. 2639 * 2640 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends as many packet 2641 * per WQE as it can. If eMPW is not configured or packet can not be sent with 2642 * eMPW (VLAN insertion) the ordinary SEND opcode is used and only one packet 2643 * placed in WQE. 2644 * 2645 * Functions stop sending if it encounters the multi-segment packet or packet 2646 * with TSO requested. 2647 * 2648 * The routines are responsible for storing processed mbuf into elts ring buffer 2649 * and update elts_head if inlining offload is requested. Otherwise the copying 2650 * mbufs to elts can be postponed and completed at the end of burst routine. 2651 * 2652 * @param txq 2653 * Pointer to TX queue structure. 2654 * @param[in] pkts 2655 * Packets to transmit. 2656 * @param pkts_n 2657 * Number of packets in array. 2658 * @param loc 2659 * Pointer to burst routine local context. 2660 * @param olx 2661 * Configured Tx offloads mask. It is fully defined at 2662 * compile time and may be used for optimization. 2663 * 2664 * @return 2665 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2666 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2667 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2668 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 2669 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 2670 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 2671 * 2672 * Local context variables updated. 
2673 * 2674 * 2675 * The routine sends packets with MLX5_OPCODE_EMPW 2676 * without inlining, this is dedicated optimized branch. 2677 * No VLAN insertion is supported. 2678 */ 2679 static __rte_always_inline enum mlx5_txcmp_code 2680 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 2681 struct rte_mbuf **__rte_restrict pkts, 2682 unsigned int pkts_n, 2683 struct mlx5_txq_local *__rte_restrict loc, 2684 unsigned int olx) 2685 { 2686 /* 2687 * Subroutine is the part of mlx5_tx_burst_single() and sends 2688 * single-segment packet with eMPW opcode without data inlining. 2689 */ 2690 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2691 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2692 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2693 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2694 pkts += loc->pkts_sent + 1; 2695 pkts_n -= loc->pkts_sent; 2696 for (;;) { 2697 struct mlx5_wqe_dseg *__rte_restrict dseg; 2698 struct mlx5_wqe_eseg *__rte_restrict eseg; 2699 enum mlx5_txcmp_code ret; 2700 unsigned int part, loop; 2701 unsigned int slen = 0; 2702 2703 next_empw: 2704 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2705 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2706 MLX5_MPW_MAX_PACKETS : 2707 MLX5_EMPW_MAX_PACKETS); 2708 if (unlikely(loc->elts_free < part)) { 2709 /* We have no enough elts to save all mbufs. */ 2710 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 2711 return MLX5_TXCMP_CODE_EXIT; 2712 /* But we still able to send at least minimal eMPW. */ 2713 part = loc->elts_free; 2714 } 2715 if (MLX5_TXOFF_CONFIG(TXPP)) { 2716 enum mlx5_txcmp_code wret; 2717 2718 /* Generate WAIT for scheduling if requested. */ 2719 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 2720 if (wret == MLX5_TXCMP_CODE_EXIT) 2721 return MLX5_TXCMP_CODE_EXIT; 2722 if (wret == MLX5_TXCMP_CODE_ERROR) 2723 return MLX5_TXCMP_CODE_ERROR; 2724 } 2725 /* Check whether we have enough WQEs */ 2726 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 2727 if (unlikely(loc->wqe_free < 2728 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2729 return MLX5_TXCMP_CODE_EXIT; 2730 part = (loc->wqe_free * 4) - 2; 2731 } 2732 if (likely(part > 1)) 2733 rte_prefetch0(*pkts); 2734 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2735 /* 2736 * Build eMPW title WQEBB: 2737 * - Control Segment, eMPW opcode 2738 * - Ethernet Segment, no inline 2739 */ 2740 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 2741 MLX5_OPCODE_ENHANCED_MPSW, olx); 2742 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 2743 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2744 eseg = &loc->wqe_last->eseg; 2745 dseg = &loc->wqe_last->dseg[0]; 2746 loop = part; 2747 /* Store the packet length for legacy MPW. */ 2748 if (MLX5_TXOFF_CONFIG(MPW)) 2749 eseg->mss = rte_cpu_to_be_16 2750 (rte_pktmbuf_data_len(loc->mbuf)); 2751 for (;;) { 2752 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2753 #ifdef MLX5_PMD_SOFT_COUNTERS 2754 /* Update sent data bytes counter. */ 2755 slen += dlen; 2756 #endif 2757 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 2758 mlx5_tx_dseg_ptr 2759 (txq, loc, dseg, 2760 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 2761 dlen, olx); 2762 if (unlikely(--loop == 0)) 2763 break; 2764 loc->mbuf = *pkts++; 2765 if (likely(loop > 1)) 2766 rte_prefetch0(*pkts); 2767 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2768 /* 2769 * Unroll the completion code to avoid 2770 * returning variable value - it results in 2771 * unoptimized sequent checking in caller. 
2772 */ 2773 if (ret == MLX5_TXCMP_CODE_MULTI) { 2774 part -= loop; 2775 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2776 if (unlikely(!loc->elts_free || 2777 !loc->wqe_free)) 2778 return MLX5_TXCMP_CODE_EXIT; 2779 return MLX5_TXCMP_CODE_MULTI; 2780 } 2781 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2782 if (ret == MLX5_TXCMP_CODE_TSO) { 2783 part -= loop; 2784 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2785 if (unlikely(!loc->elts_free || 2786 !loc->wqe_free)) 2787 return MLX5_TXCMP_CODE_EXIT; 2788 return MLX5_TXCMP_CODE_TSO; 2789 } 2790 if (ret == MLX5_TXCMP_CODE_SINGLE) { 2791 part -= loop; 2792 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2793 if (unlikely(!loc->elts_free || 2794 !loc->wqe_free)) 2795 return MLX5_TXCMP_CODE_EXIT; 2796 return MLX5_TXCMP_CODE_SINGLE; 2797 } 2798 if (ret != MLX5_TXCMP_CODE_EMPW) { 2799 MLX5_ASSERT(false); 2800 part -= loop; 2801 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2802 return MLX5_TXCMP_CODE_ERROR; 2803 } 2804 /* 2805 * Check whether packet parameters coincide 2806 * within assumed eMPW batch: 2807 * - check sum settings 2808 * - metadata value 2809 * - software parser settings 2810 * - packets length (legacy MPW only) 2811 * - scheduling is not required 2812 */ 2813 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 2814 MLX5_ASSERT(loop); 2815 part -= loop; 2816 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2817 if (unlikely(!loc->elts_free || 2818 !loc->wqe_free)) 2819 return MLX5_TXCMP_CODE_EXIT; 2820 pkts_n -= part; 2821 goto next_empw; 2822 } 2823 /* Packet attributes match, continue the same eMPW. */ 2824 ++dseg; 2825 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2826 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2827 } 2828 /* eMPW is built successfully, update loop parameters. */ 2829 MLX5_ASSERT(!loop); 2830 MLX5_ASSERT(pkts_n >= part); 2831 #ifdef MLX5_PMD_SOFT_COUNTERS 2832 /* Update sent data bytes counter. */ 2833 txq->stats.obytes += slen; 2834 #endif 2835 loc->elts_free -= part; 2836 loc->pkts_sent += part; 2837 txq->wqe_ci += (2 + part + 3) / 4; 2838 loc->wqe_free -= (2 + part + 3) / 4; 2839 pkts_n -= part; 2840 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2841 return MLX5_TXCMP_CODE_EXIT; 2842 loc->mbuf = *pkts++; 2843 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2844 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 2845 return ret; 2846 /* Continue sending eMPW batches. */ 2847 } 2848 MLX5_ASSERT(false); 2849 } 2850 2851 /** 2852 * The routine sends packets with MLX5_OPCODE_EMPW 2853 * with inlining, optionally supports VLAN insertion. 2854 */ 2855 static __rte_always_inline enum mlx5_txcmp_code 2856 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 2857 struct rte_mbuf **__rte_restrict pkts, 2858 unsigned int pkts_n, 2859 struct mlx5_txq_local *__rte_restrict loc, 2860 unsigned int olx) 2861 { 2862 /* 2863 * Subroutine is the part of mlx5_tx_burst_single() and sends 2864 * single-segment packet with eMPW opcode with data inlining. 
2865 */ 2866 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2867 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2868 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2869 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2870 pkts += loc->pkts_sent + 1; 2871 pkts_n -= loc->pkts_sent; 2872 for (;;) { 2873 struct mlx5_wqe_dseg *__rte_restrict dseg; 2874 struct mlx5_wqe *__rte_restrict wqem; 2875 enum mlx5_txcmp_code ret; 2876 unsigned int room, part, nlim; 2877 unsigned int slen = 0; 2878 2879 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2880 /* 2881 * Limits the amount of packets in one WQE 2882 * to improve CQE latency generation. 2883 */ 2884 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2885 MLX5_MPW_INLINE_MAX_PACKETS : 2886 MLX5_EMPW_MAX_PACKETS); 2887 if (MLX5_TXOFF_CONFIG(TXPP)) { 2888 enum mlx5_txcmp_code wret; 2889 2890 /* Generate WAIT for scheduling if requested. */ 2891 wret = mlx5_tx_schedule_send(txq, loc, nlim, olx); 2892 if (wret == MLX5_TXCMP_CODE_EXIT) 2893 return MLX5_TXCMP_CODE_EXIT; 2894 if (wret == MLX5_TXCMP_CODE_ERROR) 2895 return MLX5_TXCMP_CODE_ERROR; 2896 } 2897 /* Check whether we have minimal amount WQEs */ 2898 if (unlikely(loc->wqe_free < 2899 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2900 return MLX5_TXCMP_CODE_EXIT; 2901 if (likely(pkts_n > 1)) 2902 rte_prefetch0(*pkts); 2903 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2904 /* 2905 * Build eMPW title WQEBB: 2906 * - Control Segment, eMPW opcode, zero DS 2907 * - Ethernet Segment, no inline 2908 */ 2909 mlx5_tx_cseg_init(txq, loc, wqem, 0, 2910 MLX5_OPCODE_ENHANCED_MPSW, olx); 2911 mlx5_tx_eseg_none(txq, loc, wqem, 2912 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2913 dseg = &wqem->dseg[0]; 2914 /* Store the packet length for legacy MPW. */ 2915 if (MLX5_TXOFF_CONFIG(MPW)) 2916 wqem->eseg.mss = rte_cpu_to_be_16 2917 (rte_pktmbuf_data_len(loc->mbuf)); 2918 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 2919 loc->wqe_free) * MLX5_WQE_SIZE - 2920 MLX5_WQE_CSEG_SIZE - 2921 MLX5_WQE_ESEG_SIZE; 2922 /* Limit the room for legacy MPW sessions for performance. */ 2923 if (MLX5_TXOFF_CONFIG(MPW)) 2924 room = RTE_MIN(room, 2925 RTE_MAX(txq->inlen_empw + 2926 sizeof(dseg->bcount) + 2927 (MLX5_TXOFF_CONFIG(VLAN) ? 2928 sizeof(struct rte_vlan_hdr) : 0), 2929 MLX5_MPW_INLINE_MAX_PACKETS * 2930 MLX5_WQE_DSEG_SIZE)); 2931 /* Build WQE till we have space, packets and resources. */ 2932 part = room; 2933 for (;;) { 2934 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2935 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2936 unsigned int tlen; 2937 2938 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2939 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 2940 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 2941 /* 2942 * Some Tx offloads may cause an error if packet is not 2943 * long enough, check against assumed minimal length. 2944 */ 2945 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 2946 part -= room; 2947 if (unlikely(!part)) 2948 return MLX5_TXCMP_CODE_ERROR; 2949 /* 2950 * We have some successfully built 2951 * packet Data Segments to send. 2952 */ 2953 mlx5_tx_idone_empw(txq, loc, part, 2954 slen, wqem, olx); 2955 return MLX5_TXCMP_CODE_ERROR; 2956 } 2957 /* Inline or not inline - that's the Question. */ 2958 if (dlen > txq->inlen_empw || 2959 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2960 goto pointer_empw; 2961 if (MLX5_TXOFF_CONFIG(MPW)) { 2962 if (dlen > txq->inlen_send) 2963 goto pointer_empw; 2964 tlen = dlen; 2965 if (part == room) { 2966 /* Open new inline MPW session. 
*/ 2967 tlen += sizeof(dseg->bcount); 2968 dseg->bcount = RTE_BE32(0); 2969 dseg = RTE_PTR_ADD 2970 (dseg, sizeof(dseg->bcount)); 2971 } else { 2972 /* 2973 * No pointer and inline descriptor 2974 * intermix for legacy MPW sessions. 2975 */ 2976 if (wqem->dseg[0].bcount) 2977 break; 2978 } 2979 } else { 2980 tlen = sizeof(dseg->bcount) + dlen; 2981 } 2982 /* Inline entire packet, optional VLAN insertion. */ 2983 if (MLX5_TXOFF_CONFIG(VLAN) && 2984 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2985 /* 2986 * The packet length must be checked in 2987 * mlx5_tx_able_to_empw() and packet 2988 * fits into inline length guaranteed. 2989 */ 2990 MLX5_ASSERT((dlen + 2991 sizeof(struct rte_vlan_hdr)) <= 2992 txq->inlen_empw); 2993 tlen += sizeof(struct rte_vlan_hdr); 2994 if (room < tlen) 2995 break; 2996 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 2997 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 2998 dptr, dlen, olx); 2999 #ifdef MLX5_PMD_SOFT_COUNTERS 3000 /* Update sent data bytes counter. */ 3001 slen += sizeof(struct rte_vlan_hdr); 3002 #endif 3003 } else { 3004 if (room < tlen) 3005 break; 3006 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 3007 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 3008 dptr, dlen, olx); 3009 } 3010 if (!MLX5_TXOFF_CONFIG(MPW)) 3011 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 3012 MLX5_ASSERT(room >= tlen); 3013 room -= tlen; 3014 /* 3015 * Packet data are completely inline, 3016 * we can try to free the packet. 3017 */ 3018 if (likely(loc->pkts_sent == loc->mbuf_free)) { 3019 /* 3020 * All the packets from the burst beginning 3021 * are inline, we can free mbufs directly 3022 * from the origin array on tx_burst exit(). 3023 */ 3024 loc->mbuf_free++; 3025 goto next_mbuf; 3026 } 3027 /* 3028 * In order no to call rte_pktmbuf_free_seg() here, 3029 * in the most inner loop (that might be very 3030 * expensive) we just save the mbuf in elts. 3031 */ 3032 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3033 loc->elts_free--; 3034 goto next_mbuf; 3035 pointer_empw: 3036 /* 3037 * No pointer and inline descriptor 3038 * intermix for legacy MPW sessions. 3039 */ 3040 if (MLX5_TXOFF_CONFIG(MPW) && 3041 part != room && 3042 wqem->dseg[0].bcount == RTE_BE32(0)) 3043 break; 3044 /* 3045 * Not inlinable VLAN packets are 3046 * proceeded outside of this routine. 3047 */ 3048 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 3049 if (MLX5_TXOFF_CONFIG(VLAN)) 3050 MLX5_ASSERT(!(loc->mbuf->ol_flags & 3051 RTE_MBUF_F_TX_VLAN)); 3052 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 3053 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 3054 /* We have to store mbuf in elts.*/ 3055 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3056 loc->elts_free--; 3057 room -= MLX5_WQE_DSEG_SIZE; 3058 /* Ring buffer wraparound is checked at the loop end.*/ 3059 ++dseg; 3060 next_mbuf: 3061 #ifdef MLX5_PMD_SOFT_COUNTERS 3062 /* Update sent data bytes counter. */ 3063 slen += dlen; 3064 #endif 3065 loc->pkts_sent++; 3066 pkts_n--; 3067 if (unlikely(!pkts_n || !loc->elts_free)) { 3068 /* 3069 * We have no resources/packets to 3070 * continue build descriptors. 3071 */ 3072 part -= room; 3073 mlx5_tx_idone_empw(txq, loc, part, 3074 slen, wqem, olx); 3075 return MLX5_TXCMP_CODE_EXIT; 3076 } 3077 loc->mbuf = *pkts++; 3078 if (likely(pkts_n > 1)) 3079 rte_prefetch0(*pkts); 3080 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3081 /* 3082 * Unroll the completion code to avoid 3083 * returning variable value - it results in 3084 * unoptimized sequent checking in caller. 
3085 */ 3086 if (ret == MLX5_TXCMP_CODE_MULTI) { 3087 part -= room; 3088 mlx5_tx_idone_empw(txq, loc, part, 3089 slen, wqem, olx); 3090 if (unlikely(!loc->elts_free || 3091 !loc->wqe_free)) 3092 return MLX5_TXCMP_CODE_EXIT; 3093 return MLX5_TXCMP_CODE_MULTI; 3094 } 3095 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3096 if (ret == MLX5_TXCMP_CODE_TSO) { 3097 part -= room; 3098 mlx5_tx_idone_empw(txq, loc, part, 3099 slen, wqem, olx); 3100 if (unlikely(!loc->elts_free || 3101 !loc->wqe_free)) 3102 return MLX5_TXCMP_CODE_EXIT; 3103 return MLX5_TXCMP_CODE_TSO; 3104 } 3105 if (ret == MLX5_TXCMP_CODE_SINGLE) { 3106 part -= room; 3107 mlx5_tx_idone_empw(txq, loc, part, 3108 slen, wqem, olx); 3109 if (unlikely(!loc->elts_free || 3110 !loc->wqe_free)) 3111 return MLX5_TXCMP_CODE_EXIT; 3112 return MLX5_TXCMP_CODE_SINGLE; 3113 } 3114 if (ret != MLX5_TXCMP_CODE_EMPW) { 3115 MLX5_ASSERT(false); 3116 part -= room; 3117 mlx5_tx_idone_empw(txq, loc, part, 3118 slen, wqem, olx); 3119 return MLX5_TXCMP_CODE_ERROR; 3120 } 3121 /* Check if we have minimal room left. */ 3122 nlim--; 3123 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 3124 break; 3125 /* 3126 * Check whether packet parameters coincide 3127 * within assumed eMPW batch: 3128 * - check sum settings 3129 * - metadata value 3130 * - software parser settings 3131 * - packets length (legacy MPW only) 3132 * - scheduling is not required 3133 */ 3134 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 3135 loc, dlen, olx)) 3136 break; 3137 /* Packet attributes match, continue the same eMPW. */ 3138 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3139 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3140 } 3141 /* 3142 * We get here to close an existing eMPW 3143 * session and start the new one. 3144 */ 3145 MLX5_ASSERT(pkts_n); 3146 part -= room; 3147 if (unlikely(!part)) 3148 return MLX5_TXCMP_CODE_EXIT; 3149 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 3150 if (unlikely(!loc->elts_free || 3151 !loc->wqe_free)) 3152 return MLX5_TXCMP_CODE_EXIT; 3153 /* Continue the loop with new eMPW session. */ 3154 } 3155 MLX5_ASSERT(false); 3156 } 3157 3158 /** 3159 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 3160 * Data inlining and VLAN insertion are supported. 3161 */ 3162 static __rte_always_inline enum mlx5_txcmp_code 3163 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 3164 struct rte_mbuf **__rte_restrict pkts, 3165 unsigned int pkts_n, 3166 struct mlx5_txq_local *__rte_restrict loc, 3167 unsigned int olx) 3168 { 3169 /* 3170 * Subroutine is the part of mlx5_tx_burst_single() 3171 * and sends single-segment packet with SEND opcode. 3172 */ 3173 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3174 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3175 pkts += loc->pkts_sent + 1; 3176 pkts_n -= loc->pkts_sent; 3177 for (;;) { 3178 struct mlx5_wqe *__rte_restrict wqe; 3179 enum mlx5_txcmp_code ret; 3180 3181 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3182 MLX5_ASSERT(loc->elts_free); 3183 if (MLX5_TXOFF_CONFIG(TXPP)) { 3184 enum mlx5_txcmp_code wret; 3185 3186 /* Generate WAIT for scheduling if requested. 
*/ 3187 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 3188 if (wret == MLX5_TXCMP_CODE_EXIT) 3189 return MLX5_TXCMP_CODE_EXIT; 3190 if (wret == MLX5_TXCMP_CODE_ERROR) 3191 return MLX5_TXCMP_CODE_ERROR; 3192 } 3193 if (MLX5_TXOFF_CONFIG(INLINE)) { 3194 unsigned int inlen, vlan = 0; 3195 3196 inlen = rte_pktmbuf_data_len(loc->mbuf); 3197 if (MLX5_TXOFF_CONFIG(VLAN) && 3198 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 3199 vlan = sizeof(struct rte_vlan_hdr); 3200 inlen += vlan; 3201 } 3202 /* 3203 * If inlining is enabled at configuration time 3204 * the limit must be not less than minimal size. 3205 * Otherwise we would do extra check for data 3206 * size to avoid crashes due to length overflow. 3207 */ 3208 MLX5_ASSERT(txq->inlen_send >= 3209 MLX5_ESEG_MIN_INLINE_SIZE); 3210 if (inlen <= txq->inlen_send) { 3211 unsigned int seg_n, wqe_n; 3212 3213 rte_prefetch0(rte_pktmbuf_mtod 3214 (loc->mbuf, uint8_t *)); 3215 /* Check against minimal length. */ 3216 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3217 return MLX5_TXCMP_CODE_ERROR; 3218 if (loc->mbuf->ol_flags & 3219 RTE_MBUF_F_TX_DYNF_NOINLINE) { 3220 /* 3221 * The hint flag not to inline packet 3222 * data is set. Check whether we can 3223 * follow the hint. 3224 */ 3225 if ((!MLX5_TXOFF_CONFIG(EMPW) && 3226 txq->inlen_mode) || 3227 (MLX5_TXOFF_CONFIG(MPW) && 3228 txq->inlen_mode)) { 3229 if (inlen <= txq->inlen_send) 3230 goto single_inline; 3231 /* 3232 * The hardware requires the 3233 * minimal inline data header. 3234 */ 3235 goto single_min_inline; 3236 } 3237 if (MLX5_TXOFF_CONFIG(VLAN) && 3238 vlan && !txq->vlan_en) { 3239 /* 3240 * We must insert VLAN tag 3241 * by software means. 3242 */ 3243 goto single_part_inline; 3244 } 3245 goto single_no_inline; 3246 } 3247 single_inline: 3248 /* 3249 * Completely inlined packet data WQE: 3250 * - Control Segment, SEND opcode 3251 * - Ethernet Segment, no VLAN insertion 3252 * - Data inlined, VLAN optionally inserted 3253 * - Alignment to MLX5_WSEG_SIZE 3254 * Have to estimate amount of WQEBBs 3255 */ 3256 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 3257 MLX5_ESEG_MIN_INLINE_SIZE + 3258 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3259 /* Check if there are enough WQEBBs. */ 3260 wqe_n = (seg_n + 3) / 4; 3261 if (wqe_n > loc->wqe_free) 3262 return MLX5_TXCMP_CODE_EXIT; 3263 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3264 loc->wqe_last = wqe; 3265 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 3266 MLX5_OPCODE_SEND, olx); 3267 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 3268 mlx5_tx_eseg_data(txq, loc, wqe, 3269 vlan, inlen, 0, olx); 3270 txq->wqe_ci += wqe_n; 3271 loc->wqe_free -= wqe_n; 3272 /* 3273 * Packet data are completely inlined, 3274 * free the packet immediately. 3275 */ 3276 rte_pktmbuf_free_seg(loc->mbuf); 3277 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 3278 MLX5_TXOFF_CONFIG(MPW)) && 3279 txq->inlen_mode) { 3280 /* 3281 * If minimal inlining is requested the eMPW 3282 * feature should be disabled due to data is 3283 * inlined into Ethernet Segment, which can 3284 * not contain inlined data for eMPW due to 3285 * segment shared for all packets. 3286 */ 3287 struct mlx5_wqe_dseg *__rte_restrict dseg; 3288 unsigned int ds; 3289 uint8_t *dptr; 3290 3291 /* 3292 * The inline-mode settings require 3293 * to inline the specified amount of 3294 * data bytes to the Ethernet Segment. 3295 * We should check the free space in 3296 * WQE ring buffer to inline partially. 
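 * For illustration (values assumed): with txq->inlen_mode = 128 and a
 * 1000-byte single-segment packet, 128 bytes are inlined into the Ethernet
 * Segment and the remaining 872 bytes are attached via the pointer Data
 * Segment built below.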
3297 */ 3298 single_min_inline: 3299 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 3300 MLX5_ASSERT(inlen > txq->inlen_mode); 3301 MLX5_ASSERT(txq->inlen_mode >= 3302 MLX5_ESEG_MIN_INLINE_SIZE); 3303 /* 3304 * Check whether there are enough free WQEBBs: 3305 * - Control Segment 3306 * - Ethernet Segment 3307 * - First Segment of inlined Ethernet data 3308 * - ... data continued ... 3309 * - Finishing Data Segment of pointer type 3310 */ 3311 ds = (MLX5_WQE_CSEG_SIZE + 3312 MLX5_WQE_ESEG_SIZE + 3313 MLX5_WQE_DSEG_SIZE + 3314 txq->inlen_mode - 3315 MLX5_ESEG_MIN_INLINE_SIZE + 3316 MLX5_WQE_DSEG_SIZE + 3317 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3318 if (loc->wqe_free < ((ds + 3) / 4)) 3319 return MLX5_TXCMP_CODE_EXIT; 3320 /* 3321 * Build the ordinary SEND WQE: 3322 * - Control Segment 3323 * - Ethernet Segment, inline inlen_mode bytes 3324 * - Data Segment of pointer type 3325 */ 3326 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3327 loc->wqe_last = wqe; 3328 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3329 MLX5_OPCODE_SEND, olx); 3330 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 3331 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 3332 txq->inlen_mode, 3333 0, olx); 3334 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3335 txq->inlen_mode - vlan; 3336 inlen -= txq->inlen_mode; 3337 mlx5_tx_dseg_ptr(txq, loc, dseg, 3338 dptr, inlen, olx); 3339 /* 3340 * WQE is built, update the loop parameters 3341 * and got to the next packet. 3342 */ 3343 txq->wqe_ci += (ds + 3) / 4; 3344 loc->wqe_free -= (ds + 3) / 4; 3345 /* We have to store mbuf in elts.*/ 3346 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3347 txq->elts[txq->elts_head++ & txq->elts_m] = 3348 loc->mbuf; 3349 --loc->elts_free; 3350 } else { 3351 uint8_t *dptr; 3352 unsigned int dlen; 3353 3354 /* 3355 * Partially inlined packet data WQE, we have 3356 * some space in title WQEBB, we can fill it 3357 * with some packet data. It takes one WQEBB, 3358 * it is available, no extra space check: 3359 * - Control Segment, SEND opcode 3360 * - Ethernet Segment, no VLAN insertion 3361 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 3362 * - Data Segment, pointer type 3363 * 3364 * We also get here if VLAN insertion is not 3365 * supported by HW, the inline is enabled. 3366 */ 3367 single_part_inline: 3368 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3369 loc->wqe_last = wqe; 3370 mlx5_tx_cseg_init(txq, loc, wqe, 4, 3371 MLX5_OPCODE_SEND, olx); 3372 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 3373 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 3374 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3375 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 3376 /* 3377 * The length check is performed above, by 3378 * comparing with txq->inlen_send. We should 3379 * not get overflow here. 3380 */ 3381 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 3382 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 3383 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 3384 dptr, dlen, olx); 3385 ++txq->wqe_ci; 3386 --loc->wqe_free; 3387 /* We have to store mbuf in elts.*/ 3388 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3389 txq->elts[txq->elts_head++ & txq->elts_m] = 3390 loc->mbuf; 3391 --loc->elts_free; 3392 } 3393 #ifdef MLX5_PMD_SOFT_COUNTERS 3394 /* Update sent data bytes counter. */ 3395 txq->stats.obytes += vlan + 3396 rte_pktmbuf_data_len(loc->mbuf); 3397 #endif 3398 } else { 3399 /* 3400 * No inline at all, it means the CPU cycles saving 3401 * is prioritized at configuration, we should not 3402 * copy any packet data to WQE. 
3403 * 3404 * SEND WQE, one WQEBB: 3405 * - Control Segment, SEND opcode 3406 * - Ethernet Segment, optional VLAN, no inline 3407 * - Data Segment, pointer type 3408 */ 3409 single_no_inline: 3410 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3411 loc->wqe_last = wqe; 3412 mlx5_tx_cseg_init(txq, loc, wqe, 3, 3413 MLX5_OPCODE_SEND, olx); 3414 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 3415 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3416 mlx5_tx_dseg_ptr 3417 (txq, loc, &wqe->dseg[0], 3418 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3419 rte_pktmbuf_data_len(loc->mbuf), olx); 3420 ++txq->wqe_ci; 3421 --loc->wqe_free; 3422 /* 3423 * We should not store mbuf pointer in elts 3424 * if no inlining is configured, this is done 3425 * by calling routine in a batch copy. 3426 */ 3427 if (MLX5_TXOFF_CONFIG(INLINE)) 3428 txq->elts[txq->elts_head++ & txq->elts_m] = 3429 loc->mbuf; 3430 --loc->elts_free; 3431 #ifdef MLX5_PMD_SOFT_COUNTERS 3432 /* Update sent data bytes counter. */ 3433 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 3434 if (MLX5_TXOFF_CONFIG(VLAN) && 3435 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 3436 txq->stats.obytes += 3437 sizeof(struct rte_vlan_hdr); 3438 #endif 3439 } 3440 ++loc->pkts_sent; 3441 --pkts_n; 3442 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3443 return MLX5_TXCMP_CODE_EXIT; 3444 loc->mbuf = *pkts++; 3445 if (pkts_n > 1) 3446 rte_prefetch0(*pkts); 3447 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3448 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 3449 return ret; 3450 } 3451 MLX5_ASSERT(false); 3452 } 3453 3454 static __rte_always_inline enum mlx5_txcmp_code 3455 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 3456 struct rte_mbuf **__rte_restrict pkts, 3457 unsigned int pkts_n, 3458 struct mlx5_txq_local *__rte_restrict loc, 3459 unsigned int olx) 3460 { 3461 enum mlx5_txcmp_code ret; 3462 3463 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 3464 if (ret == MLX5_TXCMP_CODE_SINGLE) 3465 goto ordinary_send; 3466 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 3467 for (;;) { 3468 /* Optimize for inline/no inline eMPW send. */ 3469 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 3470 mlx5_tx_burst_empw_inline 3471 (txq, pkts, pkts_n, loc, olx) : 3472 mlx5_tx_burst_empw_simple 3473 (txq, pkts, pkts_n, loc, olx); 3474 if (ret != MLX5_TXCMP_CODE_SINGLE) 3475 return ret; 3476 /* The resources to send one packet should remain. */ 3477 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3478 ordinary_send: 3479 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 3480 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 3481 if (ret != MLX5_TXCMP_CODE_EMPW) 3482 return ret; 3483 /* The resources to send one packet should remain. */ 3484 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3485 } 3486 } 3487 3488 /** 3489 * DPDK Tx callback template. This is configured template used to generate 3490 * routines optimized for specified offload setup. 3491 * One of this generated functions is chosen at SQ configuration time. 3492 * 3493 * @param txq 3494 * Generic pointer to TX queue structure. 3495 * @param[in] pkts 3496 * Packets to transmit. 3497 * @param pkts_n 3498 * Number of packets in array. 3499 * @param olx 3500 * Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx 3501 * values. Should be static to take compile time static configuration 3502 * advantages. 3503 * 3504 * @return 3505 * Number of packets successfully transmitted (<= pkts_n). 
3506 */ 3507 static __rte_always_inline uint16_t 3508 mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, 3509 struct rte_mbuf **__rte_restrict pkts, 3510 uint16_t pkts_n, 3511 unsigned int olx) 3512 { 3513 struct mlx5_txq_local loc; 3514 enum mlx5_txcmp_code ret; 3515 unsigned int part; 3516 3517 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 3518 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 3519 if (unlikely(!pkts_n)) 3520 return 0; 3521 if (MLX5_TXOFF_CONFIG(INLINE)) 3522 loc.mbuf_free = 0; 3523 loc.pkts_sent = 0; 3524 loc.pkts_copy = 0; 3525 loc.wqe_last = NULL; 3526 3527 send_loop: 3528 loc.pkts_loop = loc.pkts_sent; 3529 /* 3530 * Check if there are some CQEs, if any: 3531 * - process an encountered errors 3532 * - process the completed WQEs 3533 * - free related mbufs 3534 * - doorbell the NIC about processed CQEs 3535 */ 3536 rte_prefetch0(*(pkts + loc.pkts_sent)); 3537 mlx5_tx_handle_completion(txq, olx); 3538 /* 3539 * Calculate the number of available resources - elts and WQEs. 3540 * There are two possible different scenarios: 3541 * - no data inlining into WQEs, one WQEBB may contains up to 3542 * four packets, in this case elts become scarce resource 3543 * - data inlining into WQEs, one packet may require multiple 3544 * WQEBBs, the WQEs become the limiting factor. 3545 */ 3546 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 3547 loc.elts_free = txq->elts_s - 3548 (uint16_t)(txq->elts_head - txq->elts_tail); 3549 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 3550 loc.wqe_free = txq->wqe_s - 3551 (uint16_t)(txq->wqe_ci - txq->wqe_pi); 3552 if (unlikely(!loc.elts_free || !loc.wqe_free)) 3553 goto burst_exit; 3554 for (;;) { 3555 /* 3556 * Fetch the packet from array. Usually this is the first 3557 * packet in series of multi/single segment packets. 3558 */ 3559 loc.mbuf = *(pkts + loc.pkts_sent); 3560 /* Dedicated branch for multi-segment packets. */ 3561 if (MLX5_TXOFF_CONFIG(MULTI) && 3562 unlikely(NB_SEGS(loc.mbuf) > 1)) { 3563 /* 3564 * Multi-segment packet encountered. 3565 * Hardware is able to process it only 3566 * with SEND/TSO opcodes, one packet 3567 * per WQE, do it in dedicated routine. 3568 */ 3569 enter_send_multi: 3570 MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy); 3571 part = loc.pkts_sent - loc.pkts_copy; 3572 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 3573 /* 3574 * There are some single-segment mbufs not 3575 * stored in elts. The mbufs must be in the 3576 * same order as WQEs, so we must copy the 3577 * mbufs to elts here, before the coming 3578 * multi-segment packet mbufs is appended. 3579 */ 3580 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, 3581 part, olx); 3582 loc.pkts_copy = loc.pkts_sent; 3583 } 3584 MLX5_ASSERT(pkts_n > loc.pkts_sent); 3585 ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx); 3586 if (!MLX5_TXOFF_CONFIG(INLINE)) 3587 loc.pkts_copy = loc.pkts_sent; 3588 /* 3589 * These returned code checks are supposed 3590 * to be optimized out due to routine inlining. 3591 */ 3592 if (ret == MLX5_TXCMP_CODE_EXIT) { 3593 /* 3594 * The routine returns this code when 3595 * all packets are sent or there is no 3596 * enough resources to complete request. 3597 */ 3598 break; 3599 } 3600 if (ret == MLX5_TXCMP_CODE_ERROR) { 3601 /* 3602 * The routine returns this code when some error 3603 * in the incoming packets format occurred. 
3604 */ 3605 txq->stats.oerrors++; 3606 break; 3607 } 3608 if (ret == MLX5_TXCMP_CODE_SINGLE) { 3609 /* 3610 * The single-segment packet was encountered 3611 * in the array, try to send it with the 3612 * best optimized way, possible engaging eMPW. 3613 */ 3614 goto enter_send_single; 3615 } 3616 if (MLX5_TXOFF_CONFIG(TSO) && 3617 ret == MLX5_TXCMP_CODE_TSO) { 3618 /* 3619 * The single-segment TSO packet was 3620 * encountered in the array. 3621 */ 3622 goto enter_send_tso; 3623 } 3624 /* We must not get here. Something is going wrong. */ 3625 MLX5_ASSERT(false); 3626 txq->stats.oerrors++; 3627 break; 3628 } 3629 /* Dedicated branch for single-segment TSO packets. */ 3630 if (MLX5_TXOFF_CONFIG(TSO) && 3631 unlikely(loc.mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { 3632 /* 3633 * TSO might require special way for inlining 3634 * (dedicated parameters) and is sent with 3635 * MLX5_OPCODE_TSO opcode only, provide this 3636 * in dedicated branch. 3637 */ 3638 enter_send_tso: 3639 MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1); 3640 MLX5_ASSERT(pkts_n > loc.pkts_sent); 3641 ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx); 3642 /* 3643 * These returned code checks are supposed 3644 * to be optimized out due to routine inlining. 3645 */ 3646 if (ret == MLX5_TXCMP_CODE_EXIT) 3647 break; 3648 if (ret == MLX5_TXCMP_CODE_ERROR) { 3649 txq->stats.oerrors++; 3650 break; 3651 } 3652 if (ret == MLX5_TXCMP_CODE_SINGLE) 3653 goto enter_send_single; 3654 if (MLX5_TXOFF_CONFIG(MULTI) && 3655 ret == MLX5_TXCMP_CODE_MULTI) { 3656 /* 3657 * The multi-segment packet was 3658 * encountered in the array. 3659 */ 3660 goto enter_send_multi; 3661 } 3662 /* We must not get here. Something is going wrong. */ 3663 MLX5_ASSERT(false); 3664 txq->stats.oerrors++; 3665 break; 3666 } 3667 /* 3668 * The dedicated branch for the single-segment packets 3669 * without TSO. Often these ones can be sent using 3670 * MLX5_OPCODE_EMPW with multiple packets in one WQE. 3671 * The routine builds the WQEs till it encounters 3672 * the TSO or multi-segment packet (in case if these 3673 * offloads are requested at SQ configuration time). 3674 */ 3675 enter_send_single: 3676 MLX5_ASSERT(pkts_n > loc.pkts_sent); 3677 ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx); 3678 /* 3679 * These returned code checks are supposed 3680 * to be optimized out due to routine inlining. 3681 */ 3682 if (ret == MLX5_TXCMP_CODE_EXIT) 3683 break; 3684 if (ret == MLX5_TXCMP_CODE_ERROR) { 3685 txq->stats.oerrors++; 3686 break; 3687 } 3688 if (MLX5_TXOFF_CONFIG(MULTI) && 3689 ret == MLX5_TXCMP_CODE_MULTI) { 3690 /* 3691 * The multi-segment packet was 3692 * encountered in the array. 3693 */ 3694 goto enter_send_multi; 3695 } 3696 if (MLX5_TXOFF_CONFIG(TSO) && 3697 ret == MLX5_TXCMP_CODE_TSO) { 3698 /* 3699 * The single-segment TSO packet was 3700 * encountered in the array. 3701 */ 3702 goto enter_send_tso; 3703 } 3704 /* We must not get here. Something is going wrong. */ 3705 MLX5_ASSERT(false); 3706 txq->stats.oerrors++; 3707 break; 3708 } 3709 /* 3710 * Main Tx loop is completed, do the rest: 3711 * - set completion request if thresholds are reached 3712 * - doorbell the hardware 3713 * - copy the rest of mbufs to elts (if any) 3714 */ 3715 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) || 3716 loc.pkts_sent >= loc.pkts_copy); 3717 /* Take a shortcut if nothing is sent. */ 3718 if (unlikely(loc.pkts_sent == loc.pkts_loop)) 3719 goto burst_exit; 3720 /* Request CQE generation if limits are reached. 
	if (MLX5_TXOFF_CONFIG(TXPP) && __rte_trace_point_fp_is_enabled())
		mlx5_tx_request_completion_trace(txq, &loc, olx);
	else
		mlx5_tx_request_completion(txq, &loc, olx);
	/*
	 * Ring the QP doorbell immediately after WQE building completes
	 * to improve latency. The purely software-related data treatment
	 * can be completed after the doorbell. Tx CQEs for this SQ are
	 * processed in this thread only by polling.
	 *
	 * The rdma-core library can map the doorbell register in two ways,
	 * depending on the environment variable "MLX5_SHUT_UP_BF":
	 *
	 * - as regular cached memory, when the variable is either missing
	 *   or set to zero. This type of mapping may cause significant
	 *   doorbell register write latency and requires an explicit memory
	 *   write barrier to mitigate this issue and prevent write combining.
	 *
	 * - as non-cached memory, when the variable is present and set to
	 *   a non-zero value. This type of mapping may hurt performance
	 *   under heavy load, but the explicit write memory barrier is not
	 *   required and core performance may improve.
	 *
	 * - the legacy behaviour (prior to the 19.08 release) was to use
	 *   some heuristics to decide whether the write memory barrier
	 *   should be performed. This behavior is selected by specifying
	 *   tx_db_nc=2; the write barrier is skipped if the application
	 *   provides the full recommended burst of packets, assuming more
	 *   packets are coming and the write barrier will be issued on the
	 *   next burst (after descriptor writing, at least).
	 */
	mlx5_doorbell_ring(mlx5_tx_bfreg(txq),
			   *(volatile uint64_t *)loc.wqe_last, txq->wqe_ci,
			   txq->qp_db, !txq->db_nc &&
			   (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
	/* Not all of the mbufs may be stored into elts yet. */
	part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
	if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
		/*
		 * There are some single-segment mbufs not stored in elts.
		 * This can only happen if the last packet was single-segment.
		 * The copying is gathered into one place because it is
		 * a good opportunity to optimize it with SIMD.
		 * Unfortunately, if inlining is enabled, gaps may appear in
		 * the pointer array due to early freeing of the inlined mbufs.
		 */
		mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx);
		loc.pkts_copy = loc.pkts_sent;
	}
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (pkts_n > loc.pkts_sent) {
		/*
		 * If the burst size is large there might not be enough CQEs
		 * fetched from the completion queue and not enough resources
		 * freed to send all the packets.
		 */
		goto send_loop;
	}
burst_exit:
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += loc.pkts_sent;
#endif
	if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
		__mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
	/* Trace productive bursts only. */
	if (__rte_trace_point_fp_is_enabled() && loc.pkts_sent)
		rte_pmd_mlx5_trace_tx_exit(loc.pkts_sent, pkts_n);
	return loc.pkts_sent;
}

/**
 * Check whether the given TxQ is external.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param queue_idx
 *   Tx queue index.
3800 * 3801 * @return 3802 * True if is external TxQ, otherwise false. 3803 */ 3804 static __rte_always_inline bool 3805 mlx5_is_external_txq(struct rte_eth_dev *dev, uint16_t queue_idx) 3806 { 3807 struct mlx5_priv *priv = dev->data->dev_private; 3808 struct mlx5_external_q *txq; 3809 3810 if (!priv->ext_txqs || queue_idx < MLX5_EXTERNAL_TX_QUEUE_ID_MIN) 3811 return false; 3812 txq = &priv->ext_txqs[queue_idx - MLX5_EXTERNAL_TX_QUEUE_ID_MIN]; 3813 return !!rte_atomic_load_explicit(&txq->refcnt, rte_memory_order_relaxed); 3814 } 3815 3816 #endif /* RTE_PMD_MLX5_TX_H_ */ 3817