1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2021 6WIND S.A.
3 * Copyright 2021 Mellanox Technologies, Ltd
4 */
5
6 #ifndef RTE_PMD_MLX5_TX_H_
7 #define RTE_PMD_MLX5_TX_H_
8
9 #include <stdint.h>
10 #include <sys/queue.h>
11
12 #include <rte_mbuf.h>
13 #include <rte_mempool.h>
14 #include <rte_common.h>
15 #include <rte_spinlock.h>
16 #include <rte_trace_point.h>
17
18 #include <mlx5_common.h>
19 #include <mlx5_common_mr.h>
20
21 #include "mlx5.h"
22 #include "mlx5_autoconf.h"
23 #include "mlx5_rxtx.h"
24 #include "mlx5_trace.h"
25
26 /* TX burst subroutines return codes. */
27 enum mlx5_txcmp_code {
28 MLX5_TXCMP_CODE_EXIT = 0,
29 MLX5_TXCMP_CODE_ERROR,
30 MLX5_TXCMP_CODE_SINGLE,
31 MLX5_TXCMP_CODE_MULTI,
32 MLX5_TXCMP_CODE_TSO,
33 MLX5_TXCMP_CODE_EMPW,
34 };
35
36 /*
37 * These defines are used to configure the Tx burst routine option set
38 * supported at compile time. The options that are not specified are
39 * optimized out, because the related if conditions can be evaluated at
40 * compile time. The offloads with a bigger runtime check overhead
41 * (requiring more CPU cycles to skip) should have the bigger index - this
42 * is needed to select the better matching routine if there is no exact
43 * match and some offloads are not actually requested.
44 */
45 #define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets. */
46 #define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported. */
47 #define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads. */
48 #define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Checksums offloaded. */
49 #define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
50 #define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported. */
51 #define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
52 #define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported. */
53 #define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported. */
54 #define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp. */
55
56 /* The most common offloads groups. */
57 #define MLX5_TXOFF_CONFIG_NONE 0
58 #define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \
59 MLX5_TXOFF_CONFIG_TSO | \
60 MLX5_TXOFF_CONFIG_SWP | \
61 MLX5_TXOFF_CONFIG_CSUM | \
62 MLX5_TXOFF_CONFIG_INLINE | \
63 MLX5_TXOFF_CONFIG_VLAN | \
64 MLX5_TXOFF_CONFIG_METADATA)
65
66 #define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask)
67
68 #define MLX5_TXOFF_PRE_DECL(func) \
69 uint16_t mlx5_tx_burst_##func(void *txq, \
70 struct rte_mbuf **pkts, \
71 uint16_t pkts_n)
72
73 #define MLX5_TXOFF_DECL(func, olx) \
74 uint16_t mlx5_tx_burst_##func(void *txq, \
75 struct rte_mbuf **pkts, \
76 uint16_t pkts_n) \
77 { \
78 return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \
79 pkts, pkts_n, (olx)); \
80 }
81
82 /* Mbuf dynamic flag mask for the no-inline hint. */
83 extern uint64_t rte_net_mlx5_dynf_inline_mask;
84 #define RTE_MBUF_F_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask
85
86 extern alignas(RTE_CACHE_LINE_SIZE) uint32_t mlx5_ptype_table[];
87 extern alignas(RTE_CACHE_LINE_SIZE) uint8_t mlx5_cksum_table[1 << 10];
88 extern alignas(RTE_CACHE_LINE_SIZE) uint8_t mlx5_swp_types_table[1 << 10];
89
90 struct mlx5_txq_stats {
91 #ifdef MLX5_PMD_SOFT_COUNTERS
92 uint64_t opackets; /**< Total of successfully sent packets. */
93 uint64_t obytes; /**< Total of successfully sent bytes. */
94 #endif
95 uint64_t oerrors; /**< Total number of failed transmitted packets. */
96 };
97
98 /* TX queue send local data.
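 * This structure below holds the per-burst scratch state of the tx_burst
 * routines: it is expected to be (re)initialized on each burst entry and
 * it does not persist between bursts.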
*/ 99 __extension__ 100 struct mlx5_txq_local { 101 struct mlx5_wqe *wqe_last; /* last sent WQE pointer. */ 102 struct rte_mbuf *mbuf; /* first mbuf to process. */ 103 uint16_t pkts_copy; /* packets copied to elts. */ 104 uint16_t pkts_sent; /* packets sent. */ 105 uint16_t pkts_loop; /* packets sent on loop entry. */ 106 uint16_t elts_free; /* available elts remain. */ 107 uint16_t wqe_free; /* available wqe remain. */ 108 uint16_t mbuf_off; /* data offset in current mbuf. */ 109 uint16_t mbuf_nseg; /* number of remaining mbuf. */ 110 uint16_t mbuf_free; /* number of inline mbufs to free. */ 111 }; 112 113 /* TX queue descriptor. */ 114 __extension__ 115 struct __rte_cache_aligned mlx5_txq_data { 116 uint16_t elts_head; /* Current counter in (*elts)[]. */ 117 uint16_t elts_tail; /* Counter of first element awaiting completion. */ 118 uint16_t elts_comp; /* elts index since last completion request. */ 119 uint16_t elts_s; /* Number of mbuf elements. */ 120 uint16_t elts_m; /* Mask for mbuf elements indices. */ 121 /* Fields related to elts mbuf storage. */ 122 uint16_t wqe_ci; /* Consumer index for work queue. */ 123 uint16_t wqe_pi; /* Producer index for work queue. */ 124 uint16_t wqe_s; /* Number of WQ elements. */ 125 uint16_t wqe_m; /* Mask Number for WQ elements. */ 126 uint16_t wqe_comp; /* WQE index since last completion request. */ 127 uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */ 128 /* WQ related fields. */ 129 uint16_t cq_ci; /* Consumer index for completion queue. */ 130 uint16_t cq_pi; /* Production index for completion queue. */ 131 uint16_t cqe_s; /* Number of CQ elements. */ 132 uint16_t cqe_m; /* Mask for CQ indices. */ 133 /* CQ related fields. */ 134 uint16_t elts_n:4; /* elts[] length (in log2). */ 135 uint16_t cqe_n:4; /* Number of CQ elements (in log2). */ 136 uint16_t wqe_n:4; /* Number of WQ elements (in log2). */ 137 uint16_t tso_en:1; /* When set hardware TSO is enabled. */ 138 uint16_t tunnel_en:1; 139 /* When set TX offload for tunneled packets are supported. */ 140 uint16_t swp_en:1; /* Whether SW parser is enabled. */ 141 uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */ 142 uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */ 143 uint16_t db_heu:1; /* Doorbell heuristic write barrier. */ 144 uint16_t rt_timestamp:1; /* Realtime timestamp format. */ 145 uint16_t wait_on_time:1; /* WQE with timestamp is supported. */ 146 uint16_t fast_free:1; /* mbuf fast free on Tx is enabled. */ 147 uint16_t inlen_send; /* Ordinary send data inline size. */ 148 uint16_t inlen_empw; /* eMPW max packet size to inline. */ 149 uint16_t inlen_mode; /* Minimal data length to inline. */ 150 uint8_t tx_aggr_affinity; /* TxQ affinity configuration. */ 151 uint32_t qp_num_8s; /* QP number shifted by 8. */ 152 uint64_t offloads; /* Offloads for Tx Queue. */ 153 struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */ 154 struct mlx5_wqe *wqes; /* Work queue. */ 155 struct mlx5_wqe *wqes_end; /* Work queue array limit. */ 156 #ifdef RTE_LIBRTE_MLX5_DEBUG 157 uint32_t *fcqs; /* Free completion queue (debug extended). */ 158 #else 159 uint16_t *fcqs; /* Free completion queue. */ 160 #endif 161 volatile struct mlx5_cqe *cqes; /* Completion queue. */ 162 volatile uint32_t *qp_db; /* Work queue doorbell. */ 163 volatile uint32_t *cq_db; /* Completion queue doorbell. */ 164 uint16_t port_id; /* Port ID of device. */ 165 uint16_t idx; /* Queue index. */ 166 uint64_t rt_timemask; /* Scheduling timestamp mask. 
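 * Applied as the comparison mask of the Wait-on-Time WQE value (see
 * mlx5_tx_wseg_init() below, which programs ws->mask = txq->rt_timemask).
 * A minimal sketch of how the scheduling fields of this structure are
 * consumed on the data path, using only names from this file:
 *   if (loc->mbuf->ol_flags & txq->ts_mask) {
 *       uint64_t ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset,
 *                                        uint64_t *);
 *       ... build a WAIT WQE for ts, see mlx5_tx_schedule_send() ...
 *   }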
*/ 167 uint64_t ts_mask; /* Timestamp flag dynamic mask. */ 168 uint64_t ts_last; /* Last scheduled timestamp. */ 169 int32_t ts_offset; /* Timestamp field dynamic offset. */ 170 struct mlx5_dev_ctx_shared *sh; /* Shared context. */ 171 struct mlx5_txq_stats stats; /* TX queue counters. */ 172 struct mlx5_txq_stats stats_reset; /* stats on last reset. */ 173 struct mlx5_uar_data uar_data; 174 struct rte_mbuf *elts[]; 175 /* Storage for queued packets, must be the last field. */ 176 }; 177 178 /* TX queue control descriptor. */ 179 __extension__ 180 struct mlx5_txq_ctrl { 181 LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */ 182 RTE_ATOMIC(uint32_t) refcnt; /* Reference counter. */ 183 unsigned int socket; /* CPU socket ID for allocations. */ 184 bool is_hairpin; /* Whether TxQ type is Hairpin. */ 185 unsigned int max_inline_data; /* Max inline data. */ 186 unsigned int max_tso_header; /* Max TSO header size. */ 187 struct mlx5_txq_obj *obj; /* Verbs/DevX queue object. */ 188 struct mlx5_priv *priv; /* Back pointer to private data. */ 189 off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */ 190 uint16_t dump_file_n; /* Number of dump files. */ 191 struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */ 192 uint32_t hairpin_status; /* Hairpin binding status. */ 193 struct mlx5_txq_data txq; /* Data path structure. */ 194 /* Must be the last field in the structure, contains elts[]. */ 195 }; 196 197 /* mlx5_txq.c */ 198 199 int mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id); 200 int mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id); 201 int mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t queue_id); 202 int mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t queue_id); 203 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 204 unsigned int socket, const struct rte_eth_txconf *conf); 205 int mlx5_tx_hairpin_queue_setup 206 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 207 const struct rte_eth_hairpin_conf *hairpin_conf); 208 void mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 209 int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd); 210 void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev); 211 int mlx5_txq_obj_verify(struct rte_eth_dev *dev); 212 struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, 213 uint16_t desc, unsigned int socket, 214 const struct rte_eth_txconf *conf); 215 struct mlx5_txq_ctrl *mlx5_txq_hairpin_new 216 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 217 const struct rte_eth_hairpin_conf *hairpin_conf); 218 struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx); 219 int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx); 220 int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx); 221 int mlx5_txq_verify(struct rte_eth_dev *dev); 222 int mlx5_txq_get_sqn(struct mlx5_txq_ctrl *txq); 223 void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl); 224 void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl); 225 uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev); 226 void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev); 227 int mlx5_count_aggr_ports(struct rte_eth_dev *dev); 228 int mlx5_map_aggr_tx_affinity(struct rte_eth_dev *dev, uint16_t tx_queue_id, 229 uint8_t affinity); 230 int mlx5_ext_txq_verify(struct rte_eth_dev *dev); 231 struct mlx5_external_q *mlx5_ext_txq_get(struct rte_eth_dev *dev, uint16_t idx); 232 233 /* mlx5_tx.c */ 234 235 
void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
236 unsigned int olx __rte_unused);
237 int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset);
238 void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
239 struct rte_eth_txq_info *qinfo);
240 int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
241 struct rte_eth_burst_mode *mode);
242
243 /* mlx5_tx_empw.c */
244
245 MLX5_TXOFF_PRE_DECL(full_empw);
246 MLX5_TXOFF_PRE_DECL(none_empw);
247 MLX5_TXOFF_PRE_DECL(md_empw);
248 MLX5_TXOFF_PRE_DECL(mt_empw);
249 MLX5_TXOFF_PRE_DECL(mtsc_empw);
250 MLX5_TXOFF_PRE_DECL(mti_empw);
251 MLX5_TXOFF_PRE_DECL(mtv_empw);
252 MLX5_TXOFF_PRE_DECL(mtiv_empw);
253 MLX5_TXOFF_PRE_DECL(sc_empw);
254 MLX5_TXOFF_PRE_DECL(sci_empw);
255 MLX5_TXOFF_PRE_DECL(scv_empw);
256 MLX5_TXOFF_PRE_DECL(sciv_empw);
257 MLX5_TXOFF_PRE_DECL(i_empw);
258 MLX5_TXOFF_PRE_DECL(v_empw);
259 MLX5_TXOFF_PRE_DECL(iv_empw);
260
261 /* mlx5_tx_nompw.c */
262
263 MLX5_TXOFF_PRE_DECL(full);
264 MLX5_TXOFF_PRE_DECL(none);
265 MLX5_TXOFF_PRE_DECL(md);
266 MLX5_TXOFF_PRE_DECL(mt);
267 MLX5_TXOFF_PRE_DECL(mtsc);
268 MLX5_TXOFF_PRE_DECL(mti);
269 MLX5_TXOFF_PRE_DECL(mtv);
270 MLX5_TXOFF_PRE_DECL(mtiv);
271 MLX5_TXOFF_PRE_DECL(sc);
272 MLX5_TXOFF_PRE_DECL(sci);
273 MLX5_TXOFF_PRE_DECL(scv);
274 MLX5_TXOFF_PRE_DECL(sciv);
275 MLX5_TXOFF_PRE_DECL(i);
276 MLX5_TXOFF_PRE_DECL(v);
277 MLX5_TXOFF_PRE_DECL(iv);
278
279 /* mlx5_tx_txpp.c */
280
281 MLX5_TXOFF_PRE_DECL(full_ts_nompw);
282 MLX5_TXOFF_PRE_DECL(full_ts_nompwi);
283 MLX5_TXOFF_PRE_DECL(full_ts);
284 MLX5_TXOFF_PRE_DECL(full_ts_noi);
285 MLX5_TXOFF_PRE_DECL(none_ts);
286 MLX5_TXOFF_PRE_DECL(mdi_ts);
287 MLX5_TXOFF_PRE_DECL(mti_ts);
288 MLX5_TXOFF_PRE_DECL(mtiv_ts);
289
290 /* mlx5_tx_mpw.c */
291
292 MLX5_TXOFF_PRE_DECL(none_mpw);
293 MLX5_TXOFF_PRE_DECL(mci_mpw);
294 MLX5_TXOFF_PRE_DECL(mc_mpw);
295 MLX5_TXOFF_PRE_DECL(i_mpw);
296
297 static __rte_always_inline struct mlx5_uar_data *
298 mlx5_tx_bfreg(struct mlx5_txq_data *txq)
299 {
300 return &MLX5_PROC_PRIV(txq->port_id)->uar_table[txq->idx];
301 }
302
303 /**
304 * Ring TX queue doorbell and flush the update by write memory barrier.
305 *
306 * @param txq
307 * Pointer to TX queue structure.
308 * @param wqe
309 * Pointer to the last WQE posted in the NIC.
310 */
311 static __rte_always_inline void
312 mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
313 {
314 mlx5_doorbell_ring(mlx5_tx_bfreg(txq), *(volatile uint64_t *)wqe,
315 txq->wqe_ci, txq->qp_db, 1);
316 }
317
318 /**
319 * Convert timestamp from mbuf format to linear counter
320 * of Clock Queue completions (24 bits).
321 *
322 * @param sh
323 * Pointer to the device shared context to fetch Tx
324 * packet pacing timestamp and parameters.
325 * @param mts
326 * Timestamp from mbuf to convert.
327 * @return
328 * positive or zero value - completion ID to wait.
329 * negative value - conversion error.
330 */
331 static __rte_always_inline int32_t
332 mlx5_txpp_convert_tx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t mts)
333 {
334 uint64_t ts, ci;
335 uint32_t tick;
336
337 do {
338 /*
339 * Read atomically two uint64_t fields and compare lsb bits.
340 * If there is no match, the timestamp was updated in
341 * the service thread and the data should be re-read.
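 * The update scheme appears to be seqlock-like: the pacing service thread
 * writes both fields with the same Clock Queue completion index in their
 * low MLX5_CQ_INDEX_WIDTH bits, so equal low bits (the check below)
 * indicate that ci and ts were taken from the same update.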
342 */ 343 rte_compiler_barrier(); 344 ci = rte_atomic_load_explicit(&sh->txpp.ts.ci_ts, rte_memory_order_relaxed); 345 ts = rte_atomic_load_explicit(&sh->txpp.ts.ts, rte_memory_order_relaxed); 346 rte_compiler_barrier(); 347 if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH))) 348 break; 349 } while (true); 350 /* Perform the skew correction, positive value to send earlier. */ 351 mts -= sh->txpp.skew; 352 mts -= ts; 353 if (unlikely(mts >= UINT64_MAX / 2)) { 354 /* We have negative integer, mts is in the past. */ 355 rte_atomic_fetch_add_explicit(&sh->txpp.err_ts_past, 356 1, rte_memory_order_relaxed); 357 return -1; 358 } 359 tick = sh->txpp.tick; 360 MLX5_ASSERT(tick); 361 /* Convert delta to completions, round up. */ 362 mts = (mts + tick - 1) / tick; 363 if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) { 364 /* We have mts is too distant future. */ 365 rte_atomic_fetch_add_explicit(&sh->txpp.err_ts_future, 366 1, rte_memory_order_relaxed); 367 return -1; 368 } 369 mts <<= 64 - MLX5_CQ_INDEX_WIDTH; 370 ci += mts; 371 ci >>= 64 - MLX5_CQ_INDEX_WIDTH; 372 return ci; 373 } 374 375 /** 376 * Read real time clock counter directly from the device PCI BAR area. 377 * The PCI BAR must be mapped to the process memory space at initialization. 378 * 379 * @param dev 380 * Device to read clock counter from 381 * 382 * @return 383 * 0 - if HCA BAR is not supported or not mapped. 384 * !=0 - read 64-bit value of real-time in UTC formatv (nanoseconds) 385 */ 386 static __rte_always_inline uint64_t mlx5_read_pcibar_clock(struct rte_eth_dev *dev) 387 { 388 struct mlx5_proc_priv *ppriv = dev->process_private; 389 390 if (ppriv && ppriv->hca_bar) { 391 struct mlx5_priv *priv = dev->data->dev_private; 392 struct mlx5_dev_ctx_shared *sh = priv->sh; 393 uint64_t *hca_ptr = (uint64_t *)(ppriv->hca_bar) + 394 __mlx5_64_off(initial_seg, real_time); 395 uint64_t __rte_atomic *ts_addr; 396 uint64_t ts; 397 398 ts_addr = (uint64_t __rte_atomic *)hca_ptr; 399 ts = rte_atomic_load_explicit(ts_addr, rte_memory_order_seq_cst); 400 ts = rte_be_to_cpu_64(ts); 401 ts = mlx5_txpp_convert_rx_ts(sh, ts); 402 return ts; 403 } 404 return 0; 405 } 406 407 static __rte_always_inline uint64_t mlx5_read_pcibar_clock_from_txq(struct mlx5_txq_data *txq) 408 { 409 struct mlx5_txq_ctrl *txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq); 410 struct rte_eth_dev *dev = ETH_DEV(txq_ctrl->priv); 411 412 return mlx5_read_pcibar_clock(dev); 413 } 414 415 /** 416 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 417 * Flags must be preliminary initialized to zero. 418 * 419 * @param loc 420 * Pointer to burst routine local context. 421 * @param swp_flags 422 * Pointer to store Software Parser flags. 423 * @param olx 424 * Configured Tx offloads mask. It is fully defined at 425 * compile time and may be used for optimization. 426 * 427 * @return 428 * Software Parser offsets packed in dword. 429 * Software Parser flags are set by pointer. 430 */ 431 static __rte_always_inline uint32_t 432 txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, 433 uint8_t *swp_flags, 434 unsigned int olx) 435 { 436 uint64_t ol, tunnel; 437 unsigned int idx, off; 438 uint32_t set; 439 440 if (!MLX5_TXOFF_CONFIG(SWP)) 441 return 0; 442 ol = loc->mbuf->ol_flags; 443 tunnel = ol & RTE_MBUF_F_TX_TUNNEL_MASK; 444 /* 445 * Check whether Software Parser is required. 446 * Only customized tunnels may ask for. 
447 */ 448 if (likely(tunnel != RTE_MBUF_F_TX_TUNNEL_UDP && tunnel != RTE_MBUF_F_TX_TUNNEL_IP)) 449 return 0; 450 /* 451 * The index should have: 452 * bit[0:1] = RTE_MBUF_F_TX_L4_MASK 453 * bit[4] = RTE_MBUF_F_TX_IPV6 454 * bit[8] = RTE_MBUF_F_TX_OUTER_IPV6 455 * bit[9] = RTE_MBUF_F_TX_OUTER_UDP 456 */ 457 idx = (ol & (RTE_MBUF_F_TX_L4_MASK | RTE_MBUF_F_TX_IPV6 | RTE_MBUF_F_TX_OUTER_IPV6)) >> 52; 458 idx |= (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) ? (1 << 9) : 0; 459 *swp_flags = mlx5_swp_types_table[idx]; 460 /* 461 * Set offsets for SW parser. Since ConnectX-5, SW parser just 462 * complements HW parser. SW parser starts to engage only if HW parser 463 * can't reach a header. For the older devices, HW parser will not kick 464 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 465 * should be set regardless of HW offload. 466 */ 467 off = loc->mbuf->outer_l2_len; 468 if (MLX5_TXOFF_CONFIG(VLAN) && ol & RTE_MBUF_F_TX_VLAN) 469 off += sizeof(struct rte_vlan_hdr); 470 set = (off >> 1) << 8; /* Outer L3 offset. */ 471 off += loc->mbuf->outer_l3_len; 472 if (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) 473 set |= off >> 1; /* Outer L4 offset. */ 474 if (ol & (RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IPV6)) { /* Inner IP. */ 475 const uint64_t csum = ol & RTE_MBUF_F_TX_L4_MASK; 476 off += loc->mbuf->l2_len; 477 set |= (off >> 1) << 24; /* Inner L3 offset. */ 478 if (csum == RTE_MBUF_F_TX_TCP_CKSUM || 479 csum == RTE_MBUF_F_TX_UDP_CKSUM || 480 (MLX5_TXOFF_CONFIG(TSO) && ol & RTE_MBUF_F_TX_TCP_SEG)) { 481 off += loc->mbuf->l3_len; 482 set |= (off >> 1) << 16; /* Inner L4 offset. */ 483 } 484 } 485 set = rte_cpu_to_le_32(set); 486 return set; 487 } 488 489 /** 490 * Convert the Checksum offloads to Verbs. 491 * 492 * @param buf 493 * Pointer to the mbuf. 494 * 495 * @return 496 * Converted checksum flags. 497 */ 498 static __rte_always_inline uint8_t 499 txq_ol_cksum_to_cs(struct rte_mbuf *buf) 500 { 501 uint32_t idx; 502 uint8_t is_tunnel = !!(buf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK); 503 const uint64_t ol_flags_mask = RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_L4_MASK | 504 RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_OUTER_IP_CKSUM; 505 506 /* 507 * The index should have: 508 * bit[0] = RTE_MBUF_F_TX_TCP_SEG 509 * bit[2:3] = RTE_MBUF_F_TX_UDP_CKSUM, RTE_MBUF_F_TX_TCP_CKSUM 510 * bit[4] = RTE_MBUF_F_TX_IP_CKSUM 511 * bit[8] = RTE_MBUF_F_TX_OUTER_IP_CKSUM 512 * bit[9] = tunnel 513 */ 514 idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9); 515 return mlx5_cksum_table[idx]; 516 } 517 518 /** 519 * Free the mbufs from the linear array of pointers. 520 * 521 * @param txq 522 * Pointer to Tx queue structure. 523 * @param pkts 524 * Pointer to array of packets to be free. 525 * @param pkts_n 526 * Number of packets to be freed. 527 * @param olx 528 * Configured Tx offloads mask. It is fully defined at 529 * compile time and may be used for optimization. 530 */ 531 static __rte_always_inline void 532 mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 533 struct rte_mbuf **__rte_restrict pkts, 534 unsigned int pkts_n, 535 unsigned int olx __rte_unused) 536 { 537 struct rte_mempool *pool = NULL; 538 struct rte_mbuf **p_free = NULL; 539 struct rte_mbuf *mbuf; 540 unsigned int n_free = 0; 541 542 /* 543 * The implemented algorithm eliminates 544 * copying pointers to temporary array 545 * for rte_mempool_put_bulk() calls. 
546 */ 547 MLX5_ASSERT(pkts); 548 MLX5_ASSERT(pkts_n); 549 /* 550 * Free mbufs directly to the pool in bulk 551 * if fast free offload is engaged 552 */ 553 if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) { 554 mbuf = *pkts; 555 pool = mbuf->pool; 556 rte_mempool_put_bulk(pool, (void *)pkts, pkts_n); 557 return; 558 } 559 for (;;) { 560 for (;;) { 561 /* 562 * Decrement mbuf reference counter, detach 563 * indirect and external buffers if needed. 564 */ 565 mbuf = rte_pktmbuf_prefree_seg(*pkts); 566 if (likely(mbuf != NULL)) { 567 MLX5_ASSERT(mbuf == *pkts); 568 if (likely(n_free != 0)) { 569 if (unlikely(pool != mbuf->pool)) 570 /* From different pool. */ 571 break; 572 } else { 573 /* Start new scan array. */ 574 pool = mbuf->pool; 575 p_free = pkts; 576 } 577 ++n_free; 578 ++pkts; 579 --pkts_n; 580 if (unlikely(pkts_n == 0)) { 581 mbuf = NULL; 582 break; 583 } 584 } else { 585 /* 586 * This happens if mbuf is still referenced. 587 * We can't put it back to the pool, skip. 588 */ 589 ++pkts; 590 --pkts_n; 591 if (unlikely(n_free != 0)) 592 /* There is some array to free.*/ 593 break; 594 if (unlikely(pkts_n == 0)) 595 /* Last mbuf, nothing to free. */ 596 return; 597 } 598 } 599 for (;;) { 600 /* 601 * This loop is implemented to avoid multiple 602 * inlining of rte_mempool_put_bulk(). 603 */ 604 MLX5_ASSERT(pool); 605 MLX5_ASSERT(p_free); 606 MLX5_ASSERT(n_free); 607 /* 608 * Free the array of pre-freed mbufs 609 * belonging to the same memory pool. 610 */ 611 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 612 if (unlikely(mbuf != NULL)) { 613 /* There is the request to start new scan. */ 614 pool = mbuf->pool; 615 p_free = pkts++; 616 n_free = 1; 617 --pkts_n; 618 if (likely(pkts_n != 0)) 619 break; 620 /* 621 * This is the last mbuf to be freed. 622 * Do one more loop iteration to complete. 623 * This is rare case of the last unique mbuf. 624 */ 625 mbuf = NULL; 626 continue; 627 } 628 if (likely(pkts_n == 0)) 629 return; 630 n_free = 0; 631 break; 632 } 633 } 634 } 635 636 /** 637 * No inline version to free buffers for optimal call 638 * on the tx_burst completion. 639 */ 640 static __rte_noinline void 641 __mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 642 struct rte_mbuf **__rte_restrict pkts, 643 unsigned int pkts_n, 644 unsigned int olx __rte_unused) 645 { 646 mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx); 647 } 648 649 /** 650 * Free the mbuf from the elts ring buffer till new tail. 651 * 652 * @param txq 653 * Pointer to Tx queue structure. 654 * @param tail 655 * Index in elts to free up to, becomes new elts tail. 656 * @param olx 657 * Configured Tx offloads mask. It is fully defined at 658 * compile time and may be used for optimization. 659 */ 660 static __rte_always_inline void 661 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 662 uint16_t tail, 663 unsigned int olx __rte_unused) 664 { 665 uint16_t n_elts = tail - txq->elts_tail; 666 667 MLX5_ASSERT(n_elts); 668 MLX5_ASSERT(n_elts <= txq->elts_s); 669 /* 670 * Implement a loop to support ring buffer wraparound 671 * with single inlining of mlx5_tx_free_mbuf(). 672 */ 673 do { 674 unsigned int part; 675 676 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 677 part = RTE_MIN(part, n_elts); 678 MLX5_ASSERT(part); 679 MLX5_ASSERT(part <= txq->elts_s); 680 mlx5_tx_free_mbuf(txq, 681 &txq->elts[txq->elts_tail & txq->elts_m], 682 part, olx); 683 txq->elts_tail += part; 684 n_elts -= part; 685 } while (n_elts); 686 } 687 688 /** 689 * Store the mbuf being sent into elts ring buffer. 
690 * On Tx completion these mbufs will be freed. 691 * 692 * @param txq 693 * Pointer to Tx queue structure. 694 * @param pkts 695 * Pointer to array of packets to be stored. 696 * @param pkts_n 697 * Number of packets to be stored. 698 * @param olx 699 * Configured Tx offloads mask. It is fully defined at 700 * compile time and may be used for optimization. 701 */ 702 static __rte_always_inline void 703 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 704 struct rte_mbuf **__rte_restrict pkts, 705 unsigned int pkts_n, 706 unsigned int olx __rte_unused) 707 { 708 unsigned int part; 709 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 710 711 MLX5_ASSERT(pkts); 712 MLX5_ASSERT(pkts_n); 713 part = txq->elts_s - (txq->elts_head & txq->elts_m); 714 MLX5_ASSERT(part); 715 MLX5_ASSERT(part <= txq->elts_s); 716 /* This code is a good candidate for vectorizing with SIMD. */ 717 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 718 (void *)pkts, 719 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 720 txq->elts_head += pkts_n; 721 if (unlikely(part < pkts_n)) 722 /* The copy is wrapping around the elts array. */ 723 rte_memcpy((void *)elts, (void *)(pkts + part), 724 (pkts_n - part) * sizeof(struct rte_mbuf *)); 725 } 726 727 /** 728 * Check if the completion request flag should be set in the last WQE. 729 * Both pushed mbufs and WQEs are monitored and the completion request 730 * flag is set if any of thresholds is reached. 731 * 732 * @param txq 733 * Pointer to TX queue structure. 734 * @param loc 735 * Pointer to burst routine local context. 736 * @param olx 737 * Configured Tx offloads mask. It is fully defined at 738 * compile time and may be used for optimization. 739 */ 740 static __rte_always_inline void 741 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 742 struct mlx5_txq_local *__rte_restrict loc, 743 unsigned int olx) 744 { 745 uint16_t head = txq->elts_head; 746 unsigned int part; 747 748 part = MLX5_TXOFF_CONFIG(INLINE) ? 749 0 : loc->pkts_sent - loc->pkts_copy; 750 head += part; 751 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 752 (MLX5_TXOFF_CONFIG(INLINE) && 753 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 754 volatile struct mlx5_wqe *last = loc->wqe_last; 755 756 MLX5_ASSERT(last); 757 txq->elts_comp = head; 758 if (MLX5_TXOFF_CONFIG(INLINE)) 759 txq->wqe_comp = txq->wqe_ci; 760 /* Request unconditional completion on last WQE. */ 761 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 762 MLX5_COMP_MODE_OFFSET); 763 /* Save elts_head in dedicated free on completion queue. */ 764 #ifdef RTE_LIBRTE_MLX5_DEBUG 765 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 766 (last->cseg.opcode >> 8) << 16; 767 #else 768 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 769 #endif 770 /* A CQE slot must always be available. */ 771 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 772 } 773 } 774 775 /** 776 * Set completion request flag for all issued WQEs. 777 * This routine is intended to be used with enabled fast path tracing 778 * and send scheduling on time to provide the detailed report in trace 779 * for send completions on every WQE. 780 * 781 * @param txq 782 * Pointer to TX queue structure. 783 * @param loc 784 * Pointer to burst routine local context. 785 * @param olx 786 * Configured Tx offloads mask. It is fully defined at 787 * compile time and may be used for optimization. 
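 *
 * Unlike mlx5_tx_request_completion() above, which arms a completion only
 * when the elts/WQE thresholds are crossed, this variant walks every WQE
 * issued since the last completion request so that each of them gets its
 * own txq->fcqs[] entry and therefore its own record in the trace.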
788 */ 789 static __rte_always_inline void 790 mlx5_tx_request_completion_trace(struct mlx5_txq_data *__rte_restrict txq, 791 struct mlx5_txq_local *__rte_restrict loc, 792 unsigned int olx) 793 { 794 uint16_t head = txq->elts_comp; 795 796 while (txq->wqe_comp != txq->wqe_ci) { 797 volatile struct mlx5_wqe *wqe; 798 uint32_t wqe_n; 799 800 MLX5_ASSERT(loc->wqe_last); 801 wqe = txq->wqes + (txq->wqe_comp & txq->wqe_m); 802 if (wqe == loc->wqe_last) { 803 head = txq->elts_head; 804 head += MLX5_TXOFF_CONFIG(INLINE) ? 805 0 : loc->pkts_sent - loc->pkts_copy; 806 txq->elts_comp = head; 807 } 808 /* Completion request flag was set on cseg constructing. */ 809 #ifdef RTE_LIBRTE_MLX5_DEBUG 810 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 811 (wqe->cseg.opcode >> 8) << 16; 812 #else 813 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 814 #endif 815 /* A CQE slot must always be available. */ 816 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 817 /* Advance to the next WQE in the queue. */ 818 wqe_n = rte_be_to_cpu_32(wqe->cseg.sq_ds) & 0x3F; 819 txq->wqe_comp += RTE_ALIGN(wqe_n, 4) / 4; 820 } 821 } 822 823 /** 824 * Build the Control Segment with specified opcode: 825 * - MLX5_OPCODE_SEND 826 * - MLX5_OPCODE_ENHANCED_MPSW 827 * - MLX5_OPCODE_TSO 828 * 829 * @param txq 830 * Pointer to TX queue structure. 831 * @param loc 832 * Pointer to burst routine local context. 833 * @param wqe 834 * Pointer to WQE to fill with built Control Segment. 835 * @param ds 836 * Supposed length of WQE in segments. 837 * @param opcode 838 * SQ WQE opcode to put into Control Segment. 839 * @param olx 840 * Configured Tx offloads mask. It is fully defined at 841 * compile time and may be used for optimization. 842 */ 843 static __rte_always_inline void 844 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 845 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 846 struct mlx5_wqe *__rte_restrict wqe, 847 unsigned int ds, 848 unsigned int opcode, 849 unsigned int olx) 850 { 851 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 852 uint64_t real_time; 853 854 /* For legacy MPW replace the EMPW by TSO with modifier. */ 855 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 856 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 857 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 858 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 859 if (MLX5_TXOFF_CONFIG(TXPP) && __rte_trace_point_fp_is_enabled()) 860 cs->flags = RTE_BE32(MLX5_COMP_ALWAYS << 861 MLX5_COMP_MODE_OFFSET); 862 else 863 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 864 MLX5_COMP_MODE_OFFSET); 865 cs->misc = RTE_BE32(0); 866 if (__rte_trace_point_fp_is_enabled()) { 867 real_time = mlx5_read_pcibar_clock_from_txq(txq); 868 if (!loc->pkts_sent) 869 rte_pmd_mlx5_trace_tx_entry(real_time, txq->port_id, txq->idx); 870 rte_pmd_mlx5_trace_tx_wqe(real_time, (txq->wqe_ci << 8) | opcode); 871 } 872 } 873 874 /** 875 * Build the Synchronize Queue Segment with specified completion index. 876 * 877 * @param txq 878 * Pointer to TX queue structure. 879 * @param loc 880 * Pointer to burst routine local context. 881 * @param wqe 882 * Pointer to WQE to fill with built Control Segment. 883 * @param wci 884 * Completion index in Clock Queue to wait. 885 * @param olx 886 * Configured Tx offloads mask. It is fully defined at 887 * compile time and may be used for optimization. 
888 */ 889 static __rte_always_inline void 890 mlx5_tx_qseg_init(struct mlx5_txq_data *restrict txq, 891 struct mlx5_txq_local *restrict loc __rte_unused, 892 struct mlx5_wqe *restrict wqe, 893 unsigned int wci, 894 unsigned int olx __rte_unused) 895 { 896 struct mlx5_wqe_qseg *qs; 897 898 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 899 qs->max_index = rte_cpu_to_be_32(wci); 900 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq_obj.cq->id); 901 qs->reserved0 = RTE_BE32(0); 902 qs->reserved1 = RTE_BE32(0); 903 } 904 905 /** 906 * Build the Wait on Time Segment with specified timestamp value. 907 * 908 * @param txq 909 * Pointer to TX queue structure. 910 * @param loc 911 * Pointer to burst routine local context. 912 * @param wqe 913 * Pointer to WQE to fill with built Control Segment. 914 * @param ts 915 * Timesatmp value to wait. 916 * @param olx 917 * Configured Tx offloads mask. It is fully defined at 918 * compile time and may be used for optimization. 919 */ 920 static __rte_always_inline void 921 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 922 struct mlx5_txq_local *restrict loc __rte_unused, 923 struct mlx5_wqe *restrict wqe, 924 uint64_t ts, 925 unsigned int olx __rte_unused) 926 { 927 struct mlx5_wqe_wseg *ws; 928 929 ws = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 930 ws->operation = rte_cpu_to_be_32(MLX5_WAIT_COND_CYCLIC_SMALLER); 931 ws->lkey = RTE_BE32(0); 932 ws->va_high = RTE_BE32(0); 933 ws->va_low = RTE_BE32(0); 934 if (txq->rt_timestamp) { 935 ts = ts % (uint64_t)NS_PER_S 936 | (ts / (uint64_t)NS_PER_S) << 32; 937 } 938 ws->value = rte_cpu_to_be_64(ts); 939 ws->mask = txq->rt_timemask; 940 } 941 942 /** 943 * Build the Ethernet Segment without inlined data. 944 * Supports Software Parser, Checksums and VLAN insertion Tx offload features. 945 * 946 * @param txq 947 * Pointer to TX queue structure. 948 * @param loc 949 * Pointer to burst routine local context. 950 * @param wqe 951 * Pointer to WQE to fill with built Ethernet Segment. 952 * @param olx 953 * Configured Tx offloads mask. It is fully defined at 954 * compile time and may be used for optimization. 955 */ 956 static __rte_always_inline void 957 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 958 struct mlx5_txq_local *__rte_restrict loc, 959 struct mlx5_wqe *__rte_restrict wqe, 960 unsigned int olx) 961 { 962 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 963 uint32_t csum; 964 965 /* 966 * Calculate and set check sum flags first, dword field 967 * in segment may be shared with Software Parser flags. 968 */ 969 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 970 es->flags = rte_cpu_to_le_32(csum); 971 /* 972 * Calculate and set Software Parser offsets and flags. 973 * These flags a set for custom UDP and IP tunnel packets. 974 */ 975 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 976 /* Fill metadata field if needed. */ 977 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 978 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 979 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 980 0 : 0; 981 /* Engage VLAN tag insertion feature if requested. */ 982 if (MLX5_TXOFF_CONFIG(VLAN) && 983 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 984 /* 985 * We should get here only if device support 986 * this feature correctly. 
987 */ 988 MLX5_ASSERT(txq->vlan_en); 989 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 990 loc->mbuf->vlan_tci); 991 } else { 992 es->inline_hdr = RTE_BE32(0); 993 } 994 } 995 996 /** 997 * Build the Ethernet Segment with minimal inlined data 998 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 999 * used to fill the gap in single WQEBB WQEs. 1000 * Supports Software Parser, Checksums and VLAN 1001 * insertion Tx offload features. 1002 * 1003 * @param txq 1004 * Pointer to TX queue structure. 1005 * @param loc 1006 * Pointer to burst routine local context. 1007 * @param wqe 1008 * Pointer to WQE to fill with built Ethernet Segment. 1009 * @param vlan 1010 * Length of VLAN tag insertion if any. 1011 * @param olx 1012 * Configured Tx offloads mask. It is fully defined at 1013 * compile time and may be used for optimization. 1014 */ 1015 static __rte_always_inline void 1016 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 1017 struct mlx5_txq_local *__rte_restrict loc, 1018 struct mlx5_wqe *__rte_restrict wqe, 1019 unsigned int vlan, 1020 unsigned int olx) 1021 { 1022 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 1023 uint32_t csum; 1024 uint8_t *psrc, *pdst; 1025 1026 /* 1027 * Calculate and set check sum flags first, dword field 1028 * in segment may be shared with Software Parser flags. 1029 */ 1030 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 1031 es->flags = rte_cpu_to_le_32(csum); 1032 /* 1033 * Calculate and set Software Parser offsets and flags. 1034 * These flags a set for custom UDP and IP tunnel packets. 1035 */ 1036 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1037 /* Fill metadata field if needed. */ 1038 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1039 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 1040 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1041 0 : 0; 1042 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 1043 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 1044 es->inline_data = *(unaligned_uint16_t *)psrc; 1045 psrc += sizeof(uint16_t); 1046 pdst = (uint8_t *)(es + 1); 1047 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1048 /* Implement VLAN tag insertion as part inline data. */ 1049 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 1050 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1051 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1052 /* Insert VLAN ethertype + VLAN tag. */ 1053 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1054 ((RTE_ETHER_TYPE_VLAN << 16) | 1055 loc->mbuf->vlan_tci); 1056 pdst += sizeof(struct rte_vlan_hdr); 1057 /* Copy the rest two bytes from packet data. */ 1058 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 1059 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 1060 } else { 1061 /* Fill the gap in the title WQEBB with inline data. */ 1062 rte_mov16(pdst, psrc); 1063 } 1064 } 1065 1066 /** 1067 * Build the Ethernet Segment with entire packet data inlining. Checks the 1068 * boundary of WQEBB and ring buffer wrapping, supports Software Parser, 1069 * Checksums and VLAN insertion Tx offload features. 1070 * 1071 * @param txq 1072 * Pointer to TX queue structure. 1073 * @param loc 1074 * Pointer to burst routine local context. 1075 * @param wqe 1076 * Pointer to WQE to fill with built Ethernet Segment. 1077 * @param vlan 1078 * Length of VLAN tag insertion if any. 1079 * @param inlen 1080 * Length of data to inline (VLAN included, if any). 1081 * @param tso 1082 * TSO flag, set mss field from the packet. 
1083 * @param olx 1084 * Configured Tx offloads mask. It is fully defined at 1085 * compile time and may be used for optimization. 1086 * 1087 * @return 1088 * Pointer to the next Data Segment (aligned and wrapped around). 1089 */ 1090 static __rte_always_inline struct mlx5_wqe_dseg * 1091 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 1092 struct mlx5_txq_local *__rte_restrict loc, 1093 struct mlx5_wqe *__rte_restrict wqe, 1094 unsigned int vlan, 1095 unsigned int inlen, 1096 unsigned int tso, 1097 unsigned int olx) 1098 { 1099 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 1100 uint32_t csum; 1101 uint8_t *psrc, *pdst; 1102 unsigned int part; 1103 1104 /* 1105 * Calculate and set check sum flags first, dword field 1106 * in segment may be shared with Software Parser flags. 1107 */ 1108 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 1109 if (tso) { 1110 csum <<= 24; 1111 csum |= loc->mbuf->tso_segsz; 1112 es->flags = rte_cpu_to_be_32(csum); 1113 } else { 1114 es->flags = rte_cpu_to_le_32(csum); 1115 } 1116 /* 1117 * Calculate and set Software Parser offsets and flags. 1118 * These flags a set for custom UDP and IP tunnel packets. 1119 */ 1120 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1121 /* Fill metadata field if needed. */ 1122 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1123 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 1124 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1125 0 : 0; 1126 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 1127 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 1128 es->inline_data = *(unaligned_uint16_t *)psrc; 1129 psrc += sizeof(uint16_t); 1130 pdst = (uint8_t *)(es + 1); 1131 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1132 /* Implement VLAN tag insertion as part inline data. */ 1133 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 1134 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1135 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1136 /* Insert VLAN ethertype + VLAN tag. */ 1137 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1138 ((RTE_ETHER_TYPE_VLAN << 16) | 1139 loc->mbuf->vlan_tci); 1140 pdst += sizeof(struct rte_vlan_hdr); 1141 /* Copy the rest two bytes from packet data. */ 1142 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 1143 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 1144 psrc += sizeof(uint16_t); 1145 } else { 1146 /* Fill the gap in the title WQEBB with inline data. */ 1147 rte_mov16(pdst, psrc); 1148 psrc += sizeof(rte_v128u32_t); 1149 } 1150 pdst = (uint8_t *)(es + 2); 1151 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 1152 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 1153 inlen -= MLX5_ESEG_MIN_INLINE_SIZE; 1154 if (!inlen) { 1155 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 1156 return (struct mlx5_wqe_dseg *)pdst; 1157 } 1158 /* 1159 * The WQEBB space availability is checked by caller. 1160 * Here we should be aware of WQE ring buffer wraparound only. 1161 */ 1162 part = (uint8_t *)txq->wqes_end - pdst; 1163 part = RTE_MIN(part, inlen); 1164 do { 1165 rte_memcpy(pdst, psrc, part); 1166 inlen -= part; 1167 if (likely(!inlen)) { 1168 /* 1169 * If return value is not used by the caller 1170 * the code below will be optimized out. 
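 * When it is used, pdst is advanced past the last copied chunk, aligned
 * up to MLX5_WSEG_SIZE and wrapped back to txq->wqes once it reaches
 * txq->wqes_end - this is the position of the next Data Segment.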
1171 */ 1172 pdst += part; 1173 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1174 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 1175 pdst = (uint8_t *)txq->wqes; 1176 return (struct mlx5_wqe_dseg *)pdst; 1177 } 1178 pdst = (uint8_t *)txq->wqes; 1179 psrc += part; 1180 part = inlen; 1181 } while (true); 1182 } 1183 1184 /** 1185 * Copy data from chain of mbuf to the specified linear buffer. 1186 * Checksums and VLAN insertion Tx offload features. If data 1187 * from some mbuf copied completely this mbuf is freed. Local 1188 * structure is used to keep the byte stream state. 1189 * 1190 * @param pdst 1191 * Pointer to the destination linear buffer. 1192 * @param loc 1193 * Pointer to burst routine local context. 1194 * @param len 1195 * Length of data to be copied. 1196 * @param must 1197 * Length of data to be copied ignoring no inline hint. 1198 * @param olx 1199 * Configured Tx offloads mask. It is fully defined at 1200 * compile time and may be used for optimization. 1201 * 1202 * @return 1203 * Number of actual copied data bytes. This is always greater than or 1204 * equal to must parameter and might be lesser than len in no inline 1205 * hint flag is encountered. 1206 */ 1207 static __rte_always_inline unsigned int 1208 mlx5_tx_mseg_memcpy(uint8_t *pdst, 1209 struct mlx5_txq_local *__rte_restrict loc, 1210 unsigned int len, 1211 unsigned int must, 1212 unsigned int olx __rte_unused) 1213 { 1214 struct rte_mbuf *mbuf; 1215 unsigned int part, dlen, copy = 0; 1216 uint8_t *psrc; 1217 1218 MLX5_ASSERT(len); 1219 do { 1220 /* Allow zero length packets, must check first. */ 1221 dlen = rte_pktmbuf_data_len(loc->mbuf); 1222 if (dlen <= loc->mbuf_off) { 1223 /* Exhausted packet, just free. */ 1224 mbuf = loc->mbuf; 1225 loc->mbuf = mbuf->next; 1226 rte_pktmbuf_free_seg(mbuf); 1227 loc->mbuf_off = 0; 1228 MLX5_ASSERT(loc->mbuf_nseg > 1); 1229 MLX5_ASSERT(loc->mbuf); 1230 --loc->mbuf_nseg; 1231 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1232 unsigned int diff; 1233 1234 if (copy >= must) { 1235 /* 1236 * We already copied the minimal 1237 * requested amount of data. 1238 */ 1239 return copy; 1240 } 1241 diff = must - copy; 1242 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 1243 /* 1244 * Copy only the minimal required 1245 * part of the data buffer. Limit amount 1246 * of data to be copied to the length of 1247 * available space. 1248 */ 1249 len = RTE_MIN(len, diff); 1250 } 1251 } 1252 continue; 1253 } 1254 dlen -= loc->mbuf_off; 1255 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1256 loc->mbuf_off); 1257 part = RTE_MIN(len, dlen); 1258 rte_memcpy(pdst, psrc, part); 1259 copy += part; 1260 loc->mbuf_off += part; 1261 len -= part; 1262 if (!len) { 1263 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 1264 loc->mbuf_off = 0; 1265 /* Exhausted packet, just free. */ 1266 mbuf = loc->mbuf; 1267 loc->mbuf = mbuf->next; 1268 rte_pktmbuf_free_seg(mbuf); 1269 loc->mbuf_off = 0; 1270 MLX5_ASSERT(loc->mbuf_nseg >= 1); 1271 --loc->mbuf_nseg; 1272 } 1273 return copy; 1274 } 1275 pdst += part; 1276 } while (true); 1277 } 1278 1279 /** 1280 * Build the Ethernet Segment with inlined data from multi-segment packet. 1281 * Checks the boundary of WQEBB and ring buffer wrapping, supports Software 1282 * Parser, Checksums and VLAN insertion Tx offload features. 1283 * 1284 * @param txq 1285 * Pointer to TX queue structure. 1286 * @param loc 1287 * Pointer to burst routine local context. 1288 * @param wqe 1289 * Pointer to WQE to fill with built Ethernet Segment. 
1290 * @param vlan 1291 * Length of VLAN tag insertion if any. 1292 * @param inlen 1293 * Length of data to inline (VLAN included, if any). 1294 * @param tso 1295 * TSO flag, set mss field from the packet. 1296 * @param olx 1297 * Configured Tx offloads mask. It is fully defined at 1298 * compile time and may be used for optimization. 1299 * 1300 * @return 1301 * Pointer to the next Data Segment (aligned and possible NOT wrapped 1302 * around - caller should do wrapping check on its own). 1303 */ 1304 static __rte_always_inline struct mlx5_wqe_dseg * 1305 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 1306 struct mlx5_txq_local *__rte_restrict loc, 1307 struct mlx5_wqe *__rte_restrict wqe, 1308 unsigned int vlan, 1309 unsigned int inlen, 1310 unsigned int tso, 1311 unsigned int olx) 1312 { 1313 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 1314 uint32_t csum; 1315 uint8_t *pdst; 1316 unsigned int part, tlen = 0; 1317 1318 /* 1319 * Calculate and set check sum flags first, uint32_t field 1320 * in segment may be shared with Software Parser flags. 1321 */ 1322 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 1323 if (tso) { 1324 csum <<= 24; 1325 csum |= loc->mbuf->tso_segsz; 1326 es->flags = rte_cpu_to_be_32(csum); 1327 } else { 1328 es->flags = rte_cpu_to_le_32(csum); 1329 } 1330 /* 1331 * Calculate and set Software Parser offsets and flags. 1332 * These flags a set for custom UDP and IP tunnel packets. 1333 */ 1334 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1335 /* Fill metadata field if needed. */ 1336 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1337 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 1338 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1339 0 : 0; 1340 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 1341 pdst = (uint8_t *)&es->inline_data; 1342 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1343 /* Implement VLAN tag insertion as part inline data. */ 1344 mlx5_tx_mseg_memcpy(pdst, loc, 1345 2 * RTE_ETHER_ADDR_LEN, 1346 2 * RTE_ETHER_ADDR_LEN, olx); 1347 pdst += 2 * RTE_ETHER_ADDR_LEN; 1348 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1349 ((RTE_ETHER_TYPE_VLAN << 16) | 1350 loc->mbuf->vlan_tci); 1351 pdst += sizeof(struct rte_vlan_hdr); 1352 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 1353 } 1354 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 1355 /* 1356 * The WQEBB space availability is checked by caller. 1357 * Here we should be aware of WQE ring buffer wraparound only. 1358 */ 1359 part = (uint8_t *)txq->wqes_end - pdst; 1360 part = RTE_MIN(part, inlen - tlen); 1361 MLX5_ASSERT(part); 1362 do { 1363 unsigned int copy; 1364 1365 /* 1366 * Copying may be interrupted inside the routine 1367 * if run into no inline hint flag. 1368 */ 1369 copy = tso ? inlen : txq->inlen_mode; 1370 copy = tlen >= copy ? 0 : (copy - tlen); 1371 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 1372 tlen += copy; 1373 if (likely(inlen <= tlen) || copy < part) { 1374 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 1375 pdst += copy; 1376 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1377 return (struct mlx5_wqe_dseg *)pdst; 1378 } 1379 pdst = (uint8_t *)txq->wqes; 1380 part = inlen - tlen; 1381 } while (true); 1382 } 1383 1384 /** 1385 * Build the Data Segment of pointer type. 1386 * 1387 * @param txq 1388 * Pointer to TX queue structure. 1389 * @param loc 1390 * Pointer to burst routine local context. 1391 * @param dseg 1392 * Pointer to WQE to fill with built Data Segment. 
1393 * @param buf 1394 * Data buffer to point. 1395 * @param len 1396 * Data buffer length. 1397 * @param olx 1398 * Configured Tx offloads mask. It is fully defined at 1399 * compile time and may be used for optimization. 1400 */ 1401 static __rte_always_inline void 1402 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 1403 struct mlx5_txq_local *__rte_restrict loc, 1404 struct mlx5_wqe_dseg *__rte_restrict dseg, 1405 uint8_t *buf, 1406 unsigned int len, 1407 unsigned int olx __rte_unused) 1408 1409 { 1410 MLX5_ASSERT(len); 1411 dseg->bcount = rte_cpu_to_be_32(len); 1412 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1413 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1414 } 1415 1416 /** 1417 * Build the Data Segment of pointer type or inline if data length is less than 1418 * buffer in minimal Data Segment size. 1419 * 1420 * @param txq 1421 * Pointer to TX queue structure. 1422 * @param loc 1423 * Pointer to burst routine local context. 1424 * @param dseg 1425 * Pointer to WQE to fill with built Data Segment. 1426 * @param buf 1427 * Data buffer to point. 1428 * @param len 1429 * Data buffer length. 1430 * @param olx 1431 * Configured Tx offloads mask. It is fully defined at 1432 * compile time and may be used for optimization. 1433 */ 1434 static __rte_always_inline void 1435 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 1436 struct mlx5_txq_local *__rte_restrict loc, 1437 struct mlx5_wqe_dseg *__rte_restrict dseg, 1438 uint8_t *buf, 1439 unsigned int len, 1440 unsigned int olx __rte_unused) 1441 1442 { 1443 uintptr_t dst, src; 1444 1445 MLX5_ASSERT(len); 1446 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 1447 dseg->bcount = rte_cpu_to_be_32(len); 1448 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1449 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1450 1451 return; 1452 } 1453 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1454 /* Unrolled implementation of generic rte_memcpy. */ 1455 dst = (uintptr_t)&dseg->inline_data[0]; 1456 src = (uintptr_t)buf; 1457 if (len & 0x08) { 1458 #ifdef RTE_ARCH_STRICT_ALIGN 1459 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 1460 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1461 dst += sizeof(uint32_t); 1462 src += sizeof(uint32_t); 1463 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1464 dst += sizeof(uint32_t); 1465 src += sizeof(uint32_t); 1466 #else 1467 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 1468 dst += sizeof(uint64_t); 1469 src += sizeof(uint64_t); 1470 #endif 1471 } 1472 if (len & 0x04) { 1473 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1474 dst += sizeof(uint32_t); 1475 src += sizeof(uint32_t); 1476 } 1477 if (len & 0x02) { 1478 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 1479 dst += sizeof(uint16_t); 1480 src += sizeof(uint16_t); 1481 } 1482 if (len & 0x01) 1483 *(uint8_t *)dst = *(uint8_t *)src; 1484 } 1485 1486 /** 1487 * Build the Data Segment of inlined data from single 1488 * segment packet, no VLAN insertion. 1489 * 1490 * @param txq 1491 * Pointer to TX queue structure. 1492 * @param loc 1493 * Pointer to burst routine local context. 1494 * @param dseg 1495 * Pointer to WQE to fill with built Data Segment. 1496 * @param buf 1497 * Data buffer to point. 1498 * @param len 1499 * Data buffer length. 1500 * @param olx 1501 * Configured Tx offloads mask. It is fully defined at 1502 * compile time and may be used for optimization. 1503 * 1504 * @return 1505 * Pointer to the next Data Segment after inlined data. 1506 * Ring buffer wraparound check is needed. 
We do not do it here because it 1507 * may not be needed for the last packet in the eMPW session. 1508 */ 1509 static __rte_always_inline struct mlx5_wqe_dseg * 1510 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 1511 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1512 struct mlx5_wqe_dseg *__rte_restrict dseg, 1513 uint8_t *buf, 1514 unsigned int len, 1515 unsigned int olx __rte_unused) 1516 { 1517 unsigned int part; 1518 uint8_t *pdst; 1519 1520 if (!MLX5_TXOFF_CONFIG(MPW)) { 1521 /* Store the descriptor byte counter for eMPW sessions. */ 1522 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1523 pdst = &dseg->inline_data[0]; 1524 } else { 1525 /* The entire legacy MPW session counter is stored on close. */ 1526 pdst = (uint8_t *)dseg; 1527 } 1528 /* 1529 * The WQEBB space availability is checked by caller. 1530 * Here we should be aware of WQE ring buffer wraparound only. 1531 */ 1532 part = (uint8_t *)txq->wqes_end - pdst; 1533 part = RTE_MIN(part, len); 1534 do { 1535 rte_memcpy(pdst, buf, part); 1536 len -= part; 1537 if (likely(!len)) { 1538 pdst += part; 1539 if (!MLX5_TXOFF_CONFIG(MPW)) 1540 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1541 /* Note: no final wraparound check here. */ 1542 return (struct mlx5_wqe_dseg *)pdst; 1543 } 1544 pdst = (uint8_t *)txq->wqes; 1545 buf += part; 1546 part = len; 1547 } while (true); 1548 } 1549 1550 /** 1551 * Build the Data Segment of inlined data from single 1552 * segment packet with VLAN insertion. 1553 * 1554 * @param txq 1555 * Pointer to TX queue structure. 1556 * @param loc 1557 * Pointer to burst routine local context. 1558 * @param dseg 1559 * Pointer to the dseg fill with built Data Segment. 1560 * @param buf 1561 * Data buffer to point. 1562 * @param len 1563 * Data buffer length. 1564 * @param olx 1565 * Configured Tx offloads mask. It is fully defined at 1566 * compile time and may be used for optimization. 1567 * 1568 * @return 1569 * Pointer to the next Data Segment after inlined data. 1570 * Ring buffer wraparound check is needed. 1571 */ 1572 static __rte_always_inline struct mlx5_wqe_dseg * 1573 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 1574 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1575 struct mlx5_wqe_dseg *__rte_restrict dseg, 1576 uint8_t *buf, 1577 unsigned int len, 1578 unsigned int olx __rte_unused) 1579 1580 { 1581 unsigned int part; 1582 uint8_t *pdst; 1583 1584 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 1585 if (!MLX5_TXOFF_CONFIG(MPW)) { 1586 /* Store the descriptor byte counter for eMPW sessions. */ 1587 dseg->bcount = rte_cpu_to_be_32 1588 ((len + sizeof(struct rte_vlan_hdr)) | 1589 MLX5_ETH_WQE_DATA_INLINE); 1590 pdst = &dseg->inline_data[0]; 1591 } else { 1592 /* The entire legacy MPW session counter is stored on close. */ 1593 pdst = (uint8_t *)dseg; 1594 } 1595 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 1596 buf += MLX5_DSEG_MIN_INLINE_SIZE; 1597 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 1598 len -= MLX5_DSEG_MIN_INLINE_SIZE; 1599 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ 1600 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 1601 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 1602 pdst = (uint8_t *)txq->wqes; 1603 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 1604 loc->mbuf->vlan_tci); 1605 pdst += sizeof(struct rte_vlan_hdr); 1606 /* 1607 * The WQEBB space availability is checked by caller. 1608 * Here we should be aware of WQE ring buffer wraparound only. 
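 * Hence the copy below is split into at most two chunks: first the bytes
 * that fit up to txq->wqes_end, then the remainder restarting from
 * txq->wqes.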
1609 */ 1610 part = (uint8_t *)txq->wqes_end - pdst; 1611 part = RTE_MIN(part, len); 1612 do { 1613 rte_memcpy(pdst, buf, part); 1614 len -= part; 1615 if (likely(!len)) { 1616 pdst += part; 1617 if (!MLX5_TXOFF_CONFIG(MPW)) 1618 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1619 /* Note: no final wraparound check here. */ 1620 return (struct mlx5_wqe_dseg *)pdst; 1621 } 1622 pdst = (uint8_t *)txq->wqes; 1623 buf += part; 1624 part = len; 1625 } while (true); 1626 } 1627 1628 /** 1629 * Build the Ethernet Segment with optionally inlined data with 1630 * VLAN insertion and following Data Segments (if any) from 1631 * multi-segment packet. Used by ordinary send and TSO. 1632 * 1633 * @param txq 1634 * Pointer to TX queue structure. 1635 * @param loc 1636 * Pointer to burst routine local context. 1637 * @param wqe 1638 * Pointer to WQE to fill with built Ethernet/Data Segments. 1639 * @param vlan 1640 * Length of VLAN header to insert, 0 means no VLAN insertion. 1641 * @param inlen 1642 * Data length to inline. For TSO this parameter specifies exact value, 1643 * for ordinary send routine can be aligned by caller to provide better WQE 1644 * space saving and data buffer start address alignment. 1645 * This length includes VLAN header being inserted. 1646 * @param tso 1647 * Zero means ordinary send, inlined data can be extended, 1648 * otherwise this is TSO, inlined data length is fixed. 1649 * @param olx 1650 * Configured Tx offloads mask. It is fully defined at 1651 * compile time and may be used for optimization. 1652 * 1653 * @return 1654 * Actual size of built WQE in segments. 1655 */ 1656 static __rte_always_inline unsigned int 1657 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 1658 struct mlx5_txq_local *__rte_restrict loc, 1659 struct mlx5_wqe *__rte_restrict wqe, 1660 unsigned int vlan, 1661 unsigned int inlen, 1662 unsigned int tso, 1663 unsigned int olx __rte_unused) 1664 { 1665 struct mlx5_wqe_dseg *__rte_restrict dseg; 1666 unsigned int ds; 1667 1668 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 1669 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 1670 loc->mbuf_off = 0; 1671 1672 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 1673 if (!loc->mbuf_nseg) 1674 goto dseg_done; 1675 /* 1676 * There are still some mbuf remaining, not inlined. 1677 * The first mbuf may be partially inlined and we 1678 * must process the possible non-zero data offset. 1679 */ 1680 if (loc->mbuf_off) { 1681 unsigned int dlen; 1682 uint8_t *dptr; 1683 1684 /* 1685 * Exhausted packets must be dropped before. 1686 * Non-zero offset means there are some data 1687 * remained in the packet. 1688 */ 1689 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 1690 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 1691 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1692 loc->mbuf_off); 1693 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 1694 /* 1695 * Build the pointer/minimal Data Segment. 1696 * Do ring buffer wrapping check in advance. 1697 */ 1698 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1699 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1700 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 1701 /* Store the mbuf to be freed on completion. 
*/ 1702 MLX5_ASSERT(loc->elts_free); 1703 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1704 --loc->elts_free; 1705 ++dseg; 1706 if (--loc->mbuf_nseg == 0) 1707 goto dseg_done; 1708 loc->mbuf = loc->mbuf->next; 1709 loc->mbuf_off = 0; 1710 } 1711 do { 1712 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1713 struct rte_mbuf *mbuf; 1714 1715 /* Zero length segment found, just skip. */ 1716 mbuf = loc->mbuf; 1717 loc->mbuf = loc->mbuf->next; 1718 rte_pktmbuf_free_seg(mbuf); 1719 if (--loc->mbuf_nseg == 0) 1720 break; 1721 } else { 1722 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1723 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1724 mlx5_tx_dseg_iptr 1725 (txq, loc, dseg, 1726 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1727 rte_pktmbuf_data_len(loc->mbuf), olx); 1728 MLX5_ASSERT(loc->elts_free); 1729 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1730 --loc->elts_free; 1731 ++dseg; 1732 if (--loc->mbuf_nseg == 0) 1733 break; 1734 loc->mbuf = loc->mbuf->next; 1735 } 1736 } while (true); 1737 1738 dseg_done: 1739 /* Calculate actual segments used from the dseg pointer. */ 1740 if ((uintptr_t)wqe < (uintptr_t)dseg) 1741 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 1742 else 1743 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 1744 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 1745 return ds; 1746 } 1747 1748 /** 1749 * The routine checks timestamp flag in the current packet, 1750 * and push WAIT WQE into the queue if scheduling is required. 1751 * 1752 * @param txq 1753 * Pointer to TX queue structure. 1754 * @param loc 1755 * Pointer to burst routine local context. 1756 * @param elts 1757 * Number of free elements in elts buffer to be checked, for zero 1758 * value the check is optimized out by compiler. 1759 * @param olx 1760 * Configured Tx offloads mask. It is fully defined at 1761 * compile time and may be used for optimization. 1762 * 1763 * @return 1764 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1765 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 1766 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 1767 * Local context variables partially updated. 1768 */ 1769 static __rte_always_inline enum mlx5_txcmp_code 1770 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 1771 struct mlx5_txq_local *restrict loc, 1772 uint16_t elts, 1773 unsigned int olx) 1774 { 1775 if (MLX5_TXOFF_CONFIG(TXPP) && 1776 loc->mbuf->ol_flags & txq->ts_mask) { 1777 struct mlx5_dev_ctx_shared *sh; 1778 struct mlx5_wqe *wqe; 1779 uint64_t ts; 1780 1781 /* 1782 * Estimate the required space quickly and roughly. 1783 * We would like to ensure the packet can be pushed 1784 * to the queue and we won't get the orphan WAIT WQE. 1785 */ 1786 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 1787 loc->elts_free < elts) 1788 return MLX5_TXCMP_CODE_EXIT; 1789 /* Convert the timestamp into completion to wait. */ 1790 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 1791 if (txq->ts_last && ts < txq->ts_last) 1792 rte_atomic_fetch_add_explicit(&txq->sh->txpp.err_ts_order, 1793 1, rte_memory_order_relaxed); 1794 txq->ts_last = ts; 1795 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1796 sh = txq->sh; 1797 if (txq->wait_on_time) { 1798 /* The wait on time capability should be used. 
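 * The timestamp is adjusted by the configured skew and written into
 * the wait-time segment of the WAIT WQE built just below.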
*/ 1799 ts -= sh->txpp.skew; 1800 rte_pmd_mlx5_trace_tx_wait(ts); 1801 mlx5_tx_cseg_init(txq, loc, wqe, 1802 1 + sizeof(struct mlx5_wqe_wseg) / 1803 MLX5_WSEG_SIZE, 1804 MLX5_OPCODE_WAIT | 1805 MLX5_OPC_MOD_WAIT_TIME << 24, olx); 1806 mlx5_tx_wseg_init(txq, loc, wqe, ts, olx); 1807 } else { 1808 /* Legacy cross-channel operation should be used. */ 1809 int32_t wci; 1810 1811 wci = mlx5_txpp_convert_tx_ts(sh, ts); 1812 if (unlikely(wci < 0)) 1813 return MLX5_TXCMP_CODE_SINGLE; 1814 /* Build the WAIT WQE with specified completion. */ 1815 rte_pmd_mlx5_trace_tx_wait(ts - sh->txpp.skew); 1816 mlx5_tx_cseg_init(txq, loc, wqe, 1817 1 + sizeof(struct mlx5_wqe_qseg) / 1818 MLX5_WSEG_SIZE, 1819 MLX5_OPCODE_WAIT | 1820 MLX5_OPC_MOD_WAIT_CQ_PI << 24, olx); 1821 mlx5_tx_qseg_init(txq, loc, wqe, wci, olx); 1822 } 1823 ++txq->wqe_ci; 1824 --loc->wqe_free; 1825 return MLX5_TXCMP_CODE_MULTI; 1826 } 1827 return MLX5_TXCMP_CODE_SINGLE; 1828 } 1829 1830 /** 1831 * Tx one packet function for multi-segment TSO. Supports all 1832 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 1833 * sends one packet per WQE. 1834 * 1835 * This routine is responsible for storing processed mbuf 1836 * into elts ring buffer and update elts_head. 1837 * 1838 * @param txq 1839 * Pointer to TX queue structure. 1840 * @param loc 1841 * Pointer to burst routine local context. 1842 * @param olx 1843 * Configured Tx offloads mask. It is fully defined at 1844 * compile time and may be used for optimization. 1845 * 1846 * @return 1847 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1848 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1849 * Local context variables partially updated. 1850 */ 1851 static __rte_always_inline enum mlx5_txcmp_code 1852 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 1853 struct mlx5_txq_local *__rte_restrict loc, 1854 unsigned int olx) 1855 { 1856 struct mlx5_wqe *__rte_restrict wqe; 1857 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 1858 1859 MLX5_ASSERT(loc->elts_free >= NB_SEGS(loc->mbuf)); 1860 if (MLX5_TXOFF_CONFIG(TXPP)) { 1861 enum mlx5_txcmp_code wret; 1862 1863 /* Generate WAIT for scheduling if requested. */ 1864 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 1865 if (wret == MLX5_TXCMP_CODE_EXIT) 1866 return MLX5_TXCMP_CODE_EXIT; 1867 if (wret == MLX5_TXCMP_CODE_ERROR) 1868 return MLX5_TXCMP_CODE_ERROR; 1869 } 1870 /* 1871 * Calculate data length to be inlined to estimate 1872 * the required space in WQE ring buffer. 1873 */ 1874 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1875 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1876 vlan = sizeof(struct rte_vlan_hdr); 1877 inlen = loc->mbuf->l2_len + vlan + 1878 loc->mbuf->l3_len + loc->mbuf->l4_len; 1879 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 1880 return MLX5_TXCMP_CODE_ERROR; 1881 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 1882 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 1883 /* Packet must contain all TSO headers. */ 1884 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 1885 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 1886 inlen > (dlen + vlan))) 1887 return MLX5_TXCMP_CODE_ERROR; 1888 /* 1889 * Check whether there are enough free WQEBBs: 1890 * - Control Segment 1891 * - Ethernet Segment 1892 * - First Segment of inlined Ethernet data 1893 * - ... data continued ... 
1894 * - Data Segments of pointer/min inline type 1895 */ 1896 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 1897 MLX5_ESEG_MIN_INLINE_SIZE + 1898 MLX5_WSEG_SIZE + 1899 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 1900 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1901 return MLX5_TXCMP_CODE_EXIT; 1902 /* Check for maximal WQE size. */ 1903 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) 1904 return MLX5_TXCMP_CODE_ERROR; 1905 #ifdef MLX5_PMD_SOFT_COUNTERS 1906 /* Update sent data bytes/packets counters. */ 1907 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 1908 loc->mbuf->tso_segsz; 1909 /* 1910 * One will be added for mbuf itself at the end of the mlx5_tx_burst 1911 * from loc->pkts_sent field. 1912 */ 1913 --ntcp; 1914 txq->stats.opackets += ntcp; 1915 txq->stats.obytes += dlen + vlan + ntcp * inlen; 1916 #endif 1917 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1918 loc->wqe_last = wqe; 1919 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 1920 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 1921 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 1922 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 1923 txq->wqe_ci += (ds + 3) / 4; 1924 loc->wqe_free -= (ds + 3) / 4; 1925 return MLX5_TXCMP_CODE_MULTI; 1926 } 1927 1928 /** 1929 * Tx one packet function for multi-segment SEND. Supports all types of Tx 1930 * offloads, uses MLX5_OPCODE_SEND to build WQEs, sends one packet per WQE, 1931 * without any data inlining in Ethernet Segment. 1932 * 1933 * This routine is responsible for storing processed mbuf 1934 * into elts ring buffer and update elts_head. 1935 * 1936 * @param txq 1937 * Pointer to TX queue structure. 1938 * @param loc 1939 * Pointer to burst routine local context. 1940 * @param olx 1941 * Configured Tx offloads mask. It is fully defined at 1942 * compile time and may be used for optimization. 1943 * 1944 * @return 1945 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1946 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1947 * Local context variables partially updated. 1948 */ 1949 static __rte_always_inline enum mlx5_txcmp_code 1950 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 1951 struct mlx5_txq_local *__rte_restrict loc, 1952 unsigned int olx) 1953 { 1954 struct mlx5_wqe_dseg *__rte_restrict dseg; 1955 struct mlx5_wqe *__rte_restrict wqe; 1956 unsigned int ds, nseg; 1957 1958 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1959 MLX5_ASSERT(loc->elts_free >= NB_SEGS(loc->mbuf)); 1960 if (MLX5_TXOFF_CONFIG(TXPP)) { 1961 enum mlx5_txcmp_code wret; 1962 1963 /* Generate WAIT for scheduling if requested. */ 1964 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 1965 if (wret == MLX5_TXCMP_CODE_EXIT) 1966 return MLX5_TXCMP_CODE_EXIT; 1967 if (wret == MLX5_TXCMP_CODE_ERROR) 1968 return MLX5_TXCMP_CODE_ERROR; 1969 } 1970 /* 1971 * No inline at all, it means the CPU cycles saving is prioritized at 1972 * configuration, we should not copy any packet data to WQE. 1973 */ 1974 nseg = NB_SEGS(loc->mbuf); 1975 ds = 2 + nseg; 1976 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1977 return MLX5_TXCMP_CODE_EXIT; 1978 /* Check for maximal WQE size. */ 1979 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) 1980 return MLX5_TXCMP_CODE_ERROR; 1981 /* 1982 * Some Tx offloads may cause an error if packet is not long enough, 1983 * check against assumed minimal length. 
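 * The assumed minimum is MLX5_ESEG_MIN_INLINE_SIZE bytes, packets
 * not exceeding it are rejected just below.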
1984 */ 1985 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 1986 return MLX5_TXCMP_CODE_ERROR; 1987 #ifdef MLX5_PMD_SOFT_COUNTERS 1988 /* Update sent data bytes counter. */ 1989 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 1990 if (MLX5_TXOFF_CONFIG(VLAN) && 1991 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1992 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 1993 #endif 1994 /* 1995 * SEND WQE, one WQEBB: 1996 * - Control Segment, SEND opcode 1997 * - Ethernet Segment, optional VLAN, no inline 1998 * - Data Segments, pointer only type 1999 */ 2000 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2001 loc->wqe_last = wqe; 2002 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 2003 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 2004 mlx5_tx_eseg_none(txq, loc, wqe, olx); 2005 dseg = &wqe->dseg[0]; 2006 do { 2007 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 2008 struct rte_mbuf *mbuf; 2009 2010 /* 2011 * Zero length segment found, have to correct total 2012 * size of WQE in segments. 2013 * It is supposed to be rare occasion, so in normal 2014 * case (no zero length segments) we avoid extra 2015 * writing to the Control Segment. 2016 */ 2017 --ds; 2018 wqe->cseg.sq_ds -= RTE_BE32(1); 2019 mbuf = loc->mbuf; 2020 loc->mbuf = mbuf->next; 2021 rte_pktmbuf_free_seg(mbuf); 2022 if (--nseg == 0) 2023 break; 2024 } else { 2025 mlx5_tx_dseg_ptr 2026 (txq, loc, dseg, 2027 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 2028 rte_pktmbuf_data_len(loc->mbuf), olx); 2029 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2030 --loc->elts_free; 2031 if (--nseg == 0) 2032 break; 2033 ++dseg; 2034 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2035 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2036 loc->mbuf = loc->mbuf->next; 2037 } 2038 } while (true); 2039 txq->wqe_ci += (ds + 3) / 4; 2040 loc->wqe_free -= (ds + 3) / 4; 2041 return MLX5_TXCMP_CODE_MULTI; 2042 } 2043 2044 /** 2045 * Tx one packet function for multi-segment SEND. Supports all 2046 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 2047 * sends one packet per WQE, with data inlining in 2048 * Ethernet Segment and minimal Data Segments. 2049 * 2050 * This routine is responsible for storing processed mbuf 2051 * into elts ring buffer and update elts_head. 2052 * 2053 * @param txq 2054 * Pointer to TX queue structure. 2055 * @param loc 2056 * Pointer to burst routine local context. 2057 * @param olx 2058 * Configured Tx offloads mask. It is fully defined at 2059 * compile time and may be used for optimization. 2060 * 2061 * @return 2062 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2063 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2064 * Local context variables partially updated. 2065 */ 2066 static __rte_always_inline enum mlx5_txcmp_code 2067 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 2068 struct mlx5_txq_local *__rte_restrict loc, 2069 unsigned int olx) 2070 { 2071 struct mlx5_wqe *__rte_restrict wqe; 2072 unsigned int ds, inlen, dlen, vlan = 0; 2073 2074 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2075 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 2076 MLX5_ASSERT(loc->elts_free >= NB_SEGS(loc->mbuf)); 2077 /* 2078 * First calculate data length to be inlined 2079 * to estimate the required space for WQE. 2080 */ 2081 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 2082 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 2083 vlan = sizeof(struct rte_vlan_hdr); 2084 inlen = dlen + vlan; 2085 /* Check against minimal length. 
*/ 2086 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 2087 return MLX5_TXCMP_CODE_ERROR; 2088 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 2089 if (inlen > txq->inlen_send || 2090 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 2091 struct rte_mbuf *mbuf; 2092 unsigned int nxlen; 2093 uintptr_t start; 2094 2095 mbuf = loc->mbuf; 2096 nxlen = rte_pktmbuf_data_len(mbuf) + vlan; 2097 /* 2098 * Packet length exceeds the allowed inline data length, 2099 * check whether the minimal inlining is required. 2100 */ 2101 if (txq->inlen_mode) { 2102 MLX5_ASSERT(txq->inlen_mode >= 2103 MLX5_ESEG_MIN_INLINE_SIZE); 2104 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 2105 inlen = RTE_MIN(txq->inlen_mode, inlen); 2106 } else if (vlan && !txq->vlan_en) { 2107 /* 2108 * VLAN insertion is requested and hardware does not 2109 * support the offload, will do with software inline. 2110 */ 2111 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 2112 } else if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE || 2113 nxlen > txq->inlen_send) { 2114 return mlx5_tx_packet_multi_send(txq, loc, olx); 2115 } else if (nxlen <= MLX5_ESEG_MIN_INLINE_SIZE) { 2116 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 2117 } else { 2118 goto do_first; 2119 } 2120 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2121 goto do_build; 2122 /* 2123 * Now we know the minimal amount of data is requested 2124 * to inline. Check whether we should inline the buffers 2125 * from the chain beginning to eliminate some mbufs. 2126 */ 2127 if (unlikely(nxlen <= txq->inlen_send)) { 2128 /* We can inline first mbuf at least. */ 2129 if (nxlen < inlen) { 2130 unsigned int smlen; 2131 2132 /* Scan mbufs till inlen filled. */ 2133 do { 2134 smlen = nxlen; 2135 mbuf = NEXT(mbuf); 2136 MLX5_ASSERT(mbuf); 2137 nxlen = rte_pktmbuf_data_len(mbuf); 2138 nxlen += smlen; 2139 } while (unlikely(nxlen < inlen)); 2140 if (unlikely(nxlen > txq->inlen_send)) { 2141 /* We cannot inline entire mbuf. */ 2142 smlen = inlen - smlen; 2143 start = rte_pktmbuf_mtod_offset 2144 (mbuf, uintptr_t, smlen); 2145 goto do_align; 2146 } 2147 } 2148 do_first: 2149 do { 2150 inlen = nxlen; 2151 mbuf = NEXT(mbuf); 2152 /* There should be not end of packet. */ 2153 MLX5_ASSERT(mbuf); 2154 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2155 break; 2156 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 2157 } while (unlikely(nxlen < txq->inlen_send)); 2158 } 2159 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 2160 /* 2161 * Check whether we can do inline to align start 2162 * address of data buffer to cacheline. 2163 */ 2164 do_align: 2165 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 2166 if (unlikely(start)) { 2167 start += inlen; 2168 if (start <= txq->inlen_send) 2169 inlen = start; 2170 } 2171 } 2172 /* 2173 * Check whether there are enough free WQEBBs: 2174 * - Control Segment 2175 * - Ethernet Segment 2176 * - First Segment of inlined Ethernet data 2177 * - ... data continued ... 2178 * - Data Segments of pointer/min inline type 2179 * 2180 * Estimate the number of Data Segments conservatively, 2181 * supposing no any mbufs is being freed during inlining. 2182 */ 2183 do_build: 2184 if (MLX5_TXOFF_CONFIG(TXPP)) { 2185 enum mlx5_txcmp_code wret; 2186 2187 /* Generate WAIT for scheduling if requested. 
*/ 2188 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 2189 if (wret == MLX5_TXCMP_CODE_EXIT) 2190 return MLX5_TXCMP_CODE_EXIT; 2191 if (wret == MLX5_TXCMP_CODE_ERROR) 2192 return MLX5_TXCMP_CODE_ERROR; 2193 } 2194 MLX5_ASSERT(inlen <= txq->inlen_send); 2195 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 2196 MLX5_ESEG_MIN_INLINE_SIZE + 2197 MLX5_WSEG_SIZE + 2198 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2199 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 2200 return MLX5_TXCMP_CODE_EXIT; 2201 /* Check for maximal WQE size. */ 2202 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) { 2203 /* Check if we can adjust the inline length. */ 2204 if (unlikely(txq->inlen_mode)) { 2205 ds = NB_SEGS(loc->mbuf) + 2 + 2206 (txq->inlen_mode - 2207 MLX5_ESEG_MIN_INLINE_SIZE + 2208 MLX5_WSEG_SIZE + 2209 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2210 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) 2211 return MLX5_TXCMP_CODE_ERROR; 2212 } 2213 /* We have lucky opportunity to adjust. */ 2214 inlen = RTE_MIN(inlen, MLX5_WQE_SIZE_MAX - 2215 MLX5_WSEG_SIZE * 2 - 2216 MLX5_WSEG_SIZE * NB_SEGS(loc->mbuf) - 2217 MLX5_WSEG_SIZE + 2218 MLX5_ESEG_MIN_INLINE_SIZE); 2219 } 2220 #ifdef MLX5_PMD_SOFT_COUNTERS 2221 /* Update sent data bytes/packets counters. */ 2222 txq->stats.obytes += dlen + vlan; 2223 #endif 2224 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2225 loc->wqe_last = wqe; 2226 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 2227 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 2228 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 2229 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2230 txq->wqe_ci += (ds + 3) / 4; 2231 loc->wqe_free -= (ds + 3) / 4; 2232 return MLX5_TXCMP_CODE_MULTI; 2233 } 2234 2235 /** 2236 * Tx burst function for multi-segment packets. Supports all 2237 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 2238 * sends one packet per WQE. Function stops sending if it 2239 * encounters the single-segment packet. 2240 * 2241 * This routine is responsible for storing processed mbuf 2242 * into elts ring buffer and update elts_head. 2243 * 2244 * @param txq 2245 * Pointer to TX queue structure. 2246 * @param[in] pkts 2247 * Packets to transmit. 2248 * @param pkts_n 2249 * Number of packets in array. 2250 * @param loc 2251 * Pointer to burst routine local context. 2252 * @param olx 2253 * Configured Tx offloads mask. It is fully defined at 2254 * compile time and may be used for optimization. 2255 * 2256 * @return 2257 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2258 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2259 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2260 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 2261 * Local context variables updated. 2262 */ 2263 static __rte_always_inline enum mlx5_txcmp_code 2264 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 2265 struct rte_mbuf **__rte_restrict pkts, 2266 unsigned int pkts_n, 2267 struct mlx5_txq_local *__rte_restrict loc, 2268 unsigned int olx) 2269 { 2270 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2271 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2272 pkts += loc->pkts_sent + 1; 2273 pkts_n -= loc->pkts_sent; 2274 for (;;) { 2275 enum mlx5_txcmp_code ret; 2276 2277 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 2278 /* 2279 * Estimate the number of free elts quickly but conservatively. 2280 * Some segment may be fully inlined and freed, 2281 * ignore this here - precise estimation is costly. 
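 * Requiring NB_SEGS() free entries can only overestimate the real
 * need (fully inlined segments do not consume elts), so the early
 * exit below errs on the safe side.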
2282 */ 2283 if (loc->elts_free < NB_SEGS(loc->mbuf)) 2284 return MLX5_TXCMP_CODE_EXIT; 2285 if (MLX5_TXOFF_CONFIG(TSO) && 2286 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { 2287 /* Proceed with multi-segment TSO. */ 2288 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 2289 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 2290 /* Proceed with multi-segment SEND with inlining. */ 2291 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 2292 } else { 2293 /* Proceed with multi-segment SEND w/o inlining. */ 2294 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 2295 } 2296 if (ret == MLX5_TXCMP_CODE_EXIT) 2297 return MLX5_TXCMP_CODE_EXIT; 2298 if (ret == MLX5_TXCMP_CODE_ERROR) 2299 return MLX5_TXCMP_CODE_ERROR; 2300 /* WQE is built, go to the next packet. */ 2301 ++loc->pkts_sent; 2302 --pkts_n; 2303 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2304 return MLX5_TXCMP_CODE_EXIT; 2305 loc->mbuf = *pkts++; 2306 if (pkts_n > 1) 2307 rte_prefetch0(*pkts); 2308 if (likely(NB_SEGS(loc->mbuf) > 1)) 2309 continue; 2310 /* Here ends the series of multi-segment packets. */ 2311 if (MLX5_TXOFF_CONFIG(TSO) && 2312 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2313 return MLX5_TXCMP_CODE_TSO; 2314 return MLX5_TXCMP_CODE_SINGLE; 2315 } 2316 MLX5_ASSERT(false); 2317 } 2318 2319 /** 2320 * Tx burst function for single-segment packets with TSO. 2321 * Supports all types of Tx offloads, except multi-packets. 2322 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 2323 * Function stops sending if it encounters the multi-segment 2324 * packet or packet without TSO requested. 2325 * 2326 * The routine is responsible for storing processed mbuf into elts ring buffer 2327 * and update elts_head if inline offloads is requested due to possible early 2328 * freeing of the inlined mbufs (can not store pkts array in elts as a batch). 2329 * 2330 * @param txq 2331 * Pointer to TX queue structure. 2332 * @param[in] pkts 2333 * Packets to transmit. 2334 * @param pkts_n 2335 * Number of packets in array. 2336 * @param loc 2337 * Pointer to burst routine local context. 2338 * @param olx 2339 * Configured Tx offloads mask. It is fully defined at 2340 * compile time and may be used for optimization. 2341 * 2342 * @return 2343 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2344 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2345 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2346 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2347 * Local context variables updated. 2348 */ 2349 static __rte_always_inline enum mlx5_txcmp_code 2350 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 2351 struct rte_mbuf **__rte_restrict pkts, 2352 unsigned int pkts_n, 2353 struct mlx5_txq_local *__rte_restrict loc, 2354 unsigned int olx) 2355 { 2356 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2357 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2358 pkts += loc->pkts_sent + 1; 2359 pkts_n -= loc->pkts_sent; 2360 for (;;) { 2361 struct mlx5_wqe_dseg *__rte_restrict dseg; 2362 struct mlx5_wqe *__rte_restrict wqe; 2363 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 2364 uint8_t *dptr; 2365 2366 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2367 if (MLX5_TXOFF_CONFIG(TXPP)) { 2368 enum mlx5_txcmp_code wret; 2369 2370 /* Generate WAIT for scheduling if requested. 
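 * Passing elts == 1 also ensures one elts entry remains available
 * for the packet itself.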
*/ 2371 wret = mlx5_tx_schedule_send(txq, loc, 1, olx); 2372 if (wret == MLX5_TXCMP_CODE_EXIT) 2373 return MLX5_TXCMP_CODE_EXIT; 2374 if (wret == MLX5_TXCMP_CODE_ERROR) 2375 return MLX5_TXCMP_CODE_ERROR; 2376 } 2377 dlen = rte_pktmbuf_data_len(loc->mbuf); 2378 if (MLX5_TXOFF_CONFIG(VLAN) && 2379 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2380 vlan = sizeof(struct rte_vlan_hdr); 2381 } 2382 /* 2383 * First calculate the WQE size to check 2384 * whether we have enough space in ring buffer. 2385 */ 2386 hlen = loc->mbuf->l2_len + vlan + 2387 loc->mbuf->l3_len + loc->mbuf->l4_len; 2388 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 2389 return MLX5_TXCMP_CODE_ERROR; 2390 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 2391 hlen += loc->mbuf->outer_l2_len + 2392 loc->mbuf->outer_l3_len; 2393 /* Segment must contain all TSO headers. */ 2394 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 2395 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 2396 hlen > (dlen + vlan))) 2397 return MLX5_TXCMP_CODE_ERROR; 2398 /* 2399 * Check whether there are enough free WQEBBs: 2400 * - Control Segment 2401 * - Ethernet Segment 2402 * - First Segment of inlined Ethernet data 2403 * - ... data continued ... 2404 * - Finishing Data Segment of pointer type 2405 */ 2406 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 2407 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2408 if (loc->wqe_free < ((ds + 3) / 4)) 2409 return MLX5_TXCMP_CODE_EXIT; 2410 #ifdef MLX5_PMD_SOFT_COUNTERS 2411 /* Update sent data bytes/packets counters. */ 2412 ntcp = (dlen + vlan - hlen + 2413 loc->mbuf->tso_segsz - 1) / 2414 loc->mbuf->tso_segsz; 2415 /* 2416 * One will be added for mbuf itself at the end 2417 * of the mlx5_tx_burst from loc->pkts_sent field. 2418 */ 2419 --ntcp; 2420 txq->stats.opackets += ntcp; 2421 txq->stats.obytes += dlen + vlan + ntcp * hlen; 2422 #endif 2423 /* 2424 * Build the TSO WQE: 2425 * - Control Segment 2426 * - Ethernet Segment with hlen bytes inlined 2427 * - Data Segment of pointer type 2428 */ 2429 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2430 loc->wqe_last = wqe; 2431 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_TSO, olx); 2432 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 2433 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 2434 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 2435 dlen -= hlen - vlan; 2436 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2437 /* 2438 * WQE is built, update the loop parameters 2439 * and go to the next packet. 2440 */ 2441 txq->wqe_ci += (ds + 3) / 4; 2442 loc->wqe_free -= (ds + 3) / 4; 2443 if (MLX5_TXOFF_CONFIG(INLINE)) 2444 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2445 --loc->elts_free; 2446 ++loc->pkts_sent; 2447 --pkts_n; 2448 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2449 return MLX5_TXCMP_CODE_EXIT; 2450 loc->mbuf = *pkts++; 2451 if (pkts_n > 1) 2452 rte_prefetch0(*pkts); 2453 if (MLX5_TXOFF_CONFIG(MULTI) && 2454 unlikely(NB_SEGS(loc->mbuf) > 1)) 2455 return MLX5_TXCMP_CODE_MULTI; 2456 if (likely(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG))) 2457 return MLX5_TXCMP_CODE_SINGLE; 2458 /* Continue with the next TSO packet. */ 2459 } 2460 MLX5_ASSERT(false); 2461 } 2462 2463 /** 2464 * Analyze the packet and select the best method to send. 2465 * 2466 * @param txq 2467 * Pointer to TX queue structure. 2468 * @param loc 2469 * Pointer to burst routine local context. 2470 * @param olx 2471 * Configured Tx offloads mask. It is fully defined at 2472 * compile time and may be used for optimization. 
2473 * @param newp 2474 * The predefined flag whether do complete check for 2475 * multi-segment packets and TSO. 2476 * 2477 * @return 2478 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2479 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 2480 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 2481 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 2482 */ 2483 static __rte_always_inline enum mlx5_txcmp_code 2484 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 2485 struct mlx5_txq_local *__rte_restrict loc, 2486 unsigned int olx, 2487 bool newp) 2488 { 2489 /* Check for multi-segment packet. */ 2490 if (newp && 2491 MLX5_TXOFF_CONFIG(MULTI) && 2492 unlikely(NB_SEGS(loc->mbuf) > 1)) 2493 return MLX5_TXCMP_CODE_MULTI; 2494 /* Check for TSO packet. */ 2495 if (newp && 2496 MLX5_TXOFF_CONFIG(TSO) && 2497 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2498 return MLX5_TXCMP_CODE_TSO; 2499 /* Check if eMPW is enabled at all. */ 2500 if (!MLX5_TXOFF_CONFIG(EMPW)) 2501 return MLX5_TXCMP_CODE_SINGLE; 2502 /* Check if eMPW can be engaged. */ 2503 if (MLX5_TXOFF_CONFIG(VLAN) && 2504 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) && 2505 (!MLX5_TXOFF_CONFIG(INLINE) || 2506 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 2507 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 2508 /* 2509 * eMPW does not support VLAN insertion offload, we have to 2510 * inline the entire packet but packet is too long for inlining. 2511 */ 2512 return MLX5_TXCMP_CODE_SINGLE; 2513 } 2514 return MLX5_TXCMP_CODE_EMPW; 2515 } 2516 2517 /** 2518 * Check the next packet attributes to match with the eMPW batch ones. 2519 * In addition, for legacy MPW the packet length is checked either. 2520 * 2521 * @param txq 2522 * Pointer to TX queue structure. 2523 * @param es 2524 * Pointer to Ethernet Segment of eMPW batch. 2525 * @param loc 2526 * Pointer to burst routine local context. 2527 * @param dlen 2528 * Length of previous packet in MPW descriptor. 2529 * @param olx 2530 * Configured Tx offloads mask. It is fully defined at 2531 * compile time and may be used for optimization. 2532 * 2533 * @return 2534 * true - packet match with eMPW batch attributes. 2535 * false - no match, eMPW should be restarted. 2536 */ 2537 static __rte_always_inline bool 2538 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, 2539 struct mlx5_wqe_eseg *__rte_restrict es, 2540 struct mlx5_txq_local *__rte_restrict loc, 2541 uint32_t dlen, 2542 unsigned int olx) 2543 { 2544 uint8_t swp_flags = 0; 2545 2546 /* Compare the checksum flags, if any. */ 2547 if (MLX5_TXOFF_CONFIG(CSUM) && 2548 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags) 2549 return false; 2550 /* Compare the Software Parser offsets and flags. */ 2551 if (MLX5_TXOFF_CONFIG(SWP) && 2552 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) || 2553 es->swp_flags != swp_flags)) 2554 return false; 2555 /* Fill metadata field if needed. */ 2556 if (MLX5_TXOFF_CONFIG(METADATA) && 2557 es->metadata != (loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 2558 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 0)) 2559 return false; 2560 /* Legacy MPW can send packets with the same length only. */ 2561 if (MLX5_TXOFF_CONFIG(MPW) && 2562 dlen != rte_pktmbuf_data_len(loc->mbuf)) 2563 return false; 2564 /* There must be no VLAN packets in eMPW loop. */ 2565 if (MLX5_TXOFF_CONFIG(VLAN)) 2566 MLX5_ASSERT(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN)); 2567 /* Check if the scheduling is requested. 
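 * A packet that requests scheduling must start its own WQE, so it
 * never matches the open eMPW session.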
*/ 2568 if (MLX5_TXOFF_CONFIG(TXPP) && 2569 loc->mbuf->ol_flags & txq->ts_mask) 2570 return false; 2571 return true; 2572 } 2573 2574 /** 2575 * Update send loop variables and WQE for eMPW loop without data inlining. 2576 * Number of Data Segments is equal to the number of sent packets. 2577 * 2578 * @param txq 2579 * Pointer to TX queue structure. 2580 * @param loc 2581 * Pointer to burst routine local context. 2582 * @param ds 2583 * Number of packets/Data Segments/Packets. 2584 * @param slen 2585 * Accumulated statistics, bytes sent. 2586 * @param olx 2587 * Configured Tx offloads mask. It is fully defined at 2588 * compile time and may be used for optimization. 2589 * 2590 * @return 2591 * true - packet match with eMPW batch attributes. 2592 * false - no match, eMPW should be restarted. 2593 */ 2594 static __rte_always_inline void 2595 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, 2596 struct mlx5_txq_local *__rte_restrict loc, 2597 unsigned int ds, 2598 unsigned int slen, 2599 unsigned int olx __rte_unused) 2600 { 2601 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2602 #ifdef MLX5_PMD_SOFT_COUNTERS 2603 /* Update sent data bytes counter. */ 2604 txq->stats.obytes += slen; 2605 #else 2606 (void)slen; 2607 #endif 2608 loc->elts_free -= ds; 2609 loc->pkts_sent += ds; 2610 ds += 2; 2611 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2612 txq->wqe_ci += (ds + 3) / 4; 2613 loc->wqe_free -= (ds + 3) / 4; 2614 } 2615 2616 /** 2617 * Update send loop variables and WQE for eMPW loop with data inlining. 2618 * Gets the size of pushed descriptors and data to the WQE. 2619 * 2620 * @param txq 2621 * Pointer to TX queue structure. 2622 * @param loc 2623 * Pointer to burst routine local context. 2624 * @param len 2625 * Total size of descriptor/data in bytes. 2626 * @param slen 2627 * Accumulated statistics, data bytes sent. 2628 * @param wqem 2629 * The base WQE for the eMPW/MPW descriptor. 2630 * @param olx 2631 * Configured Tx offloads mask. It is fully defined at 2632 * compile time and may be used for optimization. 2633 * 2634 * @return 2635 * true - packet match with eMPW batch attributes. 2636 * false - no match, eMPW should be restarted. 2637 */ 2638 static __rte_always_inline void 2639 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 2640 struct mlx5_txq_local *__rte_restrict loc, 2641 unsigned int len, 2642 unsigned int slen, 2643 struct mlx5_wqe *__rte_restrict wqem, 2644 unsigned int olx __rte_unused) 2645 { 2646 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 2647 2648 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2649 #ifdef MLX5_PMD_SOFT_COUNTERS 2650 /* Update sent data bytes counter. */ 2651 txq->stats.obytes += slen; 2652 #else 2653 (void)slen; 2654 #endif 2655 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 2656 /* 2657 * If the legacy MPW session contains the inline packets 2658 * we should set the only inline data segment length 2659 * and align the total length to the segment size. 2660 */ 2661 MLX5_ASSERT(len > sizeof(dseg->bcount)); 2662 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 2663 MLX5_ETH_WQE_DATA_INLINE); 2664 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 2665 } else { 2666 /* 2667 * The session is not legacy MPW or contains the 2668 * data buffer pointer segments. 
2669 */ 2670 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 2671 len = len / MLX5_WSEG_SIZE + 2; 2672 } 2673 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 2674 txq->wqe_ci += (len + 3) / 4; 2675 loc->wqe_free -= (len + 3) / 4; 2676 loc->wqe_last = wqem; 2677 } 2678 2679 /** 2680 * The set of Tx burst functions for single-segment packets without TSO 2681 * and with Multi-Packet Writing feature support. 2682 * Supports all types of Tx offloads, except multi-packets and TSO. 2683 * 2684 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends as many packet 2685 * per WQE as it can. If eMPW is not configured or packet can not be sent with 2686 * eMPW (VLAN insertion) the ordinary SEND opcode is used and only one packet 2687 * placed in WQE. 2688 * 2689 * Functions stop sending if it encounters the multi-segment packet or packet 2690 * with TSO requested. 2691 * 2692 * The routines are responsible for storing processed mbuf into elts ring buffer 2693 * and update elts_head if inlining offload is requested. Otherwise the copying 2694 * mbufs to elts can be postponed and completed at the end of burst routine. 2695 * 2696 * @param txq 2697 * Pointer to TX queue structure. 2698 * @param[in] pkts 2699 * Packets to transmit. 2700 * @param pkts_n 2701 * Number of packets in array. 2702 * @param loc 2703 * Pointer to burst routine local context. 2704 * @param olx 2705 * Configured Tx offloads mask. It is fully defined at 2706 * compile time and may be used for optimization. 2707 * 2708 * @return 2709 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2710 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2711 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2712 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 2713 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 2714 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 2715 * 2716 * Local context variables updated. 2717 * 2718 * 2719 * The routine sends packets with MLX5_OPCODE_EMPW 2720 * without inlining, this is dedicated optimized branch. 2721 * No VLAN insertion is supported. 2722 */ 2723 static __rte_always_inline enum mlx5_txcmp_code 2724 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 2725 struct rte_mbuf **__rte_restrict pkts, 2726 unsigned int pkts_n, 2727 struct mlx5_txq_local *__rte_restrict loc, 2728 unsigned int olx) 2729 { 2730 /* 2731 * Subroutine is the part of mlx5_tx_burst_single() and sends 2732 * single-segment packet with eMPW opcode without data inlining. 2733 */ 2734 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2735 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2736 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2737 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2738 pkts += loc->pkts_sent + 1; 2739 pkts_n -= loc->pkts_sent; 2740 for (;;) { 2741 struct mlx5_wqe_dseg *__rte_restrict dseg; 2742 struct mlx5_wqe_eseg *__rte_restrict eseg; 2743 enum mlx5_txcmp_code ret; 2744 unsigned int part, loop; 2745 unsigned int slen = 0; 2746 2747 next_empw: 2748 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2749 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2750 MLX5_MPW_MAX_PACKETS : 2751 MLX5_EMPW_MAX_PACKETS); 2752 if (unlikely(loc->elts_free < part)) { 2753 /* We have no enough elts to save all mbufs. */ 2754 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 2755 return MLX5_TXCMP_CODE_EXIT; 2756 /* But we still able to send at least minimal eMPW. 
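 * The check above guarantees at least MLX5_EMPW_MIN_PACKETS free
 * elts entries at this point.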
*/ 2757 part = loc->elts_free; 2758 } 2759 if (MLX5_TXOFF_CONFIG(TXPP)) { 2760 enum mlx5_txcmp_code wret; 2761 2762 /* Generate WAIT for scheduling if requested. */ 2763 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 2764 if (wret == MLX5_TXCMP_CODE_EXIT) 2765 return MLX5_TXCMP_CODE_EXIT; 2766 if (wret == MLX5_TXCMP_CODE_ERROR) 2767 return MLX5_TXCMP_CODE_ERROR; 2768 } 2769 /* Check whether we have enough WQEs */ 2770 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 2771 if (unlikely(loc->wqe_free < 2772 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2773 return MLX5_TXCMP_CODE_EXIT; 2774 part = (loc->wqe_free * 4) - 2; 2775 } 2776 if (likely(part > 1)) 2777 rte_prefetch0(*pkts); 2778 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2779 /* 2780 * Build eMPW title WQEBB: 2781 * - Control Segment, eMPW opcode 2782 * - Ethernet Segment, no inline 2783 */ 2784 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 2785 MLX5_OPCODE_ENHANCED_MPSW, olx); 2786 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 2787 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2788 eseg = &loc->wqe_last->eseg; 2789 dseg = &loc->wqe_last->dseg[0]; 2790 loop = part; 2791 /* Store the packet length for legacy MPW. */ 2792 if (MLX5_TXOFF_CONFIG(MPW)) 2793 eseg->mss = rte_cpu_to_be_16 2794 (rte_pktmbuf_data_len(loc->mbuf)); 2795 for (;;) { 2796 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2797 #ifdef MLX5_PMD_SOFT_COUNTERS 2798 /* Update sent data bytes counter. */ 2799 slen += dlen; 2800 #endif 2801 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 2802 mlx5_tx_dseg_ptr 2803 (txq, loc, dseg, 2804 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 2805 dlen, olx); 2806 if (unlikely(--loop == 0)) 2807 break; 2808 loc->mbuf = *pkts++; 2809 if (likely(loop > 1)) 2810 rte_prefetch0(*pkts); 2811 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2812 /* 2813 * Unroll the completion code to avoid 2814 * returning variable value - it results in 2815 * unoptimized sequent checking in caller. 2816 */ 2817 if (ret == MLX5_TXCMP_CODE_MULTI) { 2818 part -= loop; 2819 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2820 if (unlikely(!loc->elts_free || 2821 !loc->wqe_free)) 2822 return MLX5_TXCMP_CODE_EXIT; 2823 return MLX5_TXCMP_CODE_MULTI; 2824 } 2825 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2826 if (ret == MLX5_TXCMP_CODE_TSO) { 2827 part -= loop; 2828 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2829 if (unlikely(!loc->elts_free || 2830 !loc->wqe_free)) 2831 return MLX5_TXCMP_CODE_EXIT; 2832 return MLX5_TXCMP_CODE_TSO; 2833 } 2834 if (ret == MLX5_TXCMP_CODE_SINGLE) { 2835 part -= loop; 2836 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2837 if (unlikely(!loc->elts_free || 2838 !loc->wqe_free)) 2839 return MLX5_TXCMP_CODE_EXIT; 2840 return MLX5_TXCMP_CODE_SINGLE; 2841 } 2842 if (ret != MLX5_TXCMP_CODE_EMPW) { 2843 MLX5_ASSERT(false); 2844 part -= loop; 2845 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2846 return MLX5_TXCMP_CODE_ERROR; 2847 } 2848 /* 2849 * Check whether packet parameters coincide 2850 * within assumed eMPW batch: 2851 * - check sum settings 2852 * - metadata value 2853 * - software parser settings 2854 * - packets length (legacy MPW only) 2855 * - scheduling is not required 2856 */ 2857 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 2858 MLX5_ASSERT(loop); 2859 part -= loop; 2860 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2861 if (unlikely(!loc->elts_free || 2862 !loc->wqe_free)) 2863 return MLX5_TXCMP_CODE_EXIT; 2864 pkts_n -= part; 2865 goto next_empw; 2866 } 2867 /* Packet attributes match, continue the same eMPW. 
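 * Only the Data Segment pointer advances, the title Control and
 * Ethernet Segments remain shared by the whole batch.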
*/ 2868 ++dseg; 2869 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2870 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2871 } 2872 /* eMPW is built successfully, update loop parameters. */ 2873 MLX5_ASSERT(!loop); 2874 MLX5_ASSERT(pkts_n >= part); 2875 #ifdef MLX5_PMD_SOFT_COUNTERS 2876 /* Update sent data bytes counter. */ 2877 txq->stats.obytes += slen; 2878 #endif 2879 loc->elts_free -= part; 2880 loc->pkts_sent += part; 2881 txq->wqe_ci += (2 + part + 3) / 4; 2882 loc->wqe_free -= (2 + part + 3) / 4; 2883 pkts_n -= part; 2884 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2885 return MLX5_TXCMP_CODE_EXIT; 2886 loc->mbuf = *pkts++; 2887 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2888 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 2889 return ret; 2890 /* Continue sending eMPW batches. */ 2891 } 2892 MLX5_ASSERT(false); 2893 } 2894 2895 /** 2896 * The routine sends packets with MLX5_OPCODE_EMPW 2897 * with inlining, optionally supports VLAN insertion. 2898 */ 2899 static __rte_always_inline enum mlx5_txcmp_code 2900 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 2901 struct rte_mbuf **__rte_restrict pkts, 2902 unsigned int pkts_n, 2903 struct mlx5_txq_local *__rte_restrict loc, 2904 unsigned int olx) 2905 { 2906 /* 2907 * Subroutine is the part of mlx5_tx_burst_single() and sends 2908 * single-segment packet with eMPW opcode with data inlining. 2909 */ 2910 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2911 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2912 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2913 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2914 pkts += loc->pkts_sent + 1; 2915 pkts_n -= loc->pkts_sent; 2916 for (;;) { 2917 struct mlx5_wqe_dseg *__rte_restrict dseg; 2918 struct mlx5_wqe *__rte_restrict wqem; 2919 enum mlx5_txcmp_code ret; 2920 unsigned int room, part, nlim; 2921 unsigned int slen = 0; 2922 2923 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2924 /* 2925 * Limits the amount of packets in one WQE 2926 * to improve CQE latency generation. 2927 */ 2928 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2929 MLX5_MPW_INLINE_MAX_PACKETS : 2930 MLX5_EMPW_MAX_PACKETS); 2931 if (MLX5_TXOFF_CONFIG(TXPP)) { 2932 enum mlx5_txcmp_code wret; 2933 2934 /* Generate WAIT for scheduling if requested. */ 2935 wret = mlx5_tx_schedule_send(txq, loc, nlim, olx); 2936 if (wret == MLX5_TXCMP_CODE_EXIT) 2937 return MLX5_TXCMP_CODE_EXIT; 2938 if (wret == MLX5_TXCMP_CODE_ERROR) 2939 return MLX5_TXCMP_CODE_ERROR; 2940 } 2941 /* Check whether we have minimal amount WQEs */ 2942 if (unlikely(loc->wqe_free < 2943 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2944 return MLX5_TXCMP_CODE_EXIT; 2945 if (likely(pkts_n > 1)) 2946 rte_prefetch0(*pkts); 2947 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2948 /* 2949 * Build eMPW title WQEBB: 2950 * - Control Segment, eMPW opcode, zero DS 2951 * - Ethernet Segment, no inline 2952 */ 2953 mlx5_tx_cseg_init(txq, loc, wqem, 0, 2954 MLX5_OPCODE_ENHANCED_MPSW, olx); 2955 mlx5_tx_eseg_none(txq, loc, wqem, 2956 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2957 dseg = &wqem->dseg[0]; 2958 /* Store the packet length for legacy MPW. */ 2959 if (MLX5_TXOFF_CONFIG(MPW)) 2960 wqem->eseg.mss = rte_cpu_to_be_16 2961 (rte_pktmbuf_data_len(loc->mbuf)); 2962 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 2963 loc->wqe_free) * MLX5_WQE_SIZE - 2964 MLX5_WQE_CSEG_SIZE - 2965 MLX5_WQE_ESEG_SIZE; 2966 /* Limit the room for legacy MPW sessions for performance. 
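 * Legacy MPW keeps all inline data in a single shared descriptor,
 * so the open session is capped to about txq->inlen_empw bytes of
 * inline data (plus the bcount field and an optional VLAN header),
 * but not below MLX5_MPW_INLINE_MAX_PACKETS pointer descriptors.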
*/ 2967 if (MLX5_TXOFF_CONFIG(MPW)) 2968 room = RTE_MIN(room, 2969 RTE_MAX(txq->inlen_empw + 2970 sizeof(dseg->bcount) + 2971 (MLX5_TXOFF_CONFIG(VLAN) ? 2972 sizeof(struct rte_vlan_hdr) : 0), 2973 MLX5_MPW_INLINE_MAX_PACKETS * 2974 MLX5_WQE_DSEG_SIZE)); 2975 /* Build WQE till we have space, packets and resources. */ 2976 part = room; 2977 for (;;) { 2978 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2979 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2980 unsigned int tlen; 2981 2982 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2983 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 2984 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 2985 /* 2986 * Some Tx offloads may cause an error if packet is not 2987 * long enough, check against assumed minimal length. 2988 */ 2989 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 2990 part -= room; 2991 if (unlikely(!part)) 2992 return MLX5_TXCMP_CODE_ERROR; 2993 /* 2994 * We have some successfully built 2995 * packet Data Segments to send. 2996 */ 2997 mlx5_tx_idone_empw(txq, loc, part, 2998 slen, wqem, olx); 2999 return MLX5_TXCMP_CODE_ERROR; 3000 } 3001 /* Inline or not inline - that's the Question. */ 3002 if (dlen > txq->inlen_empw || 3003 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 3004 goto pointer_empw; 3005 if (MLX5_TXOFF_CONFIG(MPW)) { 3006 if (dlen > txq->inlen_send) 3007 goto pointer_empw; 3008 tlen = dlen; 3009 if (part == room) { 3010 /* Open new inline MPW session. */ 3011 tlen += sizeof(dseg->bcount); 3012 dseg->bcount = RTE_BE32(0); 3013 dseg = RTE_PTR_ADD 3014 (dseg, sizeof(dseg->bcount)); 3015 } else { 3016 /* 3017 * No pointer and inline descriptor 3018 * intermix for legacy MPW sessions. 3019 */ 3020 if (wqem->dseg[0].bcount) 3021 break; 3022 } 3023 } else { 3024 tlen = sizeof(dseg->bcount) + dlen; 3025 } 3026 /* Inline entire packet, optional VLAN insertion. */ 3027 if (MLX5_TXOFF_CONFIG(VLAN) && 3028 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 3029 /* 3030 * The packet length must be checked in 3031 * mlx5_tx_able_to_empw() and packet 3032 * fits into inline length guaranteed. 3033 */ 3034 MLX5_ASSERT((dlen + 3035 sizeof(struct rte_vlan_hdr)) <= 3036 txq->inlen_empw); 3037 tlen += sizeof(struct rte_vlan_hdr); 3038 if (room < tlen) 3039 break; 3040 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 3041 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 3042 dptr, dlen, olx); 3043 #ifdef MLX5_PMD_SOFT_COUNTERS 3044 /* Update sent data bytes counter. */ 3045 slen += sizeof(struct rte_vlan_hdr); 3046 #endif 3047 } else { 3048 if (room < tlen) 3049 break; 3050 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 3051 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 3052 dptr, dlen, olx); 3053 } 3054 if (!MLX5_TXOFF_CONFIG(MPW)) 3055 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 3056 MLX5_ASSERT(room >= tlen); 3057 room -= tlen; 3058 /* 3059 * Packet data are completely inline, 3060 * we can try to free the packet. 3061 */ 3062 if (likely(loc->pkts_sent == loc->mbuf_free)) { 3063 /* 3064 * All the packets from the burst beginning 3065 * are inline, we can free mbufs directly 3066 * from the origin array on tx_burst exit(). 3067 */ 3068 loc->mbuf_free++; 3069 goto next_mbuf; 3070 } 3071 /* 3072 * In order no to call rte_pktmbuf_free_seg() here, 3073 * in the most inner loop (that might be very 3074 * expensive) we just save the mbuf in elts. 
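 * The saved mbufs are freed later from the completion handling
 * path, outside of this hot loop.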
3075 */ 3076 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3077 loc->elts_free--; 3078 goto next_mbuf; 3079 pointer_empw: 3080 /* 3081 * No pointer and inline descriptor 3082 * intermix for legacy MPW sessions. 3083 */ 3084 if (MLX5_TXOFF_CONFIG(MPW) && 3085 part != room && 3086 wqem->dseg[0].bcount == RTE_BE32(0)) 3087 break; 3088 /* 3089 * Not inlinable VLAN packets are 3090 * proceeded outside of this routine. 3091 */ 3092 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 3093 if (MLX5_TXOFF_CONFIG(VLAN)) 3094 MLX5_ASSERT(!(loc->mbuf->ol_flags & 3095 RTE_MBUF_F_TX_VLAN)); 3096 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 3097 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 3098 /* We have to store mbuf in elts.*/ 3099 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3100 loc->elts_free--; 3101 room -= MLX5_WQE_DSEG_SIZE; 3102 /* Ring buffer wraparound is checked at the loop end.*/ 3103 ++dseg; 3104 next_mbuf: 3105 #ifdef MLX5_PMD_SOFT_COUNTERS 3106 /* Update sent data bytes counter. */ 3107 slen += dlen; 3108 #endif 3109 loc->pkts_sent++; 3110 pkts_n--; 3111 if (unlikely(!pkts_n || !loc->elts_free)) { 3112 /* 3113 * We have no resources/packets to 3114 * continue build descriptors. 3115 */ 3116 part -= room; 3117 mlx5_tx_idone_empw(txq, loc, part, 3118 slen, wqem, olx); 3119 return MLX5_TXCMP_CODE_EXIT; 3120 } 3121 loc->mbuf = *pkts++; 3122 if (likely(pkts_n > 1)) 3123 rte_prefetch0(*pkts); 3124 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3125 /* 3126 * Unroll the completion code to avoid 3127 * returning variable value - it results in 3128 * unoptimized sequent checking in caller. 3129 */ 3130 if (ret == MLX5_TXCMP_CODE_MULTI) { 3131 part -= room; 3132 mlx5_tx_idone_empw(txq, loc, part, 3133 slen, wqem, olx); 3134 if (unlikely(!loc->elts_free || 3135 !loc->wqe_free)) 3136 return MLX5_TXCMP_CODE_EXIT; 3137 return MLX5_TXCMP_CODE_MULTI; 3138 } 3139 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3140 if (ret == MLX5_TXCMP_CODE_TSO) { 3141 part -= room; 3142 mlx5_tx_idone_empw(txq, loc, part, 3143 slen, wqem, olx); 3144 if (unlikely(!loc->elts_free || 3145 !loc->wqe_free)) 3146 return MLX5_TXCMP_CODE_EXIT; 3147 return MLX5_TXCMP_CODE_TSO; 3148 } 3149 if (ret == MLX5_TXCMP_CODE_SINGLE) { 3150 part -= room; 3151 mlx5_tx_idone_empw(txq, loc, part, 3152 slen, wqem, olx); 3153 if (unlikely(!loc->elts_free || 3154 !loc->wqe_free)) 3155 return MLX5_TXCMP_CODE_EXIT; 3156 return MLX5_TXCMP_CODE_SINGLE; 3157 } 3158 if (ret != MLX5_TXCMP_CODE_EMPW) { 3159 MLX5_ASSERT(false); 3160 part -= room; 3161 mlx5_tx_idone_empw(txq, loc, part, 3162 slen, wqem, olx); 3163 return MLX5_TXCMP_CODE_ERROR; 3164 } 3165 /* Check if we have minimal room left. */ 3166 nlim--; 3167 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 3168 break; 3169 /* 3170 * Check whether packet parameters coincide 3171 * within assumed eMPW batch: 3172 * - check sum settings 3173 * - metadata value 3174 * - software parser settings 3175 * - packets length (legacy MPW only) 3176 * - scheduling is not required 3177 */ 3178 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 3179 loc, dlen, olx)) 3180 break; 3181 /* Packet attributes match, continue the same eMPW. */ 3182 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3183 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3184 } 3185 /* 3186 * We get here to close an existing eMPW 3187 * session and start the new one. 
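 * After 'part -= room' below the variable holds the descriptor/data
 * bytes actually consumed by the session being closed, zero means
 * nothing was built and the burst exits.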
3188 */ 3189 MLX5_ASSERT(pkts_n); 3190 part -= room; 3191 if (unlikely(!part)) 3192 return MLX5_TXCMP_CODE_EXIT; 3193 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 3194 if (unlikely(!loc->elts_free || 3195 !loc->wqe_free)) 3196 return MLX5_TXCMP_CODE_EXIT; 3197 /* Continue the loop with new eMPW session. */ 3198 } 3199 MLX5_ASSERT(false); 3200 } 3201 3202 /** 3203 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 3204 * Data inlining and VLAN insertion are supported. 3205 */ 3206 static __rte_always_inline enum mlx5_txcmp_code 3207 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 3208 struct rte_mbuf **__rte_restrict pkts, 3209 unsigned int pkts_n, 3210 struct mlx5_txq_local *__rte_restrict loc, 3211 unsigned int olx) 3212 { 3213 /* 3214 * Subroutine is the part of mlx5_tx_burst_single() 3215 * and sends single-segment packet with SEND opcode. 3216 */ 3217 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3218 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3219 pkts += loc->pkts_sent + 1; 3220 pkts_n -= loc->pkts_sent; 3221 for (;;) { 3222 struct mlx5_wqe *__rte_restrict wqe; 3223 enum mlx5_txcmp_code ret; 3224 3225 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3226 MLX5_ASSERT(loc->elts_free); 3227 if (MLX5_TXOFF_CONFIG(TXPP)) { 3228 enum mlx5_txcmp_code wret; 3229 3230 /* Generate WAIT for scheduling if requested. */ 3231 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 3232 if (wret == MLX5_TXCMP_CODE_EXIT) 3233 return MLX5_TXCMP_CODE_EXIT; 3234 if (wret == MLX5_TXCMP_CODE_ERROR) 3235 return MLX5_TXCMP_CODE_ERROR; 3236 } 3237 if (MLX5_TXOFF_CONFIG(INLINE)) { 3238 unsigned int inlen, vlan = 0; 3239 3240 inlen = rte_pktmbuf_data_len(loc->mbuf); 3241 if (MLX5_TXOFF_CONFIG(VLAN) && 3242 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 3243 vlan = sizeof(struct rte_vlan_hdr); 3244 inlen += vlan; 3245 } 3246 /* 3247 * If inlining is enabled at configuration time 3248 * the limit must be not less than minimal size. 3249 * Otherwise we would do extra check for data 3250 * size to avoid crashes due to length overflow. 3251 */ 3252 MLX5_ASSERT(txq->inlen_send >= 3253 MLX5_ESEG_MIN_INLINE_SIZE); 3254 if (inlen <= txq->inlen_send) { 3255 unsigned int seg_n, wqe_n; 3256 3257 rte_prefetch0(rte_pktmbuf_mtod 3258 (loc->mbuf, uint8_t *)); 3259 /* Check against minimal length. */ 3260 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3261 return MLX5_TXCMP_CODE_ERROR; 3262 if (loc->mbuf->ol_flags & 3263 RTE_MBUF_F_TX_DYNF_NOINLINE) { 3264 /* 3265 * The hint flag not to inline packet 3266 * data is set. Check whether we can 3267 * follow the hint. 3268 */ 3269 if ((!MLX5_TXOFF_CONFIG(EMPW) && 3270 txq->inlen_mode) || 3271 (MLX5_TXOFF_CONFIG(MPW) && 3272 txq->inlen_mode)) { 3273 if (inlen <= txq->inlen_send) 3274 goto single_inline; 3275 /* 3276 * The hardware requires the 3277 * minimal inline data header. 3278 */ 3279 goto single_min_inline; 3280 } 3281 if (MLX5_TXOFF_CONFIG(VLAN) && 3282 vlan && !txq->vlan_en) { 3283 /* 3284 * We must insert VLAN tag 3285 * by software means. 3286 */ 3287 goto single_part_inline; 3288 } 3289 goto single_no_inline; 3290 } 3291 single_inline: 3292 /* 3293 * Completely inlined packet data WQE: 3294 * - Control Segment, SEND opcode 3295 * - Ethernet Segment, no VLAN insertion 3296 * - Data inlined, VLAN optionally inserted 3297 * - Alignment to MLX5_WSEG_SIZE 3298 * Have to estimate amount of WQEBBs 3299 */ 3300 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 3301 MLX5_ESEG_MIN_INLINE_SIZE + 3302 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3303 /* Check if there are enough WQEBBs. 
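 * Four WQE segments of MLX5_WSEG_SIZE bytes each form one WQEBB,
 * hence the (seg_n + 3) / 4 rounding just below.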
*/ 3304 wqe_n = (seg_n + 3) / 4; 3305 if (wqe_n > loc->wqe_free) 3306 return MLX5_TXCMP_CODE_EXIT; 3307 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3308 loc->wqe_last = wqe; 3309 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 3310 MLX5_OPCODE_SEND, olx); 3311 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 3312 mlx5_tx_eseg_data(txq, loc, wqe, 3313 vlan, inlen, 0, olx); 3314 txq->wqe_ci += wqe_n; 3315 loc->wqe_free -= wqe_n; 3316 /* 3317 * Packet data are completely inlined, 3318 * free the packet immediately. 3319 */ 3320 rte_pktmbuf_free_seg(loc->mbuf); 3321 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 3322 MLX5_TXOFF_CONFIG(MPW)) && 3323 txq->inlen_mode) { 3324 /* 3325 * If minimal inlining is requested the eMPW 3326 * feature should be disabled due to data is 3327 * inlined into Ethernet Segment, which can 3328 * not contain inlined data for eMPW due to 3329 * segment shared for all packets. 3330 */ 3331 struct mlx5_wqe_dseg *__rte_restrict dseg; 3332 unsigned int ds; 3333 uint8_t *dptr; 3334 3335 /* 3336 * The inline-mode settings require 3337 * to inline the specified amount of 3338 * data bytes to the Ethernet Segment. 3339 * We should check the free space in 3340 * WQE ring buffer to inline partially. 3341 */ 3342 single_min_inline: 3343 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 3344 MLX5_ASSERT(inlen > txq->inlen_mode); 3345 MLX5_ASSERT(txq->inlen_mode >= 3346 MLX5_ESEG_MIN_INLINE_SIZE); 3347 /* 3348 * Check whether there are enough free WQEBBs: 3349 * - Control Segment 3350 * - Ethernet Segment 3351 * - First Segment of inlined Ethernet data 3352 * - ... data continued ... 3353 * - Finishing Data Segment of pointer type 3354 */ 3355 ds = (MLX5_WQE_CSEG_SIZE + 3356 MLX5_WQE_ESEG_SIZE + 3357 MLX5_WQE_DSEG_SIZE + 3358 txq->inlen_mode - 3359 MLX5_ESEG_MIN_INLINE_SIZE + 3360 MLX5_WQE_DSEG_SIZE + 3361 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3362 if (loc->wqe_free < ((ds + 3) / 4)) 3363 return MLX5_TXCMP_CODE_EXIT; 3364 /* 3365 * Build the ordinary SEND WQE: 3366 * - Control Segment 3367 * - Ethernet Segment, inline inlen_mode bytes 3368 * - Data Segment of pointer type 3369 */ 3370 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3371 loc->wqe_last = wqe; 3372 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3373 MLX5_OPCODE_SEND, olx); 3374 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 3375 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 3376 txq->inlen_mode, 3377 0, olx); 3378 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3379 txq->inlen_mode - vlan; 3380 inlen -= txq->inlen_mode; 3381 mlx5_tx_dseg_ptr(txq, loc, dseg, 3382 dptr, inlen, olx); 3383 /* 3384 * WQE is built, update the loop parameters 3385 * and got to the next packet. 3386 */ 3387 txq->wqe_ci += (ds + 3) / 4; 3388 loc->wqe_free -= (ds + 3) / 4; 3389 /* We have to store mbuf in elts.*/ 3390 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3391 txq->elts[txq->elts_head++ & txq->elts_m] = 3392 loc->mbuf; 3393 --loc->elts_free; 3394 } else { 3395 uint8_t *dptr; 3396 unsigned int dlen; 3397 3398 /* 3399 * Partially inlined packet data WQE, we have 3400 * some space in title WQEBB, we can fill it 3401 * with some packet data. It takes one WQEBB, 3402 * it is available, no extra space check: 3403 * - Control Segment, SEND opcode 3404 * - Ethernet Segment, no VLAN insertion 3405 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 3406 * - Data Segment, pointer type 3407 * 3408 * We also get here if VLAN insertion is not 3409 * supported by HW, the inline is enabled. 
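 * In that case the VLAN tag is inserted by software into the
 * MLX5_ESEG_MIN_INLINE_SIZE bytes inlined by mlx5_tx_eseg_dmin()
 * below.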
single_part_inline:
				wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
				loc->wqe_last = wqe;
				mlx5_tx_cseg_init(txq, loc, wqe, 4,
						  MLX5_OPCODE_SEND, olx);
				rte_pmd_mlx5_trace_tx_push(loc->mbuf,
							   txq->wqe_ci);
				mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx);
				dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) +
				       MLX5_ESEG_MIN_INLINE_SIZE - vlan;
				/*
				 * The length check is performed above, by
				 * comparing with txq->inlen_send. We should
				 * not get overflow here.
				 */
				MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE);
				dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE;
				mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1],
						 dptr, dlen, olx);
				++txq->wqe_ci;
				--loc->wqe_free;
				/* We have to store mbuf in elts. */
				MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE));
				txq->elts[txq->elts_head++ & txq->elts_m] =
						loc->mbuf;
				--loc->elts_free;
			}
#ifdef MLX5_PMD_SOFT_COUNTERS
			/* Update sent data bytes counter. */
			txq->stats.obytes += vlan +
					     rte_pktmbuf_data_len(loc->mbuf);
#endif
		} else {
			/*
			 * No inlining at all, it means the CPU cycles saving
			 * is prioritized at configuration time, we should not
			 * copy any packet data to the WQE.
			 *
			 * SEND WQE, one WQEBB:
			 * - Control Segment, SEND opcode
			 * - Ethernet Segment, optional VLAN, no inline
			 * - Data Segment, pointer type
			 */
single_no_inline:
			wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
			loc->wqe_last = wqe;
			mlx5_tx_cseg_init(txq, loc, wqe, 3,
					  MLX5_OPCODE_SEND, olx);
			rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci);
			mlx5_tx_eseg_none(txq, loc, wqe, olx);
			mlx5_tx_dseg_ptr
				(txq, loc, &wqe->dseg[0],
				 rte_pktmbuf_mtod(loc->mbuf, uint8_t *),
				 rte_pktmbuf_data_len(loc->mbuf), olx);
			++txq->wqe_ci;
			--loc->wqe_free;
			/*
			 * We should not store the mbuf pointer in elts
			 * if no inlining is configured, this is done
			 * by the calling routine in a batch copy.
			 */
			if (MLX5_TXOFF_CONFIG(INLINE))
				txq->elts[txq->elts_head++ & txq->elts_m] =
						loc->mbuf;
			--loc->elts_free;
#ifdef MLX5_PMD_SOFT_COUNTERS
			/* Update sent data bytes counter. */
			txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf);
			if (MLX5_TXOFF_CONFIG(VLAN) &&
			    loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN)
				txq->stats.obytes +=
					sizeof(struct rte_vlan_hdr);
#endif
		}
		++loc->pkts_sent;
		--pkts_n;
		if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free))
			return MLX5_TXCMP_CODE_EXIT;
		loc->mbuf = *pkts++;
		if (pkts_n > 1)
			rte_prefetch0(*pkts);
		ret = mlx5_tx_able_to_empw(txq, loc, olx, true);
		if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE))
			return ret;
	}
	MLX5_ASSERT(false);
}
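
/**
 * The routine sends single-segment packets, choosing between the eMPW
 * and the ordinary SEND paths. It first checks whether the next packet
 * can be combined into an eMPW session, then loops, switching between
 * the inline/non-inline eMPW subroutines and mlx5_tx_burst_single_send()
 * according to the returned code.
 */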
static __rte_always_inline enum mlx5_txcmp_code
mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq,
		     struct rte_mbuf **__rte_restrict pkts,
		     unsigned int pkts_n,
		     struct mlx5_txq_local *__rte_restrict loc,
		     unsigned int olx)
{
	enum mlx5_txcmp_code ret;

	ret = mlx5_tx_able_to_empw(txq, loc, olx, false);
	if (ret == MLX5_TXCMP_CODE_SINGLE)
		goto ordinary_send;
	MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW);
	for (;;) {
		/* Optimize for inline/no inline eMPW send. */
		ret = (MLX5_TXOFF_CONFIG(INLINE)) ?
			mlx5_tx_burst_empw_inline
				(txq, pkts, pkts_n, loc, olx) :
			mlx5_tx_burst_empw_simple
				(txq, pkts, pkts_n, loc, olx);
		if (ret != MLX5_TXCMP_CODE_SINGLE)
			return ret;
		/* The resources to send one packet should remain. */
		MLX5_ASSERT(loc->elts_free && loc->wqe_free);
ordinary_send:
		ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx);
		MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE);
		if (ret != MLX5_TXCMP_CODE_EMPW)
			return ret;
		/* The resources to send one packet should remain. */
		MLX5_ASSERT(loc->elts_free && loc->wqe_free);
	}
}

/**
 * DPDK Tx callback template. This is the configured template used to
 * generate routines optimized for the specified offload setup.
 * One of these generated functions is chosen at SQ configuration time.
 *
 * @param txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 * @param olx
 *   Configured offloads mask, represents the bits of MLX5_TXOFF_CONFIG_xxx
 *   values. Should be a compile-time constant to take advantage of static
 *   configuration.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
static __rte_always_inline uint16_t
mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq,
		   struct rte_mbuf **__rte_restrict pkts,
		   uint16_t pkts_n,
		   unsigned int olx)
{
	struct mlx5_txq_local loc;
	enum mlx5_txcmp_code ret;
	unsigned int part;

	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (unlikely(!pkts_n))
		return 0;
	if (MLX5_TXOFF_CONFIG(INLINE))
		loc.mbuf_free = 0;
	loc.pkts_sent = 0;
	loc.pkts_copy = 0;
	loc.wqe_last = NULL;

send_loop:
	loc.pkts_loop = loc.pkts_sent;
	/*
	 * Check if there are some CQEs, if any:
	 * - process any encountered errors
	 * - process the completed WQEs
	 * - free related mbufs
	 * - doorbell the NIC about processed CQEs
	 */
	rte_prefetch0(*(pkts + loc.pkts_sent));
	mlx5_tx_handle_completion(txq, olx);
	/*
	 * Calculate the number of available resources - elts and WQEs.
	 * There are two possible different scenarios:
	 * - no data inlining into WQEs, one WQEBB may contain up to
	 *   four packets, in this case elts become the scarce resource
	 * - data inlining into WQEs, one packet may require multiple
	 *   WQEBBs, the WQEs become the limiting factor.
	 */
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	loc.elts_free = txq->elts_s -
				(uint16_t)(txq->elts_head - txq->elts_tail);
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	loc.wqe_free = txq->wqe_s -
				(uint16_t)(txq->wqe_ci - txq->wqe_pi);
	if (unlikely(!loc.elts_free || !loc.wqe_free))
		goto burst_exit;
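	/*
	 * For instance (hypothetical sizing, not taken from the code): with
	 * 1024 elts and 256 WQEBBs, a non-inlining eMPW burst may describe
	 * up to four packets per WQEBB, so elts_free is exhausted first,
	 * while an inlining setup may spend several WQEBBs on one packet
	 * and wqe_free becomes the limit instead.
	 */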
3612 */ 3613 enter_send_multi: 3614 MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy); 3615 part = loc.pkts_sent - loc.pkts_copy; 3616 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 3617 /* 3618 * There are some single-segment mbufs not 3619 * stored in elts. The mbufs must be in the 3620 * same order as WQEs, so we must copy the 3621 * mbufs to elts here, before the coming 3622 * multi-segment packet mbufs is appended. 3623 */ 3624 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, 3625 part, olx); 3626 loc.pkts_copy = loc.pkts_sent; 3627 } 3628 MLX5_ASSERT(pkts_n > loc.pkts_sent); 3629 ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx); 3630 if (!MLX5_TXOFF_CONFIG(INLINE)) 3631 loc.pkts_copy = loc.pkts_sent; 3632 /* 3633 * These returned code checks are supposed 3634 * to be optimized out due to routine inlining. 3635 */ 3636 if (ret == MLX5_TXCMP_CODE_EXIT) { 3637 /* 3638 * The routine returns this code when 3639 * all packets are sent or there is no 3640 * enough resources to complete request. 3641 */ 3642 break; 3643 } 3644 if (ret == MLX5_TXCMP_CODE_ERROR) { 3645 /* 3646 * The routine returns this code when some error 3647 * in the incoming packets format occurred. 3648 */ 3649 txq->stats.oerrors++; 3650 break; 3651 } 3652 if (ret == MLX5_TXCMP_CODE_SINGLE) { 3653 /* 3654 * The single-segment packet was encountered 3655 * in the array, try to send it with the 3656 * best optimized way, possible engaging eMPW. 3657 */ 3658 goto enter_send_single; 3659 } 3660 if (MLX5_TXOFF_CONFIG(TSO) && 3661 ret == MLX5_TXCMP_CODE_TSO) { 3662 /* 3663 * The single-segment TSO packet was 3664 * encountered in the array. 3665 */ 3666 goto enter_send_tso; 3667 } 3668 /* We must not get here. Something is going wrong. */ 3669 MLX5_ASSERT(false); 3670 txq->stats.oerrors++; 3671 break; 3672 } 3673 /* Dedicated branch for single-segment TSO packets. */ 3674 if (MLX5_TXOFF_CONFIG(TSO) && 3675 unlikely(loc.mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { 3676 /* 3677 * TSO might require special way for inlining 3678 * (dedicated parameters) and is sent with 3679 * MLX5_OPCODE_TSO opcode only, provide this 3680 * in dedicated branch. 3681 */ 3682 enter_send_tso: 3683 MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1); 3684 MLX5_ASSERT(pkts_n > loc.pkts_sent); 3685 ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx); 3686 /* 3687 * These returned code checks are supposed 3688 * to be optimized out due to routine inlining. 3689 */ 3690 if (ret == MLX5_TXCMP_CODE_EXIT) 3691 break; 3692 if (ret == MLX5_TXCMP_CODE_ERROR) { 3693 txq->stats.oerrors++; 3694 break; 3695 } 3696 if (ret == MLX5_TXCMP_CODE_SINGLE) 3697 goto enter_send_single; 3698 if (MLX5_TXOFF_CONFIG(MULTI) && 3699 ret == MLX5_TXCMP_CODE_MULTI) { 3700 /* 3701 * The multi-segment packet was 3702 * encountered in the array. 3703 */ 3704 goto enter_send_multi; 3705 } 3706 /* We must not get here. Something is going wrong. */ 3707 MLX5_ASSERT(false); 3708 txq->stats.oerrors++; 3709 break; 3710 } 3711 /* 3712 * The dedicated branch for the single-segment packets 3713 * without TSO. Often these ones can be sent using 3714 * MLX5_OPCODE_EMPW with multiple packets in one WQE. 3715 * The routine builds the WQEs till it encounters 3716 * the TSO or multi-segment packet (in case if these 3717 * offloads are requested at SQ configuration time). 3718 */ 3719 enter_send_single: 3720 MLX5_ASSERT(pkts_n > loc.pkts_sent); 3721 ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx); 3722 /* 3723 * These returned code checks are supposed 3724 * to be optimized out due to routine inlining. 
3725 */ 3726 if (ret == MLX5_TXCMP_CODE_EXIT) 3727 break; 3728 if (ret == MLX5_TXCMP_CODE_ERROR) { 3729 txq->stats.oerrors++; 3730 break; 3731 } 3732 if (MLX5_TXOFF_CONFIG(MULTI) && 3733 ret == MLX5_TXCMP_CODE_MULTI) { 3734 /* 3735 * The multi-segment packet was 3736 * encountered in the array. 3737 */ 3738 goto enter_send_multi; 3739 } 3740 if (MLX5_TXOFF_CONFIG(TSO) && 3741 ret == MLX5_TXCMP_CODE_TSO) { 3742 /* 3743 * The single-segment TSO packet was 3744 * encountered in the array. 3745 */ 3746 goto enter_send_tso; 3747 } 3748 /* We must not get here. Something is going wrong. */ 3749 MLX5_ASSERT(false); 3750 txq->stats.oerrors++; 3751 break; 3752 } 3753 /* 3754 * Main Tx loop is completed, do the rest: 3755 * - set completion request if thresholds are reached 3756 * - doorbell the hardware 3757 * - copy the rest of mbufs to elts (if any) 3758 */ 3759 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) || 3760 loc.pkts_sent >= loc.pkts_copy); 3761 /* Take a shortcut if nothing is sent. */ 3762 if (unlikely(loc.pkts_sent == loc.pkts_loop)) 3763 goto burst_exit; 3764 /* Request CQE generation if limits are reached. */ 3765 if (MLX5_TXOFF_CONFIG(TXPP) && __rte_trace_point_fp_is_enabled()) 3766 mlx5_tx_request_completion_trace(txq, &loc, olx); 3767 else 3768 mlx5_tx_request_completion(txq, &loc, olx); 3769 /* 3770 * Ring QP doorbell immediately after WQE building completion 3771 * to improve latencies. The pure software related data treatment 3772 * can be completed after doorbell. Tx CQEs for this SQ are 3773 * processed in this thread only by the polling. 3774 * 3775 * The rdma core library can map doorbell register in two ways, 3776 * depending on the environment variable "MLX5_SHUT_UP_BF": 3777 * 3778 * - as regular cached memory, the variable is either missing or 3779 * set to zero. This type of mapping may cause the significant 3780 * doorbell register writing latency and requires explicit memory 3781 * write barrier to mitigate this issue and prevent write combining. 3782 * 3783 * - as non-cached memory, the variable is present and set to not "0" 3784 * value. This type of mapping may cause performance impact under 3785 * heavy loading conditions but the explicit write memory barrier is 3786 * not required and it may improve core performance. 3787 * 3788 * - the legacy behaviour (prior 19.08 release) was to use some 3789 * heuristics to decide whether write memory barrier should 3790 * be performed. This behavior is supported with specifying 3791 * tx_db_nc=2, write barrier is skipped if application provides 3792 * the full recommended burst of packets, it supposes the next 3793 * packets are coming and the write barrier will be issued on 3794 * the next burst (after descriptor writing, at least). 3795 */ 3796 mlx5_doorbell_ring(mlx5_tx_bfreg(txq), 3797 *(volatile uint64_t *)loc.wqe_last, txq->wqe_ci, 3798 txq->qp_db, !txq->db_nc && 3799 (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST)); 3800 /* Not all of the mbufs may be stored into elts yet. */ 3801 part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy; 3802 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 3803 /* 3804 * There are some single-segment mbufs not stored in elts. 3805 * It can be only if the last packet was single-segment. 3806 * The copying is gathered into one place due to it is 3807 * a good opportunity to optimize that with SIMD. 3808 * Unfortunately if inlining is enabled the gaps in pointer 3809 * array may happen due to early freeing of the inlined mbufs. 
	/* Not all of the mbufs may be stored into elts yet. */
	part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
	if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
		/*
		 * There are some single-segment mbufs not stored in elts.
		 * This can only happen if the last packet was single-segment.
		 * The copying is gathered into one place because it is
		 * a good opportunity to optimize that with SIMD.
		 * Unfortunately if inlining is enabled, gaps in the pointer
		 * array may happen due to early freeing of the inlined mbufs.
		 */
		mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx);
		loc.pkts_copy = loc.pkts_sent;
	}
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (pkts_n > loc.pkts_sent) {
		/*
		 * If the burst size is large there might not be enough CQEs
		 * fetched from the completion queue and not enough resources
		 * freed to send all the packets.
		 */
		goto send_loop;
	}
burst_exit:
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += loc.pkts_sent;
#endif
	if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
		__mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
	/* Trace productive bursts only. */
	if (__rte_trace_point_fp_is_enabled() && loc.pkts_sent)
		rte_pmd_mlx5_trace_tx_exit(mlx5_read_pcibar_clock_from_txq(txq),
					   loc.pkts_sent, pkts_n);
	return loc.pkts_sent;
}

/**
 * Check whether the given TxQ is external.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param queue_idx
 *   Tx queue index.
 *
 * @return
 *   True if it is an external TxQ, otherwise false.
 */
static __rte_always_inline bool
mlx5_is_external_txq(struct rte_eth_dev *dev, uint16_t queue_idx)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_external_q *txq;

	if (!priv->ext_txqs || queue_idx < MLX5_EXTERNAL_TX_QUEUE_ID_MIN)
		return false;
	txq = &priv->ext_txqs[queue_idx - MLX5_EXTERNAL_TX_QUEUE_ID_MIN];
	return !!rte_atomic_load_explicit(&txq->refcnt, rte_memory_order_relaxed);
}

#endif /* RTE_PMD_MLX5_TX_H_ */