1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2021 6WIND S.A. 3 * Copyright 2021 Mellanox Technologies, Ltd 4 */ 5 6 #ifndef RTE_PMD_MLX5_TX_H_ 7 #define RTE_PMD_MLX5_TX_H_ 8 9 #include <stdint.h> 10 #include <sys/queue.h> 11 12 #include <rte_mbuf.h> 13 #include <rte_mempool.h> 14 #include <rte_common.h> 15 #include <rte_spinlock.h> 16 #include <rte_trace_point.h> 17 18 #include <mlx5_common.h> 19 #include <mlx5_common_mr.h> 20 21 #include "mlx5.h" 22 #include "mlx5_autoconf.h" 23 #include "mlx5_rxtx.h" 24 #include "mlx5_trace.h" 25 26 /* TX burst subroutines return codes. */ 27 enum mlx5_txcmp_code { 28 MLX5_TXCMP_CODE_EXIT = 0, 29 MLX5_TXCMP_CODE_ERROR, 30 MLX5_TXCMP_CODE_SINGLE, 31 MLX5_TXCMP_CODE_MULTI, 32 MLX5_TXCMP_CODE_TSO, 33 MLX5_TXCMP_CODE_EMPW, 34 }; 35 36 /* 37 * These defines are used to configure Tx burst routine option set supported 38 * at compile time. The not specified options are optimized out due to if 39 * conditions can be explicitly calculated at compile time. 40 * The offloads with bigger runtime check (require more CPU cycles toskip) 41 * overhead should have the bigger index - this is needed to select the better 42 * matching routine function if no exact match and some offloads are not 43 * actually requested. 44 */ 45 #define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets.*/ 46 #define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported.*/ 47 #define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads.*/ 48 #define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Check Sums offloaded. */ 49 #define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */ 50 #define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported.*/ 51 #define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */ 52 #define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported.*/ 53 #define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported.*/ 54 #define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp.*/ 55 56 /* The most common offloads groups. */ 57 #define MLX5_TXOFF_CONFIG_NONE 0 58 #define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \ 59 MLX5_TXOFF_CONFIG_TSO | \ 60 MLX5_TXOFF_CONFIG_SWP | \ 61 MLX5_TXOFF_CONFIG_CSUM | \ 62 MLX5_TXOFF_CONFIG_INLINE | \ 63 MLX5_TXOFF_CONFIG_VLAN | \ 64 MLX5_TXOFF_CONFIG_METADATA) 65 66 #define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask) 67 68 #define MLX5_TXOFF_PRE_DECL(func) \ 69 uint16_t mlx5_tx_burst_##func(void *txq, \ 70 struct rte_mbuf **pkts, \ 71 uint16_t pkts_n) 72 73 #define MLX5_TXOFF_DECL(func, olx) \ 74 uint16_t mlx5_tx_burst_##func(void *txq, \ 75 struct rte_mbuf **pkts, \ 76 uint16_t pkts_n) \ 77 { \ 78 return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \ 79 pkts, pkts_n, (olx)); \ 80 } 81 82 /* Mbuf dynamic flag offset for inline. */ 83 extern uint64_t rte_net_mlx5_dynf_inline_mask; 84 #define RTE_MBUF_F_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask 85 86 extern uint32_t mlx5_ptype_table[] __rte_cache_aligned; 87 extern uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned; 88 extern uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned; 89 90 struct mlx5_txq_stats { 91 #ifdef MLX5_PMD_SOFT_COUNTERS 92 uint64_t opackets; /**< Total of successfully sent packets. */ 93 uint64_t obytes; /**< Total of successfully sent bytes. */ 94 #endif 95 uint64_t oerrors; /**< Total number of failed transmitted packets. */ 96 }; 97 98 /* TX queue send local data. */ 99 __extension__ 100 struct mlx5_txq_local { 101 struct mlx5_wqe *wqe_last; /* last sent WQE pointer. */ 102 struct rte_mbuf *mbuf; /* first mbuf to process. */ 103 uint16_t pkts_copy; /* packets copied to elts. */ 104 uint16_t pkts_sent; /* packets sent. */ 105 uint16_t pkts_loop; /* packets sent on loop entry. */ 106 uint16_t elts_free; /* available elts remain. */ 107 uint16_t wqe_free; /* available wqe remain. */ 108 uint16_t mbuf_off; /* data offset in current mbuf. */ 109 uint16_t mbuf_nseg; /* number of remaining mbuf. */ 110 uint16_t mbuf_free; /* number of inline mbufs to free. */ 111 }; 112 113 /* TX queue descriptor. */ 114 __extension__ 115 struct mlx5_txq_data { 116 uint16_t elts_head; /* Current counter in (*elts)[]. */ 117 uint16_t elts_tail; /* Counter of first element awaiting completion. */ 118 uint16_t elts_comp; /* elts index since last completion request. */ 119 uint16_t elts_s; /* Number of mbuf elements. */ 120 uint16_t elts_m; /* Mask for mbuf elements indices. */ 121 /* Fields related to elts mbuf storage. */ 122 uint16_t wqe_ci; /* Consumer index for work queue. */ 123 uint16_t wqe_pi; /* Producer index for work queue. */ 124 uint16_t wqe_s; /* Number of WQ elements. */ 125 uint16_t wqe_m; /* Mask Number for WQ elements. */ 126 uint16_t wqe_comp; /* WQE index since last completion request. */ 127 uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */ 128 /* WQ related fields. */ 129 uint16_t cq_ci; /* Consumer index for completion queue. */ 130 uint16_t cq_pi; /* Production index for completion queue. */ 131 uint16_t cqe_s; /* Number of CQ elements. */ 132 uint16_t cqe_m; /* Mask for CQ indices. */ 133 /* CQ related fields. */ 134 uint16_t elts_n:4; /* elts[] length (in log2). */ 135 uint16_t cqe_n:4; /* Number of CQ elements (in log2). */ 136 uint16_t wqe_n:4; /* Number of WQ elements (in log2). */ 137 uint16_t tso_en:1; /* When set hardware TSO is enabled. */ 138 uint16_t tunnel_en:1; 139 /* When set TX offload for tunneled packets are supported. */ 140 uint16_t swp_en:1; /* Whether SW parser is enabled. */ 141 uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */ 142 uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */ 143 uint16_t db_heu:1; /* Doorbell heuristic write barrier. */ 144 uint16_t rt_timestamp:1; /* Realtime timestamp format. */ 145 uint16_t wait_on_time:1; /* WQE with timestamp is supported. */ 146 uint16_t fast_free:1; /* mbuf fast free on Tx is enabled. */ 147 uint16_t inlen_send; /* Ordinary send data inline size. */ 148 uint16_t inlen_empw; /* eMPW max packet size to inline. */ 149 uint16_t inlen_mode; /* Minimal data length to inline. */ 150 uint8_t tx_aggr_affinity; /* TxQ affinity configuration. */ 151 uint32_t qp_num_8s; /* QP number shifted by 8. */ 152 uint64_t offloads; /* Offloads for Tx Queue. */ 153 struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */ 154 struct mlx5_wqe *wqes; /* Work queue. */ 155 struct mlx5_wqe *wqes_end; /* Work queue array limit. */ 156 #ifdef RTE_LIBRTE_MLX5_DEBUG 157 uint32_t *fcqs; /* Free completion queue (debug extended). */ 158 #else 159 uint16_t *fcqs; /* Free completion queue. */ 160 #endif 161 volatile struct mlx5_cqe *cqes; /* Completion queue. */ 162 volatile uint32_t *qp_db; /* Work queue doorbell. */ 163 volatile uint32_t *cq_db; /* Completion queue doorbell. */ 164 uint16_t port_id; /* Port ID of device. */ 165 uint16_t idx; /* Queue index. */ 166 uint64_t rt_timemask; /* Scheduling timestamp mask. */ 167 uint64_t ts_mask; /* Timestamp flag dynamic mask. */ 168 uint64_t ts_last; /* Last scheduled timestamp. */ 169 int32_t ts_offset; /* Timestamp field dynamic offset. */ 170 struct mlx5_dev_ctx_shared *sh; /* Shared context. */ 171 struct mlx5_txq_stats stats; /* TX queue counters. */ 172 struct mlx5_txq_stats stats_reset; /* stats on last reset. */ 173 struct mlx5_uar_data uar_data; 174 struct rte_mbuf *elts[]; 175 /* Storage for queued packets, must be the last field. */ 176 } __rte_cache_aligned; 177 178 /* TX queue control descriptor. */ 179 __extension__ 180 struct mlx5_txq_ctrl { 181 LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */ 182 uint32_t refcnt; /* Reference counter. */ 183 unsigned int socket; /* CPU socket ID for allocations. */ 184 bool is_hairpin; /* Whether TxQ type is Hairpin. */ 185 unsigned int max_inline_data; /* Max inline data. */ 186 unsigned int max_tso_header; /* Max TSO header size. */ 187 struct mlx5_txq_obj *obj; /* Verbs/DevX queue object. */ 188 struct mlx5_priv *priv; /* Back pointer to private data. */ 189 off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */ 190 uint16_t dump_file_n; /* Number of dump files. */ 191 struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */ 192 uint32_t hairpin_status; /* Hairpin binding status. */ 193 struct mlx5_txq_data txq; /* Data path structure. */ 194 /* Must be the last field in the structure, contains elts[]. */ 195 }; 196 197 /* mlx5_txq.c */ 198 199 int mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id); 200 int mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id); 201 int mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t queue_id); 202 int mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t queue_id); 203 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 204 unsigned int socket, const struct rte_eth_txconf *conf); 205 int mlx5_tx_hairpin_queue_setup 206 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 207 const struct rte_eth_hairpin_conf *hairpin_conf); 208 void mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 209 int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd); 210 void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev); 211 int mlx5_txq_obj_verify(struct rte_eth_dev *dev); 212 struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, 213 uint16_t desc, unsigned int socket, 214 const struct rte_eth_txconf *conf); 215 struct mlx5_txq_ctrl *mlx5_txq_hairpin_new 216 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 217 const struct rte_eth_hairpin_conf *hairpin_conf); 218 struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx); 219 int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx); 220 int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx); 221 int mlx5_txq_verify(struct rte_eth_dev *dev); 222 int mlx5_txq_get_sqn(struct mlx5_txq_ctrl *txq); 223 void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl); 224 void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl); 225 uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev); 226 void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev); 227 int mlx5_count_aggr_ports(struct rte_eth_dev *dev); 228 int mlx5_map_aggr_tx_affinity(struct rte_eth_dev *dev, uint16_t tx_queue_id, 229 uint8_t affinity); 230 231 /* mlx5_tx.c */ 232 233 void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, 234 unsigned int olx __rte_unused); 235 int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset); 236 void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, 237 struct rte_eth_txq_info *qinfo); 238 int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t tx_queue_id, 239 struct rte_eth_burst_mode *mode); 240 241 /* mlx5_tx_empw.c */ 242 243 MLX5_TXOFF_PRE_DECL(full_empw); 244 MLX5_TXOFF_PRE_DECL(none_empw); 245 MLX5_TXOFF_PRE_DECL(md_empw); 246 MLX5_TXOFF_PRE_DECL(mt_empw); 247 MLX5_TXOFF_PRE_DECL(mtsc_empw); 248 MLX5_TXOFF_PRE_DECL(mti_empw); 249 MLX5_TXOFF_PRE_DECL(mtv_empw); 250 MLX5_TXOFF_PRE_DECL(mtiv_empw); 251 MLX5_TXOFF_PRE_DECL(sc_empw); 252 MLX5_TXOFF_PRE_DECL(sci_empw); 253 MLX5_TXOFF_PRE_DECL(scv_empw); 254 MLX5_TXOFF_PRE_DECL(sciv_empw); 255 MLX5_TXOFF_PRE_DECL(i_empw); 256 MLX5_TXOFF_PRE_DECL(v_empw); 257 MLX5_TXOFF_PRE_DECL(iv_empw); 258 259 /* mlx5_tx_nompw.c */ 260 261 MLX5_TXOFF_PRE_DECL(full); 262 MLX5_TXOFF_PRE_DECL(none); 263 MLX5_TXOFF_PRE_DECL(md); 264 MLX5_TXOFF_PRE_DECL(mt); 265 MLX5_TXOFF_PRE_DECL(mtsc); 266 MLX5_TXOFF_PRE_DECL(mti); 267 MLX5_TXOFF_PRE_DECL(mtv); 268 MLX5_TXOFF_PRE_DECL(mtiv); 269 MLX5_TXOFF_PRE_DECL(sc); 270 MLX5_TXOFF_PRE_DECL(sci); 271 MLX5_TXOFF_PRE_DECL(scv); 272 MLX5_TXOFF_PRE_DECL(sciv); 273 MLX5_TXOFF_PRE_DECL(i); 274 MLX5_TXOFF_PRE_DECL(v); 275 MLX5_TXOFF_PRE_DECL(iv); 276 277 /* mlx5_tx_txpp.c */ 278 279 MLX5_TXOFF_PRE_DECL(full_ts_nompw); 280 MLX5_TXOFF_PRE_DECL(full_ts_nompwi); 281 MLX5_TXOFF_PRE_DECL(full_ts); 282 MLX5_TXOFF_PRE_DECL(full_ts_noi); 283 MLX5_TXOFF_PRE_DECL(none_ts); 284 MLX5_TXOFF_PRE_DECL(mdi_ts); 285 MLX5_TXOFF_PRE_DECL(mti_ts); 286 MLX5_TXOFF_PRE_DECL(mtiv_ts); 287 288 /* mlx5_tx_mpw.c */ 289 290 MLX5_TXOFF_PRE_DECL(none_mpw); 291 MLX5_TXOFF_PRE_DECL(mci_mpw); 292 MLX5_TXOFF_PRE_DECL(mc_mpw); 293 MLX5_TXOFF_PRE_DECL(i_mpw); 294 295 static __rte_always_inline struct mlx5_uar_data * 296 mlx5_tx_bfreg(struct mlx5_txq_data *txq) 297 { 298 return &MLX5_PROC_PRIV(txq->port_id)->uar_table[txq->idx]; 299 } 300 301 /** 302 * Ring TX queue doorbell and flush the update by write memory barrier. 303 * 304 * @param txq 305 * Pointer to TX queue structure. 306 * @param wqe 307 * Pointer to the last WQE posted in the NIC. 308 */ 309 static __rte_always_inline void 310 mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe) 311 { 312 mlx5_doorbell_ring(mlx5_tx_bfreg(txq), *(volatile uint64_t *)wqe, 313 txq->wqe_ci, txq->qp_db, 1); 314 } 315 316 /** 317 * Convert timestamp from mbuf format to linear counter 318 * of Clock Queue completions (24 bits). 319 * 320 * @param sh 321 * Pointer to the device shared context to fetch Tx 322 * packet pacing timestamp and parameters. 323 * @param ts 324 * Timestamp from mbuf to convert. 325 * @return 326 * positive or zero value - completion ID to wait. 327 * negative value - conversion error. 328 */ 329 static __rte_always_inline int32_t 330 mlx5_txpp_convert_tx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t mts) 331 { 332 uint64_t ts, ci; 333 uint32_t tick; 334 335 do { 336 /* 337 * Read atomically two uint64_t fields and compare lsb bits. 338 * It there is no match - the timestamp was updated in 339 * the service thread, data should be re-read. 340 */ 341 rte_compiler_barrier(); 342 ci = __atomic_load_n(&sh->txpp.ts.ci_ts, __ATOMIC_RELAXED); 343 ts = __atomic_load_n(&sh->txpp.ts.ts, __ATOMIC_RELAXED); 344 rte_compiler_barrier(); 345 if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH))) 346 break; 347 } while (true); 348 /* Perform the skew correction, positive value to send earlier. */ 349 mts -= sh->txpp.skew; 350 mts -= ts; 351 if (unlikely(mts >= UINT64_MAX / 2)) { 352 /* We have negative integer, mts is in the past. */ 353 __atomic_fetch_add(&sh->txpp.err_ts_past, 354 1, __ATOMIC_RELAXED); 355 return -1; 356 } 357 tick = sh->txpp.tick; 358 MLX5_ASSERT(tick); 359 /* Convert delta to completions, round up. */ 360 mts = (mts + tick - 1) / tick; 361 if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) { 362 /* We have mts is too distant future. */ 363 __atomic_fetch_add(&sh->txpp.err_ts_future, 364 1, __ATOMIC_RELAXED); 365 return -1; 366 } 367 mts <<= 64 - MLX5_CQ_INDEX_WIDTH; 368 ci += mts; 369 ci >>= 64 - MLX5_CQ_INDEX_WIDTH; 370 return ci; 371 } 372 373 /** 374 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 375 * Flags must be preliminary initialized to zero. 376 * 377 * @param loc 378 * Pointer to burst routine local context. 379 * @param swp_flags 380 * Pointer to store Software Parser flags. 381 * @param olx 382 * Configured Tx offloads mask. It is fully defined at 383 * compile time and may be used for optimization. 384 * 385 * @return 386 * Software Parser offsets packed in dword. 387 * Software Parser flags are set by pointer. 388 */ 389 static __rte_always_inline uint32_t 390 txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, 391 uint8_t *swp_flags, 392 unsigned int olx) 393 { 394 uint64_t ol, tunnel; 395 unsigned int idx, off; 396 uint32_t set; 397 398 if (!MLX5_TXOFF_CONFIG(SWP)) 399 return 0; 400 ol = loc->mbuf->ol_flags; 401 tunnel = ol & RTE_MBUF_F_TX_TUNNEL_MASK; 402 /* 403 * Check whether Software Parser is required. 404 * Only customized tunnels may ask for. 405 */ 406 if (likely(tunnel != RTE_MBUF_F_TX_TUNNEL_UDP && tunnel != RTE_MBUF_F_TX_TUNNEL_IP)) 407 return 0; 408 /* 409 * The index should have: 410 * bit[0:1] = RTE_MBUF_F_TX_L4_MASK 411 * bit[4] = RTE_MBUF_F_TX_IPV6 412 * bit[8] = RTE_MBUF_F_TX_OUTER_IPV6 413 * bit[9] = RTE_MBUF_F_TX_OUTER_UDP 414 */ 415 idx = (ol & (RTE_MBUF_F_TX_L4_MASK | RTE_MBUF_F_TX_IPV6 | RTE_MBUF_F_TX_OUTER_IPV6)) >> 52; 416 idx |= (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) ? (1 << 9) : 0; 417 *swp_flags = mlx5_swp_types_table[idx]; 418 /* 419 * Set offsets for SW parser. Since ConnectX-5, SW parser just 420 * complements HW parser. SW parser starts to engage only if HW parser 421 * can't reach a header. For the older devices, HW parser will not kick 422 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 423 * should be set regardless of HW offload. 424 */ 425 off = loc->mbuf->outer_l2_len; 426 if (MLX5_TXOFF_CONFIG(VLAN) && ol & RTE_MBUF_F_TX_VLAN) 427 off += sizeof(struct rte_vlan_hdr); 428 set = (off >> 1) << 8; /* Outer L3 offset. */ 429 off += loc->mbuf->outer_l3_len; 430 if (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) 431 set |= off >> 1; /* Outer L4 offset. */ 432 if (ol & (RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IPV6)) { /* Inner IP. */ 433 const uint64_t csum = ol & RTE_MBUF_F_TX_L4_MASK; 434 off += loc->mbuf->l2_len; 435 set |= (off >> 1) << 24; /* Inner L3 offset. */ 436 if (csum == RTE_MBUF_F_TX_TCP_CKSUM || 437 csum == RTE_MBUF_F_TX_UDP_CKSUM || 438 (MLX5_TXOFF_CONFIG(TSO) && ol & RTE_MBUF_F_TX_TCP_SEG)) { 439 off += loc->mbuf->l3_len; 440 set |= (off >> 1) << 16; /* Inner L4 offset. */ 441 } 442 } 443 set = rte_cpu_to_le_32(set); 444 return set; 445 } 446 447 /** 448 * Convert the Checksum offloads to Verbs. 449 * 450 * @param buf 451 * Pointer to the mbuf. 452 * 453 * @return 454 * Converted checksum flags. 455 */ 456 static __rte_always_inline uint8_t 457 txq_ol_cksum_to_cs(struct rte_mbuf *buf) 458 { 459 uint32_t idx; 460 uint8_t is_tunnel = !!(buf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK); 461 const uint64_t ol_flags_mask = RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_L4_MASK | 462 RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_OUTER_IP_CKSUM; 463 464 /* 465 * The index should have: 466 * bit[0] = RTE_MBUF_F_TX_TCP_SEG 467 * bit[2:3] = RTE_MBUF_F_TX_UDP_CKSUM, RTE_MBUF_F_TX_TCP_CKSUM 468 * bit[4] = RTE_MBUF_F_TX_IP_CKSUM 469 * bit[8] = RTE_MBUF_F_TX_OUTER_IP_CKSUM 470 * bit[9] = tunnel 471 */ 472 idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9); 473 return mlx5_cksum_table[idx]; 474 } 475 476 /** 477 * Free the mbufs from the linear array of pointers. 478 * 479 * @param txq 480 * Pointer to Tx queue structure. 481 * @param pkts 482 * Pointer to array of packets to be free. 483 * @param pkts_n 484 * Number of packets to be freed. 485 * @param olx 486 * Configured Tx offloads mask. It is fully defined at 487 * compile time and may be used for optimization. 488 */ 489 static __rte_always_inline void 490 mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 491 struct rte_mbuf **__rte_restrict pkts, 492 unsigned int pkts_n, 493 unsigned int olx __rte_unused) 494 { 495 struct rte_mempool *pool = NULL; 496 struct rte_mbuf **p_free = NULL; 497 struct rte_mbuf *mbuf; 498 unsigned int n_free = 0; 499 500 /* 501 * The implemented algorithm eliminates 502 * copying pointers to temporary array 503 * for rte_mempool_put_bulk() calls. 504 */ 505 MLX5_ASSERT(pkts); 506 MLX5_ASSERT(pkts_n); 507 /* 508 * Free mbufs directly to the pool in bulk 509 * if fast free offload is engaged 510 */ 511 if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) { 512 mbuf = *pkts; 513 pool = mbuf->pool; 514 rte_mempool_put_bulk(pool, (void *)pkts, pkts_n); 515 return; 516 } 517 for (;;) { 518 for (;;) { 519 /* 520 * Decrement mbuf reference counter, detach 521 * indirect and external buffers if needed. 522 */ 523 mbuf = rte_pktmbuf_prefree_seg(*pkts); 524 if (likely(mbuf != NULL)) { 525 MLX5_ASSERT(mbuf == *pkts); 526 if (likely(n_free != 0)) { 527 if (unlikely(pool != mbuf->pool)) 528 /* From different pool. */ 529 break; 530 } else { 531 /* Start new scan array. */ 532 pool = mbuf->pool; 533 p_free = pkts; 534 } 535 ++n_free; 536 ++pkts; 537 --pkts_n; 538 if (unlikely(pkts_n == 0)) { 539 mbuf = NULL; 540 break; 541 } 542 } else { 543 /* 544 * This happens if mbuf is still referenced. 545 * We can't put it back to the pool, skip. 546 */ 547 ++pkts; 548 --pkts_n; 549 if (unlikely(n_free != 0)) 550 /* There is some array to free.*/ 551 break; 552 if (unlikely(pkts_n == 0)) 553 /* Last mbuf, nothing to free. */ 554 return; 555 } 556 } 557 for (;;) { 558 /* 559 * This loop is implemented to avoid multiple 560 * inlining of rte_mempool_put_bulk(). 561 */ 562 MLX5_ASSERT(pool); 563 MLX5_ASSERT(p_free); 564 MLX5_ASSERT(n_free); 565 /* 566 * Free the array of pre-freed mbufs 567 * belonging to the same memory pool. 568 */ 569 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 570 if (unlikely(mbuf != NULL)) { 571 /* There is the request to start new scan. */ 572 pool = mbuf->pool; 573 p_free = pkts++; 574 n_free = 1; 575 --pkts_n; 576 if (likely(pkts_n != 0)) 577 break; 578 /* 579 * This is the last mbuf to be freed. 580 * Do one more loop iteration to complete. 581 * This is rare case of the last unique mbuf. 582 */ 583 mbuf = NULL; 584 continue; 585 } 586 if (likely(pkts_n == 0)) 587 return; 588 n_free = 0; 589 break; 590 } 591 } 592 } 593 594 /** 595 * No inline version to free buffers for optimal call 596 * on the tx_burst completion. 597 */ 598 static __rte_noinline void 599 __mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 600 struct rte_mbuf **__rte_restrict pkts, 601 unsigned int pkts_n, 602 unsigned int olx __rte_unused) 603 { 604 mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx); 605 } 606 607 /** 608 * Free the mbuf from the elts ring buffer till new tail. 609 * 610 * @param txq 611 * Pointer to Tx queue structure. 612 * @param tail 613 * Index in elts to free up to, becomes new elts tail. 614 * @param olx 615 * Configured Tx offloads mask. It is fully defined at 616 * compile time and may be used for optimization. 617 */ 618 static __rte_always_inline void 619 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 620 uint16_t tail, 621 unsigned int olx __rte_unused) 622 { 623 uint16_t n_elts = tail - txq->elts_tail; 624 625 MLX5_ASSERT(n_elts); 626 MLX5_ASSERT(n_elts <= txq->elts_s); 627 /* 628 * Implement a loop to support ring buffer wraparound 629 * with single inlining of mlx5_tx_free_mbuf(). 630 */ 631 do { 632 unsigned int part; 633 634 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 635 part = RTE_MIN(part, n_elts); 636 MLX5_ASSERT(part); 637 MLX5_ASSERT(part <= txq->elts_s); 638 mlx5_tx_free_mbuf(txq, 639 &txq->elts[txq->elts_tail & txq->elts_m], 640 part, olx); 641 txq->elts_tail += part; 642 n_elts -= part; 643 } while (n_elts); 644 } 645 646 /** 647 * Store the mbuf being sent into elts ring buffer. 648 * On Tx completion these mbufs will be freed. 649 * 650 * @param txq 651 * Pointer to Tx queue structure. 652 * @param pkts 653 * Pointer to array of packets to be stored. 654 * @param pkts_n 655 * Number of packets to be stored. 656 * @param olx 657 * Configured Tx offloads mask. It is fully defined at 658 * compile time and may be used for optimization. 659 */ 660 static __rte_always_inline void 661 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 662 struct rte_mbuf **__rte_restrict pkts, 663 unsigned int pkts_n, 664 unsigned int olx __rte_unused) 665 { 666 unsigned int part; 667 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 668 669 MLX5_ASSERT(pkts); 670 MLX5_ASSERT(pkts_n); 671 part = txq->elts_s - (txq->elts_head & txq->elts_m); 672 MLX5_ASSERT(part); 673 MLX5_ASSERT(part <= txq->elts_s); 674 /* This code is a good candidate for vectorizing with SIMD. */ 675 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 676 (void *)pkts, 677 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 678 txq->elts_head += pkts_n; 679 if (unlikely(part < pkts_n)) 680 /* The copy is wrapping around the elts array. */ 681 rte_memcpy((void *)elts, (void *)(pkts + part), 682 (pkts_n - part) * sizeof(struct rte_mbuf *)); 683 } 684 685 /** 686 * Check if the completion request flag should be set in the last WQE. 687 * Both pushed mbufs and WQEs are monitored and the completion request 688 * flag is set if any of thresholds is reached. 689 * 690 * @param txq 691 * Pointer to TX queue structure. 692 * @param loc 693 * Pointer to burst routine local context. 694 * @param olx 695 * Configured Tx offloads mask. It is fully defined at 696 * compile time and may be used for optimization. 697 */ 698 static __rte_always_inline void 699 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 700 struct mlx5_txq_local *__rte_restrict loc, 701 unsigned int olx) 702 { 703 uint16_t head = txq->elts_head; 704 unsigned int part; 705 706 part = MLX5_TXOFF_CONFIG(INLINE) ? 707 0 : loc->pkts_sent - loc->pkts_copy; 708 head += part; 709 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 710 (MLX5_TXOFF_CONFIG(INLINE) && 711 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 712 volatile struct mlx5_wqe *last = loc->wqe_last; 713 714 MLX5_ASSERT(last); 715 txq->elts_comp = head; 716 if (MLX5_TXOFF_CONFIG(INLINE)) 717 txq->wqe_comp = txq->wqe_ci; 718 /* Request unconditional completion on last WQE. */ 719 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 720 MLX5_COMP_MODE_OFFSET); 721 /* Save elts_head in dedicated free on completion queue. */ 722 #ifdef RTE_LIBRTE_MLX5_DEBUG 723 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 724 (last->cseg.opcode >> 8) << 16; 725 #else 726 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 727 #endif 728 /* A CQE slot must always be available. */ 729 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 730 } 731 } 732 733 /** 734 * Set completion request flag for all issued WQEs. 735 * This routine is intended to be used with enabled fast path tracing 736 * and send scheduling on time to provide the detailed report in trace 737 * for send completions on every WQE. 738 * 739 * @param txq 740 * Pointer to TX queue structure. 741 * @param loc 742 * Pointer to burst routine local context. 743 * @param olx 744 * Configured Tx offloads mask. It is fully defined at 745 * compile time and may be used for optimization. 746 */ 747 static __rte_always_inline void 748 mlx5_tx_request_completion_trace(struct mlx5_txq_data *__rte_restrict txq, 749 struct mlx5_txq_local *__rte_restrict loc, 750 unsigned int olx) 751 { 752 uint16_t head = txq->elts_comp; 753 754 while (txq->wqe_comp != txq->wqe_ci) { 755 volatile struct mlx5_wqe *wqe; 756 uint32_t wqe_n; 757 758 MLX5_ASSERT(loc->wqe_last); 759 wqe = txq->wqes + (txq->wqe_comp & txq->wqe_m); 760 if (wqe == loc->wqe_last) { 761 head = txq->elts_head; 762 head += MLX5_TXOFF_CONFIG(INLINE) ? 763 0 : loc->pkts_sent - loc->pkts_copy; 764 txq->elts_comp = head; 765 } 766 /* Completion request flag was set on cseg constructing. */ 767 #ifdef RTE_LIBRTE_MLX5_DEBUG 768 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 769 (wqe->cseg.opcode >> 8) << 16; 770 #else 771 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 772 #endif 773 /* A CQE slot must always be available. */ 774 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 775 /* Advance to the next WQE in the queue. */ 776 wqe_n = rte_be_to_cpu_32(wqe->cseg.sq_ds) & 0x3F; 777 txq->wqe_comp += RTE_ALIGN(wqe_n, 4) / 4; 778 } 779 } 780 781 /** 782 * Build the Control Segment with specified opcode: 783 * - MLX5_OPCODE_SEND 784 * - MLX5_OPCODE_ENHANCED_MPSW 785 * - MLX5_OPCODE_TSO 786 * 787 * @param txq 788 * Pointer to TX queue structure. 789 * @param loc 790 * Pointer to burst routine local context. 791 * @param wqe 792 * Pointer to WQE to fill with built Control Segment. 793 * @param ds 794 * Supposed length of WQE in segments. 795 * @param opcode 796 * SQ WQE opcode to put into Control Segment. 797 * @param olx 798 * Configured Tx offloads mask. It is fully defined at 799 * compile time and may be used for optimization. 800 */ 801 static __rte_always_inline void 802 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 803 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 804 struct mlx5_wqe *__rte_restrict wqe, 805 unsigned int ds, 806 unsigned int opcode, 807 unsigned int olx) 808 { 809 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 810 811 /* For legacy MPW replace the EMPW by TSO with modifier. */ 812 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 813 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 814 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 815 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 816 if (MLX5_TXOFF_CONFIG(TXPP) && __rte_trace_point_fp_is_enabled()) 817 cs->flags = RTE_BE32(MLX5_COMP_ALWAYS << 818 MLX5_COMP_MODE_OFFSET); 819 else 820 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 821 MLX5_COMP_MODE_OFFSET); 822 cs->misc = RTE_BE32(0); 823 if (__rte_trace_point_fp_is_enabled() && !loc->pkts_sent) 824 rte_pmd_mlx5_trace_tx_entry(txq->port_id, txq->idx); 825 rte_pmd_mlx5_trace_tx_wqe((txq->wqe_ci << 8) | opcode); 826 } 827 828 /** 829 * Build the Synchronize Queue Segment with specified completion index. 830 * 831 * @param txq 832 * Pointer to TX queue structure. 833 * @param loc 834 * Pointer to burst routine local context. 835 * @param wqe 836 * Pointer to WQE to fill with built Control Segment. 837 * @param wci 838 * Completion index in Clock Queue to wait. 839 * @param olx 840 * Configured Tx offloads mask. It is fully defined at 841 * compile time and may be used for optimization. 842 */ 843 static __rte_always_inline void 844 mlx5_tx_qseg_init(struct mlx5_txq_data *restrict txq, 845 struct mlx5_txq_local *restrict loc __rte_unused, 846 struct mlx5_wqe *restrict wqe, 847 unsigned int wci, 848 unsigned int olx __rte_unused) 849 { 850 struct mlx5_wqe_qseg *qs; 851 852 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 853 qs->max_index = rte_cpu_to_be_32(wci); 854 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq_obj.cq->id); 855 qs->reserved0 = RTE_BE32(0); 856 qs->reserved1 = RTE_BE32(0); 857 } 858 859 /** 860 * Build the Wait on Time Segment with specified timestamp value. 861 * 862 * @param txq 863 * Pointer to TX queue structure. 864 * @param loc 865 * Pointer to burst routine local context. 866 * @param wqe 867 * Pointer to WQE to fill with built Control Segment. 868 * @param ts 869 * Timesatmp value to wait. 870 * @param olx 871 * Configured Tx offloads mask. It is fully defined at 872 * compile time and may be used for optimization. 873 */ 874 static __rte_always_inline void 875 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 876 struct mlx5_txq_local *restrict loc __rte_unused, 877 struct mlx5_wqe *restrict wqe, 878 uint64_t ts, 879 unsigned int olx __rte_unused) 880 { 881 struct mlx5_wqe_wseg *ws; 882 883 ws = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 884 ws->operation = rte_cpu_to_be_32(MLX5_WAIT_COND_CYCLIC_SMALLER); 885 ws->lkey = RTE_BE32(0); 886 ws->va_high = RTE_BE32(0); 887 ws->va_low = RTE_BE32(0); 888 if (txq->rt_timestamp) { 889 ts = ts % (uint64_t)NS_PER_S 890 | (ts / (uint64_t)NS_PER_S) << 32; 891 } 892 ws->value = rte_cpu_to_be_64(ts); 893 ws->mask = txq->rt_timemask; 894 } 895 896 /** 897 * Build the Ethernet Segment without inlined data. 898 * Supports Software Parser, Checksums and VLAN insertion Tx offload features. 899 * 900 * @param txq 901 * Pointer to TX queue structure. 902 * @param loc 903 * Pointer to burst routine local context. 904 * @param wqe 905 * Pointer to WQE to fill with built Ethernet Segment. 906 * @param olx 907 * Configured Tx offloads mask. It is fully defined at 908 * compile time and may be used for optimization. 909 */ 910 static __rte_always_inline void 911 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 912 struct mlx5_txq_local *__rte_restrict loc, 913 struct mlx5_wqe *__rte_restrict wqe, 914 unsigned int olx) 915 { 916 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 917 uint32_t csum; 918 919 /* 920 * Calculate and set check sum flags first, dword field 921 * in segment may be shared with Software Parser flags. 922 */ 923 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 924 es->flags = rte_cpu_to_le_32(csum); 925 /* 926 * Calculate and set Software Parser offsets and flags. 927 * These flags a set for custom UDP and IP tunnel packets. 928 */ 929 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 930 /* Fill metadata field if needed. */ 931 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 932 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 933 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 934 0 : 0; 935 /* Engage VLAN tag insertion feature if requested. */ 936 if (MLX5_TXOFF_CONFIG(VLAN) && 937 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 938 /* 939 * We should get here only if device support 940 * this feature correctly. 941 */ 942 MLX5_ASSERT(txq->vlan_en); 943 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 944 loc->mbuf->vlan_tci); 945 } else { 946 es->inline_hdr = RTE_BE32(0); 947 } 948 } 949 950 /** 951 * Build the Ethernet Segment with minimal inlined data 952 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 953 * used to fill the gap in single WQEBB WQEs. 954 * Supports Software Parser, Checksums and VLAN 955 * insertion Tx offload features. 956 * 957 * @param txq 958 * Pointer to TX queue structure. 959 * @param loc 960 * Pointer to burst routine local context. 961 * @param wqe 962 * Pointer to WQE to fill with built Ethernet Segment. 963 * @param vlan 964 * Length of VLAN tag insertion if any. 965 * @param olx 966 * Configured Tx offloads mask. It is fully defined at 967 * compile time and may be used for optimization. 968 */ 969 static __rte_always_inline void 970 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 971 struct mlx5_txq_local *__rte_restrict loc, 972 struct mlx5_wqe *__rte_restrict wqe, 973 unsigned int vlan, 974 unsigned int olx) 975 { 976 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 977 uint32_t csum; 978 uint8_t *psrc, *pdst; 979 980 /* 981 * Calculate and set check sum flags first, dword field 982 * in segment may be shared with Software Parser flags. 983 */ 984 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 985 es->flags = rte_cpu_to_le_32(csum); 986 /* 987 * Calculate and set Software Parser offsets and flags. 988 * These flags a set for custom UDP and IP tunnel packets. 989 */ 990 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 991 /* Fill metadata field if needed. */ 992 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 993 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 994 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 995 0 : 0; 996 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 997 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 998 es->inline_data = *(unaligned_uint16_t *)psrc; 999 psrc += sizeof(uint16_t); 1000 pdst = (uint8_t *)(es + 1); 1001 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1002 /* Implement VLAN tag insertion as part inline data. */ 1003 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 1004 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1005 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1006 /* Insert VLAN ethertype + VLAN tag. */ 1007 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1008 ((RTE_ETHER_TYPE_VLAN << 16) | 1009 loc->mbuf->vlan_tci); 1010 pdst += sizeof(struct rte_vlan_hdr); 1011 /* Copy the rest two bytes from packet data. */ 1012 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 1013 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 1014 } else { 1015 /* Fill the gap in the title WQEBB with inline data. */ 1016 rte_mov16(pdst, psrc); 1017 } 1018 } 1019 1020 /** 1021 * Build the Ethernet Segment with entire packet data inlining. Checks the 1022 * boundary of WQEBB and ring buffer wrapping, supports Software Parser, 1023 * Checksums and VLAN insertion Tx offload features. 1024 * 1025 * @param txq 1026 * Pointer to TX queue structure. 1027 * @param loc 1028 * Pointer to burst routine local context. 1029 * @param wqe 1030 * Pointer to WQE to fill with built Ethernet Segment. 1031 * @param vlan 1032 * Length of VLAN tag insertion if any. 1033 * @param inlen 1034 * Length of data to inline (VLAN included, if any). 1035 * @param tso 1036 * TSO flag, set mss field from the packet. 1037 * @param olx 1038 * Configured Tx offloads mask. It is fully defined at 1039 * compile time and may be used for optimization. 1040 * 1041 * @return 1042 * Pointer to the next Data Segment (aligned and wrapped around). 1043 */ 1044 static __rte_always_inline struct mlx5_wqe_dseg * 1045 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 1046 struct mlx5_txq_local *__rte_restrict loc, 1047 struct mlx5_wqe *__rte_restrict wqe, 1048 unsigned int vlan, 1049 unsigned int inlen, 1050 unsigned int tso, 1051 unsigned int olx) 1052 { 1053 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 1054 uint32_t csum; 1055 uint8_t *psrc, *pdst; 1056 unsigned int part; 1057 1058 /* 1059 * Calculate and set check sum flags first, dword field 1060 * in segment may be shared with Software Parser flags. 1061 */ 1062 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 1063 if (tso) { 1064 csum <<= 24; 1065 csum |= loc->mbuf->tso_segsz; 1066 es->flags = rte_cpu_to_be_32(csum); 1067 } else { 1068 es->flags = rte_cpu_to_le_32(csum); 1069 } 1070 /* 1071 * Calculate and set Software Parser offsets and flags. 1072 * These flags a set for custom UDP and IP tunnel packets. 1073 */ 1074 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1075 /* Fill metadata field if needed. */ 1076 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1077 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 1078 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1079 0 : 0; 1080 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 1081 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 1082 es->inline_data = *(unaligned_uint16_t *)psrc; 1083 psrc += sizeof(uint16_t); 1084 pdst = (uint8_t *)(es + 1); 1085 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1086 /* Implement VLAN tag insertion as part inline data. */ 1087 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 1088 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1089 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1090 /* Insert VLAN ethertype + VLAN tag. */ 1091 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1092 ((RTE_ETHER_TYPE_VLAN << 16) | 1093 loc->mbuf->vlan_tci); 1094 pdst += sizeof(struct rte_vlan_hdr); 1095 /* Copy the rest two bytes from packet data. */ 1096 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 1097 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 1098 psrc += sizeof(uint16_t); 1099 } else { 1100 /* Fill the gap in the title WQEBB with inline data. */ 1101 rte_mov16(pdst, psrc); 1102 psrc += sizeof(rte_v128u32_t); 1103 } 1104 pdst = (uint8_t *)(es + 2); 1105 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 1106 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 1107 inlen -= MLX5_ESEG_MIN_INLINE_SIZE; 1108 if (!inlen) { 1109 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 1110 return (struct mlx5_wqe_dseg *)pdst; 1111 } 1112 /* 1113 * The WQEBB space availability is checked by caller. 1114 * Here we should be aware of WQE ring buffer wraparound only. 1115 */ 1116 part = (uint8_t *)txq->wqes_end - pdst; 1117 part = RTE_MIN(part, inlen); 1118 do { 1119 rte_memcpy(pdst, psrc, part); 1120 inlen -= part; 1121 if (likely(!inlen)) { 1122 /* 1123 * If return value is not used by the caller 1124 * the code below will be optimized out. 1125 */ 1126 pdst += part; 1127 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1128 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 1129 pdst = (uint8_t *)txq->wqes; 1130 return (struct mlx5_wqe_dseg *)pdst; 1131 } 1132 pdst = (uint8_t *)txq->wqes; 1133 psrc += part; 1134 part = inlen; 1135 } while (true); 1136 } 1137 1138 /** 1139 * Copy data from chain of mbuf to the specified linear buffer. 1140 * Checksums and VLAN insertion Tx offload features. If data 1141 * from some mbuf copied completely this mbuf is freed. Local 1142 * structure is used to keep the byte stream state. 1143 * 1144 * @param pdst 1145 * Pointer to the destination linear buffer. 1146 * @param loc 1147 * Pointer to burst routine local context. 1148 * @param len 1149 * Length of data to be copied. 1150 * @param must 1151 * Length of data to be copied ignoring no inline hint. 1152 * @param olx 1153 * Configured Tx offloads mask. It is fully defined at 1154 * compile time and may be used for optimization. 1155 * 1156 * @return 1157 * Number of actual copied data bytes. This is always greater than or 1158 * equal to must parameter and might be lesser than len in no inline 1159 * hint flag is encountered. 1160 */ 1161 static __rte_always_inline unsigned int 1162 mlx5_tx_mseg_memcpy(uint8_t *pdst, 1163 struct mlx5_txq_local *__rte_restrict loc, 1164 unsigned int len, 1165 unsigned int must, 1166 unsigned int olx __rte_unused) 1167 { 1168 struct rte_mbuf *mbuf; 1169 unsigned int part, dlen, copy = 0; 1170 uint8_t *psrc; 1171 1172 MLX5_ASSERT(len); 1173 do { 1174 /* Allow zero length packets, must check first. */ 1175 dlen = rte_pktmbuf_data_len(loc->mbuf); 1176 if (dlen <= loc->mbuf_off) { 1177 /* Exhausted packet, just free. */ 1178 mbuf = loc->mbuf; 1179 loc->mbuf = mbuf->next; 1180 rte_pktmbuf_free_seg(mbuf); 1181 loc->mbuf_off = 0; 1182 MLX5_ASSERT(loc->mbuf_nseg > 1); 1183 MLX5_ASSERT(loc->mbuf); 1184 --loc->mbuf_nseg; 1185 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1186 unsigned int diff; 1187 1188 if (copy >= must) { 1189 /* 1190 * We already copied the minimal 1191 * requested amount of data. 1192 */ 1193 return copy; 1194 } 1195 diff = must - copy; 1196 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 1197 /* 1198 * Copy only the minimal required 1199 * part of the data buffer. Limit amount 1200 * of data to be copied to the length of 1201 * available space. 1202 */ 1203 len = RTE_MIN(len, diff); 1204 } 1205 } 1206 continue; 1207 } 1208 dlen -= loc->mbuf_off; 1209 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1210 loc->mbuf_off); 1211 part = RTE_MIN(len, dlen); 1212 rte_memcpy(pdst, psrc, part); 1213 copy += part; 1214 loc->mbuf_off += part; 1215 len -= part; 1216 if (!len) { 1217 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 1218 loc->mbuf_off = 0; 1219 /* Exhausted packet, just free. */ 1220 mbuf = loc->mbuf; 1221 loc->mbuf = mbuf->next; 1222 rte_pktmbuf_free_seg(mbuf); 1223 loc->mbuf_off = 0; 1224 MLX5_ASSERT(loc->mbuf_nseg >= 1); 1225 --loc->mbuf_nseg; 1226 } 1227 return copy; 1228 } 1229 pdst += part; 1230 } while (true); 1231 } 1232 1233 /** 1234 * Build the Ethernet Segment with inlined data from multi-segment packet. 1235 * Checks the boundary of WQEBB and ring buffer wrapping, supports Software 1236 * Parser, Checksums and VLAN insertion Tx offload features. 1237 * 1238 * @param txq 1239 * Pointer to TX queue structure. 1240 * @param loc 1241 * Pointer to burst routine local context. 1242 * @param wqe 1243 * Pointer to WQE to fill with built Ethernet Segment. 1244 * @param vlan 1245 * Length of VLAN tag insertion if any. 1246 * @param inlen 1247 * Length of data to inline (VLAN included, if any). 1248 * @param tso 1249 * TSO flag, set mss field from the packet. 1250 * @param olx 1251 * Configured Tx offloads mask. It is fully defined at 1252 * compile time and may be used for optimization. 1253 * 1254 * @return 1255 * Pointer to the next Data Segment (aligned and possible NOT wrapped 1256 * around - caller should do wrapping check on its own). 1257 */ 1258 static __rte_always_inline struct mlx5_wqe_dseg * 1259 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 1260 struct mlx5_txq_local *__rte_restrict loc, 1261 struct mlx5_wqe *__rte_restrict wqe, 1262 unsigned int vlan, 1263 unsigned int inlen, 1264 unsigned int tso, 1265 unsigned int olx) 1266 { 1267 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 1268 uint32_t csum; 1269 uint8_t *pdst; 1270 unsigned int part, tlen = 0; 1271 1272 /* 1273 * Calculate and set check sum flags first, uint32_t field 1274 * in segment may be shared with Software Parser flags. 1275 */ 1276 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 1277 if (tso) { 1278 csum <<= 24; 1279 csum |= loc->mbuf->tso_segsz; 1280 es->flags = rte_cpu_to_be_32(csum); 1281 } else { 1282 es->flags = rte_cpu_to_le_32(csum); 1283 } 1284 /* 1285 * Calculate and set Software Parser offsets and flags. 1286 * These flags a set for custom UDP and IP tunnel packets. 1287 */ 1288 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1289 /* Fill metadata field if needed. */ 1290 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1291 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 1292 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1293 0 : 0; 1294 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 1295 pdst = (uint8_t *)&es->inline_data; 1296 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1297 /* Implement VLAN tag insertion as part inline data. */ 1298 mlx5_tx_mseg_memcpy(pdst, loc, 1299 2 * RTE_ETHER_ADDR_LEN, 1300 2 * RTE_ETHER_ADDR_LEN, olx); 1301 pdst += 2 * RTE_ETHER_ADDR_LEN; 1302 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1303 ((RTE_ETHER_TYPE_VLAN << 16) | 1304 loc->mbuf->vlan_tci); 1305 pdst += sizeof(struct rte_vlan_hdr); 1306 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 1307 } 1308 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 1309 /* 1310 * The WQEBB space availability is checked by caller. 1311 * Here we should be aware of WQE ring buffer wraparound only. 1312 */ 1313 part = (uint8_t *)txq->wqes_end - pdst; 1314 part = RTE_MIN(part, inlen - tlen); 1315 MLX5_ASSERT(part); 1316 do { 1317 unsigned int copy; 1318 1319 /* 1320 * Copying may be interrupted inside the routine 1321 * if run into no inline hint flag. 1322 */ 1323 copy = tso ? inlen : txq->inlen_mode; 1324 copy = tlen >= copy ? 0 : (copy - tlen); 1325 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 1326 tlen += copy; 1327 if (likely(inlen <= tlen) || copy < part) { 1328 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 1329 pdst += copy; 1330 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1331 return (struct mlx5_wqe_dseg *)pdst; 1332 } 1333 pdst = (uint8_t *)txq->wqes; 1334 part = inlen - tlen; 1335 } while (true); 1336 } 1337 1338 /** 1339 * Build the Data Segment of pointer type. 1340 * 1341 * @param txq 1342 * Pointer to TX queue structure. 1343 * @param loc 1344 * Pointer to burst routine local context. 1345 * @param dseg 1346 * Pointer to WQE to fill with built Data Segment. 1347 * @param buf 1348 * Data buffer to point. 1349 * @param len 1350 * Data buffer length. 1351 * @param olx 1352 * Configured Tx offloads mask. It is fully defined at 1353 * compile time and may be used for optimization. 1354 */ 1355 static __rte_always_inline void 1356 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 1357 struct mlx5_txq_local *__rte_restrict loc, 1358 struct mlx5_wqe_dseg *__rte_restrict dseg, 1359 uint8_t *buf, 1360 unsigned int len, 1361 unsigned int olx __rte_unused) 1362 1363 { 1364 MLX5_ASSERT(len); 1365 dseg->bcount = rte_cpu_to_be_32(len); 1366 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1367 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1368 } 1369 1370 /** 1371 * Build the Data Segment of pointer type or inline if data length is less than 1372 * buffer in minimal Data Segment size. 1373 * 1374 * @param txq 1375 * Pointer to TX queue structure. 1376 * @param loc 1377 * Pointer to burst routine local context. 1378 * @param dseg 1379 * Pointer to WQE to fill with built Data Segment. 1380 * @param buf 1381 * Data buffer to point. 1382 * @param len 1383 * Data buffer length. 1384 * @param olx 1385 * Configured Tx offloads mask. It is fully defined at 1386 * compile time and may be used for optimization. 1387 */ 1388 static __rte_always_inline void 1389 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 1390 struct mlx5_txq_local *__rte_restrict loc, 1391 struct mlx5_wqe_dseg *__rte_restrict dseg, 1392 uint8_t *buf, 1393 unsigned int len, 1394 unsigned int olx __rte_unused) 1395 1396 { 1397 uintptr_t dst, src; 1398 1399 MLX5_ASSERT(len); 1400 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 1401 dseg->bcount = rte_cpu_to_be_32(len); 1402 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1403 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1404 1405 return; 1406 } 1407 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1408 /* Unrolled implementation of generic rte_memcpy. */ 1409 dst = (uintptr_t)&dseg->inline_data[0]; 1410 src = (uintptr_t)buf; 1411 if (len & 0x08) { 1412 #ifdef RTE_ARCH_STRICT_ALIGN 1413 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 1414 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1415 dst += sizeof(uint32_t); 1416 src += sizeof(uint32_t); 1417 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1418 dst += sizeof(uint32_t); 1419 src += sizeof(uint32_t); 1420 #else 1421 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 1422 dst += sizeof(uint64_t); 1423 src += sizeof(uint64_t); 1424 #endif 1425 } 1426 if (len & 0x04) { 1427 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1428 dst += sizeof(uint32_t); 1429 src += sizeof(uint32_t); 1430 } 1431 if (len & 0x02) { 1432 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 1433 dst += sizeof(uint16_t); 1434 src += sizeof(uint16_t); 1435 } 1436 if (len & 0x01) 1437 *(uint8_t *)dst = *(uint8_t *)src; 1438 } 1439 1440 /** 1441 * Build the Data Segment of inlined data from single 1442 * segment packet, no VLAN insertion. 1443 * 1444 * @param txq 1445 * Pointer to TX queue structure. 1446 * @param loc 1447 * Pointer to burst routine local context. 1448 * @param dseg 1449 * Pointer to WQE to fill with built Data Segment. 1450 * @param buf 1451 * Data buffer to point. 1452 * @param len 1453 * Data buffer length. 1454 * @param olx 1455 * Configured Tx offloads mask. It is fully defined at 1456 * compile time and may be used for optimization. 1457 * 1458 * @return 1459 * Pointer to the next Data Segment after inlined data. 1460 * Ring buffer wraparound check is needed. We do not do it here because it 1461 * may not be needed for the last packet in the eMPW session. 1462 */ 1463 static __rte_always_inline struct mlx5_wqe_dseg * 1464 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 1465 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1466 struct mlx5_wqe_dseg *__rte_restrict dseg, 1467 uint8_t *buf, 1468 unsigned int len, 1469 unsigned int olx __rte_unused) 1470 { 1471 unsigned int part; 1472 uint8_t *pdst; 1473 1474 if (!MLX5_TXOFF_CONFIG(MPW)) { 1475 /* Store the descriptor byte counter for eMPW sessions. */ 1476 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1477 pdst = &dseg->inline_data[0]; 1478 } else { 1479 /* The entire legacy MPW session counter is stored on close. */ 1480 pdst = (uint8_t *)dseg; 1481 } 1482 /* 1483 * The WQEBB space availability is checked by caller. 1484 * Here we should be aware of WQE ring buffer wraparound only. 1485 */ 1486 part = (uint8_t *)txq->wqes_end - pdst; 1487 part = RTE_MIN(part, len); 1488 do { 1489 rte_memcpy(pdst, buf, part); 1490 len -= part; 1491 if (likely(!len)) { 1492 pdst += part; 1493 if (!MLX5_TXOFF_CONFIG(MPW)) 1494 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1495 /* Note: no final wraparound check here. */ 1496 return (struct mlx5_wqe_dseg *)pdst; 1497 } 1498 pdst = (uint8_t *)txq->wqes; 1499 buf += part; 1500 part = len; 1501 } while (true); 1502 } 1503 1504 /** 1505 * Build the Data Segment of inlined data from single 1506 * segment packet with VLAN insertion. 1507 * 1508 * @param txq 1509 * Pointer to TX queue structure. 1510 * @param loc 1511 * Pointer to burst routine local context. 1512 * @param dseg 1513 * Pointer to the dseg fill with built Data Segment. 1514 * @param buf 1515 * Data buffer to point. 1516 * @param len 1517 * Data buffer length. 1518 * @param olx 1519 * Configured Tx offloads mask. It is fully defined at 1520 * compile time and may be used for optimization. 1521 * 1522 * @return 1523 * Pointer to the next Data Segment after inlined data. 1524 * Ring buffer wraparound check is needed. 1525 */ 1526 static __rte_always_inline struct mlx5_wqe_dseg * 1527 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 1528 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1529 struct mlx5_wqe_dseg *__rte_restrict dseg, 1530 uint8_t *buf, 1531 unsigned int len, 1532 unsigned int olx __rte_unused) 1533 1534 { 1535 unsigned int part; 1536 uint8_t *pdst; 1537 1538 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 1539 if (!MLX5_TXOFF_CONFIG(MPW)) { 1540 /* Store the descriptor byte counter for eMPW sessions. */ 1541 dseg->bcount = rte_cpu_to_be_32 1542 ((len + sizeof(struct rte_vlan_hdr)) | 1543 MLX5_ETH_WQE_DATA_INLINE); 1544 pdst = &dseg->inline_data[0]; 1545 } else { 1546 /* The entire legacy MPW session counter is stored on close. */ 1547 pdst = (uint8_t *)dseg; 1548 } 1549 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 1550 buf += MLX5_DSEG_MIN_INLINE_SIZE; 1551 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 1552 len -= MLX5_DSEG_MIN_INLINE_SIZE; 1553 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ 1554 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 1555 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 1556 pdst = (uint8_t *)txq->wqes; 1557 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 1558 loc->mbuf->vlan_tci); 1559 pdst += sizeof(struct rte_vlan_hdr); 1560 /* 1561 * The WQEBB space availability is checked by caller. 1562 * Here we should be aware of WQE ring buffer wraparound only. 1563 */ 1564 part = (uint8_t *)txq->wqes_end - pdst; 1565 part = RTE_MIN(part, len); 1566 do { 1567 rte_memcpy(pdst, buf, part); 1568 len -= part; 1569 if (likely(!len)) { 1570 pdst += part; 1571 if (!MLX5_TXOFF_CONFIG(MPW)) 1572 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1573 /* Note: no final wraparound check here. */ 1574 return (struct mlx5_wqe_dseg *)pdst; 1575 } 1576 pdst = (uint8_t *)txq->wqes; 1577 buf += part; 1578 part = len; 1579 } while (true); 1580 } 1581 1582 /** 1583 * Build the Ethernet Segment with optionally inlined data with 1584 * VLAN insertion and following Data Segments (if any) from 1585 * multi-segment packet. Used by ordinary send and TSO. 1586 * 1587 * @param txq 1588 * Pointer to TX queue structure. 1589 * @param loc 1590 * Pointer to burst routine local context. 1591 * @param wqe 1592 * Pointer to WQE to fill with built Ethernet/Data Segments. 1593 * @param vlan 1594 * Length of VLAN header to insert, 0 means no VLAN insertion. 1595 * @param inlen 1596 * Data length to inline. For TSO this parameter specifies exact value, 1597 * for ordinary send routine can be aligned by caller to provide better WQE 1598 * space saving and data buffer start address alignment. 1599 * This length includes VLAN header being inserted. 1600 * @param tso 1601 * Zero means ordinary send, inlined data can be extended, 1602 * otherwise this is TSO, inlined data length is fixed. 1603 * @param olx 1604 * Configured Tx offloads mask. It is fully defined at 1605 * compile time and may be used for optimization. 1606 * 1607 * @return 1608 * Actual size of built WQE in segments. 1609 */ 1610 static __rte_always_inline unsigned int 1611 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 1612 struct mlx5_txq_local *__rte_restrict loc, 1613 struct mlx5_wqe *__rte_restrict wqe, 1614 unsigned int vlan, 1615 unsigned int inlen, 1616 unsigned int tso, 1617 unsigned int olx __rte_unused) 1618 { 1619 struct mlx5_wqe_dseg *__rte_restrict dseg; 1620 unsigned int ds; 1621 1622 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 1623 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 1624 loc->mbuf_off = 0; 1625 1626 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 1627 if (!loc->mbuf_nseg) 1628 goto dseg_done; 1629 /* 1630 * There are still some mbuf remaining, not inlined. 1631 * The first mbuf may be partially inlined and we 1632 * must process the possible non-zero data offset. 1633 */ 1634 if (loc->mbuf_off) { 1635 unsigned int dlen; 1636 uint8_t *dptr; 1637 1638 /* 1639 * Exhausted packets must be dropped before. 1640 * Non-zero offset means there are some data 1641 * remained in the packet. 1642 */ 1643 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 1644 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 1645 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1646 loc->mbuf_off); 1647 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 1648 /* 1649 * Build the pointer/minimal Data Segment. 1650 * Do ring buffer wrapping check in advance. 1651 */ 1652 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1653 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1654 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 1655 /* Store the mbuf to be freed on completion. */ 1656 MLX5_ASSERT(loc->elts_free); 1657 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1658 --loc->elts_free; 1659 ++dseg; 1660 if (--loc->mbuf_nseg == 0) 1661 goto dseg_done; 1662 loc->mbuf = loc->mbuf->next; 1663 loc->mbuf_off = 0; 1664 } 1665 do { 1666 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1667 struct rte_mbuf *mbuf; 1668 1669 /* Zero length segment found, just skip. */ 1670 mbuf = loc->mbuf; 1671 loc->mbuf = loc->mbuf->next; 1672 rte_pktmbuf_free_seg(mbuf); 1673 if (--loc->mbuf_nseg == 0) 1674 break; 1675 } else { 1676 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1677 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1678 mlx5_tx_dseg_iptr 1679 (txq, loc, dseg, 1680 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1681 rte_pktmbuf_data_len(loc->mbuf), olx); 1682 MLX5_ASSERT(loc->elts_free); 1683 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1684 --loc->elts_free; 1685 ++dseg; 1686 if (--loc->mbuf_nseg == 0) 1687 break; 1688 loc->mbuf = loc->mbuf->next; 1689 } 1690 } while (true); 1691 1692 dseg_done: 1693 /* Calculate actual segments used from the dseg pointer. */ 1694 if ((uintptr_t)wqe < (uintptr_t)dseg) 1695 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 1696 else 1697 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 1698 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 1699 return ds; 1700 } 1701 1702 /** 1703 * The routine checks timestamp flag in the current packet, 1704 * and push WAIT WQE into the queue if scheduling is required. 1705 * 1706 * @param txq 1707 * Pointer to TX queue structure. 1708 * @param loc 1709 * Pointer to burst routine local context. 1710 * @param elts 1711 * Number of free elements in elts buffer to be checked, for zero 1712 * value the check is optimized out by compiler. 1713 * @param olx 1714 * Configured Tx offloads mask. It is fully defined at 1715 * compile time and may be used for optimization. 1716 * 1717 * @return 1718 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1719 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 1720 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 1721 * Local context variables partially updated. 1722 */ 1723 static __rte_always_inline enum mlx5_txcmp_code 1724 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 1725 struct mlx5_txq_local *restrict loc, 1726 uint16_t elts, 1727 unsigned int olx) 1728 { 1729 if (MLX5_TXOFF_CONFIG(TXPP) && 1730 loc->mbuf->ol_flags & txq->ts_mask) { 1731 struct mlx5_dev_ctx_shared *sh; 1732 struct mlx5_wqe *wqe; 1733 uint64_t ts; 1734 1735 /* 1736 * Estimate the required space quickly and roughly. 1737 * We would like to ensure the packet can be pushed 1738 * to the queue and we won't get the orphan WAIT WQE. 1739 */ 1740 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 1741 loc->elts_free < elts) 1742 return MLX5_TXCMP_CODE_EXIT; 1743 /* Convert the timestamp into completion to wait. */ 1744 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 1745 if (txq->ts_last && ts < txq->ts_last) 1746 __atomic_fetch_add(&txq->sh->txpp.err_ts_order, 1747 1, __ATOMIC_RELAXED); 1748 txq->ts_last = ts; 1749 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1750 sh = txq->sh; 1751 if (txq->wait_on_time) { 1752 /* The wait on time capability should be used. */ 1753 ts -= sh->txpp.skew; 1754 rte_pmd_mlx5_trace_tx_wait(ts); 1755 mlx5_tx_cseg_init(txq, loc, wqe, 1756 1 + sizeof(struct mlx5_wqe_wseg) / 1757 MLX5_WSEG_SIZE, 1758 MLX5_OPCODE_WAIT | 1759 MLX5_OPC_MOD_WAIT_TIME << 24, olx); 1760 mlx5_tx_wseg_init(txq, loc, wqe, ts, olx); 1761 } else { 1762 /* Legacy cross-channel operation should be used. */ 1763 int32_t wci; 1764 1765 wci = mlx5_txpp_convert_tx_ts(sh, ts); 1766 if (unlikely(wci < 0)) 1767 return MLX5_TXCMP_CODE_SINGLE; 1768 /* Build the WAIT WQE with specified completion. */ 1769 rte_pmd_mlx5_trace_tx_wait(ts - sh->txpp.skew); 1770 mlx5_tx_cseg_init(txq, loc, wqe, 1771 1 + sizeof(struct mlx5_wqe_qseg) / 1772 MLX5_WSEG_SIZE, 1773 MLX5_OPCODE_WAIT | 1774 MLX5_OPC_MOD_WAIT_CQ_PI << 24, olx); 1775 mlx5_tx_qseg_init(txq, loc, wqe, wci, olx); 1776 } 1777 ++txq->wqe_ci; 1778 --loc->wqe_free; 1779 return MLX5_TXCMP_CODE_MULTI; 1780 } 1781 return MLX5_TXCMP_CODE_SINGLE; 1782 } 1783 1784 /** 1785 * Tx one packet function for multi-segment TSO. Supports all 1786 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 1787 * sends one packet per WQE. 1788 * 1789 * This routine is responsible for storing processed mbuf 1790 * into elts ring buffer and update elts_head. 1791 * 1792 * @param txq 1793 * Pointer to TX queue structure. 1794 * @param loc 1795 * Pointer to burst routine local context. 1796 * @param olx 1797 * Configured Tx offloads mask. It is fully defined at 1798 * compile time and may be used for optimization. 1799 * 1800 * @return 1801 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1802 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1803 * Local context variables partially updated. 1804 */ 1805 static __rte_always_inline enum mlx5_txcmp_code 1806 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 1807 struct mlx5_txq_local *__rte_restrict loc, 1808 unsigned int olx) 1809 { 1810 struct mlx5_wqe *__rte_restrict wqe; 1811 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 1812 1813 MLX5_ASSERT(loc->elts_free >= NB_SEGS(loc->mbuf)); 1814 if (MLX5_TXOFF_CONFIG(TXPP)) { 1815 enum mlx5_txcmp_code wret; 1816 1817 /* Generate WAIT for scheduling if requested. */ 1818 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 1819 if (wret == MLX5_TXCMP_CODE_EXIT) 1820 return MLX5_TXCMP_CODE_EXIT; 1821 if (wret == MLX5_TXCMP_CODE_ERROR) 1822 return MLX5_TXCMP_CODE_ERROR; 1823 } 1824 /* 1825 * Calculate data length to be inlined to estimate 1826 * the required space in WQE ring buffer. 1827 */ 1828 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1829 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1830 vlan = sizeof(struct rte_vlan_hdr); 1831 inlen = loc->mbuf->l2_len + vlan + 1832 loc->mbuf->l3_len + loc->mbuf->l4_len; 1833 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 1834 return MLX5_TXCMP_CODE_ERROR; 1835 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 1836 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 1837 /* Packet must contain all TSO headers. */ 1838 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 1839 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 1840 inlen > (dlen + vlan))) 1841 return MLX5_TXCMP_CODE_ERROR; 1842 /* 1843 * Check whether there are enough free WQEBBs: 1844 * - Control Segment 1845 * - Ethernet Segment 1846 * - First Segment of inlined Ethernet data 1847 * - ... data continued ... 1848 * - Data Segments of pointer/min inline type 1849 */ 1850 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 1851 MLX5_ESEG_MIN_INLINE_SIZE + 1852 MLX5_WSEG_SIZE + 1853 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 1854 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1855 return MLX5_TXCMP_CODE_EXIT; 1856 /* Check for maximal WQE size. */ 1857 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) 1858 return MLX5_TXCMP_CODE_ERROR; 1859 #ifdef MLX5_PMD_SOFT_COUNTERS 1860 /* Update sent data bytes/packets counters. */ 1861 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 1862 loc->mbuf->tso_segsz; 1863 /* 1864 * One will be added for mbuf itself at the end of the mlx5_tx_burst 1865 * from loc->pkts_sent field. 1866 */ 1867 --ntcp; 1868 txq->stats.opackets += ntcp; 1869 txq->stats.obytes += dlen + vlan + ntcp * inlen; 1870 #endif 1871 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1872 loc->wqe_last = wqe; 1873 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 1874 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 1875 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 1876 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 1877 txq->wqe_ci += (ds + 3) / 4; 1878 loc->wqe_free -= (ds + 3) / 4; 1879 return MLX5_TXCMP_CODE_MULTI; 1880 } 1881 1882 /** 1883 * Tx one packet function for multi-segment SEND. Supports all types of Tx 1884 * offloads, uses MLX5_OPCODE_SEND to build WQEs, sends one packet per WQE, 1885 * without any data inlining in Ethernet Segment. 1886 * 1887 * This routine is responsible for storing processed mbuf 1888 * into elts ring buffer and update elts_head. 1889 * 1890 * @param txq 1891 * Pointer to TX queue structure. 1892 * @param loc 1893 * Pointer to burst routine local context. 1894 * @param olx 1895 * Configured Tx offloads mask. It is fully defined at 1896 * compile time and may be used for optimization. 1897 * 1898 * @return 1899 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1900 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1901 * Local context variables partially updated. 1902 */ 1903 static __rte_always_inline enum mlx5_txcmp_code 1904 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 1905 struct mlx5_txq_local *__rte_restrict loc, 1906 unsigned int olx) 1907 { 1908 struct mlx5_wqe_dseg *__rte_restrict dseg; 1909 struct mlx5_wqe *__rte_restrict wqe; 1910 unsigned int ds, nseg; 1911 1912 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1913 MLX5_ASSERT(loc->elts_free >= NB_SEGS(loc->mbuf)); 1914 if (MLX5_TXOFF_CONFIG(TXPP)) { 1915 enum mlx5_txcmp_code wret; 1916 1917 /* Generate WAIT for scheduling if requested. */ 1918 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 1919 if (wret == MLX5_TXCMP_CODE_EXIT) 1920 return MLX5_TXCMP_CODE_EXIT; 1921 if (wret == MLX5_TXCMP_CODE_ERROR) 1922 return MLX5_TXCMP_CODE_ERROR; 1923 } 1924 /* 1925 * No inline at all, it means the CPU cycles saving is prioritized at 1926 * configuration, we should not copy any packet data to WQE. 1927 */ 1928 nseg = NB_SEGS(loc->mbuf); 1929 ds = 2 + nseg; 1930 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1931 return MLX5_TXCMP_CODE_EXIT; 1932 /* Check for maximal WQE size. */ 1933 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) 1934 return MLX5_TXCMP_CODE_ERROR; 1935 /* 1936 * Some Tx offloads may cause an error if packet is not long enough, 1937 * check against assumed minimal length. 1938 */ 1939 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 1940 return MLX5_TXCMP_CODE_ERROR; 1941 #ifdef MLX5_PMD_SOFT_COUNTERS 1942 /* Update sent data bytes counter. */ 1943 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 1944 if (MLX5_TXOFF_CONFIG(VLAN) && 1945 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1946 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 1947 #endif 1948 /* 1949 * SEND WQE, one WQEBB: 1950 * - Control Segment, SEND opcode 1951 * - Ethernet Segment, optional VLAN, no inline 1952 * - Data Segments, pointer only type 1953 */ 1954 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1955 loc->wqe_last = wqe; 1956 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 1957 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 1958 mlx5_tx_eseg_none(txq, loc, wqe, olx); 1959 dseg = &wqe->dseg[0]; 1960 do { 1961 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1962 struct rte_mbuf *mbuf; 1963 1964 /* 1965 * Zero length segment found, have to correct total 1966 * size of WQE in segments. 1967 * It is supposed to be rare occasion, so in normal 1968 * case (no zero length segments) we avoid extra 1969 * writing to the Control Segment. 1970 */ 1971 --ds; 1972 wqe->cseg.sq_ds -= RTE_BE32(1); 1973 mbuf = loc->mbuf; 1974 loc->mbuf = mbuf->next; 1975 rte_pktmbuf_free_seg(mbuf); 1976 if (--nseg == 0) 1977 break; 1978 } else { 1979 mlx5_tx_dseg_ptr 1980 (txq, loc, dseg, 1981 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1982 rte_pktmbuf_data_len(loc->mbuf), olx); 1983 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1984 --loc->elts_free; 1985 if (--nseg == 0) 1986 break; 1987 ++dseg; 1988 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1989 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1990 loc->mbuf = loc->mbuf->next; 1991 } 1992 } while (true); 1993 txq->wqe_ci += (ds + 3) / 4; 1994 loc->wqe_free -= (ds + 3) / 4; 1995 return MLX5_TXCMP_CODE_MULTI; 1996 } 1997 1998 /** 1999 * Tx one packet function for multi-segment SEND. Supports all 2000 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 2001 * sends one packet per WQE, with data inlining in 2002 * Ethernet Segment and minimal Data Segments. 2003 * 2004 * This routine is responsible for storing processed mbuf 2005 * into elts ring buffer and update elts_head. 2006 * 2007 * @param txq 2008 * Pointer to TX queue structure. 2009 * @param loc 2010 * Pointer to burst routine local context. 2011 * @param olx 2012 * Configured Tx offloads mask. It is fully defined at 2013 * compile time and may be used for optimization. 2014 * 2015 * @return 2016 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2017 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2018 * Local context variables partially updated. 2019 */ 2020 static __rte_always_inline enum mlx5_txcmp_code 2021 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 2022 struct mlx5_txq_local *__rte_restrict loc, 2023 unsigned int olx) 2024 { 2025 struct mlx5_wqe *__rte_restrict wqe; 2026 unsigned int ds, inlen, dlen, vlan = 0; 2027 2028 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2029 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 2030 MLX5_ASSERT(loc->elts_free >= NB_SEGS(loc->mbuf)); 2031 /* 2032 * First calculate data length to be inlined 2033 * to estimate the required space for WQE. 2034 */ 2035 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 2036 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 2037 vlan = sizeof(struct rte_vlan_hdr); 2038 inlen = dlen + vlan; 2039 /* Check against minimal length. */ 2040 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 2041 return MLX5_TXCMP_CODE_ERROR; 2042 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 2043 if (inlen > txq->inlen_send || 2044 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 2045 struct rte_mbuf *mbuf; 2046 unsigned int nxlen; 2047 uintptr_t start; 2048 2049 mbuf = loc->mbuf; 2050 nxlen = rte_pktmbuf_data_len(mbuf) + vlan; 2051 /* 2052 * Packet length exceeds the allowed inline data length, 2053 * check whether the minimal inlining is required. 2054 */ 2055 if (txq->inlen_mode) { 2056 MLX5_ASSERT(txq->inlen_mode >= 2057 MLX5_ESEG_MIN_INLINE_SIZE); 2058 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 2059 inlen = RTE_MIN(txq->inlen_mode, inlen); 2060 } else if (vlan && !txq->vlan_en) { 2061 /* 2062 * VLAN insertion is requested and hardware does not 2063 * support the offload, will do with software inline. 2064 */ 2065 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 2066 } else if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE || 2067 nxlen > txq->inlen_send) { 2068 return mlx5_tx_packet_multi_send(txq, loc, olx); 2069 } else if (nxlen <= MLX5_ESEG_MIN_INLINE_SIZE) { 2070 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 2071 } else { 2072 goto do_first; 2073 } 2074 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2075 goto do_build; 2076 /* 2077 * Now we know the minimal amount of data is requested 2078 * to inline. Check whether we should inline the buffers 2079 * from the chain beginning to eliminate some mbufs. 2080 */ 2081 if (unlikely(nxlen <= txq->inlen_send)) { 2082 /* We can inline first mbuf at least. */ 2083 if (nxlen < inlen) { 2084 unsigned int smlen; 2085 2086 /* Scan mbufs till inlen filled. */ 2087 do { 2088 smlen = nxlen; 2089 mbuf = NEXT(mbuf); 2090 MLX5_ASSERT(mbuf); 2091 nxlen = rte_pktmbuf_data_len(mbuf); 2092 nxlen += smlen; 2093 } while (unlikely(nxlen < inlen)); 2094 if (unlikely(nxlen > txq->inlen_send)) { 2095 /* We cannot inline entire mbuf. */ 2096 smlen = inlen - smlen; 2097 start = rte_pktmbuf_mtod_offset 2098 (mbuf, uintptr_t, smlen); 2099 goto do_align; 2100 } 2101 } 2102 do_first: 2103 do { 2104 inlen = nxlen; 2105 mbuf = NEXT(mbuf); 2106 /* There should be not end of packet. */ 2107 MLX5_ASSERT(mbuf); 2108 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2109 break; 2110 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 2111 } while (unlikely(nxlen < txq->inlen_send)); 2112 } 2113 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 2114 /* 2115 * Check whether we can do inline to align start 2116 * address of data buffer to cacheline. 2117 */ 2118 do_align: 2119 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 2120 if (unlikely(start)) { 2121 start += inlen; 2122 if (start <= txq->inlen_send) 2123 inlen = start; 2124 } 2125 } 2126 /* 2127 * Check whether there are enough free WQEBBs: 2128 * - Control Segment 2129 * - Ethernet Segment 2130 * - First Segment of inlined Ethernet data 2131 * - ... data continued ... 2132 * - Data Segments of pointer/min inline type 2133 * 2134 * Estimate the number of Data Segments conservatively, 2135 * supposing no any mbufs is being freed during inlining. 2136 */ 2137 do_build: 2138 if (MLX5_TXOFF_CONFIG(TXPP)) { 2139 enum mlx5_txcmp_code wret; 2140 2141 /* Generate WAIT for scheduling if requested. */ 2142 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 2143 if (wret == MLX5_TXCMP_CODE_EXIT) 2144 return MLX5_TXCMP_CODE_EXIT; 2145 if (wret == MLX5_TXCMP_CODE_ERROR) 2146 return MLX5_TXCMP_CODE_ERROR; 2147 } 2148 MLX5_ASSERT(inlen <= txq->inlen_send); 2149 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 2150 MLX5_ESEG_MIN_INLINE_SIZE + 2151 MLX5_WSEG_SIZE + 2152 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2153 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 2154 return MLX5_TXCMP_CODE_EXIT; 2155 /* Check for maximal WQE size. */ 2156 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) { 2157 /* Check if we can adjust the inline length. */ 2158 if (unlikely(txq->inlen_mode)) { 2159 ds = NB_SEGS(loc->mbuf) + 2 + 2160 (txq->inlen_mode - 2161 MLX5_ESEG_MIN_INLINE_SIZE + 2162 MLX5_WSEG_SIZE + 2163 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2164 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) 2165 return MLX5_TXCMP_CODE_ERROR; 2166 } 2167 /* We have lucky opportunity to adjust. */ 2168 inlen = RTE_MIN(inlen, MLX5_WQE_SIZE_MAX - 2169 MLX5_WSEG_SIZE * 2 - 2170 MLX5_WSEG_SIZE * NB_SEGS(loc->mbuf) - 2171 MLX5_WSEG_SIZE + 2172 MLX5_ESEG_MIN_INLINE_SIZE); 2173 } 2174 #ifdef MLX5_PMD_SOFT_COUNTERS 2175 /* Update sent data bytes/packets counters. */ 2176 txq->stats.obytes += dlen + vlan; 2177 #endif 2178 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2179 loc->wqe_last = wqe; 2180 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 2181 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 2182 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 2183 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2184 txq->wqe_ci += (ds + 3) / 4; 2185 loc->wqe_free -= (ds + 3) / 4; 2186 return MLX5_TXCMP_CODE_MULTI; 2187 } 2188 2189 /** 2190 * Tx burst function for multi-segment packets. Supports all 2191 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 2192 * sends one packet per WQE. Function stops sending if it 2193 * encounters the single-segment packet. 2194 * 2195 * This routine is responsible for storing processed mbuf 2196 * into elts ring buffer and update elts_head. 2197 * 2198 * @param txq 2199 * Pointer to TX queue structure. 2200 * @param[in] pkts 2201 * Packets to transmit. 2202 * @param pkts_n 2203 * Number of packets in array. 2204 * @param loc 2205 * Pointer to burst routine local context. 2206 * @param olx 2207 * Configured Tx offloads mask. It is fully defined at 2208 * compile time and may be used for optimization. 2209 * 2210 * @return 2211 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2212 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2213 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2214 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 2215 * Local context variables updated. 2216 */ 2217 static __rte_always_inline enum mlx5_txcmp_code 2218 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 2219 struct rte_mbuf **__rte_restrict pkts, 2220 unsigned int pkts_n, 2221 struct mlx5_txq_local *__rte_restrict loc, 2222 unsigned int olx) 2223 { 2224 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2225 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2226 pkts += loc->pkts_sent + 1; 2227 pkts_n -= loc->pkts_sent; 2228 for (;;) { 2229 enum mlx5_txcmp_code ret; 2230 2231 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 2232 /* 2233 * Estimate the number of free elts quickly but conservatively. 2234 * Some segment may be fully inlined and freed, 2235 * ignore this here - precise estimation is costly. 2236 */ 2237 if (loc->elts_free < NB_SEGS(loc->mbuf)) 2238 return MLX5_TXCMP_CODE_EXIT; 2239 if (MLX5_TXOFF_CONFIG(TSO) && 2240 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { 2241 /* Proceed with multi-segment TSO. */ 2242 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 2243 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 2244 /* Proceed with multi-segment SEND with inlining. */ 2245 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 2246 } else { 2247 /* Proceed with multi-segment SEND w/o inlining. */ 2248 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 2249 } 2250 if (ret == MLX5_TXCMP_CODE_EXIT) 2251 return MLX5_TXCMP_CODE_EXIT; 2252 if (ret == MLX5_TXCMP_CODE_ERROR) 2253 return MLX5_TXCMP_CODE_ERROR; 2254 /* WQE is built, go to the next packet. */ 2255 ++loc->pkts_sent; 2256 --pkts_n; 2257 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2258 return MLX5_TXCMP_CODE_EXIT; 2259 loc->mbuf = *pkts++; 2260 if (pkts_n > 1) 2261 rte_prefetch0(*pkts); 2262 if (likely(NB_SEGS(loc->mbuf) > 1)) 2263 continue; 2264 /* Here ends the series of multi-segment packets. */ 2265 if (MLX5_TXOFF_CONFIG(TSO) && 2266 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2267 return MLX5_TXCMP_CODE_TSO; 2268 return MLX5_TXCMP_CODE_SINGLE; 2269 } 2270 MLX5_ASSERT(false); 2271 } 2272 2273 /** 2274 * Tx burst function for single-segment packets with TSO. 2275 * Supports all types of Tx offloads, except multi-packets. 2276 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 2277 * Function stops sending if it encounters the multi-segment 2278 * packet or packet without TSO requested. 2279 * 2280 * The routine is responsible for storing processed mbuf into elts ring buffer 2281 * and update elts_head if inline offloads is requested due to possible early 2282 * freeing of the inlined mbufs (can not store pkts array in elts as a batch). 2283 * 2284 * @param txq 2285 * Pointer to TX queue structure. 2286 * @param[in] pkts 2287 * Packets to transmit. 2288 * @param pkts_n 2289 * Number of packets in array. 2290 * @param loc 2291 * Pointer to burst routine local context. 2292 * @param olx 2293 * Configured Tx offloads mask. It is fully defined at 2294 * compile time and may be used for optimization. 2295 * 2296 * @return 2297 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2298 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2299 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2300 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2301 * Local context variables updated. 2302 */ 2303 static __rte_always_inline enum mlx5_txcmp_code 2304 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 2305 struct rte_mbuf **__rte_restrict pkts, 2306 unsigned int pkts_n, 2307 struct mlx5_txq_local *__rte_restrict loc, 2308 unsigned int olx) 2309 { 2310 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2311 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2312 pkts += loc->pkts_sent + 1; 2313 pkts_n -= loc->pkts_sent; 2314 for (;;) { 2315 struct mlx5_wqe_dseg *__rte_restrict dseg; 2316 struct mlx5_wqe *__rte_restrict wqe; 2317 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 2318 uint8_t *dptr; 2319 2320 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2321 if (MLX5_TXOFF_CONFIG(TXPP)) { 2322 enum mlx5_txcmp_code wret; 2323 2324 /* Generate WAIT for scheduling if requested. */ 2325 wret = mlx5_tx_schedule_send(txq, loc, 1, olx); 2326 if (wret == MLX5_TXCMP_CODE_EXIT) 2327 return MLX5_TXCMP_CODE_EXIT; 2328 if (wret == MLX5_TXCMP_CODE_ERROR) 2329 return MLX5_TXCMP_CODE_ERROR; 2330 } 2331 dlen = rte_pktmbuf_data_len(loc->mbuf); 2332 if (MLX5_TXOFF_CONFIG(VLAN) && 2333 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2334 vlan = sizeof(struct rte_vlan_hdr); 2335 } 2336 /* 2337 * First calculate the WQE size to check 2338 * whether we have enough space in ring buffer. 2339 */ 2340 hlen = loc->mbuf->l2_len + vlan + 2341 loc->mbuf->l3_len + loc->mbuf->l4_len; 2342 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 2343 return MLX5_TXCMP_CODE_ERROR; 2344 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 2345 hlen += loc->mbuf->outer_l2_len + 2346 loc->mbuf->outer_l3_len; 2347 /* Segment must contain all TSO headers. */ 2348 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 2349 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 2350 hlen > (dlen + vlan))) 2351 return MLX5_TXCMP_CODE_ERROR; 2352 /* 2353 * Check whether there are enough free WQEBBs: 2354 * - Control Segment 2355 * - Ethernet Segment 2356 * - First Segment of inlined Ethernet data 2357 * - ... data continued ... 2358 * - Finishing Data Segment of pointer type 2359 */ 2360 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 2361 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2362 if (loc->wqe_free < ((ds + 3) / 4)) 2363 return MLX5_TXCMP_CODE_EXIT; 2364 #ifdef MLX5_PMD_SOFT_COUNTERS 2365 /* Update sent data bytes/packets counters. */ 2366 ntcp = (dlen + vlan - hlen + 2367 loc->mbuf->tso_segsz - 1) / 2368 loc->mbuf->tso_segsz; 2369 /* 2370 * One will be added for mbuf itself at the end 2371 * of the mlx5_tx_burst from loc->pkts_sent field. 2372 */ 2373 --ntcp; 2374 txq->stats.opackets += ntcp; 2375 txq->stats.obytes += dlen + vlan + ntcp * hlen; 2376 #endif 2377 /* 2378 * Build the TSO WQE: 2379 * - Control Segment 2380 * - Ethernet Segment with hlen bytes inlined 2381 * - Data Segment of pointer type 2382 */ 2383 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2384 loc->wqe_last = wqe; 2385 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_TSO, olx); 2386 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 2387 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 2388 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 2389 dlen -= hlen - vlan; 2390 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2391 /* 2392 * WQE is built, update the loop parameters 2393 * and go to the next packet. 2394 */ 2395 txq->wqe_ci += (ds + 3) / 4; 2396 loc->wqe_free -= (ds + 3) / 4; 2397 if (MLX5_TXOFF_CONFIG(INLINE)) 2398 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2399 --loc->elts_free; 2400 ++loc->pkts_sent; 2401 --pkts_n; 2402 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2403 return MLX5_TXCMP_CODE_EXIT; 2404 loc->mbuf = *pkts++; 2405 if (pkts_n > 1) 2406 rte_prefetch0(*pkts); 2407 if (MLX5_TXOFF_CONFIG(MULTI) && 2408 unlikely(NB_SEGS(loc->mbuf) > 1)) 2409 return MLX5_TXCMP_CODE_MULTI; 2410 if (likely(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG))) 2411 return MLX5_TXCMP_CODE_SINGLE; 2412 /* Continue with the next TSO packet. */ 2413 } 2414 MLX5_ASSERT(false); 2415 } 2416 2417 /** 2418 * Analyze the packet and select the best method to send. 2419 * 2420 * @param txq 2421 * Pointer to TX queue structure. 2422 * @param loc 2423 * Pointer to burst routine local context. 2424 * @param olx 2425 * Configured Tx offloads mask. It is fully defined at 2426 * compile time and may be used for optimization. 2427 * @param newp 2428 * The predefined flag whether do complete check for 2429 * multi-segment packets and TSO. 2430 * 2431 * @return 2432 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2433 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 2434 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 2435 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 2436 */ 2437 static __rte_always_inline enum mlx5_txcmp_code 2438 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 2439 struct mlx5_txq_local *__rte_restrict loc, 2440 unsigned int olx, 2441 bool newp) 2442 { 2443 /* Check for multi-segment packet. */ 2444 if (newp && 2445 MLX5_TXOFF_CONFIG(MULTI) && 2446 unlikely(NB_SEGS(loc->mbuf) > 1)) 2447 return MLX5_TXCMP_CODE_MULTI; 2448 /* Check for TSO packet. */ 2449 if (newp && 2450 MLX5_TXOFF_CONFIG(TSO) && 2451 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2452 return MLX5_TXCMP_CODE_TSO; 2453 /* Check if eMPW is enabled at all. */ 2454 if (!MLX5_TXOFF_CONFIG(EMPW)) 2455 return MLX5_TXCMP_CODE_SINGLE; 2456 /* Check if eMPW can be engaged. */ 2457 if (MLX5_TXOFF_CONFIG(VLAN) && 2458 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) && 2459 (!MLX5_TXOFF_CONFIG(INLINE) || 2460 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 2461 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 2462 /* 2463 * eMPW does not support VLAN insertion offload, we have to 2464 * inline the entire packet but packet is too long for inlining. 2465 */ 2466 return MLX5_TXCMP_CODE_SINGLE; 2467 } 2468 return MLX5_TXCMP_CODE_EMPW; 2469 } 2470 2471 /** 2472 * Check the next packet attributes to match with the eMPW batch ones. 2473 * In addition, for legacy MPW the packet length is checked either. 2474 * 2475 * @param txq 2476 * Pointer to TX queue structure. 2477 * @param es 2478 * Pointer to Ethernet Segment of eMPW batch. 2479 * @param loc 2480 * Pointer to burst routine local context. 2481 * @param dlen 2482 * Length of previous packet in MPW descriptor. 2483 * @param olx 2484 * Configured Tx offloads mask. It is fully defined at 2485 * compile time and may be used for optimization. 2486 * 2487 * @return 2488 * true - packet match with eMPW batch attributes. 2489 * false - no match, eMPW should be restarted. 2490 */ 2491 static __rte_always_inline bool 2492 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, 2493 struct mlx5_wqe_eseg *__rte_restrict es, 2494 struct mlx5_txq_local *__rte_restrict loc, 2495 uint32_t dlen, 2496 unsigned int olx) 2497 { 2498 uint8_t swp_flags = 0; 2499 2500 /* Compare the checksum flags, if any. */ 2501 if (MLX5_TXOFF_CONFIG(CSUM) && 2502 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags) 2503 return false; 2504 /* Compare the Software Parser offsets and flags. */ 2505 if (MLX5_TXOFF_CONFIG(SWP) && 2506 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) || 2507 es->swp_flags != swp_flags)) 2508 return false; 2509 /* Fill metadata field if needed. */ 2510 if (MLX5_TXOFF_CONFIG(METADATA) && 2511 es->metadata != (loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 2512 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 0)) 2513 return false; 2514 /* Legacy MPW can send packets with the same length only. */ 2515 if (MLX5_TXOFF_CONFIG(MPW) && 2516 dlen != rte_pktmbuf_data_len(loc->mbuf)) 2517 return false; 2518 /* There must be no VLAN packets in eMPW loop. */ 2519 if (MLX5_TXOFF_CONFIG(VLAN)) 2520 MLX5_ASSERT(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN)); 2521 /* Check if the scheduling is requested. */ 2522 if (MLX5_TXOFF_CONFIG(TXPP) && 2523 loc->mbuf->ol_flags & txq->ts_mask) 2524 return false; 2525 return true; 2526 } 2527 2528 /** 2529 * Update send loop variables and WQE for eMPW loop without data inlining. 2530 * Number of Data Segments is equal to the number of sent packets. 2531 * 2532 * @param txq 2533 * Pointer to TX queue structure. 2534 * @param loc 2535 * Pointer to burst routine local context. 2536 * @param ds 2537 * Number of packets/Data Segments/Packets. 2538 * @param slen 2539 * Accumulated statistics, bytes sent. 2540 * @param olx 2541 * Configured Tx offloads mask. It is fully defined at 2542 * compile time and may be used for optimization. 2543 * 2544 * @return 2545 * true - packet match with eMPW batch attributes. 2546 * false - no match, eMPW should be restarted. 2547 */ 2548 static __rte_always_inline void 2549 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, 2550 struct mlx5_txq_local *__rte_restrict loc, 2551 unsigned int ds, 2552 unsigned int slen, 2553 unsigned int olx __rte_unused) 2554 { 2555 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2556 #ifdef MLX5_PMD_SOFT_COUNTERS 2557 /* Update sent data bytes counter. */ 2558 txq->stats.obytes += slen; 2559 #else 2560 (void)slen; 2561 #endif 2562 loc->elts_free -= ds; 2563 loc->pkts_sent += ds; 2564 ds += 2; 2565 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2566 txq->wqe_ci += (ds + 3) / 4; 2567 loc->wqe_free -= (ds + 3) / 4; 2568 } 2569 2570 /** 2571 * Update send loop variables and WQE for eMPW loop with data inlining. 2572 * Gets the size of pushed descriptors and data to the WQE. 2573 * 2574 * @param txq 2575 * Pointer to TX queue structure. 2576 * @param loc 2577 * Pointer to burst routine local context. 2578 * @param len 2579 * Total size of descriptor/data in bytes. 2580 * @param slen 2581 * Accumulated statistics, data bytes sent. 2582 * @param wqem 2583 * The base WQE for the eMPW/MPW descriptor. 2584 * @param olx 2585 * Configured Tx offloads mask. It is fully defined at 2586 * compile time and may be used for optimization. 2587 * 2588 * @return 2589 * true - packet match with eMPW batch attributes. 2590 * false - no match, eMPW should be restarted. 2591 */ 2592 static __rte_always_inline void 2593 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 2594 struct mlx5_txq_local *__rte_restrict loc, 2595 unsigned int len, 2596 unsigned int slen, 2597 struct mlx5_wqe *__rte_restrict wqem, 2598 unsigned int olx __rte_unused) 2599 { 2600 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 2601 2602 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2603 #ifdef MLX5_PMD_SOFT_COUNTERS 2604 /* Update sent data bytes counter. */ 2605 txq->stats.obytes += slen; 2606 #else 2607 (void)slen; 2608 #endif 2609 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 2610 /* 2611 * If the legacy MPW session contains the inline packets 2612 * we should set the only inline data segment length 2613 * and align the total length to the segment size. 2614 */ 2615 MLX5_ASSERT(len > sizeof(dseg->bcount)); 2616 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 2617 MLX5_ETH_WQE_DATA_INLINE); 2618 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 2619 } else { 2620 /* 2621 * The session is not legacy MPW or contains the 2622 * data buffer pointer segments. 2623 */ 2624 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 2625 len = len / MLX5_WSEG_SIZE + 2; 2626 } 2627 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 2628 txq->wqe_ci += (len + 3) / 4; 2629 loc->wqe_free -= (len + 3) / 4; 2630 loc->wqe_last = wqem; 2631 } 2632 2633 /** 2634 * The set of Tx burst functions for single-segment packets without TSO 2635 * and with Multi-Packet Writing feature support. 2636 * Supports all types of Tx offloads, except multi-packets and TSO. 2637 * 2638 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends as many packet 2639 * per WQE as it can. If eMPW is not configured or packet can not be sent with 2640 * eMPW (VLAN insertion) the ordinary SEND opcode is used and only one packet 2641 * placed in WQE. 2642 * 2643 * Functions stop sending if it encounters the multi-segment packet or packet 2644 * with TSO requested. 2645 * 2646 * The routines are responsible for storing processed mbuf into elts ring buffer 2647 * and update elts_head if inlining offload is requested. Otherwise the copying 2648 * mbufs to elts can be postponed and completed at the end of burst routine. 2649 * 2650 * @param txq 2651 * Pointer to TX queue structure. 2652 * @param[in] pkts 2653 * Packets to transmit. 2654 * @param pkts_n 2655 * Number of packets in array. 2656 * @param loc 2657 * Pointer to burst routine local context. 2658 * @param olx 2659 * Configured Tx offloads mask. It is fully defined at 2660 * compile time and may be used for optimization. 2661 * 2662 * @return 2663 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2664 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2665 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2666 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 2667 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 2668 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 2669 * 2670 * Local context variables updated. 2671 * 2672 * 2673 * The routine sends packets with MLX5_OPCODE_EMPW 2674 * without inlining, this is dedicated optimized branch. 2675 * No VLAN insertion is supported. 2676 */ 2677 static __rte_always_inline enum mlx5_txcmp_code 2678 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 2679 struct rte_mbuf **__rte_restrict pkts, 2680 unsigned int pkts_n, 2681 struct mlx5_txq_local *__rte_restrict loc, 2682 unsigned int olx) 2683 { 2684 /* 2685 * Subroutine is the part of mlx5_tx_burst_single() and sends 2686 * single-segment packet with eMPW opcode without data inlining. 2687 */ 2688 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2689 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2690 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2691 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2692 pkts += loc->pkts_sent + 1; 2693 pkts_n -= loc->pkts_sent; 2694 for (;;) { 2695 struct mlx5_wqe_dseg *__rte_restrict dseg; 2696 struct mlx5_wqe_eseg *__rte_restrict eseg; 2697 enum mlx5_txcmp_code ret; 2698 unsigned int part, loop; 2699 unsigned int slen = 0; 2700 2701 next_empw: 2702 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2703 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2704 MLX5_MPW_MAX_PACKETS : 2705 MLX5_EMPW_MAX_PACKETS); 2706 if (unlikely(loc->elts_free < part)) { 2707 /* We have no enough elts to save all mbufs. */ 2708 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 2709 return MLX5_TXCMP_CODE_EXIT; 2710 /* But we still able to send at least minimal eMPW. */ 2711 part = loc->elts_free; 2712 } 2713 if (MLX5_TXOFF_CONFIG(TXPP)) { 2714 enum mlx5_txcmp_code wret; 2715 2716 /* Generate WAIT for scheduling if requested. */ 2717 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 2718 if (wret == MLX5_TXCMP_CODE_EXIT) 2719 return MLX5_TXCMP_CODE_EXIT; 2720 if (wret == MLX5_TXCMP_CODE_ERROR) 2721 return MLX5_TXCMP_CODE_ERROR; 2722 } 2723 /* Check whether we have enough WQEs */ 2724 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 2725 if (unlikely(loc->wqe_free < 2726 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2727 return MLX5_TXCMP_CODE_EXIT; 2728 part = (loc->wqe_free * 4) - 2; 2729 } 2730 if (likely(part > 1)) 2731 rte_prefetch0(*pkts); 2732 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2733 /* 2734 * Build eMPW title WQEBB: 2735 * - Control Segment, eMPW opcode 2736 * - Ethernet Segment, no inline 2737 */ 2738 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 2739 MLX5_OPCODE_ENHANCED_MPSW, olx); 2740 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 2741 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2742 eseg = &loc->wqe_last->eseg; 2743 dseg = &loc->wqe_last->dseg[0]; 2744 loop = part; 2745 /* Store the packet length for legacy MPW. */ 2746 if (MLX5_TXOFF_CONFIG(MPW)) 2747 eseg->mss = rte_cpu_to_be_16 2748 (rte_pktmbuf_data_len(loc->mbuf)); 2749 for (;;) { 2750 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2751 #ifdef MLX5_PMD_SOFT_COUNTERS 2752 /* Update sent data bytes counter. */ 2753 slen += dlen; 2754 #endif 2755 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 2756 mlx5_tx_dseg_ptr 2757 (txq, loc, dseg, 2758 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 2759 dlen, olx); 2760 if (unlikely(--loop == 0)) 2761 break; 2762 loc->mbuf = *pkts++; 2763 if (likely(loop > 1)) 2764 rte_prefetch0(*pkts); 2765 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2766 /* 2767 * Unroll the completion code to avoid 2768 * returning variable value - it results in 2769 * unoptimized sequent checking in caller. 2770 */ 2771 if (ret == MLX5_TXCMP_CODE_MULTI) { 2772 part -= loop; 2773 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2774 if (unlikely(!loc->elts_free || 2775 !loc->wqe_free)) 2776 return MLX5_TXCMP_CODE_EXIT; 2777 return MLX5_TXCMP_CODE_MULTI; 2778 } 2779 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2780 if (ret == MLX5_TXCMP_CODE_TSO) { 2781 part -= loop; 2782 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2783 if (unlikely(!loc->elts_free || 2784 !loc->wqe_free)) 2785 return MLX5_TXCMP_CODE_EXIT; 2786 return MLX5_TXCMP_CODE_TSO; 2787 } 2788 if (ret == MLX5_TXCMP_CODE_SINGLE) { 2789 part -= loop; 2790 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2791 if (unlikely(!loc->elts_free || 2792 !loc->wqe_free)) 2793 return MLX5_TXCMP_CODE_EXIT; 2794 return MLX5_TXCMP_CODE_SINGLE; 2795 } 2796 if (ret != MLX5_TXCMP_CODE_EMPW) { 2797 MLX5_ASSERT(false); 2798 part -= loop; 2799 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2800 return MLX5_TXCMP_CODE_ERROR; 2801 } 2802 /* 2803 * Check whether packet parameters coincide 2804 * within assumed eMPW batch: 2805 * - check sum settings 2806 * - metadata value 2807 * - software parser settings 2808 * - packets length (legacy MPW only) 2809 * - scheduling is not required 2810 */ 2811 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 2812 MLX5_ASSERT(loop); 2813 part -= loop; 2814 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2815 if (unlikely(!loc->elts_free || 2816 !loc->wqe_free)) 2817 return MLX5_TXCMP_CODE_EXIT; 2818 pkts_n -= part; 2819 goto next_empw; 2820 } 2821 /* Packet attributes match, continue the same eMPW. */ 2822 ++dseg; 2823 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2824 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2825 } 2826 /* eMPW is built successfully, update loop parameters. */ 2827 MLX5_ASSERT(!loop); 2828 MLX5_ASSERT(pkts_n >= part); 2829 #ifdef MLX5_PMD_SOFT_COUNTERS 2830 /* Update sent data bytes counter. */ 2831 txq->stats.obytes += slen; 2832 #endif 2833 loc->elts_free -= part; 2834 loc->pkts_sent += part; 2835 txq->wqe_ci += (2 + part + 3) / 4; 2836 loc->wqe_free -= (2 + part + 3) / 4; 2837 pkts_n -= part; 2838 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2839 return MLX5_TXCMP_CODE_EXIT; 2840 loc->mbuf = *pkts++; 2841 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2842 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 2843 return ret; 2844 /* Continue sending eMPW batches. */ 2845 } 2846 MLX5_ASSERT(false); 2847 } 2848 2849 /** 2850 * The routine sends packets with MLX5_OPCODE_EMPW 2851 * with inlining, optionally supports VLAN insertion. 2852 */ 2853 static __rte_always_inline enum mlx5_txcmp_code 2854 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 2855 struct rte_mbuf **__rte_restrict pkts, 2856 unsigned int pkts_n, 2857 struct mlx5_txq_local *__rte_restrict loc, 2858 unsigned int olx) 2859 { 2860 /* 2861 * Subroutine is the part of mlx5_tx_burst_single() and sends 2862 * single-segment packet with eMPW opcode with data inlining. 2863 */ 2864 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2865 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2866 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2867 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2868 pkts += loc->pkts_sent + 1; 2869 pkts_n -= loc->pkts_sent; 2870 for (;;) { 2871 struct mlx5_wqe_dseg *__rte_restrict dseg; 2872 struct mlx5_wqe *__rte_restrict wqem; 2873 enum mlx5_txcmp_code ret; 2874 unsigned int room, part, nlim; 2875 unsigned int slen = 0; 2876 2877 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2878 /* 2879 * Limits the amount of packets in one WQE 2880 * to improve CQE latency generation. 2881 */ 2882 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2883 MLX5_MPW_INLINE_MAX_PACKETS : 2884 MLX5_EMPW_MAX_PACKETS); 2885 if (MLX5_TXOFF_CONFIG(TXPP)) { 2886 enum mlx5_txcmp_code wret; 2887 2888 /* Generate WAIT for scheduling if requested. */ 2889 wret = mlx5_tx_schedule_send(txq, loc, nlim, olx); 2890 if (wret == MLX5_TXCMP_CODE_EXIT) 2891 return MLX5_TXCMP_CODE_EXIT; 2892 if (wret == MLX5_TXCMP_CODE_ERROR) 2893 return MLX5_TXCMP_CODE_ERROR; 2894 } 2895 /* Check whether we have minimal amount WQEs */ 2896 if (unlikely(loc->wqe_free < 2897 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2898 return MLX5_TXCMP_CODE_EXIT; 2899 if (likely(pkts_n > 1)) 2900 rte_prefetch0(*pkts); 2901 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2902 /* 2903 * Build eMPW title WQEBB: 2904 * - Control Segment, eMPW opcode, zero DS 2905 * - Ethernet Segment, no inline 2906 */ 2907 mlx5_tx_cseg_init(txq, loc, wqem, 0, 2908 MLX5_OPCODE_ENHANCED_MPSW, olx); 2909 mlx5_tx_eseg_none(txq, loc, wqem, 2910 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2911 dseg = &wqem->dseg[0]; 2912 /* Store the packet length for legacy MPW. */ 2913 if (MLX5_TXOFF_CONFIG(MPW)) 2914 wqem->eseg.mss = rte_cpu_to_be_16 2915 (rte_pktmbuf_data_len(loc->mbuf)); 2916 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 2917 loc->wqe_free) * MLX5_WQE_SIZE - 2918 MLX5_WQE_CSEG_SIZE - 2919 MLX5_WQE_ESEG_SIZE; 2920 /* Limit the room for legacy MPW sessions for performance. */ 2921 if (MLX5_TXOFF_CONFIG(MPW)) 2922 room = RTE_MIN(room, 2923 RTE_MAX(txq->inlen_empw + 2924 sizeof(dseg->bcount) + 2925 (MLX5_TXOFF_CONFIG(VLAN) ? 2926 sizeof(struct rte_vlan_hdr) : 0), 2927 MLX5_MPW_INLINE_MAX_PACKETS * 2928 MLX5_WQE_DSEG_SIZE)); 2929 /* Build WQE till we have space, packets and resources. */ 2930 part = room; 2931 for (;;) { 2932 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2933 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2934 unsigned int tlen; 2935 2936 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2937 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 2938 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 2939 /* 2940 * Some Tx offloads may cause an error if packet is not 2941 * long enough, check against assumed minimal length. 2942 */ 2943 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 2944 part -= room; 2945 if (unlikely(!part)) 2946 return MLX5_TXCMP_CODE_ERROR; 2947 /* 2948 * We have some successfully built 2949 * packet Data Segments to send. 2950 */ 2951 mlx5_tx_idone_empw(txq, loc, part, 2952 slen, wqem, olx); 2953 return MLX5_TXCMP_CODE_ERROR; 2954 } 2955 /* Inline or not inline - that's the Question. */ 2956 if (dlen > txq->inlen_empw || 2957 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2958 goto pointer_empw; 2959 if (MLX5_TXOFF_CONFIG(MPW)) { 2960 if (dlen > txq->inlen_send) 2961 goto pointer_empw; 2962 tlen = dlen; 2963 if (part == room) { 2964 /* Open new inline MPW session. */ 2965 tlen += sizeof(dseg->bcount); 2966 dseg->bcount = RTE_BE32(0); 2967 dseg = RTE_PTR_ADD 2968 (dseg, sizeof(dseg->bcount)); 2969 } else { 2970 /* 2971 * No pointer and inline descriptor 2972 * intermix for legacy MPW sessions. 2973 */ 2974 if (wqem->dseg[0].bcount) 2975 break; 2976 } 2977 } else { 2978 tlen = sizeof(dseg->bcount) + dlen; 2979 } 2980 /* Inline entire packet, optional VLAN insertion. */ 2981 if (MLX5_TXOFF_CONFIG(VLAN) && 2982 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2983 /* 2984 * The packet length must be checked in 2985 * mlx5_tx_able_to_empw() and packet 2986 * fits into inline length guaranteed. 2987 */ 2988 MLX5_ASSERT((dlen + 2989 sizeof(struct rte_vlan_hdr)) <= 2990 txq->inlen_empw); 2991 tlen += sizeof(struct rte_vlan_hdr); 2992 if (room < tlen) 2993 break; 2994 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 2995 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 2996 dptr, dlen, olx); 2997 #ifdef MLX5_PMD_SOFT_COUNTERS 2998 /* Update sent data bytes counter. */ 2999 slen += sizeof(struct rte_vlan_hdr); 3000 #endif 3001 } else { 3002 if (room < tlen) 3003 break; 3004 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 3005 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 3006 dptr, dlen, olx); 3007 } 3008 if (!MLX5_TXOFF_CONFIG(MPW)) 3009 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 3010 MLX5_ASSERT(room >= tlen); 3011 room -= tlen; 3012 /* 3013 * Packet data are completely inline, 3014 * we can try to free the packet. 3015 */ 3016 if (likely(loc->pkts_sent == loc->mbuf_free)) { 3017 /* 3018 * All the packets from the burst beginning 3019 * are inline, we can free mbufs directly 3020 * from the origin array on tx_burst exit(). 3021 */ 3022 loc->mbuf_free++; 3023 goto next_mbuf; 3024 } 3025 /* 3026 * In order no to call rte_pktmbuf_free_seg() here, 3027 * in the most inner loop (that might be very 3028 * expensive) we just save the mbuf in elts. 3029 */ 3030 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3031 loc->elts_free--; 3032 goto next_mbuf; 3033 pointer_empw: 3034 /* 3035 * No pointer and inline descriptor 3036 * intermix for legacy MPW sessions. 3037 */ 3038 if (MLX5_TXOFF_CONFIG(MPW) && 3039 part != room && 3040 wqem->dseg[0].bcount == RTE_BE32(0)) 3041 break; 3042 /* 3043 * Not inlinable VLAN packets are 3044 * proceeded outside of this routine. 3045 */ 3046 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 3047 if (MLX5_TXOFF_CONFIG(VLAN)) 3048 MLX5_ASSERT(!(loc->mbuf->ol_flags & 3049 RTE_MBUF_F_TX_VLAN)); 3050 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 3051 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 3052 /* We have to store mbuf in elts.*/ 3053 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3054 loc->elts_free--; 3055 room -= MLX5_WQE_DSEG_SIZE; 3056 /* Ring buffer wraparound is checked at the loop end.*/ 3057 ++dseg; 3058 next_mbuf: 3059 #ifdef MLX5_PMD_SOFT_COUNTERS 3060 /* Update sent data bytes counter. */ 3061 slen += dlen; 3062 #endif 3063 loc->pkts_sent++; 3064 pkts_n--; 3065 if (unlikely(!pkts_n || !loc->elts_free)) { 3066 /* 3067 * We have no resources/packets to 3068 * continue build descriptors. 3069 */ 3070 part -= room; 3071 mlx5_tx_idone_empw(txq, loc, part, 3072 slen, wqem, olx); 3073 return MLX5_TXCMP_CODE_EXIT; 3074 } 3075 loc->mbuf = *pkts++; 3076 if (likely(pkts_n > 1)) 3077 rte_prefetch0(*pkts); 3078 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3079 /* 3080 * Unroll the completion code to avoid 3081 * returning variable value - it results in 3082 * unoptimized sequent checking in caller. 3083 */ 3084 if (ret == MLX5_TXCMP_CODE_MULTI) { 3085 part -= room; 3086 mlx5_tx_idone_empw(txq, loc, part, 3087 slen, wqem, olx); 3088 if (unlikely(!loc->elts_free || 3089 !loc->wqe_free)) 3090 return MLX5_TXCMP_CODE_EXIT; 3091 return MLX5_TXCMP_CODE_MULTI; 3092 } 3093 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3094 if (ret == MLX5_TXCMP_CODE_TSO) { 3095 part -= room; 3096 mlx5_tx_idone_empw(txq, loc, part, 3097 slen, wqem, olx); 3098 if (unlikely(!loc->elts_free || 3099 !loc->wqe_free)) 3100 return MLX5_TXCMP_CODE_EXIT; 3101 return MLX5_TXCMP_CODE_TSO; 3102 } 3103 if (ret == MLX5_TXCMP_CODE_SINGLE) { 3104 part -= room; 3105 mlx5_tx_idone_empw(txq, loc, part, 3106 slen, wqem, olx); 3107 if (unlikely(!loc->elts_free || 3108 !loc->wqe_free)) 3109 return MLX5_TXCMP_CODE_EXIT; 3110 return MLX5_TXCMP_CODE_SINGLE; 3111 } 3112 if (ret != MLX5_TXCMP_CODE_EMPW) { 3113 MLX5_ASSERT(false); 3114 part -= room; 3115 mlx5_tx_idone_empw(txq, loc, part, 3116 slen, wqem, olx); 3117 return MLX5_TXCMP_CODE_ERROR; 3118 } 3119 /* Check if we have minimal room left. */ 3120 nlim--; 3121 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 3122 break; 3123 /* 3124 * Check whether packet parameters coincide 3125 * within assumed eMPW batch: 3126 * - check sum settings 3127 * - metadata value 3128 * - software parser settings 3129 * - packets length (legacy MPW only) 3130 * - scheduling is not required 3131 */ 3132 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 3133 loc, dlen, olx)) 3134 break; 3135 /* Packet attributes match, continue the same eMPW. */ 3136 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3137 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3138 } 3139 /* 3140 * We get here to close an existing eMPW 3141 * session and start the new one. 3142 */ 3143 MLX5_ASSERT(pkts_n); 3144 part -= room; 3145 if (unlikely(!part)) 3146 return MLX5_TXCMP_CODE_EXIT; 3147 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 3148 if (unlikely(!loc->elts_free || 3149 !loc->wqe_free)) 3150 return MLX5_TXCMP_CODE_EXIT; 3151 /* Continue the loop with new eMPW session. */ 3152 } 3153 MLX5_ASSERT(false); 3154 } 3155 3156 /** 3157 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 3158 * Data inlining and VLAN insertion are supported. 3159 */ 3160 static __rte_always_inline enum mlx5_txcmp_code 3161 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 3162 struct rte_mbuf **__rte_restrict pkts, 3163 unsigned int pkts_n, 3164 struct mlx5_txq_local *__rte_restrict loc, 3165 unsigned int olx) 3166 { 3167 /* 3168 * Subroutine is the part of mlx5_tx_burst_single() 3169 * and sends single-segment packet with SEND opcode. 3170 */ 3171 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3172 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3173 pkts += loc->pkts_sent + 1; 3174 pkts_n -= loc->pkts_sent; 3175 for (;;) { 3176 struct mlx5_wqe *__rte_restrict wqe; 3177 enum mlx5_txcmp_code ret; 3178 3179 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3180 MLX5_ASSERT(loc->elts_free); 3181 if (MLX5_TXOFF_CONFIG(TXPP)) { 3182 enum mlx5_txcmp_code wret; 3183 3184 /* Generate WAIT for scheduling if requested. */ 3185 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 3186 if (wret == MLX5_TXCMP_CODE_EXIT) 3187 return MLX5_TXCMP_CODE_EXIT; 3188 if (wret == MLX5_TXCMP_CODE_ERROR) 3189 return MLX5_TXCMP_CODE_ERROR; 3190 } 3191 if (MLX5_TXOFF_CONFIG(INLINE)) { 3192 unsigned int inlen, vlan = 0; 3193 3194 inlen = rte_pktmbuf_data_len(loc->mbuf); 3195 if (MLX5_TXOFF_CONFIG(VLAN) && 3196 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 3197 vlan = sizeof(struct rte_vlan_hdr); 3198 inlen += vlan; 3199 } 3200 /* 3201 * If inlining is enabled at configuration time 3202 * the limit must be not less than minimal size. 3203 * Otherwise we would do extra check for data 3204 * size to avoid crashes due to length overflow. 3205 */ 3206 MLX5_ASSERT(txq->inlen_send >= 3207 MLX5_ESEG_MIN_INLINE_SIZE); 3208 if (inlen <= txq->inlen_send) { 3209 unsigned int seg_n, wqe_n; 3210 3211 rte_prefetch0(rte_pktmbuf_mtod 3212 (loc->mbuf, uint8_t *)); 3213 /* Check against minimal length. */ 3214 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3215 return MLX5_TXCMP_CODE_ERROR; 3216 if (loc->mbuf->ol_flags & 3217 RTE_MBUF_F_TX_DYNF_NOINLINE) { 3218 /* 3219 * The hint flag not to inline packet 3220 * data is set. Check whether we can 3221 * follow the hint. 3222 */ 3223 if ((!MLX5_TXOFF_CONFIG(EMPW) && 3224 txq->inlen_mode) || 3225 (MLX5_TXOFF_CONFIG(MPW) && 3226 txq->inlen_mode)) { 3227 if (inlen <= txq->inlen_send) 3228 goto single_inline; 3229 /* 3230 * The hardware requires the 3231 * minimal inline data header. 3232 */ 3233 goto single_min_inline; 3234 } 3235 if (MLX5_TXOFF_CONFIG(VLAN) && 3236 vlan && !txq->vlan_en) { 3237 /* 3238 * We must insert VLAN tag 3239 * by software means. 3240 */ 3241 goto single_part_inline; 3242 } 3243 goto single_no_inline; 3244 } 3245 single_inline: 3246 /* 3247 * Completely inlined packet data WQE: 3248 * - Control Segment, SEND opcode 3249 * - Ethernet Segment, no VLAN insertion 3250 * - Data inlined, VLAN optionally inserted 3251 * - Alignment to MLX5_WSEG_SIZE 3252 * Have to estimate amount of WQEBBs 3253 */ 3254 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 3255 MLX5_ESEG_MIN_INLINE_SIZE + 3256 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3257 /* Check if there are enough WQEBBs. */ 3258 wqe_n = (seg_n + 3) / 4; 3259 if (wqe_n > loc->wqe_free) 3260 return MLX5_TXCMP_CODE_EXIT; 3261 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3262 loc->wqe_last = wqe; 3263 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 3264 MLX5_OPCODE_SEND, olx); 3265 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 3266 mlx5_tx_eseg_data(txq, loc, wqe, 3267 vlan, inlen, 0, olx); 3268 txq->wqe_ci += wqe_n; 3269 loc->wqe_free -= wqe_n; 3270 /* 3271 * Packet data are completely inlined, 3272 * free the packet immediately. 3273 */ 3274 rte_pktmbuf_free_seg(loc->mbuf); 3275 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 3276 MLX5_TXOFF_CONFIG(MPW)) && 3277 txq->inlen_mode) { 3278 /* 3279 * If minimal inlining is requested the eMPW 3280 * feature should be disabled due to data is 3281 * inlined into Ethernet Segment, which can 3282 * not contain inlined data for eMPW due to 3283 * segment shared for all packets. 3284 */ 3285 struct mlx5_wqe_dseg *__rte_restrict dseg; 3286 unsigned int ds; 3287 uint8_t *dptr; 3288 3289 /* 3290 * The inline-mode settings require 3291 * to inline the specified amount of 3292 * data bytes to the Ethernet Segment. 3293 * We should check the free space in 3294 * WQE ring buffer to inline partially. 3295 */ 3296 single_min_inline: 3297 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 3298 MLX5_ASSERT(inlen > txq->inlen_mode); 3299 MLX5_ASSERT(txq->inlen_mode >= 3300 MLX5_ESEG_MIN_INLINE_SIZE); 3301 /* 3302 * Check whether there are enough free WQEBBs: 3303 * - Control Segment 3304 * - Ethernet Segment 3305 * - First Segment of inlined Ethernet data 3306 * - ... data continued ... 3307 * - Finishing Data Segment of pointer type 3308 */ 3309 ds = (MLX5_WQE_CSEG_SIZE + 3310 MLX5_WQE_ESEG_SIZE + 3311 MLX5_WQE_DSEG_SIZE + 3312 txq->inlen_mode - 3313 MLX5_ESEG_MIN_INLINE_SIZE + 3314 MLX5_WQE_DSEG_SIZE + 3315 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3316 if (loc->wqe_free < ((ds + 3) / 4)) 3317 return MLX5_TXCMP_CODE_EXIT; 3318 /* 3319 * Build the ordinary SEND WQE: 3320 * - Control Segment 3321 * - Ethernet Segment, inline inlen_mode bytes 3322 * - Data Segment of pointer type 3323 */ 3324 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3325 loc->wqe_last = wqe; 3326 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3327 MLX5_OPCODE_SEND, olx); 3328 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 3329 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 3330 txq->inlen_mode, 3331 0, olx); 3332 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3333 txq->inlen_mode - vlan; 3334 inlen -= txq->inlen_mode; 3335 mlx5_tx_dseg_ptr(txq, loc, dseg, 3336 dptr, inlen, olx); 3337 /* 3338 * WQE is built, update the loop parameters 3339 * and got to the next packet. 3340 */ 3341 txq->wqe_ci += (ds + 3) / 4; 3342 loc->wqe_free -= (ds + 3) / 4; 3343 /* We have to store mbuf in elts.*/ 3344 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3345 txq->elts[txq->elts_head++ & txq->elts_m] = 3346 loc->mbuf; 3347 --loc->elts_free; 3348 } else { 3349 uint8_t *dptr; 3350 unsigned int dlen; 3351 3352 /* 3353 * Partially inlined packet data WQE, we have 3354 * some space in title WQEBB, we can fill it 3355 * with some packet data. It takes one WQEBB, 3356 * it is available, no extra space check: 3357 * - Control Segment, SEND opcode 3358 * - Ethernet Segment, no VLAN insertion 3359 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 3360 * - Data Segment, pointer type 3361 * 3362 * We also get here if VLAN insertion is not 3363 * supported by HW, the inline is enabled. 3364 */ 3365 single_part_inline: 3366 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3367 loc->wqe_last = wqe; 3368 mlx5_tx_cseg_init(txq, loc, wqe, 4, 3369 MLX5_OPCODE_SEND, olx); 3370 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 3371 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 3372 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3373 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 3374 /* 3375 * The length check is performed above, by 3376 * comparing with txq->inlen_send. We should 3377 * not get overflow here. 3378 */ 3379 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 3380 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 3381 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 3382 dptr, dlen, olx); 3383 ++txq->wqe_ci; 3384 --loc->wqe_free; 3385 /* We have to store mbuf in elts.*/ 3386 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3387 txq->elts[txq->elts_head++ & txq->elts_m] = 3388 loc->mbuf; 3389 --loc->elts_free; 3390 } 3391 #ifdef MLX5_PMD_SOFT_COUNTERS 3392 /* Update sent data bytes counter. */ 3393 txq->stats.obytes += vlan + 3394 rte_pktmbuf_data_len(loc->mbuf); 3395 #endif 3396 } else { 3397 /* 3398 * No inline at all, it means the CPU cycles saving 3399 * is prioritized at configuration, we should not 3400 * copy any packet data to WQE. 3401 * 3402 * SEND WQE, one WQEBB: 3403 * - Control Segment, SEND opcode 3404 * - Ethernet Segment, optional VLAN, no inline 3405 * - Data Segment, pointer type 3406 */ 3407 single_no_inline: 3408 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3409 loc->wqe_last = wqe; 3410 mlx5_tx_cseg_init(txq, loc, wqe, 3, 3411 MLX5_OPCODE_SEND, olx); 3412 rte_pmd_mlx5_trace_tx_push(loc->mbuf, txq->wqe_ci); 3413 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3414 mlx5_tx_dseg_ptr 3415 (txq, loc, &wqe->dseg[0], 3416 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3417 rte_pktmbuf_data_len(loc->mbuf), olx); 3418 ++txq->wqe_ci; 3419 --loc->wqe_free; 3420 /* 3421 * We should not store mbuf pointer in elts 3422 * if no inlining is configured, this is done 3423 * by calling routine in a batch copy. 3424 */ 3425 if (MLX5_TXOFF_CONFIG(INLINE)) 3426 txq->elts[txq->elts_head++ & txq->elts_m] = 3427 loc->mbuf; 3428 --loc->elts_free; 3429 #ifdef MLX5_PMD_SOFT_COUNTERS 3430 /* Update sent data bytes counter. */ 3431 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 3432 if (MLX5_TXOFF_CONFIG(VLAN) && 3433 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 3434 txq->stats.obytes += 3435 sizeof(struct rte_vlan_hdr); 3436 #endif 3437 } 3438 ++loc->pkts_sent; 3439 --pkts_n; 3440 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3441 return MLX5_TXCMP_CODE_EXIT; 3442 loc->mbuf = *pkts++; 3443 if (pkts_n > 1) 3444 rte_prefetch0(*pkts); 3445 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3446 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 3447 return ret; 3448 } 3449 MLX5_ASSERT(false); 3450 } 3451 3452 static __rte_always_inline enum mlx5_txcmp_code 3453 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 3454 struct rte_mbuf **__rte_restrict pkts, 3455 unsigned int pkts_n, 3456 struct mlx5_txq_local *__rte_restrict loc, 3457 unsigned int olx) 3458 { 3459 enum mlx5_txcmp_code ret; 3460 3461 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 3462 if (ret == MLX5_TXCMP_CODE_SINGLE) 3463 goto ordinary_send; 3464 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 3465 for (;;) { 3466 /* Optimize for inline/no inline eMPW send. */ 3467 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 3468 mlx5_tx_burst_empw_inline 3469 (txq, pkts, pkts_n, loc, olx) : 3470 mlx5_tx_burst_empw_simple 3471 (txq, pkts, pkts_n, loc, olx); 3472 if (ret != MLX5_TXCMP_CODE_SINGLE) 3473 return ret; 3474 /* The resources to send one packet should remain. */ 3475 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3476 ordinary_send: 3477 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 3478 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 3479 if (ret != MLX5_TXCMP_CODE_EMPW) 3480 return ret; 3481 /* The resources to send one packet should remain. */ 3482 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3483 } 3484 } 3485 3486 /** 3487 * DPDK Tx callback template. This is configured template used to generate 3488 * routines optimized for specified offload setup. 3489 * One of this generated functions is chosen at SQ configuration time. 3490 * 3491 * @param txq 3492 * Generic pointer to TX queue structure. 3493 * @param[in] pkts 3494 * Packets to transmit. 3495 * @param pkts_n 3496 * Number of packets in array. 3497 * @param olx 3498 * Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx 3499 * values. Should be static to take compile time static configuration 3500 * advantages. 3501 * 3502 * @return 3503 * Number of packets successfully transmitted (<= pkts_n). 3504 */ 3505 static __rte_always_inline uint16_t 3506 mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, 3507 struct rte_mbuf **__rte_restrict pkts, 3508 uint16_t pkts_n, 3509 unsigned int olx) 3510 { 3511 struct mlx5_txq_local loc; 3512 enum mlx5_txcmp_code ret; 3513 unsigned int part; 3514 3515 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 3516 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 3517 if (unlikely(!pkts_n)) 3518 return 0; 3519 if (MLX5_TXOFF_CONFIG(INLINE)) 3520 loc.mbuf_free = 0; 3521 loc.pkts_sent = 0; 3522 loc.pkts_copy = 0; 3523 loc.wqe_last = NULL; 3524 3525 send_loop: 3526 loc.pkts_loop = loc.pkts_sent; 3527 /* 3528 * Check if there are some CQEs, if any: 3529 * - process an encountered errors 3530 * - process the completed WQEs 3531 * - free related mbufs 3532 * - doorbell the NIC about processed CQEs 3533 */ 3534 rte_prefetch0(*(pkts + loc.pkts_sent)); 3535 mlx5_tx_handle_completion(txq, olx); 3536 /* 3537 * Calculate the number of available resources - elts and WQEs. 3538 * There are two possible different scenarios: 3539 * - no data inlining into WQEs, one WQEBB may contains up to 3540 * four packets, in this case elts become scarce resource 3541 * - data inlining into WQEs, one packet may require multiple 3542 * WQEBBs, the WQEs become the limiting factor. 3543 */ 3544 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 3545 loc.elts_free = txq->elts_s - 3546 (uint16_t)(txq->elts_head - txq->elts_tail); 3547 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 3548 loc.wqe_free = txq->wqe_s - 3549 (uint16_t)(txq->wqe_ci - txq->wqe_pi); 3550 if (unlikely(!loc.elts_free || !loc.wqe_free)) 3551 goto burst_exit; 3552 for (;;) { 3553 /* 3554 * Fetch the packet from array. Usually this is the first 3555 * packet in series of multi/single segment packets. 3556 */ 3557 loc.mbuf = *(pkts + loc.pkts_sent); 3558 /* Dedicated branch for multi-segment packets. */ 3559 if (MLX5_TXOFF_CONFIG(MULTI) && 3560 unlikely(NB_SEGS(loc.mbuf) > 1)) { 3561 /* 3562 * Multi-segment packet encountered. 3563 * Hardware is able to process it only 3564 * with SEND/TSO opcodes, one packet 3565 * per WQE, do it in dedicated routine. 3566 */ 3567 enter_send_multi: 3568 MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy); 3569 part = loc.pkts_sent - loc.pkts_copy; 3570 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 3571 /* 3572 * There are some single-segment mbufs not 3573 * stored in elts. The mbufs must be in the 3574 * same order as WQEs, so we must copy the 3575 * mbufs to elts here, before the coming 3576 * multi-segment packet mbufs is appended. 3577 */ 3578 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, 3579 part, olx); 3580 loc.pkts_copy = loc.pkts_sent; 3581 } 3582 MLX5_ASSERT(pkts_n > loc.pkts_sent); 3583 ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx); 3584 if (!MLX5_TXOFF_CONFIG(INLINE)) 3585 loc.pkts_copy = loc.pkts_sent; 3586 /* 3587 * These returned code checks are supposed 3588 * to be optimized out due to routine inlining. 3589 */ 3590 if (ret == MLX5_TXCMP_CODE_EXIT) { 3591 /* 3592 * The routine returns this code when 3593 * all packets are sent or there is no 3594 * enough resources to complete request. 3595 */ 3596 break; 3597 } 3598 if (ret == MLX5_TXCMP_CODE_ERROR) { 3599 /* 3600 * The routine returns this code when some error 3601 * in the incoming packets format occurred. 3602 */ 3603 txq->stats.oerrors++; 3604 break; 3605 } 3606 if (ret == MLX5_TXCMP_CODE_SINGLE) { 3607 /* 3608 * The single-segment packet was encountered 3609 * in the array, try to send it with the 3610 * best optimized way, possible engaging eMPW. 3611 */ 3612 goto enter_send_single; 3613 } 3614 if (MLX5_TXOFF_CONFIG(TSO) && 3615 ret == MLX5_TXCMP_CODE_TSO) { 3616 /* 3617 * The single-segment TSO packet was 3618 * encountered in the array. 3619 */ 3620 goto enter_send_tso; 3621 } 3622 /* We must not get here. Something is going wrong. */ 3623 MLX5_ASSERT(false); 3624 txq->stats.oerrors++; 3625 break; 3626 } 3627 /* Dedicated branch for single-segment TSO packets. */ 3628 if (MLX5_TXOFF_CONFIG(TSO) && 3629 unlikely(loc.mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { 3630 /* 3631 * TSO might require special way for inlining 3632 * (dedicated parameters) and is sent with 3633 * MLX5_OPCODE_TSO opcode only, provide this 3634 * in dedicated branch. 3635 */ 3636 enter_send_tso: 3637 MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1); 3638 MLX5_ASSERT(pkts_n > loc.pkts_sent); 3639 ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx); 3640 /* 3641 * These returned code checks are supposed 3642 * to be optimized out due to routine inlining. 3643 */ 3644 if (ret == MLX5_TXCMP_CODE_EXIT) 3645 break; 3646 if (ret == MLX5_TXCMP_CODE_ERROR) { 3647 txq->stats.oerrors++; 3648 break; 3649 } 3650 if (ret == MLX5_TXCMP_CODE_SINGLE) 3651 goto enter_send_single; 3652 if (MLX5_TXOFF_CONFIG(MULTI) && 3653 ret == MLX5_TXCMP_CODE_MULTI) { 3654 /* 3655 * The multi-segment packet was 3656 * encountered in the array. 3657 */ 3658 goto enter_send_multi; 3659 } 3660 /* We must not get here. Something is going wrong. */ 3661 MLX5_ASSERT(false); 3662 txq->stats.oerrors++; 3663 break; 3664 } 3665 /* 3666 * The dedicated branch for the single-segment packets 3667 * without TSO. Often these ones can be sent using 3668 * MLX5_OPCODE_EMPW with multiple packets in one WQE. 3669 * The routine builds the WQEs till it encounters 3670 * the TSO or multi-segment packet (in case if these 3671 * offloads are requested at SQ configuration time). 3672 */ 3673 enter_send_single: 3674 MLX5_ASSERT(pkts_n > loc.pkts_sent); 3675 ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx); 3676 /* 3677 * These returned code checks are supposed 3678 * to be optimized out due to routine inlining. 3679 */ 3680 if (ret == MLX5_TXCMP_CODE_EXIT) 3681 break; 3682 if (ret == MLX5_TXCMP_CODE_ERROR) { 3683 txq->stats.oerrors++; 3684 break; 3685 } 3686 if (MLX5_TXOFF_CONFIG(MULTI) && 3687 ret == MLX5_TXCMP_CODE_MULTI) { 3688 /* 3689 * The multi-segment packet was 3690 * encountered in the array. 3691 */ 3692 goto enter_send_multi; 3693 } 3694 if (MLX5_TXOFF_CONFIG(TSO) && 3695 ret == MLX5_TXCMP_CODE_TSO) { 3696 /* 3697 * The single-segment TSO packet was 3698 * encountered in the array. 3699 */ 3700 goto enter_send_tso; 3701 } 3702 /* We must not get here. Something is going wrong. */ 3703 MLX5_ASSERT(false); 3704 txq->stats.oerrors++; 3705 break; 3706 } 3707 /* 3708 * Main Tx loop is completed, do the rest: 3709 * - set completion request if thresholds are reached 3710 * - doorbell the hardware 3711 * - copy the rest of mbufs to elts (if any) 3712 */ 3713 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) || 3714 loc.pkts_sent >= loc.pkts_copy); 3715 /* Take a shortcut if nothing is sent. */ 3716 if (unlikely(loc.pkts_sent == loc.pkts_loop)) 3717 goto burst_exit; 3718 /* Request CQE generation if limits are reached. */ 3719 if (MLX5_TXOFF_CONFIG(TXPP) && __rte_trace_point_fp_is_enabled()) 3720 mlx5_tx_request_completion_trace(txq, &loc, olx); 3721 else 3722 mlx5_tx_request_completion(txq, &loc, olx); 3723 /* 3724 * Ring QP doorbell immediately after WQE building completion 3725 * to improve latencies. The pure software related data treatment 3726 * can be completed after doorbell. Tx CQEs for this SQ are 3727 * processed in this thread only by the polling. 3728 * 3729 * The rdma core library can map doorbell register in two ways, 3730 * depending on the environment variable "MLX5_SHUT_UP_BF": 3731 * 3732 * - as regular cached memory, the variable is either missing or 3733 * set to zero. This type of mapping may cause the significant 3734 * doorbell register writing latency and requires explicit memory 3735 * write barrier to mitigate this issue and prevent write combining. 3736 * 3737 * - as non-cached memory, the variable is present and set to not "0" 3738 * value. This type of mapping may cause performance impact under 3739 * heavy loading conditions but the explicit write memory barrier is 3740 * not required and it may improve core performance. 3741 * 3742 * - the legacy behaviour (prior 19.08 release) was to use some 3743 * heuristics to decide whether write memory barrier should 3744 * be performed. This behavior is supported with specifying 3745 * tx_db_nc=2, write barrier is skipped if application provides 3746 * the full recommended burst of packets, it supposes the next 3747 * packets are coming and the write barrier will be issued on 3748 * the next burst (after descriptor writing, at least). 3749 */ 3750 mlx5_doorbell_ring(mlx5_tx_bfreg(txq), 3751 *(volatile uint64_t *)loc.wqe_last, txq->wqe_ci, 3752 txq->qp_db, !txq->db_nc && 3753 (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST)); 3754 /* Not all of the mbufs may be stored into elts yet. */ 3755 part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy; 3756 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 3757 /* 3758 * There are some single-segment mbufs not stored in elts. 3759 * It can be only if the last packet was single-segment. 3760 * The copying is gathered into one place due to it is 3761 * a good opportunity to optimize that with SIMD. 3762 * Unfortunately if inlining is enabled the gaps in pointer 3763 * array may happen due to early freeing of the inlined mbufs. 3764 */ 3765 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx); 3766 loc.pkts_copy = loc.pkts_sent; 3767 } 3768 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 3769 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 3770 if (pkts_n > loc.pkts_sent) { 3771 /* 3772 * If burst size is large there might be no enough CQE 3773 * fetched from completion queue and no enough resources 3774 * freed to send all the packets. 3775 */ 3776 goto send_loop; 3777 } 3778 burst_exit: 3779 #ifdef MLX5_PMD_SOFT_COUNTERS 3780 /* Increment sent packets counter. */ 3781 txq->stats.opackets += loc.pkts_sent; 3782 #endif 3783 if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free) 3784 __mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx); 3785 /* Trace productive bursts only. */ 3786 if (__rte_trace_point_fp_is_enabled() && loc.pkts_sent) 3787 rte_pmd_mlx5_trace_tx_exit(loc.pkts_sent, pkts_n); 3788 return loc.pkts_sent; 3789 } 3790 3791 #endif /* RTE_PMD_MLX5_TX_H_ */ 3792