/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2021 6WIND S.A.
 * Copyright 2021 Mellanox Technologies, Ltd
 */

#ifndef RTE_PMD_MLX5_TX_H_
#define RTE_PMD_MLX5_TX_H_

#include <stdint.h>
#include <sys/queue.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_common.h>
#include <rte_spinlock.h>

#include <mlx5_common_mr.h>

#include "mlx5.h"
#include "mlx5_autoconf.h"
#include "mlx5_mr.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
	MLX5_TXCMP_CODE_EXIT = 0,
	MLX5_TXCMP_CODE_ERROR,
	MLX5_TXCMP_CODE_SINGLE,
	MLX5_TXCMP_CODE_MULTI,
	MLX5_TXCMP_CODE_TSO,
	MLX5_TXCMP_CODE_EMPW,
};

/*
 * These defines are used to configure the set of Tx burst routine options
 * supported at compile time. Options that are not specified are optimized
 * out, because the related if conditions can be evaluated at compile time.
 * The offloads with a bigger runtime check overhead (requiring more CPU
 * cycles to skip) should have the bigger index - this is needed to select
 * the better matching routine when there is no exact match and some
 * offloads are not actually requested.
 */
#define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets. */
#define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported. */
#define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads. */
#define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Check Sums offloaded. */
#define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported. */
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported. */
#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported. */
#define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp. */

/* The most common offloads groups. */
#define MLX5_TXOFF_CONFIG_NONE 0
#define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \
				MLX5_TXOFF_CONFIG_TSO | \
				MLX5_TXOFF_CONFIG_SWP | \
				MLX5_TXOFF_CONFIG_CSUM | \
				MLX5_TXOFF_CONFIG_INLINE | \
				MLX5_TXOFF_CONFIG_VLAN | \
				MLX5_TXOFF_CONFIG_METADATA)

#define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask)

#define MLX5_TXOFF_PRE_DECL(func) \
uint16_t mlx5_tx_burst_##func(void *txq, \
			      struct rte_mbuf **pkts, \
			      uint16_t pkts_n)

#define MLX5_TXOFF_DECL(func, olx) \
uint16_t mlx5_tx_burst_##func(void *txq, \
			      struct rte_mbuf **pkts, \
			      uint16_t pkts_n) \
{ \
	return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \
				  pkts, pkts_n, (olx)); \
}
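/*
 * Illustrative sketch (not a definition from this file): the macros above
 * combine as follows. A source file such as mlx5_tx_empw.c may instantiate
 * a burst routine with, for example:
 *
 *   MLX5_TXOFF_DECL(full_empw,
 *		     MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW)
 *
 * which expands to mlx5_tx_burst_full_empw() calling mlx5_tx_burst_tmpl()
 * with a compile-time constant "olx" mask. Inside the template every
 * MLX5_TXOFF_CONFIG(...) check then folds to a constant, so the code for
 * unselected offloads is optimized out. The flag combination shown here is
 * only an example; see the MLX5_TXOFF_PRE_DECL lists below for the routines
 * actually provided.
 */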

/* Mbuf dynamic flag offset for inline. */
extern uint64_t rte_net_mlx5_dynf_inline_mask;
#define PKT_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask

extern uint32_t mlx5_ptype_table[] __rte_cache_aligned;
extern uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned;
extern uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned;

struct mlx5_txq_stats {
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint64_t opackets; /**< Total of successfully sent packets. */
	uint64_t obytes; /**< Total of successfully sent bytes. */
#endif
	uint64_t oerrors; /**< Total number of failed transmitted packets. */
};

/* TX queue send local data. */
__extension__
struct mlx5_txq_local {
	struct mlx5_wqe *wqe_last; /* Last sent WQE pointer. */
	struct rte_mbuf *mbuf; /* First mbuf to process. */
	uint16_t pkts_copy; /* Packets copied to elts. */
	uint16_t pkts_sent; /* Packets sent. */
	uint16_t pkts_loop; /* Packets sent on loop entry. */
	uint16_t elts_free; /* Available elts remain. */
	uint16_t wqe_free; /* Available wqe remain. */
	uint16_t mbuf_off; /* Data offset in current mbuf. */
	uint16_t mbuf_nseg; /* Number of remaining mbuf segments. */
	uint16_t mbuf_free; /* Number of inline mbufs to free. */
};

/* TX queue descriptor. */
__extension__
struct mlx5_txq_data {
	uint16_t elts_head; /* Current counter in (*elts)[]. */
	uint16_t elts_tail; /* Counter of first element awaiting completion. */
	uint16_t elts_comp; /* elts index since last completion request. */
	uint16_t elts_s; /* Number of mbuf elements. */
	uint16_t elts_m; /* Mask for mbuf elements indices. */
	/* Fields related to elts mbuf storage. */
	uint16_t wqe_ci; /* Consumer index for work queue. */
	uint16_t wqe_pi; /* Producer index for work queue. */
	uint16_t wqe_s; /* Number of WQ elements. */
	uint16_t wqe_m; /* Mask for WQ elements indices. */
	uint16_t wqe_comp; /* WQE index since last completion request. */
	uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */
	/* WQ related fields. */
	uint16_t cq_ci; /* Consumer index for completion queue. */
	uint16_t cq_pi; /* Producer index for completion queue. */
	uint16_t cqe_s; /* Number of CQ elements. */
	uint16_t cqe_m; /* Mask for CQ indices. */
	/* CQ related fields. */
	uint16_t elts_n:4; /* elts[] length (in log2). */
	uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
	uint16_t wqe_n:4; /* Number of WQ elements (in log2). */
	uint16_t tso_en:1; /* When set hardware TSO is enabled. */
	uint16_t tunnel_en:1;
	/* When set, Tx offloads for tunneled packets are supported. */
	uint16_t swp_en:1; /* Whether SW parser is enabled. */
	uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */
	uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */
	uint16_t db_heu:1; /* Doorbell heuristic write barrier. */
	uint16_t fast_free:1; /* mbuf fast free on Tx is enabled. */
	uint16_t inlen_send; /* Ordinary send data inline size. */
	uint16_t inlen_empw; /* eMPW max packet size to inline. */
	uint16_t inlen_mode; /* Minimal data length to inline. */
	uint32_t qp_num_8s; /* QP number shifted by 8. */
	uint64_t offloads; /* Offloads for Tx Queue. */
	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
	struct mlx5_wqe *wqes; /* Work queue. */
	struct mlx5_wqe *wqes_end; /* Work queue array limit. */
#ifdef RTE_LIBRTE_MLX5_DEBUG
	uint32_t *fcqs; /* Free completion queue (debug extended). */
#else
	uint16_t *fcqs; /* Free completion queue. */
#endif
	volatile struct mlx5_cqe *cqes; /* Completion queue. */
	volatile uint32_t *qp_db; /* Work queue doorbell. */
	volatile uint32_t *cq_db; /* Completion queue doorbell. */
	uint16_t port_id; /* Port ID of device. */
	uint16_t idx; /* Queue index. */
	uint64_t ts_mask; /* Timestamp flag dynamic mask. */
	int32_t ts_offset; /* Timestamp field dynamic offset. */
	struct mlx5_dev_ctx_shared *sh; /* Shared context. */
	struct mlx5_txq_stats stats; /* TX queue counters. */
#ifndef RTE_ARCH_64
	rte_spinlock_t *uar_lock;
	/* UAR access lock required for 32bit implementations */
#endif
	struct rte_mbuf *elts[0];
	/* Storage for queued packets, must be the last field.
*/ 170 } __rte_cache_aligned; 171 172 enum mlx5_txq_type { 173 MLX5_TXQ_TYPE_STANDARD, /* Standard Tx queue. */ 174 MLX5_TXQ_TYPE_HAIRPIN, /* Hairpin Tx queue. */ 175 }; 176 177 /* TX queue control descriptor. */ 178 struct mlx5_txq_ctrl { 179 LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */ 180 uint32_t refcnt; /* Reference counter. */ 181 unsigned int socket; /* CPU socket ID for allocations. */ 182 enum mlx5_txq_type type; /* The txq ctrl type. */ 183 unsigned int max_inline_data; /* Max inline data. */ 184 unsigned int max_tso_header; /* Max TSO header size. */ 185 struct mlx5_txq_obj *obj; /* Verbs/DevX queue object. */ 186 struct mlx5_priv *priv; /* Back pointer to private data. */ 187 off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */ 188 void *bf_reg; /* BlueFlame register from Verbs. */ 189 uint16_t dump_file_n; /* Number of dump files. */ 190 struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */ 191 uint32_t hairpin_status; /* Hairpin binding status. */ 192 struct mlx5_txq_data txq; /* Data path structure. */ 193 /* Must be the last field in the structure, contains elts[]. */ 194 }; 195 196 /* mlx5_txq.c */ 197 198 int mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id); 199 int mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id); 200 int mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t queue_id); 201 int mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t queue_id); 202 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 203 unsigned int socket, const struct rte_eth_txconf *conf); 204 int mlx5_tx_hairpin_queue_setup 205 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 206 const struct rte_eth_hairpin_conf *hairpin_conf); 207 void mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 208 void txq_uar_init(struct mlx5_txq_ctrl *txq_ctrl); 209 int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd); 210 void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev); 211 int mlx5_txq_obj_verify(struct rte_eth_dev *dev); 212 struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, 213 uint16_t desc, unsigned int socket, 214 const struct rte_eth_txconf *conf); 215 struct mlx5_txq_ctrl *mlx5_txq_hairpin_new 216 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 217 const struct rte_eth_hairpin_conf *hairpin_conf); 218 struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx); 219 int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx); 220 int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx); 221 int mlx5_txq_verify(struct rte_eth_dev *dev); 222 void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl); 223 void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl); 224 uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev); 225 void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev); 226 227 /* mlx5_tx.c */ 228 229 uint16_t removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 230 uint16_t pkts_n); 231 void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, 232 unsigned int olx __rte_unused); 233 int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset); 234 void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, 235 struct rte_eth_txq_info *qinfo); 236 int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t tx_queue_id, 237 struct rte_eth_burst_mode *mode); 238 239 /* mlx5_mr.c */ 240 241 uint32_t mlx5_tx_mb2mr_bh(struct mlx5_txq_data *txq, struct rte_mbuf 
*mb); 242 uint32_t mlx5_tx_update_ext_mp(struct mlx5_txq_data *txq, uintptr_t addr, 243 struct rte_mempool *mp); 244 245 /* mlx5_tx_empw.c */ 246 247 MLX5_TXOFF_PRE_DECL(full_empw); 248 MLX5_TXOFF_PRE_DECL(none_empw); 249 MLX5_TXOFF_PRE_DECL(md_empw); 250 MLX5_TXOFF_PRE_DECL(mt_empw); 251 MLX5_TXOFF_PRE_DECL(mtsc_empw); 252 MLX5_TXOFF_PRE_DECL(mti_empw); 253 MLX5_TXOFF_PRE_DECL(mtv_empw); 254 MLX5_TXOFF_PRE_DECL(mtiv_empw); 255 MLX5_TXOFF_PRE_DECL(sc_empw); 256 MLX5_TXOFF_PRE_DECL(sci_empw); 257 MLX5_TXOFF_PRE_DECL(scv_empw); 258 MLX5_TXOFF_PRE_DECL(sciv_empw); 259 MLX5_TXOFF_PRE_DECL(i_empw); 260 MLX5_TXOFF_PRE_DECL(v_empw); 261 MLX5_TXOFF_PRE_DECL(iv_empw); 262 263 /* mlx5_tx_nompw.c */ 264 265 MLX5_TXOFF_PRE_DECL(full); 266 MLX5_TXOFF_PRE_DECL(none); 267 MLX5_TXOFF_PRE_DECL(md); 268 MLX5_TXOFF_PRE_DECL(mt); 269 MLX5_TXOFF_PRE_DECL(mtsc); 270 MLX5_TXOFF_PRE_DECL(mti); 271 MLX5_TXOFF_PRE_DECL(mtv); 272 MLX5_TXOFF_PRE_DECL(mtiv); 273 MLX5_TXOFF_PRE_DECL(sc); 274 MLX5_TXOFF_PRE_DECL(sci); 275 MLX5_TXOFF_PRE_DECL(scv); 276 MLX5_TXOFF_PRE_DECL(sciv); 277 MLX5_TXOFF_PRE_DECL(i); 278 MLX5_TXOFF_PRE_DECL(v); 279 MLX5_TXOFF_PRE_DECL(iv); 280 281 /* mlx5_tx_txpp.c */ 282 283 MLX5_TXOFF_PRE_DECL(full_ts_nompw); 284 MLX5_TXOFF_PRE_DECL(full_ts_nompwi); 285 MLX5_TXOFF_PRE_DECL(full_ts); 286 MLX5_TXOFF_PRE_DECL(full_ts_noi); 287 MLX5_TXOFF_PRE_DECL(none_ts); 288 MLX5_TXOFF_PRE_DECL(mdi_ts); 289 MLX5_TXOFF_PRE_DECL(mti_ts); 290 MLX5_TXOFF_PRE_DECL(mtiv_ts); 291 292 /* mlx5_tx_mpw.c */ 293 294 MLX5_TXOFF_PRE_DECL(none_mpw); 295 MLX5_TXOFF_PRE_DECL(mci_mpw); 296 MLX5_TXOFF_PRE_DECL(mc_mpw); 297 MLX5_TXOFF_PRE_DECL(i_mpw); 298 299 static __rte_always_inline uint64_t * 300 mlx5_tx_bfreg(struct mlx5_txq_data *txq) 301 { 302 return MLX5_PROC_PRIV(txq->port_id)->uar_table[txq->idx]; 303 } 304 305 /** 306 * Provide safe 64bit store operation to mlx5 UAR region for both 32bit and 307 * 64bit architectures. 308 * 309 * @param val 310 * value to write in CPU endian format. 311 * @param addr 312 * Address to write to. 313 * @param lock 314 * Address of the lock to use for that UAR access. 315 */ 316 static __rte_always_inline void 317 __mlx5_uar_write64_relaxed(uint64_t val, void *addr, 318 rte_spinlock_t *lock __rte_unused) 319 { 320 #ifdef RTE_ARCH_64 321 *(uint64_t *)addr = val; 322 #else /* !RTE_ARCH_64 */ 323 rte_spinlock_lock(lock); 324 *(uint32_t *)addr = val; 325 rte_io_wmb(); 326 *((uint32_t *)addr + 1) = val >> 32; 327 rte_spinlock_unlock(lock); 328 #endif 329 } 330 331 /** 332 * Provide safe 64bit store operation to mlx5 UAR region for both 32bit and 333 * 64bit architectures while guaranteeing the order of execution with the 334 * code being executed. 335 * 336 * @param val 337 * value to write in CPU endian format. 338 * @param addr 339 * Address to write to. 340 * @param lock 341 * Address of the lock to use for that UAR access. 342 */ 343 static __rte_always_inline void 344 __mlx5_uar_write64(uint64_t val, void *addr, rte_spinlock_t *lock) 345 { 346 rte_io_wmb(); 347 __mlx5_uar_write64_relaxed(val, addr, lock); 348 } 349 350 /* Assist macros, used instead of directly calling the functions they wrap. 
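 * On 64-bit builds the lock argument is dropped by the preprocessor and the
 * plain 64-bit store is used; on 32-bit builds the spinlock serializes the
 * two 32-bit UAR writes.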
 */
#ifdef RTE_ARCH_64
#define mlx5_uar_write64_relaxed(val, dst, lock) \
		__mlx5_uar_write64_relaxed(val, dst, NULL)
#define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, NULL)
#else
#define mlx5_uar_write64_relaxed(val, dst, lock) \
		__mlx5_uar_write64_relaxed(val, dst, lock)
#define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, lock)
#endif

/**
 * Query LKey from a packet buffer for Tx. If not found, add the mempool.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param mb
 *   Pointer to mbuf to search the LKey for.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static __rte_always_inline uint32_t
mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb)
{
	struct mlx5_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
	uintptr_t addr = (uintptr_t)mb->buf_addr;
	uint32_t lkey;

	/* Check generation bit to see if there's any change on existing MRs. */
	if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
		mlx5_mr_flush_local_cache(mr_ctrl);
	/* Linear search on MR cache array. */
	lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru,
				   MLX5_MR_CACHE_N, addr);
	if (likely(lkey != UINT32_MAX))
		return lkey;
	/* Take slower bottom-half on miss. */
	return mlx5_tx_mb2mr_bh(txq, mb);
}

/**
 * Ring TX queue doorbell and flush the update if requested.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param wqe
 *   Pointer to the last WQE posted in the NIC.
 * @param cond
 *   Request for write memory barrier after BlueFlame update.
 */
static __rte_always_inline void
mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
		       int cond)
{
	uint64_t *dst = mlx5_tx_bfreg(txq);
	volatile uint64_t *src = ((volatile uint64_t *)wqe);

	rte_io_wmb();
	*txq->qp_db = rte_cpu_to_be_32(txq->wqe_ci);
	/* Ensure ordering between DB record and BF copy. */
	rte_wmb();
	mlx5_uar_write64_relaxed(*src, dst, txq->uar_lock);
	if (cond)
		rte_wmb();
}

/**
 * Ring TX queue doorbell and flush the update by write memory barrier.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param wqe
 *   Pointer to the last WQE posted in the NIC.
 */
static __rte_always_inline void
mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
{
	mlx5_tx_dbrec_cond_wmb(txq, wqe, 1);
}
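/*
 * Illustrative usage sketch (an assumption, not code from this file): a
 * burst routine typically posts a batch of WQEs, remembers the last one in
 * loc->wqe_last and rings the doorbell once at the end of the burst, e.g.:
 *
 *   mlx5_tx_dbrec_cond_wmb(txq, loc->wqe_last, cond);
 *
 * where "cond" requests the trailing write memory barrier after the
 * BlueFlame copy (see the parameter description above); mlx5_tx_dbrec()
 * is the unconditional variant.
 */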
455 */ 456 rte_compiler_barrier(); 457 ci = __atomic_load_n(&sh->txpp.ts.ci_ts, __ATOMIC_RELAXED); 458 ts = __atomic_load_n(&sh->txpp.ts.ts, __ATOMIC_RELAXED); 459 rte_compiler_barrier(); 460 if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH))) 461 break; 462 } while (true); 463 /* Perform the skew correction, positive value to send earlier. */ 464 mts -= sh->txpp.skew; 465 mts -= ts; 466 if (unlikely(mts >= UINT64_MAX / 2)) { 467 /* We have negative integer, mts is in the past. */ 468 __atomic_fetch_add(&sh->txpp.err_ts_past, 469 1, __ATOMIC_RELAXED); 470 return -1; 471 } 472 tick = sh->txpp.tick; 473 MLX5_ASSERT(tick); 474 /* Convert delta to completions, round up. */ 475 mts = (mts + tick - 1) / tick; 476 if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) { 477 /* We have mts is too distant future. */ 478 __atomic_fetch_add(&sh->txpp.err_ts_future, 479 1, __ATOMIC_RELAXED); 480 return -1; 481 } 482 mts <<= 64 - MLX5_CQ_INDEX_WIDTH; 483 ci += mts; 484 ci >>= 64 - MLX5_CQ_INDEX_WIDTH; 485 return ci; 486 } 487 488 /** 489 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 490 * Flags must be preliminary initialized to zero. 491 * 492 * @param loc 493 * Pointer to burst routine local context. 494 * @param swp_flags 495 * Pointer to store Software Parser flags. 496 * @param olx 497 * Configured Tx offloads mask. It is fully defined at 498 * compile time and may be used for optimization. 499 * 500 * @return 501 * Software Parser offsets packed in dword. 502 * Software Parser flags are set by pointer. 503 */ 504 static __rte_always_inline uint32_t 505 txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, 506 uint8_t *swp_flags, 507 unsigned int olx) 508 { 509 uint64_t ol, tunnel; 510 unsigned int idx, off; 511 uint32_t set; 512 513 if (!MLX5_TXOFF_CONFIG(SWP)) 514 return 0; 515 ol = loc->mbuf->ol_flags; 516 tunnel = ol & PKT_TX_TUNNEL_MASK; 517 /* 518 * Check whether Software Parser is required. 519 * Only customized tunnels may ask for. 520 */ 521 if (likely(tunnel != PKT_TX_TUNNEL_UDP && tunnel != PKT_TX_TUNNEL_IP)) 522 return 0; 523 /* 524 * The index should have: 525 * bit[0:1] = PKT_TX_L4_MASK 526 * bit[4] = PKT_TX_IPV6 527 * bit[8] = PKT_TX_OUTER_IPV6 528 * bit[9] = PKT_TX_OUTER_UDP 529 */ 530 idx = (ol & (PKT_TX_L4_MASK | PKT_TX_IPV6 | PKT_TX_OUTER_IPV6)) >> 52; 531 idx |= (tunnel == PKT_TX_TUNNEL_UDP) ? (1 << 9) : 0; 532 *swp_flags = mlx5_swp_types_table[idx]; 533 /* 534 * Set offsets for SW parser. Since ConnectX-5, SW parser just 535 * complements HW parser. SW parser starts to engage only if HW parser 536 * can't reach a header. For the older devices, HW parser will not kick 537 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 538 * should be set regardless of HW offload. 539 */ 540 off = loc->mbuf->outer_l2_len; 541 if (MLX5_TXOFF_CONFIG(VLAN) && ol & PKT_TX_VLAN_PKT) 542 off += sizeof(struct rte_vlan_hdr); 543 set = (off >> 1) << 8; /* Outer L3 offset. */ 544 off += loc->mbuf->outer_l3_len; 545 if (tunnel == PKT_TX_TUNNEL_UDP) 546 set |= off >> 1; /* Outer L4 offset. */ 547 if (ol & (PKT_TX_IPV4 | PKT_TX_IPV6)) { /* Inner IP. */ 548 const uint64_t csum = ol & PKT_TX_L4_MASK; 549 off += loc->mbuf->l2_len; 550 set |= (off >> 1) << 24; /* Inner L3 offset. */ 551 if (csum == PKT_TX_TCP_CKSUM || 552 csum == PKT_TX_UDP_CKSUM || 553 (MLX5_TXOFF_CONFIG(TSO) && ol & PKT_TX_TCP_SEG)) { 554 off += loc->mbuf->l3_len; 555 set |= (off >> 1) << 16; /* Inner L4 offset. 
*/ 556 } 557 } 558 set = rte_cpu_to_le_32(set); 559 return set; 560 } 561 562 /** 563 * Convert the Checksum offloads to Verbs. 564 * 565 * @param buf 566 * Pointer to the mbuf. 567 * 568 * @return 569 * Converted checksum flags. 570 */ 571 static __rte_always_inline uint8_t 572 txq_ol_cksum_to_cs(struct rte_mbuf *buf) 573 { 574 uint32_t idx; 575 uint8_t is_tunnel = !!(buf->ol_flags & PKT_TX_TUNNEL_MASK); 576 const uint64_t ol_flags_mask = PKT_TX_TCP_SEG | PKT_TX_L4_MASK | 577 PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM; 578 579 /* 580 * The index should have: 581 * bit[0] = PKT_TX_TCP_SEG 582 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 583 * bit[4] = PKT_TX_IP_CKSUM 584 * bit[8] = PKT_TX_OUTER_IP_CKSUM 585 * bit[9] = tunnel 586 */ 587 idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9); 588 return mlx5_cksum_table[idx]; 589 } 590 591 /** 592 * Free the mbufs from the linear array of pointers. 593 * 594 * @param txq 595 * Pointer to Tx queue structure. 596 * @param pkts 597 * Pointer to array of packets to be free. 598 * @param pkts_n 599 * Number of packets to be freed. 600 * @param olx 601 * Configured Tx offloads mask. It is fully defined at 602 * compile time and may be used for optimization. 603 */ 604 static __rte_always_inline void 605 mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 606 struct rte_mbuf **__rte_restrict pkts, 607 unsigned int pkts_n, 608 unsigned int olx __rte_unused) 609 { 610 struct rte_mempool *pool = NULL; 611 struct rte_mbuf **p_free = NULL; 612 struct rte_mbuf *mbuf; 613 unsigned int n_free = 0; 614 615 /* 616 * The implemented algorithm eliminates 617 * copying pointers to temporary array 618 * for rte_mempool_put_bulk() calls. 619 */ 620 MLX5_ASSERT(pkts); 621 MLX5_ASSERT(pkts_n); 622 /* 623 * Free mbufs directly to the pool in bulk 624 * if fast free offload is engaged 625 */ 626 if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) { 627 mbuf = *pkts; 628 pool = mbuf->pool; 629 rte_mempool_put_bulk(pool, (void *)pkts, pkts_n); 630 return; 631 } 632 for (;;) { 633 for (;;) { 634 /* 635 * Decrement mbuf reference counter, detach 636 * indirect and external buffers if needed. 637 */ 638 mbuf = rte_pktmbuf_prefree_seg(*pkts); 639 if (likely(mbuf != NULL)) { 640 MLX5_ASSERT(mbuf == *pkts); 641 if (likely(n_free != 0)) { 642 if (unlikely(pool != mbuf->pool)) 643 /* From different pool. */ 644 break; 645 } else { 646 /* Start new scan array. */ 647 pool = mbuf->pool; 648 p_free = pkts; 649 } 650 ++n_free; 651 ++pkts; 652 --pkts_n; 653 if (unlikely(pkts_n == 0)) { 654 mbuf = NULL; 655 break; 656 } 657 } else { 658 /* 659 * This happens if mbuf is still referenced. 660 * We can't put it back to the pool, skip. 661 */ 662 ++pkts; 663 --pkts_n; 664 if (unlikely(n_free != 0)) 665 /* There is some array to free.*/ 666 break; 667 if (unlikely(pkts_n == 0)) 668 /* Last mbuf, nothing to free. */ 669 return; 670 } 671 } 672 for (;;) { 673 /* 674 * This loop is implemented to avoid multiple 675 * inlining of rte_mempool_put_bulk(). 676 */ 677 MLX5_ASSERT(pool); 678 MLX5_ASSERT(p_free); 679 MLX5_ASSERT(n_free); 680 /* 681 * Free the array of pre-freed mbufs 682 * belonging to the same memory pool. 683 */ 684 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 685 if (unlikely(mbuf != NULL)) { 686 /* There is the request to start new scan. */ 687 pool = mbuf->pool; 688 p_free = pkts++; 689 n_free = 1; 690 --pkts_n; 691 if (likely(pkts_n != 0)) 692 break; 693 /* 694 * This is the last mbuf to be freed. 695 * Do one more loop iteration to complete. 
696 * This is rare case of the last unique mbuf. 697 */ 698 mbuf = NULL; 699 continue; 700 } 701 if (likely(pkts_n == 0)) 702 return; 703 n_free = 0; 704 break; 705 } 706 } 707 } 708 709 /** 710 * No inline version to free buffers for optimal call 711 * on the tx_burst completion. 712 */ 713 static __rte_noinline void 714 __mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 715 struct rte_mbuf **__rte_restrict pkts, 716 unsigned int pkts_n, 717 unsigned int olx __rte_unused) 718 { 719 mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx); 720 } 721 722 /** 723 * Free the mbuf from the elts ring buffer till new tail. 724 * 725 * @param txq 726 * Pointer to Tx queue structure. 727 * @param tail 728 * Index in elts to free up to, becomes new elts tail. 729 * @param olx 730 * Configured Tx offloads mask. It is fully defined at 731 * compile time and may be used for optimization. 732 */ 733 static __rte_always_inline void 734 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 735 uint16_t tail, 736 unsigned int olx __rte_unused) 737 { 738 uint16_t n_elts = tail - txq->elts_tail; 739 740 MLX5_ASSERT(n_elts); 741 MLX5_ASSERT(n_elts <= txq->elts_s); 742 /* 743 * Implement a loop to support ring buffer wraparound 744 * with single inlining of mlx5_tx_free_mbuf(). 745 */ 746 do { 747 unsigned int part; 748 749 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 750 part = RTE_MIN(part, n_elts); 751 MLX5_ASSERT(part); 752 MLX5_ASSERT(part <= txq->elts_s); 753 mlx5_tx_free_mbuf(txq, 754 &txq->elts[txq->elts_tail & txq->elts_m], 755 part, olx); 756 txq->elts_tail += part; 757 n_elts -= part; 758 } while (n_elts); 759 } 760 761 /** 762 * Store the mbuf being sent into elts ring buffer. 763 * On Tx completion these mbufs will be freed. 764 * 765 * @param txq 766 * Pointer to Tx queue structure. 767 * @param pkts 768 * Pointer to array of packets to be stored. 769 * @param pkts_n 770 * Number of packets to be stored. 771 * @param olx 772 * Configured Tx offloads mask. It is fully defined at 773 * compile time and may be used for optimization. 774 */ 775 static __rte_always_inline void 776 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 777 struct rte_mbuf **__rte_restrict pkts, 778 unsigned int pkts_n, 779 unsigned int olx __rte_unused) 780 { 781 unsigned int part; 782 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 783 784 MLX5_ASSERT(pkts); 785 MLX5_ASSERT(pkts_n); 786 part = txq->elts_s - (txq->elts_head & txq->elts_m); 787 MLX5_ASSERT(part); 788 MLX5_ASSERT(part <= txq->elts_s); 789 /* This code is a good candidate for vectorizing with SIMD. */ 790 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 791 (void *)pkts, 792 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 793 txq->elts_head += pkts_n; 794 if (unlikely(part < pkts_n)) 795 /* The copy is wrapping around the elts array. */ 796 rte_memcpy((void *)elts, (void *)(pkts + part), 797 (pkts_n - part) * sizeof(struct rte_mbuf *)); 798 } 799 800 /** 801 * Check if the completion request flag should be set in the last WQE. 802 * Both pushed mbufs and WQEs are monitored and the completion request 803 * flag is set if any of thresholds is reached. 804 * 805 * @param txq 806 * Pointer to TX queue structure. 807 * @param loc 808 * Pointer to burst routine local context. 809 * @param olx 810 * Configured Tx offloads mask. It is fully defined at 811 * compile time and may be used for optimization. 
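 *
 * As implemented below: a completion is requested when at least
 * MLX5_TX_COMP_THRESH packets were pushed to elts since the last request,
 * or, if inlining is configured, when at least wqe_thres WQEs were built
 * since the last request.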
812 */ 813 static __rte_always_inline void 814 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 815 struct mlx5_txq_local *__rte_restrict loc, 816 unsigned int olx) 817 { 818 uint16_t head = txq->elts_head; 819 unsigned int part; 820 821 part = MLX5_TXOFF_CONFIG(INLINE) ? 822 0 : loc->pkts_sent - loc->pkts_copy; 823 head += part; 824 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 825 (MLX5_TXOFF_CONFIG(INLINE) && 826 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 827 volatile struct mlx5_wqe *last = loc->wqe_last; 828 829 MLX5_ASSERT(last); 830 txq->elts_comp = head; 831 if (MLX5_TXOFF_CONFIG(INLINE)) 832 txq->wqe_comp = txq->wqe_ci; 833 /* Request unconditional completion on last WQE. */ 834 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 835 MLX5_COMP_MODE_OFFSET); 836 /* Save elts_head in dedicated free on completion queue. */ 837 #ifdef RTE_LIBRTE_MLX5_DEBUG 838 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 839 (last->cseg.opcode >> 8) << 16; 840 #else 841 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 842 #endif 843 /* A CQE slot must always be available. */ 844 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 845 } 846 } 847 848 /** 849 * Build the Control Segment with specified opcode: 850 * - MLX5_OPCODE_SEND 851 * - MLX5_OPCODE_ENHANCED_MPSW 852 * - MLX5_OPCODE_TSO 853 * 854 * @param txq 855 * Pointer to TX queue structure. 856 * @param loc 857 * Pointer to burst routine local context. 858 * @param wqe 859 * Pointer to WQE to fill with built Control Segment. 860 * @param ds 861 * Supposed length of WQE in segments. 862 * @param opcode 863 * SQ WQE opcode to put into Control Segment. 864 * @param olx 865 * Configured Tx offloads mask. It is fully defined at 866 * compile time and may be used for optimization. 867 */ 868 static __rte_always_inline void 869 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 870 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 871 struct mlx5_wqe *__rte_restrict wqe, 872 unsigned int ds, 873 unsigned int opcode, 874 unsigned int olx __rte_unused) 875 { 876 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 877 878 /* For legacy MPW replace the EMPW by TSO with modifier. */ 879 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 880 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 881 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 882 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 883 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 884 MLX5_COMP_MODE_OFFSET); 885 cs->misc = RTE_BE32(0); 886 } 887 888 /** 889 * Build the Synchronize Queue Segment with specified completion index. 890 * 891 * @param txq 892 * Pointer to TX queue structure. 893 * @param loc 894 * Pointer to burst routine local context. 895 * @param wqe 896 * Pointer to WQE to fill with built Control Segment. 897 * @param wci 898 * Completion index in Clock Queue to wait. 899 * @param olx 900 * Configured Tx offloads mask. It is fully defined at 901 * compile time and may be used for optimization. 
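 *
 * This segment is used on the Tx scheduling path: mlx5_tx_schedule_send()
 * converts the mbuf timestamp with mlx5_txpp_convert_tx_ts() and builds a
 * WAIT WQE carrying this segment, so the send queue waits until the Clock
 * Queue reaches the requested completion index.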
902 */ 903 static __rte_always_inline void 904 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 905 struct mlx5_txq_local *restrict loc __rte_unused, 906 struct mlx5_wqe *restrict wqe, 907 unsigned int wci, 908 unsigned int olx __rte_unused) 909 { 910 struct mlx5_wqe_qseg *qs; 911 912 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 913 qs->max_index = rte_cpu_to_be_32(wci); 914 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq_obj.cq->id); 915 qs->reserved0 = RTE_BE32(0); 916 qs->reserved1 = RTE_BE32(0); 917 } 918 919 /** 920 * Build the Ethernet Segment without inlined data. 921 * Supports Software Parser, Checksums and VLAN insertion Tx offload features. 922 * 923 * @param txq 924 * Pointer to TX queue structure. 925 * @param loc 926 * Pointer to burst routine local context. 927 * @param wqe 928 * Pointer to WQE to fill with built Ethernet Segment. 929 * @param olx 930 * Configured Tx offloads mask. It is fully defined at 931 * compile time and may be used for optimization. 932 */ 933 static __rte_always_inline void 934 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 935 struct mlx5_txq_local *__rte_restrict loc, 936 struct mlx5_wqe *__rte_restrict wqe, 937 unsigned int olx) 938 { 939 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 940 uint32_t csum; 941 942 /* 943 * Calculate and set check sum flags first, dword field 944 * in segment may be shared with Software Parser flags. 945 */ 946 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 947 es->flags = rte_cpu_to_le_32(csum); 948 /* 949 * Calculate and set Software Parser offsets and flags. 950 * These flags a set for custom UDP and IP tunnel packets. 951 */ 952 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 953 /* Fill metadata field if needed. */ 954 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 955 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 956 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 957 0 : 0; 958 /* Engage VLAN tag insertion feature if requested. */ 959 if (MLX5_TXOFF_CONFIG(VLAN) && 960 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 961 /* 962 * We should get here only if device support 963 * this feature correctly. 964 */ 965 MLX5_ASSERT(txq->vlan_en); 966 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 967 loc->mbuf->vlan_tci); 968 } else { 969 es->inline_hdr = RTE_BE32(0); 970 } 971 } 972 973 /** 974 * Build the Ethernet Segment with minimal inlined data 975 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 976 * used to fill the gap in single WQEBB WQEs. 977 * Supports Software Parser, Checksums and VLAN 978 * insertion Tx offload features. 979 * 980 * @param txq 981 * Pointer to TX queue structure. 982 * @param loc 983 * Pointer to burst routine local context. 984 * @param wqe 985 * Pointer to WQE to fill with built Ethernet Segment. 986 * @param vlan 987 * Length of VLAN tag insertion if any. 988 * @param olx 989 * Configured Tx offloads mask. It is fully defined at 990 * compile time and may be used for optimization. 991 */ 992 static __rte_always_inline void 993 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 994 struct mlx5_txq_local *__rte_restrict loc, 995 struct mlx5_wqe *__rte_restrict wqe, 996 unsigned int vlan, 997 unsigned int olx) 998 { 999 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 1000 uint32_t csum; 1001 uint8_t *psrc, *pdst; 1002 1003 /* 1004 * Calculate and set check sum flags first, dword field 1005 * in segment may be shared with Software Parser flags. 
1006 */ 1007 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 1008 es->flags = rte_cpu_to_le_32(csum); 1009 /* 1010 * Calculate and set Software Parser offsets and flags. 1011 * These flags a set for custom UDP and IP tunnel packets. 1012 */ 1013 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1014 /* Fill metadata field if needed. */ 1015 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1016 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 1017 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1018 0 : 0; 1019 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 1020 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 1021 es->inline_data = *(unaligned_uint16_t *)psrc; 1022 psrc += sizeof(uint16_t); 1023 pdst = (uint8_t *)(es + 1); 1024 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1025 /* Implement VLAN tag insertion as part inline data. */ 1026 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 1027 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1028 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1029 /* Insert VLAN ethertype + VLAN tag. */ 1030 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1031 ((RTE_ETHER_TYPE_VLAN << 16) | 1032 loc->mbuf->vlan_tci); 1033 pdst += sizeof(struct rte_vlan_hdr); 1034 /* Copy the rest two bytes from packet data. */ 1035 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 1036 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 1037 } else { 1038 /* Fill the gap in the title WQEBB with inline data. */ 1039 rte_mov16(pdst, psrc); 1040 } 1041 } 1042 1043 /** 1044 * Build the Ethernet Segment with entire packet data inlining. Checks the 1045 * boundary of WQEBB and ring buffer wrapping, supports Software Parser, 1046 * Checksums and VLAN insertion Tx offload features. 1047 * 1048 * @param txq 1049 * Pointer to TX queue structure. 1050 * @param loc 1051 * Pointer to burst routine local context. 1052 * @param wqe 1053 * Pointer to WQE to fill with built Ethernet Segment. 1054 * @param vlan 1055 * Length of VLAN tag insertion if any. 1056 * @param inlen 1057 * Length of data to inline (VLAN included, if any). 1058 * @param tso 1059 * TSO flag, set mss field from the packet. 1060 * @param olx 1061 * Configured Tx offloads mask. It is fully defined at 1062 * compile time and may be used for optimization. 1063 * 1064 * @return 1065 * Pointer to the next Data Segment (aligned and wrapped around). 1066 */ 1067 static __rte_always_inline struct mlx5_wqe_dseg * 1068 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 1069 struct mlx5_txq_local *__rte_restrict loc, 1070 struct mlx5_wqe *__rte_restrict wqe, 1071 unsigned int vlan, 1072 unsigned int inlen, 1073 unsigned int tso, 1074 unsigned int olx) 1075 { 1076 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 1077 uint32_t csum; 1078 uint8_t *psrc, *pdst; 1079 unsigned int part; 1080 1081 /* 1082 * Calculate and set check sum flags first, dword field 1083 * in segment may be shared with Software Parser flags. 1084 */ 1085 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 1086 if (tso) { 1087 csum <<= 24; 1088 csum |= loc->mbuf->tso_segsz; 1089 es->flags = rte_cpu_to_be_32(csum); 1090 } else { 1091 es->flags = rte_cpu_to_le_32(csum); 1092 } 1093 /* 1094 * Calculate and set Software Parser offsets and flags. 1095 * These flags a set for custom UDP and IP tunnel packets. 1096 */ 1097 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1098 /* Fill metadata field if needed. */ 1099 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 
1100 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 1101 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1102 0 : 0; 1103 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 1104 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 1105 es->inline_data = *(unaligned_uint16_t *)psrc; 1106 psrc += sizeof(uint16_t); 1107 pdst = (uint8_t *)(es + 1); 1108 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1109 /* Implement VLAN tag insertion as part inline data. */ 1110 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 1111 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1112 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1113 /* Insert VLAN ethertype + VLAN tag. */ 1114 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1115 ((RTE_ETHER_TYPE_VLAN << 16) | 1116 loc->mbuf->vlan_tci); 1117 pdst += sizeof(struct rte_vlan_hdr); 1118 /* Copy the rest two bytes from packet data. */ 1119 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 1120 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 1121 psrc += sizeof(uint16_t); 1122 } else { 1123 /* Fill the gap in the title WQEBB with inline data. */ 1124 rte_mov16(pdst, psrc); 1125 psrc += sizeof(rte_v128u32_t); 1126 } 1127 pdst = (uint8_t *)(es + 2); 1128 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 1129 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 1130 inlen -= MLX5_ESEG_MIN_INLINE_SIZE; 1131 if (!inlen) { 1132 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 1133 return (struct mlx5_wqe_dseg *)pdst; 1134 } 1135 /* 1136 * The WQEBB space availability is checked by caller. 1137 * Here we should be aware of WQE ring buffer wraparound only. 1138 */ 1139 part = (uint8_t *)txq->wqes_end - pdst; 1140 part = RTE_MIN(part, inlen); 1141 do { 1142 rte_memcpy(pdst, psrc, part); 1143 inlen -= part; 1144 if (likely(!inlen)) { 1145 /* 1146 * If return value is not used by the caller 1147 * the code below will be optimized out. 1148 */ 1149 pdst += part; 1150 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1151 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 1152 pdst = (uint8_t *)txq->wqes; 1153 return (struct mlx5_wqe_dseg *)pdst; 1154 } 1155 pdst = (uint8_t *)txq->wqes; 1156 psrc += part; 1157 part = inlen; 1158 } while (true); 1159 } 1160 1161 /** 1162 * Copy data from chain of mbuf to the specified linear buffer. 1163 * Checksums and VLAN insertion Tx offload features. If data 1164 * from some mbuf copied completely this mbuf is freed. Local 1165 * structure is used to keep the byte stream state. 1166 * 1167 * @param pdst 1168 * Pointer to the destination linear buffer. 1169 * @param loc 1170 * Pointer to burst routine local context. 1171 * @param len 1172 * Length of data to be copied. 1173 * @param must 1174 * Length of data to be copied ignoring no inline hint. 1175 * @param olx 1176 * Configured Tx offloads mask. It is fully defined at 1177 * compile time and may be used for optimization. 1178 * 1179 * @return 1180 * Number of actual copied data bytes. This is always greater than or 1181 * equal to must parameter and might be lesser than len in no inline 1182 * hint flag is encountered. 1183 */ 1184 static __rte_always_inline unsigned int 1185 mlx5_tx_mseg_memcpy(uint8_t *pdst, 1186 struct mlx5_txq_local *__rte_restrict loc, 1187 unsigned int len, 1188 unsigned int must, 1189 unsigned int olx __rte_unused) 1190 { 1191 struct rte_mbuf *mbuf; 1192 unsigned int part, dlen, copy = 0; 1193 uint8_t *psrc; 1194 1195 MLX5_ASSERT(len); 1196 MLX5_ASSERT(must <= len); 1197 do { 1198 /* Allow zero length packets, must check first. 
*/ 1199 dlen = rte_pktmbuf_data_len(loc->mbuf); 1200 if (dlen <= loc->mbuf_off) { 1201 /* Exhausted packet, just free. */ 1202 mbuf = loc->mbuf; 1203 loc->mbuf = mbuf->next; 1204 rte_pktmbuf_free_seg(mbuf); 1205 loc->mbuf_off = 0; 1206 MLX5_ASSERT(loc->mbuf_nseg > 1); 1207 MLX5_ASSERT(loc->mbuf); 1208 --loc->mbuf_nseg; 1209 if (loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 1210 unsigned int diff; 1211 1212 if (copy >= must) { 1213 /* 1214 * We already copied the minimal 1215 * requested amount of data. 1216 */ 1217 return copy; 1218 } 1219 diff = must - copy; 1220 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 1221 /* 1222 * Copy only the minimal required 1223 * part of the data buffer. 1224 */ 1225 len = diff; 1226 } 1227 } 1228 continue; 1229 } 1230 dlen -= loc->mbuf_off; 1231 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1232 loc->mbuf_off); 1233 part = RTE_MIN(len, dlen); 1234 rte_memcpy(pdst, psrc, part); 1235 copy += part; 1236 loc->mbuf_off += part; 1237 len -= part; 1238 if (!len) { 1239 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 1240 loc->mbuf_off = 0; 1241 /* Exhausted packet, just free. */ 1242 mbuf = loc->mbuf; 1243 loc->mbuf = mbuf->next; 1244 rte_pktmbuf_free_seg(mbuf); 1245 loc->mbuf_off = 0; 1246 MLX5_ASSERT(loc->mbuf_nseg >= 1); 1247 --loc->mbuf_nseg; 1248 } 1249 return copy; 1250 } 1251 pdst += part; 1252 } while (true); 1253 } 1254 1255 /** 1256 * Build the Ethernet Segment with inlined data from multi-segment packet. 1257 * Checks the boundary of WQEBB and ring buffer wrapping, supports Software 1258 * Parser, Checksums and VLAN insertion Tx offload features. 1259 * 1260 * @param txq 1261 * Pointer to TX queue structure. 1262 * @param loc 1263 * Pointer to burst routine local context. 1264 * @param wqe 1265 * Pointer to WQE to fill with built Ethernet Segment. 1266 * @param vlan 1267 * Length of VLAN tag insertion if any. 1268 * @param inlen 1269 * Length of data to inline (VLAN included, if any). 1270 * @param tso 1271 * TSO flag, set mss field from the packet. 1272 * @param olx 1273 * Configured Tx offloads mask. It is fully defined at 1274 * compile time and may be used for optimization. 1275 * 1276 * @return 1277 * Pointer to the next Data Segment (aligned and possible NOT wrapped 1278 * around - caller should do wrapping check on its own). 1279 */ 1280 static __rte_always_inline struct mlx5_wqe_dseg * 1281 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 1282 struct mlx5_txq_local *__rte_restrict loc, 1283 struct mlx5_wqe *__rte_restrict wqe, 1284 unsigned int vlan, 1285 unsigned int inlen, 1286 unsigned int tso, 1287 unsigned int olx) 1288 { 1289 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 1290 uint32_t csum; 1291 uint8_t *pdst; 1292 unsigned int part, tlen = 0; 1293 1294 /* 1295 * Calculate and set check sum flags first, uint32_t field 1296 * in segment may be shared with Software Parser flags. 1297 */ 1298 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 1299 if (tso) { 1300 csum <<= 24; 1301 csum |= loc->mbuf->tso_segsz; 1302 es->flags = rte_cpu_to_be_32(csum); 1303 } else { 1304 es->flags = rte_cpu_to_le_32(csum); 1305 } 1306 /* 1307 * Calculate and set Software Parser offsets and flags. 1308 * These flags a set for custom UDP and IP tunnel packets. 1309 */ 1310 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1311 /* Fill metadata field if needed. */ 1312 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1313 loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 
1314 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1315 0 : 0; 1316 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 1317 pdst = (uint8_t *)&es->inline_data; 1318 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1319 /* Implement VLAN tag insertion as part inline data. */ 1320 mlx5_tx_mseg_memcpy(pdst, loc, 1321 2 * RTE_ETHER_ADDR_LEN, 1322 2 * RTE_ETHER_ADDR_LEN, olx); 1323 pdst += 2 * RTE_ETHER_ADDR_LEN; 1324 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1325 ((RTE_ETHER_TYPE_VLAN << 16) | 1326 loc->mbuf->vlan_tci); 1327 pdst += sizeof(struct rte_vlan_hdr); 1328 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 1329 } 1330 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 1331 /* 1332 * The WQEBB space availability is checked by caller. 1333 * Here we should be aware of WQE ring buffer wraparound only. 1334 */ 1335 part = (uint8_t *)txq->wqes_end - pdst; 1336 part = RTE_MIN(part, inlen - tlen); 1337 MLX5_ASSERT(part); 1338 do { 1339 unsigned int copy; 1340 1341 /* 1342 * Copying may be interrupted inside the routine 1343 * if run into no inline hint flag. 1344 */ 1345 copy = tso ? inlen : txq->inlen_mode; 1346 copy = tlen >= copy ? 0 : (copy - tlen); 1347 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 1348 tlen += copy; 1349 if (likely(inlen <= tlen) || copy < part) { 1350 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 1351 pdst += copy; 1352 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1353 return (struct mlx5_wqe_dseg *)pdst; 1354 } 1355 pdst = (uint8_t *)txq->wqes; 1356 part = inlen - tlen; 1357 } while (true); 1358 } 1359 1360 /** 1361 * Build the Data Segment of pointer type. 1362 * 1363 * @param txq 1364 * Pointer to TX queue structure. 1365 * @param loc 1366 * Pointer to burst routine local context. 1367 * @param dseg 1368 * Pointer to WQE to fill with built Data Segment. 1369 * @param buf 1370 * Data buffer to point. 1371 * @param len 1372 * Data buffer length. 1373 * @param olx 1374 * Configured Tx offloads mask. It is fully defined at 1375 * compile time and may be used for optimization. 1376 */ 1377 static __rte_always_inline void 1378 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 1379 struct mlx5_txq_local *__rte_restrict loc, 1380 struct mlx5_wqe_dseg *__rte_restrict dseg, 1381 uint8_t *buf, 1382 unsigned int len, 1383 unsigned int olx __rte_unused) 1384 1385 { 1386 MLX5_ASSERT(len); 1387 dseg->bcount = rte_cpu_to_be_32(len); 1388 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 1389 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1390 } 1391 1392 /** 1393 * Build the Data Segment of pointer type or inline if data length is less than 1394 * buffer in minimal Data Segment size. 1395 * 1396 * @param txq 1397 * Pointer to TX queue structure. 1398 * @param loc 1399 * Pointer to burst routine local context. 1400 * @param dseg 1401 * Pointer to WQE to fill with built Data Segment. 1402 * @param buf 1403 * Data buffer to point. 1404 * @param len 1405 * Data buffer length. 1406 * @param olx 1407 * Configured Tx offloads mask. It is fully defined at 1408 * compile time and may be used for optimization. 
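 *
 * Data of up to MLX5_DSEG_MIN_INLINE_SIZE bytes is copied directly into the
 * Data Segment (no LKey lookup is needed), longer data is referenced by
 * pointer exactly as in mlx5_tx_dseg_ptr().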
1409 */ 1410 static __rte_always_inline void 1411 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 1412 struct mlx5_txq_local *__rte_restrict loc, 1413 struct mlx5_wqe_dseg *__rte_restrict dseg, 1414 uint8_t *buf, 1415 unsigned int len, 1416 unsigned int olx __rte_unused) 1417 1418 { 1419 uintptr_t dst, src; 1420 1421 MLX5_ASSERT(len); 1422 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 1423 dseg->bcount = rte_cpu_to_be_32(len); 1424 dseg->lkey = mlx5_tx_mb2mr(txq, loc->mbuf); 1425 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1426 1427 return; 1428 } 1429 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1430 /* Unrolled implementation of generic rte_memcpy. */ 1431 dst = (uintptr_t)&dseg->inline_data[0]; 1432 src = (uintptr_t)buf; 1433 if (len & 0x08) { 1434 #ifdef RTE_ARCH_STRICT_ALIGN 1435 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 1436 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1437 dst += sizeof(uint32_t); 1438 src += sizeof(uint32_t); 1439 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1440 dst += sizeof(uint32_t); 1441 src += sizeof(uint32_t); 1442 #else 1443 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 1444 dst += sizeof(uint64_t); 1445 src += sizeof(uint64_t); 1446 #endif 1447 } 1448 if (len & 0x04) { 1449 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1450 dst += sizeof(uint32_t); 1451 src += sizeof(uint32_t); 1452 } 1453 if (len & 0x02) { 1454 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 1455 dst += sizeof(uint16_t); 1456 src += sizeof(uint16_t); 1457 } 1458 if (len & 0x01) 1459 *(uint8_t *)dst = *(uint8_t *)src; 1460 } 1461 1462 /** 1463 * Build the Data Segment of inlined data from single 1464 * segment packet, no VLAN insertion. 1465 * 1466 * @param txq 1467 * Pointer to TX queue structure. 1468 * @param loc 1469 * Pointer to burst routine local context. 1470 * @param dseg 1471 * Pointer to WQE to fill with built Data Segment. 1472 * @param buf 1473 * Data buffer to point. 1474 * @param len 1475 * Data buffer length. 1476 * @param olx 1477 * Configured Tx offloads mask. It is fully defined at 1478 * compile time and may be used for optimization. 1479 * 1480 * @return 1481 * Pointer to the next Data Segment after inlined data. 1482 * Ring buffer wraparound check is needed. We do not do it here because it 1483 * may not be needed for the last packet in the eMPW session. 1484 */ 1485 static __rte_always_inline struct mlx5_wqe_dseg * 1486 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 1487 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1488 struct mlx5_wqe_dseg *__rte_restrict dseg, 1489 uint8_t *buf, 1490 unsigned int len, 1491 unsigned int olx __rte_unused) 1492 { 1493 unsigned int part; 1494 uint8_t *pdst; 1495 1496 if (!MLX5_TXOFF_CONFIG(MPW)) { 1497 /* Store the descriptor byte counter for eMPW sessions. */ 1498 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1499 pdst = &dseg->inline_data[0]; 1500 } else { 1501 /* The entire legacy MPW session counter is stored on close. */ 1502 pdst = (uint8_t *)dseg; 1503 } 1504 /* 1505 * The WQEBB space availability is checked by caller. 1506 * Here we should be aware of WQE ring buffer wraparound only. 1507 */ 1508 part = (uint8_t *)txq->wqes_end - pdst; 1509 part = RTE_MIN(part, len); 1510 do { 1511 rte_memcpy(pdst, buf, part); 1512 len -= part; 1513 if (likely(!len)) { 1514 pdst += part; 1515 if (!MLX5_TXOFF_CONFIG(MPW)) 1516 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1517 /* Note: no final wraparound check here. 
*/ 1518 return (struct mlx5_wqe_dseg *)pdst; 1519 } 1520 pdst = (uint8_t *)txq->wqes; 1521 buf += part; 1522 part = len; 1523 } while (true); 1524 } 1525 1526 /** 1527 * Build the Data Segment of inlined data from single 1528 * segment packet with VLAN insertion. 1529 * 1530 * @param txq 1531 * Pointer to TX queue structure. 1532 * @param loc 1533 * Pointer to burst routine local context. 1534 * @param dseg 1535 * Pointer to the dseg fill with built Data Segment. 1536 * @param buf 1537 * Data buffer to point. 1538 * @param len 1539 * Data buffer length. 1540 * @param olx 1541 * Configured Tx offloads mask. It is fully defined at 1542 * compile time and may be used for optimization. 1543 * 1544 * @return 1545 * Pointer to the next Data Segment after inlined data. 1546 * Ring buffer wraparound check is needed. 1547 */ 1548 static __rte_always_inline struct mlx5_wqe_dseg * 1549 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 1550 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1551 struct mlx5_wqe_dseg *__rte_restrict dseg, 1552 uint8_t *buf, 1553 unsigned int len, 1554 unsigned int olx __rte_unused) 1555 1556 { 1557 unsigned int part; 1558 uint8_t *pdst; 1559 1560 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 1561 if (!MLX5_TXOFF_CONFIG(MPW)) { 1562 /* Store the descriptor byte counter for eMPW sessions. */ 1563 dseg->bcount = rte_cpu_to_be_32 1564 ((len + sizeof(struct rte_vlan_hdr)) | 1565 MLX5_ETH_WQE_DATA_INLINE); 1566 pdst = &dseg->inline_data[0]; 1567 } else { 1568 /* The entire legacy MPW session counter is stored on close. */ 1569 pdst = (uint8_t *)dseg; 1570 } 1571 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 1572 buf += MLX5_DSEG_MIN_INLINE_SIZE; 1573 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 1574 len -= MLX5_DSEG_MIN_INLINE_SIZE; 1575 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ 1576 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 1577 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 1578 pdst = (uint8_t *)txq->wqes; 1579 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 1580 loc->mbuf->vlan_tci); 1581 pdst += sizeof(struct rte_vlan_hdr); 1582 /* 1583 * The WQEBB space availability is checked by caller. 1584 * Here we should be aware of WQE ring buffer wraparound only. 1585 */ 1586 part = (uint8_t *)txq->wqes_end - pdst; 1587 part = RTE_MIN(part, len); 1588 do { 1589 rte_memcpy(pdst, buf, part); 1590 len -= part; 1591 if (likely(!len)) { 1592 pdst += part; 1593 if (!MLX5_TXOFF_CONFIG(MPW)) 1594 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1595 /* Note: no final wraparound check here. */ 1596 return (struct mlx5_wqe_dseg *)pdst; 1597 } 1598 pdst = (uint8_t *)txq->wqes; 1599 buf += part; 1600 part = len; 1601 } while (true); 1602 } 1603 1604 /** 1605 * Build the Ethernet Segment with optionally inlined data with 1606 * VLAN insertion and following Data Segments (if any) from 1607 * multi-segment packet. Used by ordinary send and TSO. 1608 * 1609 * @param txq 1610 * Pointer to TX queue structure. 1611 * @param loc 1612 * Pointer to burst routine local context. 1613 * @param wqe 1614 * Pointer to WQE to fill with built Ethernet/Data Segments. 1615 * @param vlan 1616 * Length of VLAN header to insert, 0 means no VLAN insertion. 1617 * @param inlen 1618 * Data length to inline. For TSO this parameter specifies exact value, 1619 * for ordinary send routine can be aligned by caller to provide better WQE 1620 * space saving and data buffer start address alignment. 1621 * This length includes VLAN header being inserted. 
1622 * @param tso 1623 * Zero means ordinary send, inlined data can be extended, 1624 * otherwise this is TSO, inlined data length is fixed. 1625 * @param olx 1626 * Configured Tx offloads mask. It is fully defined at 1627 * compile time and may be used for optimization. 1628 * 1629 * @return 1630 * Actual size of built WQE in segments. 1631 */ 1632 static __rte_always_inline unsigned int 1633 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 1634 struct mlx5_txq_local *__rte_restrict loc, 1635 struct mlx5_wqe *__rte_restrict wqe, 1636 unsigned int vlan, 1637 unsigned int inlen, 1638 unsigned int tso, 1639 unsigned int olx __rte_unused) 1640 { 1641 struct mlx5_wqe_dseg *__rte_restrict dseg; 1642 unsigned int ds; 1643 1644 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 1645 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 1646 loc->mbuf_off = 0; 1647 1648 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 1649 if (!loc->mbuf_nseg) 1650 goto dseg_done; 1651 /* 1652 * There are still some mbuf remaining, not inlined. 1653 * The first mbuf may be partially inlined and we 1654 * must process the possible non-zero data offset. 1655 */ 1656 if (loc->mbuf_off) { 1657 unsigned int dlen; 1658 uint8_t *dptr; 1659 1660 /* 1661 * Exhausted packets must be dropped before. 1662 * Non-zero offset means there are some data 1663 * remained in the packet. 1664 */ 1665 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 1666 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 1667 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1668 loc->mbuf_off); 1669 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 1670 /* 1671 * Build the pointer/minimal Data Segment. 1672 * Do ring buffer wrapping check in advance. 1673 */ 1674 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1675 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1676 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 1677 /* Store the mbuf to be freed on completion. */ 1678 MLX5_ASSERT(loc->elts_free); 1679 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1680 --loc->elts_free; 1681 ++dseg; 1682 if (--loc->mbuf_nseg == 0) 1683 goto dseg_done; 1684 loc->mbuf = loc->mbuf->next; 1685 loc->mbuf_off = 0; 1686 } 1687 do { 1688 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1689 struct rte_mbuf *mbuf; 1690 1691 /* Zero length segment found, just skip. */ 1692 mbuf = loc->mbuf; 1693 loc->mbuf = loc->mbuf->next; 1694 rte_pktmbuf_free_seg(mbuf); 1695 if (--loc->mbuf_nseg == 0) 1696 break; 1697 } else { 1698 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1699 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1700 mlx5_tx_dseg_iptr 1701 (txq, loc, dseg, 1702 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1703 rte_pktmbuf_data_len(loc->mbuf), olx); 1704 MLX5_ASSERT(loc->elts_free); 1705 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1706 --loc->elts_free; 1707 ++dseg; 1708 if (--loc->mbuf_nseg == 0) 1709 break; 1710 loc->mbuf = loc->mbuf->next; 1711 } 1712 } while (true); 1713 1714 dseg_done: 1715 /* Calculate actual segments used from the dseg pointer. */ 1716 if ((uintptr_t)wqe < (uintptr_t)dseg) 1717 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 1718 else 1719 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 1720 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 1721 return ds; 1722 } 1723 1724 /** 1725 * The routine checks timestamp flag in the current packet, 1726 * and push WAIT WQE into the queue if scheduling is required. 1727 * 1728 * @param txq 1729 * Pointer to TX queue structure. 
1730 * @param loc 1731 * Pointer to burst routine local context. 1732 * @param olx 1733 * Configured Tx offloads mask. It is fully defined at 1734 * compile time and may be used for optimization. 1735 * 1736 * @return 1737 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1738 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 1739 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 1740 * Local context variables partially updated. 1741 */ 1742 static __rte_always_inline enum mlx5_txcmp_code 1743 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 1744 struct mlx5_txq_local *restrict loc, 1745 unsigned int olx) 1746 { 1747 if (MLX5_TXOFF_CONFIG(TXPP) && 1748 loc->mbuf->ol_flags & txq->ts_mask) { 1749 struct mlx5_wqe *wqe; 1750 uint64_t ts; 1751 int32_t wci; 1752 1753 /* 1754 * Estimate the required space quickly and roughly. 1755 * We would like to ensure the packet can be pushed 1756 * to the queue and we won't get the orphan WAIT WQE. 1757 */ 1758 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 1759 loc->elts_free < NB_SEGS(loc->mbuf)) 1760 return MLX5_TXCMP_CODE_EXIT; 1761 /* Convert the timestamp into completion to wait. */ 1762 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 1763 wci = mlx5_txpp_convert_tx_ts(txq->sh, ts); 1764 if (unlikely(wci < 0)) 1765 return MLX5_TXCMP_CODE_SINGLE; 1766 /* Build the WAIT WQE with specified completion. */ 1767 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1768 mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx); 1769 mlx5_tx_wseg_init(txq, loc, wqe, wci, olx); 1770 ++txq->wqe_ci; 1771 --loc->wqe_free; 1772 return MLX5_TXCMP_CODE_MULTI; 1773 } 1774 return MLX5_TXCMP_CODE_SINGLE; 1775 } 1776 1777 /** 1778 * Tx one packet function for multi-segment TSO. Supports all 1779 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 1780 * sends one packet per WQE. 1781 * 1782 * This routine is responsible for storing processed mbuf 1783 * into elts ring buffer and update elts_head. 1784 * 1785 * @param txq 1786 * Pointer to TX queue structure. 1787 * @param loc 1788 * Pointer to burst routine local context. 1789 * @param olx 1790 * Configured Tx offloads mask. It is fully defined at 1791 * compile time and may be used for optimization. 1792 * 1793 * @return 1794 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1795 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1796 * Local context variables partially updated. 1797 */ 1798 static __rte_always_inline enum mlx5_txcmp_code 1799 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 1800 struct mlx5_txq_local *__rte_restrict loc, 1801 unsigned int olx) 1802 { 1803 struct mlx5_wqe *__rte_restrict wqe; 1804 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 1805 1806 if (MLX5_TXOFF_CONFIG(TXPP)) { 1807 enum mlx5_txcmp_code wret; 1808 1809 /* Generate WAIT for scheduling if requested. */ 1810 wret = mlx5_tx_schedule_send(txq, loc, olx); 1811 if (wret == MLX5_TXCMP_CODE_EXIT) 1812 return MLX5_TXCMP_CODE_EXIT; 1813 if (wret == MLX5_TXCMP_CODE_ERROR) 1814 return MLX5_TXCMP_CODE_ERROR; 1815 } 1816 /* 1817 * Calculate data length to be inlined to estimate 1818 * the required space in WQE ring buffer. 
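	 * For TSO only the headers are inlined: l2_len (+ VLAN header, if it
	 * is inserted by software) + l3_len + l4_len, plus the outer headers
	 * for tunneled packets. For example, a plain Ethernet/IPv4/TCP packet
	 * with software VLAN insertion gives inlen = 14 + 4 + 20 + 20 = 58
	 * bytes (illustrative figures, assuming no IP/TCP options).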
1819 */ 1820 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1821 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 1822 vlan = sizeof(struct rte_vlan_hdr); 1823 inlen = loc->mbuf->l2_len + vlan + 1824 loc->mbuf->l3_len + loc->mbuf->l4_len; 1825 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 1826 return MLX5_TXCMP_CODE_ERROR; 1827 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 1828 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 1829 /* Packet must contain all TSO headers. */ 1830 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 1831 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 1832 inlen > (dlen + vlan))) 1833 return MLX5_TXCMP_CODE_ERROR; 1834 MLX5_ASSERT(inlen >= txq->inlen_mode); 1835 /* 1836 * Check whether there are enough free WQEBBs: 1837 * - Control Segment 1838 * - Ethernet Segment 1839 * - First Segment of inlined Ethernet data 1840 * - ... data continued ... 1841 * - Data Segments of pointer/min inline type 1842 */ 1843 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 1844 MLX5_ESEG_MIN_INLINE_SIZE + 1845 MLX5_WSEG_SIZE + 1846 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 1847 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1848 return MLX5_TXCMP_CODE_EXIT; 1849 /* Check for maximal WQE size. */ 1850 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 1851 return MLX5_TXCMP_CODE_ERROR; 1852 #ifdef MLX5_PMD_SOFT_COUNTERS 1853 /* Update sent data bytes/packets counters. */ 1854 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 1855 loc->mbuf->tso_segsz; 1856 /* 1857 * One will be added for mbuf itself at the end of the mlx5_tx_burst 1858 * from loc->pkts_sent field. 1859 */ 1860 --ntcp; 1861 txq->stats.opackets += ntcp; 1862 txq->stats.obytes += dlen + vlan + ntcp * inlen; 1863 #endif 1864 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1865 loc->wqe_last = wqe; 1866 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 1867 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 1868 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 1869 txq->wqe_ci += (ds + 3) / 4; 1870 loc->wqe_free -= (ds + 3) / 4; 1871 return MLX5_TXCMP_CODE_MULTI; 1872 } 1873 1874 /** 1875 * Tx one packet function for multi-segment SEND. Supports all types of Tx 1876 * offloads, uses MLX5_OPCODE_SEND to build WQEs, sends one packet per WQE, 1877 * without any data inlining in Ethernet Segment. 1878 * 1879 * This routine is responsible for storing processed mbuf 1880 * into elts ring buffer and update elts_head. 1881 * 1882 * @param txq 1883 * Pointer to TX queue structure. 1884 * @param loc 1885 * Pointer to burst routine local context. 1886 * @param olx 1887 * Configured Tx offloads mask. It is fully defined at 1888 * compile time and may be used for optimization. 1889 * 1890 * @return 1891 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1892 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1893 * Local context variables partially updated. 1894 */ 1895 static __rte_always_inline enum mlx5_txcmp_code 1896 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 1897 struct mlx5_txq_local *__rte_restrict loc, 1898 unsigned int olx) 1899 { 1900 struct mlx5_wqe_dseg *__rte_restrict dseg; 1901 struct mlx5_wqe *__rte_restrict wqe; 1902 unsigned int ds, nseg; 1903 1904 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1905 if (MLX5_TXOFF_CONFIG(TXPP)) { 1906 enum mlx5_txcmp_code wret; 1907 1908 /* Generate WAIT for scheduling if requested. 
*/ 1909 wret = mlx5_tx_schedule_send(txq, loc, olx); 1910 if (wret == MLX5_TXCMP_CODE_EXIT) 1911 return MLX5_TXCMP_CODE_EXIT; 1912 if (wret == MLX5_TXCMP_CODE_ERROR) 1913 return MLX5_TXCMP_CODE_ERROR; 1914 } 1915 /* 1916 * No inline at all, it means the CPU cycles saving is prioritized at 1917 * configuration, we should not copy any packet data to WQE. 1918 */ 1919 nseg = NB_SEGS(loc->mbuf); 1920 ds = 2 + nseg; 1921 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1922 return MLX5_TXCMP_CODE_EXIT; 1923 /* Check for maximal WQE size. */ 1924 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 1925 return MLX5_TXCMP_CODE_ERROR; 1926 /* 1927 * Some Tx offloads may cause an error if packet is not long enough, 1928 * check against assumed minimal length. 1929 */ 1930 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 1931 return MLX5_TXCMP_CODE_ERROR; 1932 #ifdef MLX5_PMD_SOFT_COUNTERS 1933 /* Update sent data bytes counter. */ 1934 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 1935 if (MLX5_TXOFF_CONFIG(VLAN) && 1936 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 1937 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 1938 #endif 1939 /* 1940 * SEND WQE, one WQEBB: 1941 * - Control Segment, SEND opcode 1942 * - Ethernet Segment, optional VLAN, no inline 1943 * - Data Segments, pointer only type 1944 */ 1945 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1946 loc->wqe_last = wqe; 1947 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 1948 mlx5_tx_eseg_none(txq, loc, wqe, olx); 1949 dseg = &wqe->dseg[0]; 1950 do { 1951 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1952 struct rte_mbuf *mbuf; 1953 1954 /* 1955 * Zero length segment found, have to correct total 1956 * size of WQE in segments. 1957 * It is supposed to be rare occasion, so in normal 1958 * case (no zero length segments) we avoid extra 1959 * writing to the Control Segment. 1960 */ 1961 --ds; 1962 wqe->cseg.sq_ds -= RTE_BE32(1); 1963 mbuf = loc->mbuf; 1964 loc->mbuf = mbuf->next; 1965 rte_pktmbuf_free_seg(mbuf); 1966 if (--nseg == 0) 1967 break; 1968 } else { 1969 mlx5_tx_dseg_ptr 1970 (txq, loc, dseg, 1971 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1972 rte_pktmbuf_data_len(loc->mbuf), olx); 1973 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1974 --loc->elts_free; 1975 if (--nseg == 0) 1976 break; 1977 ++dseg; 1978 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1979 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1980 loc->mbuf = loc->mbuf->next; 1981 } 1982 } while (true); 1983 txq->wqe_ci += (ds + 3) / 4; 1984 loc->wqe_free -= (ds + 3) / 4; 1985 return MLX5_TXCMP_CODE_MULTI; 1986 } 1987 1988 /** 1989 * Tx one packet function for multi-segment SEND. Supports all 1990 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 1991 * sends one packet per WQE, with data inlining in 1992 * Ethernet Segment and minimal Data Segments. 1993 * 1994 * This routine is responsible for storing processed mbuf 1995 * into elts ring buffer and update elts_head. 1996 * 1997 * @param txq 1998 * Pointer to TX queue structure. 1999 * @param loc 2000 * Pointer to burst routine local context. 2001 * @param olx 2002 * Configured Tx offloads mask. It is fully defined at 2003 * compile time and may be used for optimization. 2004 * 2005 * @return 2006 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2007 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2008 * Local context variables partially updated. 
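 *   On success the routine returns MLX5_TXCMP_CODE_MULTI. When inlining
 *   brings no benefit (the first segment is too long to inline and no
 *   minimal inlining is required) it falls back to
 *   mlx5_tx_packet_multi_send() internally.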
2009 */ 2010 static __rte_always_inline enum mlx5_txcmp_code 2011 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 2012 struct mlx5_txq_local *__rte_restrict loc, 2013 unsigned int olx) 2014 { 2015 struct mlx5_wqe *__rte_restrict wqe; 2016 unsigned int ds, inlen, dlen, vlan = 0; 2017 2018 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2019 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 2020 if (MLX5_TXOFF_CONFIG(TXPP)) { 2021 enum mlx5_txcmp_code wret; 2022 2023 /* Generate WAIT for scheduling if requested. */ 2024 wret = mlx5_tx_schedule_send(txq, loc, olx); 2025 if (wret == MLX5_TXCMP_CODE_EXIT) 2026 return MLX5_TXCMP_CODE_EXIT; 2027 if (wret == MLX5_TXCMP_CODE_ERROR) 2028 return MLX5_TXCMP_CODE_ERROR; 2029 } 2030 /* 2031 * First calculate data length to be inlined 2032 * to estimate the required space for WQE. 2033 */ 2034 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 2035 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 2036 vlan = sizeof(struct rte_vlan_hdr); 2037 inlen = dlen + vlan; 2038 /* Check against minimal length. */ 2039 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 2040 return MLX5_TXCMP_CODE_ERROR; 2041 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 2042 if (inlen > txq->inlen_send || 2043 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) { 2044 struct rte_mbuf *mbuf; 2045 unsigned int nxlen; 2046 uintptr_t start; 2047 2048 mbuf = loc->mbuf; 2049 nxlen = rte_pktmbuf_data_len(mbuf); 2050 /* 2051 * Packet length exceeds the allowed inline data length, 2052 * check whether the minimal inlining is required. 2053 */ 2054 if (txq->inlen_mode) { 2055 MLX5_ASSERT(txq->inlen_mode >= 2056 MLX5_ESEG_MIN_INLINE_SIZE); 2057 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 2058 inlen = txq->inlen_mode; 2059 } else if (vlan && !txq->vlan_en) { 2060 /* 2061 * VLAN insertion is requested and hardware does not 2062 * support the offload, will do with software inline. 2063 */ 2064 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 2065 } else if (mbuf->ol_flags & PKT_TX_DYNF_NOINLINE || 2066 nxlen > txq->inlen_send) { 2067 return mlx5_tx_packet_multi_send(txq, loc, olx); 2068 } else { 2069 goto do_first; 2070 } 2071 /* 2072 * Now we know the minimal amount of data is requested 2073 * to inline. Check whether we should inline the buffers 2074 * from the chain beginning to eliminate some mbufs. 2075 */ 2076 if (unlikely(nxlen <= txq->inlen_send)) { 2077 /* We can inline first mbuf at least. */ 2078 if (nxlen < inlen) { 2079 unsigned int smlen; 2080 2081 /* Scan mbufs till inlen filled. */ 2082 do { 2083 smlen = nxlen; 2084 mbuf = NEXT(mbuf); 2085 MLX5_ASSERT(mbuf); 2086 nxlen = rte_pktmbuf_data_len(mbuf); 2087 nxlen += smlen; 2088 } while (unlikely(nxlen < inlen)); 2089 if (unlikely(nxlen > txq->inlen_send)) { 2090 /* We cannot inline entire mbuf. */ 2091 smlen = inlen - smlen; 2092 start = rte_pktmbuf_mtod_offset 2093 (mbuf, uintptr_t, smlen); 2094 goto do_align; 2095 } 2096 } 2097 do_first: 2098 do { 2099 inlen = nxlen; 2100 mbuf = NEXT(mbuf); 2101 /* There should be not end of packet. */ 2102 MLX5_ASSERT(mbuf); 2103 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 2104 } while (unlikely(nxlen < txq->inlen_send)); 2105 } 2106 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 2107 /* 2108 * Check whether we can do inline to align start 2109 * address of data buffer to cacheline. 
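	 * The expression below, (~start + 1) & (RTE_CACHE_LINE_SIZE - 1),
	 * is the number of bytes from 'start' up to the next cache line
	 * boundary. As an example with illustrative numbers only: with
	 * 64-byte cache lines and a data address ending in 0x28 it yields
	 * 0x18, so extending inlen by 24 bytes (if the result still fits
	 * txq->inlen_send) makes the not-inlined remainder start cache
	 * line aligned.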
2110 */ 2111 do_align: 2112 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 2113 if (unlikely(start)) { 2114 start += inlen; 2115 if (start <= txq->inlen_send) 2116 inlen = start; 2117 } 2118 } 2119 /* 2120 * Check whether there are enough free WQEBBs: 2121 * - Control Segment 2122 * - Ethernet Segment 2123 * - First Segment of inlined Ethernet data 2124 * - ... data continued ... 2125 * - Data Segments of pointer/min inline type 2126 * 2127 * Estimate the number of Data Segments conservatively, 2128 * supposing no any mbufs is being freed during inlining. 2129 */ 2130 MLX5_ASSERT(inlen <= txq->inlen_send); 2131 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 2132 MLX5_ESEG_MIN_INLINE_SIZE + 2133 MLX5_WSEG_SIZE + 2134 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2135 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 2136 return MLX5_TXCMP_CODE_EXIT; 2137 /* Check for maximal WQE size. */ 2138 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 2139 return MLX5_TXCMP_CODE_ERROR; 2140 #ifdef MLX5_PMD_SOFT_COUNTERS 2141 /* Update sent data bytes/packets counters. */ 2142 txq->stats.obytes += dlen + vlan; 2143 #endif 2144 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2145 loc->wqe_last = wqe; 2146 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 2147 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 2148 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2149 txq->wqe_ci += (ds + 3) / 4; 2150 loc->wqe_free -= (ds + 3) / 4; 2151 return MLX5_TXCMP_CODE_MULTI; 2152 } 2153 2154 /** 2155 * Tx burst function for multi-segment packets. Supports all 2156 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 2157 * sends one packet per WQE. Function stops sending if it 2158 * encounters the single-segment packet. 2159 * 2160 * This routine is responsible for storing processed mbuf 2161 * into elts ring buffer and update elts_head. 2162 * 2163 * @param txq 2164 * Pointer to TX queue structure. 2165 * @param[in] pkts 2166 * Packets to transmit. 2167 * @param pkts_n 2168 * Number of packets in array. 2169 * @param loc 2170 * Pointer to burst routine local context. 2171 * @param olx 2172 * Configured Tx offloads mask. It is fully defined at 2173 * compile time and may be used for optimization. 2174 * 2175 * @return 2176 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2177 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2178 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2179 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 2180 * Local context variables updated. 2181 */ 2182 static __rte_always_inline enum mlx5_txcmp_code 2183 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 2184 struct rte_mbuf **__rte_restrict pkts, 2185 unsigned int pkts_n, 2186 struct mlx5_txq_local *__rte_restrict loc, 2187 unsigned int olx) 2188 { 2189 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2190 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2191 pkts += loc->pkts_sent + 1; 2192 pkts_n -= loc->pkts_sent; 2193 for (;;) { 2194 enum mlx5_txcmp_code ret; 2195 2196 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 2197 /* 2198 * Estimate the number of free elts quickly but conservatively. 2199 * Some segment may be fully inlined and freed, 2200 * ignore this here - precise estimation is costly. 2201 */ 2202 if (loc->elts_free < NB_SEGS(loc->mbuf)) 2203 return MLX5_TXCMP_CODE_EXIT; 2204 if (MLX5_TXOFF_CONFIG(TSO) && 2205 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) { 2206 /* Proceed with multi-segment TSO. 
*/ 2207 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 2208 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 2209 /* Proceed with multi-segment SEND with inlining. */ 2210 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 2211 } else { 2212 /* Proceed with multi-segment SEND w/o inlining. */ 2213 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 2214 } 2215 if (ret == MLX5_TXCMP_CODE_EXIT) 2216 return MLX5_TXCMP_CODE_EXIT; 2217 if (ret == MLX5_TXCMP_CODE_ERROR) 2218 return MLX5_TXCMP_CODE_ERROR; 2219 /* WQE is built, go to the next packet. */ 2220 ++loc->pkts_sent; 2221 --pkts_n; 2222 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2223 return MLX5_TXCMP_CODE_EXIT; 2224 loc->mbuf = *pkts++; 2225 if (pkts_n > 1) 2226 rte_prefetch0(*pkts); 2227 if (likely(NB_SEGS(loc->mbuf) > 1)) 2228 continue; 2229 /* Here ends the series of multi-segment packets. */ 2230 if (MLX5_TXOFF_CONFIG(TSO) && 2231 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 2232 return MLX5_TXCMP_CODE_TSO; 2233 return MLX5_TXCMP_CODE_SINGLE; 2234 } 2235 MLX5_ASSERT(false); 2236 } 2237 2238 /** 2239 * Tx burst function for single-segment packets with TSO. 2240 * Supports all types of Tx offloads, except multi-packets. 2241 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 2242 * Function stops sending if it encounters the multi-segment 2243 * packet or packet without TSO requested. 2244 * 2245 * The routine is responsible for storing processed mbuf into elts ring buffer 2246 * and update elts_head if inline offloads is requested due to possible early 2247 * freeing of the inlined mbufs (can not store pkts array in elts as a batch). 2248 * 2249 * @param txq 2250 * Pointer to TX queue structure. 2251 * @param[in] pkts 2252 * Packets to transmit. 2253 * @param pkts_n 2254 * Number of packets in array. 2255 * @param loc 2256 * Pointer to burst routine local context. 2257 * @param olx 2258 * Configured Tx offloads mask. It is fully defined at 2259 * compile time and may be used for optimization. 2260 * 2261 * @return 2262 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2263 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2264 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2265 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2266 * Local context variables updated. 2267 */ 2268 static __rte_always_inline enum mlx5_txcmp_code 2269 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 2270 struct rte_mbuf **__rte_restrict pkts, 2271 unsigned int pkts_n, 2272 struct mlx5_txq_local *__rte_restrict loc, 2273 unsigned int olx) 2274 { 2275 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2276 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2277 pkts += loc->pkts_sent + 1; 2278 pkts_n -= loc->pkts_sent; 2279 for (;;) { 2280 struct mlx5_wqe_dseg *__rte_restrict dseg; 2281 struct mlx5_wqe *__rte_restrict wqe; 2282 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 2283 uint8_t *dptr; 2284 2285 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2286 if (MLX5_TXOFF_CONFIG(TXPP)) { 2287 enum mlx5_txcmp_code wret; 2288 2289 /* Generate WAIT for scheduling if requested. 
*/ 2290 wret = mlx5_tx_schedule_send(txq, loc, olx); 2291 if (wret == MLX5_TXCMP_CODE_EXIT) 2292 return MLX5_TXCMP_CODE_EXIT; 2293 if (wret == MLX5_TXCMP_CODE_ERROR) 2294 return MLX5_TXCMP_CODE_ERROR; 2295 } 2296 dlen = rte_pktmbuf_data_len(loc->mbuf); 2297 if (MLX5_TXOFF_CONFIG(VLAN) && 2298 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 2299 vlan = sizeof(struct rte_vlan_hdr); 2300 } 2301 /* 2302 * First calculate the WQE size to check 2303 * whether we have enough space in ring buffer. 2304 */ 2305 hlen = loc->mbuf->l2_len + vlan + 2306 loc->mbuf->l3_len + loc->mbuf->l4_len; 2307 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 2308 return MLX5_TXCMP_CODE_ERROR; 2309 if (loc->mbuf->ol_flags & PKT_TX_TUNNEL_MASK) 2310 hlen += loc->mbuf->outer_l2_len + 2311 loc->mbuf->outer_l3_len; 2312 /* Segment must contain all TSO headers. */ 2313 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 2314 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 2315 hlen > (dlen + vlan))) 2316 return MLX5_TXCMP_CODE_ERROR; 2317 /* 2318 * Check whether there are enough free WQEBBs: 2319 * - Control Segment 2320 * - Ethernet Segment 2321 * - First Segment of inlined Ethernet data 2322 * - ... data continued ... 2323 * - Finishing Data Segment of pointer type 2324 */ 2325 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 2326 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2327 if (loc->wqe_free < ((ds + 3) / 4)) 2328 return MLX5_TXCMP_CODE_EXIT; 2329 #ifdef MLX5_PMD_SOFT_COUNTERS 2330 /* Update sent data bytes/packets counters. */ 2331 ntcp = (dlen + vlan - hlen + 2332 loc->mbuf->tso_segsz - 1) / 2333 loc->mbuf->tso_segsz; 2334 /* 2335 * One will be added for mbuf itself at the end 2336 * of the mlx5_tx_burst from loc->pkts_sent field. 2337 */ 2338 --ntcp; 2339 txq->stats.opackets += ntcp; 2340 txq->stats.obytes += dlen + vlan + ntcp * hlen; 2341 #endif 2342 /* 2343 * Build the TSO WQE: 2344 * - Control Segment 2345 * - Ethernet Segment with hlen bytes inlined 2346 * - Data Segment of pointer type 2347 */ 2348 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2349 loc->wqe_last = wqe; 2350 mlx5_tx_cseg_init(txq, loc, wqe, ds, 2351 MLX5_OPCODE_TSO, olx); 2352 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 2353 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 2354 dlen -= hlen - vlan; 2355 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2356 /* 2357 * WQE is built, update the loop parameters 2358 * and go to the next packet. 2359 */ 2360 txq->wqe_ci += (ds + 3) / 4; 2361 loc->wqe_free -= (ds + 3) / 4; 2362 if (MLX5_TXOFF_CONFIG(INLINE)) 2363 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2364 --loc->elts_free; 2365 ++loc->pkts_sent; 2366 --pkts_n; 2367 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2368 return MLX5_TXCMP_CODE_EXIT; 2369 loc->mbuf = *pkts++; 2370 if (pkts_n > 1) 2371 rte_prefetch0(*pkts); 2372 if (MLX5_TXOFF_CONFIG(MULTI) && 2373 unlikely(NB_SEGS(loc->mbuf) > 1)) 2374 return MLX5_TXCMP_CODE_MULTI; 2375 if (likely(!(loc->mbuf->ol_flags & PKT_TX_TCP_SEG))) 2376 return MLX5_TXCMP_CODE_SINGLE; 2377 /* Continue with the next TSO packet. */ 2378 } 2379 MLX5_ASSERT(false); 2380 } 2381 2382 /** 2383 * Analyze the packet and select the best method to send. 2384 * 2385 * @param txq 2386 * Pointer to TX queue structure. 2387 * @param loc 2388 * Pointer to burst routine local context. 2389 * @param olx 2390 * Configured Tx offloads mask. It is fully defined at 2391 * compile time and may be used for optimization. 
2392 * @param newp 2393 * The predefined flag whether do complete check for 2394 * multi-segment packets and TSO. 2395 * 2396 * @return 2397 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2398 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 2399 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 2400 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 2401 */ 2402 static __rte_always_inline enum mlx5_txcmp_code 2403 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 2404 struct mlx5_txq_local *__rte_restrict loc, 2405 unsigned int olx, 2406 bool newp) 2407 { 2408 /* Check for multi-segment packet. */ 2409 if (newp && 2410 MLX5_TXOFF_CONFIG(MULTI) && 2411 unlikely(NB_SEGS(loc->mbuf) > 1)) 2412 return MLX5_TXCMP_CODE_MULTI; 2413 /* Check for TSO packet. */ 2414 if (newp && 2415 MLX5_TXOFF_CONFIG(TSO) && 2416 unlikely(loc->mbuf->ol_flags & PKT_TX_TCP_SEG)) 2417 return MLX5_TXCMP_CODE_TSO; 2418 /* Check if eMPW is enabled at all. */ 2419 if (!MLX5_TXOFF_CONFIG(EMPW)) 2420 return MLX5_TXCMP_CODE_SINGLE; 2421 /* Check if eMPW can be engaged. */ 2422 if (MLX5_TXOFF_CONFIG(VLAN) && 2423 unlikely(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) && 2424 (!MLX5_TXOFF_CONFIG(INLINE) || 2425 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 2426 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 2427 /* 2428 * eMPW does not support VLAN insertion offload, we have to 2429 * inline the entire packet but packet is too long for inlining. 2430 */ 2431 return MLX5_TXCMP_CODE_SINGLE; 2432 } 2433 return MLX5_TXCMP_CODE_EMPW; 2434 } 2435 2436 /** 2437 * Check the next packet attributes to match with the eMPW batch ones. 2438 * In addition, for legacy MPW the packet length is checked either. 2439 * 2440 * @param txq 2441 * Pointer to TX queue structure. 2442 * @param es 2443 * Pointer to Ethernet Segment of eMPW batch. 2444 * @param loc 2445 * Pointer to burst routine local context. 2446 * @param dlen 2447 * Length of previous packet in MPW descriptor. 2448 * @param olx 2449 * Configured Tx offloads mask. It is fully defined at 2450 * compile time and may be used for optimization. 2451 * 2452 * @return 2453 * true - packet match with eMPW batch attributes. 2454 * false - no match, eMPW should be restarted. 2455 */ 2456 static __rte_always_inline bool 2457 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, 2458 struct mlx5_wqe_eseg *__rte_restrict es, 2459 struct mlx5_txq_local *__rte_restrict loc, 2460 uint32_t dlen, 2461 unsigned int olx) 2462 { 2463 uint8_t swp_flags = 0; 2464 2465 /* Compare the checksum flags, if any. */ 2466 if (MLX5_TXOFF_CONFIG(CSUM) && 2467 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags) 2468 return false; 2469 /* Compare the Software Parser offsets and flags. */ 2470 if (MLX5_TXOFF_CONFIG(SWP) && 2471 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) || 2472 es->swp_flags != swp_flags)) 2473 return false; 2474 /* Fill metadata field if needed. */ 2475 if (MLX5_TXOFF_CONFIG(METADATA) && 2476 es->metadata != (loc->mbuf->ol_flags & PKT_TX_DYNF_METADATA ? 2477 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 0)) 2478 return false; 2479 /* Legacy MPW can send packets with the same length only. */ 2480 if (MLX5_TXOFF_CONFIG(MPW) && 2481 dlen != rte_pktmbuf_data_len(loc->mbuf)) 2482 return false; 2483 /* There must be no VLAN packets in eMPW loop. */ 2484 if (MLX5_TXOFF_CONFIG(VLAN)) 2485 MLX5_ASSERT(!(loc->mbuf->ol_flags & PKT_TX_VLAN_PKT)); 2486 /* Check if the scheduling is requested. 
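	 * A packet that requests Tx scheduling must be preceded by its own
	 * WAIT WQE, which cannot be placed inside an eMPW session, so such
	 * a packet terminates the current batch.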
 */
2487 	if (MLX5_TXOFF_CONFIG(TXPP) &&
2488 	    loc->mbuf->ol_flags & txq->ts_mask)
2489 		return false;
2490 	return true;
2491 }
2492 
2493 /**
2494  * Update send loop variables and WQE for eMPW loop without data inlining.
2495  * Number of Data Segments is equal to the number of sent packets.
2496  *
2497  * @param txq
2498  *   Pointer to TX queue structure.
2499  * @param loc
2500  *   Pointer to burst routine local context.
2501  * @param ds
2502  *   Number of packets (equal to the number of Data Segments) in the session.
2503  * @param slen
2504  *   Accumulated statistics, bytes sent.
2505  * @param olx
2506  *   Configured Tx offloads mask. It is fully defined at
2507  *   compile time and may be used for optimization.
2508  *
2509  * The routine closes the eMPW session: it stores the final Data Segment
2510  * count in the Control Segment of the last WQE and advances the elts/WQE
2511  * loop counters in the local context.
2512  */
2513 static __rte_always_inline void
2514 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq,
2515 		   struct mlx5_txq_local *__rte_restrict loc,
2516 		   unsigned int ds,
2517 		   unsigned int slen,
2518 		   unsigned int olx __rte_unused)
2519 {
2520 	MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE));
2521 #ifdef MLX5_PMD_SOFT_COUNTERS
2522 	/* Update sent data bytes counter. */
2523 	txq->stats.obytes += slen;
2524 #else
2525 	(void)slen;
2526 #endif
2527 	loc->elts_free -= ds;
2528 	loc->pkts_sent += ds;
2529 	ds += 2;
2530 	loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
2531 	txq->wqe_ci += (ds + 3) / 4;
2532 	loc->wqe_free -= (ds + 3) / 4;
2533 }
2534 
2535 /**
2536  * Update send loop variables and WQE for eMPW loop with data inlining.
2537  * Takes the total size of pushed descriptors and inlined data via @p len.
2538  *
2539  * @param txq
2540  *   Pointer to TX queue structure.
2541  * @param loc
2542  *   Pointer to burst routine local context.
2543  * @param len
2544  *   Total size of descriptor/data in bytes.
2545  * @param slen
2546  *   Accumulated statistics, data bytes sent.
2547  * @param wqem
2548  *   The base WQE for the eMPW/MPW descriptor.
2549  * @param olx
2550  *   Configured Tx offloads mask. It is fully defined at
2551  *   compile time and may be used for optimization.
2552  *
2553  * The routine closes the inline eMPW session: it converts @p len into the
2554  * final Data Segment count, stores it in the Control Segment of @p wqem
2555  * and advances the WQE loop counters in the local context.
2556  */
2557 static __rte_always_inline void
2558 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq,
2559 		   struct mlx5_txq_local *__rte_restrict loc,
2560 		   unsigned int len,
2561 		   unsigned int slen,
2562 		   struct mlx5_wqe *__rte_restrict wqem,
2563 		   unsigned int olx __rte_unused)
2564 {
2565 	struct mlx5_wqe_dseg *dseg = &wqem->dseg[0];
2566 
2567 	MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE));
2568 #ifdef MLX5_PMD_SOFT_COUNTERS
2569 	/* Update sent data bytes counter. */
2570 	txq->stats.obytes += slen;
2571 #else
2572 	(void)slen;
2573 #endif
2574 	if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) {
2575 		/*
2576 		 * If the legacy MPW session contains inline packets,
2577 		 * we should set the length of its single inline Data
2578 		 * Segment and align the total length to the segment size.
2579 		 */
2580 		MLX5_ASSERT(len > sizeof(dseg->bcount));
2581 		dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) |
2582 						MLX5_ETH_WQE_DATA_INLINE);
2583 		len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2;
2584 	} else {
2585 		/*
2586 		 * The session is not legacy MPW or it contains
2587 		 * data buffer pointer segments.
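		 * In this case 'len' is a whole number of MLX5_WSEG_SIZE
		 * (16-byte) units, so the Data Segment count is simply
		 * len / MLX5_WSEG_SIZE plus 2 for the Control and Ethernet
		 * Segments. For example (illustrative numbers), ten pointer
		 * Data Segments give len = 160 and sq_ds = 160 / 16 + 2 = 12.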
2588 */ 2589 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 2590 len = len / MLX5_WSEG_SIZE + 2; 2591 } 2592 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 2593 txq->wqe_ci += (len + 3) / 4; 2594 loc->wqe_free -= (len + 3) / 4; 2595 loc->wqe_last = wqem; 2596 } 2597 2598 /** 2599 * The set of Tx burst functions for single-segment packets without TSO 2600 * and with Multi-Packet Writing feature support. 2601 * Supports all types of Tx offloads, except multi-packets and TSO. 2602 * 2603 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends as many packet 2604 * per WQE as it can. If eMPW is not configured or packet can not be sent with 2605 * eMPW (VLAN insertion) the ordinary SEND opcode is used and only one packet 2606 * placed in WQE. 2607 * 2608 * Functions stop sending if it encounters the multi-segment packet or packet 2609 * with TSO requested. 2610 * 2611 * The routines are responsible for storing processed mbuf into elts ring buffer 2612 * and update elts_head if inlining offload is requested. Otherwise the copying 2613 * mbufs to elts can be postponed and completed at the end of burst routine. 2614 * 2615 * @param txq 2616 * Pointer to TX queue structure. 2617 * @param[in] pkts 2618 * Packets to transmit. 2619 * @param pkts_n 2620 * Number of packets in array. 2621 * @param loc 2622 * Pointer to burst routine local context. 2623 * @param olx 2624 * Configured Tx offloads mask. It is fully defined at 2625 * compile time and may be used for optimization. 2626 * 2627 * @return 2628 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2629 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2630 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2631 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 2632 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 2633 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 2634 * 2635 * Local context variables updated. 2636 * 2637 * 2638 * The routine sends packets with MLX5_OPCODE_EMPW 2639 * without inlining, this is dedicated optimized branch. 2640 * No VLAN insertion is supported. 2641 */ 2642 static __rte_always_inline enum mlx5_txcmp_code 2643 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 2644 struct rte_mbuf **__rte_restrict pkts, 2645 unsigned int pkts_n, 2646 struct mlx5_txq_local *__rte_restrict loc, 2647 unsigned int olx) 2648 { 2649 /* 2650 * Subroutine is the part of mlx5_tx_burst_single() and sends 2651 * single-segment packet with eMPW opcode without data inlining. 2652 */ 2653 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2654 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2655 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2656 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2657 pkts += loc->pkts_sent + 1; 2658 pkts_n -= loc->pkts_sent; 2659 for (;;) { 2660 struct mlx5_wqe_dseg *__rte_restrict dseg; 2661 struct mlx5_wqe_eseg *__rte_restrict eseg; 2662 enum mlx5_txcmp_code ret; 2663 unsigned int part, loop; 2664 unsigned int slen = 0; 2665 2666 next_empw: 2667 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2668 if (MLX5_TXOFF_CONFIG(TXPP)) { 2669 enum mlx5_txcmp_code wret; 2670 2671 /* Generate WAIT for scheduling if requested. */ 2672 wret = mlx5_tx_schedule_send(txq, loc, olx); 2673 if (wret == MLX5_TXCMP_CODE_EXIT) 2674 return MLX5_TXCMP_CODE_EXIT; 2675 if (wret == MLX5_TXCMP_CODE_ERROR) 2676 return MLX5_TXCMP_CODE_ERROR; 2677 } 2678 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 
2679 MLX5_MPW_MAX_PACKETS : 2680 MLX5_EMPW_MAX_PACKETS); 2681 if (unlikely(loc->elts_free < part)) { 2682 /* We have no enough elts to save all mbufs. */ 2683 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 2684 return MLX5_TXCMP_CODE_EXIT; 2685 /* But we still able to send at least minimal eMPW. */ 2686 part = loc->elts_free; 2687 } 2688 /* Check whether we have enough WQEs */ 2689 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 2690 if (unlikely(loc->wqe_free < 2691 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2692 return MLX5_TXCMP_CODE_EXIT; 2693 part = (loc->wqe_free * 4) - 2; 2694 } 2695 if (likely(part > 1)) 2696 rte_prefetch0(*pkts); 2697 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2698 /* 2699 * Build eMPW title WQEBB: 2700 * - Control Segment, eMPW opcode 2701 * - Ethernet Segment, no inline 2702 */ 2703 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 2704 MLX5_OPCODE_ENHANCED_MPSW, olx); 2705 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 2706 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2707 eseg = &loc->wqe_last->eseg; 2708 dseg = &loc->wqe_last->dseg[0]; 2709 loop = part; 2710 /* Store the packet length for legacy MPW. */ 2711 if (MLX5_TXOFF_CONFIG(MPW)) 2712 eseg->mss = rte_cpu_to_be_16 2713 (rte_pktmbuf_data_len(loc->mbuf)); 2714 for (;;) { 2715 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2716 #ifdef MLX5_PMD_SOFT_COUNTERS 2717 /* Update sent data bytes counter. */ 2718 slen += dlen; 2719 #endif 2720 mlx5_tx_dseg_ptr 2721 (txq, loc, dseg, 2722 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 2723 dlen, olx); 2724 if (unlikely(--loop == 0)) 2725 break; 2726 loc->mbuf = *pkts++; 2727 if (likely(loop > 1)) 2728 rte_prefetch0(*pkts); 2729 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2730 /* 2731 * Unroll the completion code to avoid 2732 * returning variable value - it results in 2733 * unoptimized sequent checking in caller. 2734 */ 2735 if (ret == MLX5_TXCMP_CODE_MULTI) { 2736 part -= loop; 2737 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2738 if (unlikely(!loc->elts_free || 2739 !loc->wqe_free)) 2740 return MLX5_TXCMP_CODE_EXIT; 2741 return MLX5_TXCMP_CODE_MULTI; 2742 } 2743 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2744 if (ret == MLX5_TXCMP_CODE_TSO) { 2745 part -= loop; 2746 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2747 if (unlikely(!loc->elts_free || 2748 !loc->wqe_free)) 2749 return MLX5_TXCMP_CODE_EXIT; 2750 return MLX5_TXCMP_CODE_TSO; 2751 } 2752 if (ret == MLX5_TXCMP_CODE_SINGLE) { 2753 part -= loop; 2754 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2755 if (unlikely(!loc->elts_free || 2756 !loc->wqe_free)) 2757 return MLX5_TXCMP_CODE_EXIT; 2758 return MLX5_TXCMP_CODE_SINGLE; 2759 } 2760 if (ret != MLX5_TXCMP_CODE_EMPW) { 2761 MLX5_ASSERT(false); 2762 part -= loop; 2763 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2764 return MLX5_TXCMP_CODE_ERROR; 2765 } 2766 /* 2767 * Check whether packet parameters coincide 2768 * within assumed eMPW batch: 2769 * - check sum settings 2770 * - metadata value 2771 * - software parser settings 2772 * - packets length (legacy MPW only) 2773 * - scheduling is not required 2774 */ 2775 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 2776 MLX5_ASSERT(loop); 2777 part -= loop; 2778 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2779 if (unlikely(!loc->elts_free || 2780 !loc->wqe_free)) 2781 return MLX5_TXCMP_CODE_EXIT; 2782 pkts_n -= part; 2783 goto next_empw; 2784 } 2785 /* Packet attributes match, continue the same eMPW. 
*/ 2786 ++dseg; 2787 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2788 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2789 } 2790 /* eMPW is built successfully, update loop parameters. */ 2791 MLX5_ASSERT(!loop); 2792 MLX5_ASSERT(pkts_n >= part); 2793 #ifdef MLX5_PMD_SOFT_COUNTERS 2794 /* Update sent data bytes counter. */ 2795 txq->stats.obytes += slen; 2796 #endif 2797 loc->elts_free -= part; 2798 loc->pkts_sent += part; 2799 txq->wqe_ci += (2 + part + 3) / 4; 2800 loc->wqe_free -= (2 + part + 3) / 4; 2801 pkts_n -= part; 2802 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2803 return MLX5_TXCMP_CODE_EXIT; 2804 loc->mbuf = *pkts++; 2805 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2806 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 2807 return ret; 2808 /* Continue sending eMPW batches. */ 2809 } 2810 MLX5_ASSERT(false); 2811 } 2812 2813 /** 2814 * The routine sends packets with MLX5_OPCODE_EMPW 2815 * with inlining, optionally supports VLAN insertion. 2816 */ 2817 static __rte_always_inline enum mlx5_txcmp_code 2818 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 2819 struct rte_mbuf **__rte_restrict pkts, 2820 unsigned int pkts_n, 2821 struct mlx5_txq_local *__rte_restrict loc, 2822 unsigned int olx) 2823 { 2824 /* 2825 * Subroutine is the part of mlx5_tx_burst_single() and sends 2826 * single-segment packet with eMPW opcode with data inlining. 2827 */ 2828 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2829 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2830 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2831 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2832 pkts += loc->pkts_sent + 1; 2833 pkts_n -= loc->pkts_sent; 2834 for (;;) { 2835 struct mlx5_wqe_dseg *__rte_restrict dseg; 2836 struct mlx5_wqe *__rte_restrict wqem; 2837 enum mlx5_txcmp_code ret; 2838 unsigned int room, part, nlim; 2839 unsigned int slen = 0; 2840 2841 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2842 if (MLX5_TXOFF_CONFIG(TXPP)) { 2843 enum mlx5_txcmp_code wret; 2844 2845 /* Generate WAIT for scheduling if requested. */ 2846 wret = mlx5_tx_schedule_send(txq, loc, olx); 2847 if (wret == MLX5_TXCMP_CODE_EXIT) 2848 return MLX5_TXCMP_CODE_EXIT; 2849 if (wret == MLX5_TXCMP_CODE_ERROR) 2850 return MLX5_TXCMP_CODE_ERROR; 2851 } 2852 /* 2853 * Limits the amount of packets in one WQE 2854 * to improve CQE latency generation. 2855 */ 2856 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2857 MLX5_MPW_INLINE_MAX_PACKETS : 2858 MLX5_EMPW_MAX_PACKETS); 2859 /* Check whether we have minimal amount WQEs */ 2860 if (unlikely(loc->wqe_free < 2861 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2862 return MLX5_TXCMP_CODE_EXIT; 2863 if (likely(pkts_n > 1)) 2864 rte_prefetch0(*pkts); 2865 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2866 /* 2867 * Build eMPW title WQEBB: 2868 * - Control Segment, eMPW opcode, zero DS 2869 * - Ethernet Segment, no inline 2870 */ 2871 mlx5_tx_cseg_init(txq, loc, wqem, 0, 2872 MLX5_OPCODE_ENHANCED_MPSW, olx); 2873 mlx5_tx_eseg_none(txq, loc, wqem, 2874 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2875 dseg = &wqem->dseg[0]; 2876 /* Store the packet length for legacy MPW. */ 2877 if (MLX5_TXOFF_CONFIG(MPW)) 2878 wqem->eseg.mss = rte_cpu_to_be_16 2879 (rte_pktmbuf_data_len(loc->mbuf)); 2880 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 2881 loc->wqe_free) * MLX5_WQE_SIZE - 2882 MLX5_WQE_CSEG_SIZE - 2883 MLX5_WQE_ESEG_SIZE; 2884 /* Limit the room for legacy MPW sessions for performance. 
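		 * The preceding computation reserves the available contiguous
		 * WQE space minus the Control and Ethernet Segments; e.g.
		 * with 8 free 64-byte WQEBBs and 16-byte segments that is
		 * 8 * 64 - 16 - 16 = 480 bytes of room (illustrative figures,
		 * assuming the MLX5_WQE_SIZE_MAX cap is not the limiting
		 * factor). For legacy MPW the room is further clipped to the
		 * larger of one maximal inline packet (inlen_empw + bcount
		 * field + optional VLAN header) or MLX5_MPW_INLINE_MAX_PACKETS
		 * pointer Data Segments, to keep sessions short and CQE
		 * generation latency low.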
*/ 2885 if (MLX5_TXOFF_CONFIG(MPW)) 2886 room = RTE_MIN(room, 2887 RTE_MAX(txq->inlen_empw + 2888 sizeof(dseg->bcount) + 2889 (MLX5_TXOFF_CONFIG(VLAN) ? 2890 sizeof(struct rte_vlan_hdr) : 0), 2891 MLX5_MPW_INLINE_MAX_PACKETS * 2892 MLX5_WQE_DSEG_SIZE)); 2893 /* Build WQE till we have space, packets and resources. */ 2894 part = room; 2895 for (;;) { 2896 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2897 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2898 unsigned int tlen; 2899 2900 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2901 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 2902 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 2903 /* 2904 * Some Tx offloads may cause an error if packet is not 2905 * long enough, check against assumed minimal length. 2906 */ 2907 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 2908 part -= room; 2909 if (unlikely(!part)) 2910 return MLX5_TXCMP_CODE_ERROR; 2911 /* 2912 * We have some successfully built 2913 * packet Data Segments to send. 2914 */ 2915 mlx5_tx_idone_empw(txq, loc, part, 2916 slen, wqem, olx); 2917 return MLX5_TXCMP_CODE_ERROR; 2918 } 2919 /* Inline or not inline - that's the Question. */ 2920 if (dlen > txq->inlen_empw || 2921 loc->mbuf->ol_flags & PKT_TX_DYNF_NOINLINE) 2922 goto pointer_empw; 2923 if (MLX5_TXOFF_CONFIG(MPW)) { 2924 if (dlen > txq->inlen_send) 2925 goto pointer_empw; 2926 tlen = dlen; 2927 if (part == room) { 2928 /* Open new inline MPW session. */ 2929 tlen += sizeof(dseg->bcount); 2930 dseg->bcount = RTE_BE32(0); 2931 dseg = RTE_PTR_ADD 2932 (dseg, sizeof(dseg->bcount)); 2933 } else { 2934 /* 2935 * No pointer and inline descriptor 2936 * intermix for legacy MPW sessions. 2937 */ 2938 if (wqem->dseg[0].bcount) 2939 break; 2940 } 2941 } else { 2942 tlen = sizeof(dseg->bcount) + dlen; 2943 } 2944 /* Inline entire packet, optional VLAN insertion. */ 2945 if (MLX5_TXOFF_CONFIG(VLAN) && 2946 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 2947 /* 2948 * The packet length must be checked in 2949 * mlx5_tx_able_to_empw() and packet 2950 * fits into inline length guaranteed. 2951 */ 2952 MLX5_ASSERT((dlen + 2953 sizeof(struct rte_vlan_hdr)) <= 2954 txq->inlen_empw); 2955 tlen += sizeof(struct rte_vlan_hdr); 2956 if (room < tlen) 2957 break; 2958 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 2959 dptr, dlen, olx); 2960 #ifdef MLX5_PMD_SOFT_COUNTERS 2961 /* Update sent data bytes counter. */ 2962 slen += sizeof(struct rte_vlan_hdr); 2963 #endif 2964 } else { 2965 if (room < tlen) 2966 break; 2967 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 2968 dptr, dlen, olx); 2969 } 2970 if (!MLX5_TXOFF_CONFIG(MPW)) 2971 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 2972 MLX5_ASSERT(room >= tlen); 2973 room -= tlen; 2974 /* 2975 * Packet data are completely inline, 2976 * we can try to free the packet. 2977 */ 2978 if (likely(loc->pkts_sent == loc->mbuf_free)) { 2979 /* 2980 * All the packets from the burst beginning 2981 * are inline, we can free mbufs directly 2982 * from the origin array on tx_burst exit(). 2983 */ 2984 loc->mbuf_free++; 2985 goto next_mbuf; 2986 } 2987 /* 2988 * In order no to call rte_pktmbuf_free_seg() here, 2989 * in the most inner loop (that might be very 2990 * expensive) we just save the mbuf in elts. 2991 */ 2992 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2993 loc->elts_free--; 2994 goto next_mbuf; 2995 pointer_empw: 2996 /* 2997 * No pointer and inline descriptor 2998 * intermix for legacy MPW sessions. 
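			 * A zero bcount in the first Data Segment marks an
			 * open inline legacy MPW session (its real length is
			 * only written on close by mlx5_tx_idone_empw()), so
			 * a pointer descriptor cannot be appended and the
			 * session is closed instead.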
2999 */ 3000 if (MLX5_TXOFF_CONFIG(MPW) && 3001 part != room && 3002 wqem->dseg[0].bcount == RTE_BE32(0)) 3003 break; 3004 /* 3005 * Not inlinable VLAN packets are 3006 * proceeded outside of this routine. 3007 */ 3008 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 3009 if (MLX5_TXOFF_CONFIG(VLAN)) 3010 MLX5_ASSERT(!(loc->mbuf->ol_flags & 3011 PKT_TX_VLAN_PKT)); 3012 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 3013 /* We have to store mbuf in elts.*/ 3014 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 3015 loc->elts_free--; 3016 room -= MLX5_WQE_DSEG_SIZE; 3017 /* Ring buffer wraparound is checked at the loop end.*/ 3018 ++dseg; 3019 next_mbuf: 3020 #ifdef MLX5_PMD_SOFT_COUNTERS 3021 /* Update sent data bytes counter. */ 3022 slen += dlen; 3023 #endif 3024 loc->pkts_sent++; 3025 pkts_n--; 3026 if (unlikely(!pkts_n || !loc->elts_free)) { 3027 /* 3028 * We have no resources/packets to 3029 * continue build descriptors. 3030 */ 3031 part -= room; 3032 mlx5_tx_idone_empw(txq, loc, part, 3033 slen, wqem, olx); 3034 return MLX5_TXCMP_CODE_EXIT; 3035 } 3036 loc->mbuf = *pkts++; 3037 if (likely(pkts_n > 1)) 3038 rte_prefetch0(*pkts); 3039 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3040 /* 3041 * Unroll the completion code to avoid 3042 * returning variable value - it results in 3043 * unoptimized sequent checking in caller. 3044 */ 3045 if (ret == MLX5_TXCMP_CODE_MULTI) { 3046 part -= room; 3047 mlx5_tx_idone_empw(txq, loc, part, 3048 slen, wqem, olx); 3049 if (unlikely(!loc->elts_free || 3050 !loc->wqe_free)) 3051 return MLX5_TXCMP_CODE_EXIT; 3052 return MLX5_TXCMP_CODE_MULTI; 3053 } 3054 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3055 if (ret == MLX5_TXCMP_CODE_TSO) { 3056 part -= room; 3057 mlx5_tx_idone_empw(txq, loc, part, 3058 slen, wqem, olx); 3059 if (unlikely(!loc->elts_free || 3060 !loc->wqe_free)) 3061 return MLX5_TXCMP_CODE_EXIT; 3062 return MLX5_TXCMP_CODE_TSO; 3063 } 3064 if (ret == MLX5_TXCMP_CODE_SINGLE) { 3065 part -= room; 3066 mlx5_tx_idone_empw(txq, loc, part, 3067 slen, wqem, olx); 3068 if (unlikely(!loc->elts_free || 3069 !loc->wqe_free)) 3070 return MLX5_TXCMP_CODE_EXIT; 3071 return MLX5_TXCMP_CODE_SINGLE; 3072 } 3073 if (ret != MLX5_TXCMP_CODE_EMPW) { 3074 MLX5_ASSERT(false); 3075 part -= room; 3076 mlx5_tx_idone_empw(txq, loc, part, 3077 slen, wqem, olx); 3078 return MLX5_TXCMP_CODE_ERROR; 3079 } 3080 /* Check if we have minimal room left. */ 3081 nlim--; 3082 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 3083 break; 3084 /* 3085 * Check whether packet parameters coincide 3086 * within assumed eMPW batch: 3087 * - check sum settings 3088 * - metadata value 3089 * - software parser settings 3090 * - packets length (legacy MPW only) 3091 * - scheduling is not required 3092 */ 3093 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 3094 loc, dlen, olx)) 3095 break; 3096 /* Packet attributes match, continue the same eMPW. */ 3097 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3098 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3099 } 3100 /* 3101 * We get here to close an existing eMPW 3102 * session and start the new one. 3103 */ 3104 MLX5_ASSERT(pkts_n); 3105 part -= room; 3106 if (unlikely(!part)) 3107 return MLX5_TXCMP_CODE_EXIT; 3108 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 3109 if (unlikely(!loc->elts_free || 3110 !loc->wqe_free)) 3111 return MLX5_TXCMP_CODE_EXIT; 3112 /* Continue the loop with new eMPW session. */ 3113 } 3114 MLX5_ASSERT(false); 3115 } 3116 3117 /** 3118 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 
3119 * Data inlining and VLAN insertion are supported. 3120 */ 3121 static __rte_always_inline enum mlx5_txcmp_code 3122 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 3123 struct rte_mbuf **__rte_restrict pkts, 3124 unsigned int pkts_n, 3125 struct mlx5_txq_local *__rte_restrict loc, 3126 unsigned int olx) 3127 { 3128 /* 3129 * Subroutine is the part of mlx5_tx_burst_single() 3130 * and sends single-segment packet with SEND opcode. 3131 */ 3132 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3133 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3134 pkts += loc->pkts_sent + 1; 3135 pkts_n -= loc->pkts_sent; 3136 for (;;) { 3137 struct mlx5_wqe *__rte_restrict wqe; 3138 enum mlx5_txcmp_code ret; 3139 3140 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3141 if (MLX5_TXOFF_CONFIG(TXPP)) { 3142 enum mlx5_txcmp_code wret; 3143 3144 /* Generate WAIT for scheduling if requested. */ 3145 wret = mlx5_tx_schedule_send(txq, loc, olx); 3146 if (wret == MLX5_TXCMP_CODE_EXIT) 3147 return MLX5_TXCMP_CODE_EXIT; 3148 if (wret == MLX5_TXCMP_CODE_ERROR) 3149 return MLX5_TXCMP_CODE_ERROR; 3150 } 3151 if (MLX5_TXOFF_CONFIG(INLINE)) { 3152 unsigned int inlen, vlan = 0; 3153 3154 inlen = rte_pktmbuf_data_len(loc->mbuf); 3155 if (MLX5_TXOFF_CONFIG(VLAN) && 3156 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) { 3157 vlan = sizeof(struct rte_vlan_hdr); 3158 inlen += vlan; 3159 } 3160 /* 3161 * If inlining is enabled at configuration time 3162 * the limit must be not less than minimal size. 3163 * Otherwise we would do extra check for data 3164 * size to avoid crashes due to length overflow. 3165 */ 3166 MLX5_ASSERT(txq->inlen_send >= 3167 MLX5_ESEG_MIN_INLINE_SIZE); 3168 if (inlen <= txq->inlen_send) { 3169 unsigned int seg_n, wqe_n; 3170 3171 rte_prefetch0(rte_pktmbuf_mtod 3172 (loc->mbuf, uint8_t *)); 3173 /* Check against minimal length. */ 3174 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3175 return MLX5_TXCMP_CODE_ERROR; 3176 if (loc->mbuf->ol_flags & 3177 PKT_TX_DYNF_NOINLINE) { 3178 /* 3179 * The hint flag not to inline packet 3180 * data is set. Check whether we can 3181 * follow the hint. 3182 */ 3183 if ((!MLX5_TXOFF_CONFIG(EMPW) && 3184 txq->inlen_mode) || 3185 (MLX5_TXOFF_CONFIG(MPW) && 3186 txq->inlen_mode)) { 3187 if (inlen <= txq->inlen_send) 3188 goto single_inline; 3189 /* 3190 * The hardware requires the 3191 * minimal inline data header. 3192 */ 3193 goto single_min_inline; 3194 } 3195 if (MLX5_TXOFF_CONFIG(VLAN) && 3196 vlan && !txq->vlan_en) { 3197 /* 3198 * We must insert VLAN tag 3199 * by software means. 3200 */ 3201 goto single_part_inline; 3202 } 3203 goto single_no_inline; 3204 } 3205 single_inline: 3206 /* 3207 * Completely inlined packet data WQE: 3208 * - Control Segment, SEND opcode 3209 * - Ethernet Segment, no VLAN insertion 3210 * - Data inlined, VLAN optionally inserted 3211 * - Alignment to MLX5_WSEG_SIZE 3212 * Have to estimate amount of WQEBBs 3213 */ 3214 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 3215 MLX5_ESEG_MIN_INLINE_SIZE + 3216 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3217 /* Check if there are enough WQEBBs. */ 3218 wqe_n = (seg_n + 3) / 4; 3219 if (wqe_n > loc->wqe_free) 3220 return MLX5_TXCMP_CODE_EXIT; 3221 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3222 loc->wqe_last = wqe; 3223 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 3224 MLX5_OPCODE_SEND, olx); 3225 mlx5_tx_eseg_data(txq, loc, wqe, 3226 vlan, inlen, 0, olx); 3227 txq->wqe_ci += wqe_n; 3228 loc->wqe_free -= wqe_n; 3229 /* 3230 * Packet data are completely inlined, 3231 * free the packet immediately. 
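				 * All bytes have been copied into the WQE,
				 * the NIC never references the mbuf memory,
				 * so there is no need to keep the mbuf in
				 * elts until the completion arrives.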
3232 */ 3233 rte_pktmbuf_free_seg(loc->mbuf); 3234 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 3235 MLX5_TXOFF_CONFIG(MPW)) && 3236 txq->inlen_mode) { 3237 /* 3238 * If minimal inlining is requested the eMPW 3239 * feature should be disabled due to data is 3240 * inlined into Ethernet Segment, which can 3241 * not contain inlined data for eMPW due to 3242 * segment shared for all packets. 3243 */ 3244 struct mlx5_wqe_dseg *__rte_restrict dseg; 3245 unsigned int ds; 3246 uint8_t *dptr; 3247 3248 /* 3249 * The inline-mode settings require 3250 * to inline the specified amount of 3251 * data bytes to the Ethernet Segment. 3252 * We should check the free space in 3253 * WQE ring buffer to inline partially. 3254 */ 3255 single_min_inline: 3256 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 3257 MLX5_ASSERT(inlen > txq->inlen_mode); 3258 MLX5_ASSERT(txq->inlen_mode >= 3259 MLX5_ESEG_MIN_INLINE_SIZE); 3260 /* 3261 * Check whether there are enough free WQEBBs: 3262 * - Control Segment 3263 * - Ethernet Segment 3264 * - First Segment of inlined Ethernet data 3265 * - ... data continued ... 3266 * - Finishing Data Segment of pointer type 3267 */ 3268 ds = (MLX5_WQE_CSEG_SIZE + 3269 MLX5_WQE_ESEG_SIZE + 3270 MLX5_WQE_DSEG_SIZE + 3271 txq->inlen_mode - 3272 MLX5_ESEG_MIN_INLINE_SIZE + 3273 MLX5_WQE_DSEG_SIZE + 3274 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3275 if (loc->wqe_free < ((ds + 3) / 4)) 3276 return MLX5_TXCMP_CODE_EXIT; 3277 /* 3278 * Build the ordinary SEND WQE: 3279 * - Control Segment 3280 * - Ethernet Segment, inline inlen_mode bytes 3281 * - Data Segment of pointer type 3282 */ 3283 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3284 loc->wqe_last = wqe; 3285 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3286 MLX5_OPCODE_SEND, olx); 3287 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 3288 txq->inlen_mode, 3289 0, olx); 3290 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3291 txq->inlen_mode - vlan; 3292 inlen -= txq->inlen_mode; 3293 mlx5_tx_dseg_ptr(txq, loc, dseg, 3294 dptr, inlen, olx); 3295 /* 3296 * WQE is built, update the loop parameters 3297 * and got to the next packet. 3298 */ 3299 txq->wqe_ci += (ds + 3) / 4; 3300 loc->wqe_free -= (ds + 3) / 4; 3301 /* We have to store mbuf in elts.*/ 3302 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3303 txq->elts[txq->elts_head++ & txq->elts_m] = 3304 loc->mbuf; 3305 --loc->elts_free; 3306 } else { 3307 uint8_t *dptr; 3308 unsigned int dlen; 3309 3310 /* 3311 * Partially inlined packet data WQE, we have 3312 * some space in title WQEBB, we can fill it 3313 * with some packet data. It takes one WQEBB, 3314 * it is available, no extra space check: 3315 * - Control Segment, SEND opcode 3316 * - Ethernet Segment, no VLAN insertion 3317 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 3318 * - Data Segment, pointer type 3319 * 3320 * We also get here if VLAN insertion is not 3321 * supported by HW, the inline is enabled. 3322 */ 3323 single_part_inline: 3324 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3325 loc->wqe_last = wqe; 3326 mlx5_tx_cseg_init(txq, loc, wqe, 4, 3327 MLX5_OPCODE_SEND, olx); 3328 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 3329 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3330 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 3331 /* 3332 * The length check is performed above, by 3333 * comparing with txq->inlen_send. We should 3334 * not get overflow here. 
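				 * For example (illustrative numbers, assuming
				 * the usual 18-byte MLX5_ESEG_MIN_INLINE_SIZE):
				 * a 64-byte frame with no VLAN to insert puts
				 * 18 bytes into the Ethernet Segment and the
				 * remaining dlen = 64 - 18 = 46 bytes are
				 * referenced by the pointer Data Segment.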
3335 */ 3336 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 3337 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 3338 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 3339 dptr, dlen, olx); 3340 ++txq->wqe_ci; 3341 --loc->wqe_free; 3342 /* We have to store mbuf in elts.*/ 3343 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3344 txq->elts[txq->elts_head++ & txq->elts_m] = 3345 loc->mbuf; 3346 --loc->elts_free; 3347 } 3348 #ifdef MLX5_PMD_SOFT_COUNTERS 3349 /* Update sent data bytes counter. */ 3350 txq->stats.obytes += vlan + 3351 rte_pktmbuf_data_len(loc->mbuf); 3352 #endif 3353 } else { 3354 /* 3355 * No inline at all, it means the CPU cycles saving 3356 * is prioritized at configuration, we should not 3357 * copy any packet data to WQE. 3358 * 3359 * SEND WQE, one WQEBB: 3360 * - Control Segment, SEND opcode 3361 * - Ethernet Segment, optional VLAN, no inline 3362 * - Data Segment, pointer type 3363 */ 3364 single_no_inline: 3365 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3366 loc->wqe_last = wqe; 3367 mlx5_tx_cseg_init(txq, loc, wqe, 3, 3368 MLX5_OPCODE_SEND, olx); 3369 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3370 mlx5_tx_dseg_ptr 3371 (txq, loc, &wqe->dseg[0], 3372 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3373 rte_pktmbuf_data_len(loc->mbuf), olx); 3374 ++txq->wqe_ci; 3375 --loc->wqe_free; 3376 /* 3377 * We should not store mbuf pointer in elts 3378 * if no inlining is configured, this is done 3379 * by calling routine in a batch copy. 3380 */ 3381 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 3382 --loc->elts_free; 3383 #ifdef MLX5_PMD_SOFT_COUNTERS 3384 /* Update sent data bytes counter. */ 3385 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 3386 if (MLX5_TXOFF_CONFIG(VLAN) && 3387 loc->mbuf->ol_flags & PKT_TX_VLAN_PKT) 3388 txq->stats.obytes += 3389 sizeof(struct rte_vlan_hdr); 3390 #endif 3391 } 3392 ++loc->pkts_sent; 3393 --pkts_n; 3394 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3395 return MLX5_TXCMP_CODE_EXIT; 3396 loc->mbuf = *pkts++; 3397 if (pkts_n > 1) 3398 rte_prefetch0(*pkts); 3399 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3400 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 3401 return ret; 3402 } 3403 MLX5_ASSERT(false); 3404 } 3405 3406 static __rte_always_inline enum mlx5_txcmp_code 3407 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 3408 struct rte_mbuf **__rte_restrict pkts, 3409 unsigned int pkts_n, 3410 struct mlx5_txq_local *__rte_restrict loc, 3411 unsigned int olx) 3412 { 3413 enum mlx5_txcmp_code ret; 3414 3415 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 3416 if (ret == MLX5_TXCMP_CODE_SINGLE) 3417 goto ordinary_send; 3418 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 3419 for (;;) { 3420 /* Optimize for inline/no inline eMPW send. */ 3421 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 3422 mlx5_tx_burst_empw_inline 3423 (txq, pkts, pkts_n, loc, olx) : 3424 mlx5_tx_burst_empw_simple 3425 (txq, pkts, pkts_n, loc, olx); 3426 if (ret != MLX5_TXCMP_CODE_SINGLE) 3427 return ret; 3428 /* The resources to send one packet should remain. */ 3429 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3430 ordinary_send: 3431 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 3432 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 3433 if (ret != MLX5_TXCMP_CODE_EMPW) 3434 return ret; 3435 /* The resources to send one packet should remain. */ 3436 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3437 } 3438 } 3439 3440 /** 3441 * DPDK Tx callback template. This is configured template used to generate 3442 * routines optimized for specified offload setup. 
 * One of these generated functions is chosen at SQ configuration time.
 *
 * @param txq
 *   Generic pointer to the TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in the array.
 * @param olx
 *   Configured offloads mask, representing the bits of MLX5_TXOFF_CONFIG_xxx
 *   values. It should be a compile-time constant to take advantage of the
 *   static configuration.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
static __rte_always_inline uint16_t
mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq,
		   struct rte_mbuf **__rte_restrict pkts,
		   uint16_t pkts_n,
		   unsigned int olx)
{
	struct mlx5_txq_local loc;
	enum mlx5_txcmp_code ret;
	unsigned int part;

	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (unlikely(!pkts_n))
		return 0;
	if (MLX5_TXOFF_CONFIG(INLINE))
		loc.mbuf_free = 0;
	loc.pkts_sent = 0;
	loc.pkts_copy = 0;
	loc.wqe_last = NULL;

send_loop:
	loc.pkts_loop = loc.pkts_sent;
	/*
	 * Check if there are some CQEs, and if any:
	 * - process the encountered errors
	 * - process the completed WQEs
	 * - free related mbufs
	 * - doorbell the NIC about processed CQEs
	 */
	rte_prefetch0(*(pkts + loc.pkts_sent));
	mlx5_tx_handle_completion(txq, olx);
	/*
	 * Calculate the number of available resources - elts and WQEs.
	 * There are two possible scenarios:
	 * - no data inlining into WQEs, one WQEBB may contain up to
	 *   four packets, in this case elts become the scarce resource
	 * - data inlining into WQEs, one packet may require multiple
	 *   WQEBBs, the WQEs become the limiting factor.
	 */
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	loc.elts_free = txq->elts_s -
				(uint16_t)(txq->elts_head - txq->elts_tail);
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	loc.wqe_free = txq->wqe_s -
				(uint16_t)(txq->wqe_ci - txq->wqe_pi);
	if (unlikely(!loc.elts_free || !loc.wqe_free))
		goto burst_exit;
	for (;;) {
		/*
		 * Fetch the packet from the array. Usually this is the first
		 * packet in a series of multi/single-segment packets.
		 */
		loc.mbuf = *(pkts + loc.pkts_sent);
		/* Dedicated branch for multi-segment packets. */
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    unlikely(NB_SEGS(loc.mbuf) > 1)) {
			/*
			 * Multi-segment packet encountered.
			 * Hardware is able to process it only
			 * with SEND/TSO opcodes, one packet
			 * per WQE, so do it in a dedicated routine.
			 */
enter_send_multi:
			MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy);
			part = loc.pkts_sent - loc.pkts_copy;
			if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
				/*
				 * There are some single-segment mbufs not
				 * stored in elts. The mbufs must be in the
				 * same order as the WQEs, so we must copy
				 * the mbufs to elts here, before the mbufs
				 * of the coming multi-segment packet are
				 * appended.
				 */
				mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy,
						  part, olx);
				loc.pkts_copy = loc.pkts_sent;
			}
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx);
			if (!MLX5_TXOFF_CONFIG(INLINE))
				loc.pkts_copy = loc.pkts_sent;
			/*
			 * These return code checks are supposed
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT) {
				/*
				 * The routine returns this code when
				 * all packets are sent or there are
				 * not enough resources to complete
				 * the request.
				 */
				break;
			}
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				/*
				 * The routine returns this code when some
				 * error in the incoming packet format
				 * occurred.
				 */
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE) {
				/*
				 * A single-segment packet was encountered
				 * in the array, try to send it in the most
				 * optimized way, possibly engaging eMPW.
				 */
				goto enter_send_single;
			}
			if (MLX5_TXOFF_CONFIG(TSO) &&
			    ret == MLX5_TXCMP_CODE_TSO) {
				/*
				 * A single-segment TSO packet was
				 * encountered in the array.
				 */
				goto enter_send_tso;
			}
			/* We must not get here. Something is going wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/* Dedicated branch for single-segment TSO packets. */
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    unlikely(loc.mbuf->ol_flags & PKT_TX_TCP_SEG)) {
			/*
			 * TSO might require a special way of inlining
			 * (dedicated parameters) and is sent with the
			 * MLX5_OPCODE_TSO opcode only, so provide this
			 * in a dedicated branch.
			 */
enter_send_tso:
			MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1);
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx);
			/*
			 * These return code checks are supposed
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT)
				break;
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE)
				goto enter_send_single;
			if (MLX5_TXOFF_CONFIG(MULTI) &&
			    ret == MLX5_TXCMP_CODE_MULTI) {
				/*
				 * A multi-segment packet was
				 * encountered in the array.
				 */
				goto enter_send_multi;
			}
			/* We must not get here. Something is going wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/*
		 * The dedicated branch for single-segment packets
		 * without TSO. Often these can be sent using
		 * MLX5_OPCODE_EMPW with multiple packets in one WQE.
		 * The routine builds the WQEs until it encounters
		 * a TSO or multi-segment packet (if these offloads
		 * are requested at SQ configuration time).
		 */
enter_send_single:
		MLX5_ASSERT(pkts_n > loc.pkts_sent);
		ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx);
		/*
		 * These return code checks are supposed
		 * to be optimized out due to routine inlining.
		 */
		if (ret == MLX5_TXCMP_CODE_EXIT)
			break;
		if (ret == MLX5_TXCMP_CODE_ERROR) {
			txq->stats.oerrors++;
			break;
		}
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    ret == MLX5_TXCMP_CODE_MULTI) {
			/*
			 * A multi-segment packet was
			 * encountered in the array.
			 */
			goto enter_send_multi;
		}
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    ret == MLX5_TXCMP_CODE_TSO) {
			/*
			 * A single-segment TSO packet was
			 * encountered in the array.
			 */
			goto enter_send_tso;
		}
		/* We must not get here. Something is going wrong. */
		MLX5_ASSERT(false);
		txq->stats.oerrors++;
		break;
	}
	/*
	 * The main Tx loop is completed, do the rest:
	 * - set the completion request if thresholds are reached
	 * - doorbell the hardware
	 * - copy the rest of the mbufs to elts (if any)
	 */
	MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) ||
		    loc.pkts_sent >= loc.pkts_copy);
	/* Take a shortcut if nothing is sent. */
	if (unlikely(loc.pkts_sent == loc.pkts_loop))
		goto burst_exit;
	/* Request CQE generation if limits are reached. */
	mlx5_tx_request_completion(txq, &loc, olx);
	/*
	 * Ring the QP doorbell immediately after WQE building completion
	 * to improve latency. The purely software-related data treatment
	 * can be completed after the doorbell. Tx CQEs for this SQ are
	 * processed in this thread only by polling.
	 *
	 * The rdma-core library can map the doorbell register in two ways,
	 * depending on the environment variable "MLX5_SHUT_UP_BF":
	 *
	 * - as regular cached memory, when the variable is either missing
	 *   or set to zero. This type of mapping may cause significant
	 *   doorbell register write latency and requires an explicit memory
	 *   write barrier to mitigate this issue and prevent write combining.
	 *
	 * - as non-cached memory, when the variable is present and set to
	 *   a non-zero value. This type of mapping may impact performance
	 *   under heavy load, but no explicit memory write barrier is
	 *   required, which may improve core performance.
	 *
	 * - the legacy behavior (prior to the 19.08 release) was to use
	 *   heuristics to decide whether a write memory barrier should be
	 *   performed. This behavior is selected by specifying tx_db_nc=2:
	 *   the write barrier is skipped if the application provides the
	 *   full recommended burst of packets, on the assumption that more
	 *   packets are coming and the write barrier will be issued on the
	 *   next burst (after descriptor writing, at least).
	 */
	mlx5_tx_dbrec_cond_wmb(txq, loc.wqe_last, !txq->db_nc &&
			(!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
	/* Not all of the mbufs may be stored into elts yet. */
	part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
	if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
		/*
		 * There are some single-segment mbufs not stored in elts.
		 * This can happen only if the last packet was single-segment.
		 * The copying is gathered into one place since it is a good
		 * opportunity to optimize it with SIMD.
		 * Unfortunately, if inlining is enabled, gaps may appear in
		 * the pointer array due to early freeing of the inlined mbufs.
		 */
		mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx);
		loc.pkts_copy = loc.pkts_sent;
	}
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (pkts_n > loc.pkts_sent) {
		/*
		 * If the burst size is large there might not be enough CQEs
		 * fetched from the completion queue and not enough resources
		 * freed to send all the packets.
		 */
		goto send_loop;
	}
burst_exit:
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += loc.pkts_sent;
#endif
	if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
		__mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
	return loc.pkts_sent;
}

#endif /* RTE_PMD_MLX5_TX_H_ */
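
/*
 * Illustrative sketch only, not part of the driver and kept under "#if 0" so
 * it never affects the build. It demonstrates the compile-time specialization
 * technique that mlx5_tx_burst_tmpl() above relies on: each generated wrapper
 * passes a literal "olx"-like mask, so the compiler folds the flag tests
 * (MLX5_TXOFF_CONFIG()-style checks) and removes the disabled branches
 * entirely. The demo_* names and DEMO_OFF_* flags are hypothetical and exist
 * only for this example.
 */
#if 0
#include <stdint.h>

#define DEMO_OFF_CSUM (1u << 0) /* Checksum offload requested. */
#define DEMO_OFF_VLAN (1u << 1) /* VLAN insertion requested. */

/* Template: branches on a mask that callers pass as a compile-time constant. */
static inline uint16_t
demo_burst_tmpl(uint16_t pkts_n, unsigned int olx)
{
	uint16_t sent;

	for (sent = 0; sent < pkts_n; ++sent) {
		if (olx & DEMO_OFF_CSUM) {
			/* Kept only in checksum-enabled specializations. */
		}
		if (olx & DEMO_OFF_VLAN) {
			/* Kept only in VLAN-enabled specializations. */
		}
	}
	return sent;
}

/*
 * Wrappers in the style of the generated burst routines: each one passes a
 * literal mask, so after inlining only the requested offload code remains.
 */
static uint16_t
demo_burst_csum(uint16_t pkts_n)
{
	return demo_burst_tmpl(pkts_n, DEMO_OFF_CSUM);
}

static uint16_t
demo_burst_none(uint16_t pkts_n)
{
	return demo_burst_tmpl(pkts_n, 0);
}
#endif /* Illustrative sketch. */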