/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2021 6WIND S.A.
 * Copyright 2021 Mellanox Technologies, Ltd
 */

#ifndef RTE_PMD_MLX5_TX_H_
#define RTE_PMD_MLX5_TX_H_

#include <stdint.h>
#include <sys/queue.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_common.h>
#include <rte_spinlock.h>

#include <mlx5_common.h>
#include <mlx5_common_mr.h>

#include "mlx5.h"
#include "mlx5_autoconf.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
	MLX5_TXCMP_CODE_EXIT = 0,
	MLX5_TXCMP_CODE_ERROR,
	MLX5_TXCMP_CODE_SINGLE,
	MLX5_TXCMP_CODE_MULTI,
	MLX5_TXCMP_CODE_TSO,
	MLX5_TXCMP_CODE_EMPW,
};

/*
 * These defines are used to configure the set of Tx burst routine options
 * supported at compile time. Options that are not specified are optimized
 * out, because the related "if" conditions can be evaluated at compile time.
 * The offloads with the bigger runtime check overhead (requiring more CPU
 * cycles to skip) should have the bigger index - this is needed to select
 * the better matching routine when there is no exact match and some offloads
 * are not actually requested.
 */
#define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets.*/
#define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported.*/
#define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads.*/
#define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Check Sums offloaded. */
#define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported.*/
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported.*/
#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported.*/
#define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp.*/

/* The most common offloads groups. */
#define MLX5_TXOFF_CONFIG_NONE 0
#define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \
				MLX5_TXOFF_CONFIG_TSO | \
				MLX5_TXOFF_CONFIG_SWP | \
				MLX5_TXOFF_CONFIG_CSUM | \
				MLX5_TXOFF_CONFIG_INLINE | \
				MLX5_TXOFF_CONFIG_VLAN | \
				MLX5_TXOFF_CONFIG_METADATA)

#define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask)

#define MLX5_TXOFF_PRE_DECL(func) \
uint16_t mlx5_tx_burst_##func(void *txq, \
			      struct rte_mbuf **pkts, \
			      uint16_t pkts_n)

#define MLX5_TXOFF_DECL(func, olx) \
uint16_t mlx5_tx_burst_##func(void *txq, \
			      struct rte_mbuf **pkts, \
			      uint16_t pkts_n) \
{ \
	return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \
				  pkts, pkts_n, (olx)); \
}

/* Mbuf dynamic flag offset for inline. */
extern uint64_t rte_net_mlx5_dynf_inline_mask;
#define RTE_MBUF_F_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask

extern uint32_t mlx5_ptype_table[] __rte_cache_aligned;
extern uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned;
extern uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned;

struct mlx5_txq_stats {
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint64_t opackets; /**< Total of successfully sent packets. */
	uint64_t obytes; /**< Total of successfully sent bytes. */
#endif
	uint64_t oerrors; /**< Total number of failed transmitted packets. */
};

/* TX queue send local data. */
__extension__
struct mlx5_txq_local {
	struct mlx5_wqe *wqe_last; /* last sent WQE pointer. */
	struct rte_mbuf *mbuf; /* first mbuf to process.
*/ 100 uint16_t pkts_copy; /* packets copied to elts. */ 101 uint16_t pkts_sent; /* packets sent. */ 102 uint16_t pkts_loop; /* packets sent on loop entry. */ 103 uint16_t elts_free; /* available elts remain. */ 104 uint16_t wqe_free; /* available wqe remain. */ 105 uint16_t mbuf_off; /* data offset in current mbuf. */ 106 uint16_t mbuf_nseg; /* number of remaining mbuf. */ 107 uint16_t mbuf_free; /* number of inline mbufs to free. */ 108 }; 109 110 /* TX queue descriptor. */ 111 __extension__ 112 struct mlx5_txq_data { 113 uint16_t elts_head; /* Current counter in (*elts)[]. */ 114 uint16_t elts_tail; /* Counter of first element awaiting completion. */ 115 uint16_t elts_comp; /* elts index since last completion request. */ 116 uint16_t elts_s; /* Number of mbuf elements. */ 117 uint16_t elts_m; /* Mask for mbuf elements indices. */ 118 /* Fields related to elts mbuf storage. */ 119 uint16_t wqe_ci; /* Consumer index for work queue. */ 120 uint16_t wqe_pi; /* Producer index for work queue. */ 121 uint16_t wqe_s; /* Number of WQ elements. */ 122 uint16_t wqe_m; /* Mask Number for WQ elements. */ 123 uint16_t wqe_comp; /* WQE index since last completion request. */ 124 uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */ 125 /* WQ related fields. */ 126 uint16_t cq_ci; /* Consumer index for completion queue. */ 127 uint16_t cq_pi; /* Production index for completion queue. */ 128 uint16_t cqe_s; /* Number of CQ elements. */ 129 uint16_t cqe_m; /* Mask for CQ indices. */ 130 /* CQ related fields. */ 131 uint16_t elts_n:4; /* elts[] length (in log2). */ 132 uint16_t cqe_n:4; /* Number of CQ elements (in log2). */ 133 uint16_t wqe_n:4; /* Number of WQ elements (in log2). */ 134 uint16_t tso_en:1; /* When set hardware TSO is enabled. */ 135 uint16_t tunnel_en:1; 136 /* When set TX offload for tunneled packets are supported. */ 137 uint16_t swp_en:1; /* Whether SW parser is enabled. */ 138 uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */ 139 uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */ 140 uint16_t db_heu:1; /* Doorbell heuristic write barrier. */ 141 uint16_t rt_timestamp:1; /* Realtime timestamp format. */ 142 uint16_t wait_on_time:1; /* WQE with timestamp is supported. */ 143 uint16_t fast_free:1; /* mbuf fast free on Tx is enabled. */ 144 uint16_t inlen_send; /* Ordinary send data inline size. */ 145 uint16_t inlen_empw; /* eMPW max packet size to inline. */ 146 uint16_t inlen_mode; /* Minimal data length to inline. */ 147 uint32_t qp_num_8s; /* QP number shifted by 8. */ 148 uint64_t offloads; /* Offloads for Tx Queue. */ 149 struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */ 150 struct mlx5_wqe *wqes; /* Work queue. */ 151 struct mlx5_wqe *wqes_end; /* Work queue array limit. */ 152 #ifdef RTE_LIBRTE_MLX5_DEBUG 153 uint32_t *fcqs; /* Free completion queue (debug extended). */ 154 #else 155 uint16_t *fcqs; /* Free completion queue. */ 156 #endif 157 volatile struct mlx5_cqe *cqes; /* Completion queue. */ 158 volatile uint32_t *qp_db; /* Work queue doorbell. */ 159 volatile uint32_t *cq_db; /* Completion queue doorbell. */ 160 uint16_t port_id; /* Port ID of device. */ 161 uint16_t idx; /* Queue index. */ 162 uint64_t rt_timemask; /* Scheduling timestamp mask. */ 163 uint64_t ts_mask; /* Timestamp flag dynamic mask. */ 164 int32_t ts_offset; /* Timestamp field dynamic offset. */ 165 struct mlx5_dev_ctx_shared *sh; /* Shared context. */ 166 struct mlx5_txq_stats stats; /* TX queue counters. 
*/ 167 struct mlx5_txq_stats stats_reset; /* stats on last reset. */ 168 struct mlx5_uar_data uar_data; 169 struct rte_mbuf *elts[0]; 170 /* Storage for queued packets, must be the last field. */ 171 } __rte_cache_aligned; 172 173 /* TX queue control descriptor. */ 174 struct mlx5_txq_ctrl { 175 LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */ 176 uint32_t refcnt; /* Reference counter. */ 177 unsigned int socket; /* CPU socket ID for allocations. */ 178 bool is_hairpin; /* Whether TxQ type is Hairpin. */ 179 unsigned int max_inline_data; /* Max inline data. */ 180 unsigned int max_tso_header; /* Max TSO header size. */ 181 struct mlx5_txq_obj *obj; /* Verbs/DevX queue object. */ 182 struct mlx5_priv *priv; /* Back pointer to private data. */ 183 off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */ 184 uint16_t dump_file_n; /* Number of dump files. */ 185 struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */ 186 uint32_t hairpin_status; /* Hairpin binding status. */ 187 struct mlx5_txq_data txq; /* Data path structure. */ 188 /* Must be the last field in the structure, contains elts[]. */ 189 }; 190 191 /* mlx5_txq.c */ 192 193 int mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id); 194 int mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id); 195 int mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t queue_id); 196 int mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t queue_id); 197 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 198 unsigned int socket, const struct rte_eth_txconf *conf); 199 int mlx5_tx_hairpin_queue_setup 200 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 201 const struct rte_eth_hairpin_conf *hairpin_conf); 202 void mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 203 int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd); 204 void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev); 205 int mlx5_txq_obj_verify(struct rte_eth_dev *dev); 206 struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, 207 uint16_t desc, unsigned int socket, 208 const struct rte_eth_txconf *conf); 209 struct mlx5_txq_ctrl *mlx5_txq_hairpin_new 210 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 211 const struct rte_eth_hairpin_conf *hairpin_conf); 212 struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx); 213 int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx); 214 int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx); 215 int mlx5_txq_verify(struct rte_eth_dev *dev); 216 int mlx5_txq_get_sqn(struct mlx5_txq_ctrl *txq); 217 void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl); 218 void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl); 219 uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev); 220 void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev); 221 222 /* mlx5_tx.c */ 223 224 void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, 225 unsigned int olx __rte_unused); 226 int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset); 227 void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, 228 struct rte_eth_txq_info *qinfo); 229 int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t tx_queue_id, 230 struct rte_eth_burst_mode *mode); 231 232 /* mlx5_tx_empw.c */ 233 234 MLX5_TXOFF_PRE_DECL(full_empw); 235 MLX5_TXOFF_PRE_DECL(none_empw); 236 MLX5_TXOFF_PRE_DECL(md_empw); 237 MLX5_TXOFF_PRE_DECL(mt_empw); 238 
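
/*
 * Illustrative sketch (not part of the driver): how the option masks and
 * declaration macros above are meant to be combined. Every "olx" value is
 * a compile-time constant, so each MLX5_TXOFF_CONFIG() test folds to 0 or 1
 * and the branches of unselected options are removed entirely. The template
 * name below (example_burst_tmpl) is hypothetical; only the macros are real.
 */
static __rte_always_inline uint16_t
example_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq,
		   struct rte_mbuf **__rte_restrict pkts,
		   uint16_t pkts_n, unsigned int olx)
{
	uint16_t sent = 0;

	if (MLX5_TXOFF_CONFIG(TSO)) {
		/* Compiled in only for option sets containing TSO. */
	}
	if (MLX5_TXOFF_CONFIG(CSUM)) {
		/* Compiled in only for option sets containing CSUM. */
	}
	(void)txq;
	(void)pkts;
	(void)pkts_n;
	return sent;
}

/*
 * A real instantiation is generated with MLX5_TXOFF_DECL(), which emits
 * mlx5_tx_burst_<name>() calling the common template with a literal option
 * mask, e.g.:
 *
 *   MLX5_TXOFF_DECL(full_empw,
 *		     MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW)
 */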
MLX5_TXOFF_PRE_DECL(mtsc_empw);
MLX5_TXOFF_PRE_DECL(mti_empw);
MLX5_TXOFF_PRE_DECL(mtv_empw);
MLX5_TXOFF_PRE_DECL(mtiv_empw);
MLX5_TXOFF_PRE_DECL(sc_empw);
MLX5_TXOFF_PRE_DECL(sci_empw);
MLX5_TXOFF_PRE_DECL(scv_empw);
MLX5_TXOFF_PRE_DECL(sciv_empw);
MLX5_TXOFF_PRE_DECL(i_empw);
MLX5_TXOFF_PRE_DECL(v_empw);
MLX5_TXOFF_PRE_DECL(iv_empw);

/* mlx5_tx_nompw.c */

MLX5_TXOFF_PRE_DECL(full);
MLX5_TXOFF_PRE_DECL(none);
MLX5_TXOFF_PRE_DECL(md);
MLX5_TXOFF_PRE_DECL(mt);
MLX5_TXOFF_PRE_DECL(mtsc);
MLX5_TXOFF_PRE_DECL(mti);
MLX5_TXOFF_PRE_DECL(mtv);
MLX5_TXOFF_PRE_DECL(mtiv);
MLX5_TXOFF_PRE_DECL(sc);
MLX5_TXOFF_PRE_DECL(sci);
MLX5_TXOFF_PRE_DECL(scv);
MLX5_TXOFF_PRE_DECL(sciv);
MLX5_TXOFF_PRE_DECL(i);
MLX5_TXOFF_PRE_DECL(v);
MLX5_TXOFF_PRE_DECL(iv);

/* mlx5_tx_txpp.c */

MLX5_TXOFF_PRE_DECL(full_ts_nompw);
MLX5_TXOFF_PRE_DECL(full_ts_nompwi);
MLX5_TXOFF_PRE_DECL(full_ts);
MLX5_TXOFF_PRE_DECL(full_ts_noi);
MLX5_TXOFF_PRE_DECL(none_ts);
MLX5_TXOFF_PRE_DECL(mdi_ts);
MLX5_TXOFF_PRE_DECL(mti_ts);
MLX5_TXOFF_PRE_DECL(mtiv_ts);

/* mlx5_tx_mpw.c */

MLX5_TXOFF_PRE_DECL(none_mpw);
MLX5_TXOFF_PRE_DECL(mci_mpw);
MLX5_TXOFF_PRE_DECL(mc_mpw);
MLX5_TXOFF_PRE_DECL(i_mpw);

static __rte_always_inline struct mlx5_uar_data *
mlx5_tx_bfreg(struct mlx5_txq_data *txq)
{
	return &MLX5_PROC_PRIV(txq->port_id)->uar_table[txq->idx];
}

/**
 * Ring TX queue doorbell and flush the update by write memory barrier.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param wqe
 *   Pointer to the last WQE posted in the NIC.
 */
static __rte_always_inline void
mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
{
	mlx5_doorbell_ring(mlx5_tx_bfreg(txq), *(volatile uint64_t *)wqe,
			   txq->wqe_ci, txq->qp_db, 1);
}

/**
 * Convert timestamp from mbuf format to linear counter
 * of Clock Queue completions (24 bits).
 *
 * @param sh
 *   Pointer to the device shared context to fetch Tx
 *   packet pacing timestamp and parameters.
 * @param mts
 *   Timestamp from mbuf to convert.
 * @return
 *   positive or zero value - completion ID to wait.
 *   negative value - conversion error.
 */
static __rte_always_inline int32_t
mlx5_txpp_convert_tx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t mts)
{
	uint64_t ts, ci;
	uint32_t tick;

	do {
		/*
		 * Read the two uint64_t fields atomically and compare the
		 * LSBs. If they do not match, the timestamp was updated in
		 * the service thread and the data must be re-read.
		 */
		rte_compiler_barrier();
		ci = __atomic_load_n(&sh->txpp.ts.ci_ts, __ATOMIC_RELAXED);
		ts = __atomic_load_n(&sh->txpp.ts.ts, __ATOMIC_RELAXED);
		rte_compiler_barrier();
		if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH)))
			break;
	} while (true);
	/* Perform the skew correction, positive value to send earlier. */
	mts -= sh->txpp.skew;
	mts -= ts;
	if (unlikely(mts >= UINT64_MAX / 2)) {
		/* We have a negative integer, mts is in the past. */
		__atomic_fetch_add(&sh->txpp.err_ts_past,
				   1, __ATOMIC_RELAXED);
		return -1;
	}
	tick = sh->txpp.tick;
	MLX5_ASSERT(tick);
	/* Convert the delta to completions, round up. */
	mts = (mts + tick - 1) / tick;
	if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) {
		/* The timestamp is too far in the future.
*/ 354 __atomic_fetch_add(&sh->txpp.err_ts_future, 355 1, __ATOMIC_RELAXED); 356 return -1; 357 } 358 mts <<= 64 - MLX5_CQ_INDEX_WIDTH; 359 ci += mts; 360 ci >>= 64 - MLX5_CQ_INDEX_WIDTH; 361 return ci; 362 } 363 364 /** 365 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 366 * Flags must be preliminary initialized to zero. 367 * 368 * @param loc 369 * Pointer to burst routine local context. 370 * @param swp_flags 371 * Pointer to store Software Parser flags. 372 * @param olx 373 * Configured Tx offloads mask. It is fully defined at 374 * compile time and may be used for optimization. 375 * 376 * @return 377 * Software Parser offsets packed in dword. 378 * Software Parser flags are set by pointer. 379 */ 380 static __rte_always_inline uint32_t 381 txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, 382 uint8_t *swp_flags, 383 unsigned int olx) 384 { 385 uint64_t ol, tunnel; 386 unsigned int idx, off; 387 uint32_t set; 388 389 if (!MLX5_TXOFF_CONFIG(SWP)) 390 return 0; 391 ol = loc->mbuf->ol_flags; 392 tunnel = ol & RTE_MBUF_F_TX_TUNNEL_MASK; 393 /* 394 * Check whether Software Parser is required. 395 * Only customized tunnels may ask for. 396 */ 397 if (likely(tunnel != RTE_MBUF_F_TX_TUNNEL_UDP && tunnel != RTE_MBUF_F_TX_TUNNEL_IP)) 398 return 0; 399 /* 400 * The index should have: 401 * bit[0:1] = RTE_MBUF_F_TX_L4_MASK 402 * bit[4] = RTE_MBUF_F_TX_IPV6 403 * bit[8] = RTE_MBUF_F_TX_OUTER_IPV6 404 * bit[9] = RTE_MBUF_F_TX_OUTER_UDP 405 */ 406 idx = (ol & (RTE_MBUF_F_TX_L4_MASK | RTE_MBUF_F_TX_IPV6 | RTE_MBUF_F_TX_OUTER_IPV6)) >> 52; 407 idx |= (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) ? (1 << 9) : 0; 408 *swp_flags = mlx5_swp_types_table[idx]; 409 /* 410 * Set offsets for SW parser. Since ConnectX-5, SW parser just 411 * complements HW parser. SW parser starts to engage only if HW parser 412 * can't reach a header. For the older devices, HW parser will not kick 413 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 414 * should be set regardless of HW offload. 415 */ 416 off = loc->mbuf->outer_l2_len; 417 if (MLX5_TXOFF_CONFIG(VLAN) && ol & RTE_MBUF_F_TX_VLAN) 418 off += sizeof(struct rte_vlan_hdr); 419 set = (off >> 1) << 8; /* Outer L3 offset. */ 420 off += loc->mbuf->outer_l3_len; 421 if (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) 422 set |= off >> 1; /* Outer L4 offset. */ 423 if (ol & (RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IPV6)) { /* Inner IP. */ 424 const uint64_t csum = ol & RTE_MBUF_F_TX_L4_MASK; 425 off += loc->mbuf->l2_len; 426 set |= (off >> 1) << 24; /* Inner L3 offset. */ 427 if (csum == RTE_MBUF_F_TX_TCP_CKSUM || 428 csum == RTE_MBUF_F_TX_UDP_CKSUM || 429 (MLX5_TXOFF_CONFIG(TSO) && ol & RTE_MBUF_F_TX_TCP_SEG)) { 430 off += loc->mbuf->l3_len; 431 set |= (off >> 1) << 16; /* Inner L4 offset. */ 432 } 433 } 434 set = rte_cpu_to_le_32(set); 435 return set; 436 } 437 438 /** 439 * Convert the Checksum offloads to Verbs. 440 * 441 * @param buf 442 * Pointer to the mbuf. 443 * 444 * @return 445 * Converted checksum flags. 
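
/*
 * Illustrative sketch (not part of the driver): the Software Parser offsets
 * computed by txq_mbuf_to_swp() above for a hypothetical plain-UDP tunnel
 * packet. In the returned dword, bits 7:0 carry the outer L4 offset,
 * bits 15:8 the outer L3 offset, bits 23:16 the inner L4 offset and
 * bits 31:24 the inner L3 offset, all expressed in 2-byte units. The helper
 * name and all header lengths below are hypothetical.
 */
static __rte_unused void
example_swp_offsets(void)
{
	struct rte_mbuf mbuf;
	struct mlx5_txq_local loc;
	uint8_t swp_flags = 0;
	uint32_t offs;

	mbuf.ol_flags = RTE_MBUF_F_TX_TUNNEL_UDP | RTE_MBUF_F_TX_IPV4 |
			RTE_MBUF_F_TX_UDP_CKSUM;
	mbuf.tx_offload = 0;
	mbuf.outer_l2_len = 14;	/* Outer Ethernet header. */
	mbuf.outer_l3_len = 20;	/* Outer IPv4 header. */
	mbuf.l2_len = 8 + 14;	/* UDP tunnel header + inner Ethernet. */
	mbuf.l3_len = 20;	/* Inner IPv4 header. */
	loc.mbuf = &mbuf;
	offs = txq_mbuf_to_swp(&loc, &swp_flags, MLX5_TXOFF_CONFIG_SWP);
	/*
	 * Expected packing: outer L3 at byte 14 -> 7, outer L4 at byte
	 * 34 -> 17, inner L3 at byte 56 -> 28, inner L4 at byte 76 -> 38.
	 */
	(void)offs;
}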
446 */ 447 static __rte_always_inline uint8_t 448 txq_ol_cksum_to_cs(struct rte_mbuf *buf) 449 { 450 uint32_t idx; 451 uint8_t is_tunnel = !!(buf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK); 452 const uint64_t ol_flags_mask = RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_L4_MASK | 453 RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_OUTER_IP_CKSUM; 454 455 /* 456 * The index should have: 457 * bit[0] = RTE_MBUF_F_TX_TCP_SEG 458 * bit[2:3] = RTE_MBUF_F_TX_UDP_CKSUM, RTE_MBUF_F_TX_TCP_CKSUM 459 * bit[4] = RTE_MBUF_F_TX_IP_CKSUM 460 * bit[8] = RTE_MBUF_F_TX_OUTER_IP_CKSUM 461 * bit[9] = tunnel 462 */ 463 idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9); 464 return mlx5_cksum_table[idx]; 465 } 466 467 /** 468 * Free the mbufs from the linear array of pointers. 469 * 470 * @param txq 471 * Pointer to Tx queue structure. 472 * @param pkts 473 * Pointer to array of packets to be free. 474 * @param pkts_n 475 * Number of packets to be freed. 476 * @param olx 477 * Configured Tx offloads mask. It is fully defined at 478 * compile time and may be used for optimization. 479 */ 480 static __rte_always_inline void 481 mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 482 struct rte_mbuf **__rte_restrict pkts, 483 unsigned int pkts_n, 484 unsigned int olx __rte_unused) 485 { 486 struct rte_mempool *pool = NULL; 487 struct rte_mbuf **p_free = NULL; 488 struct rte_mbuf *mbuf; 489 unsigned int n_free = 0; 490 491 /* 492 * The implemented algorithm eliminates 493 * copying pointers to temporary array 494 * for rte_mempool_put_bulk() calls. 495 */ 496 MLX5_ASSERT(pkts); 497 MLX5_ASSERT(pkts_n); 498 /* 499 * Free mbufs directly to the pool in bulk 500 * if fast free offload is engaged 501 */ 502 if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) { 503 mbuf = *pkts; 504 pool = mbuf->pool; 505 rte_mempool_put_bulk(pool, (void *)pkts, pkts_n); 506 return; 507 } 508 for (;;) { 509 for (;;) { 510 /* 511 * Decrement mbuf reference counter, detach 512 * indirect and external buffers if needed. 513 */ 514 mbuf = rte_pktmbuf_prefree_seg(*pkts); 515 if (likely(mbuf != NULL)) { 516 MLX5_ASSERT(mbuf == *pkts); 517 if (likely(n_free != 0)) { 518 if (unlikely(pool != mbuf->pool)) 519 /* From different pool. */ 520 break; 521 } else { 522 /* Start new scan array. */ 523 pool = mbuf->pool; 524 p_free = pkts; 525 } 526 ++n_free; 527 ++pkts; 528 --pkts_n; 529 if (unlikely(pkts_n == 0)) { 530 mbuf = NULL; 531 break; 532 } 533 } else { 534 /* 535 * This happens if mbuf is still referenced. 536 * We can't put it back to the pool, skip. 537 */ 538 ++pkts; 539 --pkts_n; 540 if (unlikely(n_free != 0)) 541 /* There is some array to free.*/ 542 break; 543 if (unlikely(pkts_n == 0)) 544 /* Last mbuf, nothing to free. */ 545 return; 546 } 547 } 548 for (;;) { 549 /* 550 * This loop is implemented to avoid multiple 551 * inlining of rte_mempool_put_bulk(). 552 */ 553 MLX5_ASSERT(pool); 554 MLX5_ASSERT(p_free); 555 MLX5_ASSERT(n_free); 556 /* 557 * Free the array of pre-freed mbufs 558 * belonging to the same memory pool. 559 */ 560 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 561 if (unlikely(mbuf != NULL)) { 562 /* There is the request to start new scan. */ 563 pool = mbuf->pool; 564 p_free = pkts++; 565 n_free = 1; 566 --pkts_n; 567 if (likely(pkts_n != 0)) 568 break; 569 /* 570 * This is the last mbuf to be freed. 571 * Do one more loop iteration to complete. 572 * This is rare case of the last unique mbuf. 
573 */ 574 mbuf = NULL; 575 continue; 576 } 577 if (likely(pkts_n == 0)) 578 return; 579 n_free = 0; 580 break; 581 } 582 } 583 } 584 585 /** 586 * No inline version to free buffers for optimal call 587 * on the tx_burst completion. 588 */ 589 static __rte_noinline void 590 __mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 591 struct rte_mbuf **__rte_restrict pkts, 592 unsigned int pkts_n, 593 unsigned int olx __rte_unused) 594 { 595 mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx); 596 } 597 598 /** 599 * Free the mbuf from the elts ring buffer till new tail. 600 * 601 * @param txq 602 * Pointer to Tx queue structure. 603 * @param tail 604 * Index in elts to free up to, becomes new elts tail. 605 * @param olx 606 * Configured Tx offloads mask. It is fully defined at 607 * compile time and may be used for optimization. 608 */ 609 static __rte_always_inline void 610 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 611 uint16_t tail, 612 unsigned int olx __rte_unused) 613 { 614 uint16_t n_elts = tail - txq->elts_tail; 615 616 MLX5_ASSERT(n_elts); 617 MLX5_ASSERT(n_elts <= txq->elts_s); 618 /* 619 * Implement a loop to support ring buffer wraparound 620 * with single inlining of mlx5_tx_free_mbuf(). 621 */ 622 do { 623 unsigned int part; 624 625 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 626 part = RTE_MIN(part, n_elts); 627 MLX5_ASSERT(part); 628 MLX5_ASSERT(part <= txq->elts_s); 629 mlx5_tx_free_mbuf(txq, 630 &txq->elts[txq->elts_tail & txq->elts_m], 631 part, olx); 632 txq->elts_tail += part; 633 n_elts -= part; 634 } while (n_elts); 635 } 636 637 /** 638 * Store the mbuf being sent into elts ring buffer. 639 * On Tx completion these mbufs will be freed. 640 * 641 * @param txq 642 * Pointer to Tx queue structure. 643 * @param pkts 644 * Pointer to array of packets to be stored. 645 * @param pkts_n 646 * Number of packets to be stored. 647 * @param olx 648 * Configured Tx offloads mask. It is fully defined at 649 * compile time and may be used for optimization. 650 */ 651 static __rte_always_inline void 652 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 653 struct rte_mbuf **__rte_restrict pkts, 654 unsigned int pkts_n, 655 unsigned int olx __rte_unused) 656 { 657 unsigned int part; 658 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 659 660 MLX5_ASSERT(pkts); 661 MLX5_ASSERT(pkts_n); 662 part = txq->elts_s - (txq->elts_head & txq->elts_m); 663 MLX5_ASSERT(part); 664 MLX5_ASSERT(part <= txq->elts_s); 665 /* This code is a good candidate for vectorizing with SIMD. */ 666 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 667 (void *)pkts, 668 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 669 txq->elts_head += pkts_n; 670 if (unlikely(part < pkts_n)) 671 /* The copy is wrapping around the elts array. */ 672 rte_memcpy((void *)elts, (void *)(pkts + part), 673 (pkts_n - part) * sizeof(struct rte_mbuf *)); 674 } 675 676 /** 677 * Check if the completion request flag should be set in the last WQE. 678 * Both pushed mbufs and WQEs are monitored and the completion request 679 * flag is set if any of thresholds is reached. 680 * 681 * @param txq 682 * Pointer to TX queue structure. 683 * @param loc 684 * Pointer to burst routine local context. 685 * @param olx 686 * Configured Tx offloads mask. It is fully defined at 687 * compile time and may be used for optimization. 
688 */ 689 static __rte_always_inline void 690 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 691 struct mlx5_txq_local *__rte_restrict loc, 692 unsigned int olx) 693 { 694 uint16_t head = txq->elts_head; 695 unsigned int part; 696 697 part = MLX5_TXOFF_CONFIG(INLINE) ? 698 0 : loc->pkts_sent - loc->pkts_copy; 699 head += part; 700 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 701 (MLX5_TXOFF_CONFIG(INLINE) && 702 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 703 volatile struct mlx5_wqe *last = loc->wqe_last; 704 705 MLX5_ASSERT(last); 706 txq->elts_comp = head; 707 if (MLX5_TXOFF_CONFIG(INLINE)) 708 txq->wqe_comp = txq->wqe_ci; 709 /* Request unconditional completion on last WQE. */ 710 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 711 MLX5_COMP_MODE_OFFSET); 712 /* Save elts_head in dedicated free on completion queue. */ 713 #ifdef RTE_LIBRTE_MLX5_DEBUG 714 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 715 (last->cseg.opcode >> 8) << 16; 716 #else 717 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 718 #endif 719 /* A CQE slot must always be available. */ 720 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 721 } 722 } 723 724 /** 725 * Build the Control Segment with specified opcode: 726 * - MLX5_OPCODE_SEND 727 * - MLX5_OPCODE_ENHANCED_MPSW 728 * - MLX5_OPCODE_TSO 729 * 730 * @param txq 731 * Pointer to TX queue structure. 732 * @param loc 733 * Pointer to burst routine local context. 734 * @param wqe 735 * Pointer to WQE to fill with built Control Segment. 736 * @param ds 737 * Supposed length of WQE in segments. 738 * @param opcode 739 * SQ WQE opcode to put into Control Segment. 740 * @param olx 741 * Configured Tx offloads mask. It is fully defined at 742 * compile time and may be used for optimization. 743 */ 744 static __rte_always_inline void 745 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 746 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 747 struct mlx5_wqe *__rte_restrict wqe, 748 unsigned int ds, 749 unsigned int opcode, 750 unsigned int olx __rte_unused) 751 { 752 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 753 754 /* For legacy MPW replace the EMPW by TSO with modifier. */ 755 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 756 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 757 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 758 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 759 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 760 MLX5_COMP_MODE_OFFSET); 761 cs->misc = RTE_BE32(0); 762 } 763 764 /** 765 * Build the Synchronize Queue Segment with specified completion index. 766 * 767 * @param txq 768 * Pointer to TX queue structure. 769 * @param loc 770 * Pointer to burst routine local context. 771 * @param wqe 772 * Pointer to WQE to fill with built Control Segment. 773 * @param wci 774 * Completion index in Clock Queue to wait. 775 * @param olx 776 * Configured Tx offloads mask. It is fully defined at 777 * compile time and may be used for optimization. 
778 */ 779 static __rte_always_inline void 780 mlx5_tx_qseg_init(struct mlx5_txq_data *restrict txq, 781 struct mlx5_txq_local *restrict loc __rte_unused, 782 struct mlx5_wqe *restrict wqe, 783 unsigned int wci, 784 unsigned int olx __rte_unused) 785 { 786 struct mlx5_wqe_qseg *qs; 787 788 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 789 qs->max_index = rte_cpu_to_be_32(wci); 790 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq_obj.cq->id); 791 qs->reserved0 = RTE_BE32(0); 792 qs->reserved1 = RTE_BE32(0); 793 } 794 795 /** 796 * Build the Wait on Time Segment with specified timestamp value. 797 * 798 * @param txq 799 * Pointer to TX queue structure. 800 * @param loc 801 * Pointer to burst routine local context. 802 * @param wqe 803 * Pointer to WQE to fill with built Control Segment. 804 * @param ts 805 * Timesatmp value to wait. 806 * @param olx 807 * Configured Tx offloads mask. It is fully defined at 808 * compile time and may be used for optimization. 809 */ 810 static __rte_always_inline void 811 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 812 struct mlx5_txq_local *restrict loc __rte_unused, 813 struct mlx5_wqe *restrict wqe, 814 uint64_t ts, 815 unsigned int olx __rte_unused) 816 { 817 struct mlx5_wqe_wseg *ws; 818 819 ws = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 820 ws->operation = rte_cpu_to_be_32(MLX5_WAIT_COND_CYCLIC_SMALLER); 821 ws->lkey = RTE_BE32(0); 822 ws->va_high = RTE_BE32(0); 823 ws->va_low = RTE_BE32(0); 824 if (txq->rt_timestamp) { 825 ts = ts % (uint64_t)NS_PER_S 826 | (ts / (uint64_t)NS_PER_S) << 32; 827 } 828 ws->value = rte_cpu_to_be_64(ts); 829 ws->mask = txq->rt_timemask; 830 } 831 832 /** 833 * Build the Ethernet Segment without inlined data. 834 * Supports Software Parser, Checksums and VLAN insertion Tx offload features. 835 * 836 * @param txq 837 * Pointer to TX queue structure. 838 * @param loc 839 * Pointer to burst routine local context. 840 * @param wqe 841 * Pointer to WQE to fill with built Ethernet Segment. 842 * @param olx 843 * Configured Tx offloads mask. It is fully defined at 844 * compile time and may be used for optimization. 845 */ 846 static __rte_always_inline void 847 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 848 struct mlx5_txq_local *__rte_restrict loc, 849 struct mlx5_wqe *__rte_restrict wqe, 850 unsigned int olx) 851 { 852 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 853 uint32_t csum; 854 855 /* 856 * Calculate and set check sum flags first, dword field 857 * in segment may be shared with Software Parser flags. 858 */ 859 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 860 es->flags = rte_cpu_to_le_32(csum); 861 /* 862 * Calculate and set Software Parser offsets and flags. 863 * These flags a set for custom UDP and IP tunnel packets. 864 */ 865 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 866 /* Fill metadata field if needed. */ 867 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 868 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 869 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 870 0 : 0; 871 /* Engage VLAN tag insertion feature if requested. */ 872 if (MLX5_TXOFF_CONFIG(VLAN) && 873 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 874 /* 875 * We should get here only if device support 876 * this feature correctly. 
877 */ 878 MLX5_ASSERT(txq->vlan_en); 879 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 880 loc->mbuf->vlan_tci); 881 } else { 882 es->inline_hdr = RTE_BE32(0); 883 } 884 } 885 886 /** 887 * Build the Ethernet Segment with minimal inlined data 888 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 889 * used to fill the gap in single WQEBB WQEs. 890 * Supports Software Parser, Checksums and VLAN 891 * insertion Tx offload features. 892 * 893 * @param txq 894 * Pointer to TX queue structure. 895 * @param loc 896 * Pointer to burst routine local context. 897 * @param wqe 898 * Pointer to WQE to fill with built Ethernet Segment. 899 * @param vlan 900 * Length of VLAN tag insertion if any. 901 * @param olx 902 * Configured Tx offloads mask. It is fully defined at 903 * compile time and may be used for optimization. 904 */ 905 static __rte_always_inline void 906 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 907 struct mlx5_txq_local *__rte_restrict loc, 908 struct mlx5_wqe *__rte_restrict wqe, 909 unsigned int vlan, 910 unsigned int olx) 911 { 912 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 913 uint32_t csum; 914 uint8_t *psrc, *pdst; 915 916 /* 917 * Calculate and set check sum flags first, dword field 918 * in segment may be shared with Software Parser flags. 919 */ 920 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 921 es->flags = rte_cpu_to_le_32(csum); 922 /* 923 * Calculate and set Software Parser offsets and flags. 924 * These flags a set for custom UDP and IP tunnel packets. 925 */ 926 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 927 /* Fill metadata field if needed. */ 928 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 929 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 930 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 931 0 : 0; 932 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 933 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 934 es->inline_data = *(unaligned_uint16_t *)psrc; 935 psrc += sizeof(uint16_t); 936 pdst = (uint8_t *)(es + 1); 937 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 938 /* Implement VLAN tag insertion as part inline data. */ 939 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 940 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 941 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 942 /* Insert VLAN ethertype + VLAN tag. */ 943 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 944 ((RTE_ETHER_TYPE_VLAN << 16) | 945 loc->mbuf->vlan_tci); 946 pdst += sizeof(struct rte_vlan_hdr); 947 /* Copy the rest two bytes from packet data. */ 948 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 949 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 950 } else { 951 /* Fill the gap in the title WQEBB with inline data. */ 952 rte_mov16(pdst, psrc); 953 } 954 } 955 956 /** 957 * Build the Ethernet Segment with entire packet data inlining. Checks the 958 * boundary of WQEBB and ring buffer wrapping, supports Software Parser, 959 * Checksums and VLAN insertion Tx offload features. 960 * 961 * @param txq 962 * Pointer to TX queue structure. 963 * @param loc 964 * Pointer to burst routine local context. 965 * @param wqe 966 * Pointer to WQE to fill with built Ethernet Segment. 967 * @param vlan 968 * Length of VLAN tag insertion if any. 969 * @param inlen 970 * Length of data to inline (VLAN included, if any). 971 * @param tso 972 * TSO flag, set mss field from the packet. 973 * @param olx 974 * Configured Tx offloads mask. 
It is fully defined at 975 * compile time and may be used for optimization. 976 * 977 * @return 978 * Pointer to the next Data Segment (aligned and wrapped around). 979 */ 980 static __rte_always_inline struct mlx5_wqe_dseg * 981 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 982 struct mlx5_txq_local *__rte_restrict loc, 983 struct mlx5_wqe *__rte_restrict wqe, 984 unsigned int vlan, 985 unsigned int inlen, 986 unsigned int tso, 987 unsigned int olx) 988 { 989 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 990 uint32_t csum; 991 uint8_t *psrc, *pdst; 992 unsigned int part; 993 994 /* 995 * Calculate and set check sum flags first, dword field 996 * in segment may be shared with Software Parser flags. 997 */ 998 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 999 if (tso) { 1000 csum <<= 24; 1001 csum |= loc->mbuf->tso_segsz; 1002 es->flags = rte_cpu_to_be_32(csum); 1003 } else { 1004 es->flags = rte_cpu_to_le_32(csum); 1005 } 1006 /* 1007 * Calculate and set Software Parser offsets and flags. 1008 * These flags a set for custom UDP and IP tunnel packets. 1009 */ 1010 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1011 /* Fill metadata field if needed. */ 1012 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1013 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 1014 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1015 0 : 0; 1016 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 1017 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 1018 es->inline_data = *(unaligned_uint16_t *)psrc; 1019 psrc += sizeof(uint16_t); 1020 pdst = (uint8_t *)(es + 1); 1021 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1022 /* Implement VLAN tag insertion as part inline data. */ 1023 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 1024 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1025 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1026 /* Insert VLAN ethertype + VLAN tag. */ 1027 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1028 ((RTE_ETHER_TYPE_VLAN << 16) | 1029 loc->mbuf->vlan_tci); 1030 pdst += sizeof(struct rte_vlan_hdr); 1031 /* Copy the rest two bytes from packet data. */ 1032 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 1033 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 1034 psrc += sizeof(uint16_t); 1035 } else { 1036 /* Fill the gap in the title WQEBB with inline data. */ 1037 rte_mov16(pdst, psrc); 1038 psrc += sizeof(rte_v128u32_t); 1039 } 1040 pdst = (uint8_t *)(es + 2); 1041 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 1042 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 1043 inlen -= MLX5_ESEG_MIN_INLINE_SIZE; 1044 if (!inlen) { 1045 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 1046 return (struct mlx5_wqe_dseg *)pdst; 1047 } 1048 /* 1049 * The WQEBB space availability is checked by caller. 1050 * Here we should be aware of WQE ring buffer wraparound only. 1051 */ 1052 part = (uint8_t *)txq->wqes_end - pdst; 1053 part = RTE_MIN(part, inlen); 1054 do { 1055 rte_memcpy(pdst, psrc, part); 1056 inlen -= part; 1057 if (likely(!inlen)) { 1058 /* 1059 * If return value is not used by the caller 1060 * the code below will be optimized out. 
1061 */ 1062 pdst += part; 1063 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1064 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 1065 pdst = (uint8_t *)txq->wqes; 1066 return (struct mlx5_wqe_dseg *)pdst; 1067 } 1068 pdst = (uint8_t *)txq->wqes; 1069 psrc += part; 1070 part = inlen; 1071 } while (true); 1072 } 1073 1074 /** 1075 * Copy data from chain of mbuf to the specified linear buffer. 1076 * Checksums and VLAN insertion Tx offload features. If data 1077 * from some mbuf copied completely this mbuf is freed. Local 1078 * structure is used to keep the byte stream state. 1079 * 1080 * @param pdst 1081 * Pointer to the destination linear buffer. 1082 * @param loc 1083 * Pointer to burst routine local context. 1084 * @param len 1085 * Length of data to be copied. 1086 * @param must 1087 * Length of data to be copied ignoring no inline hint. 1088 * @param olx 1089 * Configured Tx offloads mask. It is fully defined at 1090 * compile time and may be used for optimization. 1091 * 1092 * @return 1093 * Number of actual copied data bytes. This is always greater than or 1094 * equal to must parameter and might be lesser than len in no inline 1095 * hint flag is encountered. 1096 */ 1097 static __rte_always_inline unsigned int 1098 mlx5_tx_mseg_memcpy(uint8_t *pdst, 1099 struct mlx5_txq_local *__rte_restrict loc, 1100 unsigned int len, 1101 unsigned int must, 1102 unsigned int olx __rte_unused) 1103 { 1104 struct rte_mbuf *mbuf; 1105 unsigned int part, dlen, copy = 0; 1106 uint8_t *psrc; 1107 1108 MLX5_ASSERT(len); 1109 do { 1110 /* Allow zero length packets, must check first. */ 1111 dlen = rte_pktmbuf_data_len(loc->mbuf); 1112 if (dlen <= loc->mbuf_off) { 1113 /* Exhausted packet, just free. */ 1114 mbuf = loc->mbuf; 1115 loc->mbuf = mbuf->next; 1116 rte_pktmbuf_free_seg(mbuf); 1117 loc->mbuf_off = 0; 1118 MLX5_ASSERT(loc->mbuf_nseg > 1); 1119 MLX5_ASSERT(loc->mbuf); 1120 --loc->mbuf_nseg; 1121 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1122 unsigned int diff; 1123 1124 if (copy >= must) { 1125 /* 1126 * We already copied the minimal 1127 * requested amount of data. 1128 */ 1129 return copy; 1130 } 1131 diff = must - copy; 1132 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 1133 /* 1134 * Copy only the minimal required 1135 * part of the data buffer. Limit amount 1136 * of data to be copied to the length of 1137 * available space. 1138 */ 1139 len = RTE_MIN(len, diff); 1140 } 1141 } 1142 continue; 1143 } 1144 dlen -= loc->mbuf_off; 1145 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1146 loc->mbuf_off); 1147 part = RTE_MIN(len, dlen); 1148 rte_memcpy(pdst, psrc, part); 1149 copy += part; 1150 loc->mbuf_off += part; 1151 len -= part; 1152 if (!len) { 1153 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 1154 loc->mbuf_off = 0; 1155 /* Exhausted packet, just free. */ 1156 mbuf = loc->mbuf; 1157 loc->mbuf = mbuf->next; 1158 rte_pktmbuf_free_seg(mbuf); 1159 loc->mbuf_off = 0; 1160 MLX5_ASSERT(loc->mbuf_nseg >= 1); 1161 --loc->mbuf_nseg; 1162 } 1163 return copy; 1164 } 1165 pdst += part; 1166 } while (true); 1167 } 1168 1169 /** 1170 * Build the Ethernet Segment with inlined data from multi-segment packet. 1171 * Checks the boundary of WQEBB and ring buffer wrapping, supports Software 1172 * Parser, Checksums and VLAN insertion Tx offload features. 1173 * 1174 * @param txq 1175 * Pointer to TX queue structure. 1176 * @param loc 1177 * Pointer to burst routine local context. 1178 * @param wqe 1179 * Pointer to WQE to fill with built Ethernet Segment. 
1180 * @param vlan 1181 * Length of VLAN tag insertion if any. 1182 * @param inlen 1183 * Length of data to inline (VLAN included, if any). 1184 * @param tso 1185 * TSO flag, set mss field from the packet. 1186 * @param olx 1187 * Configured Tx offloads mask. It is fully defined at 1188 * compile time and may be used for optimization. 1189 * 1190 * @return 1191 * Pointer to the next Data Segment (aligned and possible NOT wrapped 1192 * around - caller should do wrapping check on its own). 1193 */ 1194 static __rte_always_inline struct mlx5_wqe_dseg * 1195 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 1196 struct mlx5_txq_local *__rte_restrict loc, 1197 struct mlx5_wqe *__rte_restrict wqe, 1198 unsigned int vlan, 1199 unsigned int inlen, 1200 unsigned int tso, 1201 unsigned int olx) 1202 { 1203 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 1204 uint32_t csum; 1205 uint8_t *pdst; 1206 unsigned int part, tlen = 0; 1207 1208 /* 1209 * Calculate and set check sum flags first, uint32_t field 1210 * in segment may be shared with Software Parser flags. 1211 */ 1212 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 1213 if (tso) { 1214 csum <<= 24; 1215 csum |= loc->mbuf->tso_segsz; 1216 es->flags = rte_cpu_to_be_32(csum); 1217 } else { 1218 es->flags = rte_cpu_to_le_32(csum); 1219 } 1220 /* 1221 * Calculate and set Software Parser offsets and flags. 1222 * These flags a set for custom UDP and IP tunnel packets. 1223 */ 1224 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1225 /* Fill metadata field if needed. */ 1226 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1227 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 1228 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1229 0 : 0; 1230 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 1231 pdst = (uint8_t *)&es->inline_data; 1232 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1233 /* Implement VLAN tag insertion as part inline data. */ 1234 mlx5_tx_mseg_memcpy(pdst, loc, 1235 2 * RTE_ETHER_ADDR_LEN, 1236 2 * RTE_ETHER_ADDR_LEN, olx); 1237 pdst += 2 * RTE_ETHER_ADDR_LEN; 1238 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1239 ((RTE_ETHER_TYPE_VLAN << 16) | 1240 loc->mbuf->vlan_tci); 1241 pdst += sizeof(struct rte_vlan_hdr); 1242 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 1243 } 1244 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 1245 /* 1246 * The WQEBB space availability is checked by caller. 1247 * Here we should be aware of WQE ring buffer wraparound only. 1248 */ 1249 part = (uint8_t *)txq->wqes_end - pdst; 1250 part = RTE_MIN(part, inlen - tlen); 1251 MLX5_ASSERT(part); 1252 do { 1253 unsigned int copy; 1254 1255 /* 1256 * Copying may be interrupted inside the routine 1257 * if run into no inline hint flag. 1258 */ 1259 copy = tso ? inlen : txq->inlen_mode; 1260 copy = tlen >= copy ? 0 : (copy - tlen); 1261 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 1262 tlen += copy; 1263 if (likely(inlen <= tlen) || copy < part) { 1264 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 1265 pdst += copy; 1266 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1267 return (struct mlx5_wqe_dseg *)pdst; 1268 } 1269 pdst = (uint8_t *)txq->wqes; 1270 part = inlen - tlen; 1271 } while (true); 1272 } 1273 1274 /** 1275 * Build the Data Segment of pointer type. 1276 * 1277 * @param txq 1278 * Pointer to TX queue structure. 1279 * @param loc 1280 * Pointer to burst routine local context. 1281 * @param dseg 1282 * Pointer to WQE to fill with built Data Segment. 
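
/*
 * Illustrative sketch (not part of the driver): the software VLAN insertion
 * pattern used by the Ethernet Segment builders above - the destination and
 * source MAC addresses (2 * RTE_ETHER_ADDR_LEN bytes) are copied first, the
 * 802.1Q tag (ethertype + TCI) is written next, and the rest of the frame
 * follows. The helper name is hypothetical.
 */
static __rte_unused unsigned int
example_insert_vlan(uint8_t *dst, const uint8_t *frame, unsigned int len,
		    uint16_t vlan_tci)
{
	memcpy(dst, frame, 2 * RTE_ETHER_ADDR_LEN);
	*(unaligned_uint32_t *)(dst + 2 * RTE_ETHER_ADDR_LEN) =
		rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | vlan_tci);
	memcpy(dst + 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr),
	       frame + 2 * RTE_ETHER_ADDR_LEN, len - 2 * RTE_ETHER_ADDR_LEN);
	return len + sizeof(struct rte_vlan_hdr);
}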
1283 * @param buf 1284 * Data buffer to point. 1285 * @param len 1286 * Data buffer length. 1287 * @param olx 1288 * Configured Tx offloads mask. It is fully defined at 1289 * compile time and may be used for optimization. 1290 */ 1291 static __rte_always_inline void 1292 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 1293 struct mlx5_txq_local *__rte_restrict loc, 1294 struct mlx5_wqe_dseg *__rte_restrict dseg, 1295 uint8_t *buf, 1296 unsigned int len, 1297 unsigned int olx __rte_unused) 1298 1299 { 1300 MLX5_ASSERT(len); 1301 dseg->bcount = rte_cpu_to_be_32(len); 1302 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1303 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1304 } 1305 1306 /** 1307 * Build the Data Segment of pointer type or inline if data length is less than 1308 * buffer in minimal Data Segment size. 1309 * 1310 * @param txq 1311 * Pointer to TX queue structure. 1312 * @param loc 1313 * Pointer to burst routine local context. 1314 * @param dseg 1315 * Pointer to WQE to fill with built Data Segment. 1316 * @param buf 1317 * Data buffer to point. 1318 * @param len 1319 * Data buffer length. 1320 * @param olx 1321 * Configured Tx offloads mask. It is fully defined at 1322 * compile time and may be used for optimization. 1323 */ 1324 static __rte_always_inline void 1325 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 1326 struct mlx5_txq_local *__rte_restrict loc, 1327 struct mlx5_wqe_dseg *__rte_restrict dseg, 1328 uint8_t *buf, 1329 unsigned int len, 1330 unsigned int olx __rte_unused) 1331 1332 { 1333 uintptr_t dst, src; 1334 1335 MLX5_ASSERT(len); 1336 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 1337 dseg->bcount = rte_cpu_to_be_32(len); 1338 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1339 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1340 1341 return; 1342 } 1343 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1344 /* Unrolled implementation of generic rte_memcpy. */ 1345 dst = (uintptr_t)&dseg->inline_data[0]; 1346 src = (uintptr_t)buf; 1347 if (len & 0x08) { 1348 #ifdef RTE_ARCH_STRICT_ALIGN 1349 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 1350 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1351 dst += sizeof(uint32_t); 1352 src += sizeof(uint32_t); 1353 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1354 dst += sizeof(uint32_t); 1355 src += sizeof(uint32_t); 1356 #else 1357 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 1358 dst += sizeof(uint64_t); 1359 src += sizeof(uint64_t); 1360 #endif 1361 } 1362 if (len & 0x04) { 1363 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1364 dst += sizeof(uint32_t); 1365 src += sizeof(uint32_t); 1366 } 1367 if (len & 0x02) { 1368 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 1369 dst += sizeof(uint16_t); 1370 src += sizeof(uint16_t); 1371 } 1372 if (len & 0x01) 1373 *(uint8_t *)dst = *(uint8_t *)src; 1374 } 1375 1376 /** 1377 * Build the Data Segment of inlined data from single 1378 * segment packet, no VLAN insertion. 1379 * 1380 * @param txq 1381 * Pointer to TX queue structure. 1382 * @param loc 1383 * Pointer to burst routine local context. 1384 * @param dseg 1385 * Pointer to WQE to fill with built Data Segment. 1386 * @param buf 1387 * Data buffer to point. 1388 * @param len 1389 * Data buffer length. 1390 * @param olx 1391 * Configured Tx offloads mask. It is fully defined at 1392 * compile time and may be used for optimization. 1393 * 1394 * @return 1395 * Pointer to the next Data Segment after inlined data. 1396 * Ring buffer wraparound check is needed. 
We do not do it here because it 1397 * may not be needed for the last packet in the eMPW session. 1398 */ 1399 static __rte_always_inline struct mlx5_wqe_dseg * 1400 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 1401 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1402 struct mlx5_wqe_dseg *__rte_restrict dseg, 1403 uint8_t *buf, 1404 unsigned int len, 1405 unsigned int olx __rte_unused) 1406 { 1407 unsigned int part; 1408 uint8_t *pdst; 1409 1410 if (!MLX5_TXOFF_CONFIG(MPW)) { 1411 /* Store the descriptor byte counter for eMPW sessions. */ 1412 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1413 pdst = &dseg->inline_data[0]; 1414 } else { 1415 /* The entire legacy MPW session counter is stored on close. */ 1416 pdst = (uint8_t *)dseg; 1417 } 1418 /* 1419 * The WQEBB space availability is checked by caller. 1420 * Here we should be aware of WQE ring buffer wraparound only. 1421 */ 1422 part = (uint8_t *)txq->wqes_end - pdst; 1423 part = RTE_MIN(part, len); 1424 do { 1425 rte_memcpy(pdst, buf, part); 1426 len -= part; 1427 if (likely(!len)) { 1428 pdst += part; 1429 if (!MLX5_TXOFF_CONFIG(MPW)) 1430 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1431 /* Note: no final wraparound check here. */ 1432 return (struct mlx5_wqe_dseg *)pdst; 1433 } 1434 pdst = (uint8_t *)txq->wqes; 1435 buf += part; 1436 part = len; 1437 } while (true); 1438 } 1439 1440 /** 1441 * Build the Data Segment of inlined data from single 1442 * segment packet with VLAN insertion. 1443 * 1444 * @param txq 1445 * Pointer to TX queue structure. 1446 * @param loc 1447 * Pointer to burst routine local context. 1448 * @param dseg 1449 * Pointer to the dseg fill with built Data Segment. 1450 * @param buf 1451 * Data buffer to point. 1452 * @param len 1453 * Data buffer length. 1454 * @param olx 1455 * Configured Tx offloads mask. It is fully defined at 1456 * compile time and may be used for optimization. 1457 * 1458 * @return 1459 * Pointer to the next Data Segment after inlined data. 1460 * Ring buffer wraparound check is needed. 1461 */ 1462 static __rte_always_inline struct mlx5_wqe_dseg * 1463 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 1464 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1465 struct mlx5_wqe_dseg *__rte_restrict dseg, 1466 uint8_t *buf, 1467 unsigned int len, 1468 unsigned int olx __rte_unused) 1469 1470 { 1471 unsigned int part; 1472 uint8_t *pdst; 1473 1474 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 1475 if (!MLX5_TXOFF_CONFIG(MPW)) { 1476 /* Store the descriptor byte counter for eMPW sessions. */ 1477 dseg->bcount = rte_cpu_to_be_32 1478 ((len + sizeof(struct rte_vlan_hdr)) | 1479 MLX5_ETH_WQE_DATA_INLINE); 1480 pdst = &dseg->inline_data[0]; 1481 } else { 1482 /* The entire legacy MPW session counter is stored on close. */ 1483 pdst = (uint8_t *)dseg; 1484 } 1485 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 1486 buf += MLX5_DSEG_MIN_INLINE_SIZE; 1487 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 1488 len -= MLX5_DSEG_MIN_INLINE_SIZE; 1489 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ 1490 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 1491 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 1492 pdst = (uint8_t *)txq->wqes; 1493 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 1494 loc->mbuf->vlan_tci); 1495 pdst += sizeof(struct rte_vlan_hdr); 1496 /* 1497 * The WQEBB space availability is checked by caller. 1498 * Here we should be aware of WQE ring buffer wraparound only. 
1499 */ 1500 part = (uint8_t *)txq->wqes_end - pdst; 1501 part = RTE_MIN(part, len); 1502 do { 1503 rte_memcpy(pdst, buf, part); 1504 len -= part; 1505 if (likely(!len)) { 1506 pdst += part; 1507 if (!MLX5_TXOFF_CONFIG(MPW)) 1508 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1509 /* Note: no final wraparound check here. */ 1510 return (struct mlx5_wqe_dseg *)pdst; 1511 } 1512 pdst = (uint8_t *)txq->wqes; 1513 buf += part; 1514 part = len; 1515 } while (true); 1516 } 1517 1518 /** 1519 * Build the Ethernet Segment with optionally inlined data with 1520 * VLAN insertion and following Data Segments (if any) from 1521 * multi-segment packet. Used by ordinary send and TSO. 1522 * 1523 * @param txq 1524 * Pointer to TX queue structure. 1525 * @param loc 1526 * Pointer to burst routine local context. 1527 * @param wqe 1528 * Pointer to WQE to fill with built Ethernet/Data Segments. 1529 * @param vlan 1530 * Length of VLAN header to insert, 0 means no VLAN insertion. 1531 * @param inlen 1532 * Data length to inline. For TSO this parameter specifies exact value, 1533 * for ordinary send routine can be aligned by caller to provide better WQE 1534 * space saving and data buffer start address alignment. 1535 * This length includes VLAN header being inserted. 1536 * @param tso 1537 * Zero means ordinary send, inlined data can be extended, 1538 * otherwise this is TSO, inlined data length is fixed. 1539 * @param olx 1540 * Configured Tx offloads mask. It is fully defined at 1541 * compile time and may be used for optimization. 1542 * 1543 * @return 1544 * Actual size of built WQE in segments. 1545 */ 1546 static __rte_always_inline unsigned int 1547 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 1548 struct mlx5_txq_local *__rte_restrict loc, 1549 struct mlx5_wqe *__rte_restrict wqe, 1550 unsigned int vlan, 1551 unsigned int inlen, 1552 unsigned int tso, 1553 unsigned int olx __rte_unused) 1554 { 1555 struct mlx5_wqe_dseg *__rte_restrict dseg; 1556 unsigned int ds; 1557 1558 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 1559 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 1560 loc->mbuf_off = 0; 1561 1562 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 1563 if (!loc->mbuf_nseg) 1564 goto dseg_done; 1565 /* 1566 * There are still some mbuf remaining, not inlined. 1567 * The first mbuf may be partially inlined and we 1568 * must process the possible non-zero data offset. 1569 */ 1570 if (loc->mbuf_off) { 1571 unsigned int dlen; 1572 uint8_t *dptr; 1573 1574 /* 1575 * Exhausted packets must be dropped before. 1576 * Non-zero offset means there are some data 1577 * remained in the packet. 1578 */ 1579 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 1580 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 1581 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1582 loc->mbuf_off); 1583 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 1584 /* 1585 * Build the pointer/minimal Data Segment. 1586 * Do ring buffer wrapping check in advance. 1587 */ 1588 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1589 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1590 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 1591 /* Store the mbuf to be freed on completion. 
*/ 1592 MLX5_ASSERT(loc->elts_free); 1593 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1594 --loc->elts_free; 1595 ++dseg; 1596 if (--loc->mbuf_nseg == 0) 1597 goto dseg_done; 1598 loc->mbuf = loc->mbuf->next; 1599 loc->mbuf_off = 0; 1600 } 1601 do { 1602 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1603 struct rte_mbuf *mbuf; 1604 1605 /* Zero length segment found, just skip. */ 1606 mbuf = loc->mbuf; 1607 loc->mbuf = loc->mbuf->next; 1608 rte_pktmbuf_free_seg(mbuf); 1609 if (--loc->mbuf_nseg == 0) 1610 break; 1611 } else { 1612 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1613 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1614 mlx5_tx_dseg_iptr 1615 (txq, loc, dseg, 1616 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1617 rte_pktmbuf_data_len(loc->mbuf), olx); 1618 MLX5_ASSERT(loc->elts_free); 1619 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1620 --loc->elts_free; 1621 ++dseg; 1622 if (--loc->mbuf_nseg == 0) 1623 break; 1624 loc->mbuf = loc->mbuf->next; 1625 } 1626 } while (true); 1627 1628 dseg_done: 1629 /* Calculate actual segments used from the dseg pointer. */ 1630 if ((uintptr_t)wqe < (uintptr_t)dseg) 1631 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 1632 else 1633 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 1634 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 1635 return ds; 1636 } 1637 1638 /** 1639 * The routine checks timestamp flag in the current packet, 1640 * and push WAIT WQE into the queue if scheduling is required. 1641 * 1642 * @param txq 1643 * Pointer to TX queue structure. 1644 * @param loc 1645 * Pointer to burst routine local context. 1646 * @param elts 1647 * Number of free elements in elts buffer to be checked, for zero 1648 * value the check is optimized out by compiler. 1649 * @param olx 1650 * Configured Tx offloads mask. It is fully defined at 1651 * compile time and may be used for optimization. 1652 * 1653 * @return 1654 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1655 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 1656 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 1657 * Local context variables partially updated. 1658 */ 1659 static __rte_always_inline enum mlx5_txcmp_code 1660 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 1661 struct mlx5_txq_local *restrict loc, 1662 uint16_t elts, 1663 unsigned int olx) 1664 { 1665 if (MLX5_TXOFF_CONFIG(TXPP) && 1666 loc->mbuf->ol_flags & txq->ts_mask) { 1667 struct mlx5_dev_ctx_shared *sh; 1668 struct mlx5_wqe *wqe; 1669 uint64_t ts; 1670 1671 /* 1672 * Estimate the required space quickly and roughly. 1673 * We would like to ensure the packet can be pushed 1674 * to the queue and we won't get the orphan WAIT WQE. 1675 */ 1676 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 1677 loc->elts_free < elts) 1678 return MLX5_TXCMP_CODE_EXIT; 1679 /* Convert the timestamp into completion to wait. */ 1680 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 1681 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1682 sh = txq->sh; 1683 if (txq->wait_on_time) { 1684 /* The wait on time capability should be used. */ 1685 ts -= sh->txpp.skew; 1686 mlx5_tx_cseg_init(txq, loc, wqe, 1687 1 + sizeof(struct mlx5_wqe_wseg) / 1688 MLX5_WSEG_SIZE, 1689 MLX5_OPCODE_WAIT | 1690 MLX5_OPC_MOD_WAIT_TIME << 24, olx); 1691 mlx5_tx_wseg_init(txq, loc, wqe, ts, olx); 1692 } else { 1693 /* Legacy cross-channel operation should be used. 
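
/*
 * Illustrative sketch (not part of the driver): how an application feeds
 * the Tx scheduling path implemented in mlx5_tx_schedule_send(). With
 * packet pacing enabled, the PMD resolves txq->ts_offset/ts_mask from the
 * generic Tx timestamp dynamic mbuf field and flag; the application looks
 * up the same field and flag (they must have been registered beforehand,
 * typically via rte_mbuf_dyn_tx_timestamp_register()) and stamps selected
 * mbufs. Assumes <rte_mbuf_dyn.h>; error handling is omitted and the helper
 * name is hypothetical.
 */
static __rte_unused void
example_set_tx_timestamp(struct rte_mbuf *mbuf, uint64_t ts)
{
	static int ts_offset = -1;
	static uint64_t ts_flag;

	if (ts_offset < 0) {
		ts_offset = rte_mbuf_dynfield_lookup(
				RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL);
		ts_flag = 1ULL << rte_mbuf_dynflag_lookup(
				RTE_MBUF_DYNFLAG_TX_TIMESTAMP_NAME, NULL);
	}
	*RTE_MBUF_DYNFIELD(mbuf, ts_offset, uint64_t *) = ts;
	mbuf->ol_flags |= ts_flag;
}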
*/ 1694 int32_t wci; 1695 1696 wci = mlx5_txpp_convert_tx_ts(sh, ts); 1697 if (unlikely(wci < 0)) 1698 return MLX5_TXCMP_CODE_SINGLE; 1699 /* Build the WAIT WQE with specified completion. */ 1700 mlx5_tx_cseg_init(txq, loc, wqe, 1701 1 + sizeof(struct mlx5_wqe_qseg) / 1702 MLX5_WSEG_SIZE, 1703 MLX5_OPCODE_WAIT | 1704 MLX5_OPC_MOD_WAIT_CQ_PI << 24, olx); 1705 mlx5_tx_qseg_init(txq, loc, wqe, wci, olx); 1706 } 1707 ++txq->wqe_ci; 1708 --loc->wqe_free; 1709 return MLX5_TXCMP_CODE_MULTI; 1710 } 1711 return MLX5_TXCMP_CODE_SINGLE; 1712 } 1713 1714 /** 1715 * Tx one packet function for multi-segment TSO. Supports all 1716 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 1717 * sends one packet per WQE. 1718 * 1719 * This routine is responsible for storing processed mbuf 1720 * into elts ring buffer and update elts_head. 1721 * 1722 * @param txq 1723 * Pointer to TX queue structure. 1724 * @param loc 1725 * Pointer to burst routine local context. 1726 * @param olx 1727 * Configured Tx offloads mask. It is fully defined at 1728 * compile time and may be used for optimization. 1729 * 1730 * @return 1731 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1732 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1733 * Local context variables partially updated. 1734 */ 1735 static __rte_always_inline enum mlx5_txcmp_code 1736 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 1737 struct mlx5_txq_local *__rte_restrict loc, 1738 unsigned int olx) 1739 { 1740 struct mlx5_wqe *__rte_restrict wqe; 1741 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 1742 1743 MLX5_ASSERT(loc->elts_free >= NB_SEGS(loc->mbuf)); 1744 if (MLX5_TXOFF_CONFIG(TXPP)) { 1745 enum mlx5_txcmp_code wret; 1746 1747 /* Generate WAIT for scheduling if requested. */ 1748 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 1749 if (wret == MLX5_TXCMP_CODE_EXIT) 1750 return MLX5_TXCMP_CODE_EXIT; 1751 if (wret == MLX5_TXCMP_CODE_ERROR) 1752 return MLX5_TXCMP_CODE_ERROR; 1753 } 1754 /* 1755 * Calculate data length to be inlined to estimate 1756 * the required space in WQE ring buffer. 1757 */ 1758 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1759 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1760 vlan = sizeof(struct rte_vlan_hdr); 1761 inlen = loc->mbuf->l2_len + vlan + 1762 loc->mbuf->l3_len + loc->mbuf->l4_len; 1763 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 1764 return MLX5_TXCMP_CODE_ERROR; 1765 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 1766 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 1767 /* Packet must contain all TSO headers. */ 1768 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 1769 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 1770 inlen > (dlen + vlan))) 1771 return MLX5_TXCMP_CODE_ERROR; 1772 /* 1773 * Check whether there are enough free WQEBBs: 1774 * - Control Segment 1775 * - Ethernet Segment 1776 * - First Segment of inlined Ethernet data 1777 * - ... data continued ... 1778 * - Data Segments of pointer/min inline type 1779 */ 1780 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 1781 MLX5_ESEG_MIN_INLINE_SIZE + 1782 MLX5_WSEG_SIZE + 1783 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 1784 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1785 return MLX5_TXCMP_CODE_EXIT; 1786 /* Check for maximal WQE size. */ 1787 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) 1788 return MLX5_TXCMP_CODE_ERROR; 1789 #ifdef MLX5_PMD_SOFT_COUNTERS 1790 /* Update sent data bytes/packets counters. 
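 * Worked example of the accounting below (illustrative numbers,
 * vlan = 0 assumed): dlen = 7042 bytes total, TSO header length
 * inlen = 42, tso_segsz = 1400:
 *   payload = 7042 - 42 = 7000,
 *   ntcp = (7000 + 1400 - 1) / 1400 = 5 TCP segments on the wire;
 * one of them is accounted through loc->pkts_sent at burst exit,
 * hence the decrement, and obytes grows by dlen + vlan plus
 * ntcp * inlen for the replicated headers.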
*/ 1791 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 1792 loc->mbuf->tso_segsz; 1793 /* 1794 * One will be added for mbuf itself at the end of the mlx5_tx_burst 1795 * from loc->pkts_sent field. 1796 */ 1797 --ntcp; 1798 txq->stats.opackets += ntcp; 1799 txq->stats.obytes += dlen + vlan + ntcp * inlen; 1800 #endif 1801 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1802 loc->wqe_last = wqe; 1803 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 1804 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 1805 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 1806 txq->wqe_ci += (ds + 3) / 4; 1807 loc->wqe_free -= (ds + 3) / 4; 1808 return MLX5_TXCMP_CODE_MULTI; 1809 } 1810 1811 /** 1812 * Tx one packet function for multi-segment SEND. Supports all types of Tx 1813 * offloads, uses MLX5_OPCODE_SEND to build WQEs, sends one packet per WQE, 1814 * without any data inlining in Ethernet Segment. 1815 * 1816 * This routine is responsible for storing processed mbuf 1817 * into elts ring buffer and update elts_head. 1818 * 1819 * @param txq 1820 * Pointer to TX queue structure. 1821 * @param loc 1822 * Pointer to burst routine local context. 1823 * @param olx 1824 * Configured Tx offloads mask. It is fully defined at 1825 * compile time and may be used for optimization. 1826 * 1827 * @return 1828 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1829 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1830 * Local context variables partially updated. 1831 */ 1832 static __rte_always_inline enum mlx5_txcmp_code 1833 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 1834 struct mlx5_txq_local *__rte_restrict loc, 1835 unsigned int olx) 1836 { 1837 struct mlx5_wqe_dseg *__rte_restrict dseg; 1838 struct mlx5_wqe *__rte_restrict wqe; 1839 unsigned int ds, nseg; 1840 1841 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1842 MLX5_ASSERT(loc->elts_free >= NB_SEGS(loc->mbuf)); 1843 if (MLX5_TXOFF_CONFIG(TXPP)) { 1844 enum mlx5_txcmp_code wret; 1845 1846 /* Generate WAIT for scheduling if requested. */ 1847 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 1848 if (wret == MLX5_TXCMP_CODE_EXIT) 1849 return MLX5_TXCMP_CODE_EXIT; 1850 if (wret == MLX5_TXCMP_CODE_ERROR) 1851 return MLX5_TXCMP_CODE_ERROR; 1852 } 1853 /* 1854 * No inline at all, it means the CPU cycles saving is prioritized at 1855 * configuration, we should not copy any packet data to WQE. 1856 */ 1857 nseg = NB_SEGS(loc->mbuf); 1858 ds = 2 + nseg; 1859 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1860 return MLX5_TXCMP_CODE_EXIT; 1861 /* Check for maximal WQE size. */ 1862 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) 1863 return MLX5_TXCMP_CODE_ERROR; 1864 /* 1865 * Some Tx offloads may cause an error if packet is not long enough, 1866 * check against assumed minimal length. 1867 */ 1868 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 1869 return MLX5_TXCMP_CODE_ERROR; 1870 #ifdef MLX5_PMD_SOFT_COUNTERS 1871 /* Update sent data bytes counter. 
*/ 1872 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 1873 if (MLX5_TXOFF_CONFIG(VLAN) && 1874 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1875 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 1876 #endif 1877 /* 1878 * SEND WQE, one WQEBB: 1879 * - Control Segment, SEND opcode 1880 * - Ethernet Segment, optional VLAN, no inline 1881 * - Data Segments, pointer only type 1882 */ 1883 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1884 loc->wqe_last = wqe; 1885 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 1886 mlx5_tx_eseg_none(txq, loc, wqe, olx); 1887 dseg = &wqe->dseg[0]; 1888 do { 1889 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1890 struct rte_mbuf *mbuf; 1891 1892 /* 1893 * Zero length segment found, have to correct total 1894 * size of WQE in segments. 1895 * It is supposed to be rare occasion, so in normal 1896 * case (no zero length segments) we avoid extra 1897 * writing to the Control Segment. 1898 */ 1899 --ds; 1900 wqe->cseg.sq_ds -= RTE_BE32(1); 1901 mbuf = loc->mbuf; 1902 loc->mbuf = mbuf->next; 1903 rte_pktmbuf_free_seg(mbuf); 1904 if (--nseg == 0) 1905 break; 1906 } else { 1907 mlx5_tx_dseg_ptr 1908 (txq, loc, dseg, 1909 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1910 rte_pktmbuf_data_len(loc->mbuf), olx); 1911 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1912 --loc->elts_free; 1913 if (--nseg == 0) 1914 break; 1915 ++dseg; 1916 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1917 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1918 loc->mbuf = loc->mbuf->next; 1919 } 1920 } while (true); 1921 txq->wqe_ci += (ds + 3) / 4; 1922 loc->wqe_free -= (ds + 3) / 4; 1923 return MLX5_TXCMP_CODE_MULTI; 1924 } 1925 1926 /** 1927 * Tx one packet function for multi-segment SEND. Supports all 1928 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 1929 * sends one packet per WQE, with data inlining in 1930 * Ethernet Segment and minimal Data Segments. 1931 * 1932 * This routine is responsible for storing processed mbuf 1933 * into elts ring buffer and update elts_head. 1934 * 1935 * @param txq 1936 * Pointer to TX queue structure. 1937 * @param loc 1938 * Pointer to burst routine local context. 1939 * @param olx 1940 * Configured Tx offloads mask. It is fully defined at 1941 * compile time and may be used for optimization. 1942 * 1943 * @return 1944 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1945 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1946 * Local context variables partially updated. 1947 */ 1948 static __rte_always_inline enum mlx5_txcmp_code 1949 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 1950 struct mlx5_txq_local *__rte_restrict loc, 1951 unsigned int olx) 1952 { 1953 struct mlx5_wqe *__rte_restrict wqe; 1954 unsigned int ds, inlen, dlen, vlan = 0; 1955 1956 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 1957 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1958 MLX5_ASSERT(loc->elts_free >= NB_SEGS(loc->mbuf)); 1959 /* 1960 * First calculate data length to be inlined 1961 * to estimate the required space for WQE. 1962 */ 1963 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1964 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1965 vlan = sizeof(struct rte_vlan_hdr); 1966 inlen = dlen + vlan; 1967 /* Check against minimal length. 
*/ 1968 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 1969 return MLX5_TXCMP_CODE_ERROR; 1970 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 1971 if (inlen > txq->inlen_send || 1972 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1973 struct rte_mbuf *mbuf; 1974 unsigned int nxlen; 1975 uintptr_t start; 1976 1977 mbuf = loc->mbuf; 1978 nxlen = rte_pktmbuf_data_len(mbuf); 1979 /* 1980 * Packet length exceeds the allowed inline data length, 1981 * check whether the minimal inlining is required. 1982 */ 1983 if (txq->inlen_mode) { 1984 MLX5_ASSERT(txq->inlen_mode >= 1985 MLX5_ESEG_MIN_INLINE_SIZE); 1986 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 1987 inlen = RTE_MIN(txq->inlen_mode, inlen); 1988 } else if (vlan && !txq->vlan_en) { 1989 /* 1990 * VLAN insertion is requested and hardware does not 1991 * support the offload, will do with software inline. 1992 */ 1993 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 1994 } else if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE || 1995 nxlen > txq->inlen_send) { 1996 return mlx5_tx_packet_multi_send(txq, loc, olx); 1997 } else if (nxlen <= MLX5_ESEG_MIN_INLINE_SIZE) { 1998 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 1999 } else { 2000 goto do_first; 2001 } 2002 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2003 goto do_build; 2004 /* 2005 * Now we know the minimal amount of data is requested 2006 * to inline. Check whether we should inline the buffers 2007 * from the chain beginning to eliminate some mbufs. 2008 */ 2009 if (unlikely(nxlen <= txq->inlen_send)) { 2010 /* We can inline first mbuf at least. */ 2011 if (nxlen < inlen) { 2012 unsigned int smlen; 2013 2014 /* Scan mbufs till inlen filled. */ 2015 do { 2016 smlen = nxlen; 2017 mbuf = NEXT(mbuf); 2018 MLX5_ASSERT(mbuf); 2019 nxlen = rte_pktmbuf_data_len(mbuf); 2020 nxlen += smlen; 2021 } while (unlikely(nxlen < inlen)); 2022 if (unlikely(nxlen > txq->inlen_send)) { 2023 /* We cannot inline entire mbuf. */ 2024 smlen = inlen - smlen; 2025 start = rte_pktmbuf_mtod_offset 2026 (mbuf, uintptr_t, smlen); 2027 goto do_align; 2028 } 2029 } 2030 do_first: 2031 do { 2032 inlen = nxlen; 2033 mbuf = NEXT(mbuf); 2034 /* There should be not end of packet. */ 2035 MLX5_ASSERT(mbuf); 2036 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2037 break; 2038 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 2039 } while (unlikely(nxlen < txq->inlen_send)); 2040 } 2041 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 2042 /* 2043 * Check whether we can do inline to align start 2044 * address of data buffer to cacheline. 2045 */ 2046 do_align: 2047 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 2048 if (unlikely(start)) { 2049 start += inlen; 2050 if (start <= txq->inlen_send) 2051 inlen = start; 2052 } 2053 } 2054 /* 2055 * Check whether there are enough free WQEBBs: 2056 * - Control Segment 2057 * - Ethernet Segment 2058 * - First Segment of inlined Ethernet data 2059 * - ... data continued ... 2060 * - Data Segments of pointer/min inline type 2061 * 2062 * Estimate the number of Data Segments conservatively, 2063 * supposing no any mbufs is being freed during inlining. 2064 */ 2065 do_build: 2066 if (MLX5_TXOFF_CONFIG(TXPP)) { 2067 enum mlx5_txcmp_code wret; 2068 2069 /* Generate WAIT for scheduling if requested. 
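 * The WAIT WQE, if emitted, occupies one extra WQEBB in front of the
 * data WQE; mlx5_tx_schedule_send() checks loc->wqe_free itself and
 * decrements it on success, so the space estimation below still holds.
 * Illustrative example: a packet that needs 3 WQEBBs with scheduling
 * requested consumes 4 WQEBBs in total (1 WAIT + 3 data).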
*/ 2070 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 2071 if (wret == MLX5_TXCMP_CODE_EXIT) 2072 return MLX5_TXCMP_CODE_EXIT; 2073 if (wret == MLX5_TXCMP_CODE_ERROR) 2074 return MLX5_TXCMP_CODE_ERROR; 2075 } 2076 MLX5_ASSERT(inlen <= txq->inlen_send); 2077 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 2078 MLX5_ESEG_MIN_INLINE_SIZE + 2079 MLX5_WSEG_SIZE + 2080 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2081 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 2082 return MLX5_TXCMP_CODE_EXIT; 2083 /* Check for maximal WQE size. */ 2084 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) { 2085 /* Check if we can adjust the inline length. */ 2086 if (unlikely(txq->inlen_mode)) { 2087 ds = NB_SEGS(loc->mbuf) + 2 + 2088 (txq->inlen_mode - 2089 MLX5_ESEG_MIN_INLINE_SIZE + 2090 MLX5_WSEG_SIZE + 2091 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2092 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) 2093 return MLX5_TXCMP_CODE_ERROR; 2094 } 2095 /* We have lucky opportunity to adjust. */ 2096 inlen = RTE_MIN(inlen, MLX5_WQE_SIZE_MAX - 2097 MLX5_WSEG_SIZE * 2 - 2098 MLX5_WSEG_SIZE * NB_SEGS(loc->mbuf) - 2099 MLX5_WSEG_SIZE + 2100 MLX5_ESEG_MIN_INLINE_SIZE); 2101 } 2102 #ifdef MLX5_PMD_SOFT_COUNTERS 2103 /* Update sent data bytes/packets counters. */ 2104 txq->stats.obytes += dlen + vlan; 2105 #endif 2106 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2107 loc->wqe_last = wqe; 2108 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 2109 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 2110 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2111 txq->wqe_ci += (ds + 3) / 4; 2112 loc->wqe_free -= (ds + 3) / 4; 2113 return MLX5_TXCMP_CODE_MULTI; 2114 } 2115 2116 /** 2117 * Tx burst function for multi-segment packets. Supports all 2118 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 2119 * sends one packet per WQE. Function stops sending if it 2120 * encounters the single-segment packet. 2121 * 2122 * This routine is responsible for storing processed mbuf 2123 * into elts ring buffer and update elts_head. 2124 * 2125 * @param txq 2126 * Pointer to TX queue structure. 2127 * @param[in] pkts 2128 * Packets to transmit. 2129 * @param pkts_n 2130 * Number of packets in array. 2131 * @param loc 2132 * Pointer to burst routine local context. 2133 * @param olx 2134 * Configured Tx offloads mask. It is fully defined at 2135 * compile time and may be used for optimization. 2136 * 2137 * @return 2138 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2139 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2140 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2141 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 2142 * Local context variables updated. 2143 */ 2144 static __rte_always_inline enum mlx5_txcmp_code 2145 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 2146 struct rte_mbuf **__rte_restrict pkts, 2147 unsigned int pkts_n, 2148 struct mlx5_txq_local *__rte_restrict loc, 2149 unsigned int olx) 2150 { 2151 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2152 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2153 pkts += loc->pkts_sent + 1; 2154 pkts_n -= loc->pkts_sent; 2155 for (;;) { 2156 enum mlx5_txcmp_code ret; 2157 2158 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 2159 /* 2160 * Estimate the number of free elts quickly but conservatively. 2161 * Some segment may be fully inlined and freed, 2162 * ignore this here - precise estimation is costly. 
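 * Illustrative example: a 3-segment mbuf chain requires elts_free >= 3
 * here even if its head segment later gets fully inlined and freed by
 * mlx5_tx_packet_multi_inline(); over-reserving one elts slot is far
 * cheaper than computing the exact inline coverage on the fast path.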
2163 */ 2164 if (loc->elts_free < NB_SEGS(loc->mbuf)) 2165 return MLX5_TXCMP_CODE_EXIT; 2166 if (MLX5_TXOFF_CONFIG(TSO) && 2167 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { 2168 /* Proceed with multi-segment TSO. */ 2169 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 2170 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 2171 /* Proceed with multi-segment SEND with inlining. */ 2172 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 2173 } else { 2174 /* Proceed with multi-segment SEND w/o inlining. */ 2175 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 2176 } 2177 if (ret == MLX5_TXCMP_CODE_EXIT) 2178 return MLX5_TXCMP_CODE_EXIT; 2179 if (ret == MLX5_TXCMP_CODE_ERROR) 2180 return MLX5_TXCMP_CODE_ERROR; 2181 /* WQE is built, go to the next packet. */ 2182 ++loc->pkts_sent; 2183 --pkts_n; 2184 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2185 return MLX5_TXCMP_CODE_EXIT; 2186 loc->mbuf = *pkts++; 2187 if (pkts_n > 1) 2188 rte_prefetch0(*pkts); 2189 if (likely(NB_SEGS(loc->mbuf) > 1)) 2190 continue; 2191 /* Here ends the series of multi-segment packets. */ 2192 if (MLX5_TXOFF_CONFIG(TSO) && 2193 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2194 return MLX5_TXCMP_CODE_TSO; 2195 return MLX5_TXCMP_CODE_SINGLE; 2196 } 2197 MLX5_ASSERT(false); 2198 } 2199 2200 /** 2201 * Tx burst function for single-segment packets with TSO. 2202 * Supports all types of Tx offloads, except multi-packets. 2203 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 2204 * Function stops sending if it encounters the multi-segment 2205 * packet or packet without TSO requested. 2206 * 2207 * The routine is responsible for storing processed mbuf into elts ring buffer 2208 * and update elts_head if inline offloads is requested due to possible early 2209 * freeing of the inlined mbufs (can not store pkts array in elts as a batch). 2210 * 2211 * @param txq 2212 * Pointer to TX queue structure. 2213 * @param[in] pkts 2214 * Packets to transmit. 2215 * @param pkts_n 2216 * Number of packets in array. 2217 * @param loc 2218 * Pointer to burst routine local context. 2219 * @param olx 2220 * Configured Tx offloads mask. It is fully defined at 2221 * compile time and may be used for optimization. 2222 * 2223 * @return 2224 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2225 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2226 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2227 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2228 * Local context variables updated. 2229 */ 2230 static __rte_always_inline enum mlx5_txcmp_code 2231 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 2232 struct rte_mbuf **__rte_restrict pkts, 2233 unsigned int pkts_n, 2234 struct mlx5_txq_local *__rte_restrict loc, 2235 unsigned int olx) 2236 { 2237 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2238 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2239 pkts += loc->pkts_sent + 1; 2240 pkts_n -= loc->pkts_sent; 2241 for (;;) { 2242 struct mlx5_wqe_dseg *__rte_restrict dseg; 2243 struct mlx5_wqe *__rte_restrict wqe; 2244 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 2245 uint8_t *dptr; 2246 2247 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2248 if (MLX5_TXOFF_CONFIG(TXPP)) { 2249 enum mlx5_txcmp_code wret; 2250 2251 /* Generate WAIT for scheduling if requested. 
*/ 2252 wret = mlx5_tx_schedule_send(txq, loc, 1, olx); 2253 if (wret == MLX5_TXCMP_CODE_EXIT) 2254 return MLX5_TXCMP_CODE_EXIT; 2255 if (wret == MLX5_TXCMP_CODE_ERROR) 2256 return MLX5_TXCMP_CODE_ERROR; 2257 } 2258 dlen = rte_pktmbuf_data_len(loc->mbuf); 2259 if (MLX5_TXOFF_CONFIG(VLAN) && 2260 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2261 vlan = sizeof(struct rte_vlan_hdr); 2262 } 2263 /* 2264 * First calculate the WQE size to check 2265 * whether we have enough space in ring buffer. 2266 */ 2267 hlen = loc->mbuf->l2_len + vlan + 2268 loc->mbuf->l3_len + loc->mbuf->l4_len; 2269 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 2270 return MLX5_TXCMP_CODE_ERROR; 2271 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 2272 hlen += loc->mbuf->outer_l2_len + 2273 loc->mbuf->outer_l3_len; 2274 /* Segment must contain all TSO headers. */ 2275 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 2276 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 2277 hlen > (dlen + vlan))) 2278 return MLX5_TXCMP_CODE_ERROR; 2279 /* 2280 * Check whether there are enough free WQEBBs: 2281 * - Control Segment 2282 * - Ethernet Segment 2283 * - First Segment of inlined Ethernet data 2284 * - ... data continued ... 2285 * - Finishing Data Segment of pointer type 2286 */ 2287 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 2288 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2289 if (loc->wqe_free < ((ds + 3) / 4)) 2290 return MLX5_TXCMP_CODE_EXIT; 2291 #ifdef MLX5_PMD_SOFT_COUNTERS 2292 /* Update sent data bytes/packets counters. */ 2293 ntcp = (dlen + vlan - hlen + 2294 loc->mbuf->tso_segsz - 1) / 2295 loc->mbuf->tso_segsz; 2296 /* 2297 * One will be added for mbuf itself at the end 2298 * of the mlx5_tx_burst from loc->pkts_sent field. 2299 */ 2300 --ntcp; 2301 txq->stats.opackets += ntcp; 2302 txq->stats.obytes += dlen + vlan + ntcp * hlen; 2303 #endif 2304 /* 2305 * Build the TSO WQE: 2306 * - Control Segment 2307 * - Ethernet Segment with hlen bytes inlined 2308 * - Data Segment of pointer type 2309 */ 2310 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2311 loc->wqe_last = wqe; 2312 mlx5_tx_cseg_init(txq, loc, wqe, ds, 2313 MLX5_OPCODE_TSO, olx); 2314 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 2315 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 2316 dlen -= hlen - vlan; 2317 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2318 /* 2319 * WQE is built, update the loop parameters 2320 * and go to the next packet. 2321 */ 2322 txq->wqe_ci += (ds + 3) / 4; 2323 loc->wqe_free -= (ds + 3) / 4; 2324 if (MLX5_TXOFF_CONFIG(INLINE)) 2325 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2326 --loc->elts_free; 2327 ++loc->pkts_sent; 2328 --pkts_n; 2329 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2330 return MLX5_TXCMP_CODE_EXIT; 2331 loc->mbuf = *pkts++; 2332 if (pkts_n > 1) 2333 rte_prefetch0(*pkts); 2334 if (MLX5_TXOFF_CONFIG(MULTI) && 2335 unlikely(NB_SEGS(loc->mbuf) > 1)) 2336 return MLX5_TXCMP_CODE_MULTI; 2337 if (likely(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG))) 2338 return MLX5_TXCMP_CODE_SINGLE; 2339 /* Continue with the next TSO packet. */ 2340 } 2341 MLX5_ASSERT(false); 2342 } 2343 2344 /** 2345 * Analyze the packet and select the best method to send. 2346 * 2347 * @param txq 2348 * Pointer to TX queue structure. 2349 * @param loc 2350 * Pointer to burst routine local context. 2351 * @param olx 2352 * Configured Tx offloads mask. It is fully defined at 2353 * compile time and may be used for optimization. 
2354 * @param newp 2355 * The predefined flag whether do complete check for 2356 * multi-segment packets and TSO. 2357 * 2358 * @return 2359 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2360 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 2361 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 2362 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 2363 */ 2364 static __rte_always_inline enum mlx5_txcmp_code 2365 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 2366 struct mlx5_txq_local *__rte_restrict loc, 2367 unsigned int olx, 2368 bool newp) 2369 { 2370 /* Check for multi-segment packet. */ 2371 if (newp && 2372 MLX5_TXOFF_CONFIG(MULTI) && 2373 unlikely(NB_SEGS(loc->mbuf) > 1)) 2374 return MLX5_TXCMP_CODE_MULTI; 2375 /* Check for TSO packet. */ 2376 if (newp && 2377 MLX5_TXOFF_CONFIG(TSO) && 2378 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2379 return MLX5_TXCMP_CODE_TSO; 2380 /* Check if eMPW is enabled at all. */ 2381 if (!MLX5_TXOFF_CONFIG(EMPW)) 2382 return MLX5_TXCMP_CODE_SINGLE; 2383 /* Check if eMPW can be engaged. */ 2384 if (MLX5_TXOFF_CONFIG(VLAN) && 2385 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) && 2386 (!MLX5_TXOFF_CONFIG(INLINE) || 2387 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 2388 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 2389 /* 2390 * eMPW does not support VLAN insertion offload, we have to 2391 * inline the entire packet but packet is too long for inlining. 2392 */ 2393 return MLX5_TXCMP_CODE_SINGLE; 2394 } 2395 return MLX5_TXCMP_CODE_EMPW; 2396 } 2397 2398 /** 2399 * Check the next packet attributes to match with the eMPW batch ones. 2400 * In addition, for legacy MPW the packet length is checked either. 2401 * 2402 * @param txq 2403 * Pointer to TX queue structure. 2404 * @param es 2405 * Pointer to Ethernet Segment of eMPW batch. 2406 * @param loc 2407 * Pointer to burst routine local context. 2408 * @param dlen 2409 * Length of previous packet in MPW descriptor. 2410 * @param olx 2411 * Configured Tx offloads mask. It is fully defined at 2412 * compile time and may be used for optimization. 2413 * 2414 * @return 2415 * true - packet match with eMPW batch attributes. 2416 * false - no match, eMPW should be restarted. 2417 */ 2418 static __rte_always_inline bool 2419 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, 2420 struct mlx5_wqe_eseg *__rte_restrict es, 2421 struct mlx5_txq_local *__rte_restrict loc, 2422 uint32_t dlen, 2423 unsigned int olx) 2424 { 2425 uint8_t swp_flags = 0; 2426 2427 /* Compare the checksum flags, if any. */ 2428 if (MLX5_TXOFF_CONFIG(CSUM) && 2429 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags) 2430 return false; 2431 /* Compare the Software Parser offsets and flags. */ 2432 if (MLX5_TXOFF_CONFIG(SWP) && 2433 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) || 2434 es->swp_flags != swp_flags)) 2435 return false; 2436 /* Fill metadata field if needed. */ 2437 if (MLX5_TXOFF_CONFIG(METADATA) && 2438 es->metadata != (loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 2439 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 0)) 2440 return false; 2441 /* Legacy MPW can send packets with the same length only. */ 2442 if (MLX5_TXOFF_CONFIG(MPW) && 2443 dlen != rte_pktmbuf_data_len(loc->mbuf)) 2444 return false; 2445 /* There must be no VLAN packets in eMPW loop. */ 2446 if (MLX5_TXOFF_CONFIG(VLAN)) 2447 MLX5_ASSERT(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN)); 2448 /* Check if the scheduling is requested. 
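 * A packet carrying the scheduling timestamp flag cannot join an open
 * eMPW session: its WAIT WQE has to precede the packet's own WQE, so
 * the batch is reported as non-matching here, the caller closes the
 * session and the WAIT is emitted when the next session (or a single
 * send) is started.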
*/
2449 if (MLX5_TXOFF_CONFIG(TXPP) &&
2450 loc->mbuf->ol_flags & txq->ts_mask)
2451 return false;
2452 return true;
2453 }
2454
2455 /**
2456 * Update send loop variables and WQE for eMPW loop without data inlining.
2457 * Number of Data Segments is equal to the number of sent packets.
2458 *
2459 * @param txq
2460 * Pointer to TX queue structure.
2461 * @param loc
2462 * Pointer to burst routine local context.
2463 * @param ds
2464 * Number of packets sent (equal to the number of Data Segments).
2465 * @param slen
2466 * Accumulated statistics, bytes sent.
2467 * @param olx
2468 * Configured Tx offloads mask. It is fully defined at
2469 * compile time and may be used for optimization.
2470 *
2471 * @note
2472 * The routine does not return a value; it updates the burst local
2473 * context and completes the eMPW title WQE in place.
2474 */
2475 static __rte_always_inline void
2476 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq,
2477 struct mlx5_txq_local *__rte_restrict loc,
2478 unsigned int ds,
2479 unsigned int slen,
2480 unsigned int olx __rte_unused)
2481 {
2482 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE));
2483 #ifdef MLX5_PMD_SOFT_COUNTERS
2484 /* Update sent data bytes counter. */
2485 txq->stats.obytes += slen;
2486 #else
2487 (void)slen;
2488 #endif
2489 loc->elts_free -= ds;
2490 loc->pkts_sent += ds;
2491 ds += 2;
2492 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
2493 txq->wqe_ci += (ds + 3) / 4;
2494 loc->wqe_free -= (ds + 3) / 4;
2495 }
2496
2497 /**
2498 * Update send loop variables and WQE for eMPW loop with data inlining.
2499 * Takes the total size of the pushed descriptors and inlined data in bytes.
2500 *
2501 * @param txq
2502 * Pointer to TX queue structure.
2503 * @param loc
2504 * Pointer to burst routine local context.
2505 * @param len
2506 * Total size of descriptors/data in bytes.
2507 * @param slen
2508 * Accumulated statistics, data bytes sent.
2509 * @param wqem
2510 * The base WQE for the eMPW/MPW descriptor.
2511 * @param olx
2512 * Configured Tx offloads mask. It is fully defined at
2513 * compile time and may be used for optimization.
2514 *
2515 * @note
2516 * The routine does not return a value; it updates the burst local
2517 * context and completes the eMPW/MPW title WQE in place.
2518 */
2519 static __rte_always_inline void
2520 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq,
2521 struct mlx5_txq_local *__rte_restrict loc,
2522 unsigned int len,
2523 unsigned int slen,
2524 struct mlx5_wqe *__rte_restrict wqem,
2525 unsigned int olx __rte_unused)
2526 {
2527 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0];
2528
2529 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE));
2530 #ifdef MLX5_PMD_SOFT_COUNTERS
2531 /* Update sent data bytes counter. */
2532 txq->stats.obytes += slen;
2533 #else
2534 (void)slen;
2535 #endif
2536 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) {
2537 /*
2538 * If the legacy MPW session contains inline packets,
2539 * set the length of the single inline Data Segment
2540 * and align the total length to the segment size.
2541 */
2542 MLX5_ASSERT(len > sizeof(dseg->bcount));
2543 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) |
2544 MLX5_ETH_WQE_DATA_INLINE);
2545 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2;
2546 } else {
2547 /*
2548 * The session is not a legacy MPW one, or it contains
2549 * data buffer pointer segments. 
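 * Worked example for the DS math below (illustrative, MLX5_WSEG_SIZE
 * assumed to be 16 bytes): a session holding 4 pointer Data Segments
 * gives len = 64, hence
 *   DS = 64 / 16 + 2 = 6 (Control + Ethernet + 4 Data Segments),
 * and the session occupies (6 + 3) / 4 = 2 WQEBBs.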
2550 */
2551 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0);
2552 len = len / MLX5_WSEG_SIZE + 2;
2553 }
2554 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len);
2555 txq->wqe_ci += (len + 3) / 4;
2556 loc->wqe_free -= (len + 3) / 4;
2557 loc->wqe_last = wqem;
2558 }
2559
2560 /**
2561 * The set of Tx burst functions for single-segment packets without TSO
2562 * and with Multi-Packet Writing feature support.
2563 * Supports all types of Tx offloads, except multi-segment packets and TSO.
2564 *
2565 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends as many packets
2566 * per WQE as it can. If eMPW is not configured or the packet cannot be sent
2567 * with eMPW (VLAN insertion), the ordinary SEND opcode is used and only one
2568 * packet is placed in the WQE.
2569 *
2570 * The functions stop sending if they encounter a multi-segment packet or a
2571 * packet with TSO requested.
2572 *
2573 * The routines are responsible for storing the processed mbuf into the elts
2574 * ring buffer and updating elts_head if the inlining offload is requested;
2575 * otherwise copying mbufs to elts can be postponed to the end of the burst.
2576 *
2577 * @param txq
2578 * Pointer to TX queue structure.
2579 * @param[in] pkts
2580 * Packets to transmit.
2581 * @param pkts_n
2582 * Number of packets in array.
2583 * @param loc
2584 * Pointer to burst routine local context.
2585 * @param olx
2586 * Configured Tx offloads mask. It is fully defined at
2587 * compile time and may be used for optimization.
2588 *
2589 * @return
2590 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible.
2591 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred.
2592 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered.
2593 * MLX5_TXCMP_CODE_TSO - TSO packet encountered.
2594 * MLX5_TXCMP_CODE_SINGLE - used only inside this set of functions.
2595 * MLX5_TXCMP_CODE_EMPW - used only inside this set of functions.
2596 *
2597 * Local context variables updated.
2598 *
2599 *
2600 * The routine below sends packets with MLX5_OPCODE_EMPW
2601 * without inlining; this is a dedicated, optimized branch.
2602 * No VLAN insertion is supported.
2603 */
2604 static __rte_always_inline enum mlx5_txcmp_code
2605 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq,
2606 struct rte_mbuf **__rte_restrict pkts,
2607 unsigned int pkts_n,
2608 struct mlx5_txq_local *__rte_restrict loc,
2609 unsigned int olx)
2610 {
2611 /*
2612 * The subroutine is part of mlx5_tx_burst_single() and sends
2613 * single-segment packets with the eMPW opcode, without data inlining.
2614 */
2615 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE));
2616 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW));
2617 MLX5_ASSERT(loc->elts_free && loc->wqe_free);
2618 MLX5_ASSERT(pkts_n > loc->pkts_sent);
2619 pkts += loc->pkts_sent + 1;
2620 pkts_n -= loc->pkts_sent;
2621 for (;;) {
2622 struct mlx5_wqe_dseg *__rte_restrict dseg;
2623 struct mlx5_wqe_eseg *__rte_restrict eseg;
2624 enum mlx5_txcmp_code ret;
2625 unsigned int part, loop;
2626 unsigned int slen = 0;
2627
2628 next_empw:
2629 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1);
2630 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ?
2631 MLX5_MPW_MAX_PACKETS :
2632 MLX5_EMPW_MAX_PACKETS);
2633 if (unlikely(loc->elts_free < part)) {
2634 /* There are not enough elts to save all mbufs. */
2635 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS))
2636 return MLX5_TXCMP_CODE_EXIT;
2637 /* But we are still able to send at least a minimal eMPW. 
*/ 2638 part = loc->elts_free; 2639 } 2640 if (MLX5_TXOFF_CONFIG(TXPP)) { 2641 enum mlx5_txcmp_code wret; 2642 2643 /* Generate WAIT for scheduling if requested. */ 2644 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 2645 if (wret == MLX5_TXCMP_CODE_EXIT) 2646 return MLX5_TXCMP_CODE_EXIT; 2647 if (wret == MLX5_TXCMP_CODE_ERROR) 2648 return MLX5_TXCMP_CODE_ERROR; 2649 } 2650 /* Check whether we have enough WQEs */ 2651 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 2652 if (unlikely(loc->wqe_free < 2653 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2654 return MLX5_TXCMP_CODE_EXIT; 2655 part = (loc->wqe_free * 4) - 2; 2656 } 2657 if (likely(part > 1)) 2658 rte_prefetch0(*pkts); 2659 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2660 /* 2661 * Build eMPW title WQEBB: 2662 * - Control Segment, eMPW opcode 2663 * - Ethernet Segment, no inline 2664 */ 2665 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 2666 MLX5_OPCODE_ENHANCED_MPSW, olx); 2667 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 2668 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2669 eseg = &loc->wqe_last->eseg; 2670 dseg = &loc->wqe_last->dseg[0]; 2671 loop = part; 2672 /* Store the packet length for legacy MPW. */ 2673 if (MLX5_TXOFF_CONFIG(MPW)) 2674 eseg->mss = rte_cpu_to_be_16 2675 (rte_pktmbuf_data_len(loc->mbuf)); 2676 for (;;) { 2677 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2678 #ifdef MLX5_PMD_SOFT_COUNTERS 2679 /* Update sent data bytes counter. */ 2680 slen += dlen; 2681 #endif 2682 mlx5_tx_dseg_ptr 2683 (txq, loc, dseg, 2684 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 2685 dlen, olx); 2686 if (unlikely(--loop == 0)) 2687 break; 2688 loc->mbuf = *pkts++; 2689 if (likely(loop > 1)) 2690 rte_prefetch0(*pkts); 2691 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2692 /* 2693 * Unroll the completion code to avoid 2694 * returning variable value - it results in 2695 * unoptimized sequent checking in caller. 2696 */ 2697 if (ret == MLX5_TXCMP_CODE_MULTI) { 2698 part -= loop; 2699 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2700 if (unlikely(!loc->elts_free || 2701 !loc->wqe_free)) 2702 return MLX5_TXCMP_CODE_EXIT; 2703 return MLX5_TXCMP_CODE_MULTI; 2704 } 2705 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2706 if (ret == MLX5_TXCMP_CODE_TSO) { 2707 part -= loop; 2708 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2709 if (unlikely(!loc->elts_free || 2710 !loc->wqe_free)) 2711 return MLX5_TXCMP_CODE_EXIT; 2712 return MLX5_TXCMP_CODE_TSO; 2713 } 2714 if (ret == MLX5_TXCMP_CODE_SINGLE) { 2715 part -= loop; 2716 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2717 if (unlikely(!loc->elts_free || 2718 !loc->wqe_free)) 2719 return MLX5_TXCMP_CODE_EXIT; 2720 return MLX5_TXCMP_CODE_SINGLE; 2721 } 2722 if (ret != MLX5_TXCMP_CODE_EMPW) { 2723 MLX5_ASSERT(false); 2724 part -= loop; 2725 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2726 return MLX5_TXCMP_CODE_ERROR; 2727 } 2728 /* 2729 * Check whether packet parameters coincide 2730 * within assumed eMPW batch: 2731 * - check sum settings 2732 * - metadata value 2733 * - software parser settings 2734 * - packets length (legacy MPW only) 2735 * - scheduling is not required 2736 */ 2737 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 2738 MLX5_ASSERT(loop); 2739 part -= loop; 2740 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2741 if (unlikely(!loc->elts_free || 2742 !loc->wqe_free)) 2743 return MLX5_TXCMP_CODE_EXIT; 2744 pkts_n -= part; 2745 goto next_empw; 2746 } 2747 /* Packet attributes match, continue the same eMPW. 
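 * The Data Segments of one eMPW session may wrap around the end of
 * the WQE ring buffer; only the write pointer is adjusted below, the
 * DS count stored in the title Control Segment keeps the session
 * consistent. Illustrative example: when dseg reaches wqes_end, the
 * next Data Segment is simply written at the ring base txq->wqes.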
*/ 2748 ++dseg; 2749 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2750 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2751 } 2752 /* eMPW is built successfully, update loop parameters. */ 2753 MLX5_ASSERT(!loop); 2754 MLX5_ASSERT(pkts_n >= part); 2755 #ifdef MLX5_PMD_SOFT_COUNTERS 2756 /* Update sent data bytes counter. */ 2757 txq->stats.obytes += slen; 2758 #endif 2759 loc->elts_free -= part; 2760 loc->pkts_sent += part; 2761 txq->wqe_ci += (2 + part + 3) / 4; 2762 loc->wqe_free -= (2 + part + 3) / 4; 2763 pkts_n -= part; 2764 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2765 return MLX5_TXCMP_CODE_EXIT; 2766 loc->mbuf = *pkts++; 2767 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2768 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 2769 return ret; 2770 /* Continue sending eMPW batches. */ 2771 } 2772 MLX5_ASSERT(false); 2773 } 2774 2775 /** 2776 * The routine sends packets with MLX5_OPCODE_EMPW 2777 * with inlining, optionally supports VLAN insertion. 2778 */ 2779 static __rte_always_inline enum mlx5_txcmp_code 2780 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 2781 struct rte_mbuf **__rte_restrict pkts, 2782 unsigned int pkts_n, 2783 struct mlx5_txq_local *__rte_restrict loc, 2784 unsigned int olx) 2785 { 2786 /* 2787 * Subroutine is the part of mlx5_tx_burst_single() and sends 2788 * single-segment packet with eMPW opcode with data inlining. 2789 */ 2790 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2791 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2792 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2793 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2794 pkts += loc->pkts_sent + 1; 2795 pkts_n -= loc->pkts_sent; 2796 for (;;) { 2797 struct mlx5_wqe_dseg *__rte_restrict dseg; 2798 struct mlx5_wqe *__rte_restrict wqem; 2799 enum mlx5_txcmp_code ret; 2800 unsigned int room, part, nlim; 2801 unsigned int slen = 0; 2802 2803 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2804 /* 2805 * Limits the amount of packets in one WQE 2806 * to improve CQE latency generation. 2807 */ 2808 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2809 MLX5_MPW_INLINE_MAX_PACKETS : 2810 MLX5_EMPW_MAX_PACKETS); 2811 if (MLX5_TXOFF_CONFIG(TXPP)) { 2812 enum mlx5_txcmp_code wret; 2813 2814 /* Generate WAIT for scheduling if requested. */ 2815 wret = mlx5_tx_schedule_send(txq, loc, nlim, olx); 2816 if (wret == MLX5_TXCMP_CODE_EXIT) 2817 return MLX5_TXCMP_CODE_EXIT; 2818 if (wret == MLX5_TXCMP_CODE_ERROR) 2819 return MLX5_TXCMP_CODE_ERROR; 2820 } 2821 /* Check whether we have minimal amount WQEs */ 2822 if (unlikely(loc->wqe_free < 2823 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2824 return MLX5_TXCMP_CODE_EXIT; 2825 if (likely(pkts_n > 1)) 2826 rte_prefetch0(*pkts); 2827 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2828 /* 2829 * Build eMPW title WQEBB: 2830 * - Control Segment, eMPW opcode, zero DS 2831 * - Ethernet Segment, no inline 2832 */ 2833 mlx5_tx_cseg_init(txq, loc, wqem, 0, 2834 MLX5_OPCODE_ENHANCED_MPSW, olx); 2835 mlx5_tx_eseg_none(txq, loc, wqem, 2836 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2837 dseg = &wqem->dseg[0]; 2838 /* Store the packet length for legacy MPW. */ 2839 if (MLX5_TXOFF_CONFIG(MPW)) 2840 wqem->eseg.mss = rte_cpu_to_be_16 2841 (rte_pktmbuf_data_len(loc->mbuf)); 2842 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 2843 loc->wqe_free) * MLX5_WQE_SIZE - 2844 MLX5_WQE_CSEG_SIZE - 2845 MLX5_WQE_ESEG_SIZE; 2846 /* Limit the room for legacy MPW sessions for performance. 
*/ 2847 if (MLX5_TXOFF_CONFIG(MPW)) 2848 room = RTE_MIN(room, 2849 RTE_MAX(txq->inlen_empw + 2850 sizeof(dseg->bcount) + 2851 (MLX5_TXOFF_CONFIG(VLAN) ? 2852 sizeof(struct rte_vlan_hdr) : 0), 2853 MLX5_MPW_INLINE_MAX_PACKETS * 2854 MLX5_WQE_DSEG_SIZE)); 2855 /* Build WQE till we have space, packets and resources. */ 2856 part = room; 2857 for (;;) { 2858 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2859 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2860 unsigned int tlen; 2861 2862 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2863 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 2864 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 2865 /* 2866 * Some Tx offloads may cause an error if packet is not 2867 * long enough, check against assumed minimal length. 2868 */ 2869 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 2870 part -= room; 2871 if (unlikely(!part)) 2872 return MLX5_TXCMP_CODE_ERROR; 2873 /* 2874 * We have some successfully built 2875 * packet Data Segments to send. 2876 */ 2877 mlx5_tx_idone_empw(txq, loc, part, 2878 slen, wqem, olx); 2879 return MLX5_TXCMP_CODE_ERROR; 2880 } 2881 /* Inline or not inline - that's the Question. */ 2882 if (dlen > txq->inlen_empw || 2883 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2884 goto pointer_empw; 2885 if (MLX5_TXOFF_CONFIG(MPW)) { 2886 if (dlen > txq->inlen_send) 2887 goto pointer_empw; 2888 tlen = dlen; 2889 if (part == room) { 2890 /* Open new inline MPW session. */ 2891 tlen += sizeof(dseg->bcount); 2892 dseg->bcount = RTE_BE32(0); 2893 dseg = RTE_PTR_ADD 2894 (dseg, sizeof(dseg->bcount)); 2895 } else { 2896 /* 2897 * No pointer and inline descriptor 2898 * intermix for legacy MPW sessions. 2899 */ 2900 if (wqem->dseg[0].bcount) 2901 break; 2902 } 2903 } else { 2904 tlen = sizeof(dseg->bcount) + dlen; 2905 } 2906 /* Inline entire packet, optional VLAN insertion. */ 2907 if (MLX5_TXOFF_CONFIG(VLAN) && 2908 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2909 /* 2910 * The packet length must be checked in 2911 * mlx5_tx_able_to_empw() and packet 2912 * fits into inline length guaranteed. 2913 */ 2914 MLX5_ASSERT((dlen + 2915 sizeof(struct rte_vlan_hdr)) <= 2916 txq->inlen_empw); 2917 tlen += sizeof(struct rte_vlan_hdr); 2918 if (room < tlen) 2919 break; 2920 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 2921 dptr, dlen, olx); 2922 #ifdef MLX5_PMD_SOFT_COUNTERS 2923 /* Update sent data bytes counter. */ 2924 slen += sizeof(struct rte_vlan_hdr); 2925 #endif 2926 } else { 2927 if (room < tlen) 2928 break; 2929 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 2930 dptr, dlen, olx); 2931 } 2932 if (!MLX5_TXOFF_CONFIG(MPW)) 2933 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 2934 MLX5_ASSERT(room >= tlen); 2935 room -= tlen; 2936 /* 2937 * Packet data are completely inline, 2938 * we can try to free the packet. 2939 */ 2940 if (likely(loc->pkts_sent == loc->mbuf_free)) { 2941 /* 2942 * All the packets from the burst beginning 2943 * are inline, we can free mbufs directly 2944 * from the origin array on tx_burst exit(). 2945 */ 2946 loc->mbuf_free++; 2947 goto next_mbuf; 2948 } 2949 /* 2950 * In order no to call rte_pktmbuf_free_seg() here, 2951 * in the most inner loop (that might be very 2952 * expensive) we just save the mbuf in elts. 2953 */ 2954 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2955 loc->elts_free--; 2956 goto next_mbuf; 2957 pointer_empw: 2958 /* 2959 * No pointer and inline descriptor 2960 * intermix for legacy MPW sessions. 
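 * A legacy MPW session must stay homogeneous: either a single inline
 * blob prefixed with bcount, or a list of pointer descriptors.
 * Illustrative outcome: if the session was opened as inline (the
 * dseg[0].bcount placeholder is still zero) and a non-inlinable
 * packet arrives, the session is closed here and the packet opens a
 * new, pointer-only session.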
2961 */ 2962 if (MLX5_TXOFF_CONFIG(MPW) && 2963 part != room && 2964 wqem->dseg[0].bcount == RTE_BE32(0)) 2965 break; 2966 /* 2967 * Not inlinable VLAN packets are 2968 * proceeded outside of this routine. 2969 */ 2970 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2971 if (MLX5_TXOFF_CONFIG(VLAN)) 2972 MLX5_ASSERT(!(loc->mbuf->ol_flags & 2973 RTE_MBUF_F_TX_VLAN)); 2974 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2975 /* We have to store mbuf in elts.*/ 2976 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2977 loc->elts_free--; 2978 room -= MLX5_WQE_DSEG_SIZE; 2979 /* Ring buffer wraparound is checked at the loop end.*/ 2980 ++dseg; 2981 next_mbuf: 2982 #ifdef MLX5_PMD_SOFT_COUNTERS 2983 /* Update sent data bytes counter. */ 2984 slen += dlen; 2985 #endif 2986 loc->pkts_sent++; 2987 pkts_n--; 2988 if (unlikely(!pkts_n || !loc->elts_free)) { 2989 /* 2990 * We have no resources/packets to 2991 * continue build descriptors. 2992 */ 2993 part -= room; 2994 mlx5_tx_idone_empw(txq, loc, part, 2995 slen, wqem, olx); 2996 return MLX5_TXCMP_CODE_EXIT; 2997 } 2998 loc->mbuf = *pkts++; 2999 if (likely(pkts_n > 1)) 3000 rte_prefetch0(*pkts); 3001 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3002 /* 3003 * Unroll the completion code to avoid 3004 * returning variable value - it results in 3005 * unoptimized sequent checking in caller. 3006 */ 3007 if (ret == MLX5_TXCMP_CODE_MULTI) { 3008 part -= room; 3009 mlx5_tx_idone_empw(txq, loc, part, 3010 slen, wqem, olx); 3011 if (unlikely(!loc->elts_free || 3012 !loc->wqe_free)) 3013 return MLX5_TXCMP_CODE_EXIT; 3014 return MLX5_TXCMP_CODE_MULTI; 3015 } 3016 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3017 if (ret == MLX5_TXCMP_CODE_TSO) { 3018 part -= room; 3019 mlx5_tx_idone_empw(txq, loc, part, 3020 slen, wqem, olx); 3021 if (unlikely(!loc->elts_free || 3022 !loc->wqe_free)) 3023 return MLX5_TXCMP_CODE_EXIT; 3024 return MLX5_TXCMP_CODE_TSO; 3025 } 3026 if (ret == MLX5_TXCMP_CODE_SINGLE) { 3027 part -= room; 3028 mlx5_tx_idone_empw(txq, loc, part, 3029 slen, wqem, olx); 3030 if (unlikely(!loc->elts_free || 3031 !loc->wqe_free)) 3032 return MLX5_TXCMP_CODE_EXIT; 3033 return MLX5_TXCMP_CODE_SINGLE; 3034 } 3035 if (ret != MLX5_TXCMP_CODE_EMPW) { 3036 MLX5_ASSERT(false); 3037 part -= room; 3038 mlx5_tx_idone_empw(txq, loc, part, 3039 slen, wqem, olx); 3040 return MLX5_TXCMP_CODE_ERROR; 3041 } 3042 /* Check if we have minimal room left. */ 3043 nlim--; 3044 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 3045 break; 3046 /* 3047 * Check whether packet parameters coincide 3048 * within assumed eMPW batch: 3049 * - check sum settings 3050 * - metadata value 3051 * - software parser settings 3052 * - packets length (legacy MPW only) 3053 * - scheduling is not required 3054 */ 3055 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 3056 loc, dlen, olx)) 3057 break; 3058 /* Packet attributes match, continue the same eMPW. */ 3059 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3060 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3061 } 3062 /* 3063 * We get here to close an existing eMPW 3064 * session and start the new one. 3065 */ 3066 MLX5_ASSERT(pkts_n); 3067 part -= room; 3068 if (unlikely(!part)) 3069 return MLX5_TXCMP_CODE_EXIT; 3070 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 3071 if (unlikely(!loc->elts_free || 3072 !loc->wqe_free)) 3073 return MLX5_TXCMP_CODE_EXIT; 3074 /* Continue the loop with new eMPW session. */ 3075 } 3076 MLX5_ASSERT(false); 3077 } 3078 3079 /** 3080 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 
3081 * Data inlining and VLAN insertion are supported. 3082 */ 3083 static __rte_always_inline enum mlx5_txcmp_code 3084 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 3085 struct rte_mbuf **__rte_restrict pkts, 3086 unsigned int pkts_n, 3087 struct mlx5_txq_local *__rte_restrict loc, 3088 unsigned int olx) 3089 { 3090 /* 3091 * Subroutine is the part of mlx5_tx_burst_single() 3092 * and sends single-segment packet with SEND opcode. 3093 */ 3094 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3095 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3096 pkts += loc->pkts_sent + 1; 3097 pkts_n -= loc->pkts_sent; 3098 for (;;) { 3099 struct mlx5_wqe *__rte_restrict wqe; 3100 enum mlx5_txcmp_code ret; 3101 3102 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3103 MLX5_ASSERT(loc->elts_free); 3104 if (MLX5_TXOFF_CONFIG(TXPP)) { 3105 enum mlx5_txcmp_code wret; 3106 3107 /* Generate WAIT for scheduling if requested. */ 3108 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 3109 if (wret == MLX5_TXCMP_CODE_EXIT) 3110 return MLX5_TXCMP_CODE_EXIT; 3111 if (wret == MLX5_TXCMP_CODE_ERROR) 3112 return MLX5_TXCMP_CODE_ERROR; 3113 } 3114 if (MLX5_TXOFF_CONFIG(INLINE)) { 3115 unsigned int inlen, vlan = 0; 3116 3117 inlen = rte_pktmbuf_data_len(loc->mbuf); 3118 if (MLX5_TXOFF_CONFIG(VLAN) && 3119 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 3120 vlan = sizeof(struct rte_vlan_hdr); 3121 inlen += vlan; 3122 } 3123 /* 3124 * If inlining is enabled at configuration time 3125 * the limit must be not less than minimal size. 3126 * Otherwise we would do extra check for data 3127 * size to avoid crashes due to length overflow. 3128 */ 3129 MLX5_ASSERT(txq->inlen_send >= 3130 MLX5_ESEG_MIN_INLINE_SIZE); 3131 if (inlen <= txq->inlen_send) { 3132 unsigned int seg_n, wqe_n; 3133 3134 rte_prefetch0(rte_pktmbuf_mtod 3135 (loc->mbuf, uint8_t *)); 3136 /* Check against minimal length. */ 3137 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3138 return MLX5_TXCMP_CODE_ERROR; 3139 if (loc->mbuf->ol_flags & 3140 RTE_MBUF_F_TX_DYNF_NOINLINE) { 3141 /* 3142 * The hint flag not to inline packet 3143 * data is set. Check whether we can 3144 * follow the hint. 3145 */ 3146 if ((!MLX5_TXOFF_CONFIG(EMPW) && 3147 txq->inlen_mode) || 3148 (MLX5_TXOFF_CONFIG(MPW) && 3149 txq->inlen_mode)) { 3150 if (inlen <= txq->inlen_send) 3151 goto single_inline; 3152 /* 3153 * The hardware requires the 3154 * minimal inline data header. 3155 */ 3156 goto single_min_inline; 3157 } 3158 if (MLX5_TXOFF_CONFIG(VLAN) && 3159 vlan && !txq->vlan_en) { 3160 /* 3161 * We must insert VLAN tag 3162 * by software means. 3163 */ 3164 goto single_part_inline; 3165 } 3166 goto single_no_inline; 3167 } 3168 single_inline: 3169 /* 3170 * Completely inlined packet data WQE: 3171 * - Control Segment, SEND opcode 3172 * - Ethernet Segment, no VLAN insertion 3173 * - Data inlined, VLAN optionally inserted 3174 * - Alignment to MLX5_WSEG_SIZE 3175 * Have to estimate amount of WQEBBs 3176 */ 3177 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 3178 MLX5_ESEG_MIN_INLINE_SIZE + 3179 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3180 /* Check if there are enough WQEBBs. 
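 * Worked example (illustrative, MLX5_WSEG_SIZE = 16 and
 * MLX5_ESEG_MIN_INLINE_SIZE = 18 assumed): inlen = 200 bytes yields
 *   seg_n = (200 + 48 - 18 + 15) / 16 = 15,
 *   wqe_n = (15 + 3) / 4 = 4 WQEBBs
 * for the completely inlined packet.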
*/ 3181 wqe_n = (seg_n + 3) / 4; 3182 if (wqe_n > loc->wqe_free) 3183 return MLX5_TXCMP_CODE_EXIT; 3184 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3185 loc->wqe_last = wqe; 3186 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 3187 MLX5_OPCODE_SEND, olx); 3188 mlx5_tx_eseg_data(txq, loc, wqe, 3189 vlan, inlen, 0, olx); 3190 txq->wqe_ci += wqe_n; 3191 loc->wqe_free -= wqe_n; 3192 /* 3193 * Packet data are completely inlined, 3194 * free the packet immediately. 3195 */ 3196 rte_pktmbuf_free_seg(loc->mbuf); 3197 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 3198 MLX5_TXOFF_CONFIG(MPW)) && 3199 txq->inlen_mode) { 3200 /* 3201 * If minimal inlining is requested the eMPW 3202 * feature should be disabled due to data is 3203 * inlined into Ethernet Segment, which can 3204 * not contain inlined data for eMPW due to 3205 * segment shared for all packets. 3206 */ 3207 struct mlx5_wqe_dseg *__rte_restrict dseg; 3208 unsigned int ds; 3209 uint8_t *dptr; 3210 3211 /* 3212 * The inline-mode settings require 3213 * to inline the specified amount of 3214 * data bytes to the Ethernet Segment. 3215 * We should check the free space in 3216 * WQE ring buffer to inline partially. 3217 */ 3218 single_min_inline: 3219 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 3220 MLX5_ASSERT(inlen > txq->inlen_mode); 3221 MLX5_ASSERT(txq->inlen_mode >= 3222 MLX5_ESEG_MIN_INLINE_SIZE); 3223 /* 3224 * Check whether there are enough free WQEBBs: 3225 * - Control Segment 3226 * - Ethernet Segment 3227 * - First Segment of inlined Ethernet data 3228 * - ... data continued ... 3229 * - Finishing Data Segment of pointer type 3230 */ 3231 ds = (MLX5_WQE_CSEG_SIZE + 3232 MLX5_WQE_ESEG_SIZE + 3233 MLX5_WQE_DSEG_SIZE + 3234 txq->inlen_mode - 3235 MLX5_ESEG_MIN_INLINE_SIZE + 3236 MLX5_WQE_DSEG_SIZE + 3237 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3238 if (loc->wqe_free < ((ds + 3) / 4)) 3239 return MLX5_TXCMP_CODE_EXIT; 3240 /* 3241 * Build the ordinary SEND WQE: 3242 * - Control Segment 3243 * - Ethernet Segment, inline inlen_mode bytes 3244 * - Data Segment of pointer type 3245 */ 3246 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3247 loc->wqe_last = wqe; 3248 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3249 MLX5_OPCODE_SEND, olx); 3250 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 3251 txq->inlen_mode, 3252 0, olx); 3253 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3254 txq->inlen_mode - vlan; 3255 inlen -= txq->inlen_mode; 3256 mlx5_tx_dseg_ptr(txq, loc, dseg, 3257 dptr, inlen, olx); 3258 /* 3259 * WQE is built, update the loop parameters 3260 * and got to the next packet. 3261 */ 3262 txq->wqe_ci += (ds + 3) / 4; 3263 loc->wqe_free -= (ds + 3) / 4; 3264 /* We have to store mbuf in elts.*/ 3265 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3266 txq->elts[txq->elts_head++ & txq->elts_m] = 3267 loc->mbuf; 3268 --loc->elts_free; 3269 } else { 3270 uint8_t *dptr; 3271 unsigned int dlen; 3272 3273 /* 3274 * Partially inlined packet data WQE, we have 3275 * some space in title WQEBB, we can fill it 3276 * with some packet data. It takes one WQEBB, 3277 * it is available, no extra space check: 3278 * - Control Segment, SEND opcode 3279 * - Ethernet Segment, no VLAN insertion 3280 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 3281 * - Data Segment, pointer type 3282 * 3283 * We also get here if VLAN insertion is not 3284 * supported by HW, the inline is enabled. 
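 * In that case the first MLX5_ESEG_MIN_INLINE_SIZE bytes (enough for
 * the L2 header plus the software-built VLAN tag, assuming the usual
 * 18-byte minimum) are inlined into the Ethernet Segment, and the
 * remainder of the packet is attached with a single pointer Data
 * Segment - see the dseg[1] setup below.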
3285 */ 3286 single_part_inline: 3287 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3288 loc->wqe_last = wqe; 3289 mlx5_tx_cseg_init(txq, loc, wqe, 4, 3290 MLX5_OPCODE_SEND, olx); 3291 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 3292 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3293 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 3294 /* 3295 * The length check is performed above, by 3296 * comparing with txq->inlen_send. We should 3297 * not get overflow here. 3298 */ 3299 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 3300 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 3301 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 3302 dptr, dlen, olx); 3303 ++txq->wqe_ci; 3304 --loc->wqe_free; 3305 /* We have to store mbuf in elts.*/ 3306 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3307 txq->elts[txq->elts_head++ & txq->elts_m] = 3308 loc->mbuf; 3309 --loc->elts_free; 3310 } 3311 #ifdef MLX5_PMD_SOFT_COUNTERS 3312 /* Update sent data bytes counter. */ 3313 txq->stats.obytes += vlan + 3314 rte_pktmbuf_data_len(loc->mbuf); 3315 #endif 3316 } else { 3317 /* 3318 * No inline at all, it means the CPU cycles saving 3319 * is prioritized at configuration, we should not 3320 * copy any packet data to WQE. 3321 * 3322 * SEND WQE, one WQEBB: 3323 * - Control Segment, SEND opcode 3324 * - Ethernet Segment, optional VLAN, no inline 3325 * - Data Segment, pointer type 3326 */ 3327 single_no_inline: 3328 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3329 loc->wqe_last = wqe; 3330 mlx5_tx_cseg_init(txq, loc, wqe, 3, 3331 MLX5_OPCODE_SEND, olx); 3332 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3333 mlx5_tx_dseg_ptr 3334 (txq, loc, &wqe->dseg[0], 3335 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3336 rte_pktmbuf_data_len(loc->mbuf), olx); 3337 ++txq->wqe_ci; 3338 --loc->wqe_free; 3339 /* 3340 * We should not store mbuf pointer in elts 3341 * if no inlining is configured, this is done 3342 * by calling routine in a batch copy. 3343 */ 3344 if (MLX5_TXOFF_CONFIG(INLINE)) 3345 txq->elts[txq->elts_head++ & txq->elts_m] = 3346 loc->mbuf; 3347 --loc->elts_free; 3348 #ifdef MLX5_PMD_SOFT_COUNTERS 3349 /* Update sent data bytes counter. */ 3350 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 3351 if (MLX5_TXOFF_CONFIG(VLAN) && 3352 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 3353 txq->stats.obytes += 3354 sizeof(struct rte_vlan_hdr); 3355 #endif 3356 } 3357 ++loc->pkts_sent; 3358 --pkts_n; 3359 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3360 return MLX5_TXCMP_CODE_EXIT; 3361 loc->mbuf = *pkts++; 3362 if (pkts_n > 1) 3363 rte_prefetch0(*pkts); 3364 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3365 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 3366 return ret; 3367 } 3368 MLX5_ASSERT(false); 3369 } 3370 3371 static __rte_always_inline enum mlx5_txcmp_code 3372 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 3373 struct rte_mbuf **__rte_restrict pkts, 3374 unsigned int pkts_n, 3375 struct mlx5_txq_local *__rte_restrict loc, 3376 unsigned int olx) 3377 { 3378 enum mlx5_txcmp_code ret; 3379 3380 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 3381 if (ret == MLX5_TXCMP_CODE_SINGLE) 3382 goto ordinary_send; 3383 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 3384 for (;;) { 3385 /* Optimize for inline/no inline eMPW send. */ 3386 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 3387 mlx5_tx_burst_empw_inline 3388 (txq, pkts, pkts_n, loc, olx) : 3389 mlx5_tx_burst_empw_simple 3390 (txq, pkts, pkts_n, loc, olx); 3391 if (ret != MLX5_TXCMP_CODE_SINGLE) 3392 return ret; 3393 /* The resources to send one packet should remain. 
static __rte_always_inline enum mlx5_txcmp_code
mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq,
		     struct rte_mbuf **__rte_restrict pkts,
		     unsigned int pkts_n,
		     struct mlx5_txq_local *__rte_restrict loc,
		     unsigned int olx)
{
	enum mlx5_txcmp_code ret;

	ret = mlx5_tx_able_to_empw(txq, loc, olx, false);
	if (ret == MLX5_TXCMP_CODE_SINGLE)
		goto ordinary_send;
	MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW);
	for (;;) {
		/* Optimize for inline/no inline eMPW send. */
		ret = (MLX5_TXOFF_CONFIG(INLINE)) ?
			mlx5_tx_burst_empw_inline
				(txq, pkts, pkts_n, loc, olx) :
			mlx5_tx_burst_empw_simple
				(txq, pkts, pkts_n, loc, olx);
		if (ret != MLX5_TXCMP_CODE_SINGLE)
			return ret;
		/* The resources to send one packet should remain. */
		MLX5_ASSERT(loc->elts_free && loc->wqe_free);
ordinary_send:
		ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx);
		MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE);
		if (ret != MLX5_TXCMP_CODE_EMPW)
			return ret;
		/* The resources to send one packet should remain. */
		MLX5_ASSERT(loc->elts_free && loc->wqe_free);
	}
}
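
/*
 * Usage sketch (added, not from the original sources; the routine name is
 * hypothetical): a concrete Tx burst routine is generated from
 * mlx5_tx_burst_tmpl() below by instantiating it with a compile-time
 * constant offload mask, so that every MLX5_TXOFF_CONFIG() check folds away:
 *
 *	MLX5_TXOFF_DECL(full_example,
 *			MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW);
 *
 * This expands to a mlx5_tx_burst_full_example() wrapper that simply calls
 * mlx5_tx_burst_tmpl() with that constant mask; one such generated function
 * is selected at SQ configuration time.
 */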

/**
 * DPDK Tx callback template. This is the configured template used to
 * generate routines optimized for the specified offload setup.
 * One of these generated functions is chosen at SQ configuration time.
 *
 * @param txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 * @param olx
 *   Configured offload mask, composed of MLX5_TXOFF_CONFIG_xxx bits.
 *   Should be a compile-time constant to take advantage of static
 *   configuration optimization.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
static __rte_always_inline uint16_t
mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq,
		   struct rte_mbuf **__rte_restrict pkts,
		   uint16_t pkts_n,
		   unsigned int olx)
{
	struct mlx5_txq_local loc;
	enum mlx5_txcmp_code ret;
	unsigned int part;

	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (unlikely(!pkts_n))
		return 0;
	if (MLX5_TXOFF_CONFIG(INLINE))
		loc.mbuf_free = 0;
	loc.pkts_sent = 0;
	loc.pkts_copy = 0;
	loc.wqe_last = NULL;

send_loop:
	loc.pkts_loop = loc.pkts_sent;
	/*
	 * Check if there are some CQEs, if any:
	 * - process encountered errors
	 * - process the completed WQEs
	 * - free related mbufs
	 * - doorbell the NIC about processed CQEs
	 */
	rte_prefetch0(*(pkts + loc.pkts_sent));
	mlx5_tx_handle_completion(txq, olx);
	/*
	 * Calculate the number of available resources - elts and WQEs.
	 * There are two possible different scenarios:
	 * - no data inlining into WQEs, one WQEBB may contain up to
	 *   four packets, in this case elts become the scarce resource
	 * - data inlining into WQEs, one packet may require multiple
	 *   WQEBBs, the WQEs become the limiting factor.
	 */
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	loc.elts_free = txq->elts_s -
				(uint16_t)(txq->elts_head - txq->elts_tail);
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	loc.wqe_free = txq->wqe_s -
				(uint16_t)(txq->wqe_ci - txq->wqe_pi);
	if (unlikely(!loc.elts_free || !loc.wqe_free))
		goto burst_exit;
	for (;;) {
		/*
		 * Fetch the packet from array. Usually this is the first
		 * packet in a series of multi/single segment packets.
		 */
		loc.mbuf = *(pkts + loc.pkts_sent);
		/* Dedicated branch for multi-segment packets. */
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    unlikely(NB_SEGS(loc.mbuf) > 1)) {
			/*
			 * Multi-segment packet encountered.
			 * Hardware is able to process it only
			 * with SEND/TSO opcodes, one packet
			 * per WQE, do it in a dedicated routine.
			 */
enter_send_multi:
			MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy);
			part = loc.pkts_sent - loc.pkts_copy;
			if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
				/*
				 * There are some single-segment mbufs not
				 * stored in elts. The mbufs must be in the
				 * same order as the WQEs, so we must copy
				 * the mbufs to elts here, before the mbufs
				 * of the coming multi-segment packet are
				 * appended.
				 */
				mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy,
						  part, olx);
				loc.pkts_copy = loc.pkts_sent;
			}
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx);
			if (!MLX5_TXOFF_CONFIG(INLINE))
				loc.pkts_copy = loc.pkts_sent;
			/*
			 * These return code checks are supposed
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT) {
				/*
				 * The routine returns this code when
				 * all packets are sent or there are not
				 * enough resources to complete the request.
				 */
				break;
			}
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				/*
				 * The routine returns this code when some
				 * error in the incoming packet format
				 * occurred.
				 */
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE) {
				/*
				 * The single-segment packet was encountered
				 * in the array, try to send it in the best
				 * optimized way, possibly engaging eMPW.
				 */
				goto enter_send_single;
			}
			if (MLX5_TXOFF_CONFIG(TSO) &&
			    ret == MLX5_TXCMP_CODE_TSO) {
				/*
				 * The single-segment TSO packet was
				 * encountered in the array.
				 */
				goto enter_send_tso;
			}
			/* We must not get here. Something is going wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/* Dedicated branch for single-segment TSO packets. */
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    unlikely(loc.mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
			/*
			 * TSO might require a special way of inlining
			 * (dedicated parameters) and is sent with the
			 * MLX5_OPCODE_TSO opcode only, so provide this
			 * in a dedicated branch.
			 */
enter_send_tso:
			MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1);
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx);
			/*
			 * These return code checks are supposed
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT)
				break;
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE)
				goto enter_send_single;
			if (MLX5_TXOFF_CONFIG(MULTI) &&
			    ret == MLX5_TXCMP_CODE_MULTI) {
				/*
				 * The multi-segment packet was
				 * encountered in the array.
				 */
				goto enter_send_multi;
			}
			/* We must not get here. Something is going wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
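		/*
		 * Illustrative example (added, not from the original
		 * sources): with MULTI and TSO configured, a mixed burst
		 * like [single, single, TSO, multi, single] typically
		 * flows through the branches as follows:
		 * - enter_send_single sends the two single packets and
		 *   returns MLX5_TXCMP_CODE_TSO -> goto enter_send_tso;
		 * - the TSO routine sends the TSO packet and returns
		 *   MLX5_TXCMP_CODE_MULTI -> goto enter_send_multi;
		 * - the multi-segment routine sends its packet and returns
		 *   MLX5_TXCMP_CODE_SINGLE -> goto enter_send_single again.
		 */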
3598 */ 3599 if (ret == MLX5_TXCMP_CODE_EXIT) 3600 break; 3601 if (ret == MLX5_TXCMP_CODE_ERROR) { 3602 txq->stats.oerrors++; 3603 break; 3604 } 3605 if (MLX5_TXOFF_CONFIG(MULTI) && 3606 ret == MLX5_TXCMP_CODE_MULTI) { 3607 /* 3608 * The multi-segment packet was 3609 * encountered in the array. 3610 */ 3611 goto enter_send_multi; 3612 } 3613 if (MLX5_TXOFF_CONFIG(TSO) && 3614 ret == MLX5_TXCMP_CODE_TSO) { 3615 /* 3616 * The single-segment TSO packet was 3617 * encountered in the array. 3618 */ 3619 goto enter_send_tso; 3620 } 3621 /* We must not get here. Something is going wrong. */ 3622 MLX5_ASSERT(false); 3623 txq->stats.oerrors++; 3624 break; 3625 } 3626 /* 3627 * Main Tx loop is completed, do the rest: 3628 * - set completion request if thresholds are reached 3629 * - doorbell the hardware 3630 * - copy the rest of mbufs to elts (if any) 3631 */ 3632 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) || 3633 loc.pkts_sent >= loc.pkts_copy); 3634 /* Take a shortcut if nothing is sent. */ 3635 if (unlikely(loc.pkts_sent == loc.pkts_loop)) 3636 goto burst_exit; 3637 /* Request CQE generation if limits are reached. */ 3638 mlx5_tx_request_completion(txq, &loc, olx); 3639 /* 3640 * Ring QP doorbell immediately after WQE building completion 3641 * to improve latencies. The pure software related data treatment 3642 * can be completed after doorbell. Tx CQEs for this SQ are 3643 * processed in this thread only by the polling. 3644 * 3645 * The rdma core library can map doorbell register in two ways, 3646 * depending on the environment variable "MLX5_SHUT_UP_BF": 3647 * 3648 * - as regular cached memory, the variable is either missing or 3649 * set to zero. This type of mapping may cause the significant 3650 * doorbell register writing latency and requires explicit memory 3651 * write barrier to mitigate this issue and prevent write combining. 3652 * 3653 * - as non-cached memory, the variable is present and set to not "0" 3654 * value. This type of mapping may cause performance impact under 3655 * heavy loading conditions but the explicit write memory barrier is 3656 * not required and it may improve core performance. 3657 * 3658 * - the legacy behaviour (prior 19.08 release) was to use some 3659 * heuristics to decide whether write memory barrier should 3660 * be performed. This behavior is supported with specifying 3661 * tx_db_nc=2, write barrier is skipped if application provides 3662 * the full recommended burst of packets, it supposes the next 3663 * packets are coming and the write barrier will be issued on 3664 * the next burst (after descriptor writing, at least). 3665 */ 3666 mlx5_doorbell_ring(mlx5_tx_bfreg(txq), 3667 *(volatile uint64_t *)loc.wqe_last, txq->wqe_ci, 3668 txq->qp_db, !txq->db_nc && 3669 (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST)); 3670 /* Not all of the mbufs may be stored into elts yet. */ 3671 part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy; 3672 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 3673 /* 3674 * There are some single-segment mbufs not stored in elts. 3675 * It can be only if the last packet was single-segment. 3676 * The copying is gathered into one place due to it is 3677 * a good opportunity to optimize that with SIMD. 3678 * Unfortunately if inlining is enabled the gaps in pointer 3679 * array may happen due to early freeing of the inlined mbufs. 
3680 */ 3681 mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx); 3682 loc.pkts_copy = loc.pkts_sent; 3683 } 3684 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 3685 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 3686 if (pkts_n > loc.pkts_sent) { 3687 /* 3688 * If burst size is large there might be no enough CQE 3689 * fetched from completion queue and no enough resources 3690 * freed to send all the packets. 3691 */ 3692 goto send_loop; 3693 } 3694 burst_exit: 3695 #ifdef MLX5_PMD_SOFT_COUNTERS 3696 /* Increment sent packets counter. */ 3697 txq->stats.opackets += loc.pkts_sent; 3698 #endif 3699 if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free) 3700 __mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx); 3701 return loc.pkts_sent; 3702 } 3703 3704 #endif /* RTE_PMD_MLX5_TX_H_ */ 3705