/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2021 6WIND S.A.
 * Copyright 2021 Mellanox Technologies, Ltd
 */

#ifndef RTE_PMD_MLX5_TX_H_
#define RTE_PMD_MLX5_TX_H_

#include <stdint.h>
#include <sys/queue.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_common.h>
#include <rte_spinlock.h>

#include <mlx5_common.h>
#include <mlx5_common_mr.h>

#include "mlx5.h"
#include "mlx5_autoconf.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
	MLX5_TXCMP_CODE_EXIT = 0,
	MLX5_TXCMP_CODE_ERROR,
	MLX5_TXCMP_CODE_SINGLE,
	MLX5_TXCMP_CODE_MULTI,
	MLX5_TXCMP_CODE_TSO,
	MLX5_TXCMP_CODE_EMPW,
};

/*
 * These defines are used to configure the Tx burst routine option set
 * supported at compile time. The options not specified are optimized out
 * because the related if conditions can be evaluated at compile time.
 * The offloads with bigger runtime check overhead (requiring more CPU
 * cycles to skip) should have the bigger index - this is needed to select
 * the better matching routine function if there is no exact match and some
 * offloads are not actually requested.
 */
#define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets. */
#define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported. */
#define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads. */
#define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Check Sums offloaded. */
#define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported. */
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported. */
#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported. */
#define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp. */

/* The most common offloads groups. */
#define MLX5_TXOFF_CONFIG_NONE 0
#define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \
				MLX5_TXOFF_CONFIG_TSO | \
				MLX5_TXOFF_CONFIG_SWP | \
				MLX5_TXOFF_CONFIG_CSUM | \
				MLX5_TXOFF_CONFIG_INLINE | \
				MLX5_TXOFF_CONFIG_VLAN | \
				MLX5_TXOFF_CONFIG_METADATA)

#define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask)

#define MLX5_TXOFF_PRE_DECL(func) \
uint16_t mlx5_tx_burst_##func(void *txq, \
			      struct rte_mbuf **pkts, \
			      uint16_t pkts_n)

#define MLX5_TXOFF_DECL(func, olx) \
uint16_t mlx5_tx_burst_##func(void *txq, \
			      struct rte_mbuf **pkts, \
			      uint16_t pkts_n) \
{ \
	return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \
				  pkts, pkts_n, (olx)); \
}
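/*
 * Illustrative example (not part of the driver): a hypothetical Tx burst
 * variant named "csum_vlan" could be declared and defined with the macros
 * above; inside the shared template the MLX5_TXOFF_CONFIG() test folds to a
 * compile-time constant, so branches for unused offloads are optimized out:
 *
 *   MLX5_TXOFF_PRE_DECL(csum_vlan);
 *   MLX5_TXOFF_DECL(csum_vlan,
 *                   MLX5_TXOFF_CONFIG_CSUM | MLX5_TXOFF_CONFIG_VLAN)
 *
 *   if (MLX5_TXOFF_CONFIG(VLAN) && ...)  // kept only if the VLAN bit is set
 */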
/* Mbuf dynamic flag offset for inline. */
extern uint64_t rte_net_mlx5_dynf_inline_mask;
#define RTE_MBUF_F_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask

extern uint32_t mlx5_ptype_table[] __rte_cache_aligned;
extern uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned;
extern uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned;

struct mlx5_txq_stats {
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint64_t opackets; /**< Total of successfully sent packets. */
	uint64_t obytes; /**< Total of successfully sent bytes. */
#endif
	uint64_t oerrors; /**< Total number of failed transmitted packets. */
};

/* TX queue send local data. */
__extension__
struct mlx5_txq_local {
	struct mlx5_wqe *wqe_last; /* last sent WQE pointer. */
	struct rte_mbuf *mbuf; /* first mbuf to process. */
	uint16_t pkts_copy; /* packets copied to elts. */
	uint16_t pkts_sent; /* packets sent. */
	uint16_t pkts_loop; /* packets sent on loop entry. */
	uint16_t elts_free; /* available elts remain. */
	uint16_t wqe_free; /* available wqe remain. */
	uint16_t mbuf_off; /* data offset in current mbuf. */
	uint16_t mbuf_nseg; /* number of remaining mbufs. */
	uint16_t mbuf_free; /* number of inline mbufs to free. */
};

/* TX queue descriptor. */
__extension__
struct mlx5_txq_data {
	uint16_t elts_head; /* Current counter in (*elts)[]. */
	uint16_t elts_tail; /* Counter of first element awaiting completion. */
	uint16_t elts_comp; /* elts index since last completion request. */
	uint16_t elts_s; /* Number of mbuf elements. */
	uint16_t elts_m; /* Mask for mbuf elements indices. */
	/* Fields related to elts mbuf storage. */
	uint16_t wqe_ci; /* Consumer index for work queue. */
	uint16_t wqe_pi; /* Producer index for work queue. */
	uint16_t wqe_s; /* Number of WQ elements. */
	uint16_t wqe_m; /* Mask for WQ elements indices. */
	uint16_t wqe_comp; /* WQE index since last completion request. */
	uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */
	/* WQ related fields. */
	uint16_t cq_ci; /* Consumer index for completion queue. */
	uint16_t cq_pi; /* Producer index for completion queue. */
	uint16_t cqe_s; /* Number of CQ elements. */
	uint16_t cqe_m; /* Mask for CQ indices. */
	/* CQ related fields. */
	uint16_t elts_n:4; /* elts[] length (in log2). */
	uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
	uint16_t wqe_n:4; /* Number of WQ elements (in log2). */
	uint16_t tso_en:1; /* When set hardware TSO is enabled. */
	uint16_t tunnel_en:1;
	/* When set TX offloads for tunneled packets are supported. */
	uint16_t swp_en:1; /* Whether SW parser is enabled. */
	uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */
	uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */
	uint16_t db_heu:1; /* Doorbell heuristic write barrier. */
	uint16_t rt_timestamp:1; /* Realtime timestamp format. */
	uint16_t wait_on_time:1; /* WQE with timestamp is supported. */
	uint16_t fast_free:1; /* mbuf fast free on Tx is enabled. */
	uint16_t inlen_send; /* Ordinary send data inline size. */
	uint16_t inlen_empw; /* eMPW max packet size to inline. */
	uint16_t inlen_mode; /* Minimal data length to inline. */
	uint32_t qp_num_8s; /* QP number shifted by 8. */
	uint64_t offloads; /* Offloads for Tx Queue. */
	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
	struct mlx5_wqe *wqes; /* Work queue. */
	struct mlx5_wqe *wqes_end; /* Work queue array limit. */
#ifdef RTE_LIBRTE_MLX5_DEBUG
	uint32_t *fcqs; /* Free completion queue (debug extended). */
#else
	uint16_t *fcqs; /* Free completion queue. */
#endif
	volatile struct mlx5_cqe *cqes; /* Completion queue. */
	volatile uint32_t *qp_db; /* Work queue doorbell. */
	volatile uint32_t *cq_db; /* Completion queue doorbell. */
	uint16_t port_id; /* Port ID of device. */
	uint16_t idx; /* Queue index. */
	uint64_t rt_timemask; /* Scheduling timestamp mask. */
	uint64_t ts_mask; /* Timestamp flag dynamic mask. */
	int32_t ts_offset; /* Timestamp field dynamic offset. */
	struct mlx5_dev_ctx_shared *sh; /* Shared context. */
	struct mlx5_txq_stats stats; /* TX queue counters. */
	struct mlx5_txq_stats stats_reset; /* stats on last reset. */
	struct mlx5_uar_data uar_data;
	struct rte_mbuf *elts[0];
	/* Storage for queued packets, must be the last field. */
} __rte_cache_aligned;
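/*
 * Illustrative note (assumption for clarity, not driver code): elts_s is a
 * power of two and elts_m is the matching index mask, so the free-running
 * 16-bit counters above are mapped to ring slots by masking, e.g.:
 *
 *   struct rte_mbuf *m = txq->elts[txq->elts_head & txq->elts_m];
 *   uint16_t in_use = (uint16_t)(txq->elts_head - txq->elts_tail);
 */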
/* TX queue control descriptor. */
struct mlx5_txq_ctrl {
	LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */
	uint32_t refcnt; /* Reference counter. */
	unsigned int socket; /* CPU socket ID for allocations. */
	bool is_hairpin; /* Whether TxQ type is Hairpin. */
	unsigned int max_inline_data; /* Max inline data. */
	unsigned int max_tso_header; /* Max TSO header size. */
	struct mlx5_txq_obj *obj; /* Verbs/DevX queue object. */
	struct mlx5_priv *priv; /* Back pointer to private data. */
	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
	uint16_t dump_file_n; /* Number of dump files. */
	struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */
	uint32_t hairpin_status; /* Hairpin binding status. */
	struct mlx5_txq_data txq; /* Data path structure. */
	/* Must be the last field in the structure, contains elts[]. */
};

/* mlx5_txq.c */

int mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
			unsigned int socket, const struct rte_eth_txconf *conf);
int mlx5_tx_hairpin_queue_setup
	(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	 const struct rte_eth_hairpin_conf *hairpin_conf);
void mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev);
int mlx5_txq_obj_verify(struct rte_eth_dev *dev);
struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx,
				   uint16_t desc, unsigned int socket,
				   const struct rte_eth_txconf *conf);
struct mlx5_txq_ctrl *mlx5_txq_hairpin_new
	(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	 const struct rte_eth_hairpin_conf *hairpin_conf);
struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_verify(struct rte_eth_dev *dev);
void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl);
void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl);
uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev);
void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev);

/* mlx5_tx.c */

void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
			       unsigned int olx __rte_unused);
int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset);
void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
		       struct rte_eth_txq_info *qinfo);
int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
			   struct rte_eth_burst_mode *mode);
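/*
 * Illustrative usage from the application side (assumption: the generic
 * ethdev API, nothing mlx5-specific): callbacks such as
 * mlx5_tx_burst_mode_get() back rte_eth_tx_burst_mode_get(), e.g.:
 *
 *   struct rte_eth_burst_mode bm;
 *
 *   if (rte_eth_tx_burst_mode_get(port_id, 0, &bm) == 0)
 *       printf("Tx burst mode: %s\n", bm.info);
 */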
/* mlx5_tx_empw.c */

MLX5_TXOFF_PRE_DECL(full_empw);
MLX5_TXOFF_PRE_DECL(none_empw);
MLX5_TXOFF_PRE_DECL(md_empw);
MLX5_TXOFF_PRE_DECL(mt_empw);
MLX5_TXOFF_PRE_DECL(mtsc_empw);
MLX5_TXOFF_PRE_DECL(mti_empw);
MLX5_TXOFF_PRE_DECL(mtv_empw);
MLX5_TXOFF_PRE_DECL(mtiv_empw);
MLX5_TXOFF_PRE_DECL(sc_empw);
MLX5_TXOFF_PRE_DECL(sci_empw);
MLX5_TXOFF_PRE_DECL(scv_empw);
MLX5_TXOFF_PRE_DECL(sciv_empw);
MLX5_TXOFF_PRE_DECL(i_empw);
MLX5_TXOFF_PRE_DECL(v_empw);
MLX5_TXOFF_PRE_DECL(iv_empw);

/* mlx5_tx_nompw.c */

MLX5_TXOFF_PRE_DECL(full);
MLX5_TXOFF_PRE_DECL(none);
MLX5_TXOFF_PRE_DECL(md);
MLX5_TXOFF_PRE_DECL(mt);
MLX5_TXOFF_PRE_DECL(mtsc);
MLX5_TXOFF_PRE_DECL(mti);
MLX5_TXOFF_PRE_DECL(mtv);
MLX5_TXOFF_PRE_DECL(mtiv);
MLX5_TXOFF_PRE_DECL(sc);
MLX5_TXOFF_PRE_DECL(sci);
MLX5_TXOFF_PRE_DECL(scv);
MLX5_TXOFF_PRE_DECL(sciv);
MLX5_TXOFF_PRE_DECL(i);
MLX5_TXOFF_PRE_DECL(v);
MLX5_TXOFF_PRE_DECL(iv);

/* mlx5_tx_txpp.c */

MLX5_TXOFF_PRE_DECL(full_ts_nompw);
MLX5_TXOFF_PRE_DECL(full_ts_nompwi);
MLX5_TXOFF_PRE_DECL(full_ts);
MLX5_TXOFF_PRE_DECL(full_ts_noi);
MLX5_TXOFF_PRE_DECL(none_ts);
MLX5_TXOFF_PRE_DECL(mdi_ts);
MLX5_TXOFF_PRE_DECL(mti_ts);
MLX5_TXOFF_PRE_DECL(mtiv_ts);

/* mlx5_tx_mpw.c */

MLX5_TXOFF_PRE_DECL(none_mpw);
MLX5_TXOFF_PRE_DECL(mci_mpw);
MLX5_TXOFF_PRE_DECL(mc_mpw);
MLX5_TXOFF_PRE_DECL(i_mpw);

static __rte_always_inline struct mlx5_uar_data *
mlx5_tx_bfreg(struct mlx5_txq_data *txq)
{
	return &MLX5_PROC_PRIV(txq->port_id)->uar_table[txq->idx];
}

/**
 * Ring TX queue doorbell and flush the update by write memory barrier.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param wqe
 *   Pointer to the last WQE posted in the NIC.
 */
static __rte_always_inline void
mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
{
	mlx5_doorbell_ring(mlx5_tx_bfreg(txq), *(volatile uint64_t *)wqe,
			   txq->wqe_ci, txq->qp_db, 1);
}
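/*
 * Illustrative only (assumed, simplified): a burst routine typically posts
 * several WQEs and then rings the doorbell once for the last one, e.g.:
 *
 *   if (likely(loc.pkts_sent > loc.pkts_loop))
 *       mlx5_tx_dbrec(txq, loc.wqe_last);
 */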
/**
 * Convert timestamp from mbuf format to linear counter
 * of Clock Queue completions (24 bits).
 *
 * @param sh
 *   Pointer to the device shared context to fetch Tx
 *   packet pacing timestamp and parameters.
 * @param mts
 *   Timestamp from mbuf to convert.
 * @return
 *   positive or zero value - completion ID to wait.
 *   negative value - conversion error.
 */
static __rte_always_inline int32_t
mlx5_txpp_convert_tx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t mts)
{
	uint64_t ts, ci;
	uint32_t tick;

	do {
		/*
		 * Read atomically two uint64_t fields and compare lsb bits.
		 * If there is no match - the timestamp was updated in
		 * the service thread, data should be re-read.
		 */
		rte_compiler_barrier();
		ci = __atomic_load_n(&sh->txpp.ts.ci_ts, __ATOMIC_RELAXED);
		ts = __atomic_load_n(&sh->txpp.ts.ts, __ATOMIC_RELAXED);
		rte_compiler_barrier();
		if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH)))
			break;
	} while (true);
	/* Perform the skew correction, positive value to send earlier. */
	mts -= sh->txpp.skew;
	mts -= ts;
	if (unlikely(mts >= UINT64_MAX / 2)) {
		/* We have a negative integer, mts is in the past. */
		__atomic_fetch_add(&sh->txpp.err_ts_past,
				   1, __ATOMIC_RELAXED);
		return -1;
	}
	tick = sh->txpp.tick;
	MLX5_ASSERT(tick);
	/* Convert delta to completions, round up. */
	mts = (mts + tick - 1) / tick;
	if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) {
		/* The mts is too distant in the future. */
		__atomic_fetch_add(&sh->txpp.err_ts_future,
				   1, __ATOMIC_RELAXED);
		return -1;
	}
	mts <<= 64 - MLX5_CQ_INDEX_WIDTH;
	ci += mts;
	ci >>= 64 - MLX5_CQ_INDEX_WIDTH;
	return ci;
}

/**
 * Set Software Parser flags and offsets in Ethernet Segment of WQE.
 * Flags must be preliminary initialized to zero.
 *
 * @param loc
 *   Pointer to burst routine local context.
 * @param swp_flags
 *   Pointer to store Software Parser flags.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Software Parser offsets packed in dword.
 *   Software Parser flags are set by pointer.
 */
static __rte_always_inline uint32_t
txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc,
		uint8_t *swp_flags,
		unsigned int olx)
{
	uint64_t ol, tunnel;
	unsigned int idx, off;
	uint32_t set;

	if (!MLX5_TXOFF_CONFIG(SWP))
		return 0;
	ol = loc->mbuf->ol_flags;
	tunnel = ol & RTE_MBUF_F_TX_TUNNEL_MASK;
	/*
	 * Check whether Software Parser is required.
	 * Only customized tunnels may ask for it.
	 */
	if (likely(tunnel != RTE_MBUF_F_TX_TUNNEL_UDP &&
		   tunnel != RTE_MBUF_F_TX_TUNNEL_IP))
		return 0;
	/*
	 * The index should have:
	 * bit[0:1] = RTE_MBUF_F_TX_L4_MASK
	 * bit[4] = RTE_MBUF_F_TX_IPV6
	 * bit[8] = RTE_MBUF_F_TX_OUTER_IPV6
	 * bit[9] = RTE_MBUF_F_TX_OUTER_UDP
	 */
	idx = (ol & (RTE_MBUF_F_TX_L4_MASK |
		     RTE_MBUF_F_TX_IPV6 |
		     RTE_MBUF_F_TX_OUTER_IPV6)) >> 52;
	idx |= (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) ? (1 << 9) : 0;
	*swp_flags = mlx5_swp_types_table[idx];
	/*
	 * Set offsets for SW parser. Since ConnectX-5, SW parser just
	 * complements HW parser. SW parser starts to engage only if HW parser
	 * can't reach a header. For the older devices, HW parser will not kick
	 * in if any of SWP offsets is set. Therefore, all of the L3 offsets
	 * should be set regardless of HW offload.
	 */
	off = loc->mbuf->outer_l2_len;
	if (MLX5_TXOFF_CONFIG(VLAN) && ol & RTE_MBUF_F_TX_VLAN)
		off += sizeof(struct rte_vlan_hdr);
	set = (off >> 1) << 8; /* Outer L3 offset. */
	off += loc->mbuf->outer_l3_len;
	if (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP)
		set |= off >> 1; /* Outer L4 offset. */
	if (ol & (RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IPV6)) { /* Inner IP. */
		const uint64_t csum = ol & RTE_MBUF_F_TX_L4_MASK;

		off += loc->mbuf->l2_len;
		set |= (off >> 1) << 24; /* Inner L3 offset. */
		if (csum == RTE_MBUF_F_TX_TCP_CKSUM ||
		    csum == RTE_MBUF_F_TX_UDP_CKSUM ||
		    (MLX5_TXOFF_CONFIG(TSO) && ol & RTE_MBUF_F_TX_TCP_SEG)) {
			off += loc->mbuf->l3_len;
			set |= (off >> 1) << 16; /* Inner L4 offset. */
		}
	}
	set = rte_cpu_to_le_32(set);
	return set;
}
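/*
 * Worked example (assumption for illustration only): a packet with an outer
 * IPv4 UDP tunnel, inner IPv4 and an inner TCP checksum request carries
 * RTE_MBUF_F_TX_TUNNEL_UDP | RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_TCP_CKSUM,
 * so bits[0:1] = 01 come from the L4 mask and bit[9] = 1 from the UDP
 * tunnel, giving idx = 0x201 as the mlx5_swp_types_table[] lookup index.
 */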
445 */ 446 static __rte_always_inline uint8_t 447 txq_ol_cksum_to_cs(struct rte_mbuf *buf) 448 { 449 uint32_t idx; 450 uint8_t is_tunnel = !!(buf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK); 451 const uint64_t ol_flags_mask = RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_L4_MASK | 452 RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_OUTER_IP_CKSUM; 453 454 /* 455 * The index should have: 456 * bit[0] = RTE_MBUF_F_TX_TCP_SEG 457 * bit[2:3] = RTE_MBUF_F_TX_UDP_CKSUM, RTE_MBUF_F_TX_TCP_CKSUM 458 * bit[4] = RTE_MBUF_F_TX_IP_CKSUM 459 * bit[8] = RTE_MBUF_F_TX_OUTER_IP_CKSUM 460 * bit[9] = tunnel 461 */ 462 idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9); 463 return mlx5_cksum_table[idx]; 464 } 465 466 /** 467 * Free the mbufs from the linear array of pointers. 468 * 469 * @param txq 470 * Pointer to Tx queue structure. 471 * @param pkts 472 * Pointer to array of packets to be free. 473 * @param pkts_n 474 * Number of packets to be freed. 475 * @param olx 476 * Configured Tx offloads mask. It is fully defined at 477 * compile time and may be used for optimization. 478 */ 479 static __rte_always_inline void 480 mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 481 struct rte_mbuf **__rte_restrict pkts, 482 unsigned int pkts_n, 483 unsigned int olx __rte_unused) 484 { 485 struct rte_mempool *pool = NULL; 486 struct rte_mbuf **p_free = NULL; 487 struct rte_mbuf *mbuf; 488 unsigned int n_free = 0; 489 490 /* 491 * The implemented algorithm eliminates 492 * copying pointers to temporary array 493 * for rte_mempool_put_bulk() calls. 494 */ 495 MLX5_ASSERT(pkts); 496 MLX5_ASSERT(pkts_n); 497 /* 498 * Free mbufs directly to the pool in bulk 499 * if fast free offload is engaged 500 */ 501 if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) { 502 mbuf = *pkts; 503 pool = mbuf->pool; 504 rte_mempool_put_bulk(pool, (void *)pkts, pkts_n); 505 return; 506 } 507 for (;;) { 508 for (;;) { 509 /* 510 * Decrement mbuf reference counter, detach 511 * indirect and external buffers if needed. 512 */ 513 mbuf = rte_pktmbuf_prefree_seg(*pkts); 514 if (likely(mbuf != NULL)) { 515 MLX5_ASSERT(mbuf == *pkts); 516 if (likely(n_free != 0)) { 517 if (unlikely(pool != mbuf->pool)) 518 /* From different pool. */ 519 break; 520 } else { 521 /* Start new scan array. */ 522 pool = mbuf->pool; 523 p_free = pkts; 524 } 525 ++n_free; 526 ++pkts; 527 --pkts_n; 528 if (unlikely(pkts_n == 0)) { 529 mbuf = NULL; 530 break; 531 } 532 } else { 533 /* 534 * This happens if mbuf is still referenced. 535 * We can't put it back to the pool, skip. 536 */ 537 ++pkts; 538 --pkts_n; 539 if (unlikely(n_free != 0)) 540 /* There is some array to free.*/ 541 break; 542 if (unlikely(pkts_n == 0)) 543 /* Last mbuf, nothing to free. */ 544 return; 545 } 546 } 547 for (;;) { 548 /* 549 * This loop is implemented to avoid multiple 550 * inlining of rte_mempool_put_bulk(). 551 */ 552 MLX5_ASSERT(pool); 553 MLX5_ASSERT(p_free); 554 MLX5_ASSERT(n_free); 555 /* 556 * Free the array of pre-freed mbufs 557 * belonging to the same memory pool. 558 */ 559 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 560 if (unlikely(mbuf != NULL)) { 561 /* There is the request to start new scan. */ 562 pool = mbuf->pool; 563 p_free = pkts++; 564 n_free = 1; 565 --pkts_n; 566 if (likely(pkts_n != 0)) 567 break; 568 /* 569 * This is the last mbuf to be freed. 570 * Do one more loop iteration to complete. 571 * This is rare case of the last unique mbuf. 
572 */ 573 mbuf = NULL; 574 continue; 575 } 576 if (likely(pkts_n == 0)) 577 return; 578 n_free = 0; 579 break; 580 } 581 } 582 } 583 584 /** 585 * No inline version to free buffers for optimal call 586 * on the tx_burst completion. 587 */ 588 static __rte_noinline void 589 __mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 590 struct rte_mbuf **__rte_restrict pkts, 591 unsigned int pkts_n, 592 unsigned int olx __rte_unused) 593 { 594 mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx); 595 } 596 597 /** 598 * Free the mbuf from the elts ring buffer till new tail. 599 * 600 * @param txq 601 * Pointer to Tx queue structure. 602 * @param tail 603 * Index in elts to free up to, becomes new elts tail. 604 * @param olx 605 * Configured Tx offloads mask. It is fully defined at 606 * compile time and may be used for optimization. 607 */ 608 static __rte_always_inline void 609 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 610 uint16_t tail, 611 unsigned int olx __rte_unused) 612 { 613 uint16_t n_elts = tail - txq->elts_tail; 614 615 MLX5_ASSERT(n_elts); 616 MLX5_ASSERT(n_elts <= txq->elts_s); 617 /* 618 * Implement a loop to support ring buffer wraparound 619 * with single inlining of mlx5_tx_free_mbuf(). 620 */ 621 do { 622 unsigned int part; 623 624 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 625 part = RTE_MIN(part, n_elts); 626 MLX5_ASSERT(part); 627 MLX5_ASSERT(part <= txq->elts_s); 628 mlx5_tx_free_mbuf(txq, 629 &txq->elts[txq->elts_tail & txq->elts_m], 630 part, olx); 631 txq->elts_tail += part; 632 n_elts -= part; 633 } while (n_elts); 634 } 635 636 /** 637 * Store the mbuf being sent into elts ring buffer. 638 * On Tx completion these mbufs will be freed. 639 * 640 * @param txq 641 * Pointer to Tx queue structure. 642 * @param pkts 643 * Pointer to array of packets to be stored. 644 * @param pkts_n 645 * Number of packets to be stored. 646 * @param olx 647 * Configured Tx offloads mask. It is fully defined at 648 * compile time and may be used for optimization. 649 */ 650 static __rte_always_inline void 651 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 652 struct rte_mbuf **__rte_restrict pkts, 653 unsigned int pkts_n, 654 unsigned int olx __rte_unused) 655 { 656 unsigned int part; 657 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 658 659 MLX5_ASSERT(pkts); 660 MLX5_ASSERT(pkts_n); 661 part = txq->elts_s - (txq->elts_head & txq->elts_m); 662 MLX5_ASSERT(part); 663 MLX5_ASSERT(part <= txq->elts_s); 664 /* This code is a good candidate for vectorizing with SIMD. */ 665 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 666 (void *)pkts, 667 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 668 txq->elts_head += pkts_n; 669 if (unlikely(part < pkts_n)) 670 /* The copy is wrapping around the elts array. */ 671 rte_memcpy((void *)elts, (void *)(pkts + part), 672 (pkts_n - part) * sizeof(struct rte_mbuf *)); 673 } 674 675 /** 676 * Check if the completion request flag should be set in the last WQE. 677 * Both pushed mbufs and WQEs are monitored and the completion request 678 * flag is set if any of thresholds is reached. 679 * 680 * @param txq 681 * Pointer to TX queue structure. 682 * @param loc 683 * Pointer to burst routine local context. 684 * @param olx 685 * Configured Tx offloads mask. It is fully defined at 686 * compile time and may be used for optimization. 
687 */ 688 static __rte_always_inline void 689 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 690 struct mlx5_txq_local *__rte_restrict loc, 691 unsigned int olx) 692 { 693 uint16_t head = txq->elts_head; 694 unsigned int part; 695 696 part = MLX5_TXOFF_CONFIG(INLINE) ? 697 0 : loc->pkts_sent - loc->pkts_copy; 698 head += part; 699 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 700 (MLX5_TXOFF_CONFIG(INLINE) && 701 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 702 volatile struct mlx5_wqe *last = loc->wqe_last; 703 704 MLX5_ASSERT(last); 705 txq->elts_comp = head; 706 if (MLX5_TXOFF_CONFIG(INLINE)) 707 txq->wqe_comp = txq->wqe_ci; 708 /* Request unconditional completion on last WQE. */ 709 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 710 MLX5_COMP_MODE_OFFSET); 711 /* Save elts_head in dedicated free on completion queue. */ 712 #ifdef RTE_LIBRTE_MLX5_DEBUG 713 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 714 (last->cseg.opcode >> 8) << 16; 715 #else 716 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 717 #endif 718 /* A CQE slot must always be available. */ 719 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 720 } 721 } 722 723 /** 724 * Build the Control Segment with specified opcode: 725 * - MLX5_OPCODE_SEND 726 * - MLX5_OPCODE_ENHANCED_MPSW 727 * - MLX5_OPCODE_TSO 728 * 729 * @param txq 730 * Pointer to TX queue structure. 731 * @param loc 732 * Pointer to burst routine local context. 733 * @param wqe 734 * Pointer to WQE to fill with built Control Segment. 735 * @param ds 736 * Supposed length of WQE in segments. 737 * @param opcode 738 * SQ WQE opcode to put into Control Segment. 739 * @param olx 740 * Configured Tx offloads mask. It is fully defined at 741 * compile time and may be used for optimization. 742 */ 743 static __rte_always_inline void 744 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 745 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 746 struct mlx5_wqe *__rte_restrict wqe, 747 unsigned int ds, 748 unsigned int opcode, 749 unsigned int olx __rte_unused) 750 { 751 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 752 753 /* For legacy MPW replace the EMPW by TSO with modifier. */ 754 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 755 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 756 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 757 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 758 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 759 MLX5_COMP_MODE_OFFSET); 760 cs->misc = RTE_BE32(0); 761 } 762 763 /** 764 * Build the Synchronize Queue Segment with specified completion index. 765 * 766 * @param txq 767 * Pointer to TX queue structure. 768 * @param loc 769 * Pointer to burst routine local context. 770 * @param wqe 771 * Pointer to WQE to fill with built Control Segment. 772 * @param wci 773 * Completion index in Clock Queue to wait. 774 * @param olx 775 * Configured Tx offloads mask. It is fully defined at 776 * compile time and may be used for optimization. 
777 */ 778 static __rte_always_inline void 779 mlx5_tx_qseg_init(struct mlx5_txq_data *restrict txq, 780 struct mlx5_txq_local *restrict loc __rte_unused, 781 struct mlx5_wqe *restrict wqe, 782 unsigned int wci, 783 unsigned int olx __rte_unused) 784 { 785 struct mlx5_wqe_qseg *qs; 786 787 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 788 qs->max_index = rte_cpu_to_be_32(wci); 789 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq_obj.cq->id); 790 qs->reserved0 = RTE_BE32(0); 791 qs->reserved1 = RTE_BE32(0); 792 } 793 794 /** 795 * Build the Wait on Time Segment with specified timestamp value. 796 * 797 * @param txq 798 * Pointer to TX queue structure. 799 * @param loc 800 * Pointer to burst routine local context. 801 * @param wqe 802 * Pointer to WQE to fill with built Control Segment. 803 * @param ts 804 * Timesatmp value to wait. 805 * @param olx 806 * Configured Tx offloads mask. It is fully defined at 807 * compile time and may be used for optimization. 808 */ 809 static __rte_always_inline void 810 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 811 struct mlx5_txq_local *restrict loc __rte_unused, 812 struct mlx5_wqe *restrict wqe, 813 uint64_t ts, 814 unsigned int olx __rte_unused) 815 { 816 struct mlx5_wqe_wseg *ws; 817 818 ws = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 819 ws->operation = rte_cpu_to_be_32(MLX5_WAIT_COND_CYCLIC_BIGGER); 820 ws->lkey = RTE_BE32(0); 821 ws->va_high = RTE_BE32(0); 822 ws->va_low = RTE_BE32(0); 823 if (txq->rt_timestamp) { 824 ts = ts % (uint64_t)NS_PER_S 825 | (ts / (uint64_t)NS_PER_S) << 32; 826 } 827 ws->value = rte_cpu_to_be_64(ts); 828 ws->mask = txq->rt_timemask; 829 } 830 831 /** 832 * Build the Ethernet Segment without inlined data. 833 * Supports Software Parser, Checksums and VLAN insertion Tx offload features. 834 * 835 * @param txq 836 * Pointer to TX queue structure. 837 * @param loc 838 * Pointer to burst routine local context. 839 * @param wqe 840 * Pointer to WQE to fill with built Ethernet Segment. 841 * @param olx 842 * Configured Tx offloads mask. It is fully defined at 843 * compile time and may be used for optimization. 844 */ 845 static __rte_always_inline void 846 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 847 struct mlx5_txq_local *__rte_restrict loc, 848 struct mlx5_wqe *__rte_restrict wqe, 849 unsigned int olx) 850 { 851 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 852 uint32_t csum; 853 854 /* 855 * Calculate and set check sum flags first, dword field 856 * in segment may be shared with Software Parser flags. 857 */ 858 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 859 es->flags = rte_cpu_to_le_32(csum); 860 /* 861 * Calculate and set Software Parser offsets and flags. 862 * These flags a set for custom UDP and IP tunnel packets. 863 */ 864 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 865 /* Fill metadata field if needed. */ 866 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 867 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 868 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 869 0 : 0; 870 /* Engage VLAN tag insertion feature if requested. */ 871 if (MLX5_TXOFF_CONFIG(VLAN) && 872 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 873 /* 874 * We should get here only if device support 875 * this feature correctly. 
876 */ 877 MLX5_ASSERT(txq->vlan_en); 878 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 879 loc->mbuf->vlan_tci); 880 } else { 881 es->inline_hdr = RTE_BE32(0); 882 } 883 } 884 885 /** 886 * Build the Ethernet Segment with minimal inlined data 887 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 888 * used to fill the gap in single WQEBB WQEs. 889 * Supports Software Parser, Checksums and VLAN 890 * insertion Tx offload features. 891 * 892 * @param txq 893 * Pointer to TX queue structure. 894 * @param loc 895 * Pointer to burst routine local context. 896 * @param wqe 897 * Pointer to WQE to fill with built Ethernet Segment. 898 * @param vlan 899 * Length of VLAN tag insertion if any. 900 * @param olx 901 * Configured Tx offloads mask. It is fully defined at 902 * compile time and may be used for optimization. 903 */ 904 static __rte_always_inline void 905 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 906 struct mlx5_txq_local *__rte_restrict loc, 907 struct mlx5_wqe *__rte_restrict wqe, 908 unsigned int vlan, 909 unsigned int olx) 910 { 911 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 912 uint32_t csum; 913 uint8_t *psrc, *pdst; 914 915 /* 916 * Calculate and set check sum flags first, dword field 917 * in segment may be shared with Software Parser flags. 918 */ 919 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 920 es->flags = rte_cpu_to_le_32(csum); 921 /* 922 * Calculate and set Software Parser offsets and flags. 923 * These flags a set for custom UDP and IP tunnel packets. 924 */ 925 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 926 /* Fill metadata field if needed. */ 927 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 928 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 929 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 930 0 : 0; 931 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 932 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 933 es->inline_data = *(unaligned_uint16_t *)psrc; 934 psrc += sizeof(uint16_t); 935 pdst = (uint8_t *)(es + 1); 936 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 937 /* Implement VLAN tag insertion as part inline data. */ 938 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 939 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 940 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 941 /* Insert VLAN ethertype + VLAN tag. */ 942 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 943 ((RTE_ETHER_TYPE_VLAN << 16) | 944 loc->mbuf->vlan_tci); 945 pdst += sizeof(struct rte_vlan_hdr); 946 /* Copy the rest two bytes from packet data. */ 947 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 948 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 949 } else { 950 /* Fill the gap in the title WQEBB with inline data. */ 951 rte_mov16(pdst, psrc); 952 } 953 } 954 955 /** 956 * Build the Ethernet Segment with entire packet data inlining. Checks the 957 * boundary of WQEBB and ring buffer wrapping, supports Software Parser, 958 * Checksums and VLAN insertion Tx offload features. 959 * 960 * @param txq 961 * Pointer to TX queue structure. 962 * @param loc 963 * Pointer to burst routine local context. 964 * @param wqe 965 * Pointer to WQE to fill with built Ethernet Segment. 966 * @param vlan 967 * Length of VLAN tag insertion if any. 968 * @param inlen 969 * Length of data to inline (VLAN included, if any). 970 * @param tso 971 * TSO flag, set mss field from the packet. 972 * @param olx 973 * Configured Tx offloads mask. 
/**
 * Build the Ethernet Segment with entire packet data inlining. Checks the
 * boundary of WQEBB and ring buffer wrapping, supports Software Parser,
 * Checksums and VLAN insertion Tx offload features.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet Segment.
 * @param vlan
 *   Length of VLAN tag insertion if any.
 * @param inlen
 *   Length of data to inline (VLAN included, if any).
 * @param tso
 *   TSO flag, set mss field from the packet.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Pointer to the next Data Segment (aligned and wrapped around).
 */
static __rte_always_inline struct mlx5_wqe_dseg *
mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe *__rte_restrict wqe,
		  unsigned int vlan,
		  unsigned int inlen,
		  unsigned int tso,
		  unsigned int olx)
{
	struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg;
	uint32_t csum;
	uint8_t *psrc, *pdst;
	unsigned int part;

	/*
	 * Calculate and set check sum flags first, dword field
	 * in segment may be shared with Software Parser flags.
	 */
	csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0;
	if (tso) {
		csum <<= 24;
		csum |= loc->mbuf->tso_segsz;
		es->flags = rte_cpu_to_be_32(csum);
	} else {
		es->flags = rte_cpu_to_le_32(csum);
	}
	/*
	 * Calculate and set Software Parser offsets and flags.
	 * These flags are set for custom UDP and IP tunnel packets.
	 */
	es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx);
	/* Fill metadata field if needed. */
	es->metadata = MLX5_TXOFF_CONFIG(METADATA) ?
		       loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ?
		       rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) :
		       0 : 0;
	psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *);
	es->inline_hdr_sz = rte_cpu_to_be_16(inlen);
	es->inline_data = *(unaligned_uint16_t *)psrc;
	psrc += sizeof(uint16_t);
	pdst = (uint8_t *)(es + 1);
	if (MLX5_TXOFF_CONFIG(VLAN) && vlan) {
		/* Implement VLAN tag insertion as part of inline data. */
		memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t));
		pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		/* Insert VLAN ethertype + VLAN tag. */
		*(unaligned_uint32_t *)pdst = rte_cpu_to_be_32
						((RTE_ETHER_TYPE_VLAN << 16) |
						 loc->mbuf->vlan_tci);
		pdst += sizeof(struct rte_vlan_hdr);
		/* Copy the remaining two bytes from the packet data. */
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t)));
		*(uint16_t *)pdst = *(unaligned_uint16_t *)psrc;
		psrc += sizeof(uint16_t);
	} else {
		/* Fill the gap in the title WQEBB with inline data. */
		rte_mov16(pdst, psrc);
		psrc += sizeof(rte_v128u32_t);
	}
	pdst = (uint8_t *)(es + 2);
	MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE);
	MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end);
	inlen -= MLX5_ESEG_MIN_INLINE_SIZE;
	if (!inlen) {
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE));
		return (struct mlx5_wqe_dseg *)pdst;
	}
	/*
	 * The WQEBB space availability is checked by caller.
	 * Here we should be aware of WQE ring buffer wraparound only.
	 */
	part = (uint8_t *)txq->wqes_end - pdst;
	part = RTE_MIN(part, inlen);
	do {
		rte_memcpy(pdst, psrc, part);
		inlen -= part;
		if (likely(!inlen)) {
			/*
			 * If return value is not used by the caller
			 * the code below will be optimized out.
			 */
			pdst += part;
			pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
			if (unlikely(pdst >= (uint8_t *)txq->wqes_end))
				pdst = (uint8_t *)txq->wqes;
			return (struct mlx5_wqe_dseg *)pdst;
		}
		pdst = (uint8_t *)txq->wqes;
		psrc += part;
		part = inlen;
	} while (true);
}
1060 */ 1061 pdst += part; 1062 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1063 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 1064 pdst = (uint8_t *)txq->wqes; 1065 return (struct mlx5_wqe_dseg *)pdst; 1066 } 1067 pdst = (uint8_t *)txq->wqes; 1068 psrc += part; 1069 part = inlen; 1070 } while (true); 1071 } 1072 1073 /** 1074 * Copy data from chain of mbuf to the specified linear buffer. 1075 * Checksums and VLAN insertion Tx offload features. If data 1076 * from some mbuf copied completely this mbuf is freed. Local 1077 * structure is used to keep the byte stream state. 1078 * 1079 * @param pdst 1080 * Pointer to the destination linear buffer. 1081 * @param loc 1082 * Pointer to burst routine local context. 1083 * @param len 1084 * Length of data to be copied. 1085 * @param must 1086 * Length of data to be copied ignoring no inline hint. 1087 * @param olx 1088 * Configured Tx offloads mask. It is fully defined at 1089 * compile time and may be used for optimization. 1090 * 1091 * @return 1092 * Number of actual copied data bytes. This is always greater than or 1093 * equal to must parameter and might be lesser than len in no inline 1094 * hint flag is encountered. 1095 */ 1096 static __rte_always_inline unsigned int 1097 mlx5_tx_mseg_memcpy(uint8_t *pdst, 1098 struct mlx5_txq_local *__rte_restrict loc, 1099 unsigned int len, 1100 unsigned int must, 1101 unsigned int olx __rte_unused) 1102 { 1103 struct rte_mbuf *mbuf; 1104 unsigned int part, dlen, copy = 0; 1105 uint8_t *psrc; 1106 1107 MLX5_ASSERT(len); 1108 do { 1109 /* Allow zero length packets, must check first. */ 1110 dlen = rte_pktmbuf_data_len(loc->mbuf); 1111 if (dlen <= loc->mbuf_off) { 1112 /* Exhausted packet, just free. */ 1113 mbuf = loc->mbuf; 1114 loc->mbuf = mbuf->next; 1115 rte_pktmbuf_free_seg(mbuf); 1116 loc->mbuf_off = 0; 1117 MLX5_ASSERT(loc->mbuf_nseg > 1); 1118 MLX5_ASSERT(loc->mbuf); 1119 --loc->mbuf_nseg; 1120 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1121 unsigned int diff; 1122 1123 if (copy >= must) { 1124 /* 1125 * We already copied the minimal 1126 * requested amount of data. 1127 */ 1128 return copy; 1129 } 1130 diff = must - copy; 1131 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 1132 /* 1133 * Copy only the minimal required 1134 * part of the data buffer. Limit amount 1135 * of data to be copied to the length of 1136 * available space. 1137 */ 1138 len = RTE_MIN(len, diff); 1139 } 1140 } 1141 continue; 1142 } 1143 dlen -= loc->mbuf_off; 1144 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1145 loc->mbuf_off); 1146 part = RTE_MIN(len, dlen); 1147 rte_memcpy(pdst, psrc, part); 1148 copy += part; 1149 loc->mbuf_off += part; 1150 len -= part; 1151 if (!len) { 1152 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 1153 loc->mbuf_off = 0; 1154 /* Exhausted packet, just free. */ 1155 mbuf = loc->mbuf; 1156 loc->mbuf = mbuf->next; 1157 rte_pktmbuf_free_seg(mbuf); 1158 loc->mbuf_off = 0; 1159 MLX5_ASSERT(loc->mbuf_nseg >= 1); 1160 --loc->mbuf_nseg; 1161 } 1162 return copy; 1163 } 1164 pdst += part; 1165 } while (true); 1166 } 1167 1168 /** 1169 * Build the Ethernet Segment with inlined data from multi-segment packet. 1170 * Checks the boundary of WQEBB and ring buffer wrapping, supports Software 1171 * Parser, Checksums and VLAN insertion Tx offload features. 1172 * 1173 * @param txq 1174 * Pointer to TX queue structure. 1175 * @param loc 1176 * Pointer to burst routine local context. 1177 * @param wqe 1178 * Pointer to WQE to fill with built Ethernet Segment. 
1179 * @param vlan 1180 * Length of VLAN tag insertion if any. 1181 * @param inlen 1182 * Length of data to inline (VLAN included, if any). 1183 * @param tso 1184 * TSO flag, set mss field from the packet. 1185 * @param olx 1186 * Configured Tx offloads mask. It is fully defined at 1187 * compile time and may be used for optimization. 1188 * 1189 * @return 1190 * Pointer to the next Data Segment (aligned and possible NOT wrapped 1191 * around - caller should do wrapping check on its own). 1192 */ 1193 static __rte_always_inline struct mlx5_wqe_dseg * 1194 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 1195 struct mlx5_txq_local *__rte_restrict loc, 1196 struct mlx5_wqe *__rte_restrict wqe, 1197 unsigned int vlan, 1198 unsigned int inlen, 1199 unsigned int tso, 1200 unsigned int olx) 1201 { 1202 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 1203 uint32_t csum; 1204 uint8_t *pdst; 1205 unsigned int part, tlen = 0; 1206 1207 /* 1208 * Calculate and set check sum flags first, uint32_t field 1209 * in segment may be shared with Software Parser flags. 1210 */ 1211 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 1212 if (tso) { 1213 csum <<= 24; 1214 csum |= loc->mbuf->tso_segsz; 1215 es->flags = rte_cpu_to_be_32(csum); 1216 } else { 1217 es->flags = rte_cpu_to_le_32(csum); 1218 } 1219 /* 1220 * Calculate and set Software Parser offsets and flags. 1221 * These flags a set for custom UDP and IP tunnel packets. 1222 */ 1223 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1224 /* Fill metadata field if needed. */ 1225 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1226 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 1227 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1228 0 : 0; 1229 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 1230 pdst = (uint8_t *)&es->inline_data; 1231 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1232 /* Implement VLAN tag insertion as part inline data. */ 1233 mlx5_tx_mseg_memcpy(pdst, loc, 1234 2 * RTE_ETHER_ADDR_LEN, 1235 2 * RTE_ETHER_ADDR_LEN, olx); 1236 pdst += 2 * RTE_ETHER_ADDR_LEN; 1237 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1238 ((RTE_ETHER_TYPE_VLAN << 16) | 1239 loc->mbuf->vlan_tci); 1240 pdst += sizeof(struct rte_vlan_hdr); 1241 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 1242 } 1243 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 1244 /* 1245 * The WQEBB space availability is checked by caller. 1246 * Here we should be aware of WQE ring buffer wraparound only. 1247 */ 1248 part = (uint8_t *)txq->wqes_end - pdst; 1249 part = RTE_MIN(part, inlen - tlen); 1250 MLX5_ASSERT(part); 1251 do { 1252 unsigned int copy; 1253 1254 /* 1255 * Copying may be interrupted inside the routine 1256 * if run into no inline hint flag. 1257 */ 1258 copy = tso ? inlen : txq->inlen_mode; 1259 copy = tlen >= copy ? 0 : (copy - tlen); 1260 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 1261 tlen += copy; 1262 if (likely(inlen <= tlen) || copy < part) { 1263 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 1264 pdst += copy; 1265 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1266 return (struct mlx5_wqe_dseg *)pdst; 1267 } 1268 pdst = (uint8_t *)txq->wqes; 1269 part = inlen - tlen; 1270 } while (true); 1271 } 1272 1273 /** 1274 * Build the Data Segment of pointer type. 1275 * 1276 * @param txq 1277 * Pointer to TX queue structure. 1278 * @param loc 1279 * Pointer to burst routine local context. 1280 * @param dseg 1281 * Pointer to WQE to fill with built Data Segment. 
1282 * @param buf 1283 * Data buffer to point. 1284 * @param len 1285 * Data buffer length. 1286 * @param olx 1287 * Configured Tx offloads mask. It is fully defined at 1288 * compile time and may be used for optimization. 1289 */ 1290 static __rte_always_inline void 1291 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 1292 struct mlx5_txq_local *__rte_restrict loc, 1293 struct mlx5_wqe_dseg *__rte_restrict dseg, 1294 uint8_t *buf, 1295 unsigned int len, 1296 unsigned int olx __rte_unused) 1297 1298 { 1299 MLX5_ASSERT(len); 1300 dseg->bcount = rte_cpu_to_be_32(len); 1301 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1302 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1303 } 1304 1305 /** 1306 * Build the Data Segment of pointer type or inline if data length is less than 1307 * buffer in minimal Data Segment size. 1308 * 1309 * @param txq 1310 * Pointer to TX queue structure. 1311 * @param loc 1312 * Pointer to burst routine local context. 1313 * @param dseg 1314 * Pointer to WQE to fill with built Data Segment. 1315 * @param buf 1316 * Data buffer to point. 1317 * @param len 1318 * Data buffer length. 1319 * @param olx 1320 * Configured Tx offloads mask. It is fully defined at 1321 * compile time and may be used for optimization. 1322 */ 1323 static __rte_always_inline void 1324 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 1325 struct mlx5_txq_local *__rte_restrict loc, 1326 struct mlx5_wqe_dseg *__rte_restrict dseg, 1327 uint8_t *buf, 1328 unsigned int len, 1329 unsigned int olx __rte_unused) 1330 1331 { 1332 uintptr_t dst, src; 1333 1334 MLX5_ASSERT(len); 1335 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 1336 dseg->bcount = rte_cpu_to_be_32(len); 1337 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1338 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1339 1340 return; 1341 } 1342 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1343 /* Unrolled implementation of generic rte_memcpy. */ 1344 dst = (uintptr_t)&dseg->inline_data[0]; 1345 src = (uintptr_t)buf; 1346 if (len & 0x08) { 1347 #ifdef RTE_ARCH_STRICT_ALIGN 1348 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 1349 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1350 dst += sizeof(uint32_t); 1351 src += sizeof(uint32_t); 1352 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1353 dst += sizeof(uint32_t); 1354 src += sizeof(uint32_t); 1355 #else 1356 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 1357 dst += sizeof(uint64_t); 1358 src += sizeof(uint64_t); 1359 #endif 1360 } 1361 if (len & 0x04) { 1362 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1363 dst += sizeof(uint32_t); 1364 src += sizeof(uint32_t); 1365 } 1366 if (len & 0x02) { 1367 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 1368 dst += sizeof(uint16_t); 1369 src += sizeof(uint16_t); 1370 } 1371 if (len & 0x01) 1372 *(uint8_t *)dst = *(uint8_t *)src; 1373 } 1374 1375 /** 1376 * Build the Data Segment of inlined data from single 1377 * segment packet, no VLAN insertion. 1378 * 1379 * @param txq 1380 * Pointer to TX queue structure. 1381 * @param loc 1382 * Pointer to burst routine local context. 1383 * @param dseg 1384 * Pointer to WQE to fill with built Data Segment. 1385 * @param buf 1386 * Data buffer to point. 1387 * @param len 1388 * Data buffer length. 1389 * @param olx 1390 * Configured Tx offloads mask. It is fully defined at 1391 * compile time and may be used for optimization. 1392 * 1393 * @return 1394 * Pointer to the next Data Segment after inlined data. 1395 * Ring buffer wraparound check is needed. 
/**
 * Build the Data Segment of inlined data from single
 * segment packet, no VLAN insertion.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param dseg
 *   Pointer to WQE to fill with built Data Segment.
 * @param buf
 *   Data buffer to point.
 * @param len
 *   Data buffer length.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Pointer to the next Data Segment after inlined data.
 *   Ring buffer wraparound check is needed. We do not do it here because it
 *   may not be needed for the last packet in the eMPW session.
 */
static __rte_always_inline struct mlx5_wqe_dseg *
mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc __rte_unused,
		  struct mlx5_wqe_dseg *__rte_restrict dseg,
		  uint8_t *buf,
		  unsigned int len,
		  unsigned int olx __rte_unused)
{
	unsigned int part;
	uint8_t *pdst;

	if (!MLX5_TXOFF_CONFIG(MPW)) {
		/* Store the descriptor byte counter for eMPW sessions. */
		dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE);
		pdst = &dseg->inline_data[0];
	} else {
		/* The entire legacy MPW session counter is stored on close. */
		pdst = (uint8_t *)dseg;
	}
	/*
	 * The WQEBB space availability is checked by caller.
	 * Here we should be aware of WQE ring buffer wraparound only.
	 */
	part = (uint8_t *)txq->wqes_end - pdst;
	part = RTE_MIN(part, len);
	do {
		rte_memcpy(pdst, buf, part);
		len -= part;
		if (likely(!len)) {
			pdst += part;
			if (!MLX5_TXOFF_CONFIG(MPW))
				pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
			/* Note: no final wraparound check here. */
			return (struct mlx5_wqe_dseg *)pdst;
		}
		pdst = (uint8_t *)txq->wqes;
		buf += part;
		part = len;
	} while (true);
}
1498 */ 1499 part = (uint8_t *)txq->wqes_end - pdst; 1500 part = RTE_MIN(part, len); 1501 do { 1502 rte_memcpy(pdst, buf, part); 1503 len -= part; 1504 if (likely(!len)) { 1505 pdst += part; 1506 if (!MLX5_TXOFF_CONFIG(MPW)) 1507 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1508 /* Note: no final wraparound check here. */ 1509 return (struct mlx5_wqe_dseg *)pdst; 1510 } 1511 pdst = (uint8_t *)txq->wqes; 1512 buf += part; 1513 part = len; 1514 } while (true); 1515 } 1516 1517 /** 1518 * Build the Ethernet Segment with optionally inlined data with 1519 * VLAN insertion and following Data Segments (if any) from 1520 * multi-segment packet. Used by ordinary send and TSO. 1521 * 1522 * @param txq 1523 * Pointer to TX queue structure. 1524 * @param loc 1525 * Pointer to burst routine local context. 1526 * @param wqe 1527 * Pointer to WQE to fill with built Ethernet/Data Segments. 1528 * @param vlan 1529 * Length of VLAN header to insert, 0 means no VLAN insertion. 1530 * @param inlen 1531 * Data length to inline. For TSO this parameter specifies exact value, 1532 * for ordinary send routine can be aligned by caller to provide better WQE 1533 * space saving and data buffer start address alignment. 1534 * This length includes VLAN header being inserted. 1535 * @param tso 1536 * Zero means ordinary send, inlined data can be extended, 1537 * otherwise this is TSO, inlined data length is fixed. 1538 * @param olx 1539 * Configured Tx offloads mask. It is fully defined at 1540 * compile time and may be used for optimization. 1541 * 1542 * @return 1543 * Actual size of built WQE in segments. 1544 */ 1545 static __rte_always_inline unsigned int 1546 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 1547 struct mlx5_txq_local *__rte_restrict loc, 1548 struct mlx5_wqe *__rte_restrict wqe, 1549 unsigned int vlan, 1550 unsigned int inlen, 1551 unsigned int tso, 1552 unsigned int olx __rte_unused) 1553 { 1554 struct mlx5_wqe_dseg *__rte_restrict dseg; 1555 unsigned int ds; 1556 1557 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 1558 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 1559 loc->mbuf_off = 0; 1560 1561 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 1562 if (!loc->mbuf_nseg) 1563 goto dseg_done; 1564 /* 1565 * There are still some mbuf remaining, not inlined. 1566 * The first mbuf may be partially inlined and we 1567 * must process the possible non-zero data offset. 1568 */ 1569 if (loc->mbuf_off) { 1570 unsigned int dlen; 1571 uint8_t *dptr; 1572 1573 /* 1574 * Exhausted packets must be dropped before. 1575 * Non-zero offset means there are some data 1576 * remained in the packet. 1577 */ 1578 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 1579 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 1580 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1581 loc->mbuf_off); 1582 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 1583 /* 1584 * Build the pointer/minimal Data Segment. 1585 * Do ring buffer wrapping check in advance. 1586 */ 1587 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1588 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1589 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 1590 /* Store the mbuf to be freed on completion. 
/**
 * Build the Ethernet Segment with optionally inlined data with
 * VLAN insertion and following Data Segments (if any) from
 * multi-segment packet. Used by ordinary send and TSO.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet/Data Segments.
 * @param vlan
 *   Length of VLAN header to insert, 0 means no VLAN insertion.
 * @param inlen
 *   Data length to inline. For TSO this parameter specifies the exact value,
 *   for the ordinary send routine it can be aligned by the caller to provide
 *   better WQE space saving and data buffer start address alignment.
 *   This length includes the VLAN header being inserted.
 * @param tso
 *   Zero means ordinary send, inlined data can be extended,
 *   otherwise this is TSO, inlined data length is fixed.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Actual size of built WQE in segments.
 */
static __rte_always_inline unsigned int
mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq,
		   struct mlx5_txq_local *__rte_restrict loc,
		   struct mlx5_wqe *__rte_restrict wqe,
		   unsigned int vlan,
		   unsigned int inlen,
		   unsigned int tso,
		   unsigned int olx __rte_unused)
{
	struct mlx5_wqe_dseg *__rte_restrict dseg;
	unsigned int ds;

	MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen);
	loc->mbuf_nseg = NB_SEGS(loc->mbuf);
	loc->mbuf_off = 0;

	dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx);
	if (!loc->mbuf_nseg)
		goto dseg_done;
	/*
	 * There are still some mbufs remaining, not inlined.
	 * The first mbuf may be partially inlined and we
	 * must process the possible non-zero data offset.
	 */
	if (loc->mbuf_off) {
		unsigned int dlen;
		uint8_t *dptr;

		/*
		 * Exhausted packets must be dropped before.
		 * Non-zero offset means there are some data
		 * remained in the packet.
		 */
		MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf));
		MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf));
		dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *,
					       loc->mbuf_off);
		dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off;
		/*
		 * Build the pointer/minimal Data Segment.
		 * Do ring buffer wrapping check in advance.
		 */
		if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end)
			dseg = (struct mlx5_wqe_dseg *)txq->wqes;
		mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx);
		/* Store the mbuf to be freed on completion. */
		MLX5_ASSERT(loc->elts_free);
		txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf;
		--loc->elts_free;
		++dseg;
		if (--loc->mbuf_nseg == 0)
			goto dseg_done;
		loc->mbuf = loc->mbuf->next;
		loc->mbuf_off = 0;
	}
	do {
		if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) {
			struct rte_mbuf *mbuf;

			/* Zero length segment found, just skip. */
			mbuf = loc->mbuf;
			loc->mbuf = loc->mbuf->next;
			rte_pktmbuf_free_seg(mbuf);
			if (--loc->mbuf_nseg == 0)
				break;
		} else {
			if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end)
				dseg = (struct mlx5_wqe_dseg *)txq->wqes;
			mlx5_tx_dseg_iptr
				(txq, loc, dseg,
				 rte_pktmbuf_mtod(loc->mbuf, uint8_t *),
				 rte_pktmbuf_data_len(loc->mbuf), olx);
			MLX5_ASSERT(loc->elts_free);
			txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf;
			--loc->elts_free;
			++dseg;
			if (--loc->mbuf_nseg == 0)
				break;
			loc->mbuf = loc->mbuf->next;
		}
	} while (true);

dseg_done:
	/* Calculate actual segments used from the dseg pointer. */
	if ((uintptr_t)wqe < (uintptr_t)dseg)
		ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE;
	else
		ds = (((uintptr_t)dseg - (uintptr_t)wqe) +
		      txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE;
	return ds;
}

/**
 * The routine checks the timestamp flag in the current packet,
 * and pushes a WAIT WQE into the queue if scheduling is required.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   MLX5_TXCMP_CODE_EXIT - sending is done or impossible.
 *   MLX5_TXCMP_CODE_SINGLE - continue processing with the packet.
 *   MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing.
 *   Local context variables partially updated.
 */
static __rte_always_inline enum mlx5_txcmp_code
mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq,
		      struct mlx5_txq_local *restrict loc,
		      unsigned int olx)
{
	if (MLX5_TXOFF_CONFIG(TXPP) &&
	    loc->mbuf->ol_flags & txq->ts_mask) {
		struct mlx5_dev_ctx_shared *sh;
		struct mlx5_wqe *wqe;
		uint64_t ts;

		/*
		 * Estimate the required space quickly and roughly.
		 * We would like to ensure the packet can be pushed
		 * to the queue and we won't get the orphan WAIT WQE.
		 */
		if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE ||
		    loc->elts_free < NB_SEGS(loc->mbuf))
			return MLX5_TXCMP_CODE_EXIT;
		/* Convert the timestamp into completion to wait. */
		ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *);
		wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
		sh = txq->sh;
		if (txq->wait_on_time) {
			/* The wait on time capability should be used. */
			ts -= sh->txpp.skew;
			mlx5_tx_cseg_init(txq, loc, wqe,
					  1 + sizeof(struct mlx5_wqe_wseg) /
					      MLX5_WSEG_SIZE,
					  MLX5_OPCODE_WAIT |
					  MLX5_OPC_MOD_WAIT_TIME << 24, olx);
			mlx5_tx_wseg_init(txq, loc, wqe, ts, olx);
		} else {
			/* Legacy cross-channel operation should be used. */
			int32_t wci;

			wci = mlx5_txpp_convert_tx_ts(sh, ts);
			if (unlikely(wci < 0))
				return MLX5_TXCMP_CODE_SINGLE;
			/* Build the WAIT WQE with specified completion. */
			mlx5_tx_cseg_init(txq, loc, wqe,
					  1 + sizeof(struct mlx5_wqe_qseg) /
					      MLX5_WSEG_SIZE,
					  MLX5_OPCODE_WAIT |
					  MLX5_OPC_MOD_WAIT_CQ_PI << 24, olx);
			mlx5_tx_qseg_init(txq, loc, wqe, wci, olx);
		}
		++txq->wqe_ci;
		--loc->wqe_free;
		return MLX5_TXCMP_CODE_MULTI;
	}
	return MLX5_TXCMP_CODE_SINGLE;
}
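/*
 * Illustrative application-side usage (assumption, not part of this header):
 * a packet is scheduled by writing the registered dynamic timestamp field
 * and setting the matching dynamic flag before rte_eth_tx_burst(), e.g.:
 *
 *   *RTE_MBUF_DYNFIELD(mbuf, ts_field_offset, uint64_t *) = ts_nanoseconds;
 *   mbuf->ol_flags |= ts_dynflag_mask;
 *
 * where ts_field_offset and ts_dynflag_mask (hypothetical names) come from
 * the rte_mbuf dynamic field/flag registration.
 */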
*/ 1695 mlx5_tx_cseg_init(txq, loc, wqe, 1696 1 + sizeof(struct mlx5_wqe_qseg) / 1697 MLX5_WSEG_SIZE, 1698 MLX5_OPCODE_WAIT | 1699 MLX5_OPC_MOD_WAIT_CQ_PI << 24, olx); 1700 mlx5_tx_qseg_init(txq, loc, wqe, wci, olx); 1701 } 1702 ++txq->wqe_ci; 1703 --loc->wqe_free; 1704 return MLX5_TXCMP_CODE_MULTI; 1705 } 1706 return MLX5_TXCMP_CODE_SINGLE; 1707 } 1708 1709 /** 1710 * Tx one packet function for multi-segment TSO. Supports all 1711 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 1712 * sends one packet per WQE. 1713 * 1714 * This routine is responsible for storing processed mbuf 1715 * into elts ring buffer and update elts_head. 1716 * 1717 * @param txq 1718 * Pointer to TX queue structure. 1719 * @param loc 1720 * Pointer to burst routine local context. 1721 * @param olx 1722 * Configured Tx offloads mask. It is fully defined at 1723 * compile time and may be used for optimization. 1724 * 1725 * @return 1726 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1727 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1728 * Local context variables partially updated. 1729 */ 1730 static __rte_always_inline enum mlx5_txcmp_code 1731 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 1732 struct mlx5_txq_local *__rte_restrict loc, 1733 unsigned int olx) 1734 { 1735 struct mlx5_wqe *__rte_restrict wqe; 1736 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 1737 1738 if (MLX5_TXOFF_CONFIG(TXPP)) { 1739 enum mlx5_txcmp_code wret; 1740 1741 /* Generate WAIT for scheduling if requested. */ 1742 wret = mlx5_tx_schedule_send(txq, loc, olx); 1743 if (wret == MLX5_TXCMP_CODE_EXIT) 1744 return MLX5_TXCMP_CODE_EXIT; 1745 if (wret == MLX5_TXCMP_CODE_ERROR) 1746 return MLX5_TXCMP_CODE_ERROR; 1747 } 1748 /* 1749 * Calculate data length to be inlined to estimate 1750 * the required space in WQE ring buffer. 1751 */ 1752 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1753 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1754 vlan = sizeof(struct rte_vlan_hdr); 1755 inlen = loc->mbuf->l2_len + vlan + 1756 loc->mbuf->l3_len + loc->mbuf->l4_len; 1757 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 1758 return MLX5_TXCMP_CODE_ERROR; 1759 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 1760 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 1761 /* Packet must contain all TSO headers. */ 1762 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 1763 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 1764 inlen > (dlen + vlan))) 1765 return MLX5_TXCMP_CODE_ERROR; 1766 /* 1767 * Check whether there are enough free WQEBBs: 1768 * - Control Segment 1769 * - Ethernet Segment 1770 * - First Segment of inlined Ethernet data 1771 * - ... data continued ... 1772 * - Data Segments of pointer/min inline type 1773 */ 1774 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 1775 MLX5_ESEG_MIN_INLINE_SIZE + 1776 MLX5_WSEG_SIZE + 1777 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 1778 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1779 return MLX5_TXCMP_CODE_EXIT; 1780 /* Check for maximal WQE size. */ 1781 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 1782 return MLX5_TXCMP_CODE_ERROR; 1783 #ifdef MLX5_PMD_SOFT_COUNTERS 1784 /* Update sent data bytes/packets counters. */ 1785 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 1786 loc->mbuf->tso_segsz; 1787 /* 1788 * One will be added for mbuf itself at the end of the mlx5_tx_burst 1789 * from loc->pkts_sent field. 
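*
* Worked example with made-up numbers: pkt_len == 7254 bytes, inlined
* headers inlen == 54, no VLAN and tso_segsz == 1400 give a TCP payload
* of 7200 bytes and
*
*   ntcp = (7200 + 1400 - 1) / 1400 == 6 segments,
*
* one of which is covered by the per-mbuf increment mentioned above,
* hence the decrement that follows.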
1790 */ 1791 --ntcp; 1792 txq->stats.opackets += ntcp; 1793 txq->stats.obytes += dlen + vlan + ntcp * inlen; 1794 #endif 1795 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1796 loc->wqe_last = wqe; 1797 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 1798 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 1799 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 1800 txq->wqe_ci += (ds + 3) / 4; 1801 loc->wqe_free -= (ds + 3) / 4; 1802 return MLX5_TXCMP_CODE_MULTI; 1803 } 1804 1805 /** 1806 * Tx one packet function for multi-segment SEND. Supports all types of Tx 1807 * offloads, uses MLX5_OPCODE_SEND to build WQEs, sends one packet per WQE, 1808 * without any data inlining in Ethernet Segment. 1809 * 1810 * This routine is responsible for storing processed mbuf 1811 * into elts ring buffer and update elts_head. 1812 * 1813 * @param txq 1814 * Pointer to TX queue structure. 1815 * @param loc 1816 * Pointer to burst routine local context. 1817 * @param olx 1818 * Configured Tx offloads mask. It is fully defined at 1819 * compile time and may be used for optimization. 1820 * 1821 * @return 1822 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1823 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1824 * Local context variables partially updated. 1825 */ 1826 static __rte_always_inline enum mlx5_txcmp_code 1827 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 1828 struct mlx5_txq_local *__rte_restrict loc, 1829 unsigned int olx) 1830 { 1831 struct mlx5_wqe_dseg *__rte_restrict dseg; 1832 struct mlx5_wqe *__rte_restrict wqe; 1833 unsigned int ds, nseg; 1834 1835 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1836 if (MLX5_TXOFF_CONFIG(TXPP)) { 1837 enum mlx5_txcmp_code wret; 1838 1839 /* Generate WAIT for scheduling if requested. */ 1840 wret = mlx5_tx_schedule_send(txq, loc, olx); 1841 if (wret == MLX5_TXCMP_CODE_EXIT) 1842 return MLX5_TXCMP_CODE_EXIT; 1843 if (wret == MLX5_TXCMP_CODE_ERROR) 1844 return MLX5_TXCMP_CODE_ERROR; 1845 } 1846 /* 1847 * No inline at all, it means the CPU cycles saving is prioritized at 1848 * configuration, we should not copy any packet data to WQE. 1849 */ 1850 nseg = NB_SEGS(loc->mbuf); 1851 ds = 2 + nseg; 1852 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1853 return MLX5_TXCMP_CODE_EXIT; 1854 /* Check for maximal WQE size. */ 1855 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 1856 return MLX5_TXCMP_CODE_ERROR; 1857 /* 1858 * Some Tx offloads may cause an error if packet is not long enough, 1859 * check against assumed minimal length. 1860 */ 1861 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 1862 return MLX5_TXCMP_CODE_ERROR; 1863 #ifdef MLX5_PMD_SOFT_COUNTERS 1864 /* Update sent data bytes counter. 
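*
* Sizing example for the WQE assembled below (illustrative numbers): a
* three-segment chain gives nseg == 3 and ds == 2 + 3 == 5 segments
* (Control, Ethernet and three pointer Data Segments), occupying
* (5 + 3) / 4 == 2 WQEBBs; only the packet bytes, plus the VLAN header
* length when hardware inserts the tag, are added to the counter since
* nothing is inlined.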
*/ 1865 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 1866 if (MLX5_TXOFF_CONFIG(VLAN) && 1867 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1868 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 1869 #endif 1870 /* 1871 * SEND WQE, one WQEBB: 1872 * - Control Segment, SEND opcode 1873 * - Ethernet Segment, optional VLAN, no inline 1874 * - Data Segments, pointer only type 1875 */ 1876 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1877 loc->wqe_last = wqe; 1878 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 1879 mlx5_tx_eseg_none(txq, loc, wqe, olx); 1880 dseg = &wqe->dseg[0]; 1881 do { 1882 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1883 struct rte_mbuf *mbuf; 1884 1885 /* 1886 * Zero length segment found, have to correct total 1887 * size of WQE in segments. 1888 * It is supposed to be rare occasion, so in normal 1889 * case (no zero length segments) we avoid extra 1890 * writing to the Control Segment. 1891 */ 1892 --ds; 1893 wqe->cseg.sq_ds -= RTE_BE32(1); 1894 mbuf = loc->mbuf; 1895 loc->mbuf = mbuf->next; 1896 rte_pktmbuf_free_seg(mbuf); 1897 if (--nseg == 0) 1898 break; 1899 } else { 1900 mlx5_tx_dseg_ptr 1901 (txq, loc, dseg, 1902 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1903 rte_pktmbuf_data_len(loc->mbuf), olx); 1904 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1905 --loc->elts_free; 1906 if (--nseg == 0) 1907 break; 1908 ++dseg; 1909 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1910 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1911 loc->mbuf = loc->mbuf->next; 1912 } 1913 } while (true); 1914 txq->wqe_ci += (ds + 3) / 4; 1915 loc->wqe_free -= (ds + 3) / 4; 1916 return MLX5_TXCMP_CODE_MULTI; 1917 } 1918 1919 /** 1920 * Tx one packet function for multi-segment SEND. Supports all 1921 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 1922 * sends one packet per WQE, with data inlining in 1923 * Ethernet Segment and minimal Data Segments. 1924 * 1925 * This routine is responsible for storing processed mbuf 1926 * into elts ring buffer and update elts_head. 1927 * 1928 * @param txq 1929 * Pointer to TX queue structure. 1930 * @param loc 1931 * Pointer to burst routine local context. 1932 * @param olx 1933 * Configured Tx offloads mask. It is fully defined at 1934 * compile time and may be used for optimization. 1935 * 1936 * @return 1937 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1938 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1939 * Local context variables partially updated. 1940 */ 1941 static __rte_always_inline enum mlx5_txcmp_code 1942 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 1943 struct mlx5_txq_local *__rte_restrict loc, 1944 unsigned int olx) 1945 { 1946 struct mlx5_wqe *__rte_restrict wqe; 1947 unsigned int ds, inlen, dlen, vlan = 0; 1948 1949 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 1950 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1951 if (MLX5_TXOFF_CONFIG(TXPP)) { 1952 enum mlx5_txcmp_code wret; 1953 1954 /* Generate WAIT for scheduling if requested. */ 1955 wret = mlx5_tx_schedule_send(txq, loc, olx); 1956 if (wret == MLX5_TXCMP_CODE_EXIT) 1957 return MLX5_TXCMP_CODE_EXIT; 1958 if (wret == MLX5_TXCMP_CODE_ERROR) 1959 return MLX5_TXCMP_CODE_ERROR; 1960 } 1961 /* 1962 * First calculate data length to be inlined 1963 * to estimate the required space for WQE. 
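*
* Illustrative trace with made-up sizes: a two-segment 2000-byte packet
* without VLAN whose first segment holds 1500 bytes, on a queue
* configured with inlen_send == 256 and inlen_mode == 0, exceeds the
* inline limit; because the first segment alone is also longer than
* inlen_send, the code below falls back to mlx5_tx_packet_multi_send()
* and copies nothing.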
1964 */ 1965 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1966 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1967 vlan = sizeof(struct rte_vlan_hdr); 1968 inlen = dlen + vlan; 1969 /* Check against minimal length. */ 1970 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 1971 return MLX5_TXCMP_CODE_ERROR; 1972 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 1973 if (inlen > txq->inlen_send || 1974 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1975 struct rte_mbuf *mbuf; 1976 unsigned int nxlen; 1977 uintptr_t start; 1978 1979 mbuf = loc->mbuf; 1980 nxlen = rte_pktmbuf_data_len(mbuf); 1981 /* 1982 * Packet length exceeds the allowed inline data length, 1983 * check whether the minimal inlining is required. 1984 */ 1985 if (txq->inlen_mode) { 1986 MLX5_ASSERT(txq->inlen_mode >= 1987 MLX5_ESEG_MIN_INLINE_SIZE); 1988 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 1989 inlen = RTE_MIN(txq->inlen_mode, inlen); 1990 } else if (vlan && !txq->vlan_en) { 1991 /* 1992 * VLAN insertion is requested and hardware does not 1993 * support the offload, will do with software inline. 1994 */ 1995 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 1996 } else if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE || 1997 nxlen > txq->inlen_send) { 1998 return mlx5_tx_packet_multi_send(txq, loc, olx); 1999 } else { 2000 goto do_first; 2001 } 2002 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2003 goto do_build; 2004 /* 2005 * Now we know the minimal amount of data is requested 2006 * to inline. Check whether we should inline the buffers 2007 * from the chain beginning to eliminate some mbufs. 2008 */ 2009 if (unlikely(nxlen <= txq->inlen_send)) { 2010 /* We can inline first mbuf at least. */ 2011 if (nxlen < inlen) { 2012 unsigned int smlen; 2013 2014 /* Scan mbufs till inlen filled. */ 2015 do { 2016 smlen = nxlen; 2017 mbuf = NEXT(mbuf); 2018 MLX5_ASSERT(mbuf); 2019 nxlen = rte_pktmbuf_data_len(mbuf); 2020 nxlen += smlen; 2021 } while (unlikely(nxlen < inlen)); 2022 if (unlikely(nxlen > txq->inlen_send)) { 2023 /* We cannot inline entire mbuf. */ 2024 smlen = inlen - smlen; 2025 start = rte_pktmbuf_mtod_offset 2026 (mbuf, uintptr_t, smlen); 2027 goto do_align; 2028 } 2029 } 2030 do_first: 2031 do { 2032 inlen = nxlen; 2033 mbuf = NEXT(mbuf); 2034 /* There should be not end of packet. */ 2035 MLX5_ASSERT(mbuf); 2036 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2037 break; 2038 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 2039 } while (unlikely(nxlen < txq->inlen_send)); 2040 } 2041 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 2042 /* 2043 * Check whether we can do inline to align start 2044 * address of data buffer to cacheline. 2045 */ 2046 do_align: 2047 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 2048 if (unlikely(start)) { 2049 start += inlen; 2050 if (start <= txq->inlen_send) 2051 inlen = start; 2052 } 2053 } 2054 /* 2055 * Check whether there are enough free WQEBBs: 2056 * - Control Segment 2057 * - Ethernet Segment 2058 * - First Segment of inlined Ethernet data 2059 * - ... data continued ... 2060 * - Data Segments of pointer/min inline type 2061 * 2062 * Estimate the number of Data Segments conservatively, 2063 * supposing no any mbufs is being freed during inlining. 
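*
* Worked example (assuming the conventional 16-byte MLX5_WSEG_SIZE and
* an 18-byte MLX5_ESEG_MIN_INLINE_SIZE): a two-segment packet with
* inlen == 128 yields
*
*   ds = 2 + 2 + (128 - 18 + 16 + 16 - 1) / 16 == 12,
*
* i.e. (12 + 3) / 4 == 3 WQEBBs requested from the free budget below.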
2064 */ 2065 do_build: 2066 MLX5_ASSERT(inlen <= txq->inlen_send); 2067 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 2068 MLX5_ESEG_MIN_INLINE_SIZE + 2069 MLX5_WSEG_SIZE + 2070 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2071 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 2072 return MLX5_TXCMP_CODE_EXIT; 2073 /* Check for maximal WQE size. */ 2074 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) 2075 return MLX5_TXCMP_CODE_ERROR; 2076 #ifdef MLX5_PMD_SOFT_COUNTERS 2077 /* Update sent data bytes/packets counters. */ 2078 txq->stats.obytes += dlen + vlan; 2079 #endif 2080 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2081 loc->wqe_last = wqe; 2082 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 2083 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 2084 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2085 txq->wqe_ci += (ds + 3) / 4; 2086 loc->wqe_free -= (ds + 3) / 4; 2087 return MLX5_TXCMP_CODE_MULTI; 2088 } 2089 2090 /** 2091 * Tx burst function for multi-segment packets. Supports all 2092 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 2093 * sends one packet per WQE. Function stops sending if it 2094 * encounters the single-segment packet. 2095 * 2096 * This routine is responsible for storing processed mbuf 2097 * into elts ring buffer and update elts_head. 2098 * 2099 * @param txq 2100 * Pointer to TX queue structure. 2101 * @param[in] pkts 2102 * Packets to transmit. 2103 * @param pkts_n 2104 * Number of packets in array. 2105 * @param loc 2106 * Pointer to burst routine local context. 2107 * @param olx 2108 * Configured Tx offloads mask. It is fully defined at 2109 * compile time and may be used for optimization. 2110 * 2111 * @return 2112 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2113 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2114 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2115 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 2116 * Local context variables updated. 2117 */ 2118 static __rte_always_inline enum mlx5_txcmp_code 2119 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 2120 struct rte_mbuf **__rte_restrict pkts, 2121 unsigned int pkts_n, 2122 struct mlx5_txq_local *__rte_restrict loc, 2123 unsigned int olx) 2124 { 2125 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2126 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2127 pkts += loc->pkts_sent + 1; 2128 pkts_n -= loc->pkts_sent; 2129 for (;;) { 2130 enum mlx5_txcmp_code ret; 2131 2132 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 2133 /* 2134 * Estimate the number of free elts quickly but conservatively. 2135 * Some segment may be fully inlined and freed, 2136 * ignore this here - precise estimation is costly. 2137 */ 2138 if (loc->elts_free < NB_SEGS(loc->mbuf)) 2139 return MLX5_TXCMP_CODE_EXIT; 2140 if (MLX5_TXOFF_CONFIG(TSO) && 2141 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { 2142 /* Proceed with multi-segment TSO. */ 2143 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 2144 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 2145 /* Proceed with multi-segment SEND with inlining. */ 2146 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 2147 } else { 2148 /* Proceed with multi-segment SEND w/o inlining. */ 2149 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 2150 } 2151 if (ret == MLX5_TXCMP_CODE_EXIT) 2152 return MLX5_TXCMP_CODE_EXIT; 2153 if (ret == MLX5_TXCMP_CODE_ERROR) 2154 return MLX5_TXCMP_CODE_ERROR; 2155 /* WQE is built, go to the next packet. 
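*
* Recap (illustrative): whichever routine ran above, TSO, inline SEND
* or pointer-only SEND, exactly one WQE was produced for the whole
* mbuf chain, so the bookkeeping below is identical for all three
* dispatch branches.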
*/ 2156 ++loc->pkts_sent; 2157 --pkts_n; 2158 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2159 return MLX5_TXCMP_CODE_EXIT; 2160 loc->mbuf = *pkts++; 2161 if (pkts_n > 1) 2162 rte_prefetch0(*pkts); 2163 if (likely(NB_SEGS(loc->mbuf) > 1)) 2164 continue; 2165 /* Here ends the series of multi-segment packets. */ 2166 if (MLX5_TXOFF_CONFIG(TSO) && 2167 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2168 return MLX5_TXCMP_CODE_TSO; 2169 return MLX5_TXCMP_CODE_SINGLE; 2170 } 2171 MLX5_ASSERT(false); 2172 } 2173 2174 /** 2175 * Tx burst function for single-segment packets with TSO. 2176 * Supports all types of Tx offloads, except multi-packets. 2177 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 2178 * Function stops sending if it encounters the multi-segment 2179 * packet or packet without TSO requested. 2180 * 2181 * The routine is responsible for storing processed mbuf into elts ring buffer 2182 * and update elts_head if inline offloads is requested due to possible early 2183 * freeing of the inlined mbufs (can not store pkts array in elts as a batch). 2184 * 2185 * @param txq 2186 * Pointer to TX queue structure. 2187 * @param[in] pkts 2188 * Packets to transmit. 2189 * @param pkts_n 2190 * Number of packets in array. 2191 * @param loc 2192 * Pointer to burst routine local context. 2193 * @param olx 2194 * Configured Tx offloads mask. It is fully defined at 2195 * compile time and may be used for optimization. 2196 * 2197 * @return 2198 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2199 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2200 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2201 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2202 * Local context variables updated. 2203 */ 2204 static __rte_always_inline enum mlx5_txcmp_code 2205 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 2206 struct rte_mbuf **__rte_restrict pkts, 2207 unsigned int pkts_n, 2208 struct mlx5_txq_local *__rte_restrict loc, 2209 unsigned int olx) 2210 { 2211 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2212 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2213 pkts += loc->pkts_sent + 1; 2214 pkts_n -= loc->pkts_sent; 2215 for (;;) { 2216 struct mlx5_wqe_dseg *__rte_restrict dseg; 2217 struct mlx5_wqe *__rte_restrict wqe; 2218 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 2219 uint8_t *dptr; 2220 2221 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2222 if (MLX5_TXOFF_CONFIG(TXPP)) { 2223 enum mlx5_txcmp_code wret; 2224 2225 /* Generate WAIT for scheduling if requested. */ 2226 wret = mlx5_tx_schedule_send(txq, loc, olx); 2227 if (wret == MLX5_TXCMP_CODE_EXIT) 2228 return MLX5_TXCMP_CODE_EXIT; 2229 if (wret == MLX5_TXCMP_CODE_ERROR) 2230 return MLX5_TXCMP_CODE_ERROR; 2231 } 2232 dlen = rte_pktmbuf_data_len(loc->mbuf); 2233 if (MLX5_TXOFF_CONFIG(VLAN) && 2234 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2235 vlan = sizeof(struct rte_vlan_hdr); 2236 } 2237 /* 2238 * First calculate the WQE size to check 2239 * whether we have enough space in ring buffer. 2240 */ 2241 hlen = loc->mbuf->l2_len + vlan + 2242 loc->mbuf->l3_len + loc->mbuf->l4_len; 2243 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 2244 return MLX5_TXCMP_CODE_ERROR; 2245 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 2246 hlen += loc->mbuf->outer_l2_len + 2247 loc->mbuf->outer_l3_len; 2248 /* Segment must contain all TSO headers. 
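*
* Worked example (assuming a 16-byte MLX5_WSEG_SIZE and an 18-byte
* MLX5_ESEG_MIN_INLINE_SIZE): plain Ethernet/IPv4/TCP headers give
* hlen == 54, so
*
*   ds = 4 + (54 - 18 + 16 - 1) / 16 == 7,
*
* i.e. (7 + 3) / 4 == 2 WQEBBs for the entire TSO WQE.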
*/ 2249 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 2250 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 2251 hlen > (dlen + vlan))) 2252 return MLX5_TXCMP_CODE_ERROR; 2253 /* 2254 * Check whether there are enough free WQEBBs: 2255 * - Control Segment 2256 * - Ethernet Segment 2257 * - First Segment of inlined Ethernet data 2258 * - ... data continued ... 2259 * - Finishing Data Segment of pointer type 2260 */ 2261 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 2262 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2263 if (loc->wqe_free < ((ds + 3) / 4)) 2264 return MLX5_TXCMP_CODE_EXIT; 2265 #ifdef MLX5_PMD_SOFT_COUNTERS 2266 /* Update sent data bytes/packets counters. */ 2267 ntcp = (dlen + vlan - hlen + 2268 loc->mbuf->tso_segsz - 1) / 2269 loc->mbuf->tso_segsz; 2270 /* 2271 * One will be added for mbuf itself at the end 2272 * of the mlx5_tx_burst from loc->pkts_sent field. 2273 */ 2274 --ntcp; 2275 txq->stats.opackets += ntcp; 2276 txq->stats.obytes += dlen + vlan + ntcp * hlen; 2277 #endif 2278 /* 2279 * Build the TSO WQE: 2280 * - Control Segment 2281 * - Ethernet Segment with hlen bytes inlined 2282 * - Data Segment of pointer type 2283 */ 2284 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2285 loc->wqe_last = wqe; 2286 mlx5_tx_cseg_init(txq, loc, wqe, ds, 2287 MLX5_OPCODE_TSO, olx); 2288 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 2289 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 2290 dlen -= hlen - vlan; 2291 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2292 /* 2293 * WQE is built, update the loop parameters 2294 * and go to the next packet. 2295 */ 2296 txq->wqe_ci += (ds + 3) / 4; 2297 loc->wqe_free -= (ds + 3) / 4; 2298 if (MLX5_TXOFF_CONFIG(INLINE)) 2299 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2300 --loc->elts_free; 2301 ++loc->pkts_sent; 2302 --pkts_n; 2303 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2304 return MLX5_TXCMP_CODE_EXIT; 2305 loc->mbuf = *pkts++; 2306 if (pkts_n > 1) 2307 rte_prefetch0(*pkts); 2308 if (MLX5_TXOFF_CONFIG(MULTI) && 2309 unlikely(NB_SEGS(loc->mbuf) > 1)) 2310 return MLX5_TXCMP_CODE_MULTI; 2311 if (likely(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG))) 2312 return MLX5_TXCMP_CODE_SINGLE; 2313 /* Continue with the next TSO packet. */ 2314 } 2315 MLX5_ASSERT(false); 2316 } 2317 2318 /** 2319 * Analyze the packet and select the best method to send. 2320 * 2321 * @param txq 2322 * Pointer to TX queue structure. 2323 * @param loc 2324 * Pointer to burst routine local context. 2325 * @param olx 2326 * Configured Tx offloads mask. It is fully defined at 2327 * compile time and may be used for optimization. 2328 * @param newp 2329 * The predefined flag whether do complete check for 2330 * multi-segment packets and TSO. 2331 * 2332 * @return 2333 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2334 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 2335 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 2336 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 2337 */ 2338 static __rte_always_inline enum mlx5_txcmp_code 2339 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 2340 struct mlx5_txq_local *__rte_restrict loc, 2341 unsigned int olx, 2342 bool newp) 2343 { 2344 /* Check for multi-segment packet. */ 2345 if (newp && 2346 MLX5_TXOFF_CONFIG(MULTI) && 2347 unlikely(NB_SEGS(loc->mbuf) > 1)) 2348 return MLX5_TXCMP_CODE_MULTI; 2349 /* Check for TSO packet. 
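*
* Quick reference for the whole classification (an illustrative
* summary, assuming every offload group is compiled in and the full
* check is requested via newp):
*   NB_SEGS > 1                         -> MLX5_TXCMP_CODE_MULTI
*   RTE_MBUF_F_TX_TCP_SEG set           -> MLX5_TXCMP_CODE_TSO
*   eMPW disabled, or VLAN requested
*   and the packet cannot be inlined    -> MLX5_TXCMP_CODE_SINGLE
*   otherwise                           -> MLX5_TXCMP_CODE_EMPW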
*/ 2350 if (newp && 2351 MLX5_TXOFF_CONFIG(TSO) && 2352 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2353 return MLX5_TXCMP_CODE_TSO; 2354 /* Check if eMPW is enabled at all. */ 2355 if (!MLX5_TXOFF_CONFIG(EMPW)) 2356 return MLX5_TXCMP_CODE_SINGLE; 2357 /* Check if eMPW can be engaged. */ 2358 if (MLX5_TXOFF_CONFIG(VLAN) && 2359 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) && 2360 (!MLX5_TXOFF_CONFIG(INLINE) || 2361 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 2362 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 2363 /* 2364 * eMPW does not support VLAN insertion offload, we have to 2365 * inline the entire packet but packet is too long for inlining. 2366 */ 2367 return MLX5_TXCMP_CODE_SINGLE; 2368 } 2369 return MLX5_TXCMP_CODE_EMPW; 2370 } 2371 2372 /** 2373 * Check the next packet attributes to match with the eMPW batch ones. 2374 * In addition, for legacy MPW the packet length is checked either. 2375 * 2376 * @param txq 2377 * Pointer to TX queue structure. 2378 * @param es 2379 * Pointer to Ethernet Segment of eMPW batch. 2380 * @param loc 2381 * Pointer to burst routine local context. 2382 * @param dlen 2383 * Length of previous packet in MPW descriptor. 2384 * @param olx 2385 * Configured Tx offloads mask. It is fully defined at 2386 * compile time and may be used for optimization. 2387 * 2388 * @return 2389 * true - packet match with eMPW batch attributes. 2390 * false - no match, eMPW should be restarted. 2391 */ 2392 static __rte_always_inline bool 2393 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, 2394 struct mlx5_wqe_eseg *__rte_restrict es, 2395 struct mlx5_txq_local *__rte_restrict loc, 2396 uint32_t dlen, 2397 unsigned int olx) 2398 { 2399 uint8_t swp_flags = 0; 2400 2401 /* Compare the checksum flags, if any. */ 2402 if (MLX5_TXOFF_CONFIG(CSUM) && 2403 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags) 2404 return false; 2405 /* Compare the Software Parser offsets and flags. */ 2406 if (MLX5_TXOFF_CONFIG(SWP) && 2407 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) || 2408 es->swp_flags != swp_flags)) 2409 return false; 2410 /* Fill metadata field if needed. */ 2411 if (MLX5_TXOFF_CONFIG(METADATA) && 2412 es->metadata != (loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 2413 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 0)) 2414 return false; 2415 /* Legacy MPW can send packets with the same length only. */ 2416 if (MLX5_TXOFF_CONFIG(MPW) && 2417 dlen != rte_pktmbuf_data_len(loc->mbuf)) 2418 return false; 2419 /* There must be no VLAN packets in eMPW loop. */ 2420 if (MLX5_TXOFF_CONFIG(VLAN)) 2421 MLX5_ASSERT(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN)); 2422 /* Check if the scheduling is requested. */ 2423 if (MLX5_TXOFF_CONFIG(TXPP) && 2424 loc->mbuf->ol_flags & txq->ts_mask) 2425 return false; 2426 return true; 2427 } 2428 2429 /** 2430 * Update send loop variables and WQE for eMPW loop without data inlining. 2431 * Number of Data Segments is equal to the number of sent packets. 2432 * 2433 * @param txq 2434 * Pointer to TX queue structure. 2435 * @param loc 2436 * Pointer to burst routine local context. 2437 * @param ds 2438 * Number of packets/Data Segments/Packets. 2439 * @param slen 2440 * Accumulated statistics, bytes sent. 2441 * @param olx 2442 * Configured Tx offloads mask. It is fully defined at 2443 * compile time and may be used for optimization. 2444 * 2445 * @return 2446 * true - packet match with eMPW batch attributes. 2447 * false - no match, eMPW should be restarted. 
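*
* Example (illustrative): after five packets have been attached as
* pointer Data Segments, ds == 5 on entry, the title Control Segment
* gets its DS field set to 5 + 2 == 7 and (7 + 3) / 4 == 2 WQEBBs are
* consumed from the send queue ring.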
2448 */ 2449 static __rte_always_inline void 2450 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, 2451 struct mlx5_txq_local *__rte_restrict loc, 2452 unsigned int ds, 2453 unsigned int slen, 2454 unsigned int olx __rte_unused) 2455 { 2456 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2457 #ifdef MLX5_PMD_SOFT_COUNTERS 2458 /* Update sent data bytes counter. */ 2459 txq->stats.obytes += slen; 2460 #else 2461 (void)slen; 2462 #endif 2463 loc->elts_free -= ds; 2464 loc->pkts_sent += ds; 2465 ds += 2; 2466 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2467 txq->wqe_ci += (ds + 3) / 4; 2468 loc->wqe_free -= (ds + 3) / 4; 2469 } 2470 2471 /** 2472 * Update send loop variables and WQE for eMPW loop with data inlining. 2473 * Gets the size of pushed descriptors and data to the WQE. 2474 * 2475 * @param txq 2476 * Pointer to TX queue structure. 2477 * @param loc 2478 * Pointer to burst routine local context. 2479 * @param len 2480 * Total size of descriptor/data in bytes. 2481 * @param slen 2482 * Accumulated statistics, data bytes sent. 2483 * @param wqem 2484 * The base WQE for the eMPW/MPW descriptor. 2485 * @param olx 2486 * Configured Tx offloads mask. It is fully defined at 2487 * compile time and may be used for optimization. 2488 * 2489 * @return 2490 * true - packet match with eMPW batch attributes. 2491 * false - no match, eMPW should be restarted. 2492 */ 2493 static __rte_always_inline void 2494 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 2495 struct mlx5_txq_local *__rte_restrict loc, 2496 unsigned int len, 2497 unsigned int slen, 2498 struct mlx5_wqe *__rte_restrict wqem, 2499 unsigned int olx __rte_unused) 2500 { 2501 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 2502 2503 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2504 #ifdef MLX5_PMD_SOFT_COUNTERS 2505 /* Update sent data bytes counter. */ 2506 txq->stats.obytes += slen; 2507 #else 2508 (void)slen; 2509 #endif 2510 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 2511 /* 2512 * If the legacy MPW session contains the inline packets 2513 * we should set the only inline data segment length 2514 * and align the total length to the segment size. 2515 */ 2516 MLX5_ASSERT(len > sizeof(dseg->bcount)); 2517 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 2518 MLX5_ETH_WQE_DATA_INLINE); 2519 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 2520 } else { 2521 /* 2522 * The session is not legacy MPW or contains the 2523 * data buffer pointer segments. 2524 */ 2525 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 2526 len = len / MLX5_WSEG_SIZE + 2; 2527 } 2528 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 2529 txq->wqe_ci += (len + 3) / 4; 2530 loc->wqe_free -= (len + 3) / 4; 2531 loc->wqe_last = wqem; 2532 } 2533 2534 /** 2535 * The set of Tx burst functions for single-segment packets without TSO 2536 * and with Multi-Packet Writing feature support. 2537 * Supports all types of Tx offloads, except multi-packets and TSO. 2538 * 2539 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends as many packet 2540 * per WQE as it can. If eMPW is not configured or packet can not be sent with 2541 * eMPW (VLAN insertion) the ordinary SEND opcode is used and only one packet 2542 * placed in WQE. 2543 * 2544 * Functions stop sending if it encounters the multi-segment packet or packet 2545 * with TSO requested. 2546 * 2547 * The routines are responsible for storing processed mbuf into elts ring buffer 2548 * and update elts_head if inlining offload is requested. 
Otherwise the copying 2549 * mbufs to elts can be postponed and completed at the end of burst routine. 2550 * 2551 * @param txq 2552 * Pointer to TX queue structure. 2553 * @param[in] pkts 2554 * Packets to transmit. 2555 * @param pkts_n 2556 * Number of packets in array. 2557 * @param loc 2558 * Pointer to burst routine local context. 2559 * @param olx 2560 * Configured Tx offloads mask. It is fully defined at 2561 * compile time and may be used for optimization. 2562 * 2563 * @return 2564 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2565 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2566 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2567 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 2568 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 2569 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 2570 * 2571 * Local context variables updated. 2572 * 2573 * 2574 * The routine sends packets with MLX5_OPCODE_EMPW 2575 * without inlining, this is dedicated optimized branch. 2576 * No VLAN insertion is supported. 2577 */ 2578 static __rte_always_inline enum mlx5_txcmp_code 2579 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 2580 struct rte_mbuf **__rte_restrict pkts, 2581 unsigned int pkts_n, 2582 struct mlx5_txq_local *__rte_restrict loc, 2583 unsigned int olx) 2584 { 2585 /* 2586 * Subroutine is the part of mlx5_tx_burst_single() and sends 2587 * single-segment packet with eMPW opcode without data inlining. 2588 */ 2589 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2590 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2591 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2592 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2593 pkts += loc->pkts_sent + 1; 2594 pkts_n -= loc->pkts_sent; 2595 for (;;) { 2596 struct mlx5_wqe_dseg *__rte_restrict dseg; 2597 struct mlx5_wqe_eseg *__rte_restrict eseg; 2598 enum mlx5_txcmp_code ret; 2599 unsigned int part, loop; 2600 unsigned int slen = 0; 2601 2602 next_empw: 2603 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2604 if (MLX5_TXOFF_CONFIG(TXPP)) { 2605 enum mlx5_txcmp_code wret; 2606 2607 /* Generate WAIT for scheduling if requested. */ 2608 wret = mlx5_tx_schedule_send(txq, loc, olx); 2609 if (wret == MLX5_TXCMP_CODE_EXIT) 2610 return MLX5_TXCMP_CODE_EXIT; 2611 if (wret == MLX5_TXCMP_CODE_ERROR) 2612 return MLX5_TXCMP_CODE_ERROR; 2613 } 2614 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2615 MLX5_MPW_MAX_PACKETS : 2616 MLX5_EMPW_MAX_PACKETS); 2617 if (unlikely(loc->elts_free < part)) { 2618 /* We have no enough elts to save all mbufs. */ 2619 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 2620 return MLX5_TXCMP_CODE_EXIT; 2621 /* But we still able to send at least minimal eMPW. 
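*
* Sizing note (illustrative): an eMPW WQE spends one Control and one
* Ethernet segment on the title and one 16-byte Data Segment per
* packet, so the elts clip right below keeps one slot per packet and
* the WQE-budget clip a few lines further allows at most
* loc->wqe_free * 4 - 2 Data Segments, e.g. 4 * 4 - 2 == 14 packets
* for four free WQEBBs.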
*/ 2622 part = loc->elts_free; 2623 } 2624 /* Check whether we have enough WQEs */ 2625 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 2626 if (unlikely(loc->wqe_free < 2627 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2628 return MLX5_TXCMP_CODE_EXIT; 2629 part = (loc->wqe_free * 4) - 2; 2630 } 2631 if (likely(part > 1)) 2632 rte_prefetch0(*pkts); 2633 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2634 /* 2635 * Build eMPW title WQEBB: 2636 * - Control Segment, eMPW opcode 2637 * - Ethernet Segment, no inline 2638 */ 2639 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 2640 MLX5_OPCODE_ENHANCED_MPSW, olx); 2641 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 2642 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2643 eseg = &loc->wqe_last->eseg; 2644 dseg = &loc->wqe_last->dseg[0]; 2645 loop = part; 2646 /* Store the packet length for legacy MPW. */ 2647 if (MLX5_TXOFF_CONFIG(MPW)) 2648 eseg->mss = rte_cpu_to_be_16 2649 (rte_pktmbuf_data_len(loc->mbuf)); 2650 for (;;) { 2651 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2652 #ifdef MLX5_PMD_SOFT_COUNTERS 2653 /* Update sent data bytes counter. */ 2654 slen += dlen; 2655 #endif 2656 mlx5_tx_dseg_ptr 2657 (txq, loc, dseg, 2658 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 2659 dlen, olx); 2660 if (unlikely(--loop == 0)) 2661 break; 2662 loc->mbuf = *pkts++; 2663 if (likely(loop > 1)) 2664 rte_prefetch0(*pkts); 2665 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2666 /* 2667 * Unroll the completion code to avoid 2668 * returning variable value - it results in 2669 * unoptimized sequent checking in caller. 2670 */ 2671 if (ret == MLX5_TXCMP_CODE_MULTI) { 2672 part -= loop; 2673 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2674 if (unlikely(!loc->elts_free || 2675 !loc->wqe_free)) 2676 return MLX5_TXCMP_CODE_EXIT; 2677 return MLX5_TXCMP_CODE_MULTI; 2678 } 2679 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2680 if (ret == MLX5_TXCMP_CODE_TSO) { 2681 part -= loop; 2682 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2683 if (unlikely(!loc->elts_free || 2684 !loc->wqe_free)) 2685 return MLX5_TXCMP_CODE_EXIT; 2686 return MLX5_TXCMP_CODE_TSO; 2687 } 2688 if (ret == MLX5_TXCMP_CODE_SINGLE) { 2689 part -= loop; 2690 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2691 if (unlikely(!loc->elts_free || 2692 !loc->wqe_free)) 2693 return MLX5_TXCMP_CODE_EXIT; 2694 return MLX5_TXCMP_CODE_SINGLE; 2695 } 2696 if (ret != MLX5_TXCMP_CODE_EMPW) { 2697 MLX5_ASSERT(false); 2698 part -= loop; 2699 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2700 return MLX5_TXCMP_CODE_ERROR; 2701 } 2702 /* 2703 * Check whether packet parameters coincide 2704 * within assumed eMPW batch: 2705 * - check sum settings 2706 * - metadata value 2707 * - software parser settings 2708 * - packets length (legacy MPW only) 2709 * - scheduling is not required 2710 */ 2711 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 2712 MLX5_ASSERT(loop); 2713 part -= loop; 2714 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2715 if (unlikely(!loc->elts_free || 2716 !loc->wqe_free)) 2717 return MLX5_TXCMP_CODE_EXIT; 2718 pkts_n -= part; 2719 goto next_empw; 2720 } 2721 /* Packet attributes match, continue the same eMPW. */ 2722 ++dseg; 2723 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2724 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2725 } 2726 /* eMPW is built successfully, update loop parameters. */ 2727 MLX5_ASSERT(!loop); 2728 MLX5_ASSERT(pkts_n >= part); 2729 #ifdef MLX5_PMD_SOFT_COUNTERS 2730 /* Update sent data bytes counter. 
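*
* Batch close-out with illustrative figures: part == 14 packets give a
* session of (2 + 14 + 3) / 4 == 4 WQEBBs; the elts and WQE budgets
* shrink accordingly and the loop either exits or classifies the next
* mbuf for one more eMPW round.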
*/ 2731 txq->stats.obytes += slen; 2732 #endif 2733 loc->elts_free -= part; 2734 loc->pkts_sent += part; 2735 txq->wqe_ci += (2 + part + 3) / 4; 2736 loc->wqe_free -= (2 + part + 3) / 4; 2737 pkts_n -= part; 2738 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2739 return MLX5_TXCMP_CODE_EXIT; 2740 loc->mbuf = *pkts++; 2741 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2742 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 2743 return ret; 2744 /* Continue sending eMPW batches. */ 2745 } 2746 MLX5_ASSERT(false); 2747 } 2748 2749 /** 2750 * The routine sends packets with MLX5_OPCODE_EMPW 2751 * with inlining, optionally supports VLAN insertion. 2752 */ 2753 static __rte_always_inline enum mlx5_txcmp_code 2754 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 2755 struct rte_mbuf **__rte_restrict pkts, 2756 unsigned int pkts_n, 2757 struct mlx5_txq_local *__rte_restrict loc, 2758 unsigned int olx) 2759 { 2760 /* 2761 * Subroutine is the part of mlx5_tx_burst_single() and sends 2762 * single-segment packet with eMPW opcode with data inlining. 2763 */ 2764 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2765 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2766 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2767 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2768 pkts += loc->pkts_sent + 1; 2769 pkts_n -= loc->pkts_sent; 2770 for (;;) { 2771 struct mlx5_wqe_dseg *__rte_restrict dseg; 2772 struct mlx5_wqe *__rte_restrict wqem; 2773 enum mlx5_txcmp_code ret; 2774 unsigned int room, part, nlim; 2775 unsigned int slen = 0; 2776 2777 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2778 if (MLX5_TXOFF_CONFIG(TXPP)) { 2779 enum mlx5_txcmp_code wret; 2780 2781 /* Generate WAIT for scheduling if requested. */ 2782 wret = mlx5_tx_schedule_send(txq, loc, olx); 2783 if (wret == MLX5_TXCMP_CODE_EXIT) 2784 return MLX5_TXCMP_CODE_EXIT; 2785 if (wret == MLX5_TXCMP_CODE_ERROR) 2786 return MLX5_TXCMP_CODE_ERROR; 2787 } 2788 /* 2789 * Limits the amount of packets in one WQE 2790 * to improve CQE latency generation. 2791 */ 2792 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2793 MLX5_MPW_INLINE_MAX_PACKETS : 2794 MLX5_EMPW_MAX_PACKETS); 2795 /* Check whether we have minimal amount WQEs */ 2796 if (unlikely(loc->wqe_free < 2797 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2798 return MLX5_TXCMP_CODE_EXIT; 2799 if (likely(pkts_n > 1)) 2800 rte_prefetch0(*pkts); 2801 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2802 /* 2803 * Build eMPW title WQEBB: 2804 * - Control Segment, eMPW opcode, zero DS 2805 * - Ethernet Segment, no inline 2806 */ 2807 mlx5_tx_cseg_init(txq, loc, wqem, 0, 2808 MLX5_OPCODE_ENHANCED_MPSW, olx); 2809 mlx5_tx_eseg_none(txq, loc, wqem, 2810 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2811 dseg = &wqem->dseg[0]; 2812 /* Store the packet length for legacy MPW. */ 2813 if (MLX5_TXOFF_CONFIG(MPW)) 2814 wqem->eseg.mss = rte_cpu_to_be_16 2815 (rte_pktmbuf_data_len(loc->mbuf)); 2816 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 2817 loc->wqe_free) * MLX5_WQE_SIZE - 2818 MLX5_WQE_CSEG_SIZE - 2819 MLX5_WQE_ESEG_SIZE; 2820 /* Limit the room for legacy MPW sessions for performance. */ 2821 if (MLX5_TXOFF_CONFIG(MPW)) 2822 room = RTE_MIN(room, 2823 RTE_MAX(txq->inlen_empw + 2824 sizeof(dseg->bcount) + 2825 (MLX5_TXOFF_CONFIG(VLAN) ? 2826 sizeof(struct rte_vlan_hdr) : 0), 2827 MLX5_MPW_INLINE_MAX_PACKETS * 2828 MLX5_WQE_DSEG_SIZE)); 2829 /* Build WQE till we have space, packets and resources. 
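*
* Illustrative figures for the "room" computed above: with 64-byte
* WQEBBs and, say, 8 free WQEBBs (MLX5_WQE_SIZE_MAX not being the
* limiting factor here), room == 8 * 64 - 16 - 16 == 480 bytes, enough
* for thirty pointer Data Segments or proportionally fewer inlined ones.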
*/ 2830 part = room; 2831 for (;;) { 2832 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2833 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2834 unsigned int tlen; 2835 2836 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2837 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 2838 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 2839 /* 2840 * Some Tx offloads may cause an error if packet is not 2841 * long enough, check against assumed minimal length. 2842 */ 2843 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 2844 part -= room; 2845 if (unlikely(!part)) 2846 return MLX5_TXCMP_CODE_ERROR; 2847 /* 2848 * We have some successfully built 2849 * packet Data Segments to send. 2850 */ 2851 mlx5_tx_idone_empw(txq, loc, part, 2852 slen, wqem, olx); 2853 return MLX5_TXCMP_CODE_ERROR; 2854 } 2855 /* Inline or not inline - that's the Question. */ 2856 if (dlen > txq->inlen_empw || 2857 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2858 goto pointer_empw; 2859 if (MLX5_TXOFF_CONFIG(MPW)) { 2860 if (dlen > txq->inlen_send) 2861 goto pointer_empw; 2862 tlen = dlen; 2863 if (part == room) { 2864 /* Open new inline MPW session. */ 2865 tlen += sizeof(dseg->bcount); 2866 dseg->bcount = RTE_BE32(0); 2867 dseg = RTE_PTR_ADD 2868 (dseg, sizeof(dseg->bcount)); 2869 } else { 2870 /* 2871 * No pointer and inline descriptor 2872 * intermix for legacy MPW sessions. 2873 */ 2874 if (wqem->dseg[0].bcount) 2875 break; 2876 } 2877 } else { 2878 tlen = sizeof(dseg->bcount) + dlen; 2879 } 2880 /* Inline entire packet, optional VLAN insertion. */ 2881 if (MLX5_TXOFF_CONFIG(VLAN) && 2882 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2883 /* 2884 * The packet length must be checked in 2885 * mlx5_tx_able_to_empw() and packet 2886 * fits into inline length guaranteed. 2887 */ 2888 MLX5_ASSERT((dlen + 2889 sizeof(struct rte_vlan_hdr)) <= 2890 txq->inlen_empw); 2891 tlen += sizeof(struct rte_vlan_hdr); 2892 if (room < tlen) 2893 break; 2894 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 2895 dptr, dlen, olx); 2896 #ifdef MLX5_PMD_SOFT_COUNTERS 2897 /* Update sent data bytes counter. */ 2898 slen += sizeof(struct rte_vlan_hdr); 2899 #endif 2900 } else { 2901 if (room < tlen) 2902 break; 2903 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 2904 dptr, dlen, olx); 2905 } 2906 if (!MLX5_TXOFF_CONFIG(MPW)) 2907 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 2908 MLX5_ASSERT(room >= tlen); 2909 room -= tlen; 2910 /* 2911 * Packet data are completely inline, 2912 * we can try to free the packet. 2913 */ 2914 if (likely(loc->pkts_sent == loc->mbuf_free)) { 2915 /* 2916 * All the packets from the burst beginning 2917 * are inline, we can free mbufs directly 2918 * from the origin array on tx_burst exit(). 2919 */ 2920 loc->mbuf_free++; 2921 goto next_mbuf; 2922 } 2923 /* 2924 * In order no to call rte_pktmbuf_free_seg() here, 2925 * in the most inner loop (that might be very 2926 * expensive) we just save the mbuf in elts. 2927 */ 2928 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2929 loc->elts_free--; 2930 goto next_mbuf; 2931 pointer_empw: 2932 /* 2933 * No pointer and inline descriptor 2934 * intermix for legacy MPW sessions. 2935 */ 2936 if (MLX5_TXOFF_CONFIG(MPW) && 2937 part != room && 2938 wqem->dseg[0].bcount == RTE_BE32(0)) 2939 break; 2940 /* 2941 * Not inlinable VLAN packets are 2942 * proceeded outside of this routine. 
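*
* Reminder (illustrative): VLAN-tagged packets are either inlined
* entirely, with the tag inserted during the copy, or diverted to the
* SEND path by mlx5_tx_able_to_empw(), so a VLAN-tagged packet is not
* expected on this pointer-only branch; that is what the assertion
* below checks.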
2943 */ 2944 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2945 if (MLX5_TXOFF_CONFIG(VLAN)) 2946 MLX5_ASSERT(!(loc->mbuf->ol_flags & 2947 RTE_MBUF_F_TX_VLAN)); 2948 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2949 /* We have to store mbuf in elts.*/ 2950 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2951 loc->elts_free--; 2952 room -= MLX5_WQE_DSEG_SIZE; 2953 /* Ring buffer wraparound is checked at the loop end.*/ 2954 ++dseg; 2955 next_mbuf: 2956 #ifdef MLX5_PMD_SOFT_COUNTERS 2957 /* Update sent data bytes counter. */ 2958 slen += dlen; 2959 #endif 2960 loc->pkts_sent++; 2961 pkts_n--; 2962 if (unlikely(!pkts_n || !loc->elts_free)) { 2963 /* 2964 * We have no resources/packets to 2965 * continue build descriptors. 2966 */ 2967 part -= room; 2968 mlx5_tx_idone_empw(txq, loc, part, 2969 slen, wqem, olx); 2970 return MLX5_TXCMP_CODE_EXIT; 2971 } 2972 loc->mbuf = *pkts++; 2973 if (likely(pkts_n > 1)) 2974 rte_prefetch0(*pkts); 2975 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2976 /* 2977 * Unroll the completion code to avoid 2978 * returning variable value - it results in 2979 * unoptimized sequent checking in caller. 2980 */ 2981 if (ret == MLX5_TXCMP_CODE_MULTI) { 2982 part -= room; 2983 mlx5_tx_idone_empw(txq, loc, part, 2984 slen, wqem, olx); 2985 if (unlikely(!loc->elts_free || 2986 !loc->wqe_free)) 2987 return MLX5_TXCMP_CODE_EXIT; 2988 return MLX5_TXCMP_CODE_MULTI; 2989 } 2990 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2991 if (ret == MLX5_TXCMP_CODE_TSO) { 2992 part -= room; 2993 mlx5_tx_idone_empw(txq, loc, part, 2994 slen, wqem, olx); 2995 if (unlikely(!loc->elts_free || 2996 !loc->wqe_free)) 2997 return MLX5_TXCMP_CODE_EXIT; 2998 return MLX5_TXCMP_CODE_TSO; 2999 } 3000 if (ret == MLX5_TXCMP_CODE_SINGLE) { 3001 part -= room; 3002 mlx5_tx_idone_empw(txq, loc, part, 3003 slen, wqem, olx); 3004 if (unlikely(!loc->elts_free || 3005 !loc->wqe_free)) 3006 return MLX5_TXCMP_CODE_EXIT; 3007 return MLX5_TXCMP_CODE_SINGLE; 3008 } 3009 if (ret != MLX5_TXCMP_CODE_EMPW) { 3010 MLX5_ASSERT(false); 3011 part -= room; 3012 mlx5_tx_idone_empw(txq, loc, part, 3013 slen, wqem, olx); 3014 return MLX5_TXCMP_CODE_ERROR; 3015 } 3016 /* Check if we have minimal room left. */ 3017 nlim--; 3018 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 3019 break; 3020 /* 3021 * Check whether packet parameters coincide 3022 * within assumed eMPW batch: 3023 * - check sum settings 3024 * - metadata value 3025 * - software parser settings 3026 * - packets length (legacy MPW only) 3027 * - scheduling is not required 3028 */ 3029 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 3030 loc, dlen, olx)) 3031 break; 3032 /* Packet attributes match, continue the same eMPW. */ 3033 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3034 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3035 } 3036 /* 3037 * We get here to close an existing eMPW 3038 * session and start the new one. 3039 */ 3040 MLX5_ASSERT(pkts_n); 3041 part -= room; 3042 if (unlikely(!part)) 3043 return MLX5_TXCMP_CODE_EXIT; 3044 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 3045 if (unlikely(!loc->elts_free || 3046 !loc->wqe_free)) 3047 return MLX5_TXCMP_CODE_EXIT; 3048 /* Continue the loop with new eMPW session. */ 3049 } 3050 MLX5_ASSERT(false); 3051 } 3052 3053 /** 3054 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 3055 * Data inlining and VLAN insertion are supported. 
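*
* Depending on the packet and the queue configuration the routine picks
* one of the following (an illustrative summary of the branches below):
*   - complete inlining of the packet data into the WQE,
*   - partial inlining of inlen_mode bytes plus a pointer Data Segment,
*   - minimal ESEG inlining plus a pointer Data Segment (e.g. when the
*     VLAN tag has to be inserted by software),
*   - pure pointer SEND without touching the packet data at all.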
3056 */ 3057 static __rte_always_inline enum mlx5_txcmp_code 3058 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 3059 struct rte_mbuf **__rte_restrict pkts, 3060 unsigned int pkts_n, 3061 struct mlx5_txq_local *__rte_restrict loc, 3062 unsigned int olx) 3063 { 3064 /* 3065 * Subroutine is the part of mlx5_tx_burst_single() 3066 * and sends single-segment packet with SEND opcode. 3067 */ 3068 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3069 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3070 pkts += loc->pkts_sent + 1; 3071 pkts_n -= loc->pkts_sent; 3072 for (;;) { 3073 struct mlx5_wqe *__rte_restrict wqe; 3074 enum mlx5_txcmp_code ret; 3075 3076 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3077 if (MLX5_TXOFF_CONFIG(TXPP)) { 3078 enum mlx5_txcmp_code wret; 3079 3080 /* Generate WAIT for scheduling if requested. */ 3081 wret = mlx5_tx_schedule_send(txq, loc, olx); 3082 if (wret == MLX5_TXCMP_CODE_EXIT) 3083 return MLX5_TXCMP_CODE_EXIT; 3084 if (wret == MLX5_TXCMP_CODE_ERROR) 3085 return MLX5_TXCMP_CODE_ERROR; 3086 } 3087 if (MLX5_TXOFF_CONFIG(INLINE)) { 3088 unsigned int inlen, vlan = 0; 3089 3090 inlen = rte_pktmbuf_data_len(loc->mbuf); 3091 if (MLX5_TXOFF_CONFIG(VLAN) && 3092 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 3093 vlan = sizeof(struct rte_vlan_hdr); 3094 inlen += vlan; 3095 } 3096 /* 3097 * If inlining is enabled at configuration time 3098 * the limit must be not less than minimal size. 3099 * Otherwise we would do extra check for data 3100 * size to avoid crashes due to length overflow. 3101 */ 3102 MLX5_ASSERT(txq->inlen_send >= 3103 MLX5_ESEG_MIN_INLINE_SIZE); 3104 if (inlen <= txq->inlen_send) { 3105 unsigned int seg_n, wqe_n; 3106 3107 rte_prefetch0(rte_pktmbuf_mtod 3108 (loc->mbuf, uint8_t *)); 3109 /* Check against minimal length. */ 3110 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3111 return MLX5_TXCMP_CODE_ERROR; 3112 if (loc->mbuf->ol_flags & 3113 RTE_MBUF_F_TX_DYNF_NOINLINE) { 3114 /* 3115 * The hint flag not to inline packet 3116 * data is set. Check whether we can 3117 * follow the hint. 3118 */ 3119 if ((!MLX5_TXOFF_CONFIG(EMPW) && 3120 txq->inlen_mode) || 3121 (MLX5_TXOFF_CONFIG(MPW) && 3122 txq->inlen_mode)) { 3123 if (inlen <= txq->inlen_send) 3124 goto single_inline; 3125 /* 3126 * The hardware requires the 3127 * minimal inline data header. 3128 */ 3129 goto single_min_inline; 3130 } 3131 if (MLX5_TXOFF_CONFIG(VLAN) && 3132 vlan && !txq->vlan_en) { 3133 /* 3134 * We must insert VLAN tag 3135 * by software means. 3136 */ 3137 goto single_part_inline; 3138 } 3139 goto single_no_inline; 3140 } 3141 single_inline: 3142 /* 3143 * Completely inlined packet data WQE: 3144 * - Control Segment, SEND opcode 3145 * - Ethernet Segment, no VLAN insertion 3146 * - Data inlined, VLAN optionally inserted 3147 * - Alignment to MLX5_WSEG_SIZE 3148 * Have to estimate amount of WQEBBs 3149 */ 3150 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 3151 MLX5_ESEG_MIN_INLINE_SIZE + 3152 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3153 /* Check if there are enough WQEBBs. */ 3154 wqe_n = (seg_n + 3) / 4; 3155 if (wqe_n > loc->wqe_free) 3156 return MLX5_TXCMP_CODE_EXIT; 3157 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3158 loc->wqe_last = wqe; 3159 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 3160 MLX5_OPCODE_SEND, olx); 3161 mlx5_tx_eseg_data(txq, loc, wqe, 3162 vlan, inlen, 0, olx); 3163 txq->wqe_ci += wqe_n; 3164 loc->wqe_free -= wqe_n; 3165 /* 3166 * Packet data are completely inlined, 3167 * free the packet immediately. 
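*
* Worked example for the estimation above (assuming a 16-byte
* MLX5_WSEG_SIZE and an 18-byte MLX5_ESEG_MIN_INLINE_SIZE): a fully
* inlined 128-byte packet gives
*
*   seg_n = (128 + 3 * 16 - 18 + 16 - 1) / 16 == 10
*
* and wqe_n = (10 + 3) / 4 == 3 WQEBBs; the mbuf can be released right
* away because the NIC no longer references its data.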
3168 */ 3169 rte_pktmbuf_free_seg(loc->mbuf); 3170 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 3171 MLX5_TXOFF_CONFIG(MPW)) && 3172 txq->inlen_mode) { 3173 /* 3174 * If minimal inlining is requested the eMPW 3175 * feature should be disabled due to data is 3176 * inlined into Ethernet Segment, which can 3177 * not contain inlined data for eMPW due to 3178 * segment shared for all packets. 3179 */ 3180 struct mlx5_wqe_dseg *__rte_restrict dseg; 3181 unsigned int ds; 3182 uint8_t *dptr; 3183 3184 /* 3185 * The inline-mode settings require 3186 * to inline the specified amount of 3187 * data bytes to the Ethernet Segment. 3188 * We should check the free space in 3189 * WQE ring buffer to inline partially. 3190 */ 3191 single_min_inline: 3192 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 3193 MLX5_ASSERT(inlen > txq->inlen_mode); 3194 MLX5_ASSERT(txq->inlen_mode >= 3195 MLX5_ESEG_MIN_INLINE_SIZE); 3196 /* 3197 * Check whether there are enough free WQEBBs: 3198 * - Control Segment 3199 * - Ethernet Segment 3200 * - First Segment of inlined Ethernet data 3201 * - ... data continued ... 3202 * - Finishing Data Segment of pointer type 3203 */ 3204 ds = (MLX5_WQE_CSEG_SIZE + 3205 MLX5_WQE_ESEG_SIZE + 3206 MLX5_WQE_DSEG_SIZE + 3207 txq->inlen_mode - 3208 MLX5_ESEG_MIN_INLINE_SIZE + 3209 MLX5_WQE_DSEG_SIZE + 3210 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3211 if (loc->wqe_free < ((ds + 3) / 4)) 3212 return MLX5_TXCMP_CODE_EXIT; 3213 /* 3214 * Build the ordinary SEND WQE: 3215 * - Control Segment 3216 * - Ethernet Segment, inline inlen_mode bytes 3217 * - Data Segment of pointer type 3218 */ 3219 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3220 loc->wqe_last = wqe; 3221 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3222 MLX5_OPCODE_SEND, olx); 3223 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 3224 txq->inlen_mode, 3225 0, olx); 3226 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3227 txq->inlen_mode - vlan; 3228 inlen -= txq->inlen_mode; 3229 mlx5_tx_dseg_ptr(txq, loc, dseg, 3230 dptr, inlen, olx); 3231 /* 3232 * WQE is built, update the loop parameters 3233 * and got to the next packet. 3234 */ 3235 txq->wqe_ci += (ds + 3) / 4; 3236 loc->wqe_free -= (ds + 3) / 4; 3237 /* We have to store mbuf in elts.*/ 3238 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3239 txq->elts[txq->elts_head++ & txq->elts_m] = 3240 loc->mbuf; 3241 --loc->elts_free; 3242 } else { 3243 uint8_t *dptr; 3244 unsigned int dlen; 3245 3246 /* 3247 * Partially inlined packet data WQE, we have 3248 * some space in title WQEBB, we can fill it 3249 * with some packet data. It takes one WQEBB, 3250 * it is available, no extra space check: 3251 * - Control Segment, SEND opcode 3252 * - Ethernet Segment, no VLAN insertion 3253 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 3254 * - Data Segment, pointer type 3255 * 3256 * We also get here if VLAN insertion is not 3257 * supported by HW, the inline is enabled. 3258 */ 3259 single_part_inline: 3260 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3261 loc->wqe_last = wqe; 3262 mlx5_tx_cseg_init(txq, loc, wqe, 4, 3263 MLX5_OPCODE_SEND, olx); 3264 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 3265 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3266 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 3267 /* 3268 * The length check is performed above, by 3269 * comparing with txq->inlen_send. We should 3270 * not get overflow here. 
3271 */ 3272 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 3273 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 3274 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 3275 dptr, dlen, olx); 3276 ++txq->wqe_ci; 3277 --loc->wqe_free; 3278 /* We have to store mbuf in elts.*/ 3279 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3280 txq->elts[txq->elts_head++ & txq->elts_m] = 3281 loc->mbuf; 3282 --loc->elts_free; 3283 } 3284 #ifdef MLX5_PMD_SOFT_COUNTERS 3285 /* Update sent data bytes counter. */ 3286 txq->stats.obytes += vlan + 3287 rte_pktmbuf_data_len(loc->mbuf); 3288 #endif 3289 } else { 3290 /* 3291 * No inline at all, it means the CPU cycles saving 3292 * is prioritized at configuration, we should not 3293 * copy any packet data to WQE. 3294 * 3295 * SEND WQE, one WQEBB: 3296 * - Control Segment, SEND opcode 3297 * - Ethernet Segment, optional VLAN, no inline 3298 * - Data Segment, pointer type 3299 */ 3300 single_no_inline: 3301 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3302 loc->wqe_last = wqe; 3303 mlx5_tx_cseg_init(txq, loc, wqe, 3, 3304 MLX5_OPCODE_SEND, olx); 3305 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3306 mlx5_tx_dseg_ptr 3307 (txq, loc, &wqe->dseg[0], 3308 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3309 rte_pktmbuf_data_len(loc->mbuf), olx); 3310 ++txq->wqe_ci; 3311 --loc->wqe_free; 3312 /* 3313 * We should not store mbuf pointer in elts 3314 * if no inlining is configured, this is done 3315 * by calling routine in a batch copy. 3316 */ 3317 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 3318 --loc->elts_free; 3319 #ifdef MLX5_PMD_SOFT_COUNTERS 3320 /* Update sent data bytes counter. */ 3321 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 3322 if (MLX5_TXOFF_CONFIG(VLAN) && 3323 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 3324 txq->stats.obytes += 3325 sizeof(struct rte_vlan_hdr); 3326 #endif 3327 } 3328 ++loc->pkts_sent; 3329 --pkts_n; 3330 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3331 return MLX5_TXCMP_CODE_EXIT; 3332 loc->mbuf = *pkts++; 3333 if (pkts_n > 1) 3334 rte_prefetch0(*pkts); 3335 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3336 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 3337 return ret; 3338 } 3339 MLX5_ASSERT(false); 3340 } 3341 3342 static __rte_always_inline enum mlx5_txcmp_code 3343 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 3344 struct rte_mbuf **__rte_restrict pkts, 3345 unsigned int pkts_n, 3346 struct mlx5_txq_local *__rte_restrict loc, 3347 unsigned int olx) 3348 { 3349 enum mlx5_txcmp_code ret; 3350 3351 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 3352 if (ret == MLX5_TXCMP_CODE_SINGLE) 3353 goto ordinary_send; 3354 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 3355 for (;;) { 3356 /* Optimize for inline/no inline eMPW send. */ 3357 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 3358 mlx5_tx_burst_empw_inline 3359 (txq, pkts, pkts_n, loc, olx) : 3360 mlx5_tx_burst_empw_simple 3361 (txq, pkts, pkts_n, loc, olx); 3362 if (ret != MLX5_TXCMP_CODE_SINGLE) 3363 return ret; 3364 /* The resources to send one packet should remain. */ 3365 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3366 ordinary_send: 3367 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 3368 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 3369 if (ret != MLX5_TXCMP_CODE_EMPW) 3370 return ret; 3371 /* The resources to send one packet should remain. */ 3372 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3373 } 3374 } 3375 3376 /** 3377 * DPDK Tx callback template. This is configured template used to generate 3378 * routines optimized for specified offload setup. 

/**
 * DPDK Tx callback template. This is a configured template used to generate
 * routines optimized for a specified offload setup.
 * One of these generated functions is chosen at SQ configuration time.
 *
 * @param txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 * @param olx
 *   Configured offloads mask, representing the bits of MLX5_TXOFF_CONFIG_xxx
 *   values. Should be static to take advantage of compile-time static
 *   configuration.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
static __rte_always_inline uint16_t
mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq,
		   struct rte_mbuf **__rte_restrict pkts,
		   uint16_t pkts_n,
		   unsigned int olx)
{
	struct mlx5_txq_local loc;
	enum mlx5_txcmp_code ret;
	unsigned int part;

	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (unlikely(!pkts_n))
		return 0;
	if (MLX5_TXOFF_CONFIG(INLINE))
		loc.mbuf_free = 0;
	loc.pkts_sent = 0;
	loc.pkts_copy = 0;
	loc.wqe_last = NULL;

send_loop:
	loc.pkts_loop = loc.pkts_sent;
	/*
	 * Check if there are some CQEs; if any:
	 * - process encountered errors
	 * - process the completed WQEs
	 * - free related mbufs
	 * - doorbell the NIC about processed CQEs
	 */
	rte_prefetch0(*(pkts + loc.pkts_sent));
	mlx5_tx_handle_completion(txq, olx);
	/*
	 * Calculate the number of available resources - elts and WQEs.
	 * There are two possible different scenarios:
	 * - no data inlining into WQEs, one WQEBB may contain up to
	 *   four packets; in this case elts become the scarce resource
	 * - data inlining into WQEs, one packet may require multiple
	 *   WQEBBs; the WQEs become the limiting factor.
	 */
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	loc.elts_free = txq->elts_s -
				(uint16_t)(txq->elts_head - txq->elts_tail);
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	loc.wqe_free = txq->wqe_s -
				(uint16_t)(txq->wqe_ci - txq->wqe_pi);
	if (unlikely(!loc.elts_free || !loc.wqe_free))
		goto burst_exit;
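	/*
	 * Worked example (illustrative): the uint16_t casts above make the
	 * ring occupancy math wrap-around safe. With elts_head == 5 and
	 * elts_tail == 65534, (uint16_t)(5 - 65534) == 7, i.e. seven elts
	 * are still in flight and elts_free == elts_s - 7; the same holds
	 * for the wqe_ci/wqe_pi pair.
	 */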
	for (;;) {
		/*
		 * Fetch the packet from the array. Usually this is the first
		 * packet in a series of multi/single-segment packets.
		 */
		loc.mbuf = *(pkts + loc.pkts_sent);
		/* Dedicated branch for multi-segment packets. */
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    unlikely(NB_SEGS(loc.mbuf) > 1)) {
			/*
			 * Multi-segment packet encountered.
			 * Hardware is able to process it only
			 * with SEND/TSO opcodes, one packet
			 * per WQE, so do it in a dedicated routine.
			 */
enter_send_multi:
			MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy);
			part = loc.pkts_sent - loc.pkts_copy;
			if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
				/*
				 * There are some single-segment mbufs not
				 * stored in elts. The mbufs must be in the
				 * same order as WQEs, so we must copy the
				 * mbufs to elts here, before the coming
				 * multi-segment packet mbufs are appended.
				 */
				mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy,
						  part, olx);
				loc.pkts_copy = loc.pkts_sent;
			}
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx);
			if (!MLX5_TXOFF_CONFIG(INLINE))
				loc.pkts_copy = loc.pkts_sent;
			/*
			 * These return code checks are supposed
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT) {
				/*
				 * The routine returns this code when
				 * all packets are sent or there are not
				 * enough resources to complete the request.
				 */
				break;
			}
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				/*
				 * The routine returns this code when some
				 * error in the incoming packet format occurred.
				 */
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE) {
				/*
				 * A single-segment packet was encountered
				 * in the array, try to send it in the most
				 * optimized way, possibly engaging eMPW.
				 */
				goto enter_send_single;
			}
			if (MLX5_TXOFF_CONFIG(TSO) &&
			    ret == MLX5_TXCMP_CODE_TSO) {
				/*
				 * A single-segment TSO packet was
				 * encountered in the array.
				 */
				goto enter_send_tso;
			}
			/* We must not get here. Something is going wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/* Dedicated branch for single-segment TSO packets. */
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    unlikely(loc.mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
			/*
			 * TSO might require a special way of inlining
			 * (dedicated parameters) and is sent with the
			 * MLX5_OPCODE_TSO opcode only, so handle this
			 * in a dedicated branch.
			 */
enter_send_tso:
			MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1);
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx);
			/*
			 * These return code checks are supposed
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT)
				break;
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE)
				goto enter_send_single;
			if (MLX5_TXOFF_CONFIG(MULTI) &&
			    ret == MLX5_TXCMP_CODE_MULTI) {
				/*
				 * A multi-segment packet was
				 * encountered in the array.
				 */
				goto enter_send_multi;
			}
			/* We must not get here. Something is going wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/*
		 * The dedicated branch for single-segment packets
		 * without TSO. Often these can be sent using
		 * MLX5_OPCODE_EMPW with multiple packets in one WQE.
		 * The routine builds the WQEs until it encounters
		 * a TSO or multi-segment packet (if these offloads
		 * are requested at SQ configuration time).
		 */
enter_send_single:
		MLX5_ASSERT(pkts_n > loc.pkts_sent);
		ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx);
		/*
		 * These return code checks are supposed
		 * to be optimized out due to routine inlining.
		 */
		if (ret == MLX5_TXCMP_CODE_EXIT)
			break;
		if (ret == MLX5_TXCMP_CODE_ERROR) {
			txq->stats.oerrors++;
			break;
		}
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    ret == MLX5_TXCMP_CODE_MULTI) {
			/*
			 * A multi-segment packet was
			 * encountered in the array.
			 */
			goto enter_send_multi;
		}
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    ret == MLX5_TXCMP_CODE_TSO) {
			/*
			 * A single-segment TSO packet was
			 * encountered in the array.
			 */
			goto enter_send_tso;
		}
		/* We must not get here. Something is going wrong. */
		MLX5_ASSERT(false);
		txq->stats.oerrors++;
		break;
	}
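	/*
	 * Flow sketch (illustrative, assuming the corresponding offloads are
	 * compiled in): for a burst like [1-seg, 1-seg, TSO, 4-seg, 1-seg]
	 * the loop above handles the leading packets in enter_send_single,
	 * jumps to enter_send_tso when the sub-routine returns
	 * MLX5_TXCMP_CODE_TSO, then to enter_send_multi on
	 * MLX5_TXCMP_CODE_MULTI, and so on until EXIT or ERROR breaks out
	 * to the epilogue below.
	 */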
	/*
	 * The main Tx loop is completed, do the rest:
	 * - set completion request if thresholds are reached
	 * - doorbell the hardware
	 * - copy the rest of mbufs to elts (if any)
	 */
	MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) ||
		    loc.pkts_sent >= loc.pkts_copy);
	/* Take a shortcut if nothing was sent. */
	if (unlikely(loc.pkts_sent == loc.pkts_loop))
		goto burst_exit;
	/* Request CQE generation if limits are reached. */
	mlx5_tx_request_completion(txq, &loc, olx);
	/*
	 * Ring the QP doorbell immediately after WQE building completion
	 * to improve latency. The purely software-related data treatment
	 * can be completed after the doorbell. Tx CQEs for this SQ are
	 * processed in this thread only, by polling.
	 *
	 * The rdma core library can map the doorbell register in two ways,
	 * depending on the environment variable "MLX5_SHUT_UP_BF":
	 *
	 * - as regular cached memory, when the variable is either missing or
	 *   set to zero. This type of mapping may cause significant doorbell
	 *   register write latency and requires an explicit memory write
	 *   barrier to mitigate this issue and prevent write combining.
	 *
	 * - as non-cached memory, when the variable is present and set to a
	 *   non-zero value. This type of mapping may impact performance under
	 *   heavy load, but the explicit write memory barrier is not required
	 *   and it may improve core performance.
	 *
	 * - the legacy behaviour (prior to the 19.08 release) was to use some
	 *   heuristics to decide whether the write memory barrier should
	 *   be performed. This behavior is supported by specifying
	 *   tx_db_nc=2; the write barrier is skipped if the application
	 *   provides the full recommended burst of packets, assuming the next
	 *   packets are coming and the write barrier will be issued on
	 *   the next burst (after descriptor writing, at least).
	 */
	mlx5_doorbell_ring(mlx5_tx_bfreg(txq),
			   *(volatile uint64_t *)loc.wqe_last, txq->wqe_ci,
			   txq->qp_db, !txq->db_nc &&
			   (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
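	/*
	 * Example of the barrier decision above (illustrative): with db_nc
	 * and db_heu both clear the last argument is non-zero and the write
	 * barrier is always requested; with db_heu set (tx_db_nc=2) and
	 * pkts_n an exact multiple of MLX5_TX_DEFAULT_BURST the modulo is
	 * zero, so the barrier is skipped on the assumption that the next
	 * burst will flush the doorbell write anyway.
	 */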
	/* Not all of the mbufs may be stored in elts yet. */
	part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
	if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
		/*
		 * There are some single-segment mbufs not stored in elts.
		 * This can only happen if the last packets were
		 * single-segment. The copying is gathered into one place
		 * because it is a good opportunity to optimize it with SIMD.
		 * Unfortunately, if inlining is enabled, gaps in the pointer
		 * array may appear due to early freeing of the inlined mbufs.
		 */
		mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx);
		loc.pkts_copy = loc.pkts_sent;
	}
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (pkts_n > loc.pkts_sent) {
		/*
		 * If the burst size is large there might not be enough CQEs
		 * fetched from the completion queue and not enough resources
		 * freed to send all the packets.
		 */
		goto send_loop;
	}
burst_exit:
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += loc.pkts_sent;
#endif
	if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
		__mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
	return loc.pkts_sent;
}

#endif /* RTE_PMD_MLX5_TX_H_ */