/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2021 6WIND S.A.
 * Copyright 2021 Mellanox Technologies, Ltd
 */

#ifndef RTE_PMD_MLX5_TX_H_
#define RTE_PMD_MLX5_TX_H_

#include <stdint.h>
#include <sys/queue.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_common.h>
#include <rte_spinlock.h>

#include <mlx5_common.h>
#include <mlx5_common_mr.h>

#include "mlx5.h"
#include "mlx5_autoconf.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
	MLX5_TXCMP_CODE_EXIT = 0,
	MLX5_TXCMP_CODE_ERROR,
	MLX5_TXCMP_CODE_SINGLE,
	MLX5_TXCMP_CODE_MULTI,
	MLX5_TXCMP_CODE_TSO,
	MLX5_TXCMP_CODE_EMPW,
};

/*
 * These defines are used to configure the set of Tx burst routine options
 * supported at compile time. Options that are not specified are optimized
 * out, because the related if conditions can be evaluated at compile time.
 * Offloads with a bigger runtime check overhead (requiring more CPU cycles
 * to skip) should have a bigger index - this is needed to select the better
 * matching routine when there is no exact match and some offloads are not
 * actually requested.
 */
#define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets.*/
#define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported.*/
#define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads.*/
#define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Check Sums offloaded. */
#define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported.*/
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported.*/
#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported.*/
#define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp.*/

/* The most common offloads groups. */
#define MLX5_TXOFF_CONFIG_NONE 0
#define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \
				MLX5_TXOFF_CONFIG_TSO | \
				MLX5_TXOFF_CONFIG_SWP | \
				MLX5_TXOFF_CONFIG_CSUM | \
				MLX5_TXOFF_CONFIG_INLINE | \
				MLX5_TXOFF_CONFIG_VLAN | \
				MLX5_TXOFF_CONFIG_METADATA)

#define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask)

#define MLX5_TXOFF_PRE_DECL(func) \
uint16_t mlx5_tx_burst_##func(void *txq, \
			      struct rte_mbuf **pkts, \
			      uint16_t pkts_n)

#define MLX5_TXOFF_DECL(func, olx) \
uint16_t mlx5_tx_burst_##func(void *txq, \
			      struct rte_mbuf **pkts, \
			      uint16_t pkts_n) \
{ \
	return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \
		    pkts, pkts_n, (olx)); \
}
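/*
 * Illustrative sketch (not part of the driver): the macros above are how the
 * specialized burst routines are generated from the common template. A
 * hypothetical variant supporting only checksum and VLAN offloads could be
 * declared and instantiated like this (the name "cv_example" is made up for
 * illustration only):
 *
 *   MLX5_TXOFF_PRE_DECL(cv_example);
 *   MLX5_TXOFF_DECL(cv_example,
 *		     MLX5_TXOFF_CONFIG_CSUM | MLX5_TXOFF_CONFIG_VLAN)
 *
 * Inside the template the MLX5_TXOFF_CONFIG(CSUM) checks then become
 * compile-time constants, so unused branches are optimized out.
 */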
/* Mbuf dynamic flag offset for inline. */
extern uint64_t rte_net_mlx5_dynf_inline_mask;
#define RTE_MBUF_F_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask

extern uint32_t mlx5_ptype_table[] __rte_cache_aligned;
extern uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned;
extern uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned;

struct mlx5_txq_stats {
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint64_t opackets; /**< Total of successfully sent packets. */
	uint64_t obytes; /**< Total of successfully sent bytes. */
#endif
	uint64_t oerrors; /**< Total number of failed transmitted packets. */
};

/* TX queue send local data. */
__extension__
struct mlx5_txq_local {
	struct mlx5_wqe *wqe_last; /* last sent WQE pointer. */
	struct rte_mbuf *mbuf; /* first mbuf to process. */
	uint16_t pkts_copy; /* packets copied to elts. */
	uint16_t pkts_sent; /* packets sent. */
	uint16_t pkts_loop; /* packets sent on loop entry. */
	uint16_t elts_free; /* available elts remain. */
	uint16_t wqe_free; /* available wqe remain. */
	uint16_t mbuf_off; /* data offset in current mbuf. */
	uint16_t mbuf_nseg; /* number of remaining mbuf segments. */
	uint16_t mbuf_free; /* number of inline mbufs to free. */
};

/* TX queue descriptor. */
__extension__
struct mlx5_txq_data {
	uint16_t elts_head; /* Current counter in (*elts)[]. */
	uint16_t elts_tail; /* Counter of first element awaiting completion. */
	uint16_t elts_comp; /* elts index since last completion request. */
	uint16_t elts_s; /* Number of mbuf elements. */
	uint16_t elts_m; /* Mask for mbuf elements indices. */
	/* Fields related to elts mbuf storage. */
	uint16_t wqe_ci; /* Consumer index for work queue. */
	uint16_t wqe_pi; /* Producer index for work queue. */
	uint16_t wqe_s; /* Number of WQ elements. */
	uint16_t wqe_m; /* Mask for WQ element indices. */
	uint16_t wqe_comp; /* WQE index since last completion request. */
	uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */
	/* WQ related fields. */
	uint16_t cq_ci; /* Consumer index for completion queue. */
	uint16_t cq_pi; /* Producer index for completion queue. */
	uint16_t cqe_s; /* Number of CQ elements. */
	uint16_t cqe_m; /* Mask for CQ indices. */
	/* CQ related fields. */
	uint16_t elts_n:4; /* elts[] length (in log2). */
	uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
	uint16_t wqe_n:4; /* Number of WQ elements (in log2). */
	uint16_t tso_en:1; /* When set hardware TSO is enabled. */
	uint16_t tunnel_en:1;
	/* When set TX offload for tunneled packets are supported. */
	uint16_t swp_en:1; /* Whether SW parser is enabled. */
	uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */
	uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */
	uint16_t db_heu:1; /* Doorbell heuristic write barrier. */
	uint16_t rt_timestamp:1; /* Realtime timestamp format. */
	uint16_t wait_on_time:1; /* WQE with timestamp is supported. */
	uint16_t fast_free:1; /* mbuf fast free on Tx is enabled. */
	uint16_t inlen_send; /* Ordinary send data inline size. */
	uint16_t inlen_empw; /* eMPW max packet size to inline. */
	uint16_t inlen_mode; /* Minimal data length to inline. */
	uint32_t qp_num_8s; /* QP number shifted by 8. */
	uint64_t offloads; /* Offloads for Tx Queue. */
	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
	struct mlx5_wqe *wqes; /* Work queue. */
	struct mlx5_wqe *wqes_end; /* Work queue array limit. */
#ifdef RTE_LIBRTE_MLX5_DEBUG
	uint32_t *fcqs; /* Free completion queue (debug extended). */
#else
	uint16_t *fcqs; /* Free completion queue. */
#endif
	volatile struct mlx5_cqe *cqes; /* Completion queue. */
	volatile uint32_t *qp_db; /* Work queue doorbell. */
	volatile uint32_t *cq_db; /* Completion queue doorbell. */
	uint16_t port_id; /* Port ID of device. */
	uint16_t idx; /* Queue index. */
	uint64_t rt_timemask; /* Scheduling timestamp mask. */
	uint64_t ts_mask; /* Timestamp flag dynamic mask. */
	int32_t ts_offset; /* Timestamp field dynamic offset. */
	struct mlx5_dev_ctx_shared *sh; /* Shared context. */
	struct mlx5_txq_stats stats; /* TX queue counters. */
	struct mlx5_txq_stats stats_reset; /* stats on last reset. */
	struct mlx5_uar_data uar_data;
	struct rte_mbuf *elts[0];
	/* Storage for queued packets, must be the last field. */
} __rte_cache_aligned;

/* TX queue control descriptor. */
struct mlx5_txq_ctrl {
	LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */
	uint32_t refcnt; /* Reference counter. */
	unsigned int socket; /* CPU socket ID for allocations. */
	bool is_hairpin; /* Whether TxQ type is Hairpin. */
	unsigned int max_inline_data; /* Max inline data. */
	unsigned int max_tso_header; /* Max TSO header size. */
	struct mlx5_txq_obj *obj; /* Verbs/DevX queue object. */
	struct mlx5_priv *priv; /* Back pointer to private data. */
	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
	uint16_t dump_file_n; /* Number of dump files. */
	struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */
	uint32_t hairpin_status; /* Hairpin binding status. */
	struct mlx5_txq_data txq; /* Data path structure. */
	/* Must be the last field in the structure, contains elts[]. */
};

/* mlx5_txq.c */

int mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
			unsigned int socket, const struct rte_eth_txconf *conf);
int mlx5_tx_hairpin_queue_setup
	(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	 const struct rte_eth_hairpin_conf *hairpin_conf);
void mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev);
int mlx5_txq_obj_verify(struct rte_eth_dev *dev);
struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx,
				   uint16_t desc, unsigned int socket,
				   const struct rte_eth_txconf *conf);
struct mlx5_txq_ctrl *mlx5_txq_hairpin_new
	(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	 const struct rte_eth_hairpin_conf *hairpin_conf);
struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_verify(struct rte_eth_dev *dev);
void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl);
void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl);
uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev);
void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev);

/* mlx5_tx.c */

void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
			       unsigned int olx __rte_unused);
int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset);
void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
		       struct rte_eth_txq_info *qinfo);
int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
			   struct rte_eth_burst_mode *mode);

/* mlx5_tx_empw.c */

MLX5_TXOFF_PRE_DECL(full_empw);
MLX5_TXOFF_PRE_DECL(none_empw);
MLX5_TXOFF_PRE_DECL(md_empw);
MLX5_TXOFF_PRE_DECL(mt_empw);
MLX5_TXOFF_PRE_DECL(mtsc_empw);
MLX5_TXOFF_PRE_DECL(mti_empw);
MLX5_TXOFF_PRE_DECL(mtv_empw);
MLX5_TXOFF_PRE_DECL(mtiv_empw);
MLX5_TXOFF_PRE_DECL(sc_empw);
MLX5_TXOFF_PRE_DECL(sci_empw);
MLX5_TXOFF_PRE_DECL(scv_empw);
MLX5_TXOFF_PRE_DECL(sciv_empw);
MLX5_TXOFF_PRE_DECL(i_empw);
MLX5_TXOFF_PRE_DECL(v_empw);
MLX5_TXOFF_PRE_DECL(iv_empw);

/* mlx5_tx_nompw.c */

MLX5_TXOFF_PRE_DECL(full);
MLX5_TXOFF_PRE_DECL(none);
MLX5_TXOFF_PRE_DECL(md);
MLX5_TXOFF_PRE_DECL(mt);
MLX5_TXOFF_PRE_DECL(mtsc);
MLX5_TXOFF_PRE_DECL(mti);
MLX5_TXOFF_PRE_DECL(mtv);
MLX5_TXOFF_PRE_DECL(mtiv);
MLX5_TXOFF_PRE_DECL(sc);
MLX5_TXOFF_PRE_DECL(sci);
MLX5_TXOFF_PRE_DECL(scv);
MLX5_TXOFF_PRE_DECL(sciv);
MLX5_TXOFF_PRE_DECL(i);
MLX5_TXOFF_PRE_DECL(v);
MLX5_TXOFF_PRE_DECL(iv);

/* mlx5_tx_txpp.c */

MLX5_TXOFF_PRE_DECL(full_ts_nompw);
MLX5_TXOFF_PRE_DECL(full_ts_nompwi);
MLX5_TXOFF_PRE_DECL(full_ts);
MLX5_TXOFF_PRE_DECL(full_ts_noi);
MLX5_TXOFF_PRE_DECL(none_ts);
MLX5_TXOFF_PRE_DECL(mdi_ts);
MLX5_TXOFF_PRE_DECL(mti_ts);
MLX5_TXOFF_PRE_DECL(mtiv_ts);

/* mlx5_tx_mpw.c */

MLX5_TXOFF_PRE_DECL(none_mpw);
MLX5_TXOFF_PRE_DECL(mci_mpw);
MLX5_TXOFF_PRE_DECL(mc_mpw);
MLX5_TXOFF_PRE_DECL(i_mpw);

static __rte_always_inline struct mlx5_uar_data *
mlx5_tx_bfreg(struct mlx5_txq_data *txq)
{
	return &MLX5_PROC_PRIV(txq->port_id)->uar_table[txq->idx];
}

/**
 * Ring TX queue doorbell and flush the update by write memory barrier.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param wqe
 *   Pointer to the last WQE posted in the NIC.
 */
static __rte_always_inline void
mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
{
	mlx5_doorbell_ring(mlx5_tx_bfreg(txq), *(volatile uint64_t *)wqe,
			   txq->wqe_ci, txq->qp_db, 1);
}

/**
 * Convert timestamp from mbuf format to linear counter
 * of Clock Queue completions (24 bits).
 *
 * @param sh
 *   Pointer to the device shared context to fetch Tx
 *   packet pacing timestamp and parameters.
 * @param mts
 *   Timestamp from mbuf to convert.
 * @return
 *   positive or zero value - completion ID to wait.
 *   negative value - conversion error.
 */
static __rte_always_inline int32_t
mlx5_txpp_convert_tx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t mts)
{
	uint64_t ts, ci;
	uint32_t tick;

	do {
		/*
		 * Read atomically two uint64_t fields and compare lsb bits.
		 * If there is no match - the timestamp was updated in
		 * the service thread, data should be re-read.
		 */
		rte_compiler_barrier();
		ci = __atomic_load_n(&sh->txpp.ts.ci_ts, __ATOMIC_RELAXED);
		ts = __atomic_load_n(&sh->txpp.ts.ts, __ATOMIC_RELAXED);
		rte_compiler_barrier();
		if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH)))
			break;
	} while (true);
	/* Perform the skew correction, positive value to send earlier. */
	mts -= sh->txpp.skew;
	mts -= ts;
	if (unlikely(mts >= UINT64_MAX / 2)) {
		/* The delta is negative, the timestamp is in the past. */
		__atomic_fetch_add(&sh->txpp.err_ts_past,
				   1, __ATOMIC_RELAXED);
		return -1;
	}
	tick = sh->txpp.tick;
	MLX5_ASSERT(tick);
	/* Convert delta to completions, round up. */
	mts = (mts + tick - 1) / tick;
	if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) {
		/* The timestamp is too far in the future. */
		__atomic_fetch_add(&sh->txpp.err_ts_future,
				   1, __ATOMIC_RELAXED);
		return -1;
	}
	mts <<= 64 - MLX5_CQ_INDEX_WIDTH;
	ci += mts;
	ci >>= 64 - MLX5_CQ_INDEX_WIDTH;
	return ci;
}

/**
 * Set Software Parser flags and offsets in Ethernet Segment of WQE.
 * Flags must be preliminary initialized to zero.
 *
 * @param loc
 *   Pointer to burst routine local context.
 * @param swp_flags
 *   Pointer to store Software Parser flags.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Software Parser offsets packed in dword.
 *   Software Parser flags are set by pointer.
 */
static __rte_always_inline uint32_t
txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc,
		uint8_t *swp_flags,
		unsigned int olx)
{
	uint64_t ol, tunnel;
	unsigned int idx, off;
	uint32_t set;

	if (!MLX5_TXOFF_CONFIG(SWP))
		return 0;
	ol = loc->mbuf->ol_flags;
	tunnel = ol & RTE_MBUF_F_TX_TUNNEL_MASK;
	/*
	 * Check whether Software Parser is required.
	 * Only customized tunnels may ask for it.
	 */
	if (likely(tunnel != RTE_MBUF_F_TX_TUNNEL_UDP &&
		   tunnel != RTE_MBUF_F_TX_TUNNEL_IP))
		return 0;
	/*
	 * The index should have:
	 * bit[0:1] = RTE_MBUF_F_TX_L4_MASK
	 * bit[4] = RTE_MBUF_F_TX_IPV6
	 * bit[8] = RTE_MBUF_F_TX_OUTER_IPV6
	 * bit[9] = RTE_MBUF_F_TX_OUTER_UDP
	 */
	idx = (ol & (RTE_MBUF_F_TX_L4_MASK |
		     RTE_MBUF_F_TX_IPV6 |
		     RTE_MBUF_F_TX_OUTER_IPV6)) >> 52;
	idx |= (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) ? (1 << 9) : 0;
	*swp_flags = mlx5_swp_types_table[idx];
	/*
	 * Set offsets for SW parser. Since ConnectX-5, SW parser just
	 * complements HW parser. SW parser starts to engage only if HW parser
	 * can't reach a header. For the older devices, HW parser will not kick
	 * in if any of SWP offsets is set. Therefore, all of the L3 offsets
	 * should be set regardless of HW offload.
	 */
	off = loc->mbuf->outer_l2_len;
	if (MLX5_TXOFF_CONFIG(VLAN) && ol & RTE_MBUF_F_TX_VLAN)
		off += sizeof(struct rte_vlan_hdr);
	set = (off >> 1) << 8; /* Outer L3 offset. */
	off += loc->mbuf->outer_l3_len;
	if (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP)
		set |= off >> 1; /* Outer L4 offset. */
	if (ol & (RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IPV6)) { /* Inner IP. */
		const uint64_t csum = ol & RTE_MBUF_F_TX_L4_MASK;

		off += loc->mbuf->l2_len;
		set |= (off >> 1) << 24; /* Inner L3 offset. */
		if (csum == RTE_MBUF_F_TX_TCP_CKSUM ||
		    csum == RTE_MBUF_F_TX_UDP_CKSUM ||
		    (MLX5_TXOFF_CONFIG(TSO) && ol & RTE_MBUF_F_TX_TCP_SEG)) {
			off += loc->mbuf->l3_len;
			set |= (off >> 1) << 16; /* Inner L4 offset. */
		}
	}
	set = rte_cpu_to_le_32(set);
	return set;
}
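/*
 * Worked example (illustration only, values are hypothetical): consider a
 * packet with RTE_MBUF_F_TX_TUNNEL_UDP, RTE_MBUF_F_TX_IPV4 and
 * RTE_MBUF_F_TX_UDP_CKSUM set, outer_l2_len = 14, outer_l3_len = 20,
 * l2_len = 16 (tunnel plus inner Ethernet headers) and l3_len = 20.
 * txq_mbuf_to_swp() above packs the offsets in 2-byte units:
 *
 *   outer L3 offset = 14 / 2 = 7   -> bits 8..15
 *   outer L4 offset = 34 / 2 = 17  -> bits 0..7   (UDP tunnel only)
 *   inner L3 offset = 50 / 2 = 25  -> bits 24..31
 *   inner L4 offset = 70 / 2 = 35  -> bits 16..23 (L4 checksum or TSO)
 *
 * so the returned dword is rte_cpu_to_le_32(0x19230711).
 */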
/**
 * Convert the Checksum offloads to Verbs.
 *
 * @param buf
 *   Pointer to the mbuf.
 *
 * @return
 *   Converted checksum flags.
 */
static __rte_always_inline uint8_t
txq_ol_cksum_to_cs(struct rte_mbuf *buf)
{
	uint32_t idx;
	uint8_t is_tunnel = !!(buf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK);
	const uint64_t ol_flags_mask = RTE_MBUF_F_TX_TCP_SEG |
				       RTE_MBUF_F_TX_L4_MASK |
				       RTE_MBUF_F_TX_IP_CKSUM |
				       RTE_MBUF_F_TX_OUTER_IP_CKSUM;

	/*
	 * The index should have:
	 * bit[0] = RTE_MBUF_F_TX_TCP_SEG
	 * bit[2:3] = RTE_MBUF_F_TX_UDP_CKSUM, RTE_MBUF_F_TX_TCP_CKSUM
	 * bit[4] = RTE_MBUF_F_TX_IP_CKSUM
	 * bit[8] = RTE_MBUF_F_TX_OUTER_IP_CKSUM
	 * bit[9] = tunnel
	 */
	idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9);
	return mlx5_cksum_table[idx];
}

/**
 * Free the mbufs from the linear array of pointers.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param pkts
 *   Pointer to array of packets to be freed.
 * @param pkts_n
 *   Number of packets to be freed.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq,
		  struct rte_mbuf **__rte_restrict pkts,
		  unsigned int pkts_n,
		  unsigned int olx __rte_unused)
{
	struct rte_mempool *pool = NULL;
	struct rte_mbuf **p_free = NULL;
	struct rte_mbuf *mbuf;
	unsigned int n_free = 0;

	/*
	 * The implemented algorithm eliminates
	 * copying pointers to temporary array
	 * for rte_mempool_put_bulk() calls.
	 */
	MLX5_ASSERT(pkts);
	MLX5_ASSERT(pkts_n);
	/*
	 * Free mbufs directly to the pool in bulk
	 * if fast free offload is engaged.
	 */
	if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) {
		mbuf = *pkts;
		pool = mbuf->pool;
		rte_mempool_put_bulk(pool, (void *)pkts, pkts_n);
		return;
	}
	for (;;) {
		for (;;) {
			/*
			 * Decrement mbuf reference counter, detach
			 * indirect and external buffers if needed.
			 */
			mbuf = rte_pktmbuf_prefree_seg(*pkts);
			if (likely(mbuf != NULL)) {
				MLX5_ASSERT(mbuf == *pkts);
				if (likely(n_free != 0)) {
					if (unlikely(pool != mbuf->pool))
						/* From different pool. */
						break;
				} else {
					/* Start new scan array. */
					pool = mbuf->pool;
					p_free = pkts;
				}
				++n_free;
				++pkts;
				--pkts_n;
				if (unlikely(pkts_n == 0)) {
					mbuf = NULL;
					break;
				}
			} else {
				/*
				 * This happens if mbuf is still referenced.
				 * We can't put it back to the pool, skip.
				 */
				++pkts;
				--pkts_n;
				if (unlikely(n_free != 0))
					/* There is some array to free. */
					break;
				if (unlikely(pkts_n == 0))
					/* Last mbuf, nothing to free. */
					return;
			}
		}
		for (;;) {
			/*
			 * This loop is implemented to avoid multiple
			 * inlining of rte_mempool_put_bulk().
			 */
			MLX5_ASSERT(pool);
			MLX5_ASSERT(p_free);
			MLX5_ASSERT(n_free);
			/*
			 * Free the array of pre-freed mbufs
			 * belonging to the same memory pool.
			 */
			rte_mempool_put_bulk(pool, (void *)p_free, n_free);
			if (unlikely(mbuf != NULL)) {
				/* There is the request to start new scan. */
				pool = mbuf->pool;
				p_free = pkts++;
				n_free = 1;
				--pkts_n;
				if (likely(pkts_n != 0))
					break;
				/*
				 * This is the last mbuf to be freed.
				 * Do one more loop iteration to complete.
				 * This is rare case of the last unique mbuf.
				 */
				mbuf = NULL;
				continue;
			}
			if (likely(pkts_n == 0))
				return;
			n_free = 0;
			break;
		}
	}
}

/**
 * No inline version to free buffers for optimal call
 * on the tx_burst completion.
 */
static __rte_noinline void
__mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq,
		    struct rte_mbuf **__rte_restrict pkts,
		    unsigned int pkts_n,
		    unsigned int olx __rte_unused)
{
	mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx);
}

/**
 * Free the mbufs from the elts ring buffer till new tail.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param tail
 *   Index in elts to free up to, becomes new elts tail.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq,
		  uint16_t tail,
		  unsigned int olx __rte_unused)
{
	uint16_t n_elts = tail - txq->elts_tail;

	MLX5_ASSERT(n_elts);
	MLX5_ASSERT(n_elts <= txq->elts_s);
	/*
	 * Implement a loop to support ring buffer wraparound
	 * with single inlining of mlx5_tx_free_mbuf().
	 */
	do {
		unsigned int part;

		part = txq->elts_s - (txq->elts_tail & txq->elts_m);
		part = RTE_MIN(part, n_elts);
		MLX5_ASSERT(part);
		MLX5_ASSERT(part <= txq->elts_s);
		mlx5_tx_free_mbuf(txq,
				  &txq->elts[txq->elts_tail & txq->elts_m],
				  part, olx);
		txq->elts_tail += part;
		n_elts -= part;
	} while (n_elts);
}

/**
 * Store the mbufs being sent into elts ring buffer.
 * On Tx completion these mbufs will be freed.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param pkts
 *   Pointer to array of packets to be stored.
 * @param pkts_n
 *   Number of packets to be stored.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq,
		  struct rte_mbuf **__rte_restrict pkts,
		  unsigned int pkts_n,
		  unsigned int olx __rte_unused)
{
	unsigned int part;
	struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts;

	MLX5_ASSERT(pkts);
	MLX5_ASSERT(pkts_n);
	part = txq->elts_s - (txq->elts_head & txq->elts_m);
	MLX5_ASSERT(part);
	MLX5_ASSERT(part <= txq->elts_s);
	/* This code is a good candidate for vectorizing with SIMD. */
	rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)),
		   (void *)pkts,
		   RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *));
	txq->elts_head += pkts_n;
	if (unlikely(part < pkts_n))
		/* The copy is wrapping around the elts array. */
		rte_memcpy((void *)elts, (void *)(pkts + part),
			   (pkts_n - part) * sizeof(struct rte_mbuf *));
}
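/*
 * Worked example of the elts ring arithmetic above (values are hypothetical):
 * with elts_n = 8 the ring holds elts_s = 256 entries and elts_m = 255.
 * The 16-bit head/tail counters are free-running and only masked on access,
 * so e.g. elts_head = 65530 and pkts_n = 10 gives part = 256 - (65530 & 255)
 * = 6 contiguous slots; the remaining 4 pointers are copied to the ring
 * start, and elts_head simply wraps through zero as an unsigned 16-bit value.
 */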
/**
 * Check if the completion request flag should be set in the last WQE.
 * Both pushed mbufs and WQEs are monitored and the completion request
 * flag is set if any of the thresholds is reached.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq,
			   struct mlx5_txq_local *__rte_restrict loc,
			   unsigned int olx)
{
	uint16_t head = txq->elts_head;
	unsigned int part;

	part = MLX5_TXOFF_CONFIG(INLINE) ?
	       0 : loc->pkts_sent - loc->pkts_copy;
	head += part;
	if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH ||
	    (MLX5_TXOFF_CONFIG(INLINE) &&
	     (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) {
		volatile struct mlx5_wqe *last = loc->wqe_last;

		MLX5_ASSERT(last);
		txq->elts_comp = head;
		if (MLX5_TXOFF_CONFIG(INLINE))
			txq->wqe_comp = txq->wqe_ci;
		/* Request unconditional completion on last WQE. */
		last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
					    MLX5_COMP_MODE_OFFSET);
		/* Save elts_head in dedicated free on completion queue. */
#ifdef RTE_LIBRTE_MLX5_DEBUG
		txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head |
			  (last->cseg.opcode >> 8) << 16;
#else
		txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head;
#endif
		/* A CQE slot must always be available. */
		MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s);
	}
}

/**
 * Build the Control Segment with specified opcode:
 * - MLX5_OPCODE_SEND
 * - MLX5_OPCODE_ENHANCED_MPSW
 * - MLX5_OPCODE_TSO
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Control Segment.
 * @param ds
 *   Supposed length of WQE in segments.
 * @param opcode
 *   SQ WQE opcode to put into Control Segment.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc __rte_unused,
		  struct mlx5_wqe *__rte_restrict wqe,
		  unsigned int ds,
		  unsigned int opcode,
		  unsigned int olx __rte_unused)
{
	struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg;

	/* For legacy MPW replace the EMPW by TSO with modifier. */
	if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW)
		opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24;
	cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode);
	cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
	cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR <<
			     MLX5_COMP_MODE_OFFSET);
	cs->misc = RTE_BE32(0);
}

/**
 * Build the Synchronize Queue Segment with specified completion index.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Control Segment.
 * @param wci
 *   Completion index in Clock Queue to wait.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_qseg_init(struct mlx5_txq_data *restrict txq,
		  struct mlx5_txq_local *restrict loc __rte_unused,
		  struct mlx5_wqe *restrict wqe,
		  unsigned int wci,
		  unsigned int olx __rte_unused)
{
	struct mlx5_wqe_qseg *qs;

	qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE);
	qs->max_index = rte_cpu_to_be_32(wci);
	qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq_obj.cq->id);
	qs->reserved0 = RTE_BE32(0);
	qs->reserved1 = RTE_BE32(0);
}

/**
 * Build the Wait on Time Segment with specified timestamp value.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Control Segment.
 * @param ts
 *   Timestamp value to wait for.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq,
		  struct mlx5_txq_local *restrict loc __rte_unused,
		  struct mlx5_wqe *restrict wqe,
		  uint64_t ts,
		  unsigned int olx __rte_unused)
{
	struct mlx5_wqe_wseg *ws;

	ws = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE);
	ws->operation = rte_cpu_to_be_32(MLX5_WAIT_COND_CYCLIC_BIGGER);
	ws->lkey = RTE_BE32(0);
	ws->va_high = RTE_BE32(0);
	ws->va_low = RTE_BE32(0);
	if (txq->rt_timestamp) {
		ts = ts % (uint64_t)NS_PER_S
		     | (ts / (uint64_t)NS_PER_S) << 32;
	}
	ws->value = rte_cpu_to_be_64(ts);
	ws->mask = txq->rt_timemask;
}
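/*
 * Worked example for the real-time timestamp conversion above (hypothetical
 * value): with txq->rt_timestamp set, a nanosecond timestamp of
 * 5,500,000,000 ns (5.5 s) is repacked as
 *
 *   (5500000000 % NS_PER_S) | (5500000000 / NS_PER_S) << 32
 *     = 500000000 | (5 << 32)
 *
 * i.e. seconds in the upper 32 bits and the nanosecond remainder in the
 * lower 32 bits, which is the format the Wait on Time Segment expects here.
 */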
/**
 * Build the Ethernet Segment without inlined data.
 * Supports Software Parser, Checksums and VLAN insertion Tx offload features.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet Segment.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe *__rte_restrict wqe,
		  unsigned int olx)
{
	struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg;
	uint32_t csum;

	/*
	 * Calculate and set check sum flags first, dword field
	 * in segment may be shared with Software Parser flags.
	 */
	csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0;
	es->flags = rte_cpu_to_le_32(csum);
	/*
	 * Calculate and set Software Parser offsets and flags.
	 * These flags are set for custom UDP and IP tunnel packets.
	 */
	es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx);
	/* Fill metadata field if needed. */
	es->metadata = MLX5_TXOFF_CONFIG(METADATA) ?
		       loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ?
		       rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) :
		       0 : 0;
	/* Engage VLAN tag insertion feature if requested. */
	if (MLX5_TXOFF_CONFIG(VLAN) &&
	    loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) {
		/*
		 * We should get here only if device supports
		 * this feature correctly.
		 */
		MLX5_ASSERT(txq->vlan_en);
		es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT |
						  loc->mbuf->vlan_tci);
	} else {
		es->inline_hdr = RTE_BE32(0);
	}
}

/**
 * Build the Ethernet Segment with minimal inlined data
 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is
 * used to fill the gap in single WQEBB WQEs.
 * Supports Software Parser, Checksums and VLAN
 * insertion Tx offload features.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet Segment.
 * @param vlan
 *   Length of VLAN tag insertion if any.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe *__rte_restrict wqe,
		  unsigned int vlan,
		  unsigned int olx)
{
	struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg;
	uint32_t csum;
	uint8_t *psrc, *pdst;

	/*
	 * Calculate and set check sum flags first, dword field
	 * in segment may be shared with Software Parser flags.
	 */
	csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0;
	es->flags = rte_cpu_to_le_32(csum);
	/*
	 * Calculate and set Software Parser offsets and flags.
	 * These flags are set for custom UDP and IP tunnel packets.
	 */
	es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx);
	/* Fill metadata field if needed. */
	es->metadata = MLX5_TXOFF_CONFIG(METADATA) ?
		       loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ?
		       rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) :
		       0 : 0;
	psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *);
	es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE);
	es->inline_data = *(unaligned_uint16_t *)psrc;
	psrc += sizeof(uint16_t);
	pdst = (uint8_t *)(es + 1);
	if (MLX5_TXOFF_CONFIG(VLAN) && vlan) {
		/* Implement VLAN tag insertion as part of inline data. */
		memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t));
		pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		/* Insert VLAN ethertype + VLAN tag. */
		*(unaligned_uint32_t *)pdst = rte_cpu_to_be_32
						((RTE_ETHER_TYPE_VLAN << 16) |
						 loc->mbuf->vlan_tci);
		pdst += sizeof(struct rte_vlan_hdr);
		/* Copy the rest two bytes from packet data. */
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t)));
		*(uint16_t *)pdst = *(unaligned_uint16_t *)psrc;
	} else {
		/* Fill the gap in the title WQEBB with inline data. */
		rte_mov16(pdst, psrc);
	}
}

/**
 * Build the Ethernet Segment with entire packet data inlining. Checks the
 * boundary of WQEBB and ring buffer wrapping, supports Software Parser,
 * Checksums and VLAN insertion Tx offload features.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet Segment.
 * @param vlan
 *   Length of VLAN tag insertion if any.
 * @param inlen
 *   Length of data to inline (VLAN included, if any).
 * @param tso
 *   TSO flag, set mss field from the packet.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Pointer to the next Data Segment (aligned and wrapped around).
 */
static __rte_always_inline struct mlx5_wqe_dseg *
mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe *__rte_restrict wqe,
		  unsigned int vlan,
		  unsigned int inlen,
		  unsigned int tso,
		  unsigned int olx)
{
	struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg;
	uint32_t csum;
	uint8_t *psrc, *pdst;
	unsigned int part;

	/*
	 * Calculate and set check sum flags first, dword field
	 * in segment may be shared with Software Parser flags.
	 */
	csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0;
	if (tso) {
		csum <<= 24;
		csum |= loc->mbuf->tso_segsz;
		es->flags = rte_cpu_to_be_32(csum);
	} else {
		es->flags = rte_cpu_to_le_32(csum);
	}
	/*
	 * Calculate and set Software Parser offsets and flags.
	 * These flags are set for custom UDP and IP tunnel packets.
	 */
	es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx);
	/* Fill metadata field if needed. */
	es->metadata = MLX5_TXOFF_CONFIG(METADATA) ?
		       loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ?
		       rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) :
		       0 : 0;
	psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *);
	es->inline_hdr_sz = rte_cpu_to_be_16(inlen);
	es->inline_data = *(unaligned_uint16_t *)psrc;
	psrc += sizeof(uint16_t);
	pdst = (uint8_t *)(es + 1);
	if (MLX5_TXOFF_CONFIG(VLAN) && vlan) {
		/* Implement VLAN tag insertion as part of inline data. */
		memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t));
		pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		/* Insert VLAN ethertype + VLAN tag. */
		*(unaligned_uint32_t *)pdst = rte_cpu_to_be_32
						((RTE_ETHER_TYPE_VLAN << 16) |
						 loc->mbuf->vlan_tci);
		pdst += sizeof(struct rte_vlan_hdr);
		/* Copy the rest two bytes from packet data. */
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t)));
		*(uint16_t *)pdst = *(unaligned_uint16_t *)psrc;
		psrc += sizeof(uint16_t);
	} else {
		/* Fill the gap in the title WQEBB with inline data. */
		rte_mov16(pdst, psrc);
		psrc += sizeof(rte_v128u32_t);
	}
	pdst = (uint8_t *)(es + 2);
	MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE);
	MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end);
	inlen -= MLX5_ESEG_MIN_INLINE_SIZE;
	if (!inlen) {
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE));
		return (struct mlx5_wqe_dseg *)pdst;
	}
	/*
	 * The WQEBB space availability is checked by caller.
	 * Here we should be aware of WQE ring buffer wraparound only.
	 */
	part = (uint8_t *)txq->wqes_end - pdst;
	part = RTE_MIN(part, inlen);
	do {
		rte_memcpy(pdst, psrc, part);
		inlen -= part;
		if (likely(!inlen)) {
			/*
			 * If return value is not used by the caller
			 * the code below will be optimized out.
			 */
			pdst += part;
			pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
			if (unlikely(pdst >= (uint8_t *)txq->wqes_end))
				pdst = (uint8_t *)txq->wqes;
			return (struct mlx5_wqe_dseg *)pdst;
		}
		pdst = (uint8_t *)txq->wqes;
		psrc += part;
		part = inlen;
	} while (true);
}

/**
 * Copy data from chain of mbuf to the specified linear buffer.
 * Supports Checksums and VLAN insertion Tx offload features. If data
 * from some mbuf is copied completely this mbuf is freed. Local
 * structure is used to keep the byte stream state.
 *
 * @param pdst
 *   Pointer to the destination linear buffer.
 * @param loc
 *   Pointer to burst routine local context.
 * @param len
 *   Length of data to be copied.
 * @param must
 *   Length of data to be copied ignoring no inline hint.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Number of actual copied data bytes. This is always greater than or
 *   equal to the must parameter and may be less than len if the no inline
 *   hint flag is encountered.
 */
static __rte_always_inline unsigned int
mlx5_tx_mseg_memcpy(uint8_t *pdst,
		    struct mlx5_txq_local *__rte_restrict loc,
		    unsigned int len,
		    unsigned int must,
		    unsigned int olx __rte_unused)
{
	struct rte_mbuf *mbuf;
	unsigned int part, dlen, copy = 0;
	uint8_t *psrc;

	MLX5_ASSERT(len);
	do {
		/* Allow zero length packets, must check first. */
		dlen = rte_pktmbuf_data_len(loc->mbuf);
		if (dlen <= loc->mbuf_off) {
			/* Exhausted packet, just free. */
			mbuf = loc->mbuf;
			loc->mbuf = mbuf->next;
			rte_pktmbuf_free_seg(mbuf);
			loc->mbuf_off = 0;
			MLX5_ASSERT(loc->mbuf_nseg > 1);
			MLX5_ASSERT(loc->mbuf);
			--loc->mbuf_nseg;
			if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) {
				unsigned int diff;

				if (copy >= must) {
					/*
					 * We already copied the minimal
					 * requested amount of data.
					 */
					return copy;
				}
				diff = must - copy;
				if (diff <= rte_pktmbuf_data_len(loc->mbuf)) {
					/*
					 * Copy only the minimal required
					 * part of the data buffer. Limit amount
					 * of data to be copied to the length of
					 * available space.
					 */
					len = RTE_MIN(len, diff);
				}
			}
			continue;
		}
		dlen -= loc->mbuf_off;
		psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *,
					       loc->mbuf_off);
		part = RTE_MIN(len, dlen);
		rte_memcpy(pdst, psrc, part);
		copy += part;
		loc->mbuf_off += part;
		len -= part;
		if (!len) {
			if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) {
				loc->mbuf_off = 0;
				/* Exhausted packet, just free. */
				mbuf = loc->mbuf;
				loc->mbuf = mbuf->next;
				rte_pktmbuf_free_seg(mbuf);
				loc->mbuf_off = 0;
				MLX5_ASSERT(loc->mbuf_nseg >= 1);
				--loc->mbuf_nseg;
			}
			return copy;
		}
		pdst += part;
	} while (true);
}

/**
 * Build the Ethernet Segment with inlined data from multi-segment packet.
 * Checks the boundary of WQEBB and ring buffer wrapping, supports Software
 * Parser, Checksums and VLAN insertion Tx offload features.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet Segment.
 * @param vlan
 *   Length of VLAN tag insertion if any.
 * @param inlen
 *   Length of data to inline (VLAN included, if any).
 * @param tso
 *   TSO flag, set mss field from the packet.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Pointer to the next Data Segment (aligned and possibly NOT wrapped
 *   around - caller should do wrapping check on its own).
 */
static __rte_always_inline struct mlx5_wqe_dseg *
mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe *__rte_restrict wqe,
		  unsigned int vlan,
		  unsigned int inlen,
		  unsigned int tso,
		  unsigned int olx)
{
	struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg;
	uint32_t csum;
	uint8_t *pdst;
	unsigned int part, tlen = 0;

	/*
	 * Calculate and set check sum flags first, uint32_t field
	 * in segment may be shared with Software Parser flags.
	 */
	csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0;
	if (tso) {
		csum <<= 24;
		csum |= loc->mbuf->tso_segsz;
		es->flags = rte_cpu_to_be_32(csum);
	} else {
		es->flags = rte_cpu_to_le_32(csum);
	}
	/*
	 * Calculate and set Software Parser offsets and flags.
	 * These flags are set for custom UDP and IP tunnel packets.
	 */
	es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx);
	/* Fill metadata field if needed. */
	es->metadata = MLX5_TXOFF_CONFIG(METADATA) ?
		       loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ?
		       rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) :
		       0 : 0;
	MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE);
	pdst = (uint8_t *)&es->inline_data;
	if (MLX5_TXOFF_CONFIG(VLAN) && vlan) {
		/* Implement VLAN tag insertion as part of inline data. */
		mlx5_tx_mseg_memcpy(pdst, loc,
				    2 * RTE_ETHER_ADDR_LEN,
				    2 * RTE_ETHER_ADDR_LEN, olx);
		pdst += 2 * RTE_ETHER_ADDR_LEN;
		*(unaligned_uint32_t *)pdst = rte_cpu_to_be_32
						((RTE_ETHER_TYPE_VLAN << 16) |
						 loc->mbuf->vlan_tci);
		pdst += sizeof(struct rte_vlan_hdr);
		tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr);
	}
	MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end);
	/*
	 * The WQEBB space availability is checked by caller.
	 * Here we should be aware of WQE ring buffer wraparound only.
	 */
	part = (uint8_t *)txq->wqes_end - pdst;
	part = RTE_MIN(part, inlen - tlen);
	MLX5_ASSERT(part);
	do {
		unsigned int copy;

		/*
		 * Copying may be interrupted inside the routine
		 * if run into no inline hint flag.
		 */
		copy = tso ? inlen : txq->inlen_mode;
		copy = tlen >= copy ? 0 : (copy - tlen);
		copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx);
		tlen += copy;
		if (likely(inlen <= tlen) || copy < part) {
			es->inline_hdr_sz = rte_cpu_to_be_16(tlen);
			pdst += copy;
			pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
			return (struct mlx5_wqe_dseg *)pdst;
		}
		pdst = (uint8_t *)txq->wqes;
		part = inlen - tlen;
	} while (true);
}

/**
 * Build the Data Segment of pointer type.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param dseg
 *   Pointer to WQE to fill with built Data Segment.
 * @param buf
 *   Data buffer to point.
 * @param len
 *   Data buffer length.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq,
		 struct mlx5_txq_local *__rte_restrict loc,
		 struct mlx5_wqe_dseg *__rte_restrict dseg,
		 uint8_t *buf,
		 unsigned int len,
		 unsigned int olx __rte_unused)
{
	MLX5_ASSERT(len);
	dseg->bcount = rte_cpu_to_be_32(len);
	dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf);
	dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf);
}

/**
 * Build the Data Segment of pointer type, or inline the data
 * if its length fits into the minimal Data Segment inline size.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param dseg
 *   Pointer to WQE to fill with built Data Segment.
 * @param buf
 *   Data buffer to point.
 * @param len
 *   Data buffer length.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe_dseg *__rte_restrict dseg,
		  uint8_t *buf,
		  unsigned int len,
		  unsigned int olx __rte_unused)
{
	uintptr_t dst, src;

	MLX5_ASSERT(len);
	if (len > MLX5_DSEG_MIN_INLINE_SIZE) {
		dseg->bcount = rte_cpu_to_be_32(len);
		dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf);
		dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf);

		return;
	}
	dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE);
	/* Unrolled implementation of generic rte_memcpy. */
	dst = (uintptr_t)&dseg->inline_data[0];
	src = (uintptr_t)buf;
	if (len & 0x08) {
#ifdef RTE_ARCH_STRICT_ALIGN
		MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t)));
		*(uint32_t *)dst = *(unaligned_uint32_t *)src;
		dst += sizeof(uint32_t);
		src += sizeof(uint32_t);
		*(uint32_t *)dst = *(unaligned_uint32_t *)src;
		dst += sizeof(uint32_t);
		src += sizeof(uint32_t);
#else
		*(uint64_t *)dst = *(unaligned_uint64_t *)src;
		dst += sizeof(uint64_t);
		src += sizeof(uint64_t);
#endif
	}
	if (len & 0x04) {
		*(uint32_t *)dst = *(unaligned_uint32_t *)src;
		dst += sizeof(uint32_t);
		src += sizeof(uint32_t);
	}
	if (len & 0x02) {
		*(uint16_t *)dst = *(unaligned_uint16_t *)src;
		dst += sizeof(uint16_t);
		src += sizeof(uint16_t);
	}
	if (len & 0x01)
		*(uint8_t *)dst = *(uint8_t *)src;
}
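/*
 * Illustrative note on the inline branch above: data segments of up to
 * MLX5_DSEG_MIN_INLINE_SIZE bytes are copied straight into the WQE with the
 * bcount tagged MLX5_ETH_WQE_DATA_INLINE, so no lkey lookup is needed for
 * them; longer buffers are referenced by pointer. The unrolled copy handles
 * the tail by bit-testing the length: for example (hypothetical value),
 * len = 11 (0b1011) is copied as 8 + 2 + 1 bytes through the three branches.
 */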
/**
 * Build the Data Segment of inlined data from single
 * segment packet, no VLAN insertion.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param dseg
 *   Pointer to WQE to fill with built Data Segment.
 * @param buf
 *   Data buffer to point.
 * @param len
 *   Data buffer length.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Pointer to the next Data Segment after inlined data.
 *   Ring buffer wraparound check is needed. We do not do it here because it
 *   may not be needed for the last packet in the eMPW session.
 */
static __rte_always_inline struct mlx5_wqe_dseg *
mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc __rte_unused,
		  struct mlx5_wqe_dseg *__rte_restrict dseg,
		  uint8_t *buf,
		  unsigned int len,
		  unsigned int olx __rte_unused)
{
	unsigned int part;
	uint8_t *pdst;

	if (!MLX5_TXOFF_CONFIG(MPW)) {
		/* Store the descriptor byte counter for eMPW sessions. */
		dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE);
		pdst = &dseg->inline_data[0];
	} else {
		/* The entire legacy MPW session counter is stored on close. */
		pdst = (uint8_t *)dseg;
	}
	/*
	 * The WQEBB space availability is checked by caller.
	 * Here we should be aware of WQE ring buffer wraparound only.
	 */
	part = (uint8_t *)txq->wqes_end - pdst;
	part = RTE_MIN(part, len);
	do {
		rte_memcpy(pdst, buf, part);
		len -= part;
		if (likely(!len)) {
			pdst += part;
			if (!MLX5_TXOFF_CONFIG(MPW))
				pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
			/* Note: no final wraparound check here. */
			return (struct mlx5_wqe_dseg *)pdst;
		}
		pdst = (uint8_t *)txq->wqes;
		buf += part;
		part = len;
	} while (true);
}

/**
 * Build the Data Segment of inlined data from single
 * segment packet with VLAN insertion.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param dseg
 *   Pointer to the dseg to fill with built Data Segment.
 * @param buf
 *   Data buffer to point.
 * @param len
 *   Data buffer length.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Pointer to the next Data Segment after inlined data.
 *   Ring buffer wraparound check is needed.
 */
static __rte_always_inline struct mlx5_wqe_dseg *
mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc __rte_unused,
		  struct mlx5_wqe_dseg *__rte_restrict dseg,
		  uint8_t *buf,
		  unsigned int len,
		  unsigned int olx __rte_unused)
{
	unsigned int part;
	uint8_t *pdst;

	MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE);
	if (!MLX5_TXOFF_CONFIG(MPW)) {
		/* Store the descriptor byte counter for eMPW sessions. */
		dseg->bcount = rte_cpu_to_be_32
				((len + sizeof(struct rte_vlan_hdr)) |
				 MLX5_ETH_WQE_DATA_INLINE);
		pdst = &dseg->inline_data[0];
	} else {
		/* The entire legacy MPW session counter is stored on close. */
		pdst = (uint8_t *)dseg;
	}
	memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE);
	buf += MLX5_DSEG_MIN_INLINE_SIZE;
	pdst += MLX5_DSEG_MIN_INLINE_SIZE;
	len -= MLX5_DSEG_MIN_INLINE_SIZE;
	/* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */
	MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE));
	if (unlikely(pdst >= (uint8_t *)txq->wqes_end))
		pdst = (uint8_t *)txq->wqes;
	*(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) |
					     loc->mbuf->vlan_tci);
	pdst += sizeof(struct rte_vlan_hdr);
	/*
	 * The WQEBB space availability is checked by caller.
	 * Here we should be aware of WQE ring buffer wraparound only.
	 */
	part = (uint8_t *)txq->wqes_end - pdst;
	part = RTE_MIN(part, len);
	do {
		rte_memcpy(pdst, buf, part);
		len -= part;
		if (likely(!len)) {
			pdst += part;
			if (!MLX5_TXOFF_CONFIG(MPW))
				pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
			/* Note: no final wraparound check here. */
			return (struct mlx5_wqe_dseg *)pdst;
		}
		pdst = (uint8_t *)txq->wqes;
		buf += part;
		part = len;
	} while (true);
}

/**
 * Build the Ethernet Segment with optionally inlined data with
 * VLAN insertion and following Data Segments (if any) from
 * multi-segment packet. Used by ordinary send and TSO.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet/Data Segments.
 * @param vlan
 *   Length of VLAN header to insert, 0 means no VLAN insertion.
 * @param inlen
 *   Data length to inline. For TSO this parameter specifies the exact value,
 *   for ordinary send it can be aligned by the caller to provide better WQE
 *   space saving and data buffer start address alignment.
 *   This length includes the VLAN header being inserted.
 * @param tso
 *   Zero means ordinary send, inlined data can be extended,
 *   otherwise this is TSO, inlined data length is fixed.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Actual size of built WQE in segments.
 */
static __rte_always_inline unsigned int
mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq,
		   struct mlx5_txq_local *__rte_restrict loc,
		   struct mlx5_wqe *__rte_restrict wqe,
		   unsigned int vlan,
		   unsigned int inlen,
		   unsigned int tso,
		   unsigned int olx __rte_unused)
{
	struct mlx5_wqe_dseg *__rte_restrict dseg;
	unsigned int ds;

	MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen);
	loc->mbuf_nseg = NB_SEGS(loc->mbuf);
	loc->mbuf_off = 0;

	dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx);
	if (!loc->mbuf_nseg)
		goto dseg_done;
	/*
	 * There are still some mbufs remaining, not inlined.
	 * The first mbuf may be partially inlined and we
	 * must process the possible non-zero data offset.
	 */
	if (loc->mbuf_off) {
		unsigned int dlen;
		uint8_t *dptr;

		/*
		 * Exhausted packets must be dropped before.
		 * Non-zero offset means there are some data
		 * remaining in the packet.
		 */
		MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf));
		MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf));
		dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *,
					       loc->mbuf_off);
		dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off;
		/*
		 * Build the pointer/minimal Data Segment.
		 * Do ring buffer wrapping check in advance.
		 */
		if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end)
			dseg = (struct mlx5_wqe_dseg *)txq->wqes;
		mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx);
		/* Store the mbuf to be freed on completion. */
		MLX5_ASSERT(loc->elts_free);
		txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf;
		--loc->elts_free;
		++dseg;
		if (--loc->mbuf_nseg == 0)
			goto dseg_done;
		loc->mbuf = loc->mbuf->next;
		loc->mbuf_off = 0;
	}
	do {
		if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) {
			struct rte_mbuf *mbuf;

			/* Zero length segment found, just skip. */
			mbuf = loc->mbuf;
			loc->mbuf = loc->mbuf->next;
			rte_pktmbuf_free_seg(mbuf);
			if (--loc->mbuf_nseg == 0)
				break;
		} else {
			if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end)
				dseg = (struct mlx5_wqe_dseg *)txq->wqes;
			mlx5_tx_dseg_iptr
				(txq, loc, dseg,
				 rte_pktmbuf_mtod(loc->mbuf, uint8_t *),
				 rte_pktmbuf_data_len(loc->mbuf), olx);
			MLX5_ASSERT(loc->elts_free);
			txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf;
			--loc->elts_free;
			++dseg;
			if (--loc->mbuf_nseg == 0)
				break;
			loc->mbuf = loc->mbuf->next;
		}
	} while (true);

dseg_done:
	/* Calculate actual segments used from the dseg pointer. */
	if ((uintptr_t)wqe < (uintptr_t)dseg)
		ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE;
	else
		ds = (((uintptr_t)dseg - (uintptr_t)wqe) +
		      txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE;
	return ds;
}
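/*
 * Worked example for the segment accounting above (hypothetical numbers):
 * a WQE built from a Control Segment, an Ethernet Segment carrying 18
 * inlined bytes and 3 pointer Data Segments spans ds = 6 16-byte WSEGs,
 * i.e. (6 + 3) / 4 = 2 WQEBBs of the Send Queue. The callers below use
 * exactly this (ds + 3) / 4 rounding to check wqe_free and advance wqe_ci.
 */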
*/ 1693 int32_t wci; 1694 1695 wci = mlx5_txpp_convert_tx_ts(sh, ts); 1696 if (unlikely(wci < 0)) 1697 return MLX5_TXCMP_CODE_SINGLE; 1698 /* Build the WAIT WQE with specified completion. */ 1699 mlx5_tx_cseg_init(txq, loc, wqe, 1700 1 + sizeof(struct mlx5_wqe_qseg) / 1701 MLX5_WSEG_SIZE, 1702 MLX5_OPCODE_WAIT | 1703 MLX5_OPC_MOD_WAIT_CQ_PI << 24, olx); 1704 mlx5_tx_qseg_init(txq, loc, wqe, wci, olx); 1705 } 1706 ++txq->wqe_ci; 1707 --loc->wqe_free; 1708 return MLX5_TXCMP_CODE_MULTI; 1709 } 1710 return MLX5_TXCMP_CODE_SINGLE; 1711 } 1712 1713 /** 1714 * Tx one packet function for multi-segment TSO. Supports all 1715 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 1716 * sends one packet per WQE. 1717 * 1718 * This routine is responsible for storing processed mbuf 1719 * into elts ring buffer and update elts_head. 1720 * 1721 * @param txq 1722 * Pointer to TX queue structure. 1723 * @param loc 1724 * Pointer to burst routine local context. 1725 * @param olx 1726 * Configured Tx offloads mask. It is fully defined at 1727 * compile time and may be used for optimization. 1728 * 1729 * @return 1730 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1731 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1732 * Local context variables partially updated. 1733 */ 1734 static __rte_always_inline enum mlx5_txcmp_code 1735 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 1736 struct mlx5_txq_local *__rte_restrict loc, 1737 unsigned int olx) 1738 { 1739 struct mlx5_wqe *__rte_restrict wqe; 1740 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 1741 1742 MLX5_ASSERT(loc->elts_free >= NB_SEGS(loc->mbuf)); 1743 if (MLX5_TXOFF_CONFIG(TXPP)) { 1744 enum mlx5_txcmp_code wret; 1745 1746 /* Generate WAIT for scheduling if requested. */ 1747 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 1748 if (wret == MLX5_TXCMP_CODE_EXIT) 1749 return MLX5_TXCMP_CODE_EXIT; 1750 if (wret == MLX5_TXCMP_CODE_ERROR) 1751 return MLX5_TXCMP_CODE_ERROR; 1752 } 1753 /* 1754 * Calculate data length to be inlined to estimate 1755 * the required space in WQE ring buffer. 1756 */ 1757 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1758 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1759 vlan = sizeof(struct rte_vlan_hdr); 1760 inlen = loc->mbuf->l2_len + vlan + 1761 loc->mbuf->l3_len + loc->mbuf->l4_len; 1762 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 1763 return MLX5_TXCMP_CODE_ERROR; 1764 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 1765 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 1766 /* Packet must contain all TSO headers. */ 1767 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 1768 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 1769 inlen > (dlen + vlan))) 1770 return MLX5_TXCMP_CODE_ERROR; 1771 /* 1772 * Check whether there are enough free WQEBBs: 1773 * - Control Segment 1774 * - Ethernet Segment 1775 * - First Segment of inlined Ethernet data 1776 * - ... data continued ... 1777 * - Data Segments of pointer/min inline type 1778 */ 1779 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 1780 MLX5_ESEG_MIN_INLINE_SIZE + 1781 MLX5_WSEG_SIZE + 1782 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 1783 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1784 return MLX5_TXCMP_CODE_EXIT; 1785 /* Check for maximal WQE size. */ 1786 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) 1787 return MLX5_TXCMP_CODE_ERROR; 1788 #ifdef MLX5_PMD_SOFT_COUNTERS 1789 /* Update sent data bytes/packets counters. 
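ntcp is the estimated number of TCP segments the NIC emits: the payload (dlen minus the inlined headers) divided by tso_segsz, rounded up. Every emitted segment carries a copy of the inlen header bytes, hence the ntcp * inlen term below.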
*/ 1790 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 1791 loc->mbuf->tso_segsz; 1792 /* 1793 * One will be added for mbuf itself at the end of the mlx5_tx_burst 1794 * from loc->pkts_sent field. 1795 */ 1796 --ntcp; 1797 txq->stats.opackets += ntcp; 1798 txq->stats.obytes += dlen + vlan + ntcp * inlen; 1799 #endif 1800 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1801 loc->wqe_last = wqe; 1802 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 1803 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 1804 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 1805 txq->wqe_ci += (ds + 3) / 4; 1806 loc->wqe_free -= (ds + 3) / 4; 1807 return MLX5_TXCMP_CODE_MULTI; 1808 } 1809 1810 /** 1811 * Tx one packet function for multi-segment SEND. Supports all types of Tx 1812 * offloads, uses MLX5_OPCODE_SEND to build WQEs, sends one packet per WQE, 1813 * without any data inlining in Ethernet Segment. 1814 * 1815 * This routine is responsible for storing processed mbuf 1816 * into elts ring buffer and update elts_head. 1817 * 1818 * @param txq 1819 * Pointer to TX queue structure. 1820 * @param loc 1821 * Pointer to burst routine local context. 1822 * @param olx 1823 * Configured Tx offloads mask. It is fully defined at 1824 * compile time and may be used for optimization. 1825 * 1826 * @return 1827 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1828 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1829 * Local context variables partially updated. 1830 */ 1831 static __rte_always_inline enum mlx5_txcmp_code 1832 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 1833 struct mlx5_txq_local *__rte_restrict loc, 1834 unsigned int olx) 1835 { 1836 struct mlx5_wqe_dseg *__rte_restrict dseg; 1837 struct mlx5_wqe *__rte_restrict wqe; 1838 unsigned int ds, nseg; 1839 1840 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1841 MLX5_ASSERT(loc->elts_free >= NB_SEGS(loc->mbuf)); 1842 if (MLX5_TXOFF_CONFIG(TXPP)) { 1843 enum mlx5_txcmp_code wret; 1844 1845 /* Generate WAIT for scheduling if requested. */ 1846 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 1847 if (wret == MLX5_TXCMP_CODE_EXIT) 1848 return MLX5_TXCMP_CODE_EXIT; 1849 if (wret == MLX5_TXCMP_CODE_ERROR) 1850 return MLX5_TXCMP_CODE_ERROR; 1851 } 1852 /* 1853 * No inline at all, it means the CPU cycles saving is prioritized at 1854 * configuration, we should not copy any packet data to WQE. 1855 */ 1856 nseg = NB_SEGS(loc->mbuf); 1857 ds = 2 + nseg; 1858 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1859 return MLX5_TXCMP_CODE_EXIT; 1860 /* Check for maximal WQE size. */ 1861 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) 1862 return MLX5_TXCMP_CODE_ERROR; 1863 /* 1864 * Some Tx offloads may cause an error if packet is not long enough, 1865 * check against assumed minimal length. 1866 */ 1867 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 1868 return MLX5_TXCMP_CODE_ERROR; 1869 #ifdef MLX5_PMD_SOFT_COUNTERS 1870 /* Update sent data bytes counter. 
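The VLAN tag inserted by hardware adds sizeof(struct rte_vlan_hdr) bytes, so it is accounted for to keep the counter close to the actual wire length.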
*/ 1871 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 1872 if (MLX5_TXOFF_CONFIG(VLAN) && 1873 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1874 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 1875 #endif 1876 /* 1877 * SEND WQE, one WQEBB: 1878 * - Control Segment, SEND opcode 1879 * - Ethernet Segment, optional VLAN, no inline 1880 * - Data Segments, pointer only type 1881 */ 1882 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1883 loc->wqe_last = wqe; 1884 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 1885 mlx5_tx_eseg_none(txq, loc, wqe, olx); 1886 dseg = &wqe->dseg[0]; 1887 do { 1888 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1889 struct rte_mbuf *mbuf; 1890 1891 /* 1892 * Zero length segment found, have to correct total 1893 * size of WQE in segments. 1894 * It is supposed to be rare occasion, so in normal 1895 * case (no zero length segments) we avoid extra 1896 * writing to the Control Segment. 1897 */ 1898 --ds; 1899 wqe->cseg.sq_ds -= RTE_BE32(1); 1900 mbuf = loc->mbuf; 1901 loc->mbuf = mbuf->next; 1902 rte_pktmbuf_free_seg(mbuf); 1903 if (--nseg == 0) 1904 break; 1905 } else { 1906 mlx5_tx_dseg_ptr 1907 (txq, loc, dseg, 1908 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1909 rte_pktmbuf_data_len(loc->mbuf), olx); 1910 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1911 --loc->elts_free; 1912 if (--nseg == 0) 1913 break; 1914 ++dseg; 1915 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1916 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1917 loc->mbuf = loc->mbuf->next; 1918 } 1919 } while (true); 1920 txq->wqe_ci += (ds + 3) / 4; 1921 loc->wqe_free -= (ds + 3) / 4; 1922 return MLX5_TXCMP_CODE_MULTI; 1923 } 1924 1925 /** 1926 * Tx one packet function for multi-segment SEND. Supports all 1927 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 1928 * sends one packet per WQE, with data inlining in 1929 * Ethernet Segment and minimal Data Segments. 1930 * 1931 * This routine is responsible for storing processed mbuf 1932 * into elts ring buffer and update elts_head. 1933 * 1934 * @param txq 1935 * Pointer to TX queue structure. 1936 * @param loc 1937 * Pointer to burst routine local context. 1938 * @param olx 1939 * Configured Tx offloads mask. It is fully defined at 1940 * compile time and may be used for optimization. 1941 * 1942 * @return 1943 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1944 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1945 * Local context variables partially updated. 1946 */ 1947 static __rte_always_inline enum mlx5_txcmp_code 1948 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 1949 struct mlx5_txq_local *__rte_restrict loc, 1950 unsigned int olx) 1951 { 1952 struct mlx5_wqe *__rte_restrict wqe; 1953 unsigned int ds, inlen, dlen, vlan = 0; 1954 1955 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 1956 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1957 MLX5_ASSERT(loc->elts_free >= NB_SEGS(loc->mbuf)); 1958 /* 1959 * First calculate data length to be inlined 1960 * to estimate the required space for WQE. 1961 */ 1962 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1963 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1964 vlan = sizeof(struct rte_vlan_hdr); 1965 inlen = dlen + vlan; 1966 /* Check against minimal length. 
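Some Tx offloads may fail on packets shorter than this assumed minimal length, and the Ethernet Segment always inlines at least MLX5_ESEG_MIN_INLINE_SIZE bytes.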
*/ 1967 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 1968 return MLX5_TXCMP_CODE_ERROR; 1969 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 1970 if (inlen > txq->inlen_send || 1971 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1972 struct rte_mbuf *mbuf; 1973 unsigned int nxlen; 1974 uintptr_t start; 1975 1976 mbuf = loc->mbuf; 1977 nxlen = rte_pktmbuf_data_len(mbuf); 1978 /* 1979 * Packet length exceeds the allowed inline data length, 1980 * check whether the minimal inlining is required. 1981 */ 1982 if (txq->inlen_mode) { 1983 MLX5_ASSERT(txq->inlen_mode >= 1984 MLX5_ESEG_MIN_INLINE_SIZE); 1985 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 1986 inlen = RTE_MIN(txq->inlen_mode, inlen); 1987 } else if (vlan && !txq->vlan_en) { 1988 /* 1989 * VLAN insertion is requested and hardware does not 1990 * support the offload, will do with software inline. 1991 */ 1992 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 1993 } else if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE || 1994 nxlen > txq->inlen_send) { 1995 return mlx5_tx_packet_multi_send(txq, loc, olx); 1996 } else { 1997 goto do_first; 1998 } 1999 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2000 goto do_build; 2001 /* 2002 * Now we know the minimal amount of data is requested 2003 * to inline. Check whether we should inline the buffers 2004 * from the chain beginning to eliminate some mbufs. 2005 */ 2006 if (unlikely(nxlen <= txq->inlen_send)) { 2007 /* We can inline first mbuf at least. */ 2008 if (nxlen < inlen) { 2009 unsigned int smlen; 2010 2011 /* Scan mbufs till inlen filled. */ 2012 do { 2013 smlen = nxlen; 2014 mbuf = NEXT(mbuf); 2015 MLX5_ASSERT(mbuf); 2016 nxlen = rte_pktmbuf_data_len(mbuf); 2017 nxlen += smlen; 2018 } while (unlikely(nxlen < inlen)); 2019 if (unlikely(nxlen > txq->inlen_send)) { 2020 /* We cannot inline entire mbuf. */ 2021 smlen = inlen - smlen; 2022 start = rte_pktmbuf_mtod_offset 2023 (mbuf, uintptr_t, smlen); 2024 goto do_align; 2025 } 2026 } 2027 do_first: 2028 do { 2029 inlen = nxlen; 2030 mbuf = NEXT(mbuf); 2031 /* There should be not end of packet. */ 2032 MLX5_ASSERT(mbuf); 2033 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2034 break; 2035 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 2036 } while (unlikely(nxlen < txq->inlen_send)); 2037 } 2038 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 2039 /* 2040 * Check whether we can do inline to align start 2041 * address of data buffer to cacheline. 2042 */ 2043 do_align: 2044 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 2045 if (unlikely(start)) { 2046 start += inlen; 2047 if (start <= txq->inlen_send) 2048 inlen = start; 2049 } 2050 } 2051 /* 2052 * Check whether there are enough free WQEBBs: 2053 * - Control Segment 2054 * - Ethernet Segment 2055 * - First Segment of inlined Ethernet data 2056 * - ... data continued ... 2057 * - Data Segments of pointer/min inline type 2058 * 2059 * Estimate the number of Data Segments conservatively, 2060 * supposing no any mbufs is being freed during inlining. 2061 */ 2062 do_build: 2063 if (MLX5_TXOFF_CONFIG(TXPP)) { 2064 enum mlx5_txcmp_code wret; 2065 2066 /* Generate WAIT for scheduling if requested. 
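mlx5_tx_schedule_send() verifies the free WQE/elts resources itself and consumes one WQE for the WAIT if the packet carries the scheduling timestamp.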
*/ 2067 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 2068 if (wret == MLX5_TXCMP_CODE_EXIT) 2069 return MLX5_TXCMP_CODE_EXIT; 2070 if (wret == MLX5_TXCMP_CODE_ERROR) 2071 return MLX5_TXCMP_CODE_ERROR; 2072 } 2073 MLX5_ASSERT(inlen <= txq->inlen_send); 2074 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 2075 MLX5_ESEG_MIN_INLINE_SIZE + 2076 MLX5_WSEG_SIZE + 2077 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2078 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 2079 return MLX5_TXCMP_CODE_EXIT; 2080 /* Check for maximal WQE size. */ 2081 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) { 2082 /* Check if we can adjust the inline length. */ 2083 if (unlikely(txq->inlen_mode)) { 2084 ds = NB_SEGS(loc->mbuf) + 2 + 2085 (txq->inlen_mode - 2086 MLX5_ESEG_MIN_INLINE_SIZE + 2087 MLX5_WSEG_SIZE + 2088 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2089 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ds)) 2090 return MLX5_TXCMP_CODE_ERROR; 2091 } 2092 /* We have lucky opportunity to adjust. */ 2093 inlen = RTE_MIN(inlen, MLX5_WQE_SIZE_MAX - 2094 MLX5_WSEG_SIZE * 2 - 2095 MLX5_WSEG_SIZE * NB_SEGS(loc->mbuf) - 2096 MLX5_WSEG_SIZE + 2097 MLX5_ESEG_MIN_INLINE_SIZE); 2098 } 2099 #ifdef MLX5_PMD_SOFT_COUNTERS 2100 /* Update sent data bytes/packets counters. */ 2101 txq->stats.obytes += dlen + vlan; 2102 #endif 2103 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2104 loc->wqe_last = wqe; 2105 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 2106 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 2107 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2108 txq->wqe_ci += (ds + 3) / 4; 2109 loc->wqe_free -= (ds + 3) / 4; 2110 return MLX5_TXCMP_CODE_MULTI; 2111 } 2112 2113 /** 2114 * Tx burst function for multi-segment packets. Supports all 2115 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 2116 * sends one packet per WQE. Function stops sending if it 2117 * encounters the single-segment packet. 2118 * 2119 * This routine is responsible for storing processed mbuf 2120 * into elts ring buffer and update elts_head. 2121 * 2122 * @param txq 2123 * Pointer to TX queue structure. 2124 * @param[in] pkts 2125 * Packets to transmit. 2126 * @param pkts_n 2127 * Number of packets in array. 2128 * @param loc 2129 * Pointer to burst routine local context. 2130 * @param olx 2131 * Configured Tx offloads mask. It is fully defined at 2132 * compile time and may be used for optimization. 2133 * 2134 * @return 2135 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2136 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2137 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2138 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 2139 * Local context variables updated. 2140 */ 2141 static __rte_always_inline enum mlx5_txcmp_code 2142 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 2143 struct rte_mbuf **__rte_restrict pkts, 2144 unsigned int pkts_n, 2145 struct mlx5_txq_local *__rte_restrict loc, 2146 unsigned int olx) 2147 { 2148 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2149 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2150 pkts += loc->pkts_sent + 1; 2151 pkts_n -= loc->pkts_sent; 2152 for (;;) { 2153 enum mlx5_txcmp_code ret; 2154 2155 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 2156 /* 2157 * Estimate the number of free elts quickly but conservatively. 2158 * Some segment may be fully inlined and freed, 2159 * ignore this here - precise estimation is costly. 
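* Underestimating only causes an early MLX5_TXCMP_CODE_EXIT and the remaining packets are simply left for the next burst call.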
2160 */ 2161 if (loc->elts_free < NB_SEGS(loc->mbuf)) 2162 return MLX5_TXCMP_CODE_EXIT; 2163 if (MLX5_TXOFF_CONFIG(TSO) && 2164 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { 2165 /* Proceed with multi-segment TSO. */ 2166 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 2167 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 2168 /* Proceed with multi-segment SEND with inlining. */ 2169 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 2170 } else { 2171 /* Proceed with multi-segment SEND w/o inlining. */ 2172 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 2173 } 2174 if (ret == MLX5_TXCMP_CODE_EXIT) 2175 return MLX5_TXCMP_CODE_EXIT; 2176 if (ret == MLX5_TXCMP_CODE_ERROR) 2177 return MLX5_TXCMP_CODE_ERROR; 2178 /* WQE is built, go to the next packet. */ 2179 ++loc->pkts_sent; 2180 --pkts_n; 2181 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2182 return MLX5_TXCMP_CODE_EXIT; 2183 loc->mbuf = *pkts++; 2184 if (pkts_n > 1) 2185 rte_prefetch0(*pkts); 2186 if (likely(NB_SEGS(loc->mbuf) > 1)) 2187 continue; 2188 /* Here ends the series of multi-segment packets. */ 2189 if (MLX5_TXOFF_CONFIG(TSO) && 2190 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2191 return MLX5_TXCMP_CODE_TSO; 2192 return MLX5_TXCMP_CODE_SINGLE; 2193 } 2194 MLX5_ASSERT(false); 2195 } 2196 2197 /** 2198 * Tx burst function for single-segment packets with TSO. 2199 * Supports all types of Tx offloads, except multi-packets. 2200 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 2201 * Function stops sending if it encounters the multi-segment 2202 * packet or packet without TSO requested. 2203 * 2204 * The routine is responsible for storing processed mbuf into elts ring buffer 2205 * and update elts_head if inline offloads is requested due to possible early 2206 * freeing of the inlined mbufs (can not store pkts array in elts as a batch). 2207 * 2208 * @param txq 2209 * Pointer to TX queue structure. 2210 * @param[in] pkts 2211 * Packets to transmit. 2212 * @param pkts_n 2213 * Number of packets in array. 2214 * @param loc 2215 * Pointer to burst routine local context. 2216 * @param olx 2217 * Configured Tx offloads mask. It is fully defined at 2218 * compile time and may be used for optimization. 2219 * 2220 * @return 2221 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2222 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2223 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2224 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2225 * Local context variables updated. 2226 */ 2227 static __rte_always_inline enum mlx5_txcmp_code 2228 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 2229 struct rte_mbuf **__rte_restrict pkts, 2230 unsigned int pkts_n, 2231 struct mlx5_txq_local *__rte_restrict loc, 2232 unsigned int olx) 2233 { 2234 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2235 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2236 pkts += loc->pkts_sent + 1; 2237 pkts_n -= loc->pkts_sent; 2238 for (;;) { 2239 struct mlx5_wqe_dseg *__rte_restrict dseg; 2240 struct mlx5_wqe *__rte_restrict wqe; 2241 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 2242 uint8_t *dptr; 2243 2244 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2245 if (MLX5_TXOFF_CONFIG(TXPP)) { 2246 enum mlx5_txcmp_code wret; 2247 2248 /* Generate WAIT for scheduling if requested. 
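Passing elts == 1 makes the routine also check that one free elts entry remains for the TSO packet itself.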
*/ 2249 wret = mlx5_tx_schedule_send(txq, loc, 1, olx); 2250 if (wret == MLX5_TXCMP_CODE_EXIT) 2251 return MLX5_TXCMP_CODE_EXIT; 2252 if (wret == MLX5_TXCMP_CODE_ERROR) 2253 return MLX5_TXCMP_CODE_ERROR; 2254 } 2255 dlen = rte_pktmbuf_data_len(loc->mbuf); 2256 if (MLX5_TXOFF_CONFIG(VLAN) && 2257 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2258 vlan = sizeof(struct rte_vlan_hdr); 2259 } 2260 /* 2261 * First calculate the WQE size to check 2262 * whether we have enough space in ring buffer. 2263 */ 2264 hlen = loc->mbuf->l2_len + vlan + 2265 loc->mbuf->l3_len + loc->mbuf->l4_len; 2266 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 2267 return MLX5_TXCMP_CODE_ERROR; 2268 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 2269 hlen += loc->mbuf->outer_l2_len + 2270 loc->mbuf->outer_l3_len; 2271 /* Segment must contain all TSO headers. */ 2272 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 2273 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 2274 hlen > (dlen + vlan))) 2275 return MLX5_TXCMP_CODE_ERROR; 2276 /* 2277 * Check whether there are enough free WQEBBs: 2278 * - Control Segment 2279 * - Ethernet Segment 2280 * - First Segment of inlined Ethernet data 2281 * - ... data continued ... 2282 * - Finishing Data Segment of pointer type 2283 */ 2284 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 2285 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2286 if (loc->wqe_free < ((ds + 3) / 4)) 2287 return MLX5_TXCMP_CODE_EXIT; 2288 #ifdef MLX5_PMD_SOFT_COUNTERS 2289 /* Update sent data bytes/packets counters. */ 2290 ntcp = (dlen + vlan - hlen + 2291 loc->mbuf->tso_segsz - 1) / 2292 loc->mbuf->tso_segsz; 2293 /* 2294 * One will be added for mbuf itself at the end 2295 * of the mlx5_tx_burst from loc->pkts_sent field. 2296 */ 2297 --ntcp; 2298 txq->stats.opackets += ntcp; 2299 txq->stats.obytes += dlen + vlan + ntcp * hlen; 2300 #endif 2301 /* 2302 * Build the TSO WQE: 2303 * - Control Segment 2304 * - Ethernet Segment with hlen bytes inlined 2305 * - Data Segment of pointer type 2306 */ 2307 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2308 loc->wqe_last = wqe; 2309 mlx5_tx_cseg_init(txq, loc, wqe, ds, 2310 MLX5_OPCODE_TSO, olx); 2311 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 2312 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 2313 dlen -= hlen - vlan; 2314 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2315 /* 2316 * WQE is built, update the loop parameters 2317 * and go to the next packet. 2318 */ 2319 txq->wqe_ci += (ds + 3) / 4; 2320 loc->wqe_free -= (ds + 3) / 4; 2321 if (MLX5_TXOFF_CONFIG(INLINE)) 2322 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2323 --loc->elts_free; 2324 ++loc->pkts_sent; 2325 --pkts_n; 2326 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2327 return MLX5_TXCMP_CODE_EXIT; 2328 loc->mbuf = *pkts++; 2329 if (pkts_n > 1) 2330 rte_prefetch0(*pkts); 2331 if (MLX5_TXOFF_CONFIG(MULTI) && 2332 unlikely(NB_SEGS(loc->mbuf) > 1)) 2333 return MLX5_TXCMP_CODE_MULTI; 2334 if (likely(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG))) 2335 return MLX5_TXCMP_CODE_SINGLE; 2336 /* Continue with the next TSO packet. */ 2337 } 2338 MLX5_ASSERT(false); 2339 } 2340 2341 /** 2342 * Analyze the packet and select the best method to send. 2343 * 2344 * @param txq 2345 * Pointer to TX queue structure. 2346 * @param loc 2347 * Pointer to burst routine local context. 2348 * @param olx 2349 * Configured Tx offloads mask. It is fully defined at 2350 * compile time and may be used for optimization. 
2351 * @param newp 2352 * The predefined flag whether do complete check for 2353 * multi-segment packets and TSO. 2354 * 2355 * @return 2356 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2357 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 2358 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 2359 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 2360 */ 2361 static __rte_always_inline enum mlx5_txcmp_code 2362 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 2363 struct mlx5_txq_local *__rte_restrict loc, 2364 unsigned int olx, 2365 bool newp) 2366 { 2367 /* Check for multi-segment packet. */ 2368 if (newp && 2369 MLX5_TXOFF_CONFIG(MULTI) && 2370 unlikely(NB_SEGS(loc->mbuf) > 1)) 2371 return MLX5_TXCMP_CODE_MULTI; 2372 /* Check for TSO packet. */ 2373 if (newp && 2374 MLX5_TXOFF_CONFIG(TSO) && 2375 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2376 return MLX5_TXCMP_CODE_TSO; 2377 /* Check if eMPW is enabled at all. */ 2378 if (!MLX5_TXOFF_CONFIG(EMPW)) 2379 return MLX5_TXCMP_CODE_SINGLE; 2380 /* Check if eMPW can be engaged. */ 2381 if (MLX5_TXOFF_CONFIG(VLAN) && 2382 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) && 2383 (!MLX5_TXOFF_CONFIG(INLINE) || 2384 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 2385 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 2386 /* 2387 * eMPW does not support VLAN insertion offload, we have to 2388 * inline the entire packet but packet is too long for inlining. 2389 */ 2390 return MLX5_TXCMP_CODE_SINGLE; 2391 } 2392 return MLX5_TXCMP_CODE_EMPW; 2393 } 2394 2395 /** 2396 * Check the next packet attributes to match with the eMPW batch ones. 2397 * In addition, for legacy MPW the packet length is checked either. 2398 * 2399 * @param txq 2400 * Pointer to TX queue structure. 2401 * @param es 2402 * Pointer to Ethernet Segment of eMPW batch. 2403 * @param loc 2404 * Pointer to burst routine local context. 2405 * @param dlen 2406 * Length of previous packet in MPW descriptor. 2407 * @param olx 2408 * Configured Tx offloads mask. It is fully defined at 2409 * compile time and may be used for optimization. 2410 * 2411 * @return 2412 * true - packet match with eMPW batch attributes. 2413 * false - no match, eMPW should be restarted. 2414 */ 2415 static __rte_always_inline bool 2416 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, 2417 struct mlx5_wqe_eseg *__rte_restrict es, 2418 struct mlx5_txq_local *__rte_restrict loc, 2419 uint32_t dlen, 2420 unsigned int olx) 2421 { 2422 uint8_t swp_flags = 0; 2423 2424 /* Compare the checksum flags, if any. */ 2425 if (MLX5_TXOFF_CONFIG(CSUM) && 2426 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags) 2427 return false; 2428 /* Compare the Software Parser offsets and flags. */ 2429 if (MLX5_TXOFF_CONFIG(SWP) && 2430 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) || 2431 es->swp_flags != swp_flags)) 2432 return false; 2433 /* Fill metadata field if needed. */ 2434 if (MLX5_TXOFF_CONFIG(METADATA) && 2435 es->metadata != (loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 2436 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 0)) 2437 return false; 2438 /* Legacy MPW can send packets with the same length only. */ 2439 if (MLX5_TXOFF_CONFIG(MPW) && 2440 dlen != rte_pktmbuf_data_len(loc->mbuf)) 2441 return false; 2442 /* There must be no VLAN packets in eMPW loop. */ 2443 if (MLX5_TXOFF_CONFIG(VLAN)) 2444 MLX5_ASSERT(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN)); 2445 /* Check if the scheduling is requested. 
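A packet carrying the scheduling timestamp needs its own WAIT WQE first, so it cannot be appended to the current eMPW batch.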
*/ 2446 if (MLX5_TXOFF_CONFIG(TXPP) && 2447 loc->mbuf->ol_flags & txq->ts_mask) 2448 return false; 2449 return true; 2450 } 2451 2452 /** 2453 * Update send loop variables and WQE for eMPW loop without data inlining. 2454 * Number of Data Segments is equal to the number of sent packets. 2455 * 2456 * @param txq 2457 * Pointer to TX queue structure. 2458 * @param loc 2459 * Pointer to burst routine local context. 2460 * @param ds 2461 * Number of packets/Data Segments/Packets. 2462 * @param slen 2463 * Accumulated statistics, bytes sent. 2464 * @param olx 2465 * Configured Tx offloads mask. It is fully defined at 2466 * compile time and may be used for optimization. 2467 * 2468 * @return 2469 * true - packet match with eMPW batch attributes. 2470 * false - no match, eMPW should be restarted. 2471 */ 2472 static __rte_always_inline void 2473 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, 2474 struct mlx5_txq_local *__rte_restrict loc, 2475 unsigned int ds, 2476 unsigned int slen, 2477 unsigned int olx __rte_unused) 2478 { 2479 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2480 #ifdef MLX5_PMD_SOFT_COUNTERS 2481 /* Update sent data bytes counter. */ 2482 txq->stats.obytes += slen; 2483 #else 2484 (void)slen; 2485 #endif 2486 loc->elts_free -= ds; 2487 loc->pkts_sent += ds; 2488 ds += 2; 2489 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2490 txq->wqe_ci += (ds + 3) / 4; 2491 loc->wqe_free -= (ds + 3) / 4; 2492 } 2493 2494 /** 2495 * Update send loop variables and WQE for eMPW loop with data inlining. 2496 * Gets the size of pushed descriptors and data to the WQE. 2497 * 2498 * @param txq 2499 * Pointer to TX queue structure. 2500 * @param loc 2501 * Pointer to burst routine local context. 2502 * @param len 2503 * Total size of descriptor/data in bytes. 2504 * @param slen 2505 * Accumulated statistics, data bytes sent. 2506 * @param wqem 2507 * The base WQE for the eMPW/MPW descriptor. 2508 * @param olx 2509 * Configured Tx offloads mask. It is fully defined at 2510 * compile time and may be used for optimization. 2511 * 2512 * @return 2513 * true - packet match with eMPW batch attributes. 2514 * false - no match, eMPW should be restarted. 2515 */ 2516 static __rte_always_inline void 2517 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 2518 struct mlx5_txq_local *__rte_restrict loc, 2519 unsigned int len, 2520 unsigned int slen, 2521 struct mlx5_wqe *__rte_restrict wqem, 2522 unsigned int olx __rte_unused) 2523 { 2524 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 2525 2526 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2527 #ifdef MLX5_PMD_SOFT_COUNTERS 2528 /* Update sent data bytes counter. */ 2529 txq->stats.obytes += slen; 2530 #else 2531 (void)slen; 2532 #endif 2533 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 2534 /* 2535 * If the legacy MPW session contains the inline packets 2536 * we should set the only inline data segment length 2537 * and align the total length to the segment size. 2538 */ 2539 MLX5_ASSERT(len > sizeof(dseg->bcount)); 2540 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 2541 MLX5_ETH_WQE_DATA_INLINE); 2542 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 2543 } else { 2544 /* 2545 * The session is not legacy MPW or contains the 2546 * data buffer pointer segments. 
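* The accumulated length is then an exact multiple of MLX5_WSEG_SIZE and converts directly into a Data Segment count; the added 2 covers the Control and Ethernet Segments of the title WQEBB.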
2547 */ 2548 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 2549 len = len / MLX5_WSEG_SIZE + 2; 2550 } 2551 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 2552 txq->wqe_ci += (len + 3) / 4; 2553 loc->wqe_free -= (len + 3) / 4; 2554 loc->wqe_last = wqem; 2555 } 2556 2557 /** 2558 * The set of Tx burst functions for single-segment packets without TSO 2559 * and with Multi-Packet Writing feature support. 2560 * Supports all types of Tx offloads, except multi-packets and TSO. 2561 * 2562 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends as many packet 2563 * per WQE as it can. If eMPW is not configured or packet can not be sent with 2564 * eMPW (VLAN insertion) the ordinary SEND opcode is used and only one packet 2565 * placed in WQE. 2566 * 2567 * Functions stop sending if it encounters the multi-segment packet or packet 2568 * with TSO requested. 2569 * 2570 * The routines are responsible for storing processed mbuf into elts ring buffer 2571 * and update elts_head if inlining offload is requested. Otherwise the copying 2572 * mbufs to elts can be postponed and completed at the end of burst routine. 2573 * 2574 * @param txq 2575 * Pointer to TX queue structure. 2576 * @param[in] pkts 2577 * Packets to transmit. 2578 * @param pkts_n 2579 * Number of packets in array. 2580 * @param loc 2581 * Pointer to burst routine local context. 2582 * @param olx 2583 * Configured Tx offloads mask. It is fully defined at 2584 * compile time and may be used for optimization. 2585 * 2586 * @return 2587 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2588 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2589 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2590 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 2591 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 2592 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 2593 * 2594 * Local context variables updated. 2595 * 2596 * 2597 * The routine sends packets with MLX5_OPCODE_EMPW 2598 * without inlining, this is dedicated optimized branch. 2599 * No VLAN insertion is supported. 2600 */ 2601 static __rte_always_inline enum mlx5_txcmp_code 2602 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 2603 struct rte_mbuf **__rte_restrict pkts, 2604 unsigned int pkts_n, 2605 struct mlx5_txq_local *__rte_restrict loc, 2606 unsigned int olx) 2607 { 2608 /* 2609 * Subroutine is the part of mlx5_tx_burst_single() and sends 2610 * single-segment packet with eMPW opcode without data inlining. 2611 */ 2612 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2613 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2614 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2615 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2616 pkts += loc->pkts_sent + 1; 2617 pkts_n -= loc->pkts_sent; 2618 for (;;) { 2619 struct mlx5_wqe_dseg *__rte_restrict dseg; 2620 struct mlx5_wqe_eseg *__rte_restrict eseg; 2621 enum mlx5_txcmp_code ret; 2622 unsigned int part, loop; 2623 unsigned int slen = 0; 2624 2625 next_empw: 2626 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2627 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2628 MLX5_MPW_MAX_PACKETS : 2629 MLX5_EMPW_MAX_PACKETS); 2630 if (unlikely(loc->elts_free < part)) { 2631 /* We have no enough elts to save all mbufs. */ 2632 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 2633 return MLX5_TXCMP_CODE_EXIT; 2634 /* But we still able to send at least minimal eMPW. 
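The batch is clamped below to the number of free elts entries.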
*/ 2635 part = loc->elts_free; 2636 } 2637 if (MLX5_TXOFF_CONFIG(TXPP)) { 2638 enum mlx5_txcmp_code wret; 2639 2640 /* Generate WAIT for scheduling if requested. */ 2641 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 2642 if (wret == MLX5_TXCMP_CODE_EXIT) 2643 return MLX5_TXCMP_CODE_EXIT; 2644 if (wret == MLX5_TXCMP_CODE_ERROR) 2645 return MLX5_TXCMP_CODE_ERROR; 2646 } 2647 /* Check whether we have enough WQEs */ 2648 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 2649 if (unlikely(loc->wqe_free < 2650 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2651 return MLX5_TXCMP_CODE_EXIT; 2652 part = (loc->wqe_free * 4) - 2; 2653 } 2654 if (likely(part > 1)) 2655 rte_prefetch0(*pkts); 2656 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2657 /* 2658 * Build eMPW title WQEBB: 2659 * - Control Segment, eMPW opcode 2660 * - Ethernet Segment, no inline 2661 */ 2662 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 2663 MLX5_OPCODE_ENHANCED_MPSW, olx); 2664 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 2665 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2666 eseg = &loc->wqe_last->eseg; 2667 dseg = &loc->wqe_last->dseg[0]; 2668 loop = part; 2669 /* Store the packet length for legacy MPW. */ 2670 if (MLX5_TXOFF_CONFIG(MPW)) 2671 eseg->mss = rte_cpu_to_be_16 2672 (rte_pktmbuf_data_len(loc->mbuf)); 2673 for (;;) { 2674 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2675 #ifdef MLX5_PMD_SOFT_COUNTERS 2676 /* Update sent data bytes counter. */ 2677 slen += dlen; 2678 #endif 2679 mlx5_tx_dseg_ptr 2680 (txq, loc, dseg, 2681 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 2682 dlen, olx); 2683 if (unlikely(--loop == 0)) 2684 break; 2685 loc->mbuf = *pkts++; 2686 if (likely(loop > 1)) 2687 rte_prefetch0(*pkts); 2688 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2689 /* 2690 * Unroll the completion code to avoid 2691 * returning variable value - it results in 2692 * unoptimized sequent checking in caller. 2693 */ 2694 if (ret == MLX5_TXCMP_CODE_MULTI) { 2695 part -= loop; 2696 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2697 if (unlikely(!loc->elts_free || 2698 !loc->wqe_free)) 2699 return MLX5_TXCMP_CODE_EXIT; 2700 return MLX5_TXCMP_CODE_MULTI; 2701 } 2702 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2703 if (ret == MLX5_TXCMP_CODE_TSO) { 2704 part -= loop; 2705 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2706 if (unlikely(!loc->elts_free || 2707 !loc->wqe_free)) 2708 return MLX5_TXCMP_CODE_EXIT; 2709 return MLX5_TXCMP_CODE_TSO; 2710 } 2711 if (ret == MLX5_TXCMP_CODE_SINGLE) { 2712 part -= loop; 2713 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2714 if (unlikely(!loc->elts_free || 2715 !loc->wqe_free)) 2716 return MLX5_TXCMP_CODE_EXIT; 2717 return MLX5_TXCMP_CODE_SINGLE; 2718 } 2719 if (ret != MLX5_TXCMP_CODE_EMPW) { 2720 MLX5_ASSERT(false); 2721 part -= loop; 2722 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2723 return MLX5_TXCMP_CODE_ERROR; 2724 } 2725 /* 2726 * Check whether packet parameters coincide 2727 * within assumed eMPW batch: 2728 * - check sum settings 2729 * - metadata value 2730 * - software parser settings 2731 * - packets length (legacy MPW only) 2732 * - scheduling is not required 2733 */ 2734 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 2735 MLX5_ASSERT(loop); 2736 part -= loop; 2737 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2738 if (unlikely(!loc->elts_free || 2739 !loc->wqe_free)) 2740 return MLX5_TXCMP_CODE_EXIT; 2741 pkts_n -= part; 2742 goto next_empw; 2743 } 2744 /* Packet attributes match, continue the same eMPW. 
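Only the Data Segment pointer advances; the final DS count is written back into the Control Segment by mlx5_tx_sdone_empw() when the batch is closed.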
*/ 2745 ++dseg; 2746 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2747 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2748 } 2749 /* eMPW is built successfully, update loop parameters. */ 2750 MLX5_ASSERT(!loop); 2751 MLX5_ASSERT(pkts_n >= part); 2752 #ifdef MLX5_PMD_SOFT_COUNTERS 2753 /* Update sent data bytes counter. */ 2754 txq->stats.obytes += slen; 2755 #endif 2756 loc->elts_free -= part; 2757 loc->pkts_sent += part; 2758 txq->wqe_ci += (2 + part + 3) / 4; 2759 loc->wqe_free -= (2 + part + 3) / 4; 2760 pkts_n -= part; 2761 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2762 return MLX5_TXCMP_CODE_EXIT; 2763 loc->mbuf = *pkts++; 2764 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2765 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 2766 return ret; 2767 /* Continue sending eMPW batches. */ 2768 } 2769 MLX5_ASSERT(false); 2770 } 2771 2772 /** 2773 * The routine sends packets with MLX5_OPCODE_EMPW 2774 * with inlining, optionally supports VLAN insertion. 2775 */ 2776 static __rte_always_inline enum mlx5_txcmp_code 2777 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 2778 struct rte_mbuf **__rte_restrict pkts, 2779 unsigned int pkts_n, 2780 struct mlx5_txq_local *__rte_restrict loc, 2781 unsigned int olx) 2782 { 2783 /* 2784 * Subroutine is the part of mlx5_tx_burst_single() and sends 2785 * single-segment packet with eMPW opcode with data inlining. 2786 */ 2787 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2788 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2789 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2790 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2791 pkts += loc->pkts_sent + 1; 2792 pkts_n -= loc->pkts_sent; 2793 for (;;) { 2794 struct mlx5_wqe_dseg *__rte_restrict dseg; 2795 struct mlx5_wqe *__rte_restrict wqem; 2796 enum mlx5_txcmp_code ret; 2797 unsigned int room, part, nlim; 2798 unsigned int slen = 0; 2799 2800 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2801 /* 2802 * Limits the amount of packets in one WQE 2803 * to improve CQE latency generation. 2804 */ 2805 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2806 MLX5_MPW_INLINE_MAX_PACKETS : 2807 MLX5_EMPW_MAX_PACKETS); 2808 if (MLX5_TXOFF_CONFIG(TXPP)) { 2809 enum mlx5_txcmp_code wret; 2810 2811 /* Generate WAIT for scheduling if requested. */ 2812 wret = mlx5_tx_schedule_send(txq, loc, nlim, olx); 2813 if (wret == MLX5_TXCMP_CODE_EXIT) 2814 return MLX5_TXCMP_CODE_EXIT; 2815 if (wret == MLX5_TXCMP_CODE_ERROR) 2816 return MLX5_TXCMP_CODE_ERROR; 2817 } 2818 /* Check whether we have minimal amount WQEs */ 2819 if (unlikely(loc->wqe_free < 2820 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2821 return MLX5_TXCMP_CODE_EXIT; 2822 if (likely(pkts_n > 1)) 2823 rte_prefetch0(*pkts); 2824 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2825 /* 2826 * Build eMPW title WQEBB: 2827 * - Control Segment, eMPW opcode, zero DS 2828 * - Ethernet Segment, no inline 2829 */ 2830 mlx5_tx_cseg_init(txq, loc, wqem, 0, 2831 MLX5_OPCODE_ENHANCED_MPSW, olx); 2832 mlx5_tx_eseg_none(txq, loc, wqem, 2833 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2834 dseg = &wqem->dseg[0]; 2835 /* Store the packet length for legacy MPW. */ 2836 if (MLX5_TXOFF_CONFIG(MPW)) 2837 wqem->eseg.mss = rte_cpu_to_be_16 2838 (rte_pktmbuf_data_len(loc->mbuf)); 2839 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 2840 loc->wqe_free) * MLX5_WQE_SIZE - 2841 MLX5_WQE_CSEG_SIZE - 2842 MLX5_WQE_ESEG_SIZE; 2843 /* Limit the room for legacy MPW sessions for performance. 
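The session room is capped to the larger of one inline blob (inlen_empw plus the bcount header and an optional VLAN tag) or a short run of pointer descriptors, which keeps legacy MPW WQEs reasonably small.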
*/ 2844 if (MLX5_TXOFF_CONFIG(MPW)) 2845 room = RTE_MIN(room, 2846 RTE_MAX(txq->inlen_empw + 2847 sizeof(dseg->bcount) + 2848 (MLX5_TXOFF_CONFIG(VLAN) ? 2849 sizeof(struct rte_vlan_hdr) : 0), 2850 MLX5_MPW_INLINE_MAX_PACKETS * 2851 MLX5_WQE_DSEG_SIZE)); 2852 /* Build WQE till we have space, packets and resources. */ 2853 part = room; 2854 for (;;) { 2855 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2856 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2857 unsigned int tlen; 2858 2859 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2860 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 2861 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 2862 /* 2863 * Some Tx offloads may cause an error if packet is not 2864 * long enough, check against assumed minimal length. 2865 */ 2866 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 2867 part -= room; 2868 if (unlikely(!part)) 2869 return MLX5_TXCMP_CODE_ERROR; 2870 /* 2871 * We have some successfully built 2872 * packet Data Segments to send. 2873 */ 2874 mlx5_tx_idone_empw(txq, loc, part, 2875 slen, wqem, olx); 2876 return MLX5_TXCMP_CODE_ERROR; 2877 } 2878 /* Inline or not inline - that's the Question. */ 2879 if (dlen > txq->inlen_empw || 2880 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2881 goto pointer_empw; 2882 if (MLX5_TXOFF_CONFIG(MPW)) { 2883 if (dlen > txq->inlen_send) 2884 goto pointer_empw; 2885 tlen = dlen; 2886 if (part == room) { 2887 /* Open new inline MPW session. */ 2888 tlen += sizeof(dseg->bcount); 2889 dseg->bcount = RTE_BE32(0); 2890 dseg = RTE_PTR_ADD 2891 (dseg, sizeof(dseg->bcount)); 2892 } else { 2893 /* 2894 * No pointer and inline descriptor 2895 * intermix for legacy MPW sessions. 2896 */ 2897 if (wqem->dseg[0].bcount) 2898 break; 2899 } 2900 } else { 2901 tlen = sizeof(dseg->bcount) + dlen; 2902 } 2903 /* Inline entire packet, optional VLAN insertion. */ 2904 if (MLX5_TXOFF_CONFIG(VLAN) && 2905 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2906 /* 2907 * The packet length must be checked in 2908 * mlx5_tx_able_to_empw() and packet 2909 * fits into inline length guaranteed. 2910 */ 2911 MLX5_ASSERT((dlen + 2912 sizeof(struct rte_vlan_hdr)) <= 2913 txq->inlen_empw); 2914 tlen += sizeof(struct rte_vlan_hdr); 2915 if (room < tlen) 2916 break; 2917 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 2918 dptr, dlen, olx); 2919 #ifdef MLX5_PMD_SOFT_COUNTERS 2920 /* Update sent data bytes counter. */ 2921 slen += sizeof(struct rte_vlan_hdr); 2922 #endif 2923 } else { 2924 if (room < tlen) 2925 break; 2926 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 2927 dptr, dlen, olx); 2928 } 2929 if (!MLX5_TXOFF_CONFIG(MPW)) 2930 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 2931 MLX5_ASSERT(room >= tlen); 2932 room -= tlen; 2933 /* 2934 * Packet data are completely inline, 2935 * we can try to free the packet. 2936 */ 2937 if (likely(loc->pkts_sent == loc->mbuf_free)) { 2938 /* 2939 * All the packets from the burst beginning 2940 * are inline, we can free mbufs directly 2941 * from the origin array on tx_burst exit(). 2942 */ 2943 loc->mbuf_free++; 2944 goto next_mbuf; 2945 } 2946 /* 2947 * In order no to call rte_pktmbuf_free_seg() here, 2948 * in the most inner loop (that might be very 2949 * expensive) we just save the mbuf in elts. 2950 */ 2951 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2952 loc->elts_free--; 2953 goto next_mbuf; 2954 pointer_empw: 2955 /* 2956 * No pointer and inline descriptor 2957 * intermix for legacy MPW sessions. 
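* A session in progress whose first descriptor still has a zero bcount was opened in inline mode, so a pointer descriptor cannot be added and the session is closed here.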
2958 */ 2959 if (MLX5_TXOFF_CONFIG(MPW) && 2960 part != room && 2961 wqem->dseg[0].bcount == RTE_BE32(0)) 2962 break; 2963 /* 2964 * Not inlinable VLAN packets are 2965 * proceeded outside of this routine. 2966 */ 2967 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2968 if (MLX5_TXOFF_CONFIG(VLAN)) 2969 MLX5_ASSERT(!(loc->mbuf->ol_flags & 2970 RTE_MBUF_F_TX_VLAN)); 2971 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2972 /* We have to store mbuf in elts.*/ 2973 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2974 loc->elts_free--; 2975 room -= MLX5_WQE_DSEG_SIZE; 2976 /* Ring buffer wraparound is checked at the loop end.*/ 2977 ++dseg; 2978 next_mbuf: 2979 #ifdef MLX5_PMD_SOFT_COUNTERS 2980 /* Update sent data bytes counter. */ 2981 slen += dlen; 2982 #endif 2983 loc->pkts_sent++; 2984 pkts_n--; 2985 if (unlikely(!pkts_n || !loc->elts_free)) { 2986 /* 2987 * We have no resources/packets to 2988 * continue build descriptors. 2989 */ 2990 part -= room; 2991 mlx5_tx_idone_empw(txq, loc, part, 2992 slen, wqem, olx); 2993 return MLX5_TXCMP_CODE_EXIT; 2994 } 2995 loc->mbuf = *pkts++; 2996 if (likely(pkts_n > 1)) 2997 rte_prefetch0(*pkts); 2998 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2999 /* 3000 * Unroll the completion code to avoid 3001 * returning variable value - it results in 3002 * unoptimized sequent checking in caller. 3003 */ 3004 if (ret == MLX5_TXCMP_CODE_MULTI) { 3005 part -= room; 3006 mlx5_tx_idone_empw(txq, loc, part, 3007 slen, wqem, olx); 3008 if (unlikely(!loc->elts_free || 3009 !loc->wqe_free)) 3010 return MLX5_TXCMP_CODE_EXIT; 3011 return MLX5_TXCMP_CODE_MULTI; 3012 } 3013 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3014 if (ret == MLX5_TXCMP_CODE_TSO) { 3015 part -= room; 3016 mlx5_tx_idone_empw(txq, loc, part, 3017 slen, wqem, olx); 3018 if (unlikely(!loc->elts_free || 3019 !loc->wqe_free)) 3020 return MLX5_TXCMP_CODE_EXIT; 3021 return MLX5_TXCMP_CODE_TSO; 3022 } 3023 if (ret == MLX5_TXCMP_CODE_SINGLE) { 3024 part -= room; 3025 mlx5_tx_idone_empw(txq, loc, part, 3026 slen, wqem, olx); 3027 if (unlikely(!loc->elts_free || 3028 !loc->wqe_free)) 3029 return MLX5_TXCMP_CODE_EXIT; 3030 return MLX5_TXCMP_CODE_SINGLE; 3031 } 3032 if (ret != MLX5_TXCMP_CODE_EMPW) { 3033 MLX5_ASSERT(false); 3034 part -= room; 3035 mlx5_tx_idone_empw(txq, loc, part, 3036 slen, wqem, olx); 3037 return MLX5_TXCMP_CODE_ERROR; 3038 } 3039 /* Check if we have minimal room left. */ 3040 nlim--; 3041 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 3042 break; 3043 /* 3044 * Check whether packet parameters coincide 3045 * within assumed eMPW batch: 3046 * - check sum settings 3047 * - metadata value 3048 * - software parser settings 3049 * - packets length (legacy MPW only) 3050 * - scheduling is not required 3051 */ 3052 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 3053 loc, dlen, olx)) 3054 break; 3055 /* Packet attributes match, continue the same eMPW. */ 3056 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3057 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3058 } 3059 /* 3060 * We get here to close an existing eMPW 3061 * session and start the new one. 3062 */ 3063 MLX5_ASSERT(pkts_n); 3064 part -= room; 3065 if (unlikely(!part)) 3066 return MLX5_TXCMP_CODE_EXIT; 3067 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 3068 if (unlikely(!loc->elts_free || 3069 !loc->wqe_free)) 3070 return MLX5_TXCMP_CODE_EXIT; 3071 /* Continue the loop with new eMPW session. */ 3072 } 3073 MLX5_ASSERT(false); 3074 } 3075 3076 /** 3077 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 
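* One packet is placed per SEND WQE; eMPW batching is handled by the routines above.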
3078 * Data inlining and VLAN insertion are supported. 3079 */ 3080 static __rte_always_inline enum mlx5_txcmp_code 3081 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 3082 struct rte_mbuf **__rte_restrict pkts, 3083 unsigned int pkts_n, 3084 struct mlx5_txq_local *__rte_restrict loc, 3085 unsigned int olx) 3086 { 3087 /* 3088 * Subroutine is the part of mlx5_tx_burst_single() 3089 * and sends single-segment packet with SEND opcode. 3090 */ 3091 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3092 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3093 pkts += loc->pkts_sent + 1; 3094 pkts_n -= loc->pkts_sent; 3095 for (;;) { 3096 struct mlx5_wqe *__rte_restrict wqe; 3097 enum mlx5_txcmp_code ret; 3098 3099 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3100 MLX5_ASSERT(loc->elts_free); 3101 if (MLX5_TXOFF_CONFIG(TXPP)) { 3102 enum mlx5_txcmp_code wret; 3103 3104 /* Generate WAIT for scheduling if requested. */ 3105 wret = mlx5_tx_schedule_send(txq, loc, 0, olx); 3106 if (wret == MLX5_TXCMP_CODE_EXIT) 3107 return MLX5_TXCMP_CODE_EXIT; 3108 if (wret == MLX5_TXCMP_CODE_ERROR) 3109 return MLX5_TXCMP_CODE_ERROR; 3110 } 3111 if (MLX5_TXOFF_CONFIG(INLINE)) { 3112 unsigned int inlen, vlan = 0; 3113 3114 inlen = rte_pktmbuf_data_len(loc->mbuf); 3115 if (MLX5_TXOFF_CONFIG(VLAN) && 3116 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 3117 vlan = sizeof(struct rte_vlan_hdr); 3118 inlen += vlan; 3119 } 3120 /* 3121 * If inlining is enabled at configuration time 3122 * the limit must be not less than minimal size. 3123 * Otherwise we would do extra check for data 3124 * size to avoid crashes due to length overflow. 3125 */ 3126 MLX5_ASSERT(txq->inlen_send >= 3127 MLX5_ESEG_MIN_INLINE_SIZE); 3128 if (inlen <= txq->inlen_send) { 3129 unsigned int seg_n, wqe_n; 3130 3131 rte_prefetch0(rte_pktmbuf_mtod 3132 (loc->mbuf, uint8_t *)); 3133 /* Check against minimal length. */ 3134 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3135 return MLX5_TXCMP_CODE_ERROR; 3136 if (loc->mbuf->ol_flags & 3137 RTE_MBUF_F_TX_DYNF_NOINLINE) { 3138 /* 3139 * The hint flag not to inline packet 3140 * data is set. Check whether we can 3141 * follow the hint. 3142 */ 3143 if ((!MLX5_TXOFF_CONFIG(EMPW) && 3144 txq->inlen_mode) || 3145 (MLX5_TXOFF_CONFIG(MPW) && 3146 txq->inlen_mode)) { 3147 if (inlen <= txq->inlen_send) 3148 goto single_inline; 3149 /* 3150 * The hardware requires the 3151 * minimal inline data header. 3152 */ 3153 goto single_min_inline; 3154 } 3155 if (MLX5_TXOFF_CONFIG(VLAN) && 3156 vlan && !txq->vlan_en) { 3157 /* 3158 * We must insert VLAN tag 3159 * by software means. 3160 */ 3161 goto single_part_inline; 3162 } 3163 goto single_no_inline; 3164 } 3165 single_inline: 3166 /* 3167 * Completely inlined packet data WQE: 3168 * - Control Segment, SEND opcode 3169 * - Ethernet Segment, no VLAN insertion 3170 * - Data inlined, VLAN optionally inserted 3171 * - Alignment to MLX5_WSEG_SIZE 3172 * Have to estimate amount of WQEBBs 3173 */ 3174 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 3175 MLX5_ESEG_MIN_INLINE_SIZE + 3176 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3177 /* Check if there are enough WQEBBs. 
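Four 16-byte segments form one WQEBB, hence the (seg_n + 3) / 4 rounding below.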
*/ 3178 wqe_n = (seg_n + 3) / 4; 3179 if (wqe_n > loc->wqe_free) 3180 return MLX5_TXCMP_CODE_EXIT; 3181 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3182 loc->wqe_last = wqe; 3183 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 3184 MLX5_OPCODE_SEND, olx); 3185 mlx5_tx_eseg_data(txq, loc, wqe, 3186 vlan, inlen, 0, olx); 3187 txq->wqe_ci += wqe_n; 3188 loc->wqe_free -= wqe_n; 3189 /* 3190 * Packet data are completely inlined, 3191 * free the packet immediately. 3192 */ 3193 rte_pktmbuf_free_seg(loc->mbuf); 3194 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 3195 MLX5_TXOFF_CONFIG(MPW)) && 3196 txq->inlen_mode) { 3197 /* 3198 * If minimal inlining is requested the eMPW 3199 * feature should be disabled due to data is 3200 * inlined into Ethernet Segment, which can 3201 * not contain inlined data for eMPW due to 3202 * segment shared for all packets. 3203 */ 3204 struct mlx5_wqe_dseg *__rte_restrict dseg; 3205 unsigned int ds; 3206 uint8_t *dptr; 3207 3208 /* 3209 * The inline-mode settings require 3210 * to inline the specified amount of 3211 * data bytes to the Ethernet Segment. 3212 * We should check the free space in 3213 * WQE ring buffer to inline partially. 3214 */ 3215 single_min_inline: 3216 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 3217 MLX5_ASSERT(inlen > txq->inlen_mode); 3218 MLX5_ASSERT(txq->inlen_mode >= 3219 MLX5_ESEG_MIN_INLINE_SIZE); 3220 /* 3221 * Check whether there are enough free WQEBBs: 3222 * - Control Segment 3223 * - Ethernet Segment 3224 * - First Segment of inlined Ethernet data 3225 * - ... data continued ... 3226 * - Finishing Data Segment of pointer type 3227 */ 3228 ds = (MLX5_WQE_CSEG_SIZE + 3229 MLX5_WQE_ESEG_SIZE + 3230 MLX5_WQE_DSEG_SIZE + 3231 txq->inlen_mode - 3232 MLX5_ESEG_MIN_INLINE_SIZE + 3233 MLX5_WQE_DSEG_SIZE + 3234 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3235 if (loc->wqe_free < ((ds + 3) / 4)) 3236 return MLX5_TXCMP_CODE_EXIT; 3237 /* 3238 * Build the ordinary SEND WQE: 3239 * - Control Segment 3240 * - Ethernet Segment, inline inlen_mode bytes 3241 * - Data Segment of pointer type 3242 */ 3243 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3244 loc->wqe_last = wqe; 3245 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3246 MLX5_OPCODE_SEND, olx); 3247 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 3248 txq->inlen_mode, 3249 0, olx); 3250 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3251 txq->inlen_mode - vlan; 3252 inlen -= txq->inlen_mode; 3253 mlx5_tx_dseg_ptr(txq, loc, dseg, 3254 dptr, inlen, olx); 3255 /* 3256 * WQE is built, update the loop parameters 3257 * and got to the next packet. 3258 */ 3259 txq->wqe_ci += (ds + 3) / 4; 3260 loc->wqe_free -= (ds + 3) / 4; 3261 /* We have to store mbuf in elts.*/ 3262 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3263 txq->elts[txq->elts_head++ & txq->elts_m] = 3264 loc->mbuf; 3265 --loc->elts_free; 3266 } else { 3267 uint8_t *dptr; 3268 unsigned int dlen; 3269 3270 /* 3271 * Partially inlined packet data WQE, we have 3272 * some space in title WQEBB, we can fill it 3273 * with some packet data. It takes one WQEBB, 3274 * it is available, no extra space check: 3275 * - Control Segment, SEND opcode 3276 * - Ethernet Segment, no VLAN insertion 3277 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 3278 * - Data Segment, pointer type 3279 * 3280 * We also get here if VLAN insertion is not 3281 * supported by HW, the inline is enabled. 
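* In that case the VLAN tag is expected to be built in software as part of the MLX5_ESEG_MIN_INLINE_SIZE bytes copied into the Ethernet Segment.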
3282 */ 3283 single_part_inline: 3284 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3285 loc->wqe_last = wqe; 3286 mlx5_tx_cseg_init(txq, loc, wqe, 4, 3287 MLX5_OPCODE_SEND, olx); 3288 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 3289 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3290 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 3291 /* 3292 * The length check is performed above, by 3293 * comparing with txq->inlen_send. We should 3294 * not get overflow here. 3295 */ 3296 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 3297 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 3298 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 3299 dptr, dlen, olx); 3300 ++txq->wqe_ci; 3301 --loc->wqe_free; 3302 /* We have to store mbuf in elts.*/ 3303 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3304 txq->elts[txq->elts_head++ & txq->elts_m] = 3305 loc->mbuf; 3306 --loc->elts_free; 3307 } 3308 #ifdef MLX5_PMD_SOFT_COUNTERS 3309 /* Update sent data bytes counter. */ 3310 txq->stats.obytes += vlan + 3311 rte_pktmbuf_data_len(loc->mbuf); 3312 #endif 3313 } else { 3314 /* 3315 * No inline at all, it means the CPU cycles saving 3316 * is prioritized at configuration, we should not 3317 * copy any packet data to WQE. 3318 * 3319 * SEND WQE, one WQEBB: 3320 * - Control Segment, SEND opcode 3321 * - Ethernet Segment, optional VLAN, no inline 3322 * - Data Segment, pointer type 3323 */ 3324 single_no_inline: 3325 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3326 loc->wqe_last = wqe; 3327 mlx5_tx_cseg_init(txq, loc, wqe, 3, 3328 MLX5_OPCODE_SEND, olx); 3329 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3330 mlx5_tx_dseg_ptr 3331 (txq, loc, &wqe->dseg[0], 3332 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3333 rte_pktmbuf_data_len(loc->mbuf), olx); 3334 ++txq->wqe_ci; 3335 --loc->wqe_free; 3336 /* 3337 * We should not store mbuf pointer in elts 3338 * if no inlining is configured, this is done 3339 * by calling routine in a batch copy. 3340 */ 3341 if (MLX5_TXOFF_CONFIG(INLINE)) 3342 txq->elts[txq->elts_head++ & txq->elts_m] = 3343 loc->mbuf; 3344 --loc->elts_free; 3345 #ifdef MLX5_PMD_SOFT_COUNTERS 3346 /* Update sent data bytes counter. */ 3347 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 3348 if (MLX5_TXOFF_CONFIG(VLAN) && 3349 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 3350 txq->stats.obytes += 3351 sizeof(struct rte_vlan_hdr); 3352 #endif 3353 } 3354 ++loc->pkts_sent; 3355 --pkts_n; 3356 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3357 return MLX5_TXCMP_CODE_EXIT; 3358 loc->mbuf = *pkts++; 3359 if (pkts_n > 1) 3360 rte_prefetch0(*pkts); 3361 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3362 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 3363 return ret; 3364 } 3365 MLX5_ASSERT(false); 3366 } 3367 3368 static __rte_always_inline enum mlx5_txcmp_code 3369 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 3370 struct rte_mbuf **__rte_restrict pkts, 3371 unsigned int pkts_n, 3372 struct mlx5_txq_local *__rte_restrict loc, 3373 unsigned int olx) 3374 { 3375 enum mlx5_txcmp_code ret; 3376 3377 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 3378 if (ret == MLX5_TXCMP_CODE_SINGLE) 3379 goto ordinary_send; 3380 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 3381 for (;;) { 3382 /* Optimize for inline/no inline eMPW send. */ 3383 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 3384 mlx5_tx_burst_empw_inline 3385 (txq, pkts, pkts_n, loc, olx) : 3386 mlx5_tx_burst_empw_simple 3387 (txq, pkts, pkts_n, loc, olx); 3388 if (ret != MLX5_TXCMP_CODE_SINGLE) 3389 return ret; 3390 /* The resources to send one packet should remain. 
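The eMPW subroutines return MLX5_TXCMP_CODE_SINGLE only after verifying that free elts and WQEs remain, which the assertion below documents.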

static __rte_always_inline enum mlx5_txcmp_code
mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq,
		     struct rte_mbuf **__rte_restrict pkts,
		     unsigned int pkts_n,
		     struct mlx5_txq_local *__rte_restrict loc,
		     unsigned int olx)
{
	enum mlx5_txcmp_code ret;

	ret = mlx5_tx_able_to_empw(txq, loc, olx, false);
	if (ret == MLX5_TXCMP_CODE_SINGLE)
		goto ordinary_send;
	MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW);
	for (;;) {
		/* Optimize for inline/no inline eMPW send. */
		ret = (MLX5_TXOFF_CONFIG(INLINE)) ?
			mlx5_tx_burst_empw_inline
				(txq, pkts, pkts_n, loc, olx) :
			mlx5_tx_burst_empw_simple
				(txq, pkts, pkts_n, loc, olx);
		if (ret != MLX5_TXCMP_CODE_SINGLE)
			return ret;
		/* The resources to send one packet should remain. */
		MLX5_ASSERT(loc->elts_free && loc->wqe_free);
ordinary_send:
		ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx);
		MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE);
		if (ret != MLX5_TXCMP_CODE_EMPW)
			return ret;
		/* The resources to send one packet should remain. */
		MLX5_ASSERT(loc->elts_free && loc->wqe_free);
	}
}
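/*
 * A minimal sketch (not compiled, hypothetical name) of how the template
 * below is meant to be instantiated: passing a compile-time constant olx
 * mask lets the compiler evaluate every MLX5_TXOFF_CONFIG() check at build
 * time and drop the code paths the configuration does not need.
 */
#if 0
static uint16_t
mlx5_tx_burst_example(void *txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, pkts, pkts_n,
				  MLX5_TXOFF_CONFIG_MULTI |
				  MLX5_TXOFF_CONFIG_TSO |
				  MLX5_TXOFF_CONFIG_INLINE);
}
#endif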

/**
 * DPDK Tx callback template. This is a configured template used to generate
 * routines optimized for the specified offload setup.
 * One of the generated functions is chosen at SQ configuration time.
 *
 * @param txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in the array.
 * @param olx
 *   Configured offloads mask, presenting the bits of the
 *   MLX5_TXOFF_CONFIG_xxx values. Should be constant at compile time to
 *   take advantage of the static configuration.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
static __rte_always_inline uint16_t
mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq,
		   struct rte_mbuf **__rte_restrict pkts,
		   uint16_t pkts_n,
		   unsigned int olx)
{
	struct mlx5_txq_local loc;
	enum mlx5_txcmp_code ret;
	unsigned int part;

	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (unlikely(!pkts_n))
		return 0;
	if (MLX5_TXOFF_CONFIG(INLINE))
		loc.mbuf_free = 0;
	loc.pkts_sent = 0;
	loc.pkts_copy = 0;
	loc.wqe_last = NULL;

send_loop:
	loc.pkts_loop = loc.pkts_sent;
	/*
	 * Check if there are some CQEs, if any:
	 * - process encountered errors
	 * - process the completed WQEs
	 * - free related mbufs
	 * - doorbell the NIC about processed CQEs
	 */
	rte_prefetch0(*(pkts + loc.pkts_sent));
	mlx5_tx_handle_completion(txq, olx);
	/*
	 * Calculate the number of available resources - elts and WQEs.
	 * There are two possible different scenarios:
	 * - no data inlining into WQEs, one WQEBB may contain up to
	 *   four packets, in this case elts become the scarce resource
	 * - data inlining into WQEs, one packet may require multiple
	 *   WQEBBs, the WQEs become the limiting factor.
	 */
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	loc.elts_free = txq->elts_s -
				(uint16_t)(txq->elts_head - txq->elts_tail);
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	loc.wqe_free = txq->wqe_s -
				(uint16_t)(txq->wqe_ci - txq->wqe_pi);
	if (unlikely(!loc.elts_free || !loc.wqe_free))
		goto burst_exit;
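	/*
	 * Note (illustrative): elts_head/elts_tail and wqe_ci/wqe_pi are
	 * free-running 16-bit counters, so the (uint16_t) cast of their
	 * difference keeps the in-flight count correct across wraparound.
	 * For example, elts_tail = 0xFFFB and elts_head = 0x0005 give
	 * (uint16_t)(0x0005 - 0xFFFB) = 10 entries in flight, hence
	 * elts_free = elts_s - 10.
	 */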
	for (;;) {
		/*
		 * Fetch the packet from the array. Usually this is the first
		 * packet in a series of multi/single-segment packets.
		 */
		loc.mbuf = *(pkts + loc.pkts_sent);
		/* Dedicated branch for multi-segment packets. */
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    unlikely(NB_SEGS(loc.mbuf) > 1)) {
			/*
			 * Multi-segment packet encountered.
			 * Hardware is able to process it only
			 * with SEND/TSO opcodes, one packet
			 * per WQE, do it in a dedicated routine.
			 */
enter_send_multi:
			MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy);
			part = loc.pkts_sent - loc.pkts_copy;
			if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
				/*
				 * There are some single-segment mbufs not
				 * stored in elts. The mbufs must be in the
				 * same order as WQEs, so we must copy the
				 * mbufs to elts here, before the mbufs of the
				 * coming multi-segment packet are appended.
				 */
				mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy,
						  part, olx);
				loc.pkts_copy = loc.pkts_sent;
			}
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx);
			if (!MLX5_TXOFF_CONFIG(INLINE))
				loc.pkts_copy = loc.pkts_sent;
			/*
			 * These returned code checks are supposed
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT) {
				/*
				 * The routine returns this code when
				 * all packets are sent or there are not
				 * enough resources to complete the request.
				 */
				break;
			}
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				/*
				 * The routine returns this code when an error
				 * occurred in the incoming packet format.
				 */
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE) {
				/*
				 * A single-segment packet was encountered
				 * in the array, try to send it in the most
				 * optimized way, possibly engaging eMPW.
				 */
				goto enter_send_single;
			}
			if (MLX5_TXOFF_CONFIG(TSO) &&
			    ret == MLX5_TXCMP_CODE_TSO) {
				/*
				 * A single-segment TSO packet was
				 * encountered in the array.
				 */
				goto enter_send_tso;
			}
			/* We must not get here. Something is going wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/* Dedicated branch for single-segment TSO packets. */
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    unlikely(loc.mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
			/*
			 * TSO might require a special way of inlining
			 * (dedicated parameters) and is sent with the
			 * MLX5_OPCODE_TSO opcode only, provide this
			 * in a dedicated branch.
			 */
enter_send_tso:
			MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1);
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx);
			/*
			 * These returned code checks are supposed
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT)
				break;
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE)
				goto enter_send_single;
			if (MLX5_TXOFF_CONFIG(MULTI) &&
			    ret == MLX5_TXCMP_CODE_MULTI) {
				/*
				 * A multi-segment packet was
				 * encountered in the array.
				 */
				goto enter_send_multi;
			}
			/* We must not get here. Something is going wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/*
		 * The dedicated branch for single-segment packets without
		 * TSO. Often these can be sent using MLX5_OPCODE_EMPW with
		 * multiple packets in one WQE. The routine builds the WQEs
		 * until it encounters a TSO or multi-segment packet (if
		 * these offloads are requested at SQ configuration time).
		 */
enter_send_single:
		MLX5_ASSERT(pkts_n > loc.pkts_sent);
		ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx);
		/*
		 * These returned code checks are supposed
		 * to be optimized out due to routine inlining.
		 */
		if (ret == MLX5_TXCMP_CODE_EXIT)
			break;
		if (ret == MLX5_TXCMP_CODE_ERROR) {
			txq->stats.oerrors++;
			break;
		}
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    ret == MLX5_TXCMP_CODE_MULTI) {
			/*
			 * A multi-segment packet was
			 * encountered in the array.
			 */
			goto enter_send_multi;
		}
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    ret == MLX5_TXCMP_CODE_TSO) {
			/*
			 * A single-segment TSO packet was
			 * encountered in the array.
			 */
			goto enter_send_tso;
		}
		/* We must not get here. Something is going wrong. */
		MLX5_ASSERT(false);
		txq->stats.oerrors++;
		break;
	}
	/*
	 * The main Tx loop is completed, do the rest:
	 * - set completion request if thresholds are reached
	 * - doorbell the hardware
	 * - copy the rest of the mbufs to elts (if any)
	 */
	MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) ||
		    loc.pkts_sent >= loc.pkts_copy);
	/* Take a shortcut if nothing is sent. */
	if (unlikely(loc.pkts_sent == loc.pkts_loop))
		goto burst_exit;
	/* Request CQE generation if limits are reached. */
	mlx5_tx_request_completion(txq, &loc, olx);
	/*
	 * Ring the QP doorbell immediately after WQE building completion
	 * to improve latency. The purely software-related data treatment
	 * can be completed after the doorbell. Tx CQEs for this SQ are
	 * processed in this thread only by polling.
	 *
	 * The rdma core library can map the doorbell register in two ways,
	 * depending on the environment variable "MLX5_SHUT_UP_BF":
	 *
	 * - as regular cached memory, when the variable is either missing or
	 *   set to zero. This type of mapping may cause significant doorbell
	 *   register write latency and requires an explicit memory write
	 *   barrier to mitigate this issue and prevent write combining.
	 *
	 * - as non-cached memory, when the variable is present and set to a
	 *   non-zero value. This type of mapping may cause a performance
	 *   impact under heavy load, but the explicit write memory barrier
	 *   is not required and core performance may improve.
	 *
	 * In addition, the legacy behaviour (prior to the 19.08 release) was
	 * to use some heuristics to decide whether the write memory barrier
	 * should be performed. This behaviour is selected by specifying
	 * tx_db_nc=2; the write barrier is skipped if the application
	 * provides the full recommended burst of packets, on the assumption
	 * that more packets are coming and the write barrier will be issued
	 * on the next burst (after descriptor writing, at least).
	 */
	mlx5_doorbell_ring(mlx5_tx_bfreg(txq),
			   *(volatile uint64_t *)loc.wqe_last, txq->wqe_ci,
			   txq->qp_db, !txq->db_nc &&
			   (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
	/* Not all of the mbufs may be stored into elts yet. */
	part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
	if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
		/*
		 * There are some single-segment mbufs not stored in elts.
		 * This can only happen if the last packet was single-segment.
		 * The copying is gathered into one place because it is
		 * a good opportunity to optimize it with SIMD.
		 * Unfortunately, if inlining is enabled, gaps in the pointer
		 * array may appear due to early freeing of the inlined mbufs.
		 */
		mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx);
		loc.pkts_copy = loc.pkts_sent;
	}
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (pkts_n > loc.pkts_sent) {
		/*
		 * If the burst size is large, there might not be enough CQEs
		 * fetched from the completion queue and not enough resources
		 * freed to send all the packets.
		 */
		goto send_loop;
	}
burst_exit:
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += loc.pkts_sent;
#endif
	if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
		__mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
	return loc.pkts_sent;
}

#endif /* RTE_PMD_MLX5_TX_H_ */