1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2021 6WIND S.A. 3 * Copyright 2021 Mellanox Technologies, Ltd 4 */ 5 6 #ifndef RTE_PMD_MLX5_TX_H_ 7 #define RTE_PMD_MLX5_TX_H_ 8 9 #include <stdint.h> 10 #include <sys/queue.h> 11 12 #include <rte_mbuf.h> 13 #include <rte_mempool.h> 14 #include <rte_common.h> 15 #include <rte_spinlock.h> 16 17 #include <mlx5_common.h> 18 #include <mlx5_common_mr.h> 19 20 #include "mlx5.h" 21 #include "mlx5_autoconf.h" 22 23 /* TX burst subroutines return codes. */ 24 enum mlx5_txcmp_code { 25 MLX5_TXCMP_CODE_EXIT = 0, 26 MLX5_TXCMP_CODE_ERROR, 27 MLX5_TXCMP_CODE_SINGLE, 28 MLX5_TXCMP_CODE_MULTI, 29 MLX5_TXCMP_CODE_TSO, 30 MLX5_TXCMP_CODE_EMPW, 31 }; 32 33 /* 34 * These defines are used to configure Tx burst routine option set supported 35 * at compile time. The not specified options are optimized out due to if 36 * conditions can be explicitly calculated at compile time. 37 * The offloads with bigger runtime check (require more CPU cycles toskip) 38 * overhead should have the bigger index - this is needed to select the better 39 * matching routine function if no exact match and some offloads are not 40 * actually requested. 41 */ 42 #define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets.*/ 43 #define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported.*/ 44 #define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads.*/ 45 #define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Check Sums offloaded. */ 46 #define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */ 47 #define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported.*/ 48 #define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */ 49 #define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported.*/ 50 #define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported.*/ 51 #define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp.*/ 52 53 /* The most common offloads groups. */ 54 #define MLX5_TXOFF_CONFIG_NONE 0 55 #define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \ 56 MLX5_TXOFF_CONFIG_TSO | \ 57 MLX5_TXOFF_CONFIG_SWP | \ 58 MLX5_TXOFF_CONFIG_CSUM | \ 59 MLX5_TXOFF_CONFIG_INLINE | \ 60 MLX5_TXOFF_CONFIG_VLAN | \ 61 MLX5_TXOFF_CONFIG_METADATA) 62 63 #define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask) 64 65 #define MLX5_TXOFF_PRE_DECL(func) \ 66 uint16_t mlx5_tx_burst_##func(void *txq, \ 67 struct rte_mbuf **pkts, \ 68 uint16_t pkts_n) 69 70 #define MLX5_TXOFF_DECL(func, olx) \ 71 uint16_t mlx5_tx_burst_##func(void *txq, \ 72 struct rte_mbuf **pkts, \ 73 uint16_t pkts_n) \ 74 { \ 75 return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \ 76 pkts, pkts_n, (olx)); \ 77 } 78 79 /* Mbuf dynamic flag offset for inline. */ 80 extern uint64_t rte_net_mlx5_dynf_inline_mask; 81 #define RTE_MBUF_F_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask 82 83 extern uint32_t mlx5_ptype_table[] __rte_cache_aligned; 84 extern uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned; 85 extern uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned; 86 87 struct mlx5_txq_stats { 88 #ifdef MLX5_PMD_SOFT_COUNTERS 89 uint64_t opackets; /**< Total of successfully sent packets. */ 90 uint64_t obytes; /**< Total of successfully sent bytes. */ 91 #endif 92 uint64_t oerrors; /**< Total number of failed transmitted packets. */ 93 }; 94 95 /* TX queue send local data. */ 96 __extension__ 97 struct mlx5_txq_local { 98 struct mlx5_wqe *wqe_last; /* last sent WQE pointer. */ 99 struct rte_mbuf *mbuf; /* first mbuf to process. 
*/ 100 uint16_t pkts_copy; /* packets copied to elts. */ 101 uint16_t pkts_sent; /* packets sent. */ 102 uint16_t pkts_loop; /* packets sent on loop entry. */ 103 uint16_t elts_free; /* available elts remain. */ 104 uint16_t wqe_free; /* available wqe remain. */ 105 uint16_t mbuf_off; /* data offset in current mbuf. */ 106 uint16_t mbuf_nseg; /* number of remaining mbuf. */ 107 uint16_t mbuf_free; /* number of inline mbufs to free. */ 108 }; 109 110 /* TX queue descriptor. */ 111 __extension__ 112 struct mlx5_txq_data { 113 uint16_t elts_head; /* Current counter in (*elts)[]. */ 114 uint16_t elts_tail; /* Counter of first element awaiting completion. */ 115 uint16_t elts_comp; /* elts index since last completion request. */ 116 uint16_t elts_s; /* Number of mbuf elements. */ 117 uint16_t elts_m; /* Mask for mbuf elements indices. */ 118 /* Fields related to elts mbuf storage. */ 119 uint16_t wqe_ci; /* Consumer index for work queue. */ 120 uint16_t wqe_pi; /* Producer index for work queue. */ 121 uint16_t wqe_s; /* Number of WQ elements. */ 122 uint16_t wqe_m; /* Mask Number for WQ elements. */ 123 uint16_t wqe_comp; /* WQE index since last completion request. */ 124 uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */ 125 /* WQ related fields. */ 126 uint16_t cq_ci; /* Consumer index for completion queue. */ 127 uint16_t cq_pi; /* Production index for completion queue. */ 128 uint16_t cqe_s; /* Number of CQ elements. */ 129 uint16_t cqe_m; /* Mask for CQ indices. */ 130 /* CQ related fields. */ 131 uint16_t elts_n:4; /* elts[] length (in log2). */ 132 uint16_t cqe_n:4; /* Number of CQ elements (in log2). */ 133 uint16_t wqe_n:4; /* Number of WQ elements (in log2). */ 134 uint16_t tso_en:1; /* When set hardware TSO is enabled. */ 135 uint16_t tunnel_en:1; 136 /* When set TX offload for tunneled packets are supported. */ 137 uint16_t swp_en:1; /* Whether SW parser is enabled. */ 138 uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */ 139 uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */ 140 uint16_t db_heu:1; /* Doorbell heuristic write barrier. */ 141 uint16_t fast_free:1; /* mbuf fast free on Tx is enabled. */ 142 uint16_t inlen_send; /* Ordinary send data inline size. */ 143 uint16_t inlen_empw; /* eMPW max packet size to inline. */ 144 uint16_t inlen_mode; /* Minimal data length to inline. */ 145 uint32_t qp_num_8s; /* QP number shifted by 8. */ 146 uint64_t offloads; /* Offloads for Tx Queue. */ 147 struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */ 148 struct mlx5_wqe *wqes; /* Work queue. */ 149 struct mlx5_wqe *wqes_end; /* Work queue array limit. */ 150 #ifdef RTE_LIBRTE_MLX5_DEBUG 151 uint32_t *fcqs; /* Free completion queue (debug extended). */ 152 #else 153 uint16_t *fcqs; /* Free completion queue. */ 154 #endif 155 volatile struct mlx5_cqe *cqes; /* Completion queue. */ 156 volatile uint32_t *qp_db; /* Work queue doorbell. */ 157 volatile uint32_t *cq_db; /* Completion queue doorbell. */ 158 uint16_t port_id; /* Port ID of device. */ 159 uint16_t idx; /* Queue index. */ 160 uint64_t ts_mask; /* Timestamp flag dynamic mask. */ 161 int32_t ts_offset; /* Timestamp field dynamic offset. */ 162 struct mlx5_dev_ctx_shared *sh; /* Shared context. */ 163 struct mlx5_txq_stats stats; /* TX queue counters. */ 164 struct mlx5_uar_data uar_data; 165 struct rte_mbuf *elts[0]; 166 /* Storage for queued packets, must be the last field. */ 167 } __rte_cache_aligned; 168 169 enum mlx5_txq_type { 170 MLX5_TXQ_TYPE_STANDARD, /* Standard Tx queue. 
*/ 171 MLX5_TXQ_TYPE_HAIRPIN, /* Hairpin Tx queue. */ 172 }; 173 174 /* TX queue control descriptor. */ 175 struct mlx5_txq_ctrl { 176 LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */ 177 uint32_t refcnt; /* Reference counter. */ 178 unsigned int socket; /* CPU socket ID for allocations. */ 179 enum mlx5_txq_type type; /* The txq ctrl type. */ 180 unsigned int max_inline_data; /* Max inline data. */ 181 unsigned int max_tso_header; /* Max TSO header size. */ 182 struct mlx5_txq_obj *obj; /* Verbs/DevX queue object. */ 183 struct mlx5_priv *priv; /* Back pointer to private data. */ 184 off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */ 185 uint16_t dump_file_n; /* Number of dump files. */ 186 struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */ 187 uint32_t hairpin_status; /* Hairpin binding status. */ 188 struct mlx5_txq_data txq; /* Data path structure. */ 189 /* Must be the last field in the structure, contains elts[]. */ 190 }; 191 192 /* mlx5_txq.c */ 193 194 int mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id); 195 int mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id); 196 int mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t queue_id); 197 int mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t queue_id); 198 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 199 unsigned int socket, const struct rte_eth_txconf *conf); 200 int mlx5_tx_hairpin_queue_setup 201 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 202 const struct rte_eth_hairpin_conf *hairpin_conf); 203 void mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 204 int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd); 205 void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev); 206 int mlx5_txq_obj_verify(struct rte_eth_dev *dev); 207 struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, 208 uint16_t desc, unsigned int socket, 209 const struct rte_eth_txconf *conf); 210 struct mlx5_txq_ctrl *mlx5_txq_hairpin_new 211 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 212 const struct rte_eth_hairpin_conf *hairpin_conf); 213 struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx); 214 int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx); 215 int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx); 216 int mlx5_txq_verify(struct rte_eth_dev *dev); 217 void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl); 218 void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl); 219 uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev); 220 void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev); 221 222 /* mlx5_tx.c */ 223 224 uint16_t removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 225 uint16_t pkts_n); 226 void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, 227 unsigned int olx __rte_unused); 228 int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset); 229 void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, 230 struct rte_eth_txq_info *qinfo); 231 int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t tx_queue_id, 232 struct rte_eth_burst_mode *mode); 233 234 /* mlx5_tx_empw.c */ 235 236 MLX5_TXOFF_PRE_DECL(full_empw); 237 MLX5_TXOFF_PRE_DECL(none_empw); 238 MLX5_TXOFF_PRE_DECL(md_empw); 239 MLX5_TXOFF_PRE_DECL(mt_empw); 240 MLX5_TXOFF_PRE_DECL(mtsc_empw); 241 MLX5_TXOFF_PRE_DECL(mti_empw); 242 MLX5_TXOFF_PRE_DECL(mtv_empw); 243 MLX5_TXOFF_PRE_DECL(mtiv_empw); 244 
MLX5_TXOFF_PRE_DECL(sc_empw); 245 MLX5_TXOFF_PRE_DECL(sci_empw); 246 MLX5_TXOFF_PRE_DECL(scv_empw); 247 MLX5_TXOFF_PRE_DECL(sciv_empw); 248 MLX5_TXOFF_PRE_DECL(i_empw); 249 MLX5_TXOFF_PRE_DECL(v_empw); 250 MLX5_TXOFF_PRE_DECL(iv_empw); 251 252 /* mlx5_tx_nompw.c */ 253 254 MLX5_TXOFF_PRE_DECL(full); 255 MLX5_TXOFF_PRE_DECL(none); 256 MLX5_TXOFF_PRE_DECL(md); 257 MLX5_TXOFF_PRE_DECL(mt); 258 MLX5_TXOFF_PRE_DECL(mtsc); 259 MLX5_TXOFF_PRE_DECL(mti); 260 MLX5_TXOFF_PRE_DECL(mtv); 261 MLX5_TXOFF_PRE_DECL(mtiv); 262 MLX5_TXOFF_PRE_DECL(sc); 263 MLX5_TXOFF_PRE_DECL(sci); 264 MLX5_TXOFF_PRE_DECL(scv); 265 MLX5_TXOFF_PRE_DECL(sciv); 266 MLX5_TXOFF_PRE_DECL(i); 267 MLX5_TXOFF_PRE_DECL(v); 268 MLX5_TXOFF_PRE_DECL(iv); 269 270 /* mlx5_tx_txpp.c */ 271 272 MLX5_TXOFF_PRE_DECL(full_ts_nompw); 273 MLX5_TXOFF_PRE_DECL(full_ts_nompwi); 274 MLX5_TXOFF_PRE_DECL(full_ts); 275 MLX5_TXOFF_PRE_DECL(full_ts_noi); 276 MLX5_TXOFF_PRE_DECL(none_ts); 277 MLX5_TXOFF_PRE_DECL(mdi_ts); 278 MLX5_TXOFF_PRE_DECL(mti_ts); 279 MLX5_TXOFF_PRE_DECL(mtiv_ts); 280 281 /* mlx5_tx_mpw.c */ 282 283 MLX5_TXOFF_PRE_DECL(none_mpw); 284 MLX5_TXOFF_PRE_DECL(mci_mpw); 285 MLX5_TXOFF_PRE_DECL(mc_mpw); 286 MLX5_TXOFF_PRE_DECL(i_mpw); 287 288 static __rte_always_inline struct mlx5_uar_data * 289 mlx5_tx_bfreg(struct mlx5_txq_data *txq) 290 { 291 return &MLX5_PROC_PRIV(txq->port_id)->uar_table[txq->idx]; 292 } 293 294 /** 295 * Ring TX queue doorbell and flush the update by write memory barrier. 296 * 297 * @param txq 298 * Pointer to TX queue structure. 299 * @param wqe 300 * Pointer to the last WQE posted in the NIC. 301 */ 302 static __rte_always_inline void 303 mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe) 304 { 305 mlx5_doorbell_ring(mlx5_tx_bfreg(txq), *(volatile uint64_t *)wqe, 306 txq->wqe_ci, txq->qp_db, 1); 307 } 308 309 /** 310 * Convert timestamp from mbuf format to linear counter 311 * of Clock Queue completions (24 bits). 312 * 313 * @param sh 314 * Pointer to the device shared context to fetch Tx 315 * packet pacing timestamp and parameters. 316 * @param ts 317 * Timestamp from mbuf to convert. 318 * @return 319 * positive or zero value - completion ID to wait. 320 * negative value - conversion error. 321 */ 322 static __rte_always_inline int32_t 323 mlx5_txpp_convert_tx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t mts) 324 { 325 uint64_t ts, ci; 326 uint32_t tick; 327 328 do { 329 /* 330 * Read atomically two uint64_t fields and compare lsb bits. 331 * It there is no match - the timestamp was updated in 332 * the service thread, data should be re-read. 333 */ 334 rte_compiler_barrier(); 335 ci = __atomic_load_n(&sh->txpp.ts.ci_ts, __ATOMIC_RELAXED); 336 ts = __atomic_load_n(&sh->txpp.ts.ts, __ATOMIC_RELAXED); 337 rte_compiler_barrier(); 338 if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH))) 339 break; 340 } while (true); 341 /* Perform the skew correction, positive value to send earlier. */ 342 mts -= sh->txpp.skew; 343 mts -= ts; 344 if (unlikely(mts >= UINT64_MAX / 2)) { 345 /* We have negative integer, mts is in the past. */ 346 __atomic_fetch_add(&sh->txpp.err_ts_past, 347 1, __ATOMIC_RELAXED); 348 return -1; 349 } 350 tick = sh->txpp.tick; 351 MLX5_ASSERT(tick); 352 /* Convert delta to completions, round up. */ 353 mts = (mts + tick - 1) / tick; 354 if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) { 355 /* We have mts is too distant future. 
*/ 356 __atomic_fetch_add(&sh->txpp.err_ts_future, 357 1, __ATOMIC_RELAXED); 358 return -1; 359 } 360 mts <<= 64 - MLX5_CQ_INDEX_WIDTH; 361 ci += mts; 362 ci >>= 64 - MLX5_CQ_INDEX_WIDTH; 363 return ci; 364 } 365 366 /** 367 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 368 * Flags must be preliminary initialized to zero. 369 * 370 * @param loc 371 * Pointer to burst routine local context. 372 * @param swp_flags 373 * Pointer to store Software Parser flags. 374 * @param olx 375 * Configured Tx offloads mask. It is fully defined at 376 * compile time and may be used for optimization. 377 * 378 * @return 379 * Software Parser offsets packed in dword. 380 * Software Parser flags are set by pointer. 381 */ 382 static __rte_always_inline uint32_t 383 txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, 384 uint8_t *swp_flags, 385 unsigned int olx) 386 { 387 uint64_t ol, tunnel; 388 unsigned int idx, off; 389 uint32_t set; 390 391 if (!MLX5_TXOFF_CONFIG(SWP)) 392 return 0; 393 ol = loc->mbuf->ol_flags; 394 tunnel = ol & RTE_MBUF_F_TX_TUNNEL_MASK; 395 /* 396 * Check whether Software Parser is required. 397 * Only customized tunnels may ask for. 398 */ 399 if (likely(tunnel != RTE_MBUF_F_TX_TUNNEL_UDP && tunnel != RTE_MBUF_F_TX_TUNNEL_IP)) 400 return 0; 401 /* 402 * The index should have: 403 * bit[0:1] = RTE_MBUF_F_TX_L4_MASK 404 * bit[4] = RTE_MBUF_F_TX_IPV6 405 * bit[8] = RTE_MBUF_F_TX_OUTER_IPV6 406 * bit[9] = RTE_MBUF_F_TX_OUTER_UDP 407 */ 408 idx = (ol & (RTE_MBUF_F_TX_L4_MASK | RTE_MBUF_F_TX_IPV6 | RTE_MBUF_F_TX_OUTER_IPV6)) >> 52; 409 idx |= (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) ? (1 << 9) : 0; 410 *swp_flags = mlx5_swp_types_table[idx]; 411 /* 412 * Set offsets for SW parser. Since ConnectX-5, SW parser just 413 * complements HW parser. SW parser starts to engage only if HW parser 414 * can't reach a header. For the older devices, HW parser will not kick 415 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 416 * should be set regardless of HW offload. 417 */ 418 off = loc->mbuf->outer_l2_len; 419 if (MLX5_TXOFF_CONFIG(VLAN) && ol & RTE_MBUF_F_TX_VLAN) 420 off += sizeof(struct rte_vlan_hdr); 421 set = (off >> 1) << 8; /* Outer L3 offset. */ 422 off += loc->mbuf->outer_l3_len; 423 if (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) 424 set |= off >> 1; /* Outer L4 offset. */ 425 if (ol & (RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IPV6)) { /* Inner IP. */ 426 const uint64_t csum = ol & RTE_MBUF_F_TX_L4_MASK; 427 off += loc->mbuf->l2_len; 428 set |= (off >> 1) << 24; /* Inner L3 offset. */ 429 if (csum == RTE_MBUF_F_TX_TCP_CKSUM || 430 csum == RTE_MBUF_F_TX_UDP_CKSUM || 431 (MLX5_TXOFF_CONFIG(TSO) && ol & RTE_MBUF_F_TX_TCP_SEG)) { 432 off += loc->mbuf->l3_len; 433 set |= (off >> 1) << 16; /* Inner L4 offset. */ 434 } 435 } 436 set = rte_cpu_to_le_32(set); 437 return set; 438 } 439 440 /** 441 * Convert the Checksum offloads to Verbs. 442 * 443 * @param buf 444 * Pointer to the mbuf. 445 * 446 * @return 447 * Converted checksum flags. 
448 */ 449 static __rte_always_inline uint8_t 450 txq_ol_cksum_to_cs(struct rte_mbuf *buf) 451 { 452 uint32_t idx; 453 uint8_t is_tunnel = !!(buf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK); 454 const uint64_t ol_flags_mask = RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_L4_MASK | 455 RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_OUTER_IP_CKSUM; 456 457 /* 458 * The index should have: 459 * bit[0] = RTE_MBUF_F_TX_TCP_SEG 460 * bit[2:3] = RTE_MBUF_F_TX_UDP_CKSUM, RTE_MBUF_F_TX_TCP_CKSUM 461 * bit[4] = RTE_MBUF_F_TX_IP_CKSUM 462 * bit[8] = RTE_MBUF_F_TX_OUTER_IP_CKSUM 463 * bit[9] = tunnel 464 */ 465 idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9); 466 return mlx5_cksum_table[idx]; 467 } 468 469 /** 470 * Free the mbufs from the linear array of pointers. 471 * 472 * @param txq 473 * Pointer to Tx queue structure. 474 * @param pkts 475 * Pointer to array of packets to be free. 476 * @param pkts_n 477 * Number of packets to be freed. 478 * @param olx 479 * Configured Tx offloads mask. It is fully defined at 480 * compile time and may be used for optimization. 481 */ 482 static __rte_always_inline void 483 mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 484 struct rte_mbuf **__rte_restrict pkts, 485 unsigned int pkts_n, 486 unsigned int olx __rte_unused) 487 { 488 struct rte_mempool *pool = NULL; 489 struct rte_mbuf **p_free = NULL; 490 struct rte_mbuf *mbuf; 491 unsigned int n_free = 0; 492 493 /* 494 * The implemented algorithm eliminates 495 * copying pointers to temporary array 496 * for rte_mempool_put_bulk() calls. 497 */ 498 MLX5_ASSERT(pkts); 499 MLX5_ASSERT(pkts_n); 500 /* 501 * Free mbufs directly to the pool in bulk 502 * if fast free offload is engaged 503 */ 504 if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) { 505 mbuf = *pkts; 506 pool = mbuf->pool; 507 rte_mempool_put_bulk(pool, (void *)pkts, pkts_n); 508 return; 509 } 510 for (;;) { 511 for (;;) { 512 /* 513 * Decrement mbuf reference counter, detach 514 * indirect and external buffers if needed. 515 */ 516 mbuf = rte_pktmbuf_prefree_seg(*pkts); 517 if (likely(mbuf != NULL)) { 518 MLX5_ASSERT(mbuf == *pkts); 519 if (likely(n_free != 0)) { 520 if (unlikely(pool != mbuf->pool)) 521 /* From different pool. */ 522 break; 523 } else { 524 /* Start new scan array. */ 525 pool = mbuf->pool; 526 p_free = pkts; 527 } 528 ++n_free; 529 ++pkts; 530 --pkts_n; 531 if (unlikely(pkts_n == 0)) { 532 mbuf = NULL; 533 break; 534 } 535 } else { 536 /* 537 * This happens if mbuf is still referenced. 538 * We can't put it back to the pool, skip. 539 */ 540 ++pkts; 541 --pkts_n; 542 if (unlikely(n_free != 0)) 543 /* There is some array to free.*/ 544 break; 545 if (unlikely(pkts_n == 0)) 546 /* Last mbuf, nothing to free. */ 547 return; 548 } 549 } 550 for (;;) { 551 /* 552 * This loop is implemented to avoid multiple 553 * inlining of rte_mempool_put_bulk(). 554 */ 555 MLX5_ASSERT(pool); 556 MLX5_ASSERT(p_free); 557 MLX5_ASSERT(n_free); 558 /* 559 * Free the array of pre-freed mbufs 560 * belonging to the same memory pool. 561 */ 562 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 563 if (unlikely(mbuf != NULL)) { 564 /* There is the request to start new scan. */ 565 pool = mbuf->pool; 566 p_free = pkts++; 567 n_free = 1; 568 --pkts_n; 569 if (likely(pkts_n != 0)) 570 break; 571 /* 572 * This is the last mbuf to be freed. 573 * Do one more loop iteration to complete. 574 * This is rare case of the last unique mbuf. 
575 */ 576 mbuf = NULL; 577 continue; 578 } 579 if (likely(pkts_n == 0)) 580 return; 581 n_free = 0; 582 break; 583 } 584 } 585 } 586 587 /** 588 * No inline version to free buffers for optimal call 589 * on the tx_burst completion. 590 */ 591 static __rte_noinline void 592 __mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 593 struct rte_mbuf **__rte_restrict pkts, 594 unsigned int pkts_n, 595 unsigned int olx __rte_unused) 596 { 597 mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx); 598 } 599 600 /** 601 * Free the mbuf from the elts ring buffer till new tail. 602 * 603 * @param txq 604 * Pointer to Tx queue structure. 605 * @param tail 606 * Index in elts to free up to, becomes new elts tail. 607 * @param olx 608 * Configured Tx offloads mask. It is fully defined at 609 * compile time and may be used for optimization. 610 */ 611 static __rte_always_inline void 612 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 613 uint16_t tail, 614 unsigned int olx __rte_unused) 615 { 616 uint16_t n_elts = tail - txq->elts_tail; 617 618 MLX5_ASSERT(n_elts); 619 MLX5_ASSERT(n_elts <= txq->elts_s); 620 /* 621 * Implement a loop to support ring buffer wraparound 622 * with single inlining of mlx5_tx_free_mbuf(). 623 */ 624 do { 625 unsigned int part; 626 627 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 628 part = RTE_MIN(part, n_elts); 629 MLX5_ASSERT(part); 630 MLX5_ASSERT(part <= txq->elts_s); 631 mlx5_tx_free_mbuf(txq, 632 &txq->elts[txq->elts_tail & txq->elts_m], 633 part, olx); 634 txq->elts_tail += part; 635 n_elts -= part; 636 } while (n_elts); 637 } 638 639 /** 640 * Store the mbuf being sent into elts ring buffer. 641 * On Tx completion these mbufs will be freed. 642 * 643 * @param txq 644 * Pointer to Tx queue structure. 645 * @param pkts 646 * Pointer to array of packets to be stored. 647 * @param pkts_n 648 * Number of packets to be stored. 649 * @param olx 650 * Configured Tx offloads mask. It is fully defined at 651 * compile time and may be used for optimization. 652 */ 653 static __rte_always_inline void 654 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 655 struct rte_mbuf **__rte_restrict pkts, 656 unsigned int pkts_n, 657 unsigned int olx __rte_unused) 658 { 659 unsigned int part; 660 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 661 662 MLX5_ASSERT(pkts); 663 MLX5_ASSERT(pkts_n); 664 part = txq->elts_s - (txq->elts_head & txq->elts_m); 665 MLX5_ASSERT(part); 666 MLX5_ASSERT(part <= txq->elts_s); 667 /* This code is a good candidate for vectorizing with SIMD. */ 668 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 669 (void *)pkts, 670 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 671 txq->elts_head += pkts_n; 672 if (unlikely(part < pkts_n)) 673 /* The copy is wrapping around the elts array. */ 674 rte_memcpy((void *)elts, (void *)(pkts + part), 675 (pkts_n - part) * sizeof(struct rte_mbuf *)); 676 } 677 678 /** 679 * Check if the completion request flag should be set in the last WQE. 680 * Both pushed mbufs and WQEs are monitored and the completion request 681 * flag is set if any of thresholds is reached. 682 * 683 * @param txq 684 * Pointer to TX queue structure. 685 * @param loc 686 * Pointer to burst routine local context. 687 * @param olx 688 * Configured Tx offloads mask. It is fully defined at 689 * compile time and may be used for optimization. 
690 */ 691 static __rte_always_inline void 692 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 693 struct mlx5_txq_local *__rte_restrict loc, 694 unsigned int olx) 695 { 696 uint16_t head = txq->elts_head; 697 unsigned int part; 698 699 part = MLX5_TXOFF_CONFIG(INLINE) ? 700 0 : loc->pkts_sent - loc->pkts_copy; 701 head += part; 702 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 703 (MLX5_TXOFF_CONFIG(INLINE) && 704 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 705 volatile struct mlx5_wqe *last = loc->wqe_last; 706 707 MLX5_ASSERT(last); 708 txq->elts_comp = head; 709 if (MLX5_TXOFF_CONFIG(INLINE)) 710 txq->wqe_comp = txq->wqe_ci; 711 /* Request unconditional completion on last WQE. */ 712 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 713 MLX5_COMP_MODE_OFFSET); 714 /* Save elts_head in dedicated free on completion queue. */ 715 #ifdef RTE_LIBRTE_MLX5_DEBUG 716 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 717 (last->cseg.opcode >> 8) << 16; 718 #else 719 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 720 #endif 721 /* A CQE slot must always be available. */ 722 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 723 } 724 } 725 726 /** 727 * Build the Control Segment with specified opcode: 728 * - MLX5_OPCODE_SEND 729 * - MLX5_OPCODE_ENHANCED_MPSW 730 * - MLX5_OPCODE_TSO 731 * 732 * @param txq 733 * Pointer to TX queue structure. 734 * @param loc 735 * Pointer to burst routine local context. 736 * @param wqe 737 * Pointer to WQE to fill with built Control Segment. 738 * @param ds 739 * Supposed length of WQE in segments. 740 * @param opcode 741 * SQ WQE opcode to put into Control Segment. 742 * @param olx 743 * Configured Tx offloads mask. It is fully defined at 744 * compile time and may be used for optimization. 745 */ 746 static __rte_always_inline void 747 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 748 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 749 struct mlx5_wqe *__rte_restrict wqe, 750 unsigned int ds, 751 unsigned int opcode, 752 unsigned int olx __rte_unused) 753 { 754 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 755 756 /* For legacy MPW replace the EMPW by TSO with modifier. */ 757 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 758 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 759 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 760 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 761 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 762 MLX5_COMP_MODE_OFFSET); 763 cs->misc = RTE_BE32(0); 764 } 765 766 /** 767 * Build the Synchronize Queue Segment with specified completion index. 768 * 769 * @param txq 770 * Pointer to TX queue structure. 771 * @param loc 772 * Pointer to burst routine local context. 773 * @param wqe 774 * Pointer to WQE to fill with built Control Segment. 775 * @param wci 776 * Completion index in Clock Queue to wait. 777 * @param olx 778 * Configured Tx offloads mask. It is fully defined at 779 * compile time and may be used for optimization. 
780 */ 781 static __rte_always_inline void 782 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 783 struct mlx5_txq_local *restrict loc __rte_unused, 784 struct mlx5_wqe *restrict wqe, 785 unsigned int wci, 786 unsigned int olx __rte_unused) 787 { 788 struct mlx5_wqe_qseg *qs; 789 790 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 791 qs->max_index = rte_cpu_to_be_32(wci); 792 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq_obj.cq->id); 793 qs->reserved0 = RTE_BE32(0); 794 qs->reserved1 = RTE_BE32(0); 795 } 796 797 /** 798 * Build the Ethernet Segment without inlined data. 799 * Supports Software Parser, Checksums and VLAN insertion Tx offload features. 800 * 801 * @param txq 802 * Pointer to TX queue structure. 803 * @param loc 804 * Pointer to burst routine local context. 805 * @param wqe 806 * Pointer to WQE to fill with built Ethernet Segment. 807 * @param olx 808 * Configured Tx offloads mask. It is fully defined at 809 * compile time and may be used for optimization. 810 */ 811 static __rte_always_inline void 812 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 813 struct mlx5_txq_local *__rte_restrict loc, 814 struct mlx5_wqe *__rte_restrict wqe, 815 unsigned int olx) 816 { 817 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 818 uint32_t csum; 819 820 /* 821 * Calculate and set check sum flags first, dword field 822 * in segment may be shared with Software Parser flags. 823 */ 824 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 825 es->flags = rte_cpu_to_le_32(csum); 826 /* 827 * Calculate and set Software Parser offsets and flags. 828 * These flags a set for custom UDP and IP tunnel packets. 829 */ 830 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 831 /* Fill metadata field if needed. */ 832 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 833 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 834 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 835 0 : 0; 836 /* Engage VLAN tag insertion feature if requested. */ 837 if (MLX5_TXOFF_CONFIG(VLAN) && 838 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 839 /* 840 * We should get here only if device support 841 * this feature correctly. 842 */ 843 MLX5_ASSERT(txq->vlan_en); 844 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 845 loc->mbuf->vlan_tci); 846 } else { 847 es->inline_hdr = RTE_BE32(0); 848 } 849 } 850 851 /** 852 * Build the Ethernet Segment with minimal inlined data 853 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 854 * used to fill the gap in single WQEBB WQEs. 855 * Supports Software Parser, Checksums and VLAN 856 * insertion Tx offload features. 857 * 858 * @param txq 859 * Pointer to TX queue structure. 860 * @param loc 861 * Pointer to burst routine local context. 862 * @param wqe 863 * Pointer to WQE to fill with built Ethernet Segment. 864 * @param vlan 865 * Length of VLAN tag insertion if any. 866 * @param olx 867 * Configured Tx offloads mask. It is fully defined at 868 * compile time and may be used for optimization. 869 */ 870 static __rte_always_inline void 871 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 872 struct mlx5_txq_local *__rte_restrict loc, 873 struct mlx5_wqe *__rte_restrict wqe, 874 unsigned int vlan, 875 unsigned int olx) 876 { 877 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 878 uint32_t csum; 879 uint8_t *psrc, *pdst; 880 881 /* 882 * Calculate and set check sum flags first, dword field 883 * in segment may be shared with Software Parser flags. 
884 */ 885 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 886 es->flags = rte_cpu_to_le_32(csum); 887 /* 888 * Calculate and set Software Parser offsets and flags. 889 * These flags a set for custom UDP and IP tunnel packets. 890 */ 891 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 892 /* Fill metadata field if needed. */ 893 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 894 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 895 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 896 0 : 0; 897 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 898 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 899 es->inline_data = *(unaligned_uint16_t *)psrc; 900 psrc += sizeof(uint16_t); 901 pdst = (uint8_t *)(es + 1); 902 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 903 /* Implement VLAN tag insertion as part inline data. */ 904 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 905 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 906 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 907 /* Insert VLAN ethertype + VLAN tag. */ 908 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 909 ((RTE_ETHER_TYPE_VLAN << 16) | 910 loc->mbuf->vlan_tci); 911 pdst += sizeof(struct rte_vlan_hdr); 912 /* Copy the rest two bytes from packet data. */ 913 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 914 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 915 } else { 916 /* Fill the gap in the title WQEBB with inline data. */ 917 rte_mov16(pdst, psrc); 918 } 919 } 920 921 /** 922 * Build the Ethernet Segment with entire packet data inlining. Checks the 923 * boundary of WQEBB and ring buffer wrapping, supports Software Parser, 924 * Checksums and VLAN insertion Tx offload features. 925 * 926 * @param txq 927 * Pointer to TX queue structure. 928 * @param loc 929 * Pointer to burst routine local context. 930 * @param wqe 931 * Pointer to WQE to fill with built Ethernet Segment. 932 * @param vlan 933 * Length of VLAN tag insertion if any. 934 * @param inlen 935 * Length of data to inline (VLAN included, if any). 936 * @param tso 937 * TSO flag, set mss field from the packet. 938 * @param olx 939 * Configured Tx offloads mask. It is fully defined at 940 * compile time and may be used for optimization. 941 * 942 * @return 943 * Pointer to the next Data Segment (aligned and wrapped around). 944 */ 945 static __rte_always_inline struct mlx5_wqe_dseg * 946 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 947 struct mlx5_txq_local *__rte_restrict loc, 948 struct mlx5_wqe *__rte_restrict wqe, 949 unsigned int vlan, 950 unsigned int inlen, 951 unsigned int tso, 952 unsigned int olx) 953 { 954 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 955 uint32_t csum; 956 uint8_t *psrc, *pdst; 957 unsigned int part; 958 959 /* 960 * Calculate and set check sum flags first, dword field 961 * in segment may be shared with Software Parser flags. 962 */ 963 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 964 if (tso) { 965 csum <<= 24; 966 csum |= loc->mbuf->tso_segsz; 967 es->flags = rte_cpu_to_be_32(csum); 968 } else { 969 es->flags = rte_cpu_to_le_32(csum); 970 } 971 /* 972 * Calculate and set Software Parser offsets and flags. 973 * These flags a set for custom UDP and IP tunnel packets. 974 */ 975 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 976 /* Fill metadata field if needed. */ 977 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 978 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 
979 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 980 0 : 0; 981 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 982 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 983 es->inline_data = *(unaligned_uint16_t *)psrc; 984 psrc += sizeof(uint16_t); 985 pdst = (uint8_t *)(es + 1); 986 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 987 /* Implement VLAN tag insertion as part inline data. */ 988 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 989 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 990 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 991 /* Insert VLAN ethertype + VLAN tag. */ 992 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 993 ((RTE_ETHER_TYPE_VLAN << 16) | 994 loc->mbuf->vlan_tci); 995 pdst += sizeof(struct rte_vlan_hdr); 996 /* Copy the rest two bytes from packet data. */ 997 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 998 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 999 psrc += sizeof(uint16_t); 1000 } else { 1001 /* Fill the gap in the title WQEBB with inline data. */ 1002 rte_mov16(pdst, psrc); 1003 psrc += sizeof(rte_v128u32_t); 1004 } 1005 pdst = (uint8_t *)(es + 2); 1006 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 1007 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 1008 inlen -= MLX5_ESEG_MIN_INLINE_SIZE; 1009 if (!inlen) { 1010 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 1011 return (struct mlx5_wqe_dseg *)pdst; 1012 } 1013 /* 1014 * The WQEBB space availability is checked by caller. 1015 * Here we should be aware of WQE ring buffer wraparound only. 1016 */ 1017 part = (uint8_t *)txq->wqes_end - pdst; 1018 part = RTE_MIN(part, inlen); 1019 do { 1020 rte_memcpy(pdst, psrc, part); 1021 inlen -= part; 1022 if (likely(!inlen)) { 1023 /* 1024 * If return value is not used by the caller 1025 * the code below will be optimized out. 1026 */ 1027 pdst += part; 1028 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1029 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 1030 pdst = (uint8_t *)txq->wqes; 1031 return (struct mlx5_wqe_dseg *)pdst; 1032 } 1033 pdst = (uint8_t *)txq->wqes; 1034 psrc += part; 1035 part = inlen; 1036 } while (true); 1037 } 1038 1039 /** 1040 * Copy data from chain of mbuf to the specified linear buffer. 1041 * Checksums and VLAN insertion Tx offload features. If data 1042 * from some mbuf copied completely this mbuf is freed. Local 1043 * structure is used to keep the byte stream state. 1044 * 1045 * @param pdst 1046 * Pointer to the destination linear buffer. 1047 * @param loc 1048 * Pointer to burst routine local context. 1049 * @param len 1050 * Length of data to be copied. 1051 * @param must 1052 * Length of data to be copied ignoring no inline hint. 1053 * @param olx 1054 * Configured Tx offloads mask. It is fully defined at 1055 * compile time and may be used for optimization. 1056 * 1057 * @return 1058 * Number of actual copied data bytes. This is always greater than or 1059 * equal to must parameter and might be lesser than len in no inline 1060 * hint flag is encountered. 1061 */ 1062 static __rte_always_inline unsigned int 1063 mlx5_tx_mseg_memcpy(uint8_t *pdst, 1064 struct mlx5_txq_local *__rte_restrict loc, 1065 unsigned int len, 1066 unsigned int must, 1067 unsigned int olx __rte_unused) 1068 { 1069 struct rte_mbuf *mbuf; 1070 unsigned int part, dlen, copy = 0; 1071 uint8_t *psrc; 1072 1073 MLX5_ASSERT(len); 1074 MLX5_ASSERT(must <= len); 1075 do { 1076 /* Allow zero length packets, must check first. 
*/ 1077 dlen = rte_pktmbuf_data_len(loc->mbuf); 1078 if (dlen <= loc->mbuf_off) { 1079 /* Exhausted packet, just free. */ 1080 mbuf = loc->mbuf; 1081 loc->mbuf = mbuf->next; 1082 rte_pktmbuf_free_seg(mbuf); 1083 loc->mbuf_off = 0; 1084 MLX5_ASSERT(loc->mbuf_nseg > 1); 1085 MLX5_ASSERT(loc->mbuf); 1086 --loc->mbuf_nseg; 1087 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1088 unsigned int diff; 1089 1090 if (copy >= must) { 1091 /* 1092 * We already copied the minimal 1093 * requested amount of data. 1094 */ 1095 return copy; 1096 } 1097 diff = must - copy; 1098 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 1099 /* 1100 * Copy only the minimal required 1101 * part of the data buffer. 1102 */ 1103 len = diff; 1104 } 1105 } 1106 continue; 1107 } 1108 dlen -= loc->mbuf_off; 1109 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1110 loc->mbuf_off); 1111 part = RTE_MIN(len, dlen); 1112 rte_memcpy(pdst, psrc, part); 1113 copy += part; 1114 loc->mbuf_off += part; 1115 len -= part; 1116 if (!len) { 1117 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 1118 loc->mbuf_off = 0; 1119 /* Exhausted packet, just free. */ 1120 mbuf = loc->mbuf; 1121 loc->mbuf = mbuf->next; 1122 rte_pktmbuf_free_seg(mbuf); 1123 loc->mbuf_off = 0; 1124 MLX5_ASSERT(loc->mbuf_nseg >= 1); 1125 --loc->mbuf_nseg; 1126 } 1127 return copy; 1128 } 1129 pdst += part; 1130 } while (true); 1131 } 1132 1133 /** 1134 * Build the Ethernet Segment with inlined data from multi-segment packet. 1135 * Checks the boundary of WQEBB and ring buffer wrapping, supports Software 1136 * Parser, Checksums and VLAN insertion Tx offload features. 1137 * 1138 * @param txq 1139 * Pointer to TX queue structure. 1140 * @param loc 1141 * Pointer to burst routine local context. 1142 * @param wqe 1143 * Pointer to WQE to fill with built Ethernet Segment. 1144 * @param vlan 1145 * Length of VLAN tag insertion if any. 1146 * @param inlen 1147 * Length of data to inline (VLAN included, if any). 1148 * @param tso 1149 * TSO flag, set mss field from the packet. 1150 * @param olx 1151 * Configured Tx offloads mask. It is fully defined at 1152 * compile time and may be used for optimization. 1153 * 1154 * @return 1155 * Pointer to the next Data Segment (aligned and possible NOT wrapped 1156 * around - caller should do wrapping check on its own). 1157 */ 1158 static __rte_always_inline struct mlx5_wqe_dseg * 1159 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 1160 struct mlx5_txq_local *__rte_restrict loc, 1161 struct mlx5_wqe *__rte_restrict wqe, 1162 unsigned int vlan, 1163 unsigned int inlen, 1164 unsigned int tso, 1165 unsigned int olx) 1166 { 1167 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 1168 uint32_t csum; 1169 uint8_t *pdst; 1170 unsigned int part, tlen = 0; 1171 1172 /* 1173 * Calculate and set check sum flags first, uint32_t field 1174 * in segment may be shared with Software Parser flags. 1175 */ 1176 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 1177 if (tso) { 1178 csum <<= 24; 1179 csum |= loc->mbuf->tso_segsz; 1180 es->flags = rte_cpu_to_be_32(csum); 1181 } else { 1182 es->flags = rte_cpu_to_le_32(csum); 1183 } 1184 /* 1185 * Calculate and set Software Parser offsets and flags. 1186 * These flags a set for custom UDP and IP tunnel packets. 1187 */ 1188 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1189 /* Fill metadata field if needed. */ 1190 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1191 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 
1192 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1193 0 : 0; 1194 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 1195 pdst = (uint8_t *)&es->inline_data; 1196 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1197 /* Implement VLAN tag insertion as part inline data. */ 1198 mlx5_tx_mseg_memcpy(pdst, loc, 1199 2 * RTE_ETHER_ADDR_LEN, 1200 2 * RTE_ETHER_ADDR_LEN, olx); 1201 pdst += 2 * RTE_ETHER_ADDR_LEN; 1202 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1203 ((RTE_ETHER_TYPE_VLAN << 16) | 1204 loc->mbuf->vlan_tci); 1205 pdst += sizeof(struct rte_vlan_hdr); 1206 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 1207 } 1208 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 1209 /* 1210 * The WQEBB space availability is checked by caller. 1211 * Here we should be aware of WQE ring buffer wraparound only. 1212 */ 1213 part = (uint8_t *)txq->wqes_end - pdst; 1214 part = RTE_MIN(part, inlen - tlen); 1215 MLX5_ASSERT(part); 1216 do { 1217 unsigned int copy; 1218 1219 /* 1220 * Copying may be interrupted inside the routine 1221 * if run into no inline hint flag. 1222 */ 1223 copy = tso ? inlen : txq->inlen_mode; 1224 copy = tlen >= copy ? 0 : (copy - tlen); 1225 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 1226 tlen += copy; 1227 if (likely(inlen <= tlen) || copy < part) { 1228 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 1229 pdst += copy; 1230 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1231 return (struct mlx5_wqe_dseg *)pdst; 1232 } 1233 pdst = (uint8_t *)txq->wqes; 1234 part = inlen - tlen; 1235 } while (true); 1236 } 1237 1238 /** 1239 * Build the Data Segment of pointer type. 1240 * 1241 * @param txq 1242 * Pointer to TX queue structure. 1243 * @param loc 1244 * Pointer to burst routine local context. 1245 * @param dseg 1246 * Pointer to WQE to fill with built Data Segment. 1247 * @param buf 1248 * Data buffer to point. 1249 * @param len 1250 * Data buffer length. 1251 * @param olx 1252 * Configured Tx offloads mask. It is fully defined at 1253 * compile time and may be used for optimization. 1254 */ 1255 static __rte_always_inline void 1256 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 1257 struct mlx5_txq_local *__rte_restrict loc, 1258 struct mlx5_wqe_dseg *__rte_restrict dseg, 1259 uint8_t *buf, 1260 unsigned int len, 1261 unsigned int olx __rte_unused) 1262 1263 { 1264 MLX5_ASSERT(len); 1265 dseg->bcount = rte_cpu_to_be_32(len); 1266 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1267 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1268 } 1269 1270 /** 1271 * Build the Data Segment of pointer type or inline if data length is less than 1272 * buffer in minimal Data Segment size. 1273 * 1274 * @param txq 1275 * Pointer to TX queue structure. 1276 * @param loc 1277 * Pointer to burst routine local context. 1278 * @param dseg 1279 * Pointer to WQE to fill with built Data Segment. 1280 * @param buf 1281 * Data buffer to point. 1282 * @param len 1283 * Data buffer length. 1284 * @param olx 1285 * Configured Tx offloads mask. It is fully defined at 1286 * compile time and may be used for optimization. 
1287 */ 1288 static __rte_always_inline void 1289 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 1290 struct mlx5_txq_local *__rte_restrict loc, 1291 struct mlx5_wqe_dseg *__rte_restrict dseg, 1292 uint8_t *buf, 1293 unsigned int len, 1294 unsigned int olx __rte_unused) 1295 1296 { 1297 uintptr_t dst, src; 1298 1299 MLX5_ASSERT(len); 1300 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 1301 dseg->bcount = rte_cpu_to_be_32(len); 1302 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1303 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1304 1305 return; 1306 } 1307 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1308 /* Unrolled implementation of generic rte_memcpy. */ 1309 dst = (uintptr_t)&dseg->inline_data[0]; 1310 src = (uintptr_t)buf; 1311 if (len & 0x08) { 1312 #ifdef RTE_ARCH_STRICT_ALIGN 1313 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 1314 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1315 dst += sizeof(uint32_t); 1316 src += sizeof(uint32_t); 1317 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1318 dst += sizeof(uint32_t); 1319 src += sizeof(uint32_t); 1320 #else 1321 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 1322 dst += sizeof(uint64_t); 1323 src += sizeof(uint64_t); 1324 #endif 1325 } 1326 if (len & 0x04) { 1327 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1328 dst += sizeof(uint32_t); 1329 src += sizeof(uint32_t); 1330 } 1331 if (len & 0x02) { 1332 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 1333 dst += sizeof(uint16_t); 1334 src += sizeof(uint16_t); 1335 } 1336 if (len & 0x01) 1337 *(uint8_t *)dst = *(uint8_t *)src; 1338 } 1339 1340 /** 1341 * Build the Data Segment of inlined data from single 1342 * segment packet, no VLAN insertion. 1343 * 1344 * @param txq 1345 * Pointer to TX queue structure. 1346 * @param loc 1347 * Pointer to burst routine local context. 1348 * @param dseg 1349 * Pointer to WQE to fill with built Data Segment. 1350 * @param buf 1351 * Data buffer to point. 1352 * @param len 1353 * Data buffer length. 1354 * @param olx 1355 * Configured Tx offloads mask. It is fully defined at 1356 * compile time and may be used for optimization. 1357 * 1358 * @return 1359 * Pointer to the next Data Segment after inlined data. 1360 * Ring buffer wraparound check is needed. We do not do it here because it 1361 * may not be needed for the last packet in the eMPW session. 1362 */ 1363 static __rte_always_inline struct mlx5_wqe_dseg * 1364 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 1365 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1366 struct mlx5_wqe_dseg *__rte_restrict dseg, 1367 uint8_t *buf, 1368 unsigned int len, 1369 unsigned int olx __rte_unused) 1370 { 1371 unsigned int part; 1372 uint8_t *pdst; 1373 1374 if (!MLX5_TXOFF_CONFIG(MPW)) { 1375 /* Store the descriptor byte counter for eMPW sessions. */ 1376 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1377 pdst = &dseg->inline_data[0]; 1378 } else { 1379 /* The entire legacy MPW session counter is stored on close. */ 1380 pdst = (uint8_t *)dseg; 1381 } 1382 /* 1383 * The WQEBB space availability is checked by caller. 1384 * Here we should be aware of WQE ring buffer wraparound only. 1385 */ 1386 part = (uint8_t *)txq->wqes_end - pdst; 1387 part = RTE_MIN(part, len); 1388 do { 1389 rte_memcpy(pdst, buf, part); 1390 len -= part; 1391 if (likely(!len)) { 1392 pdst += part; 1393 if (!MLX5_TXOFF_CONFIG(MPW)) 1394 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1395 /* Note: no final wraparound check here. 
*/ 1396 return (struct mlx5_wqe_dseg *)pdst; 1397 } 1398 pdst = (uint8_t *)txq->wqes; 1399 buf += part; 1400 part = len; 1401 } while (true); 1402 } 1403 1404 /** 1405 * Build the Data Segment of inlined data from single 1406 * segment packet with VLAN insertion. 1407 * 1408 * @param txq 1409 * Pointer to TX queue structure. 1410 * @param loc 1411 * Pointer to burst routine local context. 1412 * @param dseg 1413 * Pointer to the dseg fill with built Data Segment. 1414 * @param buf 1415 * Data buffer to point. 1416 * @param len 1417 * Data buffer length. 1418 * @param olx 1419 * Configured Tx offloads mask. It is fully defined at 1420 * compile time and may be used for optimization. 1421 * 1422 * @return 1423 * Pointer to the next Data Segment after inlined data. 1424 * Ring buffer wraparound check is needed. 1425 */ 1426 static __rte_always_inline struct mlx5_wqe_dseg * 1427 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 1428 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1429 struct mlx5_wqe_dseg *__rte_restrict dseg, 1430 uint8_t *buf, 1431 unsigned int len, 1432 unsigned int olx __rte_unused) 1433 1434 { 1435 unsigned int part; 1436 uint8_t *pdst; 1437 1438 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 1439 if (!MLX5_TXOFF_CONFIG(MPW)) { 1440 /* Store the descriptor byte counter for eMPW sessions. */ 1441 dseg->bcount = rte_cpu_to_be_32 1442 ((len + sizeof(struct rte_vlan_hdr)) | 1443 MLX5_ETH_WQE_DATA_INLINE); 1444 pdst = &dseg->inline_data[0]; 1445 } else { 1446 /* The entire legacy MPW session counter is stored on close. */ 1447 pdst = (uint8_t *)dseg; 1448 } 1449 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 1450 buf += MLX5_DSEG_MIN_INLINE_SIZE; 1451 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 1452 len -= MLX5_DSEG_MIN_INLINE_SIZE; 1453 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ 1454 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 1455 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 1456 pdst = (uint8_t *)txq->wqes; 1457 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 1458 loc->mbuf->vlan_tci); 1459 pdst += sizeof(struct rte_vlan_hdr); 1460 /* 1461 * The WQEBB space availability is checked by caller. 1462 * Here we should be aware of WQE ring buffer wraparound only. 1463 */ 1464 part = (uint8_t *)txq->wqes_end - pdst; 1465 part = RTE_MIN(part, len); 1466 do { 1467 rte_memcpy(pdst, buf, part); 1468 len -= part; 1469 if (likely(!len)) { 1470 pdst += part; 1471 if (!MLX5_TXOFF_CONFIG(MPW)) 1472 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1473 /* Note: no final wraparound check here. */ 1474 return (struct mlx5_wqe_dseg *)pdst; 1475 } 1476 pdst = (uint8_t *)txq->wqes; 1477 buf += part; 1478 part = len; 1479 } while (true); 1480 } 1481 1482 /** 1483 * Build the Ethernet Segment with optionally inlined data with 1484 * VLAN insertion and following Data Segments (if any) from 1485 * multi-segment packet. Used by ordinary send and TSO. 1486 * 1487 * @param txq 1488 * Pointer to TX queue structure. 1489 * @param loc 1490 * Pointer to burst routine local context. 1491 * @param wqe 1492 * Pointer to WQE to fill with built Ethernet/Data Segments. 1493 * @param vlan 1494 * Length of VLAN header to insert, 0 means no VLAN insertion. 1495 * @param inlen 1496 * Data length to inline. For TSO this parameter specifies exact value, 1497 * for ordinary send routine can be aligned by caller to provide better WQE 1498 * space saving and data buffer start address alignment. 1499 * This length includes VLAN header being inserted. 
1500 * @param tso 1501 * Zero means ordinary send, inlined data can be extended, 1502 * otherwise this is TSO, inlined data length is fixed. 1503 * @param olx 1504 * Configured Tx offloads mask. It is fully defined at 1505 * compile time and may be used for optimization. 1506 * 1507 * @return 1508 * Actual size of built WQE in segments. 1509 */ 1510 static __rte_always_inline unsigned int 1511 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 1512 struct mlx5_txq_local *__rte_restrict loc, 1513 struct mlx5_wqe *__rte_restrict wqe, 1514 unsigned int vlan, 1515 unsigned int inlen, 1516 unsigned int tso, 1517 unsigned int olx __rte_unused) 1518 { 1519 struct mlx5_wqe_dseg *__rte_restrict dseg; 1520 unsigned int ds; 1521 1522 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 1523 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 1524 loc->mbuf_off = 0; 1525 1526 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 1527 if (!loc->mbuf_nseg) 1528 goto dseg_done; 1529 /* 1530 * There are still some mbuf remaining, not inlined. 1531 * The first mbuf may be partially inlined and we 1532 * must process the possible non-zero data offset. 1533 */ 1534 if (loc->mbuf_off) { 1535 unsigned int dlen; 1536 uint8_t *dptr; 1537 1538 /* 1539 * Exhausted packets must be dropped before. 1540 * Non-zero offset means there are some data 1541 * remained in the packet. 1542 */ 1543 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 1544 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 1545 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1546 loc->mbuf_off); 1547 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 1548 /* 1549 * Build the pointer/minimal Data Segment. 1550 * Do ring buffer wrapping check in advance. 1551 */ 1552 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1553 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1554 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 1555 /* Store the mbuf to be freed on completion. */ 1556 MLX5_ASSERT(loc->elts_free); 1557 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1558 --loc->elts_free; 1559 ++dseg; 1560 if (--loc->mbuf_nseg == 0) 1561 goto dseg_done; 1562 loc->mbuf = loc->mbuf->next; 1563 loc->mbuf_off = 0; 1564 } 1565 do { 1566 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1567 struct rte_mbuf *mbuf; 1568 1569 /* Zero length segment found, just skip. */ 1570 mbuf = loc->mbuf; 1571 loc->mbuf = loc->mbuf->next; 1572 rte_pktmbuf_free_seg(mbuf); 1573 if (--loc->mbuf_nseg == 0) 1574 break; 1575 } else { 1576 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1577 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1578 mlx5_tx_dseg_iptr 1579 (txq, loc, dseg, 1580 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1581 rte_pktmbuf_data_len(loc->mbuf), olx); 1582 MLX5_ASSERT(loc->elts_free); 1583 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1584 --loc->elts_free; 1585 ++dseg; 1586 if (--loc->mbuf_nseg == 0) 1587 break; 1588 loc->mbuf = loc->mbuf->next; 1589 } 1590 } while (true); 1591 1592 dseg_done: 1593 /* Calculate actual segments used from the dseg pointer. */ 1594 if ((uintptr_t)wqe < (uintptr_t)dseg) 1595 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 1596 else 1597 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 1598 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 1599 return ds; 1600 } 1601 1602 /** 1603 * The routine checks timestamp flag in the current packet, 1604 * and push WAIT WQE into the queue if scheduling is required. 1605 * 1606 * @param txq 1607 * Pointer to TX queue structure. 
1608 * @param loc 1609 * Pointer to burst routine local context. 1610 * @param olx 1611 * Configured Tx offloads mask. It is fully defined at 1612 * compile time and may be used for optimization. 1613 * 1614 * @return 1615 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1616 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 1617 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 1618 * Local context variables partially updated. 1619 */ 1620 static __rte_always_inline enum mlx5_txcmp_code 1621 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 1622 struct mlx5_txq_local *restrict loc, 1623 unsigned int olx) 1624 { 1625 if (MLX5_TXOFF_CONFIG(TXPP) && 1626 loc->mbuf->ol_flags & txq->ts_mask) { 1627 struct mlx5_wqe *wqe; 1628 uint64_t ts; 1629 int32_t wci; 1630 1631 /* 1632 * Estimate the required space quickly and roughly. 1633 * We would like to ensure the packet can be pushed 1634 * to the queue and we won't get the orphan WAIT WQE. 1635 */ 1636 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 1637 loc->elts_free < NB_SEGS(loc->mbuf)) 1638 return MLX5_TXCMP_CODE_EXIT; 1639 /* Convert the timestamp into completion to wait. */ 1640 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 1641 wci = mlx5_txpp_convert_tx_ts(txq->sh, ts); 1642 if (unlikely(wci < 0)) 1643 return MLX5_TXCMP_CODE_SINGLE; 1644 /* Build the WAIT WQE with specified completion. */ 1645 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1646 mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx); 1647 mlx5_tx_wseg_init(txq, loc, wqe, wci, olx); 1648 ++txq->wqe_ci; 1649 --loc->wqe_free; 1650 return MLX5_TXCMP_CODE_MULTI; 1651 } 1652 return MLX5_TXCMP_CODE_SINGLE; 1653 } 1654 1655 /** 1656 * Tx one packet function for multi-segment TSO. Supports all 1657 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 1658 * sends one packet per WQE. 1659 * 1660 * This routine is responsible for storing processed mbuf 1661 * into elts ring buffer and update elts_head. 1662 * 1663 * @param txq 1664 * Pointer to TX queue structure. 1665 * @param loc 1666 * Pointer to burst routine local context. 1667 * @param olx 1668 * Configured Tx offloads mask. It is fully defined at 1669 * compile time and may be used for optimization. 1670 * 1671 * @return 1672 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1673 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1674 * Local context variables partially updated. 1675 */ 1676 static __rte_always_inline enum mlx5_txcmp_code 1677 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 1678 struct mlx5_txq_local *__rte_restrict loc, 1679 unsigned int olx) 1680 { 1681 struct mlx5_wqe *__rte_restrict wqe; 1682 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 1683 1684 if (MLX5_TXOFF_CONFIG(TXPP)) { 1685 enum mlx5_txcmp_code wret; 1686 1687 /* Generate WAIT for scheduling if requested. */ 1688 wret = mlx5_tx_schedule_send(txq, loc, olx); 1689 if (wret == MLX5_TXCMP_CODE_EXIT) 1690 return MLX5_TXCMP_CODE_EXIT; 1691 if (wret == MLX5_TXCMP_CODE_ERROR) 1692 return MLX5_TXCMP_CODE_ERROR; 1693 } 1694 /* 1695 * Calculate data length to be inlined to estimate 1696 * the required space in WQE ring buffer. 
1697 */ 1698 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1699 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1700 vlan = sizeof(struct rte_vlan_hdr); 1701 inlen = loc->mbuf->l2_len + vlan + 1702 loc->mbuf->l3_len + loc->mbuf->l4_len; 1703 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 1704 return MLX5_TXCMP_CODE_ERROR; 1705 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 1706 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 1707 /* Packet must contain all TSO headers. */ 1708 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 1709 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 1710 inlen > (dlen + vlan))) 1711 return MLX5_TXCMP_CODE_ERROR; 1712 MLX5_ASSERT(inlen >= txq->inlen_mode); 1713 /* 1714 * Check whether there are enough free WQEBBs: 1715 * - Control Segment 1716 * - Ethernet Segment 1717 * - First Segment of inlined Ethernet data 1718 * - ... data continued ... 1719 * - Data Segments of pointer/min inline type 1720 */ 1721 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 1722 MLX5_ESEG_MIN_INLINE_SIZE + 1723 MLX5_WSEG_SIZE + 1724 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 1725 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1726 return MLX5_TXCMP_CODE_EXIT; 1727 /* Check for maximal WQE size. */ 1728 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 1729 return MLX5_TXCMP_CODE_ERROR; 1730 #ifdef MLX5_PMD_SOFT_COUNTERS 1731 /* Update sent data bytes/packets counters. */ 1732 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 1733 loc->mbuf->tso_segsz; 1734 /* 1735 * One will be added for mbuf itself at the end of the mlx5_tx_burst 1736 * from loc->pkts_sent field. 1737 */ 1738 --ntcp; 1739 txq->stats.opackets += ntcp; 1740 txq->stats.obytes += dlen + vlan + ntcp * inlen; 1741 #endif 1742 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1743 loc->wqe_last = wqe; 1744 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 1745 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 1746 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 1747 txq->wqe_ci += (ds + 3) / 4; 1748 loc->wqe_free -= (ds + 3) / 4; 1749 return MLX5_TXCMP_CODE_MULTI; 1750 } 1751 1752 /** 1753 * Tx one packet function for multi-segment SEND. Supports all types of Tx 1754 * offloads, uses MLX5_OPCODE_SEND to build WQEs, sends one packet per WQE, 1755 * without any data inlining in Ethernet Segment. 1756 * 1757 * This routine is responsible for storing processed mbuf 1758 * into elts ring buffer and update elts_head. 1759 * 1760 * @param txq 1761 * Pointer to TX queue structure. 1762 * @param loc 1763 * Pointer to burst routine local context. 1764 * @param olx 1765 * Configured Tx offloads mask. It is fully defined at 1766 * compile time and may be used for optimization. 1767 * 1768 * @return 1769 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1770 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1771 * Local context variables partially updated. 1772 */ 1773 static __rte_always_inline enum mlx5_txcmp_code 1774 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 1775 struct mlx5_txq_local *__rte_restrict loc, 1776 unsigned int olx) 1777 { 1778 struct mlx5_wqe_dseg *__rte_restrict dseg; 1779 struct mlx5_wqe *__rte_restrict wqe; 1780 unsigned int ds, nseg; 1781 1782 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1783 if (MLX5_TXOFF_CONFIG(TXPP)) { 1784 enum mlx5_txcmp_code wret; 1785 1786 /* Generate WAIT for scheduling if requested. 
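		 * (Editor's note, illustrative: the WAIT WQE produced by
		 * mlx5_tx_schedule_send() takes one WQEBB of its own and holds
		 * back the WQEs queued after it until the completion index
		 * derived from the packet timestamp is reached.)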
*/ 1787 wret = mlx5_tx_schedule_send(txq, loc, olx); 1788 if (wret == MLX5_TXCMP_CODE_EXIT) 1789 return MLX5_TXCMP_CODE_EXIT; 1790 if (wret == MLX5_TXCMP_CODE_ERROR) 1791 return MLX5_TXCMP_CODE_ERROR; 1792 } 1793 /* 1794 * No inline at all, it means the CPU cycles saving is prioritized at 1795 * configuration, we should not copy any packet data to WQE. 1796 */ 1797 nseg = NB_SEGS(loc->mbuf); 1798 ds = 2 + nseg; 1799 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1800 return MLX5_TXCMP_CODE_EXIT; 1801 /* Check for maximal WQE size. */ 1802 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 1803 return MLX5_TXCMP_CODE_ERROR; 1804 /* 1805 * Some Tx offloads may cause an error if packet is not long enough, 1806 * check against assumed minimal length. 1807 */ 1808 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 1809 return MLX5_TXCMP_CODE_ERROR; 1810 #ifdef MLX5_PMD_SOFT_COUNTERS 1811 /* Update sent data bytes counter. */ 1812 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 1813 if (MLX5_TXOFF_CONFIG(VLAN) && 1814 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1815 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 1816 #endif 1817 /* 1818 * SEND WQE, one WQEBB: 1819 * - Control Segment, SEND opcode 1820 * - Ethernet Segment, optional VLAN, no inline 1821 * - Data Segments, pointer only type 1822 */ 1823 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1824 loc->wqe_last = wqe; 1825 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 1826 mlx5_tx_eseg_none(txq, loc, wqe, olx); 1827 dseg = &wqe->dseg[0]; 1828 do { 1829 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1830 struct rte_mbuf *mbuf; 1831 1832 /* 1833 * Zero length segment found, have to correct total 1834 * size of WQE in segments. 1835 * It is supposed to be rare occasion, so in normal 1836 * case (no zero length segments) we avoid extra 1837 * writing to the Control Segment. 1838 */ 1839 --ds; 1840 wqe->cseg.sq_ds -= RTE_BE32(1); 1841 mbuf = loc->mbuf; 1842 loc->mbuf = mbuf->next; 1843 rte_pktmbuf_free_seg(mbuf); 1844 if (--nseg == 0) 1845 break; 1846 } else { 1847 mlx5_tx_dseg_ptr 1848 (txq, loc, dseg, 1849 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1850 rte_pktmbuf_data_len(loc->mbuf), olx); 1851 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1852 --loc->elts_free; 1853 if (--nseg == 0) 1854 break; 1855 ++dseg; 1856 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1857 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1858 loc->mbuf = loc->mbuf->next; 1859 } 1860 } while (true); 1861 txq->wqe_ci += (ds + 3) / 4; 1862 loc->wqe_free -= (ds + 3) / 4; 1863 return MLX5_TXCMP_CODE_MULTI; 1864 } 1865 1866 /** 1867 * Tx one packet function for multi-segment SEND. Supports all 1868 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 1869 * sends one packet per WQE, with data inlining in 1870 * Ethernet Segment and minimal Data Segments. 1871 * 1872 * This routine is responsible for storing processed mbuf 1873 * into elts ring buffer and update elts_head. 1874 * 1875 * @param txq 1876 * Pointer to TX queue structure. 1877 * @param loc 1878 * Pointer to burst routine local context. 1879 * @param olx 1880 * Configured Tx offloads mask. It is fully defined at 1881 * compile time and may be used for optimization. 1882 * 1883 * @return 1884 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1885 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1886 * Local context variables partially updated. 
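 *
 * Editor's sketch of the inlining decision (not part of the original
 * documentation): when the packet exceeds txq->inlen_send or carries the
 * no-inline hint, the routine either keeps txq->inlen_mode bytes inlined
 * (minimal inline mode), inlines only the minimal header to do software
 * VLAN insertion when the hardware cannot, or falls back to
 * mlx5_tx_packet_multi_send() when inlining brings no benefit.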
1887 */ 1888 static __rte_always_inline enum mlx5_txcmp_code 1889 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 1890 struct mlx5_txq_local *__rte_restrict loc, 1891 unsigned int olx) 1892 { 1893 struct mlx5_wqe *__rte_restrict wqe; 1894 unsigned int ds, inlen, dlen, vlan = 0; 1895 1896 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 1897 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1898 if (MLX5_TXOFF_CONFIG(TXPP)) { 1899 enum mlx5_txcmp_code wret; 1900 1901 /* Generate WAIT for scheduling if requested. */ 1902 wret = mlx5_tx_schedule_send(txq, loc, olx); 1903 if (wret == MLX5_TXCMP_CODE_EXIT) 1904 return MLX5_TXCMP_CODE_EXIT; 1905 if (wret == MLX5_TXCMP_CODE_ERROR) 1906 return MLX5_TXCMP_CODE_ERROR; 1907 } 1908 /* 1909 * First calculate data length to be inlined 1910 * to estimate the required space for WQE. 1911 */ 1912 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1913 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1914 vlan = sizeof(struct rte_vlan_hdr); 1915 inlen = dlen + vlan; 1916 /* Check against minimal length. */ 1917 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 1918 return MLX5_TXCMP_CODE_ERROR; 1919 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 1920 if (inlen > txq->inlen_send || 1921 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1922 struct rte_mbuf *mbuf; 1923 unsigned int nxlen; 1924 uintptr_t start; 1925 1926 mbuf = loc->mbuf; 1927 nxlen = rte_pktmbuf_data_len(mbuf); 1928 /* 1929 * Packet length exceeds the allowed inline data length, 1930 * check whether the minimal inlining is required. 1931 */ 1932 if (txq->inlen_mode) { 1933 MLX5_ASSERT(txq->inlen_mode >= 1934 MLX5_ESEG_MIN_INLINE_SIZE); 1935 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 1936 inlen = txq->inlen_mode; 1937 } else if (vlan && !txq->vlan_en) { 1938 /* 1939 * VLAN insertion is requested and hardware does not 1940 * support the offload, will do with software inline. 1941 */ 1942 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 1943 } else if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE || 1944 nxlen > txq->inlen_send) { 1945 return mlx5_tx_packet_multi_send(txq, loc, olx); 1946 } else { 1947 goto do_first; 1948 } 1949 /* 1950 * Now we know the minimal amount of data is requested 1951 * to inline. Check whether we should inline the buffers 1952 * from the chain beginning to eliminate some mbufs. 1953 */ 1954 if (unlikely(nxlen <= txq->inlen_send)) { 1955 /* We can inline first mbuf at least. */ 1956 if (nxlen < inlen) { 1957 unsigned int smlen; 1958 1959 /* Scan mbufs till inlen filled. */ 1960 do { 1961 smlen = nxlen; 1962 mbuf = NEXT(mbuf); 1963 MLX5_ASSERT(mbuf); 1964 nxlen = rte_pktmbuf_data_len(mbuf); 1965 nxlen += smlen; 1966 } while (unlikely(nxlen < inlen)); 1967 if (unlikely(nxlen > txq->inlen_send)) { 1968 /* We cannot inline entire mbuf. */ 1969 smlen = inlen - smlen; 1970 start = rte_pktmbuf_mtod_offset 1971 (mbuf, uintptr_t, smlen); 1972 goto do_align; 1973 } 1974 } 1975 do_first: 1976 do { 1977 inlen = nxlen; 1978 mbuf = NEXT(mbuf); 1979 /* There should be not end of packet. */ 1980 MLX5_ASSERT(mbuf); 1981 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 1982 } while (unlikely(nxlen < txq->inlen_send)); 1983 } 1984 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 1985 /* 1986 * Check whether we can do inline to align start 1987 * address of data buffer to cacheline. 
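		 * Editor's illustration (assuming a 64-byte
		 * RTE_CACHE_LINE_SIZE): if the first non-inlined byte would
		 * start at an address ending in 0x38, then
		 * (~start + 1) & 63 = 8, so inlining 8 more bytes moves the
		 * remaining data to a cache line boundary; the extension is
		 * applied only if the enlarged inline part still fits into
		 * txq->inlen_send.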
1988 */ 1989 do_align: 1990 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 1991 if (unlikely(start)) { 1992 start += inlen; 1993 if (start <= txq->inlen_send) 1994 inlen = start; 1995 } 1996 } 1997 /* 1998 * Check whether there are enough free WQEBBs: 1999 * - Control Segment 2000 * - Ethernet Segment 2001 * - First Segment of inlined Ethernet data 2002 * - ... data continued ... 2003 * - Data Segments of pointer/min inline type 2004 * 2005 * Estimate the number of Data Segments conservatively, 2006 * supposing no any mbufs is being freed during inlining. 2007 */ 2008 MLX5_ASSERT(inlen <= txq->inlen_send); 2009 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 2010 MLX5_ESEG_MIN_INLINE_SIZE + 2011 MLX5_WSEG_SIZE + 2012 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2013 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 2014 return MLX5_TXCMP_CODE_EXIT; 2015 /* Check for maximal WQE size. */ 2016 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 2017 return MLX5_TXCMP_CODE_ERROR; 2018 #ifdef MLX5_PMD_SOFT_COUNTERS 2019 /* Update sent data bytes/packets counters. */ 2020 txq->stats.obytes += dlen + vlan; 2021 #endif 2022 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2023 loc->wqe_last = wqe; 2024 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 2025 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 2026 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2027 txq->wqe_ci += (ds + 3) / 4; 2028 loc->wqe_free -= (ds + 3) / 4; 2029 return MLX5_TXCMP_CODE_MULTI; 2030 } 2031 2032 /** 2033 * Tx burst function for multi-segment packets. Supports all 2034 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 2035 * sends one packet per WQE. Function stops sending if it 2036 * encounters the single-segment packet. 2037 * 2038 * This routine is responsible for storing processed mbuf 2039 * into elts ring buffer and update elts_head. 2040 * 2041 * @param txq 2042 * Pointer to TX queue structure. 2043 * @param[in] pkts 2044 * Packets to transmit. 2045 * @param pkts_n 2046 * Number of packets in array. 2047 * @param loc 2048 * Pointer to burst routine local context. 2049 * @param olx 2050 * Configured Tx offloads mask. It is fully defined at 2051 * compile time and may be used for optimization. 2052 * 2053 * @return 2054 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2055 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2056 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2057 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 2058 * Local context variables updated. 2059 */ 2060 static __rte_always_inline enum mlx5_txcmp_code 2061 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 2062 struct rte_mbuf **__rte_restrict pkts, 2063 unsigned int pkts_n, 2064 struct mlx5_txq_local *__rte_restrict loc, 2065 unsigned int olx) 2066 { 2067 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2068 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2069 pkts += loc->pkts_sent + 1; 2070 pkts_n -= loc->pkts_sent; 2071 for (;;) { 2072 enum mlx5_txcmp_code ret; 2073 2074 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 2075 /* 2076 * Estimate the number of free elts quickly but conservatively. 2077 * Some segment may be fully inlined and freed, 2078 * ignore this here - precise estimation is costly. 2079 */ 2080 if (loc->elts_free < NB_SEGS(loc->mbuf)) 2081 return MLX5_TXCMP_CODE_EXIT; 2082 if (MLX5_TXOFF_CONFIG(TSO) && 2083 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { 2084 /* Proceed with multi-segment TSO. 
*/ 2085 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 2086 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 2087 /* Proceed with multi-segment SEND with inlining. */ 2088 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 2089 } else { 2090 /* Proceed with multi-segment SEND w/o inlining. */ 2091 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 2092 } 2093 if (ret == MLX5_TXCMP_CODE_EXIT) 2094 return MLX5_TXCMP_CODE_EXIT; 2095 if (ret == MLX5_TXCMP_CODE_ERROR) 2096 return MLX5_TXCMP_CODE_ERROR; 2097 /* WQE is built, go to the next packet. */ 2098 ++loc->pkts_sent; 2099 --pkts_n; 2100 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2101 return MLX5_TXCMP_CODE_EXIT; 2102 loc->mbuf = *pkts++; 2103 if (pkts_n > 1) 2104 rte_prefetch0(*pkts); 2105 if (likely(NB_SEGS(loc->mbuf) > 1)) 2106 continue; 2107 /* Here ends the series of multi-segment packets. */ 2108 if (MLX5_TXOFF_CONFIG(TSO) && 2109 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2110 return MLX5_TXCMP_CODE_TSO; 2111 return MLX5_TXCMP_CODE_SINGLE; 2112 } 2113 MLX5_ASSERT(false); 2114 } 2115 2116 /** 2117 * Tx burst function for single-segment packets with TSO. 2118 * Supports all types of Tx offloads, except multi-packets. 2119 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 2120 * Function stops sending if it encounters the multi-segment 2121 * packet or packet without TSO requested. 2122 * 2123 * The routine is responsible for storing processed mbuf into elts ring buffer 2124 * and update elts_head if inline offloads is requested due to possible early 2125 * freeing of the inlined mbufs (can not store pkts array in elts as a batch). 2126 * 2127 * @param txq 2128 * Pointer to TX queue structure. 2129 * @param[in] pkts 2130 * Packets to transmit. 2131 * @param pkts_n 2132 * Number of packets in array. 2133 * @param loc 2134 * Pointer to burst routine local context. 2135 * @param olx 2136 * Configured Tx offloads mask. It is fully defined at 2137 * compile time and may be used for optimization. 2138 * 2139 * @return 2140 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2141 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2142 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2143 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2144 * Local context variables updated. 2145 */ 2146 static __rte_always_inline enum mlx5_txcmp_code 2147 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 2148 struct rte_mbuf **__rte_restrict pkts, 2149 unsigned int pkts_n, 2150 struct mlx5_txq_local *__rte_restrict loc, 2151 unsigned int olx) 2152 { 2153 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2154 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2155 pkts += loc->pkts_sent + 1; 2156 pkts_n -= loc->pkts_sent; 2157 for (;;) { 2158 struct mlx5_wqe_dseg *__rte_restrict dseg; 2159 struct mlx5_wqe *__rte_restrict wqe; 2160 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 2161 uint8_t *dptr; 2162 2163 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2164 if (MLX5_TXOFF_CONFIG(TXPP)) { 2165 enum mlx5_txcmp_code wret; 2166 2167 /* Generate WAIT for scheduling if requested. 
*/ 2168 wret = mlx5_tx_schedule_send(txq, loc, olx); 2169 if (wret == MLX5_TXCMP_CODE_EXIT) 2170 return MLX5_TXCMP_CODE_EXIT; 2171 if (wret == MLX5_TXCMP_CODE_ERROR) 2172 return MLX5_TXCMP_CODE_ERROR; 2173 } 2174 dlen = rte_pktmbuf_data_len(loc->mbuf); 2175 if (MLX5_TXOFF_CONFIG(VLAN) && 2176 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2177 vlan = sizeof(struct rte_vlan_hdr); 2178 } 2179 /* 2180 * First calculate the WQE size to check 2181 * whether we have enough space in ring buffer. 2182 */ 2183 hlen = loc->mbuf->l2_len + vlan + 2184 loc->mbuf->l3_len + loc->mbuf->l4_len; 2185 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 2186 return MLX5_TXCMP_CODE_ERROR; 2187 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 2188 hlen += loc->mbuf->outer_l2_len + 2189 loc->mbuf->outer_l3_len; 2190 /* Segment must contain all TSO headers. */ 2191 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 2192 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 2193 hlen > (dlen + vlan))) 2194 return MLX5_TXCMP_CODE_ERROR; 2195 /* 2196 * Check whether there are enough free WQEBBs: 2197 * - Control Segment 2198 * - Ethernet Segment 2199 * - First Segment of inlined Ethernet data 2200 * - ... data continued ... 2201 * - Finishing Data Segment of pointer type 2202 */ 2203 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 2204 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2205 if (loc->wqe_free < ((ds + 3) / 4)) 2206 return MLX5_TXCMP_CODE_EXIT; 2207 #ifdef MLX5_PMD_SOFT_COUNTERS 2208 /* Update sent data bytes/packets counters. */ 2209 ntcp = (dlen + vlan - hlen + 2210 loc->mbuf->tso_segsz - 1) / 2211 loc->mbuf->tso_segsz; 2212 /* 2213 * One will be added for mbuf itself at the end 2214 * of the mlx5_tx_burst from loc->pkts_sent field. 2215 */ 2216 --ntcp; 2217 txq->stats.opackets += ntcp; 2218 txq->stats.obytes += dlen + vlan + ntcp * hlen; 2219 #endif 2220 /* 2221 * Build the TSO WQE: 2222 * - Control Segment 2223 * - Ethernet Segment with hlen bytes inlined 2224 * - Data Segment of pointer type 2225 */ 2226 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2227 loc->wqe_last = wqe; 2228 mlx5_tx_cseg_init(txq, loc, wqe, ds, 2229 MLX5_OPCODE_TSO, olx); 2230 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 2231 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 2232 dlen -= hlen - vlan; 2233 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2234 /* 2235 * WQE is built, update the loop parameters 2236 * and go to the next packet. 2237 */ 2238 txq->wqe_ci += (ds + 3) / 4; 2239 loc->wqe_free -= (ds + 3) / 4; 2240 if (MLX5_TXOFF_CONFIG(INLINE)) 2241 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2242 --loc->elts_free; 2243 ++loc->pkts_sent; 2244 --pkts_n; 2245 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2246 return MLX5_TXCMP_CODE_EXIT; 2247 loc->mbuf = *pkts++; 2248 if (pkts_n > 1) 2249 rte_prefetch0(*pkts); 2250 if (MLX5_TXOFF_CONFIG(MULTI) && 2251 unlikely(NB_SEGS(loc->mbuf) > 1)) 2252 return MLX5_TXCMP_CODE_MULTI; 2253 if (likely(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG))) 2254 return MLX5_TXCMP_CODE_SINGLE; 2255 /* Continue with the next TSO packet. */ 2256 } 2257 MLX5_ASSERT(false); 2258 } 2259 2260 /** 2261 * Analyze the packet and select the best method to send. 2262 * 2263 * @param txq 2264 * Pointer to TX queue structure. 2265 * @param loc 2266 * Pointer to burst routine local context. 2267 * @param olx 2268 * Configured Tx offloads mask. It is fully defined at 2269 * compile time and may be used for optimization. 
2270 * @param newp 2271 * The predefined flag whether do complete check for 2272 * multi-segment packets and TSO. 2273 * 2274 * @return 2275 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2276 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 2277 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 2278 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 2279 */ 2280 static __rte_always_inline enum mlx5_txcmp_code 2281 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 2282 struct mlx5_txq_local *__rte_restrict loc, 2283 unsigned int olx, 2284 bool newp) 2285 { 2286 /* Check for multi-segment packet. */ 2287 if (newp && 2288 MLX5_TXOFF_CONFIG(MULTI) && 2289 unlikely(NB_SEGS(loc->mbuf) > 1)) 2290 return MLX5_TXCMP_CODE_MULTI; 2291 /* Check for TSO packet. */ 2292 if (newp && 2293 MLX5_TXOFF_CONFIG(TSO) && 2294 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2295 return MLX5_TXCMP_CODE_TSO; 2296 /* Check if eMPW is enabled at all. */ 2297 if (!MLX5_TXOFF_CONFIG(EMPW)) 2298 return MLX5_TXCMP_CODE_SINGLE; 2299 /* Check if eMPW can be engaged. */ 2300 if (MLX5_TXOFF_CONFIG(VLAN) && 2301 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) && 2302 (!MLX5_TXOFF_CONFIG(INLINE) || 2303 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 2304 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 2305 /* 2306 * eMPW does not support VLAN insertion offload, we have to 2307 * inline the entire packet but packet is too long for inlining. 2308 */ 2309 return MLX5_TXCMP_CODE_SINGLE; 2310 } 2311 return MLX5_TXCMP_CODE_EMPW; 2312 } 2313 2314 /** 2315 * Check the next packet attributes to match with the eMPW batch ones. 2316 * In addition, for legacy MPW the packet length is checked either. 2317 * 2318 * @param txq 2319 * Pointer to TX queue structure. 2320 * @param es 2321 * Pointer to Ethernet Segment of eMPW batch. 2322 * @param loc 2323 * Pointer to burst routine local context. 2324 * @param dlen 2325 * Length of previous packet in MPW descriptor. 2326 * @param olx 2327 * Configured Tx offloads mask. It is fully defined at 2328 * compile time and may be used for optimization. 2329 * 2330 * @return 2331 * true - packet match with eMPW batch attributes. 2332 * false - no match, eMPW should be restarted. 2333 */ 2334 static __rte_always_inline bool 2335 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, 2336 struct mlx5_wqe_eseg *__rte_restrict es, 2337 struct mlx5_txq_local *__rte_restrict loc, 2338 uint32_t dlen, 2339 unsigned int olx) 2340 { 2341 uint8_t swp_flags = 0; 2342 2343 /* Compare the checksum flags, if any. */ 2344 if (MLX5_TXOFF_CONFIG(CSUM) && 2345 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags) 2346 return false; 2347 /* Compare the Software Parser offsets and flags. */ 2348 if (MLX5_TXOFF_CONFIG(SWP) && 2349 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) || 2350 es->swp_flags != swp_flags)) 2351 return false; 2352 /* Fill metadata field if needed. */ 2353 if (MLX5_TXOFF_CONFIG(METADATA) && 2354 es->metadata != (loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 2355 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 0)) 2356 return false; 2357 /* Legacy MPW can send packets with the same length only. */ 2358 if (MLX5_TXOFF_CONFIG(MPW) && 2359 dlen != rte_pktmbuf_data_len(loc->mbuf)) 2360 return false; 2361 /* There must be no VLAN packets in eMPW loop. */ 2362 if (MLX5_TXOFF_CONFIG(VLAN)) 2363 MLX5_ASSERT(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN)); 2364 /* Check if the scheduling is requested. 
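	 * (Editor's note: a packet carrying a Tx scheduling timestamp must
	 * terminate the current eMPW batch, because it needs its own WAIT WQE
	 * to be pushed before its data.)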
 */
	if (MLX5_TXOFF_CONFIG(TXPP) &&
	    loc->mbuf->ol_flags & txq->ts_mask)
		return false;
	return true;
}

/**
 * Update send loop variables and WQE for eMPW loop without data inlining.
 * The number of Data Segments is equal to the number of sent packets.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param ds
 *   Number of packets (one Data Segment per packet).
 * @param slen
 *   Accumulated statistics, bytes sent.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq,
		   struct mlx5_txq_local *__rte_restrict loc,
		   unsigned int ds,
		   unsigned int slen,
		   unsigned int olx __rte_unused)
{
	MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE));
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Update sent data bytes counter. */
	txq->stats.obytes += slen;
#else
	(void)slen;
#endif
	loc->elts_free -= ds;
	loc->pkts_sent += ds;
	ds += 2;
	loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
	txq->wqe_ci += (ds + 3) / 4;
	loc->wqe_free -= (ds + 3) / 4;
}

/**
 * Update send loop variables and WQE for eMPW loop with data inlining.
 * Takes the total size of the descriptors and data pushed to the WQE.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param len
 *   Total size of descriptors and data in bytes.
 * @param slen
 *   Accumulated statistics, data bytes sent.
 * @param wqem
 *   The base WQE for the eMPW/MPW descriptor.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq,
		   struct mlx5_txq_local *__rte_restrict loc,
		   unsigned int len,
		   unsigned int slen,
		   struct mlx5_wqe *__rte_restrict wqem,
		   unsigned int olx __rte_unused)
{
	struct mlx5_wqe_dseg *dseg = &wqem->dseg[0];

	MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE));
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Update sent data bytes counter. */
	txq->stats.obytes += slen;
#else
	(void)slen;
#endif
	if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) {
		/*
		 * If the legacy MPW session contains inline packets,
		 * set the length of the single inline Data Segment
		 * and align the total length to the segment size.
		 */
		MLX5_ASSERT(len > sizeof(dseg->bcount));
		dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) |
						MLX5_ETH_WQE_DATA_INLINE);
		len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2;
	} else {
		/*
		 * The session is not legacy MPW or it contains
		 * data buffer pointer segments.
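		 * Editor's illustration (assuming MLX5_WSEG_SIZE is 16
		 * bytes): a session that consumed len = 96 bytes of Data
		 * Segment space yields 96 / 16 + 2 = 8 WQE segments, the
		 * extra two being the Control and Ethernet Segments, and
		 * therefore (8 + 3) / 4 = 2 WQEBBs are consumed from the
		 * ring.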
2466 */ 2467 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 2468 len = len / MLX5_WSEG_SIZE + 2; 2469 } 2470 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 2471 txq->wqe_ci += (len + 3) / 4; 2472 loc->wqe_free -= (len + 3) / 4; 2473 loc->wqe_last = wqem; 2474 } 2475 2476 /** 2477 * The set of Tx burst functions for single-segment packets without TSO 2478 * and with Multi-Packet Writing feature support. 2479 * Supports all types of Tx offloads, except multi-packets and TSO. 2480 * 2481 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends as many packet 2482 * per WQE as it can. If eMPW is not configured or packet can not be sent with 2483 * eMPW (VLAN insertion) the ordinary SEND opcode is used and only one packet 2484 * placed in WQE. 2485 * 2486 * Functions stop sending if it encounters the multi-segment packet or packet 2487 * with TSO requested. 2488 * 2489 * The routines are responsible for storing processed mbuf into elts ring buffer 2490 * and update elts_head if inlining offload is requested. Otherwise the copying 2491 * mbufs to elts can be postponed and completed at the end of burst routine. 2492 * 2493 * @param txq 2494 * Pointer to TX queue structure. 2495 * @param[in] pkts 2496 * Packets to transmit. 2497 * @param pkts_n 2498 * Number of packets in array. 2499 * @param loc 2500 * Pointer to burst routine local context. 2501 * @param olx 2502 * Configured Tx offloads mask. It is fully defined at 2503 * compile time and may be used for optimization. 2504 * 2505 * @return 2506 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2507 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2508 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2509 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 2510 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 2511 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 2512 * 2513 * Local context variables updated. 2514 * 2515 * 2516 * The routine sends packets with MLX5_OPCODE_EMPW 2517 * without inlining, this is dedicated optimized branch. 2518 * No VLAN insertion is supported. 2519 */ 2520 static __rte_always_inline enum mlx5_txcmp_code 2521 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 2522 struct rte_mbuf **__rte_restrict pkts, 2523 unsigned int pkts_n, 2524 struct mlx5_txq_local *__rte_restrict loc, 2525 unsigned int olx) 2526 { 2527 /* 2528 * Subroutine is the part of mlx5_tx_burst_single() and sends 2529 * single-segment packet with eMPW opcode without data inlining. 2530 */ 2531 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2532 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2533 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2534 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2535 pkts += loc->pkts_sent + 1; 2536 pkts_n -= loc->pkts_sent; 2537 for (;;) { 2538 struct mlx5_wqe_dseg *__rte_restrict dseg; 2539 struct mlx5_wqe_eseg *__rte_restrict eseg; 2540 enum mlx5_txcmp_code ret; 2541 unsigned int part, loop; 2542 unsigned int slen = 0; 2543 2544 next_empw: 2545 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2546 if (MLX5_TXOFF_CONFIG(TXPP)) { 2547 enum mlx5_txcmp_code wret; 2548 2549 /* Generate WAIT for scheduling if requested. */ 2550 wret = mlx5_tx_schedule_send(txq, loc, olx); 2551 if (wret == MLX5_TXCMP_CODE_EXIT) 2552 return MLX5_TXCMP_CODE_EXIT; 2553 if (wret == MLX5_TXCMP_CODE_ERROR) 2554 return MLX5_TXCMP_CODE_ERROR; 2555 } 2556 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 
2557 MLX5_MPW_MAX_PACKETS : 2558 MLX5_EMPW_MAX_PACKETS); 2559 if (unlikely(loc->elts_free < part)) { 2560 /* We have no enough elts to save all mbufs. */ 2561 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 2562 return MLX5_TXCMP_CODE_EXIT; 2563 /* But we still able to send at least minimal eMPW. */ 2564 part = loc->elts_free; 2565 } 2566 /* Check whether we have enough WQEs */ 2567 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 2568 if (unlikely(loc->wqe_free < 2569 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2570 return MLX5_TXCMP_CODE_EXIT; 2571 part = (loc->wqe_free * 4) - 2; 2572 } 2573 if (likely(part > 1)) 2574 rte_prefetch0(*pkts); 2575 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2576 /* 2577 * Build eMPW title WQEBB: 2578 * - Control Segment, eMPW opcode 2579 * - Ethernet Segment, no inline 2580 */ 2581 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 2582 MLX5_OPCODE_ENHANCED_MPSW, olx); 2583 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 2584 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2585 eseg = &loc->wqe_last->eseg; 2586 dseg = &loc->wqe_last->dseg[0]; 2587 loop = part; 2588 /* Store the packet length for legacy MPW. */ 2589 if (MLX5_TXOFF_CONFIG(MPW)) 2590 eseg->mss = rte_cpu_to_be_16 2591 (rte_pktmbuf_data_len(loc->mbuf)); 2592 for (;;) { 2593 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2594 #ifdef MLX5_PMD_SOFT_COUNTERS 2595 /* Update sent data bytes counter. */ 2596 slen += dlen; 2597 #endif 2598 mlx5_tx_dseg_ptr 2599 (txq, loc, dseg, 2600 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 2601 dlen, olx); 2602 if (unlikely(--loop == 0)) 2603 break; 2604 loc->mbuf = *pkts++; 2605 if (likely(loop > 1)) 2606 rte_prefetch0(*pkts); 2607 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2608 /* 2609 * Unroll the completion code to avoid 2610 * returning variable value - it results in 2611 * unoptimized sequent checking in caller. 2612 */ 2613 if (ret == MLX5_TXCMP_CODE_MULTI) { 2614 part -= loop; 2615 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2616 if (unlikely(!loc->elts_free || 2617 !loc->wqe_free)) 2618 return MLX5_TXCMP_CODE_EXIT; 2619 return MLX5_TXCMP_CODE_MULTI; 2620 } 2621 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2622 if (ret == MLX5_TXCMP_CODE_TSO) { 2623 part -= loop; 2624 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2625 if (unlikely(!loc->elts_free || 2626 !loc->wqe_free)) 2627 return MLX5_TXCMP_CODE_EXIT; 2628 return MLX5_TXCMP_CODE_TSO; 2629 } 2630 if (ret == MLX5_TXCMP_CODE_SINGLE) { 2631 part -= loop; 2632 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2633 if (unlikely(!loc->elts_free || 2634 !loc->wqe_free)) 2635 return MLX5_TXCMP_CODE_EXIT; 2636 return MLX5_TXCMP_CODE_SINGLE; 2637 } 2638 if (ret != MLX5_TXCMP_CODE_EMPW) { 2639 MLX5_ASSERT(false); 2640 part -= loop; 2641 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2642 return MLX5_TXCMP_CODE_ERROR; 2643 } 2644 /* 2645 * Check whether packet parameters coincide 2646 * within assumed eMPW batch: 2647 * - check sum settings 2648 * - metadata value 2649 * - software parser settings 2650 * - packets length (legacy MPW only) 2651 * - scheduling is not required 2652 */ 2653 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 2654 MLX5_ASSERT(loop); 2655 part -= loop; 2656 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2657 if (unlikely(!loc->elts_free || 2658 !loc->wqe_free)) 2659 return MLX5_TXCMP_CODE_EXIT; 2660 pkts_n -= part; 2661 goto next_empw; 2662 } 2663 /* Packet attributes match, continue the same eMPW. 
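			 * (Editor's note: the title Control and Ethernet
			 * Segments are reused, only the Data Segment pointer
			 * advances; the ring wraparound is checked right
			 * below.)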
*/ 2664 ++dseg; 2665 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2666 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2667 } 2668 /* eMPW is built successfully, update loop parameters. */ 2669 MLX5_ASSERT(!loop); 2670 MLX5_ASSERT(pkts_n >= part); 2671 #ifdef MLX5_PMD_SOFT_COUNTERS 2672 /* Update sent data bytes counter. */ 2673 txq->stats.obytes += slen; 2674 #endif 2675 loc->elts_free -= part; 2676 loc->pkts_sent += part; 2677 txq->wqe_ci += (2 + part + 3) / 4; 2678 loc->wqe_free -= (2 + part + 3) / 4; 2679 pkts_n -= part; 2680 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2681 return MLX5_TXCMP_CODE_EXIT; 2682 loc->mbuf = *pkts++; 2683 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2684 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 2685 return ret; 2686 /* Continue sending eMPW batches. */ 2687 } 2688 MLX5_ASSERT(false); 2689 } 2690 2691 /** 2692 * The routine sends packets with MLX5_OPCODE_EMPW 2693 * with inlining, optionally supports VLAN insertion. 2694 */ 2695 static __rte_always_inline enum mlx5_txcmp_code 2696 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 2697 struct rte_mbuf **__rte_restrict pkts, 2698 unsigned int pkts_n, 2699 struct mlx5_txq_local *__rte_restrict loc, 2700 unsigned int olx) 2701 { 2702 /* 2703 * Subroutine is the part of mlx5_tx_burst_single() and sends 2704 * single-segment packet with eMPW opcode with data inlining. 2705 */ 2706 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2707 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2708 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2709 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2710 pkts += loc->pkts_sent + 1; 2711 pkts_n -= loc->pkts_sent; 2712 for (;;) { 2713 struct mlx5_wqe_dseg *__rte_restrict dseg; 2714 struct mlx5_wqe *__rte_restrict wqem; 2715 enum mlx5_txcmp_code ret; 2716 unsigned int room, part, nlim; 2717 unsigned int slen = 0; 2718 2719 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2720 if (MLX5_TXOFF_CONFIG(TXPP)) { 2721 enum mlx5_txcmp_code wret; 2722 2723 /* Generate WAIT for scheduling if requested. */ 2724 wret = mlx5_tx_schedule_send(txq, loc, olx); 2725 if (wret == MLX5_TXCMP_CODE_EXIT) 2726 return MLX5_TXCMP_CODE_EXIT; 2727 if (wret == MLX5_TXCMP_CODE_ERROR) 2728 return MLX5_TXCMP_CODE_ERROR; 2729 } 2730 /* 2731 * Limits the amount of packets in one WQE 2732 * to improve CQE latency generation. 2733 */ 2734 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2735 MLX5_MPW_INLINE_MAX_PACKETS : 2736 MLX5_EMPW_MAX_PACKETS); 2737 /* Check whether we have minimal amount WQEs */ 2738 if (unlikely(loc->wqe_free < 2739 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2740 return MLX5_TXCMP_CODE_EXIT; 2741 if (likely(pkts_n > 1)) 2742 rte_prefetch0(*pkts); 2743 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2744 /* 2745 * Build eMPW title WQEBB: 2746 * - Control Segment, eMPW opcode, zero DS 2747 * - Ethernet Segment, no inline 2748 */ 2749 mlx5_tx_cseg_init(txq, loc, wqem, 0, 2750 MLX5_OPCODE_ENHANCED_MPSW, olx); 2751 mlx5_tx_eseg_none(txq, loc, wqem, 2752 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2753 dseg = &wqem->dseg[0]; 2754 /* Store the packet length for legacy MPW. */ 2755 if (MLX5_TXOFF_CONFIG(MPW)) 2756 wqem->eseg.mss = rte_cpu_to_be_16 2757 (rte_pktmbuf_data_len(loc->mbuf)); 2758 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 2759 loc->wqe_free) * MLX5_WQE_SIZE - 2760 MLX5_WQE_CSEG_SIZE - 2761 MLX5_WQE_ESEG_SIZE; 2762 /* Limit the room for legacy MPW sessions for performance. 
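		 * Editor's illustration with hypothetical numbers: with
		 * txq->inlen_empw = 256 and VLAN offload configured, the
		 * inline bound is 256 + 4 + 4 = 264 bytes, and the room is
		 * capped at the larger of this bound and
		 * MLX5_MPW_INLINE_MAX_PACKETS pointer Data Segments.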
*/ 2763 if (MLX5_TXOFF_CONFIG(MPW)) 2764 room = RTE_MIN(room, 2765 RTE_MAX(txq->inlen_empw + 2766 sizeof(dseg->bcount) + 2767 (MLX5_TXOFF_CONFIG(VLAN) ? 2768 sizeof(struct rte_vlan_hdr) : 0), 2769 MLX5_MPW_INLINE_MAX_PACKETS * 2770 MLX5_WQE_DSEG_SIZE)); 2771 /* Build WQE till we have space, packets and resources. */ 2772 part = room; 2773 for (;;) { 2774 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2775 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2776 unsigned int tlen; 2777 2778 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2779 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 2780 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 2781 /* 2782 * Some Tx offloads may cause an error if packet is not 2783 * long enough, check against assumed minimal length. 2784 */ 2785 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 2786 part -= room; 2787 if (unlikely(!part)) 2788 return MLX5_TXCMP_CODE_ERROR; 2789 /* 2790 * We have some successfully built 2791 * packet Data Segments to send. 2792 */ 2793 mlx5_tx_idone_empw(txq, loc, part, 2794 slen, wqem, olx); 2795 return MLX5_TXCMP_CODE_ERROR; 2796 } 2797 /* Inline or not inline - that's the Question. */ 2798 if (dlen > txq->inlen_empw || 2799 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2800 goto pointer_empw; 2801 if (MLX5_TXOFF_CONFIG(MPW)) { 2802 if (dlen > txq->inlen_send) 2803 goto pointer_empw; 2804 tlen = dlen; 2805 if (part == room) { 2806 /* Open new inline MPW session. */ 2807 tlen += sizeof(dseg->bcount); 2808 dseg->bcount = RTE_BE32(0); 2809 dseg = RTE_PTR_ADD 2810 (dseg, sizeof(dseg->bcount)); 2811 } else { 2812 /* 2813 * No pointer and inline descriptor 2814 * intermix for legacy MPW sessions. 2815 */ 2816 if (wqem->dseg[0].bcount) 2817 break; 2818 } 2819 } else { 2820 tlen = sizeof(dseg->bcount) + dlen; 2821 } 2822 /* Inline entire packet, optional VLAN insertion. */ 2823 if (MLX5_TXOFF_CONFIG(VLAN) && 2824 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2825 /* 2826 * The packet length must be checked in 2827 * mlx5_tx_able_to_empw() and packet 2828 * fits into inline length guaranteed. 2829 */ 2830 MLX5_ASSERT((dlen + 2831 sizeof(struct rte_vlan_hdr)) <= 2832 txq->inlen_empw); 2833 tlen += sizeof(struct rte_vlan_hdr); 2834 if (room < tlen) 2835 break; 2836 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 2837 dptr, dlen, olx); 2838 #ifdef MLX5_PMD_SOFT_COUNTERS 2839 /* Update sent data bytes counter. */ 2840 slen += sizeof(struct rte_vlan_hdr); 2841 #endif 2842 } else { 2843 if (room < tlen) 2844 break; 2845 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 2846 dptr, dlen, olx); 2847 } 2848 if (!MLX5_TXOFF_CONFIG(MPW)) 2849 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 2850 MLX5_ASSERT(room >= tlen); 2851 room -= tlen; 2852 /* 2853 * Packet data are completely inline, 2854 * we can try to free the packet. 2855 */ 2856 if (likely(loc->pkts_sent == loc->mbuf_free)) { 2857 /* 2858 * All the packets from the burst beginning 2859 * are inline, we can free mbufs directly 2860 * from the origin array on tx_burst exit(). 2861 */ 2862 loc->mbuf_free++; 2863 goto next_mbuf; 2864 } 2865 /* 2866 * In order no to call rte_pktmbuf_free_seg() here, 2867 * in the most inner loop (that might be very 2868 * expensive) we just save the mbuf in elts. 2869 */ 2870 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2871 loc->elts_free--; 2872 goto next_mbuf; 2873 pointer_empw: 2874 /* 2875 * No pointer and inline descriptor 2876 * intermix for legacy MPW sessions. 
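			 * (Editor's note: a legacy MPW session is
			 * homogeneous: once it has been opened with the
			 * inline bcount placeholder, a pointer Data Segment
			 * closes it and a new session is started, and vice
			 * versa.)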
2877 */ 2878 if (MLX5_TXOFF_CONFIG(MPW) && 2879 part != room && 2880 wqem->dseg[0].bcount == RTE_BE32(0)) 2881 break; 2882 /* 2883 * Not inlinable VLAN packets are 2884 * proceeded outside of this routine. 2885 */ 2886 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2887 if (MLX5_TXOFF_CONFIG(VLAN)) 2888 MLX5_ASSERT(!(loc->mbuf->ol_flags & 2889 RTE_MBUF_F_TX_VLAN)); 2890 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2891 /* We have to store mbuf in elts.*/ 2892 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2893 loc->elts_free--; 2894 room -= MLX5_WQE_DSEG_SIZE; 2895 /* Ring buffer wraparound is checked at the loop end.*/ 2896 ++dseg; 2897 next_mbuf: 2898 #ifdef MLX5_PMD_SOFT_COUNTERS 2899 /* Update sent data bytes counter. */ 2900 slen += dlen; 2901 #endif 2902 loc->pkts_sent++; 2903 pkts_n--; 2904 if (unlikely(!pkts_n || !loc->elts_free)) { 2905 /* 2906 * We have no resources/packets to 2907 * continue build descriptors. 2908 */ 2909 part -= room; 2910 mlx5_tx_idone_empw(txq, loc, part, 2911 slen, wqem, olx); 2912 return MLX5_TXCMP_CODE_EXIT; 2913 } 2914 loc->mbuf = *pkts++; 2915 if (likely(pkts_n > 1)) 2916 rte_prefetch0(*pkts); 2917 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2918 /* 2919 * Unroll the completion code to avoid 2920 * returning variable value - it results in 2921 * unoptimized sequent checking in caller. 2922 */ 2923 if (ret == MLX5_TXCMP_CODE_MULTI) { 2924 part -= room; 2925 mlx5_tx_idone_empw(txq, loc, part, 2926 slen, wqem, olx); 2927 if (unlikely(!loc->elts_free || 2928 !loc->wqe_free)) 2929 return MLX5_TXCMP_CODE_EXIT; 2930 return MLX5_TXCMP_CODE_MULTI; 2931 } 2932 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2933 if (ret == MLX5_TXCMP_CODE_TSO) { 2934 part -= room; 2935 mlx5_tx_idone_empw(txq, loc, part, 2936 slen, wqem, olx); 2937 if (unlikely(!loc->elts_free || 2938 !loc->wqe_free)) 2939 return MLX5_TXCMP_CODE_EXIT; 2940 return MLX5_TXCMP_CODE_TSO; 2941 } 2942 if (ret == MLX5_TXCMP_CODE_SINGLE) { 2943 part -= room; 2944 mlx5_tx_idone_empw(txq, loc, part, 2945 slen, wqem, olx); 2946 if (unlikely(!loc->elts_free || 2947 !loc->wqe_free)) 2948 return MLX5_TXCMP_CODE_EXIT; 2949 return MLX5_TXCMP_CODE_SINGLE; 2950 } 2951 if (ret != MLX5_TXCMP_CODE_EMPW) { 2952 MLX5_ASSERT(false); 2953 part -= room; 2954 mlx5_tx_idone_empw(txq, loc, part, 2955 slen, wqem, olx); 2956 return MLX5_TXCMP_CODE_ERROR; 2957 } 2958 /* Check if we have minimal room left. */ 2959 nlim--; 2960 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 2961 break; 2962 /* 2963 * Check whether packet parameters coincide 2964 * within assumed eMPW batch: 2965 * - check sum settings 2966 * - metadata value 2967 * - software parser settings 2968 * - packets length (legacy MPW only) 2969 * - scheduling is not required 2970 */ 2971 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 2972 loc, dlen, olx)) 2973 break; 2974 /* Packet attributes match, continue the same eMPW. */ 2975 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2976 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2977 } 2978 /* 2979 * We get here to close an existing eMPW 2980 * session and start the new one. 2981 */ 2982 MLX5_ASSERT(pkts_n); 2983 part -= room; 2984 if (unlikely(!part)) 2985 return MLX5_TXCMP_CODE_EXIT; 2986 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 2987 if (unlikely(!loc->elts_free || 2988 !loc->wqe_free)) 2989 return MLX5_TXCMP_CODE_EXIT; 2990 /* Continue the loop with new eMPW session. */ 2991 } 2992 MLX5_ASSERT(false); 2993 } 2994 2995 /** 2996 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 
2997 * Data inlining and VLAN insertion are supported. 2998 */ 2999 static __rte_always_inline enum mlx5_txcmp_code 3000 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 3001 struct rte_mbuf **__rte_restrict pkts, 3002 unsigned int pkts_n, 3003 struct mlx5_txq_local *__rte_restrict loc, 3004 unsigned int olx) 3005 { 3006 /* 3007 * Subroutine is the part of mlx5_tx_burst_single() 3008 * and sends single-segment packet with SEND opcode. 3009 */ 3010 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3011 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3012 pkts += loc->pkts_sent + 1; 3013 pkts_n -= loc->pkts_sent; 3014 for (;;) { 3015 struct mlx5_wqe *__rte_restrict wqe; 3016 enum mlx5_txcmp_code ret; 3017 3018 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3019 if (MLX5_TXOFF_CONFIG(TXPP)) { 3020 enum mlx5_txcmp_code wret; 3021 3022 /* Generate WAIT for scheduling if requested. */ 3023 wret = mlx5_tx_schedule_send(txq, loc, olx); 3024 if (wret == MLX5_TXCMP_CODE_EXIT) 3025 return MLX5_TXCMP_CODE_EXIT; 3026 if (wret == MLX5_TXCMP_CODE_ERROR) 3027 return MLX5_TXCMP_CODE_ERROR; 3028 } 3029 if (MLX5_TXOFF_CONFIG(INLINE)) { 3030 unsigned int inlen, vlan = 0; 3031 3032 inlen = rte_pktmbuf_data_len(loc->mbuf); 3033 if (MLX5_TXOFF_CONFIG(VLAN) && 3034 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 3035 vlan = sizeof(struct rte_vlan_hdr); 3036 inlen += vlan; 3037 } 3038 /* 3039 * If inlining is enabled at configuration time 3040 * the limit must be not less than minimal size. 3041 * Otherwise we would do extra check for data 3042 * size to avoid crashes due to length overflow. 3043 */ 3044 MLX5_ASSERT(txq->inlen_send >= 3045 MLX5_ESEG_MIN_INLINE_SIZE); 3046 if (inlen <= txq->inlen_send) { 3047 unsigned int seg_n, wqe_n; 3048 3049 rte_prefetch0(rte_pktmbuf_mtod 3050 (loc->mbuf, uint8_t *)); 3051 /* Check against minimal length. */ 3052 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3053 return MLX5_TXCMP_CODE_ERROR; 3054 if (loc->mbuf->ol_flags & 3055 RTE_MBUF_F_TX_DYNF_NOINLINE) { 3056 /* 3057 * The hint flag not to inline packet 3058 * data is set. Check whether we can 3059 * follow the hint. 3060 */ 3061 if ((!MLX5_TXOFF_CONFIG(EMPW) && 3062 txq->inlen_mode) || 3063 (MLX5_TXOFF_CONFIG(MPW) && 3064 txq->inlen_mode)) { 3065 if (inlen <= txq->inlen_send) 3066 goto single_inline; 3067 /* 3068 * The hardware requires the 3069 * minimal inline data header. 3070 */ 3071 goto single_min_inline; 3072 } 3073 if (MLX5_TXOFF_CONFIG(VLAN) && 3074 vlan && !txq->vlan_en) { 3075 /* 3076 * We must insert VLAN tag 3077 * by software means. 3078 */ 3079 goto single_part_inline; 3080 } 3081 goto single_no_inline; 3082 } 3083 single_inline: 3084 /* 3085 * Completely inlined packet data WQE: 3086 * - Control Segment, SEND opcode 3087 * - Ethernet Segment, no VLAN insertion 3088 * - Data inlined, VLAN optionally inserted 3089 * - Alignment to MLX5_WSEG_SIZE 3090 * Have to estimate amount of WQEBBs 3091 */ 3092 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 3093 MLX5_ESEG_MIN_INLINE_SIZE + 3094 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3095 /* Check if there are enough WQEBBs. */ 3096 wqe_n = (seg_n + 3) / 4; 3097 if (wqe_n > loc->wqe_free) 3098 return MLX5_TXCMP_CODE_EXIT; 3099 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3100 loc->wqe_last = wqe; 3101 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 3102 MLX5_OPCODE_SEND, olx); 3103 mlx5_tx_eseg_data(txq, loc, wqe, 3104 vlan, inlen, 0, olx); 3105 txq->wqe_ci += wqe_n; 3106 loc->wqe_free -= wqe_n; 3107 /* 3108 * Packet data are completely inlined, 3109 * free the packet immediately. 
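				 * (Editor's note: the mbuf can be released
				 * right away because every byte of the packet
				 * now resides in the WQE ring buffer; nothing
				 * in elts references it and no completion
				 * needs to be processed before the buffer is
				 * reused.)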
3110 */ 3111 rte_pktmbuf_free_seg(loc->mbuf); 3112 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 3113 MLX5_TXOFF_CONFIG(MPW)) && 3114 txq->inlen_mode) { 3115 /* 3116 * If minimal inlining is requested the eMPW 3117 * feature should be disabled due to data is 3118 * inlined into Ethernet Segment, which can 3119 * not contain inlined data for eMPW due to 3120 * segment shared for all packets. 3121 */ 3122 struct mlx5_wqe_dseg *__rte_restrict dseg; 3123 unsigned int ds; 3124 uint8_t *dptr; 3125 3126 /* 3127 * The inline-mode settings require 3128 * to inline the specified amount of 3129 * data bytes to the Ethernet Segment. 3130 * We should check the free space in 3131 * WQE ring buffer to inline partially. 3132 */ 3133 single_min_inline: 3134 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 3135 MLX5_ASSERT(inlen > txq->inlen_mode); 3136 MLX5_ASSERT(txq->inlen_mode >= 3137 MLX5_ESEG_MIN_INLINE_SIZE); 3138 /* 3139 * Check whether there are enough free WQEBBs: 3140 * - Control Segment 3141 * - Ethernet Segment 3142 * - First Segment of inlined Ethernet data 3143 * - ... data continued ... 3144 * - Finishing Data Segment of pointer type 3145 */ 3146 ds = (MLX5_WQE_CSEG_SIZE + 3147 MLX5_WQE_ESEG_SIZE + 3148 MLX5_WQE_DSEG_SIZE + 3149 txq->inlen_mode - 3150 MLX5_ESEG_MIN_INLINE_SIZE + 3151 MLX5_WQE_DSEG_SIZE + 3152 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3153 if (loc->wqe_free < ((ds + 3) / 4)) 3154 return MLX5_TXCMP_CODE_EXIT; 3155 /* 3156 * Build the ordinary SEND WQE: 3157 * - Control Segment 3158 * - Ethernet Segment, inline inlen_mode bytes 3159 * - Data Segment of pointer type 3160 */ 3161 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3162 loc->wqe_last = wqe; 3163 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3164 MLX5_OPCODE_SEND, olx); 3165 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 3166 txq->inlen_mode, 3167 0, olx); 3168 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3169 txq->inlen_mode - vlan; 3170 inlen -= txq->inlen_mode; 3171 mlx5_tx_dseg_ptr(txq, loc, dseg, 3172 dptr, inlen, olx); 3173 /* 3174 * WQE is built, update the loop parameters 3175 * and got to the next packet. 3176 */ 3177 txq->wqe_ci += (ds + 3) / 4; 3178 loc->wqe_free -= (ds + 3) / 4; 3179 /* We have to store mbuf in elts.*/ 3180 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3181 txq->elts[txq->elts_head++ & txq->elts_m] = 3182 loc->mbuf; 3183 --loc->elts_free; 3184 } else { 3185 uint8_t *dptr; 3186 unsigned int dlen; 3187 3188 /* 3189 * Partially inlined packet data WQE, we have 3190 * some space in title WQEBB, we can fill it 3191 * with some packet data. It takes one WQEBB, 3192 * it is available, no extra space check: 3193 * - Control Segment, SEND opcode 3194 * - Ethernet Segment, no VLAN insertion 3195 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 3196 * - Data Segment, pointer type 3197 * 3198 * We also get here if VLAN insertion is not 3199 * supported by HW, the inline is enabled. 3200 */ 3201 single_part_inline: 3202 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3203 loc->wqe_last = wqe; 3204 mlx5_tx_cseg_init(txq, loc, wqe, 4, 3205 MLX5_OPCODE_SEND, olx); 3206 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 3207 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3208 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 3209 /* 3210 * The length check is performed above, by 3211 * comparing with txq->inlen_send. We should 3212 * not get overflow here. 
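				 * Editor's illustration (assuming
				 * MLX5_ESEG_MIN_INLINE_SIZE is 18 bytes): for
				 * a 60-byte frame without VLAN insertion the
				 * first 18 bytes are copied into the Ethernet
				 * Segment and the remaining 42 bytes are
				 * referenced by the pointer Data Segment.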
3213 */ 3214 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 3215 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 3216 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 3217 dptr, dlen, olx); 3218 ++txq->wqe_ci; 3219 --loc->wqe_free; 3220 /* We have to store mbuf in elts.*/ 3221 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3222 txq->elts[txq->elts_head++ & txq->elts_m] = 3223 loc->mbuf; 3224 --loc->elts_free; 3225 } 3226 #ifdef MLX5_PMD_SOFT_COUNTERS 3227 /* Update sent data bytes counter. */ 3228 txq->stats.obytes += vlan + 3229 rte_pktmbuf_data_len(loc->mbuf); 3230 #endif 3231 } else { 3232 /* 3233 * No inline at all, it means the CPU cycles saving 3234 * is prioritized at configuration, we should not 3235 * copy any packet data to WQE. 3236 * 3237 * SEND WQE, one WQEBB: 3238 * - Control Segment, SEND opcode 3239 * - Ethernet Segment, optional VLAN, no inline 3240 * - Data Segment, pointer type 3241 */ 3242 single_no_inline: 3243 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3244 loc->wqe_last = wqe; 3245 mlx5_tx_cseg_init(txq, loc, wqe, 3, 3246 MLX5_OPCODE_SEND, olx); 3247 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3248 mlx5_tx_dseg_ptr 3249 (txq, loc, &wqe->dseg[0], 3250 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3251 rte_pktmbuf_data_len(loc->mbuf), olx); 3252 ++txq->wqe_ci; 3253 --loc->wqe_free; 3254 /* 3255 * We should not store mbuf pointer in elts 3256 * if no inlining is configured, this is done 3257 * by calling routine in a batch copy. 3258 */ 3259 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 3260 --loc->elts_free; 3261 #ifdef MLX5_PMD_SOFT_COUNTERS 3262 /* Update sent data bytes counter. */ 3263 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 3264 if (MLX5_TXOFF_CONFIG(VLAN) && 3265 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 3266 txq->stats.obytes += 3267 sizeof(struct rte_vlan_hdr); 3268 #endif 3269 } 3270 ++loc->pkts_sent; 3271 --pkts_n; 3272 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3273 return MLX5_TXCMP_CODE_EXIT; 3274 loc->mbuf = *pkts++; 3275 if (pkts_n > 1) 3276 rte_prefetch0(*pkts); 3277 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3278 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 3279 return ret; 3280 } 3281 MLX5_ASSERT(false); 3282 } 3283 3284 static __rte_always_inline enum mlx5_txcmp_code 3285 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 3286 struct rte_mbuf **__rte_restrict pkts, 3287 unsigned int pkts_n, 3288 struct mlx5_txq_local *__rte_restrict loc, 3289 unsigned int olx) 3290 { 3291 enum mlx5_txcmp_code ret; 3292 3293 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 3294 if (ret == MLX5_TXCMP_CODE_SINGLE) 3295 goto ordinary_send; 3296 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 3297 for (;;) { 3298 /* Optimize for inline/no inline eMPW send. */ 3299 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 3300 mlx5_tx_burst_empw_inline 3301 (txq, pkts, pkts_n, loc, olx) : 3302 mlx5_tx_burst_empw_simple 3303 (txq, pkts, pkts_n, loc, olx); 3304 if (ret != MLX5_TXCMP_CODE_SINGLE) 3305 return ret; 3306 /* The resources to send one packet should remain. */ 3307 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3308 ordinary_send: 3309 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 3310 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 3311 if (ret != MLX5_TXCMP_CODE_EMPW) 3312 return ret; 3313 /* The resources to send one packet should remain. */ 3314 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3315 } 3316 } 3317 3318 /** 3319 * DPDK Tx callback template. This is configured template used to generate 3320 * routines optimized for specified offload setup. 
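 * (Editor's summary of the body below: the template polls the completion
 * queue, computes the free elts/WQE budget, dispatches every packet to the
 * multi-segment, TSO or single-segment branch, then requests a CQE if
 * thresholds are reached, rings the doorbell and copies the remaining
 * mbufs to elts.)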
3321 * One of this generated functions is chosen at SQ configuration time. 3322 * 3323 * @param txq 3324 * Generic pointer to TX queue structure. 3325 * @param[in] pkts 3326 * Packets to transmit. 3327 * @param pkts_n 3328 * Number of packets in array. 3329 * @param olx 3330 * Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx 3331 * values. Should be static to take compile time static configuration 3332 * advantages. 3333 * 3334 * @return 3335 * Number of packets successfully transmitted (<= pkts_n). 3336 */ 3337 static __rte_always_inline uint16_t 3338 mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, 3339 struct rte_mbuf **__rte_restrict pkts, 3340 uint16_t pkts_n, 3341 unsigned int olx) 3342 { 3343 struct mlx5_txq_local loc; 3344 enum mlx5_txcmp_code ret; 3345 unsigned int part; 3346 3347 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 3348 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 3349 if (unlikely(!pkts_n)) 3350 return 0; 3351 if (MLX5_TXOFF_CONFIG(INLINE)) 3352 loc.mbuf_free = 0; 3353 loc.pkts_sent = 0; 3354 loc.pkts_copy = 0; 3355 loc.wqe_last = NULL; 3356 3357 send_loop: 3358 loc.pkts_loop = loc.pkts_sent; 3359 /* 3360 * Check if there are some CQEs, if any: 3361 * - process an encountered errors 3362 * - process the completed WQEs 3363 * - free related mbufs 3364 * - doorbell the NIC about processed CQEs 3365 */ 3366 rte_prefetch0(*(pkts + loc.pkts_sent)); 3367 mlx5_tx_handle_completion(txq, olx); 3368 /* 3369 * Calculate the number of available resources - elts and WQEs. 3370 * There are two possible different scenarios: 3371 * - no data inlining into WQEs, one WQEBB may contains up to 3372 * four packets, in this case elts become scarce resource 3373 * - data inlining into WQEs, one packet may require multiple 3374 * WQEBBs, the WQEs become the limiting factor. 3375 */ 3376 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 3377 loc.elts_free = txq->elts_s - 3378 (uint16_t)(txq->elts_head - txq->elts_tail); 3379 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 3380 loc.wqe_free = txq->wqe_s - 3381 (uint16_t)(txq->wqe_ci - txq->wqe_pi); 3382 if (unlikely(!loc.elts_free || !loc.wqe_free)) 3383 goto burst_exit; 3384 for (;;) { 3385 /* 3386 * Fetch the packet from array. Usually this is the first 3387 * packet in series of multi/single segment packets. 3388 */ 3389 loc.mbuf = *(pkts + loc.pkts_sent); 3390 /* Dedicated branch for multi-segment packets. */ 3391 if (MLX5_TXOFF_CONFIG(MULTI) && 3392 unlikely(NB_SEGS(loc.mbuf) > 1)) { 3393 /* 3394 * Multi-segment packet encountered. 3395 * Hardware is able to process it only 3396 * with SEND/TSO opcodes, one packet 3397 * per WQE, do it in dedicated routine. 3398 */ 3399 enter_send_multi: 3400 MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy); 3401 part = loc.pkts_sent - loc.pkts_copy; 3402 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 3403 /* 3404 * There are some single-segment mbufs not 3405 * stored in elts. The mbufs must be in the 3406 * same order as WQEs, so we must copy the 3407 * mbufs to elts here, before the coming 3408 * multi-segment packet mbufs is appended. 
	if (unlikely(!loc.elts_free || !loc.wqe_free))
		goto burst_exit;
	for (;;) {
		/*
		 * Fetch the packet from the array. Usually this is the first
		 * packet in a series of multi/single-segment packets.
		 */
		loc.mbuf = *(pkts + loc.pkts_sent);
		/* Dedicated branch for multi-segment packets. */
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    unlikely(NB_SEGS(loc.mbuf) > 1)) {
			/*
			 * Multi-segment packet encountered.
			 * Hardware is able to process it only
			 * with SEND/TSO opcodes, one packet
			 * per WQE, do it in the dedicated routine.
			 */
enter_send_multi:
			MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy);
			part = loc.pkts_sent - loc.pkts_copy;
			if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
				/*
				 * There are some single-segment mbufs not
				 * stored in elts. The mbufs must be in the
				 * same order as WQEs, so we must copy the
				 * mbufs to elts here, before the mbufs of
				 * the coming multi-segment packet are
				 * appended.
				 */
				mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy,
						  part, olx);
				loc.pkts_copy = loc.pkts_sent;
			}
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx);
			if (!MLX5_TXOFF_CONFIG(INLINE))
				loc.pkts_copy = loc.pkts_sent;
			/*
			 * These return code checks are supposed
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT) {
				/*
				 * The routine returns this code when
				 * all packets are sent or there are not
				 * enough resources to complete the request.
				 */
				break;
			}
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				/*
				 * The routine returns this code when an error
				 * in the incoming packet format occurred.
				 */
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE) {
				/*
				 * A single-segment packet was encountered
				 * in the array, try to send it in the most
				 * optimized way, possibly engaging eMPW.
				 */
				goto enter_send_single;
			}
			if (MLX5_TXOFF_CONFIG(TSO) &&
			    ret == MLX5_TXCMP_CODE_TSO) {
				/*
				 * A single-segment TSO packet was
				 * encountered in the array.
				 */
				goto enter_send_tso;
			}
			/* We must not get here. Something went wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/* Dedicated branch for single-segment TSO packets. */
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    unlikely(loc.mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
			/*
			 * TSO might require a special way of inlining
			 * (dedicated parameters) and is sent with the
			 * MLX5_OPCODE_TSO opcode only, so handle this
			 * in a dedicated branch.
			 */
enter_send_tso:
			MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1);
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx);
			/*
			 * These return code checks are supposed
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT)
				break;
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE)
				goto enter_send_single;
			if (MLX5_TXOFF_CONFIG(MULTI) &&
			    ret == MLX5_TXCMP_CODE_MULTI) {
				/*
				 * A multi-segment packet was
				 * encountered in the array.
				 */
				goto enter_send_multi;
			}
			/* We must not get here. Something went wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/*
		 * The dedicated branch for single-segment packets
		 * without TSO. Often these can be sent using
		 * MLX5_OPCODE_EMPW with multiple packets in one WQE.
		 * The routine builds the WQEs until it encounters
		 * a TSO or multi-segment packet (if these offloads
		 * are requested at SQ configuration time).
		 */
enter_send_single:
		MLX5_ASSERT(pkts_n > loc.pkts_sent);
		ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx);
		/*
		 * These return code checks are supposed
		 * to be optimized out due to routine inlining.
		 */
		if (ret == MLX5_TXCMP_CODE_EXIT)
			break;
		if (ret == MLX5_TXCMP_CODE_ERROR) {
			txq->stats.oerrors++;
			break;
		}
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    ret == MLX5_TXCMP_CODE_MULTI) {
			/*
			 * A multi-segment packet was
			 * encountered in the array.
			 */
			goto enter_send_multi;
		}
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    ret == MLX5_TXCMP_CODE_TSO) {
			/*
			 * A single-segment TSO packet was
			 * encountered in the array.
			 */
			goto enter_send_tso;
		}
		/* We must not get here. Something went wrong. */
		MLX5_ASSERT(false);
		txq->stats.oerrors++;
		break;
	}
	/*
	 * The main Tx loop is completed, do the rest:
	 * - set completion request if thresholds are reached
	 * - doorbell the hardware
	 * - copy the remaining mbufs to elts (if any)
	 */
	MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) ||
		    loc.pkts_sent >= loc.pkts_copy);
	/* Take a shortcut if nothing was sent. */
	if (unlikely(loc.pkts_sent == loc.pkts_loop))
		goto burst_exit;
	/* Request CQE generation if limits are reached. */
	mlx5_tx_request_completion(txq, &loc, olx);
	/*
	 * Ring the QP doorbell immediately after WQE building completion
	 * to improve latency. The purely software-related data treatment
	 * can be completed after the doorbell. Tx CQEs for this SQ are
	 * processed in this thread only by polling.
	 *
	 * The rdma-core library can map the doorbell register in two ways,
	 * depending on the environment variable "MLX5_SHUT_UP_BF":
	 *
	 * - as regular cached memory, when the variable is either missing
	 *   or set to zero. This type of mapping may cause significant
	 *   doorbell register write latency and requires an explicit memory
	 *   write barrier to mitigate this issue and prevent write combining.
	 *
	 * - as non-cached memory, when the variable is present and set to
	 *   a non-zero value. This type of mapping may impact performance
	 *   under heavy load, but the explicit write memory barrier is
	 *   not required and core performance may improve.
	 *
	 * - the legacy behaviour (prior to the 19.08 release) was to use
	 *   a heuristic to decide whether the write memory barrier should
	 *   be performed. This behaviour is supported by specifying
	 *   tx_db_nc=2: the write barrier is skipped if the application
	 *   provides the full recommended burst of packets, assuming the
	 *   next packets are coming and the write barrier will be issued
	 *   on the next burst (after descriptor writing, at least).
	 */
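	/*
	 * For illustration, the last argument below encodes the modes
	 * described above: the write barrier is requested only for the
	 * cached mapping (db_nc == 0), and with the heuristic mode
	 * (db_heu set) it is additionally skipped when pkts_n is a
	 * multiple of MLX5_TX_DEFAULT_BURST, i.e. when a full recommended
	 * burst was provided.
	 */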
	mlx5_doorbell_ring(mlx5_tx_bfreg(txq),
			   *(volatile uint64_t *)loc.wqe_last, txq->wqe_ci,
			   txq->qp_db, !txq->db_nc &&
			   (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
	/* Not all of the mbufs may be stored into elts yet. */
	part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
	if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
		/*
		 * There are some single-segment mbufs not stored in elts.
		 * This can only happen if the last packet was single-segment.
		 * The copying is gathered into one place because it is
		 * a good opportunity to optimize it with SIMD.
		 * Unfortunately, if inlining is enabled, gaps may appear
		 * in the pointer array due to early freeing of the
		 * inlined mbufs.
		 */
		mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx);
		loc.pkts_copy = loc.pkts_sent;
	}
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (pkts_n > loc.pkts_sent) {
		/*
		 * If the burst size is large there might not be enough
		 * CQEs fetched from the completion queue and not enough
		 * resources freed to send all the packets.
		 */
		goto send_loop;
	}
burst_exit:
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += loc.pkts_sent;
#endif
	if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
		__mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
	return loc.pkts_sent;
}
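
/*
 * Illustrative sketch (a comment only, not compiled here): routines
 * specialized for a particular offload set are expected to be generated
 * from the template above with the MLX5_TXOFF_DECL() macro, passing a
 * compile-time constant olx so that the unused branches are optimized
 * out, for example:
 *
 *   MLX5_TXOFF_DECL(full_empw,
 *		     MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW)
 *
 * The actual set of instantiations lives in the Tx source files and may
 * differ; the name "full_empw" is used here only as an example.
 */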

#endif /* RTE_PMD_MLX5_TX_H_ */