/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2021 6WIND S.A.
 * Copyright 2021 Mellanox Technologies, Ltd
 */

#ifndef RTE_PMD_MLX5_TX_H_
#define RTE_PMD_MLX5_TX_H_

#include <stdint.h>
#include <sys/queue.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_common.h>
#include <rte_spinlock.h>

#include <mlx5_common.h>
#include <mlx5_common_mr.h>

#include "mlx5.h"
#include "mlx5_autoconf.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
	MLX5_TXCMP_CODE_EXIT = 0,
	MLX5_TXCMP_CODE_ERROR,
	MLX5_TXCMP_CODE_SINGLE,
	MLX5_TXCMP_CODE_MULTI,
	MLX5_TXCMP_CODE_TSO,
	MLX5_TXCMP_CODE_EMPW,
};

/*
 * These defines are used to configure the Tx burst routine option set
 * supported at compile time. The options not specified are optimized out,
 * because the related "if" conditions can be evaluated entirely at compile
 * time.
 * The offloads with a bigger runtime check overhead (requiring more CPU
 * cycles to skip) should have the bigger index - this is needed to select
 * the better matching routine if there is no exact match and some offloads
 * are not actually requested.
 */
#define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets. */
#define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported. */
#define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads. */
#define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Checksums offloaded. */
#define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported. */
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported. */
#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported. */
#define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp. */

/* The most common offloads groups. */
#define MLX5_TXOFF_CONFIG_NONE 0
#define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \
				MLX5_TXOFF_CONFIG_TSO | \
				MLX5_TXOFF_CONFIG_SWP | \
				MLX5_TXOFF_CONFIG_CSUM | \
				MLX5_TXOFF_CONFIG_INLINE | \
				MLX5_TXOFF_CONFIG_VLAN | \
				MLX5_TXOFF_CONFIG_METADATA)

#define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask)

#define MLX5_TXOFF_PRE_DECL(func) \
uint16_t mlx5_tx_burst_##func(void *txq, \
			      struct rte_mbuf **pkts, \
			      uint16_t pkts_n)

#define MLX5_TXOFF_DECL(func, olx) \
uint16_t mlx5_tx_burst_##func(void *txq, \
			      struct rte_mbuf **pkts, \
			      uint16_t pkts_n) \
{ \
	return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \
				  pkts, pkts_n, (olx)); \
}
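/*
 * Illustrative sketch (not part of the upstream sources): the macros above
 * are instantiated in the mlx5_tx_*.c files to generate the specialized
 * burst routines. A hypothetical variant supporting multi-segment packets,
 * TSO and metadata would be produced with:
 *
 *	MLX5_TXOFF_PRE_DECL(mt);
 *	MLX5_TXOFF_DECL(mt, MLX5_TXOFF_CONFIG_MULTI |
 *			    MLX5_TXOFF_CONFIG_TSO |
 *			    MLX5_TXOFF_CONFIG_METADATA)
 *
 * Because the "olx" argument is a compile-time constant, every
 * MLX5_TXOFF_CONFIG(...) test inside mlx5_tx_burst_tmpl() folds at compile
 * time and the unused offload branches are optimized out.
 */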
/* Mbuf dynamic flag offset for inline. */
extern uint64_t rte_net_mlx5_dynf_inline_mask;
#define RTE_MBUF_F_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask

extern uint32_t mlx5_ptype_table[] __rte_cache_aligned;
extern uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned;
extern uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned;

struct mlx5_txq_stats {
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint64_t opackets; /**< Total of successfully sent packets. */
	uint64_t obytes; /**< Total of successfully sent bytes. */
#endif
	uint64_t oerrors; /**< Total number of failed transmitted packets. */
};

/* TX queue send local data. */
__extension__
struct mlx5_txq_local {
	struct mlx5_wqe *wqe_last; /* last sent WQE pointer. */
	struct rte_mbuf *mbuf; /* first mbuf to process. */
	uint16_t pkts_copy; /* packets copied to elts. */
	uint16_t pkts_sent; /* packets sent. */
	uint16_t pkts_loop; /* packets sent on loop entry. */
	uint16_t elts_free; /* available elts remain. */
	uint16_t wqe_free; /* available wqe remain. */
	uint16_t mbuf_off; /* data offset in current mbuf. */
	uint16_t mbuf_nseg; /* number of remaining mbuf segments. */
	uint16_t mbuf_free; /* number of inline mbufs to free. */
};

/* TX queue descriptor. */
__extension__
struct mlx5_txq_data {
	uint16_t elts_head; /* Current counter in (*elts)[]. */
	uint16_t elts_tail; /* Counter of first element awaiting completion. */
	uint16_t elts_comp; /* elts index since last completion request. */
	uint16_t elts_s; /* Number of mbuf elements. */
	uint16_t elts_m; /* Mask for mbuf elements indices. */
	/* Fields related to elts mbuf storage. */
	uint16_t wqe_ci; /* Consumer index for work queue. */
	uint16_t wqe_pi; /* Producer index for work queue. */
	uint16_t wqe_s; /* Number of WQ elements. */
	uint16_t wqe_m; /* Mask for WQ element indices. */
	uint16_t wqe_comp; /* WQE index since last completion request. */
	uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */
	/* WQ related fields. */
	uint16_t cq_ci; /* Consumer index for completion queue. */
	uint16_t cq_pi; /* Producer index for completion queue. */
	uint16_t cqe_s; /* Number of CQ elements. */
	uint16_t cqe_m; /* Mask for CQ indices. */
	/* CQ related fields. */
	uint16_t elts_n:4; /* elts[] length (in log2). */
	uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
	uint16_t wqe_n:4; /* Number of WQ elements (in log2). */
	uint16_t tso_en:1; /* When set hardware TSO is enabled. */
	uint16_t tunnel_en:1;
	/* When set TX offload for tunneled packets are supported. */
	uint16_t swp_en:1; /* Whether SW parser is enabled. */
	uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */
	uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */
	uint16_t db_heu:1; /* Doorbell heuristic write barrier. */
	uint16_t fast_free:1; /* mbuf fast free on Tx is enabled. */
	uint16_t inlen_send; /* Ordinary send data inline size. */
	uint16_t inlen_empw; /* eMPW max packet size to inline. */
	uint16_t inlen_mode; /* Minimal data length to inline. */
	uint32_t qp_num_8s; /* QP number shifted by 8. */
	uint64_t offloads; /* Offloads for Tx Queue. */
	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
	struct mlx5_wqe *wqes; /* Work queue. */
	struct mlx5_wqe *wqes_end; /* Work queue array limit. */
#ifdef RTE_LIBRTE_MLX5_DEBUG
	uint32_t *fcqs; /* Free completion queue (debug extended). */
#else
	uint16_t *fcqs; /* Free completion queue. */
#endif
	volatile struct mlx5_cqe *cqes; /* Completion queue. */
	volatile uint32_t *qp_db; /* Work queue doorbell. */
	volatile uint32_t *cq_db; /* Completion queue doorbell. */
	uint16_t port_id; /* Port ID of device. */
	uint16_t idx; /* Queue index. */
	uint64_t ts_mask; /* Timestamp flag dynamic mask. */
	int32_t ts_offset; /* Timestamp field dynamic offset. */
	struct mlx5_dev_ctx_shared *sh; /* Shared context. */
	struct mlx5_txq_stats stats; /* TX queue counters. */
	struct mlx5_uar_data uar_data;
	struct rte_mbuf *elts[0];
	/* Storage for queued packets, must be the last field. */
} __rte_cache_aligned;
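/*
 * Illustrative note (sketch): elts[] and the WQ/CQ are power-of-two rings
 * addressed by free-running 16-bit counters, so the usual ring arithmetic
 * applies, for example:
 *
 *	used = (uint16_t)(txq->elts_head - txq->elts_tail);
 *	free = txq->elts_s - used;
 *	slot = txq->elts[txq->elts_head & txq->elts_m];
 *
 * The helpers below (mlx5_tx_free_elts(), mlx5_tx_copy_elts()) rely on
 * exactly this wraparound behaviour of the unsigned counters.
 */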
enum mlx5_txq_type {
	MLX5_TXQ_TYPE_STANDARD, /* Standard Tx queue. */
	MLX5_TXQ_TYPE_HAIRPIN, /* Hairpin Tx queue. */
};

/* TX queue control descriptor. */
struct mlx5_txq_ctrl {
	LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */
	uint32_t refcnt; /* Reference counter. */
	unsigned int socket; /* CPU socket ID for allocations. */
	enum mlx5_txq_type type; /* The txq ctrl type. */
	unsigned int max_inline_data; /* Max inline data. */
	unsigned int max_tso_header; /* Max TSO header size. */
	struct mlx5_txq_obj *obj; /* Verbs/DevX queue object. */
	struct mlx5_priv *priv; /* Back pointer to private data. */
	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
	uint16_t dump_file_n; /* Number of dump files. */
	struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */
	uint32_t hairpin_status; /* Hairpin binding status. */
	struct mlx5_txq_data txq; /* Data path structure. */
	/* Must be the last field in the structure, contains elts[]. */
};

/* mlx5_txq.c */

int mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
			unsigned int socket, const struct rte_eth_txconf *conf);
int mlx5_tx_hairpin_queue_setup
	(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	 const struct rte_eth_hairpin_conf *hairpin_conf);
void mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev);
int mlx5_txq_obj_verify(struct rte_eth_dev *dev);
struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx,
				   uint16_t desc, unsigned int socket,
				   const struct rte_eth_txconf *conf);
struct mlx5_txq_ctrl *mlx5_txq_hairpin_new
	(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	 const struct rte_eth_hairpin_conf *hairpin_conf);
struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_verify(struct rte_eth_dev *dev);
void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl);
void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl);
uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev);
void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev);

/* mlx5_tx.c */

uint16_t removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts,
			  uint16_t pkts_n);
void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
			       unsigned int olx __rte_unused);
int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset);
void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
		       struct rte_eth_txq_info *qinfo);
int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
			   struct rte_eth_burst_mode *mode);

/* mlx5_tx_empw.c */

MLX5_TXOFF_PRE_DECL(full_empw);
MLX5_TXOFF_PRE_DECL(none_empw);
MLX5_TXOFF_PRE_DECL(md_empw);
MLX5_TXOFF_PRE_DECL(mt_empw);
MLX5_TXOFF_PRE_DECL(mtsc_empw);
MLX5_TXOFF_PRE_DECL(mti_empw);
MLX5_TXOFF_PRE_DECL(mtv_empw);
MLX5_TXOFF_PRE_DECL(mtiv_empw);
MLX5_TXOFF_PRE_DECL(sc_empw);
MLX5_TXOFF_PRE_DECL(sci_empw);
MLX5_TXOFF_PRE_DECL(scv_empw);
MLX5_TXOFF_PRE_DECL(sciv_empw);
MLX5_TXOFF_PRE_DECL(i_empw);
MLX5_TXOFF_PRE_DECL(v_empw);
MLX5_TXOFF_PRE_DECL(iv_empw);

/* mlx5_tx_nompw.c */

MLX5_TXOFF_PRE_DECL(full);
MLX5_TXOFF_PRE_DECL(none);
MLX5_TXOFF_PRE_DECL(md);
MLX5_TXOFF_PRE_DECL(mt);
MLX5_TXOFF_PRE_DECL(mtsc);
MLX5_TXOFF_PRE_DECL(mti);
MLX5_TXOFF_PRE_DECL(mtv);
MLX5_TXOFF_PRE_DECL(mtiv);
MLX5_TXOFF_PRE_DECL(sc);
MLX5_TXOFF_PRE_DECL(sci);
MLX5_TXOFF_PRE_DECL(scv);
MLX5_TXOFF_PRE_DECL(sciv);
MLX5_TXOFF_PRE_DECL(i);
MLX5_TXOFF_PRE_DECL(v);
MLX5_TXOFF_PRE_DECL(iv);

/* mlx5_tx_txpp.c */

MLX5_TXOFF_PRE_DECL(full_ts_nompw);
MLX5_TXOFF_PRE_DECL(full_ts_nompwi);
MLX5_TXOFF_PRE_DECL(full_ts);
MLX5_TXOFF_PRE_DECL(full_ts_noi);
MLX5_TXOFF_PRE_DECL(none_ts);
MLX5_TXOFF_PRE_DECL(mdi_ts);
MLX5_TXOFF_PRE_DECL(mti_ts);
MLX5_TXOFF_PRE_DECL(mtiv_ts);

/* mlx5_tx_mpw.c */

MLX5_TXOFF_PRE_DECL(none_mpw);
MLX5_TXOFF_PRE_DECL(mci_mpw);
MLX5_TXOFF_PRE_DECL(mc_mpw);
MLX5_TXOFF_PRE_DECL(i_mpw);

static __rte_always_inline struct mlx5_uar_data *
mlx5_tx_bfreg(struct mlx5_txq_data *txq)
{
	return &MLX5_PROC_PRIV(txq->port_id)->uar_table[txq->idx];
}

/**
 * Ring TX queue doorbell and flush the update by write memory barrier.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param wqe
 *   Pointer to the last WQE posted in the NIC.
 */
static __rte_always_inline void
mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
{
	mlx5_doorbell_ring(mlx5_tx_bfreg(txq), *(volatile uint64_t *)wqe,
			   txq->wqe_ci, txq->qp_db, 1);
}
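/*
 * Illustrative sketch (assumption, condensed from the burst routines below):
 * a send path typically picks the next WQE slot, fills the segments, advances
 * the producer index by the WQE size in WQEBBs and finally rings the doorbell
 * with the last built WQE:
 *
 *	struct mlx5_wqe *wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
 *	... build Control/Ethernet/Data Segments ...
 *	txq->wqe_ci += (ds + 3) / 4;
 *	mlx5_tx_dbrec(txq, wqe);
 */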
/**
 * Convert timestamp from mbuf format to linear counter
 * of Clock Queue completions (24 bits).
 *
 * @param sh
 *   Pointer to the device shared context to fetch Tx
 *   packet pacing timestamp and parameters.
 * @param mts
 *   Timestamp from mbuf to convert.
 * @return
 *   positive or zero value - completion ID to wait.
 *   negative value - conversion error.
 */
static __rte_always_inline int32_t
mlx5_txpp_convert_tx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t mts)
{
	uint64_t ts, ci;
	uint32_t tick;

	do {
		/*
		 * Read atomically two uint64_t fields and compare lsb bits.
		 * If there is no match - the timestamp was updated in
		 * the service thread, data should be re-read.
		 */
		rte_compiler_barrier();
		ci = __atomic_load_n(&sh->txpp.ts.ci_ts, __ATOMIC_RELAXED);
		ts = __atomic_load_n(&sh->txpp.ts.ts, __ATOMIC_RELAXED);
		rte_compiler_barrier();
		if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH)))
			break;
	} while (true);
	/* Perform the skew correction, positive value to send earlier. */
	mts -= sh->txpp.skew;
	mts -= ts;
	if (unlikely(mts >= UINT64_MAX / 2)) {
		/* We have a negative integer, mts is in the past. */
		__atomic_fetch_add(&sh->txpp.err_ts_past,
				   1, __ATOMIC_RELAXED);
		return -1;
	}
	tick = sh->txpp.tick;
	MLX5_ASSERT(tick);
	/* Convert delta to completions, round up. */
	mts = (mts + tick - 1) / tick;
	if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) {
		/* The timestamp is too distant in the future. */
		__atomic_fetch_add(&sh->txpp.err_ts_future,
				   1, __ATOMIC_RELAXED);
		return -1;
	}
	mts <<= 64 - MLX5_CQ_INDEX_WIDTH;
	ci += mts;
	ci >>= 64 - MLX5_CQ_INDEX_WIDTH;
	return ci;
}
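/*
 * Worked example (numbers are assumptions for illustration only): with a
 * Clock Queue tick of 1000 ns and a requested timestamp 25000 ns ahead of
 * the last Clock Queue completion, the delta converts to
 * (25000 + 999) / 1000 = 25 ticks, so the routine returns the current
 * completion index advanced by 25 and wrapped to MLX5_CQ_INDEX_WIDTH bits.
 * The WAIT WQE built from this value holds the packet until the Clock Queue
 * reaches that completion.
 */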
/**
 * Set Software Parser flags and offsets in Ethernet Segment of WQE.
 * Flags must be preliminary initialized to zero.
 *
 * @param loc
 *   Pointer to burst routine local context.
 * @param swp_flags
 *   Pointer to store Software Parser flags.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Software Parser offsets packed in dword.
 *   Software Parser flags are set by pointer.
 */
static __rte_always_inline uint32_t
txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc,
		uint8_t *swp_flags,
		unsigned int olx)
{
	uint64_t ol, tunnel;
	unsigned int idx, off;
	uint32_t set;

	if (!MLX5_TXOFF_CONFIG(SWP))
		return 0;
	ol = loc->mbuf->ol_flags;
	tunnel = ol & RTE_MBUF_F_TX_TUNNEL_MASK;
	/*
	 * Check whether Software Parser is required.
	 * Only customized tunnels may ask for it.
	 */
	if (likely(tunnel != RTE_MBUF_F_TX_TUNNEL_UDP &&
		   tunnel != RTE_MBUF_F_TX_TUNNEL_IP))
		return 0;
	/*
	 * The index should have:
	 * bit[0:1] = RTE_MBUF_F_TX_L4_MASK
	 * bit[4] = RTE_MBUF_F_TX_IPV6
	 * bit[8] = RTE_MBUF_F_TX_OUTER_IPV6
	 * bit[9] = RTE_MBUF_F_TX_OUTER_UDP
	 */
	idx = (ol & (RTE_MBUF_F_TX_L4_MASK | RTE_MBUF_F_TX_IPV6 |
		     RTE_MBUF_F_TX_OUTER_IPV6)) >> 52;
	idx |= (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) ? (1 << 9) : 0;
	*swp_flags = mlx5_swp_types_table[idx];
	/*
	 * Set offsets for SW parser. Since ConnectX-5, SW parser just
	 * complements HW parser. SW parser starts to engage only if HW parser
	 * can't reach a header. For the older devices, HW parser will not kick
	 * in if any of SWP offsets is set. Therefore, all of the L3 offsets
	 * should be set regardless of HW offload.
	 */
	off = loc->mbuf->outer_l2_len;
	if (MLX5_TXOFF_CONFIG(VLAN) && ol & RTE_MBUF_F_TX_VLAN)
		off += sizeof(struct rte_vlan_hdr);
	set = (off >> 1) << 8; /* Outer L3 offset. */
	off += loc->mbuf->outer_l3_len;
	if (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP)
		set |= off >> 1; /* Outer L4 offset. */
	if (ol & (RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IPV6)) { /* Inner IP. */
		const uint64_t csum = ol & RTE_MBUF_F_TX_L4_MASK;
		off += loc->mbuf->l2_len;
		set |= (off >> 1) << 24; /* Inner L3 offset. */
		if (csum == RTE_MBUF_F_TX_TCP_CKSUM ||
		    csum == RTE_MBUF_F_TX_UDP_CKSUM ||
		    (MLX5_TXOFF_CONFIG(TSO) && ol & RTE_MBUF_F_TX_TCP_SEG)) {
			off += loc->mbuf->l3_len;
			set |= (off >> 1) << 16; /* Inner L4 offset. */
		}
	}
	set = rte_cpu_to_le_32(set);
	return set;
}
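/*
 * Illustrative example (assumed header sizes): for a custom UDP tunnel with
 * outer_l2_len = 14, outer_l3_len = 20, l2_len = 8, l3_len = 20 and an inner
 * IPv4/TCP checksum request, the offsets are packed in 2-byte units as:
 *	bits  7:0  - outer L4 offset = (14 + 20) / 2 = 17
 *	bits 15:8  - outer L3 offset = 14 / 2 = 7
 *	bits 23:16 - inner L4 offset = (14 + 20 + 8 + 20) / 2 = 31
 *	bits 31:24 - inner L3 offset = (14 + 20 + 8) / 2 = 21
 * The dword is then converted to little-endian before being stored into the
 * Ethernet Segment.
 */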
/**
 * Convert the Checksum offloads to Verbs.
 *
 * @param buf
 *   Pointer to the mbuf.
 *
 * @return
 *   Converted checksum flags.
 */
static __rte_always_inline uint8_t
txq_ol_cksum_to_cs(struct rte_mbuf *buf)
{
	uint32_t idx;
	uint8_t is_tunnel = !!(buf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK);
	const uint64_t ol_flags_mask = RTE_MBUF_F_TX_TCP_SEG |
				       RTE_MBUF_F_TX_L4_MASK |
				       RTE_MBUF_F_TX_IP_CKSUM |
				       RTE_MBUF_F_TX_OUTER_IP_CKSUM;

	/*
	 * The index should have:
	 * bit[0] = RTE_MBUF_F_TX_TCP_SEG
	 * bit[2:3] = RTE_MBUF_F_TX_UDP_CKSUM, RTE_MBUF_F_TX_TCP_CKSUM
	 * bit[4] = RTE_MBUF_F_TX_IP_CKSUM
	 * bit[8] = RTE_MBUF_F_TX_OUTER_IP_CKSUM
	 * bit[9] = tunnel
	 */
	idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9);
	return mlx5_cksum_table[idx];
}
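/*
 * Illustrative example (sketch): a plain TCP/IPv4 packet requesting
 * RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM yields, after masking and
 * shifting right by 50, bit 2 (TCP checksum) and bit 4 (IP checksum), i.e.
 * idx = 0x14 with no tunnel bit; mlx5_cksum_table[idx] then provides the
 * checksum enable flags written into the Ethernet Segment flags field.
 */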
575 */ 576 mbuf = NULL; 577 continue; 578 } 579 if (likely(pkts_n == 0)) 580 return; 581 n_free = 0; 582 break; 583 } 584 } 585 } 586 587 /** 588 * No inline version to free buffers for optimal call 589 * on the tx_burst completion. 590 */ 591 static __rte_noinline void 592 __mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 593 struct rte_mbuf **__rte_restrict pkts, 594 unsigned int pkts_n, 595 unsigned int olx __rte_unused) 596 { 597 mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx); 598 } 599 600 /** 601 * Free the mbuf from the elts ring buffer till new tail. 602 * 603 * @param txq 604 * Pointer to Tx queue structure. 605 * @param tail 606 * Index in elts to free up to, becomes new elts tail. 607 * @param olx 608 * Configured Tx offloads mask. It is fully defined at 609 * compile time and may be used for optimization. 610 */ 611 static __rte_always_inline void 612 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 613 uint16_t tail, 614 unsigned int olx __rte_unused) 615 { 616 uint16_t n_elts = tail - txq->elts_tail; 617 618 MLX5_ASSERT(n_elts); 619 MLX5_ASSERT(n_elts <= txq->elts_s); 620 /* 621 * Implement a loop to support ring buffer wraparound 622 * with single inlining of mlx5_tx_free_mbuf(). 623 */ 624 do { 625 unsigned int part; 626 627 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 628 part = RTE_MIN(part, n_elts); 629 MLX5_ASSERT(part); 630 MLX5_ASSERT(part <= txq->elts_s); 631 mlx5_tx_free_mbuf(txq, 632 &txq->elts[txq->elts_tail & txq->elts_m], 633 part, olx); 634 txq->elts_tail += part; 635 n_elts -= part; 636 } while (n_elts); 637 } 638 639 /** 640 * Store the mbuf being sent into elts ring buffer. 641 * On Tx completion these mbufs will be freed. 642 * 643 * @param txq 644 * Pointer to Tx queue structure. 645 * @param pkts 646 * Pointer to array of packets to be stored. 647 * @param pkts_n 648 * Number of packets to be stored. 649 * @param olx 650 * Configured Tx offloads mask. It is fully defined at 651 * compile time and may be used for optimization. 652 */ 653 static __rte_always_inline void 654 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 655 struct rte_mbuf **__rte_restrict pkts, 656 unsigned int pkts_n, 657 unsigned int olx __rte_unused) 658 { 659 unsigned int part; 660 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 661 662 MLX5_ASSERT(pkts); 663 MLX5_ASSERT(pkts_n); 664 part = txq->elts_s - (txq->elts_head & txq->elts_m); 665 MLX5_ASSERT(part); 666 MLX5_ASSERT(part <= txq->elts_s); 667 /* This code is a good candidate for vectorizing with SIMD. */ 668 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 669 (void *)pkts, 670 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 671 txq->elts_head += pkts_n; 672 if (unlikely(part < pkts_n)) 673 /* The copy is wrapping around the elts array. */ 674 rte_memcpy((void *)elts, (void *)(pkts + part), 675 (pkts_n - part) * sizeof(struct rte_mbuf *)); 676 } 677 678 /** 679 * Check if the completion request flag should be set in the last WQE. 680 * Both pushed mbufs and WQEs are monitored and the completion request 681 * flag is set if any of thresholds is reached. 682 * 683 * @param txq 684 * Pointer to TX queue structure. 685 * @param loc 686 * Pointer to burst routine local context. 687 * @param olx 688 * Configured Tx offloads mask. It is fully defined at 689 * compile time and may be used for optimization. 
690 */ 691 static __rte_always_inline void 692 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 693 struct mlx5_txq_local *__rte_restrict loc, 694 unsigned int olx) 695 { 696 uint16_t head = txq->elts_head; 697 unsigned int part; 698 699 part = MLX5_TXOFF_CONFIG(INLINE) ? 700 0 : loc->pkts_sent - loc->pkts_copy; 701 head += part; 702 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 703 (MLX5_TXOFF_CONFIG(INLINE) && 704 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 705 volatile struct mlx5_wqe *last = loc->wqe_last; 706 707 MLX5_ASSERT(last); 708 txq->elts_comp = head; 709 if (MLX5_TXOFF_CONFIG(INLINE)) 710 txq->wqe_comp = txq->wqe_ci; 711 /* Request unconditional completion on last WQE. */ 712 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 713 MLX5_COMP_MODE_OFFSET); 714 /* Save elts_head in dedicated free on completion queue. */ 715 #ifdef RTE_LIBRTE_MLX5_DEBUG 716 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 717 (last->cseg.opcode >> 8) << 16; 718 #else 719 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 720 #endif 721 /* A CQE slot must always be available. */ 722 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 723 } 724 } 725 726 /** 727 * Build the Control Segment with specified opcode: 728 * - MLX5_OPCODE_SEND 729 * - MLX5_OPCODE_ENHANCED_MPSW 730 * - MLX5_OPCODE_TSO 731 * 732 * @param txq 733 * Pointer to TX queue structure. 734 * @param loc 735 * Pointer to burst routine local context. 736 * @param wqe 737 * Pointer to WQE to fill with built Control Segment. 738 * @param ds 739 * Supposed length of WQE in segments. 740 * @param opcode 741 * SQ WQE opcode to put into Control Segment. 742 * @param olx 743 * Configured Tx offloads mask. It is fully defined at 744 * compile time and may be used for optimization. 745 */ 746 static __rte_always_inline void 747 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 748 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 749 struct mlx5_wqe *__rte_restrict wqe, 750 unsigned int ds, 751 unsigned int opcode, 752 unsigned int olx __rte_unused) 753 { 754 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 755 756 /* For legacy MPW replace the EMPW by TSO with modifier. */ 757 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 758 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 759 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 760 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 761 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 762 MLX5_COMP_MODE_OFFSET); 763 cs->misc = RTE_BE32(0); 764 } 765 766 /** 767 * Build the Synchronize Queue Segment with specified completion index. 768 * 769 * @param txq 770 * Pointer to TX queue structure. 771 * @param loc 772 * Pointer to burst routine local context. 773 * @param wqe 774 * Pointer to WQE to fill with built Control Segment. 775 * @param wci 776 * Completion index in Clock Queue to wait. 777 * @param olx 778 * Configured Tx offloads mask. It is fully defined at 779 * compile time and may be used for optimization. 
780 */ 781 static __rte_always_inline void 782 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 783 struct mlx5_txq_local *restrict loc __rte_unused, 784 struct mlx5_wqe *restrict wqe, 785 unsigned int wci, 786 unsigned int olx __rte_unused) 787 { 788 struct mlx5_wqe_qseg *qs; 789 790 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 791 qs->max_index = rte_cpu_to_be_32(wci); 792 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq_obj.cq->id); 793 qs->reserved0 = RTE_BE32(0); 794 qs->reserved1 = RTE_BE32(0); 795 } 796 797 /** 798 * Build the Ethernet Segment without inlined data. 799 * Supports Software Parser, Checksums and VLAN insertion Tx offload features. 800 * 801 * @param txq 802 * Pointer to TX queue structure. 803 * @param loc 804 * Pointer to burst routine local context. 805 * @param wqe 806 * Pointer to WQE to fill with built Ethernet Segment. 807 * @param olx 808 * Configured Tx offloads mask. It is fully defined at 809 * compile time and may be used for optimization. 810 */ 811 static __rte_always_inline void 812 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 813 struct mlx5_txq_local *__rte_restrict loc, 814 struct mlx5_wqe *__rte_restrict wqe, 815 unsigned int olx) 816 { 817 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 818 uint32_t csum; 819 820 /* 821 * Calculate and set check sum flags first, dword field 822 * in segment may be shared with Software Parser flags. 823 */ 824 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 825 es->flags = rte_cpu_to_le_32(csum); 826 /* 827 * Calculate and set Software Parser offsets and flags. 828 * These flags a set for custom UDP and IP tunnel packets. 829 */ 830 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 831 /* Fill metadata field if needed. */ 832 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 833 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 834 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 835 0 : 0; 836 /* Engage VLAN tag insertion feature if requested. */ 837 if (MLX5_TXOFF_CONFIG(VLAN) && 838 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 839 /* 840 * We should get here only if device support 841 * this feature correctly. 842 */ 843 MLX5_ASSERT(txq->vlan_en); 844 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 845 loc->mbuf->vlan_tci); 846 } else { 847 es->inline_hdr = RTE_BE32(0); 848 } 849 } 850 851 /** 852 * Build the Ethernet Segment with minimal inlined data 853 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 854 * used to fill the gap in single WQEBB WQEs. 855 * Supports Software Parser, Checksums and VLAN 856 * insertion Tx offload features. 857 * 858 * @param txq 859 * Pointer to TX queue structure. 860 * @param loc 861 * Pointer to burst routine local context. 862 * @param wqe 863 * Pointer to WQE to fill with built Ethernet Segment. 864 * @param vlan 865 * Length of VLAN tag insertion if any. 866 * @param olx 867 * Configured Tx offloads mask. It is fully defined at 868 * compile time and may be used for optimization. 869 */ 870 static __rte_always_inline void 871 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 872 struct mlx5_txq_local *__rte_restrict loc, 873 struct mlx5_wqe *__rte_restrict wqe, 874 unsigned int vlan, 875 unsigned int olx) 876 { 877 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 878 uint32_t csum; 879 uint8_t *psrc, *pdst; 880 881 /* 882 * Calculate and set check sum flags first, dword field 883 * in segment may be shared with Software Parser flags. 
884 */ 885 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 886 es->flags = rte_cpu_to_le_32(csum); 887 /* 888 * Calculate and set Software Parser offsets and flags. 889 * These flags a set for custom UDP and IP tunnel packets. 890 */ 891 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 892 /* Fill metadata field if needed. */ 893 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 894 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 895 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 896 0 : 0; 897 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 898 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 899 es->inline_data = *(unaligned_uint16_t *)psrc; 900 psrc += sizeof(uint16_t); 901 pdst = (uint8_t *)(es + 1); 902 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 903 /* Implement VLAN tag insertion as part inline data. */ 904 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 905 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 906 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 907 /* Insert VLAN ethertype + VLAN tag. */ 908 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 909 ((RTE_ETHER_TYPE_VLAN << 16) | 910 loc->mbuf->vlan_tci); 911 pdst += sizeof(struct rte_vlan_hdr); 912 /* Copy the rest two bytes from packet data. */ 913 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 914 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 915 } else { 916 /* Fill the gap in the title WQEBB with inline data. */ 917 rte_mov16(pdst, psrc); 918 } 919 } 920 921 /** 922 * Build the Ethernet Segment with entire packet data inlining. Checks the 923 * boundary of WQEBB and ring buffer wrapping, supports Software Parser, 924 * Checksums and VLAN insertion Tx offload features. 925 * 926 * @param txq 927 * Pointer to TX queue structure. 928 * @param loc 929 * Pointer to burst routine local context. 930 * @param wqe 931 * Pointer to WQE to fill with built Ethernet Segment. 932 * @param vlan 933 * Length of VLAN tag insertion if any. 934 * @param inlen 935 * Length of data to inline (VLAN included, if any). 936 * @param tso 937 * TSO flag, set mss field from the packet. 938 * @param olx 939 * Configured Tx offloads mask. It is fully defined at 940 * compile time and may be used for optimization. 941 * 942 * @return 943 * Pointer to the next Data Segment (aligned and wrapped around). 944 */ 945 static __rte_always_inline struct mlx5_wqe_dseg * 946 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 947 struct mlx5_txq_local *__rte_restrict loc, 948 struct mlx5_wqe *__rte_restrict wqe, 949 unsigned int vlan, 950 unsigned int inlen, 951 unsigned int tso, 952 unsigned int olx) 953 { 954 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 955 uint32_t csum; 956 uint8_t *psrc, *pdst; 957 unsigned int part; 958 959 /* 960 * Calculate and set check sum flags first, dword field 961 * in segment may be shared with Software Parser flags. 962 */ 963 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 964 if (tso) { 965 csum <<= 24; 966 csum |= loc->mbuf->tso_segsz; 967 es->flags = rte_cpu_to_be_32(csum); 968 } else { 969 es->flags = rte_cpu_to_le_32(csum); 970 } 971 /* 972 * Calculate and set Software Parser offsets and flags. 973 * These flags a set for custom UDP and IP tunnel packets. 974 */ 975 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 976 /* Fill metadata field if needed. */ 977 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 978 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 
/**
 * Build the Ethernet Segment with minimal inlined data
 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is
 * used to fill the gap in single WQEBB WQEs.
 * Supports Software Parser, Checksums and VLAN
 * insertion Tx offload features.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet Segment.
 * @param vlan
 *   Length of VLAN tag insertion if any.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe *__rte_restrict wqe,
		  unsigned int vlan,
		  unsigned int olx)
{
	struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg;
	uint32_t csum;
	uint8_t *psrc, *pdst;

	/*
	 * Calculate and set check sum flags first, dword field
	 * in segment may be shared with Software Parser flags.
	 */
	csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0;
	es->flags = rte_cpu_to_le_32(csum);
	/*
	 * Calculate and set Software Parser offsets and flags.
	 * These flags are set for custom UDP and IP tunnel packets.
	 */
	es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx);
	/* Fill metadata field if needed. */
	es->metadata = MLX5_TXOFF_CONFIG(METADATA) ?
		       loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ?
		       rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) :
		       0 : 0;
	psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *);
	es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE);
	es->inline_data = *(unaligned_uint16_t *)psrc;
	psrc += sizeof(uint16_t);
	pdst = (uint8_t *)(es + 1);
	if (MLX5_TXOFF_CONFIG(VLAN) && vlan) {
		/* Implement VLAN tag insertion as part of inline data. */
		memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t));
		pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		/* Insert VLAN ethertype + VLAN tag. */
		*(unaligned_uint32_t *)pdst = rte_cpu_to_be_32
						((RTE_ETHER_TYPE_VLAN << 16) |
						 loc->mbuf->vlan_tci);
		pdst += sizeof(struct rte_vlan_hdr);
		/* Copy the remaining two bytes from packet data. */
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t)));
		*(uint16_t *)pdst = *(unaligned_uint16_t *)psrc;
	} else {
		/* Fill the gap in the title WQEBB with inline data. */
		rte_mov16(pdst, psrc);
	}
}
/**
 * Build the Ethernet Segment with entire packet data inlining. Checks the
 * boundary of WQEBB and ring buffer wrapping, supports Software Parser,
 * Checksums and VLAN insertion Tx offload features.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet Segment.
 * @param vlan
 *   Length of VLAN tag insertion if any.
 * @param inlen
 *   Length of data to inline (VLAN included, if any).
 * @param tso
 *   TSO flag, set mss field from the packet.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Pointer to the next Data Segment (aligned and wrapped around).
 */
static __rte_always_inline struct mlx5_wqe_dseg *
mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe *__rte_restrict wqe,
		  unsigned int vlan,
		  unsigned int inlen,
		  unsigned int tso,
		  unsigned int olx)
{
	struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg;
	uint32_t csum;
	uint8_t *psrc, *pdst;
	unsigned int part;

	/*
	 * Calculate and set check sum flags first, dword field
	 * in segment may be shared with Software Parser flags.
	 */
	csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0;
	if (tso) {
		csum <<= 24;
		csum |= loc->mbuf->tso_segsz;
		es->flags = rte_cpu_to_be_32(csum);
	} else {
		es->flags = rte_cpu_to_le_32(csum);
	}
	/*
	 * Calculate and set Software Parser offsets and flags.
	 * These flags are set for custom UDP and IP tunnel packets.
	 */
	es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx);
	/* Fill metadata field if needed. */
	es->metadata = MLX5_TXOFF_CONFIG(METADATA) ?
		       loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ?
		       rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) :
		       0 : 0;
	psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *);
	es->inline_hdr_sz = rte_cpu_to_be_16(inlen);
	es->inline_data = *(unaligned_uint16_t *)psrc;
	psrc += sizeof(uint16_t);
	pdst = (uint8_t *)(es + 1);
	if (MLX5_TXOFF_CONFIG(VLAN) && vlan) {
		/* Implement VLAN tag insertion as part of inline data. */
		memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t));
		pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		/* Insert VLAN ethertype + VLAN tag. */
		*(unaligned_uint32_t *)pdst = rte_cpu_to_be_32
						((RTE_ETHER_TYPE_VLAN << 16) |
						 loc->mbuf->vlan_tci);
		pdst += sizeof(struct rte_vlan_hdr);
		/* Copy the remaining two bytes from packet data. */
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t)));
		*(uint16_t *)pdst = *(unaligned_uint16_t *)psrc;
		psrc += sizeof(uint16_t);
	} else {
		/* Fill the gap in the title WQEBB with inline data. */
		rte_mov16(pdst, psrc);
		psrc += sizeof(rte_v128u32_t);
	}
	pdst = (uint8_t *)(es + 2);
	MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE);
	MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end);
	inlen -= MLX5_ESEG_MIN_INLINE_SIZE;
	if (!inlen) {
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE));
		return (struct mlx5_wqe_dseg *)pdst;
	}
	/*
	 * The WQEBB space availability is checked by caller.
	 * Here we should be aware of WQE ring buffer wraparound only.
	 */
	part = (uint8_t *)txq->wqes_end - pdst;
	part = RTE_MIN(part, inlen);
	do {
		rte_memcpy(pdst, psrc, part);
		inlen -= part;
		if (likely(!inlen)) {
			/*
			 * If return value is not used by the caller
			 * the code below will be optimized out.
			 */
			pdst += part;
			pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
			if (unlikely(pdst >= (uint8_t *)txq->wqes_end))
				pdst = (uint8_t *)txq->wqes;
			return (struct mlx5_wqe_dseg *)pdst;
		}
		pdst = (uint8_t *)txq->wqes;
		psrc += part;
		part = inlen;
	} while (true);
}
/**
 * Copy data from a chain of mbufs to the specified linear buffer.
 * Supports Checksums and VLAN insertion Tx offload features. If data
 * from some mbuf is copied completely, this mbuf is freed. A local
 * structure is used to keep the byte stream state.
 *
 * @param pdst
 *   Pointer to the destination linear buffer.
 * @param loc
 *   Pointer to burst routine local context.
 * @param len
 *   Length of data to be copied.
 * @param must
 *   Length of data to be copied ignoring no inline hint.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Number of actual copied data bytes. This is always greater than or
 *   equal to the must parameter and might be less than len if the no
 *   inline hint flag is encountered.
 */
static __rte_always_inline unsigned int
mlx5_tx_mseg_memcpy(uint8_t *pdst,
		    struct mlx5_txq_local *__rte_restrict loc,
		    unsigned int len,
		    unsigned int must,
		    unsigned int olx __rte_unused)
{
	struct rte_mbuf *mbuf;
	unsigned int part, dlen, copy = 0;
	uint8_t *psrc;

	MLX5_ASSERT(len);
	do {
		/* Allow zero length packets, must check first. */
		dlen = rte_pktmbuf_data_len(loc->mbuf);
		if (dlen <= loc->mbuf_off) {
			/* Exhausted packet, just free. */
			mbuf = loc->mbuf;
			loc->mbuf = mbuf->next;
			rte_pktmbuf_free_seg(mbuf);
			loc->mbuf_off = 0;
			MLX5_ASSERT(loc->mbuf_nseg > 1);
			MLX5_ASSERT(loc->mbuf);
			--loc->mbuf_nseg;
			if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) {
				unsigned int diff;

				if (copy >= must) {
					/*
					 * We already copied the minimal
					 * requested amount of data.
					 */
					return copy;
				}
				diff = must - copy;
				if (diff <= rte_pktmbuf_data_len(loc->mbuf)) {
					/*
					 * Copy only the minimal required
					 * part of the data buffer. Limit amount
					 * of data to be copied to the length of
					 * available space.
					 */
					len = RTE_MIN(len, diff);
				}
			}
			continue;
		}
		dlen -= loc->mbuf_off;
		psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *,
					       loc->mbuf_off);
		part = RTE_MIN(len, dlen);
		rte_memcpy(pdst, psrc, part);
		copy += part;
		loc->mbuf_off += part;
		len -= part;
		if (!len) {
			if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) {
				loc->mbuf_off = 0;
				/* Exhausted packet, just free. */
				mbuf = loc->mbuf;
				loc->mbuf = mbuf->next;
				rte_pktmbuf_free_seg(mbuf);
				loc->mbuf_off = 0;
				MLX5_ASSERT(loc->mbuf_nseg >= 1);
				--loc->mbuf_nseg;
			}
			return copy;
		}
		pdst += part;
	} while (true);
}
1288 */ 1289 static __rte_always_inline void 1290 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 1291 struct mlx5_txq_local *__rte_restrict loc, 1292 struct mlx5_wqe_dseg *__rte_restrict dseg, 1293 uint8_t *buf, 1294 unsigned int len, 1295 unsigned int olx __rte_unused) 1296 1297 { 1298 uintptr_t dst, src; 1299 1300 MLX5_ASSERT(len); 1301 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 1302 dseg->bcount = rte_cpu_to_be_32(len); 1303 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1304 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1305 1306 return; 1307 } 1308 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1309 /* Unrolled implementation of generic rte_memcpy. */ 1310 dst = (uintptr_t)&dseg->inline_data[0]; 1311 src = (uintptr_t)buf; 1312 if (len & 0x08) { 1313 #ifdef RTE_ARCH_STRICT_ALIGN 1314 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 1315 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1316 dst += sizeof(uint32_t); 1317 src += sizeof(uint32_t); 1318 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1319 dst += sizeof(uint32_t); 1320 src += sizeof(uint32_t); 1321 #else 1322 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 1323 dst += sizeof(uint64_t); 1324 src += sizeof(uint64_t); 1325 #endif 1326 } 1327 if (len & 0x04) { 1328 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1329 dst += sizeof(uint32_t); 1330 src += sizeof(uint32_t); 1331 } 1332 if (len & 0x02) { 1333 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 1334 dst += sizeof(uint16_t); 1335 src += sizeof(uint16_t); 1336 } 1337 if (len & 0x01) 1338 *(uint8_t *)dst = *(uint8_t *)src; 1339 } 1340 1341 /** 1342 * Build the Data Segment of inlined data from single 1343 * segment packet, no VLAN insertion. 1344 * 1345 * @param txq 1346 * Pointer to TX queue structure. 1347 * @param loc 1348 * Pointer to burst routine local context. 1349 * @param dseg 1350 * Pointer to WQE to fill with built Data Segment. 1351 * @param buf 1352 * Data buffer to point. 1353 * @param len 1354 * Data buffer length. 1355 * @param olx 1356 * Configured Tx offloads mask. It is fully defined at 1357 * compile time and may be used for optimization. 1358 * 1359 * @return 1360 * Pointer to the next Data Segment after inlined data. 1361 * Ring buffer wraparound check is needed. We do not do it here because it 1362 * may not be needed for the last packet in the eMPW session. 1363 */ 1364 static __rte_always_inline struct mlx5_wqe_dseg * 1365 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 1366 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1367 struct mlx5_wqe_dseg *__rte_restrict dseg, 1368 uint8_t *buf, 1369 unsigned int len, 1370 unsigned int olx __rte_unused) 1371 { 1372 unsigned int part; 1373 uint8_t *pdst; 1374 1375 if (!MLX5_TXOFF_CONFIG(MPW)) { 1376 /* Store the descriptor byte counter for eMPW sessions. */ 1377 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1378 pdst = &dseg->inline_data[0]; 1379 } else { 1380 /* The entire legacy MPW session counter is stored on close. */ 1381 pdst = (uint8_t *)dseg; 1382 } 1383 /* 1384 * The WQEBB space availability is checked by caller. 1385 * Here we should be aware of WQE ring buffer wraparound only. 1386 */ 1387 part = (uint8_t *)txq->wqes_end - pdst; 1388 part = RTE_MIN(part, len); 1389 do { 1390 rte_memcpy(pdst, buf, part); 1391 len -= part; 1392 if (likely(!len)) { 1393 pdst += part; 1394 if (!MLX5_TXOFF_CONFIG(MPW)) 1395 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1396 /* Note: no final wraparound check here. 
/**
 * Build the Data Segment of pointer type.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param dseg
 *   Pointer to WQE to fill with built Data Segment.
 * @param buf
 *   Data buffer to point.
 * @param len
 *   Data buffer length.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq,
		 struct mlx5_txq_local *__rte_restrict loc,
		 struct mlx5_wqe_dseg *__rte_restrict dseg,
		 uint8_t *buf,
		 unsigned int len,
		 unsigned int olx __rte_unused)

{
	MLX5_ASSERT(len);
	dseg->bcount = rte_cpu_to_be_32(len);
	dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf);
	dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf);
}

/**
 * Build the Data Segment of pointer type, or inline the data if its length
 * fits into the minimal Data Segment size.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param dseg
 *   Pointer to WQE to fill with built Data Segment.
 * @param buf
 *   Data buffer to point.
 * @param len
 *   Data buffer length.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe_dseg *__rte_restrict dseg,
		  uint8_t *buf,
		  unsigned int len,
		  unsigned int olx __rte_unused)

{
	uintptr_t dst, src;

	MLX5_ASSERT(len);
	if (len > MLX5_DSEG_MIN_INLINE_SIZE) {
		dseg->bcount = rte_cpu_to_be_32(len);
		dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf);
		dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf);

		return;
	}
	dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE);
	/* Unrolled implementation of generic rte_memcpy. */
	dst = (uintptr_t)&dseg->inline_data[0];
	src = (uintptr_t)buf;
	if (len & 0x08) {
#ifdef RTE_ARCH_STRICT_ALIGN
		MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t)));
		*(uint32_t *)dst = *(unaligned_uint32_t *)src;
		dst += sizeof(uint32_t);
		src += sizeof(uint32_t);
		*(uint32_t *)dst = *(unaligned_uint32_t *)src;
		dst += sizeof(uint32_t);
		src += sizeof(uint32_t);
#else
		*(uint64_t *)dst = *(unaligned_uint64_t *)src;
		dst += sizeof(uint64_t);
		src += sizeof(uint64_t);
#endif
	}
	if (len & 0x04) {
		*(uint32_t *)dst = *(unaligned_uint32_t *)src;
		dst += sizeof(uint32_t);
		src += sizeof(uint32_t);
	}
	if (len & 0x02) {
		*(uint16_t *)dst = *(unaligned_uint16_t *)src;
		dst += sizeof(uint16_t);
		src += sizeof(uint16_t);
	}
	if (len & 0x01)
		*(uint8_t *)dst = *(uint8_t *)src;
}
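/*
 * Illustrative note (sketch): mlx5_tx_dseg_iptr() above inlines short
 * trailing fragments instead of referencing them - e.g. an 8-byte fragment
 * (below MLX5_DSEG_MIN_INLINE_SIZE) is stored as
 * (8 | MLX5_ETH_WQE_DATA_INLINE) in bcount followed by the copied bytes,
 * saving the hardware a gather of a tiny buffer, while longer fragments fall
 * back to the ordinary lkey/address Data Segment built by mlx5_tx_dseg_ptr().
 */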
/**
 * Build the Data Segment of inlined data from single
 * segment packet, no VLAN insertion.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param dseg
 *   Pointer to WQE to fill with built Data Segment.
 * @param buf
 *   Data buffer to point.
 * @param len
 *   Data buffer length.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Pointer to the next Data Segment after inlined data.
 *   Ring buffer wraparound check is needed. We do not do it here because it
 *   may not be needed for the last packet in the eMPW session.
 */
static __rte_always_inline struct mlx5_wqe_dseg *
mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc __rte_unused,
		  struct mlx5_wqe_dseg *__rte_restrict dseg,
		  uint8_t *buf,
		  unsigned int len,
		  unsigned int olx __rte_unused)
{
	unsigned int part;
	uint8_t *pdst;

	if (!MLX5_TXOFF_CONFIG(MPW)) {
		/* Store the descriptor byte counter for eMPW sessions. */
		dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE);
		pdst = &dseg->inline_data[0];
	} else {
		/* The entire legacy MPW session counter is stored on close. */
		pdst = (uint8_t *)dseg;
	}
	/*
	 * The WQEBB space availability is checked by caller.
	 * Here we should be aware of WQE ring buffer wraparound only.
	 */
	part = (uint8_t *)txq->wqes_end - pdst;
	part = RTE_MIN(part, len);
	do {
		rte_memcpy(pdst, buf, part);
		len -= part;
		if (likely(!len)) {
			pdst += part;
			if (!MLX5_TXOFF_CONFIG(MPW))
				pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
			/* Note: no final wraparound check here. */
			return (struct mlx5_wqe_dseg *)pdst;
		}
		pdst = (uint8_t *)txq->wqes;
		buf += part;
		part = len;
	} while (true);
}

/**
 * Build the Data Segment of inlined data from single
 * segment packet with VLAN insertion.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param dseg
 *   Pointer to the dseg to fill with built Data Segment.
 * @param buf
 *   Data buffer to point.
 * @param len
 *   Data buffer length.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Pointer to the next Data Segment after inlined data.
 *   Ring buffer wraparound check is needed.
 */
static __rte_always_inline struct mlx5_wqe_dseg *
mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc __rte_unused,
		  struct mlx5_wqe_dseg *__rte_restrict dseg,
		  uint8_t *buf,
		  unsigned int len,
		  unsigned int olx __rte_unused)

{
	unsigned int part;
	uint8_t *pdst;

	MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE);
	if (!MLX5_TXOFF_CONFIG(MPW)) {
		/* Store the descriptor byte counter for eMPW sessions. */
		dseg->bcount = rte_cpu_to_be_32
				((len + sizeof(struct rte_vlan_hdr)) |
				 MLX5_ETH_WQE_DATA_INLINE);
		pdst = &dseg->inline_data[0];
	} else {
		/* The entire legacy MPW session counter is stored on close. */
		pdst = (uint8_t *)dseg;
	}
	memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE);
	buf += MLX5_DSEG_MIN_INLINE_SIZE;
	pdst += MLX5_DSEG_MIN_INLINE_SIZE;
	len -= MLX5_DSEG_MIN_INLINE_SIZE;
	/* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */
	MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE));
	if (unlikely(pdst >= (uint8_t *)txq->wqes_end))
		pdst = (uint8_t *)txq->wqes;
	*(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) |
					     loc->mbuf->vlan_tci);
	pdst += sizeof(struct rte_vlan_hdr);
	/*
	 * The WQEBB space availability is checked by caller.
	 * Here we should be aware of WQE ring buffer wraparound only.
	 */
	part = (uint8_t *)txq->wqes_end - pdst;
	part = RTE_MIN(part, len);
	do {
		rte_memcpy(pdst, buf, part);
		len -= part;
		if (likely(!len)) {
			pdst += part;
			if (!MLX5_TXOFF_CONFIG(MPW))
				pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
			/* Note: no final wraparound check here. */
			return (struct mlx5_wqe_dseg *)pdst;
		}
		pdst = (uint8_t *)txq->wqes;
		buf += part;
		part = len;
	} while (true);
}
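/*
 * Layout note (sketch, assuming MLX5_DSEG_MIN_INLINE_SIZE covers the two MAC
 * addresses): mlx5_tx_dseg_vlan() copies the destination/source MAC bytes
 * verbatim, then writes the 4-byte VLAN ethertype + TCI word taken from
 * mbuf->vlan_tci, and only then continues with the rest of the packet data,
 * which is why the advertised inline byte count is
 * len + sizeof(struct rte_vlan_hdr).
 */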
/**
 * Build the Ethernet Segment with optionally inlined data with
 * VLAN insertion and following Data Segments (if any) from
 * multi-segment packet. Used by ordinary send and TSO.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet/Data Segments.
 * @param vlan
 *   Length of VLAN header to insert, 0 means no VLAN insertion.
 * @param inlen
 *   Data length to inline. For TSO this parameter specifies exact value,
 *   for ordinary send routine can be aligned by caller to provide better WQE
 *   space saving and data buffer start address alignment.
 *   This length includes VLAN header being inserted.
 * @param tso
 *   Zero means ordinary send, inlined data can be extended,
 *   otherwise this is TSO, inlined data length is fixed.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Actual size of built WQE in segments.
 */
static __rte_always_inline unsigned int
mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq,
		   struct mlx5_txq_local *__rte_restrict loc,
		   struct mlx5_wqe *__rte_restrict wqe,
		   unsigned int vlan,
		   unsigned int inlen,
		   unsigned int tso,
		   unsigned int olx __rte_unused)
{
	struct mlx5_wqe_dseg *__rte_restrict dseg;
	unsigned int ds;

	MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen);
	loc->mbuf_nseg = NB_SEGS(loc->mbuf);
	loc->mbuf_off = 0;

	dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx);
	if (!loc->mbuf_nseg)
		goto dseg_done;
	/*
	 * There are still some mbufs remaining, not inlined.
	 * The first mbuf may be partially inlined and we
	 * must process the possible non-zero data offset.
	 */
	if (loc->mbuf_off) {
		unsigned int dlen;
		uint8_t *dptr;

		/*
		 * Exhausted packets must be dropped before.
		 * Non-zero offset means there is some data
		 * remaining in the packet.
		 */
		MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf));
		MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf));
		dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *,
					       loc->mbuf_off);
		dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off;
		/*
		 * Build the pointer/minimal Data Segment.
		 * Do ring buffer wrapping check in advance.
		 */
		if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end)
			dseg = (struct mlx5_wqe_dseg *)txq->wqes;
		mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx);
		/* Store the mbuf to be freed on completion. */
		MLX5_ASSERT(loc->elts_free);
		txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf;
		--loc->elts_free;
		++dseg;
		if (--loc->mbuf_nseg == 0)
			goto dseg_done;
		loc->mbuf = loc->mbuf->next;
		loc->mbuf_off = 0;
	}
	do {
		if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) {
			struct rte_mbuf *mbuf;

			/* Zero length segment found, just skip. */
			mbuf = loc->mbuf;
			loc->mbuf = loc->mbuf->next;
			rte_pktmbuf_free_seg(mbuf);
			if (--loc->mbuf_nseg == 0)
				break;
		} else {
			if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end)
				dseg = (struct mlx5_wqe_dseg *)txq->wqes;
			mlx5_tx_dseg_iptr
				(txq, loc, dseg,
				 rte_pktmbuf_mtod(loc->mbuf, uint8_t *),
				 rte_pktmbuf_data_len(loc->mbuf), olx);
			MLX5_ASSERT(loc->elts_free);
			txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf;
			--loc->elts_free;
			++dseg;
			if (--loc->mbuf_nseg == 0)
				break;
			loc->mbuf = loc->mbuf->next;
		}
	} while (true);

dseg_done:
	/* Calculate actual segments used from the dseg pointer. */
	if ((uintptr_t)wqe < (uintptr_t)dseg)
		ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE;
	else
		ds = (((uintptr_t)dseg - (uintptr_t)wqe) +
		      txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE;
	return ds;
}
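/*
 * Illustrative usage (sketch, matching mlx5_tx_packet_multi_tso() below):
 * callers patch the WQE size after building the segments, because the final
 * segment count is only known once all mbuf fragments have been walked:
 *
 *	ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx);
 *	wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
 *	txq->wqe_ci += (ds + 3) / 4;
 *	loc->wqe_free -= (ds + 3) / 4;
 */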
1698 */ 1699 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1700 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1701 vlan = sizeof(struct rte_vlan_hdr); 1702 inlen = loc->mbuf->l2_len + vlan + 1703 loc->mbuf->l3_len + loc->mbuf->l4_len; 1704 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 1705 return MLX5_TXCMP_CODE_ERROR; 1706 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 1707 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 1708 /* Packet must contain all TSO headers. */ 1709 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 1710 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 1711 inlen > (dlen + vlan))) 1712 return MLX5_TXCMP_CODE_ERROR; 1713 MLX5_ASSERT(inlen >= txq->inlen_mode); 1714 /* 1715 * Check whether there are enough free WQEBBs: 1716 * - Control Segment 1717 * - Ethernet Segment 1718 * - First Segment of inlined Ethernet data 1719 * - ... data continued ... 1720 * - Data Segments of pointer/min inline type 1721 */ 1722 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 1723 MLX5_ESEG_MIN_INLINE_SIZE + 1724 MLX5_WSEG_SIZE + 1725 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 1726 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1727 return MLX5_TXCMP_CODE_EXIT; 1728 /* Check for maximal WQE size. */ 1729 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 1730 return MLX5_TXCMP_CODE_ERROR; 1731 #ifdef MLX5_PMD_SOFT_COUNTERS 1732 /* Update sent data bytes/packets counters. */ 1733 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 1734 loc->mbuf->tso_segsz; 1735 /* 1736 * One will be added for mbuf itself at the end of the mlx5_tx_burst 1737 * from loc->pkts_sent field. 1738 */ 1739 --ntcp; 1740 txq->stats.opackets += ntcp; 1741 txq->stats.obytes += dlen + vlan + ntcp * inlen; 1742 #endif 1743 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1744 loc->wqe_last = wqe; 1745 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 1746 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 1747 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 1748 txq->wqe_ci += (ds + 3) / 4; 1749 loc->wqe_free -= (ds + 3) / 4; 1750 return MLX5_TXCMP_CODE_MULTI; 1751 } 1752 1753 /** 1754 * Tx one packet function for multi-segment SEND. Supports all types of Tx 1755 * offloads, uses MLX5_OPCODE_SEND to build WQEs, sends one packet per WQE, 1756 * without any data inlining in Ethernet Segment. 1757 * 1758 * This routine is responsible for storing processed mbuf 1759 * into elts ring buffer and update elts_head. 1760 * 1761 * @param txq 1762 * Pointer to TX queue structure. 1763 * @param loc 1764 * Pointer to burst routine local context. 1765 * @param olx 1766 * Configured Tx offloads mask. It is fully defined at 1767 * compile time and may be used for optimization. 1768 * 1769 * @return 1770 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1771 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1772 * Local context variables partially updated. 1773 */ 1774 static __rte_always_inline enum mlx5_txcmp_code 1775 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 1776 struct mlx5_txq_local *__rte_restrict loc, 1777 unsigned int olx) 1778 { 1779 struct mlx5_wqe_dseg *__rte_restrict dseg; 1780 struct mlx5_wqe *__rte_restrict wqe; 1781 unsigned int ds, nseg; 1782 1783 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1784 if (MLX5_TXOFF_CONFIG(TXPP)) { 1785 enum mlx5_txcmp_code wret; 1786 1787 /* Generate WAIT for scheduling if requested. 
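 * The result of the WAIT WQE attempt is checked below and the
 * EXIT/ERROR codes are propagated to the caller unchanged.
 *
 * Sizing note for the no-inline path that follows: the WQE takes
 * ds = 2 + NB_SEGS Data Segments (Control + Ethernet + one pointer
 * Data Segment per mbuf segment), e.g. a chain of 5 segments needs
 * 7 DS, i.e. (7 + 3) / 4 = 2 WQEBBs.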
*/ 1788 wret = mlx5_tx_schedule_send(txq, loc, olx); 1789 if (wret == MLX5_TXCMP_CODE_EXIT) 1790 return MLX5_TXCMP_CODE_EXIT; 1791 if (wret == MLX5_TXCMP_CODE_ERROR) 1792 return MLX5_TXCMP_CODE_ERROR; 1793 } 1794 /* 1795 * No inline at all, it means the CPU cycles saving is prioritized at 1796 * configuration, we should not copy any packet data to WQE. 1797 */ 1798 nseg = NB_SEGS(loc->mbuf); 1799 ds = 2 + nseg; 1800 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1801 return MLX5_TXCMP_CODE_EXIT; 1802 /* Check for maximal WQE size. */ 1803 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 1804 return MLX5_TXCMP_CODE_ERROR; 1805 /* 1806 * Some Tx offloads may cause an error if packet is not long enough, 1807 * check against assumed minimal length. 1808 */ 1809 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 1810 return MLX5_TXCMP_CODE_ERROR; 1811 #ifdef MLX5_PMD_SOFT_COUNTERS 1812 /* Update sent data bytes counter. */ 1813 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 1814 if (MLX5_TXOFF_CONFIG(VLAN) && 1815 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1816 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 1817 #endif 1818 /* 1819 * SEND WQE, one WQEBB: 1820 * - Control Segment, SEND opcode 1821 * - Ethernet Segment, optional VLAN, no inline 1822 * - Data Segments, pointer only type 1823 */ 1824 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1825 loc->wqe_last = wqe; 1826 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 1827 mlx5_tx_eseg_none(txq, loc, wqe, olx); 1828 dseg = &wqe->dseg[0]; 1829 do { 1830 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1831 struct rte_mbuf *mbuf; 1832 1833 /* 1834 * Zero length segment found, have to correct total 1835 * size of WQE in segments. 1836 * It is supposed to be rare occasion, so in normal 1837 * case (no zero length segments) we avoid extra 1838 * writing to the Control Segment. 1839 */ 1840 --ds; 1841 wqe->cseg.sq_ds -= RTE_BE32(1); 1842 mbuf = loc->mbuf; 1843 loc->mbuf = mbuf->next; 1844 rte_pktmbuf_free_seg(mbuf); 1845 if (--nseg == 0) 1846 break; 1847 } else { 1848 mlx5_tx_dseg_ptr 1849 (txq, loc, dseg, 1850 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1851 rte_pktmbuf_data_len(loc->mbuf), olx); 1852 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1853 --loc->elts_free; 1854 if (--nseg == 0) 1855 break; 1856 ++dseg; 1857 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1858 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1859 loc->mbuf = loc->mbuf->next; 1860 } 1861 } while (true); 1862 txq->wqe_ci += (ds + 3) / 4; 1863 loc->wqe_free -= (ds + 3) / 4; 1864 return MLX5_TXCMP_CODE_MULTI; 1865 } 1866 1867 /** 1868 * Tx one packet function for multi-segment SEND. Supports all 1869 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 1870 * sends one packet per WQE, with data inlining in 1871 * Ethernet Segment and minimal Data Segments. 1872 * 1873 * This routine is responsible for storing processed mbuf 1874 * into elts ring buffer and update elts_head. 1875 * 1876 * @param txq 1877 * Pointer to TX queue structure. 1878 * @param loc 1879 * Pointer to burst routine local context. 1880 * @param olx 1881 * Configured Tx offloads mask. It is fully defined at 1882 * compile time and may be used for optimization. 1883 * 1884 * @return 1885 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1886 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1887 * Local context variables partially updated. 
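 *
 * If the whole packet (plus the VLAN header, if insertion is
 * requested) fits into txq->inlen_send and the NOINLINE hint is not
 * set, the data is inlined completely. Otherwise only part of the
 * data is inlined (at least txq->inlen_mode or the minimal inline
 * header, possibly extended to whole leading mbufs or up to a cache
 * line boundary) and the rest is referenced by pointer Data
 * Segments, or the routine falls back to
 * mlx5_tx_packet_multi_send().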
1888 */ 1889 static __rte_always_inline enum mlx5_txcmp_code 1890 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 1891 struct mlx5_txq_local *__rte_restrict loc, 1892 unsigned int olx) 1893 { 1894 struct mlx5_wqe *__rte_restrict wqe; 1895 unsigned int ds, inlen, dlen, vlan = 0; 1896 1897 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 1898 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1899 if (MLX5_TXOFF_CONFIG(TXPP)) { 1900 enum mlx5_txcmp_code wret; 1901 1902 /* Generate WAIT for scheduling if requested. */ 1903 wret = mlx5_tx_schedule_send(txq, loc, olx); 1904 if (wret == MLX5_TXCMP_CODE_EXIT) 1905 return MLX5_TXCMP_CODE_EXIT; 1906 if (wret == MLX5_TXCMP_CODE_ERROR) 1907 return MLX5_TXCMP_CODE_ERROR; 1908 } 1909 /* 1910 * First calculate data length to be inlined 1911 * to estimate the required space for WQE. 1912 */ 1913 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1914 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1915 vlan = sizeof(struct rte_vlan_hdr); 1916 inlen = dlen + vlan; 1917 /* Check against minimal length. */ 1918 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 1919 return MLX5_TXCMP_CODE_ERROR; 1920 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 1921 if (inlen > txq->inlen_send || 1922 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1923 struct rte_mbuf *mbuf; 1924 unsigned int nxlen; 1925 uintptr_t start; 1926 1927 mbuf = loc->mbuf; 1928 nxlen = rte_pktmbuf_data_len(mbuf); 1929 /* 1930 * Packet length exceeds the allowed inline data length, 1931 * check whether the minimal inlining is required. 1932 */ 1933 if (txq->inlen_mode) { 1934 MLX5_ASSERT(txq->inlen_mode >= 1935 MLX5_ESEG_MIN_INLINE_SIZE); 1936 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 1937 inlen = RTE_MIN(txq->inlen_mode, inlen); 1938 } else if (vlan && !txq->vlan_en) { 1939 /* 1940 * VLAN insertion is requested and hardware does not 1941 * support the offload, will do with software inline. 1942 */ 1943 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 1944 } else if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE || 1945 nxlen > txq->inlen_send) { 1946 return mlx5_tx_packet_multi_send(txq, loc, olx); 1947 } else { 1948 goto do_first; 1949 } 1950 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 1951 goto do_build; 1952 /* 1953 * Now we know the minimal amount of data is requested 1954 * to inline. Check whether we should inline the buffers 1955 * from the chain beginning to eliminate some mbufs. 1956 */ 1957 if (unlikely(nxlen <= txq->inlen_send)) { 1958 /* We can inline first mbuf at least. */ 1959 if (nxlen < inlen) { 1960 unsigned int smlen; 1961 1962 /* Scan mbufs till inlen filled. */ 1963 do { 1964 smlen = nxlen; 1965 mbuf = NEXT(mbuf); 1966 MLX5_ASSERT(mbuf); 1967 nxlen = rte_pktmbuf_data_len(mbuf); 1968 nxlen += smlen; 1969 } while (unlikely(nxlen < inlen)); 1970 if (unlikely(nxlen > txq->inlen_send)) { 1971 /* We cannot inline entire mbuf. */ 1972 smlen = inlen - smlen; 1973 start = rte_pktmbuf_mtod_offset 1974 (mbuf, uintptr_t, smlen); 1975 goto do_align; 1976 } 1977 } 1978 do_first: 1979 do { 1980 inlen = nxlen; 1981 mbuf = NEXT(mbuf); 1982 /* There should be not end of packet. */ 1983 MLX5_ASSERT(mbuf); 1984 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 1985 break; 1986 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 1987 } while (unlikely(nxlen < txq->inlen_send)); 1988 } 1989 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 1990 /* 1991 * Check whether we can do inline to align start 1992 * address of data buffer to cacheline. 
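	 * For example, with a 64-byte cache line and the first
	 * non-inlined byte at an address ending in 0x38, the
	 * expression (~start + 1) & (RTE_CACHE_LINE_SIZE - 1) below
	 * yields 8, so inlining 8 more bytes (if txq->inlen_send
	 * allows) makes the remaining data start cache-line aligned.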
1993 */ 1994 do_align: 1995 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 1996 if (unlikely(start)) { 1997 start += inlen; 1998 if (start <= txq->inlen_send) 1999 inlen = start; 2000 } 2001 } 2002 /* 2003 * Check whether there are enough free WQEBBs: 2004 * - Control Segment 2005 * - Ethernet Segment 2006 * - First Segment of inlined Ethernet data 2007 * - ... data continued ... 2008 * - Data Segments of pointer/min inline type 2009 * 2010 * Estimate the number of Data Segments conservatively, 2011 * supposing no any mbufs is being freed during inlining. 2012 */ 2013 do_build: 2014 MLX5_ASSERT(inlen <= txq->inlen_send); 2015 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 2016 MLX5_ESEG_MIN_INLINE_SIZE + 2017 MLX5_WSEG_SIZE + 2018 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2019 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 2020 return MLX5_TXCMP_CODE_EXIT; 2021 /* Check for maximal WQE size. */ 2022 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 2023 return MLX5_TXCMP_CODE_ERROR; 2024 #ifdef MLX5_PMD_SOFT_COUNTERS 2025 /* Update sent data bytes/packets counters. */ 2026 txq->stats.obytes += dlen + vlan; 2027 #endif 2028 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2029 loc->wqe_last = wqe; 2030 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 2031 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 2032 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2033 txq->wqe_ci += (ds + 3) / 4; 2034 loc->wqe_free -= (ds + 3) / 4; 2035 return MLX5_TXCMP_CODE_MULTI; 2036 } 2037 2038 /** 2039 * Tx burst function for multi-segment packets. Supports all 2040 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 2041 * sends one packet per WQE. Function stops sending if it 2042 * encounters the single-segment packet. 2043 * 2044 * This routine is responsible for storing processed mbuf 2045 * into elts ring buffer and update elts_head. 2046 * 2047 * @param txq 2048 * Pointer to TX queue structure. 2049 * @param[in] pkts 2050 * Packets to transmit. 2051 * @param pkts_n 2052 * Number of packets in array. 2053 * @param loc 2054 * Pointer to burst routine local context. 2055 * @param olx 2056 * Configured Tx offloads mask. It is fully defined at 2057 * compile time and may be used for optimization. 2058 * 2059 * @return 2060 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2061 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2062 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2063 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 2064 * Local context variables updated. 2065 */ 2066 static __rte_always_inline enum mlx5_txcmp_code 2067 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 2068 struct rte_mbuf **__rte_restrict pkts, 2069 unsigned int pkts_n, 2070 struct mlx5_txq_local *__rte_restrict loc, 2071 unsigned int olx) 2072 { 2073 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2074 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2075 pkts += loc->pkts_sent + 1; 2076 pkts_n -= loc->pkts_sent; 2077 for (;;) { 2078 enum mlx5_txcmp_code ret; 2079 2080 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 2081 /* 2082 * Estimate the number of free elts quickly but conservatively. 2083 * Some segment may be fully inlined and freed, 2084 * ignore this here - precise estimation is costly. 2085 */ 2086 if (loc->elts_free < NB_SEGS(loc->mbuf)) 2087 return MLX5_TXCMP_CODE_EXIT; 2088 if (MLX5_TXOFF_CONFIG(TSO) && 2089 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { 2090 /* Proceed with multi-segment TSO. 
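		 * The dispatch below sends each multi-segment packet
		 * with a dedicated routine: TSO packets via
		 * mlx5_tx_packet_multi_tso(), the rest via
		 * mlx5_tx_packet_multi_inline() or
		 * mlx5_tx_packet_multi_send() depending on whether
		 * inlining is configured.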
*/ 2091 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 2092 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 2093 /* Proceed with multi-segment SEND with inlining. */ 2094 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 2095 } else { 2096 /* Proceed with multi-segment SEND w/o inlining. */ 2097 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 2098 } 2099 if (ret == MLX5_TXCMP_CODE_EXIT) 2100 return MLX5_TXCMP_CODE_EXIT; 2101 if (ret == MLX5_TXCMP_CODE_ERROR) 2102 return MLX5_TXCMP_CODE_ERROR; 2103 /* WQE is built, go to the next packet. */ 2104 ++loc->pkts_sent; 2105 --pkts_n; 2106 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2107 return MLX5_TXCMP_CODE_EXIT; 2108 loc->mbuf = *pkts++; 2109 if (pkts_n > 1) 2110 rte_prefetch0(*pkts); 2111 if (likely(NB_SEGS(loc->mbuf) > 1)) 2112 continue; 2113 /* Here ends the series of multi-segment packets. */ 2114 if (MLX5_TXOFF_CONFIG(TSO) && 2115 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2116 return MLX5_TXCMP_CODE_TSO; 2117 return MLX5_TXCMP_CODE_SINGLE; 2118 } 2119 MLX5_ASSERT(false); 2120 } 2121 2122 /** 2123 * Tx burst function for single-segment packets with TSO. 2124 * Supports all types of Tx offloads, except multi-packets. 2125 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 2126 * Function stops sending if it encounters the multi-segment 2127 * packet or packet without TSO requested. 2128 * 2129 * The routine is responsible for storing processed mbuf into elts ring buffer 2130 * and update elts_head if inline offloads is requested due to possible early 2131 * freeing of the inlined mbufs (can not store pkts array in elts as a batch). 2132 * 2133 * @param txq 2134 * Pointer to TX queue structure. 2135 * @param[in] pkts 2136 * Packets to transmit. 2137 * @param pkts_n 2138 * Number of packets in array. 2139 * @param loc 2140 * Pointer to burst routine local context. 2141 * @param olx 2142 * Configured Tx offloads mask. It is fully defined at 2143 * compile time and may be used for optimization. 2144 * 2145 * @return 2146 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2147 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2148 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2149 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2150 * Local context variables updated. 2151 */ 2152 static __rte_always_inline enum mlx5_txcmp_code 2153 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 2154 struct rte_mbuf **__rte_restrict pkts, 2155 unsigned int pkts_n, 2156 struct mlx5_txq_local *__rte_restrict loc, 2157 unsigned int olx) 2158 { 2159 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2160 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2161 pkts += loc->pkts_sent + 1; 2162 pkts_n -= loc->pkts_sent; 2163 for (;;) { 2164 struct mlx5_wqe_dseg *__rte_restrict dseg; 2165 struct mlx5_wqe *__rte_restrict wqe; 2166 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 2167 uint8_t *dptr; 2168 2169 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2170 if (MLX5_TXOFF_CONFIG(TXPP)) { 2171 enum mlx5_txcmp_code wret; 2172 2173 /* Generate WAIT for scheduling if requested. 
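			 * Sizing illustration for the TSO WQE built
			 * further below (assuming MLX5_WSEG_SIZE is
			 * 16 bytes and MLX5_ESEG_MIN_INLINE_SIZE is
			 * 18 bytes): with hlen = 54 (Ethernet + IPv4 +
			 * TCP headers) the WQE takes
			 * ds = 4 + (54 - 18 + 15) / 16 = 7 Data
			 * Segments, i.e. (7 + 3) / 4 = 2 WQEBBs.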
*/ 2174 wret = mlx5_tx_schedule_send(txq, loc, olx); 2175 if (wret == MLX5_TXCMP_CODE_EXIT) 2176 return MLX5_TXCMP_CODE_EXIT; 2177 if (wret == MLX5_TXCMP_CODE_ERROR) 2178 return MLX5_TXCMP_CODE_ERROR; 2179 } 2180 dlen = rte_pktmbuf_data_len(loc->mbuf); 2181 if (MLX5_TXOFF_CONFIG(VLAN) && 2182 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2183 vlan = sizeof(struct rte_vlan_hdr); 2184 } 2185 /* 2186 * First calculate the WQE size to check 2187 * whether we have enough space in ring buffer. 2188 */ 2189 hlen = loc->mbuf->l2_len + vlan + 2190 loc->mbuf->l3_len + loc->mbuf->l4_len; 2191 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 2192 return MLX5_TXCMP_CODE_ERROR; 2193 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 2194 hlen += loc->mbuf->outer_l2_len + 2195 loc->mbuf->outer_l3_len; 2196 /* Segment must contain all TSO headers. */ 2197 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 2198 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 2199 hlen > (dlen + vlan))) 2200 return MLX5_TXCMP_CODE_ERROR; 2201 /* 2202 * Check whether there are enough free WQEBBs: 2203 * - Control Segment 2204 * - Ethernet Segment 2205 * - First Segment of inlined Ethernet data 2206 * - ... data continued ... 2207 * - Finishing Data Segment of pointer type 2208 */ 2209 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 2210 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2211 if (loc->wqe_free < ((ds + 3) / 4)) 2212 return MLX5_TXCMP_CODE_EXIT; 2213 #ifdef MLX5_PMD_SOFT_COUNTERS 2214 /* Update sent data bytes/packets counters. */ 2215 ntcp = (dlen + vlan - hlen + 2216 loc->mbuf->tso_segsz - 1) / 2217 loc->mbuf->tso_segsz; 2218 /* 2219 * One will be added for mbuf itself at the end 2220 * of the mlx5_tx_burst from loc->pkts_sent field. 2221 */ 2222 --ntcp; 2223 txq->stats.opackets += ntcp; 2224 txq->stats.obytes += dlen + vlan + ntcp * hlen; 2225 #endif 2226 /* 2227 * Build the TSO WQE: 2228 * - Control Segment 2229 * - Ethernet Segment with hlen bytes inlined 2230 * - Data Segment of pointer type 2231 */ 2232 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2233 loc->wqe_last = wqe; 2234 mlx5_tx_cseg_init(txq, loc, wqe, ds, 2235 MLX5_OPCODE_TSO, olx); 2236 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 2237 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 2238 dlen -= hlen - vlan; 2239 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2240 /* 2241 * WQE is built, update the loop parameters 2242 * and go to the next packet. 2243 */ 2244 txq->wqe_ci += (ds + 3) / 4; 2245 loc->wqe_free -= (ds + 3) / 4; 2246 if (MLX5_TXOFF_CONFIG(INLINE)) 2247 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2248 --loc->elts_free; 2249 ++loc->pkts_sent; 2250 --pkts_n; 2251 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2252 return MLX5_TXCMP_CODE_EXIT; 2253 loc->mbuf = *pkts++; 2254 if (pkts_n > 1) 2255 rte_prefetch0(*pkts); 2256 if (MLX5_TXOFF_CONFIG(MULTI) && 2257 unlikely(NB_SEGS(loc->mbuf) > 1)) 2258 return MLX5_TXCMP_CODE_MULTI; 2259 if (likely(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG))) 2260 return MLX5_TXCMP_CODE_SINGLE; 2261 /* Continue with the next TSO packet. */ 2262 } 2263 MLX5_ASSERT(false); 2264 } 2265 2266 /** 2267 * Analyze the packet and select the best method to send. 2268 * 2269 * @param txq 2270 * Pointer to TX queue structure. 2271 * @param loc 2272 * Pointer to burst routine local context. 2273 * @param olx 2274 * Configured Tx offloads mask. It is fully defined at 2275 * compile time and may be used for optimization. 
2276 * @param newp 2277 * The predefined flag whether do complete check for 2278 * multi-segment packets and TSO. 2279 * 2280 * @return 2281 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2282 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 2283 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 2284 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 2285 */ 2286 static __rte_always_inline enum mlx5_txcmp_code 2287 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 2288 struct mlx5_txq_local *__rte_restrict loc, 2289 unsigned int olx, 2290 bool newp) 2291 { 2292 /* Check for multi-segment packet. */ 2293 if (newp && 2294 MLX5_TXOFF_CONFIG(MULTI) && 2295 unlikely(NB_SEGS(loc->mbuf) > 1)) 2296 return MLX5_TXCMP_CODE_MULTI; 2297 /* Check for TSO packet. */ 2298 if (newp && 2299 MLX5_TXOFF_CONFIG(TSO) && 2300 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2301 return MLX5_TXCMP_CODE_TSO; 2302 /* Check if eMPW is enabled at all. */ 2303 if (!MLX5_TXOFF_CONFIG(EMPW)) 2304 return MLX5_TXCMP_CODE_SINGLE; 2305 /* Check if eMPW can be engaged. */ 2306 if (MLX5_TXOFF_CONFIG(VLAN) && 2307 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) && 2308 (!MLX5_TXOFF_CONFIG(INLINE) || 2309 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 2310 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 2311 /* 2312 * eMPW does not support VLAN insertion offload, we have to 2313 * inline the entire packet but packet is too long for inlining. 2314 */ 2315 return MLX5_TXCMP_CODE_SINGLE; 2316 } 2317 return MLX5_TXCMP_CODE_EMPW; 2318 } 2319 2320 /** 2321 * Check the next packet attributes to match with the eMPW batch ones. 2322 * In addition, for legacy MPW the packet length is checked either. 2323 * 2324 * @param txq 2325 * Pointer to TX queue structure. 2326 * @param es 2327 * Pointer to Ethernet Segment of eMPW batch. 2328 * @param loc 2329 * Pointer to burst routine local context. 2330 * @param dlen 2331 * Length of previous packet in MPW descriptor. 2332 * @param olx 2333 * Configured Tx offloads mask. It is fully defined at 2334 * compile time and may be used for optimization. 2335 * 2336 * @return 2337 * true - packet match with eMPW batch attributes. 2338 * false - no match, eMPW should be restarted. 2339 */ 2340 static __rte_always_inline bool 2341 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, 2342 struct mlx5_wqe_eseg *__rte_restrict es, 2343 struct mlx5_txq_local *__rte_restrict loc, 2344 uint32_t dlen, 2345 unsigned int olx) 2346 { 2347 uint8_t swp_flags = 0; 2348 2349 /* Compare the checksum flags, if any. */ 2350 if (MLX5_TXOFF_CONFIG(CSUM) && 2351 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags) 2352 return false; 2353 /* Compare the Software Parser offsets and flags. */ 2354 if (MLX5_TXOFF_CONFIG(SWP) && 2355 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) || 2356 es->swp_flags != swp_flags)) 2357 return false; 2358 /* Fill metadata field if needed. */ 2359 if (MLX5_TXOFF_CONFIG(METADATA) && 2360 es->metadata != (loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 2361 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 0)) 2362 return false; 2363 /* Legacy MPW can send packets with the same length only. */ 2364 if (MLX5_TXOFF_CONFIG(MPW) && 2365 dlen != rte_pktmbuf_data_len(loc->mbuf)) 2366 return false; 2367 /* There must be no VLAN packets in eMPW loop. */ 2368 if (MLX5_TXOFF_CONFIG(VLAN)) 2369 MLX5_ASSERT(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN)); 2370 /* Check if the scheduling is requested. 
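	 * A packet requesting Tx scheduling must terminate the eMPW
	 * batch - its WAIT WQE has to be placed ahead of the packet
	 * descriptors, so returning false below makes the caller
	 * close the current session.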
*/ 2371 if (MLX5_TXOFF_CONFIG(TXPP) && 2372 loc->mbuf->ol_flags & txq->ts_mask) 2373 return false; 2374 return true; 2375 } 2376 2377 /** 2378 * Update send loop variables and WQE for eMPW loop without data inlining. 2379 * Number of Data Segments is equal to the number of sent packets. 2380 * 2381 * @param txq 2382 * Pointer to TX queue structure. 2383 * @param loc 2384 * Pointer to burst routine local context. 2385 * @param ds 2386 * Number of packets/Data Segments/Packets. 2387 * @param slen 2388 * Accumulated statistics, bytes sent. 2389 * @param olx 2390 * Configured Tx offloads mask. It is fully defined at 2391 * compile time and may be used for optimization. 2392 * 2393 * @return 2394 * true - packet match with eMPW batch attributes. 2395 * false - no match, eMPW should be restarted. 2396 */ 2397 static __rte_always_inline void 2398 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, 2399 struct mlx5_txq_local *__rte_restrict loc, 2400 unsigned int ds, 2401 unsigned int slen, 2402 unsigned int olx __rte_unused) 2403 { 2404 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2405 #ifdef MLX5_PMD_SOFT_COUNTERS 2406 /* Update sent data bytes counter. */ 2407 txq->stats.obytes += slen; 2408 #else 2409 (void)slen; 2410 #endif 2411 loc->elts_free -= ds; 2412 loc->pkts_sent += ds; 2413 ds += 2; 2414 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2415 txq->wqe_ci += (ds + 3) / 4; 2416 loc->wqe_free -= (ds + 3) / 4; 2417 } 2418 2419 /** 2420 * Update send loop variables and WQE for eMPW loop with data inlining. 2421 * Gets the size of pushed descriptors and data to the WQE. 2422 * 2423 * @param txq 2424 * Pointer to TX queue structure. 2425 * @param loc 2426 * Pointer to burst routine local context. 2427 * @param len 2428 * Total size of descriptor/data in bytes. 2429 * @param slen 2430 * Accumulated statistics, data bytes sent. 2431 * @param wqem 2432 * The base WQE for the eMPW/MPW descriptor. 2433 * @param olx 2434 * Configured Tx offloads mask. It is fully defined at 2435 * compile time and may be used for optimization. 2436 * 2437 * @return 2438 * true - packet match with eMPW batch attributes. 2439 * false - no match, eMPW should be restarted. 2440 */ 2441 static __rte_always_inline void 2442 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 2443 struct mlx5_txq_local *__rte_restrict loc, 2444 unsigned int len, 2445 unsigned int slen, 2446 struct mlx5_wqe *__rte_restrict wqem, 2447 unsigned int olx __rte_unused) 2448 { 2449 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 2450 2451 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2452 #ifdef MLX5_PMD_SOFT_COUNTERS 2453 /* Update sent data bytes counter. */ 2454 txq->stats.obytes += slen; 2455 #else 2456 (void)slen; 2457 #endif 2458 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 2459 /* 2460 * If the legacy MPW session contains the inline packets 2461 * we should set the only inline data segment length 2462 * and align the total length to the segment size. 2463 */ 2464 MLX5_ASSERT(len > sizeof(dseg->bcount)); 2465 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 2466 MLX5_ETH_WQE_DATA_INLINE); 2467 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 2468 } else { 2469 /* 2470 * The session is not legacy MPW or contains the 2471 * data buffer pointer segments. 
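		 * In this case len is a byte count that is a
		 * multiple of MLX5_WSEG_SIZE and is converted to
		 * Data Segments below: e.g. five 16-byte pointer
		 * descriptors give len = 80, so len becomes
		 * 80 / 16 + 2 = 7 and wqe_ci advances by
		 * (7 + 3) / 4 = 2 WQEBBs.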
2472 */ 2473 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 2474 len = len / MLX5_WSEG_SIZE + 2; 2475 } 2476 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 2477 txq->wqe_ci += (len + 3) / 4; 2478 loc->wqe_free -= (len + 3) / 4; 2479 loc->wqe_last = wqem; 2480 } 2481 2482 /** 2483 * The set of Tx burst functions for single-segment packets without TSO 2484 * and with Multi-Packet Writing feature support. 2485 * Supports all types of Tx offloads, except multi-packets and TSO. 2486 * 2487 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends as many packet 2488 * per WQE as it can. If eMPW is not configured or packet can not be sent with 2489 * eMPW (VLAN insertion) the ordinary SEND opcode is used and only one packet 2490 * placed in WQE. 2491 * 2492 * Functions stop sending if it encounters the multi-segment packet or packet 2493 * with TSO requested. 2494 * 2495 * The routines are responsible for storing processed mbuf into elts ring buffer 2496 * and update elts_head if inlining offload is requested. Otherwise the copying 2497 * mbufs to elts can be postponed and completed at the end of burst routine. 2498 * 2499 * @param txq 2500 * Pointer to TX queue structure. 2501 * @param[in] pkts 2502 * Packets to transmit. 2503 * @param pkts_n 2504 * Number of packets in array. 2505 * @param loc 2506 * Pointer to burst routine local context. 2507 * @param olx 2508 * Configured Tx offloads mask. It is fully defined at 2509 * compile time and may be used for optimization. 2510 * 2511 * @return 2512 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2513 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2514 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2515 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 2516 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 2517 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 2518 * 2519 * Local context variables updated. 2520 * 2521 * 2522 * The routine sends packets with MLX5_OPCODE_EMPW 2523 * without inlining, this is dedicated optimized branch. 2524 * No VLAN insertion is supported. 2525 */ 2526 static __rte_always_inline enum mlx5_txcmp_code 2527 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 2528 struct rte_mbuf **__rte_restrict pkts, 2529 unsigned int pkts_n, 2530 struct mlx5_txq_local *__rte_restrict loc, 2531 unsigned int olx) 2532 { 2533 /* 2534 * Subroutine is the part of mlx5_tx_burst_single() and sends 2535 * single-segment packet with eMPW opcode without data inlining. 2536 */ 2537 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2538 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2539 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2540 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2541 pkts += loc->pkts_sent + 1; 2542 pkts_n -= loc->pkts_sent; 2543 for (;;) { 2544 struct mlx5_wqe_dseg *__rte_restrict dseg; 2545 struct mlx5_wqe_eseg *__rte_restrict eseg; 2546 enum mlx5_txcmp_code ret; 2547 unsigned int part, loop; 2548 unsigned int slen = 0; 2549 2550 next_empw: 2551 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2552 if (MLX5_TXOFF_CONFIG(TXPP)) { 2553 enum mlx5_txcmp_code wret; 2554 2555 /* Generate WAIT for scheduling if requested. */ 2556 wret = mlx5_tx_schedule_send(txq, loc, olx); 2557 if (wret == MLX5_TXCMP_CODE_EXIT) 2558 return MLX5_TXCMP_CODE_EXIT; 2559 if (wret == MLX5_TXCMP_CODE_ERROR) 2560 return MLX5_TXCMP_CODE_ERROR; 2561 } 2562 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 
2563 MLX5_MPW_MAX_PACKETS : 2564 MLX5_EMPW_MAX_PACKETS); 2565 if (unlikely(loc->elts_free < part)) { 2566 /* We have no enough elts to save all mbufs. */ 2567 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 2568 return MLX5_TXCMP_CODE_EXIT; 2569 /* But we still able to send at least minimal eMPW. */ 2570 part = loc->elts_free; 2571 } 2572 /* Check whether we have enough WQEs */ 2573 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 2574 if (unlikely(loc->wqe_free < 2575 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2576 return MLX5_TXCMP_CODE_EXIT; 2577 part = (loc->wqe_free * 4) - 2; 2578 } 2579 if (likely(part > 1)) 2580 rte_prefetch0(*pkts); 2581 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2582 /* 2583 * Build eMPW title WQEBB: 2584 * - Control Segment, eMPW opcode 2585 * - Ethernet Segment, no inline 2586 */ 2587 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 2588 MLX5_OPCODE_ENHANCED_MPSW, olx); 2589 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 2590 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2591 eseg = &loc->wqe_last->eseg; 2592 dseg = &loc->wqe_last->dseg[0]; 2593 loop = part; 2594 /* Store the packet length for legacy MPW. */ 2595 if (MLX5_TXOFF_CONFIG(MPW)) 2596 eseg->mss = rte_cpu_to_be_16 2597 (rte_pktmbuf_data_len(loc->mbuf)); 2598 for (;;) { 2599 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2600 #ifdef MLX5_PMD_SOFT_COUNTERS 2601 /* Update sent data bytes counter. */ 2602 slen += dlen; 2603 #endif 2604 mlx5_tx_dseg_ptr 2605 (txq, loc, dseg, 2606 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 2607 dlen, olx); 2608 if (unlikely(--loop == 0)) 2609 break; 2610 loc->mbuf = *pkts++; 2611 if (likely(loop > 1)) 2612 rte_prefetch0(*pkts); 2613 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2614 /* 2615 * Unroll the completion code to avoid 2616 * returning variable value - it results in 2617 * unoptimized sequent checking in caller. 2618 */ 2619 if (ret == MLX5_TXCMP_CODE_MULTI) { 2620 part -= loop; 2621 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2622 if (unlikely(!loc->elts_free || 2623 !loc->wqe_free)) 2624 return MLX5_TXCMP_CODE_EXIT; 2625 return MLX5_TXCMP_CODE_MULTI; 2626 } 2627 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2628 if (ret == MLX5_TXCMP_CODE_TSO) { 2629 part -= loop; 2630 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2631 if (unlikely(!loc->elts_free || 2632 !loc->wqe_free)) 2633 return MLX5_TXCMP_CODE_EXIT; 2634 return MLX5_TXCMP_CODE_TSO; 2635 } 2636 if (ret == MLX5_TXCMP_CODE_SINGLE) { 2637 part -= loop; 2638 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2639 if (unlikely(!loc->elts_free || 2640 !loc->wqe_free)) 2641 return MLX5_TXCMP_CODE_EXIT; 2642 return MLX5_TXCMP_CODE_SINGLE; 2643 } 2644 if (ret != MLX5_TXCMP_CODE_EMPW) { 2645 MLX5_ASSERT(false); 2646 part -= loop; 2647 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2648 return MLX5_TXCMP_CODE_ERROR; 2649 } 2650 /* 2651 * Check whether packet parameters coincide 2652 * within assumed eMPW batch: 2653 * - check sum settings 2654 * - metadata value 2655 * - software parser settings 2656 * - packets length (legacy MPW only) 2657 * - scheduling is not required 2658 */ 2659 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 2660 MLX5_ASSERT(loop); 2661 part -= loop; 2662 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2663 if (unlikely(!loc->elts_free || 2664 !loc->wqe_free)) 2665 return MLX5_TXCMP_CODE_EXIT; 2666 pkts_n -= part; 2667 goto next_empw; 2668 } 2669 /* Packet attributes match, continue the same eMPW. 
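			 * The Data Segment pointer is simply
			 * advanced; the wrap to the beginning of the
			 * SQ ring is handled right below by comparing
			 * against txq->wqes_end.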
*/ 2670 ++dseg; 2671 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2672 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2673 } 2674 /* eMPW is built successfully, update loop parameters. */ 2675 MLX5_ASSERT(!loop); 2676 MLX5_ASSERT(pkts_n >= part); 2677 #ifdef MLX5_PMD_SOFT_COUNTERS 2678 /* Update sent data bytes counter. */ 2679 txq->stats.obytes += slen; 2680 #endif 2681 loc->elts_free -= part; 2682 loc->pkts_sent += part; 2683 txq->wqe_ci += (2 + part + 3) / 4; 2684 loc->wqe_free -= (2 + part + 3) / 4; 2685 pkts_n -= part; 2686 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2687 return MLX5_TXCMP_CODE_EXIT; 2688 loc->mbuf = *pkts++; 2689 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2690 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 2691 return ret; 2692 /* Continue sending eMPW batches. */ 2693 } 2694 MLX5_ASSERT(false); 2695 } 2696 2697 /** 2698 * The routine sends packets with MLX5_OPCODE_EMPW 2699 * with inlining, optionally supports VLAN insertion. 2700 */ 2701 static __rte_always_inline enum mlx5_txcmp_code 2702 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 2703 struct rte_mbuf **__rte_restrict pkts, 2704 unsigned int pkts_n, 2705 struct mlx5_txq_local *__rte_restrict loc, 2706 unsigned int olx) 2707 { 2708 /* 2709 * Subroutine is the part of mlx5_tx_burst_single() and sends 2710 * single-segment packet with eMPW opcode with data inlining. 2711 */ 2712 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2713 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2714 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2715 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2716 pkts += loc->pkts_sent + 1; 2717 pkts_n -= loc->pkts_sent; 2718 for (;;) { 2719 struct mlx5_wqe_dseg *__rte_restrict dseg; 2720 struct mlx5_wqe *__rte_restrict wqem; 2721 enum mlx5_txcmp_code ret; 2722 unsigned int room, part, nlim; 2723 unsigned int slen = 0; 2724 2725 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2726 if (MLX5_TXOFF_CONFIG(TXPP)) { 2727 enum mlx5_txcmp_code wret; 2728 2729 /* Generate WAIT for scheduling if requested. */ 2730 wret = mlx5_tx_schedule_send(txq, loc, olx); 2731 if (wret == MLX5_TXCMP_CODE_EXIT) 2732 return MLX5_TXCMP_CODE_EXIT; 2733 if (wret == MLX5_TXCMP_CODE_ERROR) 2734 return MLX5_TXCMP_CODE_ERROR; 2735 } 2736 /* 2737 * Limits the amount of packets in one WQE 2738 * to improve CQE latency generation. 2739 */ 2740 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2741 MLX5_MPW_INLINE_MAX_PACKETS : 2742 MLX5_EMPW_MAX_PACKETS); 2743 /* Check whether we have minimal amount WQEs */ 2744 if (unlikely(loc->wqe_free < 2745 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2746 return MLX5_TXCMP_CODE_EXIT; 2747 if (likely(pkts_n > 1)) 2748 rte_prefetch0(*pkts); 2749 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2750 /* 2751 * Build eMPW title WQEBB: 2752 * - Control Segment, eMPW opcode, zero DS 2753 * - Ethernet Segment, no inline 2754 */ 2755 mlx5_tx_cseg_init(txq, loc, wqem, 0, 2756 MLX5_OPCODE_ENHANCED_MPSW, olx); 2757 mlx5_tx_eseg_none(txq, loc, wqem, 2758 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2759 dseg = &wqem->dseg[0]; 2760 /* Store the packet length for legacy MPW. */ 2761 if (MLX5_TXOFF_CONFIG(MPW)) 2762 wqem->eseg.mss = rte_cpu_to_be_16 2763 (rte_pktmbuf_data_len(loc->mbuf)); 2764 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 2765 loc->wqe_free) * MLX5_WQE_SIZE - 2766 MLX5_WQE_CSEG_SIZE - 2767 MLX5_WQE_ESEG_SIZE; 2768 /* Limit the room for legacy MPW sessions for performance. 
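		 * The cap applied below keeps a legacy MPW session
		 * either large enough for one fully inlined packet
		 * (txq->inlen_empw plus the bcount field and an
		 * optional VLAN header) or for
		 * MLX5_MPW_INLINE_MAX_PACKETS pointer descriptors,
		 * whichever is larger.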
*/ 2769 if (MLX5_TXOFF_CONFIG(MPW)) 2770 room = RTE_MIN(room, 2771 RTE_MAX(txq->inlen_empw + 2772 sizeof(dseg->bcount) + 2773 (MLX5_TXOFF_CONFIG(VLAN) ? 2774 sizeof(struct rte_vlan_hdr) : 0), 2775 MLX5_MPW_INLINE_MAX_PACKETS * 2776 MLX5_WQE_DSEG_SIZE)); 2777 /* Build WQE till we have space, packets and resources. */ 2778 part = room; 2779 for (;;) { 2780 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2781 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2782 unsigned int tlen; 2783 2784 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2785 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 2786 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 2787 /* 2788 * Some Tx offloads may cause an error if packet is not 2789 * long enough, check against assumed minimal length. 2790 */ 2791 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 2792 part -= room; 2793 if (unlikely(!part)) 2794 return MLX5_TXCMP_CODE_ERROR; 2795 /* 2796 * We have some successfully built 2797 * packet Data Segments to send. 2798 */ 2799 mlx5_tx_idone_empw(txq, loc, part, 2800 slen, wqem, olx); 2801 return MLX5_TXCMP_CODE_ERROR; 2802 } 2803 /* Inline or not inline - that's the Question. */ 2804 if (dlen > txq->inlen_empw || 2805 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2806 goto pointer_empw; 2807 if (MLX5_TXOFF_CONFIG(MPW)) { 2808 if (dlen > txq->inlen_send) 2809 goto pointer_empw; 2810 tlen = dlen; 2811 if (part == room) { 2812 /* Open new inline MPW session. */ 2813 tlen += sizeof(dseg->bcount); 2814 dseg->bcount = RTE_BE32(0); 2815 dseg = RTE_PTR_ADD 2816 (dseg, sizeof(dseg->bcount)); 2817 } else { 2818 /* 2819 * No pointer and inline descriptor 2820 * intermix for legacy MPW sessions. 2821 */ 2822 if (wqem->dseg[0].bcount) 2823 break; 2824 } 2825 } else { 2826 tlen = sizeof(dseg->bcount) + dlen; 2827 } 2828 /* Inline entire packet, optional VLAN insertion. */ 2829 if (MLX5_TXOFF_CONFIG(VLAN) && 2830 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2831 /* 2832 * The packet length must be checked in 2833 * mlx5_tx_able_to_empw() and packet 2834 * fits into inline length guaranteed. 2835 */ 2836 MLX5_ASSERT((dlen + 2837 sizeof(struct rte_vlan_hdr)) <= 2838 txq->inlen_empw); 2839 tlen += sizeof(struct rte_vlan_hdr); 2840 if (room < tlen) 2841 break; 2842 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 2843 dptr, dlen, olx); 2844 #ifdef MLX5_PMD_SOFT_COUNTERS 2845 /* Update sent data bytes counter. */ 2846 slen += sizeof(struct rte_vlan_hdr); 2847 #endif 2848 } else { 2849 if (room < tlen) 2850 break; 2851 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 2852 dptr, dlen, olx); 2853 } 2854 if (!MLX5_TXOFF_CONFIG(MPW)) 2855 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 2856 MLX5_ASSERT(room >= tlen); 2857 room -= tlen; 2858 /* 2859 * Packet data are completely inline, 2860 * we can try to free the packet. 2861 */ 2862 if (likely(loc->pkts_sent == loc->mbuf_free)) { 2863 /* 2864 * All the packets from the burst beginning 2865 * are inline, we can free mbufs directly 2866 * from the origin array on tx_burst exit(). 2867 */ 2868 loc->mbuf_free++; 2869 goto next_mbuf; 2870 } 2871 /* 2872 * In order no to call rte_pktmbuf_free_seg() here, 2873 * in the most inner loop (that might be very 2874 * expensive) we just save the mbuf in elts. 2875 */ 2876 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2877 loc->elts_free--; 2878 goto next_mbuf; 2879 pointer_empw: 2880 /* 2881 * No pointer and inline descriptor 2882 * intermix for legacy MPW sessions. 
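			 * An open inline session is recognized by the
			 * bcount placeholder still being zero (it is
			 * filled in later by mlx5_tx_idone_empw()),
			 * so the check below closes such a session
			 * before a pointer descriptor is emitted.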
2883 */ 2884 if (MLX5_TXOFF_CONFIG(MPW) && 2885 part != room && 2886 wqem->dseg[0].bcount == RTE_BE32(0)) 2887 break; 2888 /* 2889 * Not inlinable VLAN packets are 2890 * proceeded outside of this routine. 2891 */ 2892 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2893 if (MLX5_TXOFF_CONFIG(VLAN)) 2894 MLX5_ASSERT(!(loc->mbuf->ol_flags & 2895 RTE_MBUF_F_TX_VLAN)); 2896 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2897 /* We have to store mbuf in elts.*/ 2898 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2899 loc->elts_free--; 2900 room -= MLX5_WQE_DSEG_SIZE; 2901 /* Ring buffer wraparound is checked at the loop end.*/ 2902 ++dseg; 2903 next_mbuf: 2904 #ifdef MLX5_PMD_SOFT_COUNTERS 2905 /* Update sent data bytes counter. */ 2906 slen += dlen; 2907 #endif 2908 loc->pkts_sent++; 2909 pkts_n--; 2910 if (unlikely(!pkts_n || !loc->elts_free)) { 2911 /* 2912 * We have no resources/packets to 2913 * continue build descriptors. 2914 */ 2915 part -= room; 2916 mlx5_tx_idone_empw(txq, loc, part, 2917 slen, wqem, olx); 2918 return MLX5_TXCMP_CODE_EXIT; 2919 } 2920 loc->mbuf = *pkts++; 2921 if (likely(pkts_n > 1)) 2922 rte_prefetch0(*pkts); 2923 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2924 /* 2925 * Unroll the completion code to avoid 2926 * returning variable value - it results in 2927 * unoptimized sequent checking in caller. 2928 */ 2929 if (ret == MLX5_TXCMP_CODE_MULTI) { 2930 part -= room; 2931 mlx5_tx_idone_empw(txq, loc, part, 2932 slen, wqem, olx); 2933 if (unlikely(!loc->elts_free || 2934 !loc->wqe_free)) 2935 return MLX5_TXCMP_CODE_EXIT; 2936 return MLX5_TXCMP_CODE_MULTI; 2937 } 2938 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2939 if (ret == MLX5_TXCMP_CODE_TSO) { 2940 part -= room; 2941 mlx5_tx_idone_empw(txq, loc, part, 2942 slen, wqem, olx); 2943 if (unlikely(!loc->elts_free || 2944 !loc->wqe_free)) 2945 return MLX5_TXCMP_CODE_EXIT; 2946 return MLX5_TXCMP_CODE_TSO; 2947 } 2948 if (ret == MLX5_TXCMP_CODE_SINGLE) { 2949 part -= room; 2950 mlx5_tx_idone_empw(txq, loc, part, 2951 slen, wqem, olx); 2952 if (unlikely(!loc->elts_free || 2953 !loc->wqe_free)) 2954 return MLX5_TXCMP_CODE_EXIT; 2955 return MLX5_TXCMP_CODE_SINGLE; 2956 } 2957 if (ret != MLX5_TXCMP_CODE_EMPW) { 2958 MLX5_ASSERT(false); 2959 part -= room; 2960 mlx5_tx_idone_empw(txq, loc, part, 2961 slen, wqem, olx); 2962 return MLX5_TXCMP_CODE_ERROR; 2963 } 2964 /* Check if we have minimal room left. */ 2965 nlim--; 2966 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 2967 break; 2968 /* 2969 * Check whether packet parameters coincide 2970 * within assumed eMPW batch: 2971 * - check sum settings 2972 * - metadata value 2973 * - software parser settings 2974 * - packets length (legacy MPW only) 2975 * - scheduling is not required 2976 */ 2977 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 2978 loc, dlen, olx)) 2979 break; 2980 /* Packet attributes match, continue the same eMPW. */ 2981 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2982 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2983 } 2984 /* 2985 * We get here to close an existing eMPW 2986 * session and start the new one. 2987 */ 2988 MLX5_ASSERT(pkts_n); 2989 part -= room; 2990 if (unlikely(!part)) 2991 return MLX5_TXCMP_CODE_EXIT; 2992 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 2993 if (unlikely(!loc->elts_free || 2994 !loc->wqe_free)) 2995 return MLX5_TXCMP_CODE_EXIT; 2996 /* Continue the loop with new eMPW session. */ 2997 } 2998 MLX5_ASSERT(false); 2999 } 3000 3001 /** 3002 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 
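 * It is taken when eMPW is not configured or the packet cannot be
 * sent within an eMPW session.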
3003 * Data inlining and VLAN insertion are supported. 3004 */ 3005 static __rte_always_inline enum mlx5_txcmp_code 3006 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 3007 struct rte_mbuf **__rte_restrict pkts, 3008 unsigned int pkts_n, 3009 struct mlx5_txq_local *__rte_restrict loc, 3010 unsigned int olx) 3011 { 3012 /* 3013 * Subroutine is the part of mlx5_tx_burst_single() 3014 * and sends single-segment packet with SEND opcode. 3015 */ 3016 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3017 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3018 pkts += loc->pkts_sent + 1; 3019 pkts_n -= loc->pkts_sent; 3020 for (;;) { 3021 struct mlx5_wqe *__rte_restrict wqe; 3022 enum mlx5_txcmp_code ret; 3023 3024 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3025 if (MLX5_TXOFF_CONFIG(TXPP)) { 3026 enum mlx5_txcmp_code wret; 3027 3028 /* Generate WAIT for scheduling if requested. */ 3029 wret = mlx5_tx_schedule_send(txq, loc, olx); 3030 if (wret == MLX5_TXCMP_CODE_EXIT) 3031 return MLX5_TXCMP_CODE_EXIT; 3032 if (wret == MLX5_TXCMP_CODE_ERROR) 3033 return MLX5_TXCMP_CODE_ERROR; 3034 } 3035 if (MLX5_TXOFF_CONFIG(INLINE)) { 3036 unsigned int inlen, vlan = 0; 3037 3038 inlen = rte_pktmbuf_data_len(loc->mbuf); 3039 if (MLX5_TXOFF_CONFIG(VLAN) && 3040 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 3041 vlan = sizeof(struct rte_vlan_hdr); 3042 inlen += vlan; 3043 } 3044 /* 3045 * If inlining is enabled at configuration time 3046 * the limit must be not less than minimal size. 3047 * Otherwise we would do extra check for data 3048 * size to avoid crashes due to length overflow. 3049 */ 3050 MLX5_ASSERT(txq->inlen_send >= 3051 MLX5_ESEG_MIN_INLINE_SIZE); 3052 if (inlen <= txq->inlen_send) { 3053 unsigned int seg_n, wqe_n; 3054 3055 rte_prefetch0(rte_pktmbuf_mtod 3056 (loc->mbuf, uint8_t *)); 3057 /* Check against minimal length. */ 3058 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3059 return MLX5_TXCMP_CODE_ERROR; 3060 if (loc->mbuf->ol_flags & 3061 RTE_MBUF_F_TX_DYNF_NOINLINE) { 3062 /* 3063 * The hint flag not to inline packet 3064 * data is set. Check whether we can 3065 * follow the hint. 3066 */ 3067 if ((!MLX5_TXOFF_CONFIG(EMPW) && 3068 txq->inlen_mode) || 3069 (MLX5_TXOFF_CONFIG(MPW) && 3070 txq->inlen_mode)) { 3071 if (inlen <= txq->inlen_send) 3072 goto single_inline; 3073 /* 3074 * The hardware requires the 3075 * minimal inline data header. 3076 */ 3077 goto single_min_inline; 3078 } 3079 if (MLX5_TXOFF_CONFIG(VLAN) && 3080 vlan && !txq->vlan_en) { 3081 /* 3082 * We must insert VLAN tag 3083 * by software means. 3084 */ 3085 goto single_part_inline; 3086 } 3087 goto single_no_inline; 3088 } 3089 single_inline: 3090 /* 3091 * Completely inlined packet data WQE: 3092 * - Control Segment, SEND opcode 3093 * - Ethernet Segment, no VLAN insertion 3094 * - Data inlined, VLAN optionally inserted 3095 * - Alignment to MLX5_WSEG_SIZE 3096 * Have to estimate amount of WQEBBs 3097 */ 3098 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 3099 MLX5_ESEG_MIN_INLINE_SIZE + 3100 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3101 /* Check if there are enough WQEBBs. */ 3102 wqe_n = (seg_n + 3) / 4; 3103 if (wqe_n > loc->wqe_free) 3104 return MLX5_TXCMP_CODE_EXIT; 3105 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3106 loc->wqe_last = wqe; 3107 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 3108 MLX5_OPCODE_SEND, olx); 3109 mlx5_tx_eseg_data(txq, loc, wqe, 3110 vlan, inlen, 0, olx); 3111 txq->wqe_ci += wqe_n; 3112 loc->wqe_free -= wqe_n; 3113 /* 3114 * Packet data are completely inlined, 3115 * free the packet immediately. 
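			 * No reference to the mbuf is kept in elts in
			 * this case, so there is nothing to release
			 * at completion time and the segment can be
			 * freed right away.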
3116 */ 3117 rte_pktmbuf_free_seg(loc->mbuf); 3118 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 3119 MLX5_TXOFF_CONFIG(MPW)) && 3120 txq->inlen_mode) { 3121 /* 3122 * If minimal inlining is requested the eMPW 3123 * feature should be disabled due to data is 3124 * inlined into Ethernet Segment, which can 3125 * not contain inlined data for eMPW due to 3126 * segment shared for all packets. 3127 */ 3128 struct mlx5_wqe_dseg *__rte_restrict dseg; 3129 unsigned int ds; 3130 uint8_t *dptr; 3131 3132 /* 3133 * The inline-mode settings require 3134 * to inline the specified amount of 3135 * data bytes to the Ethernet Segment. 3136 * We should check the free space in 3137 * WQE ring buffer to inline partially. 3138 */ 3139 single_min_inline: 3140 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 3141 MLX5_ASSERT(inlen > txq->inlen_mode); 3142 MLX5_ASSERT(txq->inlen_mode >= 3143 MLX5_ESEG_MIN_INLINE_SIZE); 3144 /* 3145 * Check whether there are enough free WQEBBs: 3146 * - Control Segment 3147 * - Ethernet Segment 3148 * - First Segment of inlined Ethernet data 3149 * - ... data continued ... 3150 * - Finishing Data Segment of pointer type 3151 */ 3152 ds = (MLX5_WQE_CSEG_SIZE + 3153 MLX5_WQE_ESEG_SIZE + 3154 MLX5_WQE_DSEG_SIZE + 3155 txq->inlen_mode - 3156 MLX5_ESEG_MIN_INLINE_SIZE + 3157 MLX5_WQE_DSEG_SIZE + 3158 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3159 if (loc->wqe_free < ((ds + 3) / 4)) 3160 return MLX5_TXCMP_CODE_EXIT; 3161 /* 3162 * Build the ordinary SEND WQE: 3163 * - Control Segment 3164 * - Ethernet Segment, inline inlen_mode bytes 3165 * - Data Segment of pointer type 3166 */ 3167 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3168 loc->wqe_last = wqe; 3169 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3170 MLX5_OPCODE_SEND, olx); 3171 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 3172 txq->inlen_mode, 3173 0, olx); 3174 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3175 txq->inlen_mode - vlan; 3176 inlen -= txq->inlen_mode; 3177 mlx5_tx_dseg_ptr(txq, loc, dseg, 3178 dptr, inlen, olx); 3179 /* 3180 * WQE is built, update the loop parameters 3181 * and got to the next packet. 3182 */ 3183 txq->wqe_ci += (ds + 3) / 4; 3184 loc->wqe_free -= (ds + 3) / 4; 3185 /* We have to store mbuf in elts.*/ 3186 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3187 txq->elts[txq->elts_head++ & txq->elts_m] = 3188 loc->mbuf; 3189 --loc->elts_free; 3190 } else { 3191 uint8_t *dptr; 3192 unsigned int dlen; 3193 3194 /* 3195 * Partially inlined packet data WQE, we have 3196 * some space in title WQEBB, we can fill it 3197 * with some packet data. It takes one WQEBB, 3198 * it is available, no extra space check: 3199 * - Control Segment, SEND opcode 3200 * - Ethernet Segment, no VLAN insertion 3201 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 3202 * - Data Segment, pointer type 3203 * 3204 * We also get here if VLAN insertion is not 3205 * supported by HW, the inline is enabled. 3206 */ 3207 single_part_inline: 3208 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3209 loc->wqe_last = wqe; 3210 mlx5_tx_cseg_init(txq, loc, wqe, 4, 3211 MLX5_OPCODE_SEND, olx); 3212 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 3213 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3214 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 3215 /* 3216 * The length check is performed above, by 3217 * comparing with txq->inlen_send. We should 3218 * not get overflow here. 
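			 * The title WQEBB built here holds the
			 * Control Segment, the Ethernet Segment with
			 * MLX5_ESEG_MIN_INLINE_SIZE bytes of data and
			 * one pointer Data Segment (4 DS in total),
			 * so the remainder dlen below must be
			 * positive.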
3219 */ 3220 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 3221 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 3222 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 3223 dptr, dlen, olx); 3224 ++txq->wqe_ci; 3225 --loc->wqe_free; 3226 /* We have to store mbuf in elts.*/ 3227 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3228 txq->elts[txq->elts_head++ & txq->elts_m] = 3229 loc->mbuf; 3230 --loc->elts_free; 3231 } 3232 #ifdef MLX5_PMD_SOFT_COUNTERS 3233 /* Update sent data bytes counter. */ 3234 txq->stats.obytes += vlan + 3235 rte_pktmbuf_data_len(loc->mbuf); 3236 #endif 3237 } else { 3238 /* 3239 * No inline at all, it means the CPU cycles saving 3240 * is prioritized at configuration, we should not 3241 * copy any packet data to WQE. 3242 * 3243 * SEND WQE, one WQEBB: 3244 * - Control Segment, SEND opcode 3245 * - Ethernet Segment, optional VLAN, no inline 3246 * - Data Segment, pointer type 3247 */ 3248 single_no_inline: 3249 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3250 loc->wqe_last = wqe; 3251 mlx5_tx_cseg_init(txq, loc, wqe, 3, 3252 MLX5_OPCODE_SEND, olx); 3253 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3254 mlx5_tx_dseg_ptr 3255 (txq, loc, &wqe->dseg[0], 3256 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3257 rte_pktmbuf_data_len(loc->mbuf), olx); 3258 ++txq->wqe_ci; 3259 --loc->wqe_free; 3260 /* 3261 * We should not store mbuf pointer in elts 3262 * if no inlining is configured, this is done 3263 * by calling routine in a batch copy. 3264 */ 3265 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 3266 --loc->elts_free; 3267 #ifdef MLX5_PMD_SOFT_COUNTERS 3268 /* Update sent data bytes counter. */ 3269 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 3270 if (MLX5_TXOFF_CONFIG(VLAN) && 3271 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 3272 txq->stats.obytes += 3273 sizeof(struct rte_vlan_hdr); 3274 #endif 3275 } 3276 ++loc->pkts_sent; 3277 --pkts_n; 3278 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3279 return MLX5_TXCMP_CODE_EXIT; 3280 loc->mbuf = *pkts++; 3281 if (pkts_n > 1) 3282 rte_prefetch0(*pkts); 3283 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3284 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 3285 return ret; 3286 } 3287 MLX5_ASSERT(false); 3288 } 3289 3290 static __rte_always_inline enum mlx5_txcmp_code 3291 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 3292 struct rte_mbuf **__rte_restrict pkts, 3293 unsigned int pkts_n, 3294 struct mlx5_txq_local *__rte_restrict loc, 3295 unsigned int olx) 3296 { 3297 enum mlx5_txcmp_code ret; 3298 3299 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 3300 if (ret == MLX5_TXCMP_CODE_SINGLE) 3301 goto ordinary_send; 3302 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 3303 for (;;) { 3304 /* Optimize for inline/no inline eMPW send. */ 3305 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 3306 mlx5_tx_burst_empw_inline 3307 (txq, pkts, pkts_n, loc, olx) : 3308 mlx5_tx_burst_empw_simple 3309 (txq, pkts, pkts_n, loc, olx); 3310 if (ret != MLX5_TXCMP_CODE_SINGLE) 3311 return ret; 3312 /* The resources to send one packet should remain. */ 3313 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3314 ordinary_send: 3315 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 3316 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 3317 if (ret != MLX5_TXCMP_CODE_EMPW) 3318 return ret; 3319 /* The resources to send one packet should remain. */ 3320 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3321 } 3322 } 3323 3324 /** 3325 * DPDK Tx callback template. This is configured template used to generate 3326 * routines optimized for specified offload setup. 
3327 * One of this generated functions is chosen at SQ configuration time. 3328 * 3329 * @param txq 3330 * Generic pointer to TX queue structure. 3331 * @param[in] pkts 3332 * Packets to transmit. 3333 * @param pkts_n 3334 * Number of packets in array. 3335 * @param olx 3336 * Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx 3337 * values. Should be static to take compile time static configuration 3338 * advantages. 3339 * 3340 * @return 3341 * Number of packets successfully transmitted (<= pkts_n). 3342 */ 3343 static __rte_always_inline uint16_t 3344 mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, 3345 struct rte_mbuf **__rte_restrict pkts, 3346 uint16_t pkts_n, 3347 unsigned int olx) 3348 { 3349 struct mlx5_txq_local loc; 3350 enum mlx5_txcmp_code ret; 3351 unsigned int part; 3352 3353 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 3354 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 3355 if (unlikely(!pkts_n)) 3356 return 0; 3357 if (MLX5_TXOFF_CONFIG(INLINE)) 3358 loc.mbuf_free = 0; 3359 loc.pkts_sent = 0; 3360 loc.pkts_copy = 0; 3361 loc.wqe_last = NULL; 3362 3363 send_loop: 3364 loc.pkts_loop = loc.pkts_sent; 3365 /* 3366 * Check if there are some CQEs, if any: 3367 * - process an encountered errors 3368 * - process the completed WQEs 3369 * - free related mbufs 3370 * - doorbell the NIC about processed CQEs 3371 */ 3372 rte_prefetch0(*(pkts + loc.pkts_sent)); 3373 mlx5_tx_handle_completion(txq, olx); 3374 /* 3375 * Calculate the number of available resources - elts and WQEs. 3376 * There are two possible different scenarios: 3377 * - no data inlining into WQEs, one WQEBB may contains up to 3378 * four packets, in this case elts become scarce resource 3379 * - data inlining into WQEs, one packet may require multiple 3380 * WQEBBs, the WQEs become the limiting factor. 3381 */ 3382 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 3383 loc.elts_free = txq->elts_s - 3384 (uint16_t)(txq->elts_head - txq->elts_tail); 3385 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 3386 loc.wqe_free = txq->wqe_s - 3387 (uint16_t)(txq->wqe_ci - txq->wqe_pi); 3388 if (unlikely(!loc.elts_free || !loc.wqe_free)) 3389 goto burst_exit; 3390 for (;;) { 3391 /* 3392 * Fetch the packet from array. Usually this is the first 3393 * packet in series of multi/single segment packets. 3394 */ 3395 loc.mbuf = *(pkts + loc.pkts_sent); 3396 /* Dedicated branch for multi-segment packets. */ 3397 if (MLX5_TXOFF_CONFIG(MULTI) && 3398 unlikely(NB_SEGS(loc.mbuf) > 1)) { 3399 /* 3400 * Multi-segment packet encountered. 3401 * Hardware is able to process it only 3402 * with SEND/TSO opcodes, one packet 3403 * per WQE, do it in dedicated routine. 3404 */ 3405 enter_send_multi: 3406 MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy); 3407 part = loc.pkts_sent - loc.pkts_copy; 3408 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 3409 /* 3410 * There are some single-segment mbufs not 3411 * stored in elts. The mbufs must be in the 3412 * same order as WQEs, so we must copy the 3413 * mbufs to elts here, before the coming 3414 * multi-segment packet mbufs is appended. 
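				 * For example, if loc.pkts_sent is 10
				 * and loc.pkts_copy is 7, the three
				 * single-segment mbufs pkts[7..9] are
				 * copied into elts in one batch by
				 * mlx5_tx_copy_elts() right below.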
				 */
				mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy,
						  part, olx);
				loc.pkts_copy = loc.pkts_sent;
			}
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx);
			if (!MLX5_TXOFF_CONFIG(INLINE))
				loc.pkts_copy = loc.pkts_sent;
			/*
			 * These returned code checks are supposed
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT) {
				/*
				 * The routine returns this code when
				 * all packets are sent or there are not
				 * enough resources to complete the request.
				 */
				break;
			}
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				/*
				 * The routine returns this code when an error
				 * in the incoming packet format occurred.
				 */
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE) {
				/*
				 * A single-segment packet was encountered
				 * in the array, try to send it in the most
				 * optimized way, possibly engaging eMPW.
				 */
				goto enter_send_single;
			}
			if (MLX5_TXOFF_CONFIG(TSO) &&
			    ret == MLX5_TXCMP_CODE_TSO) {
				/*
				 * A single-segment TSO packet was
				 * encountered in the array.
				 */
				goto enter_send_tso;
			}
			/* We must not get here. Something is going wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/* Dedicated branch for single-segment TSO packets. */
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    unlikely(loc.mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
			/*
			 * TSO might require a special way of inlining
			 * (dedicated parameters) and is sent with the
			 * MLX5_OPCODE_TSO opcode only, handle this
			 * in a dedicated branch.
			 */
enter_send_tso:
			MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1);
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx);
			/*
			 * These returned code checks are supposed
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT)
				break;
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE)
				goto enter_send_single;
			if (MLX5_TXOFF_CONFIG(MULTI) &&
			    ret == MLX5_TXCMP_CODE_MULTI) {
				/*
				 * A multi-segment packet was
				 * encountered in the array.
				 */
				goto enter_send_multi;
			}
			/* We must not get here. Something is going wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/*
		 * The dedicated branch for the single-segment packets
		 * without TSO. Often these ones can be sent using
		 * MLX5_OPCODE_EMPW with multiple packets in one WQE.
		 * The routine builds the WQEs till it encounters
		 * a TSO or multi-segment packet (if these offloads
		 * are requested at SQ configuration time).
		 */
enter_send_single:
		MLX5_ASSERT(pkts_n > loc.pkts_sent);
		ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx);
		/*
		 * These returned code checks are supposed
		 * to be optimized out due to routine inlining.
		 */
		if (ret == MLX5_TXCMP_CODE_EXIT)
			break;
		if (ret == MLX5_TXCMP_CODE_ERROR) {
			txq->stats.oerrors++;
			break;
		}
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    ret == MLX5_TXCMP_CODE_MULTI) {
			/*
			 * A multi-segment packet was
			 * encountered in the array.
			 */
			goto enter_send_multi;
		}
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    ret == MLX5_TXCMP_CODE_TSO) {
			/*
			 * A single-segment TSO packet was
			 * encountered in the array.
			 */
			goto enter_send_tso;
		}
		/* We must not get here. Something is going wrong. */
		MLX5_ASSERT(false);
		txq->stats.oerrors++;
		break;
	}
	/*
	 * Main Tx loop is completed, do the rest:
	 * - set completion request if thresholds are reached
	 * - doorbell the hardware
	 * - copy the rest of mbufs to elts (if any)
	 */
	MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) ||
		    loc.pkts_sent >= loc.pkts_copy);
	/* Take a shortcut if nothing is sent. */
	if (unlikely(loc.pkts_sent == loc.pkts_loop))
		goto burst_exit;
	/* Request CQE generation if limits are reached. */
	mlx5_tx_request_completion(txq, &loc, olx);
	/*
	 * Ring the QP doorbell immediately after WQE building completion
	 * to improve latency. The purely software-related data treatment
	 * can be completed after the doorbell. Tx CQEs for this SQ are
	 * processed in this thread only by polling.
	 *
	 * The rdma core library can map the doorbell register in two ways,
	 * depending on the environment variable "MLX5_SHUT_UP_BF":
	 *
	 * - as regular cached memory, if the variable is either missing or
	 *   set to zero. This type of mapping may cause significant doorbell
	 *   register write latency and requires an explicit memory write
	 *   barrier to mitigate this issue and prevent write combining.
	 *
	 * - as non-cached memory, if the variable is present and set to a
	 *   non-zero value. This type of mapping may cause a performance
	 *   impact under heavy load conditions, but the explicit write
	 *   memory barrier is not required and it may improve core
	 *   performance.
	 *
	 * - the legacy behaviour (prior to the 19.08 release) was to use
	 *   some heuristics to decide whether the write memory barrier
	 *   should be performed. This behaviour is selected by specifying
	 *   tx_db_nc=2; the write barrier is skipped if the application
	 *   provides the full recommended burst of packets, on the
	 *   assumption that more packets are coming and the barrier will be
	 *   issued on the next burst (after descriptor writing, at least).
	 */
	mlx5_doorbell_ring(mlx5_tx_bfreg(txq),
			   *(volatile uint64_t *)loc.wqe_last, txq->wqe_ci,
			   txq->qp_db, !txq->db_nc &&
			   (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
	/* Not all of the mbufs may be stored into elts yet. */
	part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
	if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
		/*
		 * There are some single-segment mbufs not stored in elts.
		 * This can only happen if the last packet was single-segment.
		 * The copying is gathered into one place because it is
		 * a good opportunity to optimize it with SIMD.
		 * Unfortunately, if inlining is enabled, gaps in the pointer
		 * array may appear due to early freeing of the inlined mbufs.
		 */
		mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx);
		loc.pkts_copy = loc.pkts_sent;
	}
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (pkts_n > loc.pkts_sent) {
		/*
		 * If the burst size is large there might not be enough CQEs
		 * fetched from the completion queue and not enough resources
		 * freed to send all the packets.
		 */
		goto send_loop;
	}
burst_exit:
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += loc.pkts_sent;
#endif
	if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
		__mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
	return loc.pkts_sent;
}
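
/*
 * Illustrative sketch, not part of the driver: a specialized burst routine
 * is generated by expanding the template above with a compile-time constant
 * offload mask, for example (the routine name here is hypothetical):
 *
 *	MLX5_TXOFF_DECL(sketch_csum_inline,
 *			MLX5_TXOFF_CONFIG_CSUM | MLX5_TXOFF_CONFIG_INLINE)
 *
 * Because olx is a literal constant inside the expansion, every
 * MLX5_TXOFF_CONFIG(...) test in mlx5_tx_burst_tmpl() is resolved at compile
 * time and the branches for offloads that were not requested are optimized
 * out. The actual set of generated routines and the selection of the best
 * matching one at SQ configuration time live outside this header, in the
 * PMD's Tx datapath source.
 */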

#endif /* RTE_PMD_MLX5_TX_H_ */