1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2021 6WIND S.A. 3 * Copyright 2021 Mellanox Technologies, Ltd 4 */ 5 6 #ifndef RTE_PMD_MLX5_TX_H_ 7 #define RTE_PMD_MLX5_TX_H_ 8 9 #include <stdint.h> 10 #include <sys/queue.h> 11 12 #include <rte_mbuf.h> 13 #include <rte_mempool.h> 14 #include <rte_common.h> 15 #include <rte_spinlock.h> 16 17 #include <mlx5_common.h> 18 #include <mlx5_common_mr.h> 19 20 #include "mlx5.h" 21 #include "mlx5_autoconf.h" 22 23 /* TX burst subroutines return codes. */ 24 enum mlx5_txcmp_code { 25 MLX5_TXCMP_CODE_EXIT = 0, 26 MLX5_TXCMP_CODE_ERROR, 27 MLX5_TXCMP_CODE_SINGLE, 28 MLX5_TXCMP_CODE_MULTI, 29 MLX5_TXCMP_CODE_TSO, 30 MLX5_TXCMP_CODE_EMPW, 31 }; 32 33 /* 34 * These defines are used to configure the Tx burst routine option set 35 * supported at compile time. Options that are not specified are optimized 36 * out because the if conditions can be evaluated at compile time. 37 * The offloads with the bigger runtime check overhead (requiring more CPU 38 * cycles to skip) should have the bigger index - this is needed to select 39 * the better matching routine if there is no exact match and some offloads 40 * are not actually requested (see the illustrative expansion sketch below). 41 */ 42 #define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets. */ 43 #define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported. */ 44 #define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads. */ 45 #define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Checksums offloaded. */ 46 #define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */ 47 #define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported. */ 48 #define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */ 49 #define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported. */ 50 #define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported. */ 51 #define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp. */ 52 53 /* The most common offload groups. */ 54 #define MLX5_TXOFF_CONFIG_NONE 0 55 #define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \ 56 MLX5_TXOFF_CONFIG_TSO | \ 57 MLX5_TXOFF_CONFIG_SWP | \ 58 MLX5_TXOFF_CONFIG_CSUM | \ 59 MLX5_TXOFF_CONFIG_INLINE | \ 60 MLX5_TXOFF_CONFIG_VLAN | \ 61 MLX5_TXOFF_CONFIG_METADATA) 62 63 #define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask) 64 65 #define MLX5_TXOFF_PRE_DECL(func) \ 66 uint16_t mlx5_tx_burst_##func(void *txq, \ 67 struct rte_mbuf **pkts, \ 68 uint16_t pkts_n) 69 70 #define MLX5_TXOFF_DECL(func, olx) \ 71 uint16_t mlx5_tx_burst_##func(void *txq, \ 72 struct rte_mbuf **pkts, \ 73 uint16_t pkts_n) \ 74 { \ 75 return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \ 76 pkts, pkts_n, (olx)); \ 77 } 78 79 /* Mbuf dynamic flag offset for inline. */ 80 extern uint64_t rte_net_mlx5_dynf_inline_mask; 81 #define RTE_MBUF_F_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask 82 83 extern uint32_t mlx5_ptype_table[] __rte_cache_aligned; 84 extern uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned; 85 extern uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned; 86 87 struct mlx5_txq_stats { 88 #ifdef MLX5_PMD_SOFT_COUNTERS 89 uint64_t opackets; /**< Total of successfully sent packets. */ 90 uint64_t obytes; /**< Total of successfully sent bytes. */ 91 #endif 92 uint64_t oerrors; /**< Total number of failed transmitted packets. */ 93 }; 94 95 /* TX queue send local data. */ 96 __extension__ 97 struct mlx5_txq_local { 98 struct mlx5_wqe *wqe_last; /* last sent WQE pointer. */ 99 struct rte_mbuf *mbuf; /* first mbuf to process.
*/ 100 uint16_t pkts_copy; /* packets copied to elts. */ 101 uint16_t pkts_sent; /* packets sent. */ 102 uint16_t pkts_loop; /* packets sent on loop entry. */ 103 uint16_t elts_free; /* available elts remain. */ 104 uint16_t wqe_free; /* available wqe remain. */ 105 uint16_t mbuf_off; /* data offset in current mbuf. */ 106 uint16_t mbuf_nseg; /* number of remaining mbuf. */ 107 uint16_t mbuf_free; /* number of inline mbufs to free. */ 108 }; 109 110 /* TX queue descriptor. */ 111 __extension__ 112 struct mlx5_txq_data { 113 uint16_t elts_head; /* Current counter in (*elts)[]. */ 114 uint16_t elts_tail; /* Counter of first element awaiting completion. */ 115 uint16_t elts_comp; /* elts index since last completion request. */ 116 uint16_t elts_s; /* Number of mbuf elements. */ 117 uint16_t elts_m; /* Mask for mbuf elements indices. */ 118 /* Fields related to elts mbuf storage. */ 119 uint16_t wqe_ci; /* Consumer index for work queue. */ 120 uint16_t wqe_pi; /* Producer index for work queue. */ 121 uint16_t wqe_s; /* Number of WQ elements. */ 122 uint16_t wqe_m; /* Mask Number for WQ elements. */ 123 uint16_t wqe_comp; /* WQE index since last completion request. */ 124 uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */ 125 /* WQ related fields. */ 126 uint16_t cq_ci; /* Consumer index for completion queue. */ 127 uint16_t cq_pi; /* Production index for completion queue. */ 128 uint16_t cqe_s; /* Number of CQ elements. */ 129 uint16_t cqe_m; /* Mask for CQ indices. */ 130 /* CQ related fields. */ 131 uint16_t elts_n:4; /* elts[] length (in log2). */ 132 uint16_t cqe_n:4; /* Number of CQ elements (in log2). */ 133 uint16_t wqe_n:4; /* Number of WQ elements (in log2). */ 134 uint16_t tso_en:1; /* When set hardware TSO is enabled. */ 135 uint16_t tunnel_en:1; 136 /* When set TX offload for tunneled packets are supported. */ 137 uint16_t swp_en:1; /* Whether SW parser is enabled. */ 138 uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */ 139 uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */ 140 uint16_t db_heu:1; /* Doorbell heuristic write barrier. */ 141 uint16_t rt_timestamp:1; /* Realtime timestamp format. */ 142 uint16_t wait_on_time:1; /* WQE with timestamp is supported. */ 143 uint16_t fast_free:1; /* mbuf fast free on Tx is enabled. */ 144 uint16_t inlen_send; /* Ordinary send data inline size. */ 145 uint16_t inlen_empw; /* eMPW max packet size to inline. */ 146 uint16_t inlen_mode; /* Minimal data length to inline. */ 147 uint32_t qp_num_8s; /* QP number shifted by 8. */ 148 uint64_t offloads; /* Offloads for Tx Queue. */ 149 struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */ 150 struct mlx5_wqe *wqes; /* Work queue. */ 151 struct mlx5_wqe *wqes_end; /* Work queue array limit. */ 152 #ifdef RTE_LIBRTE_MLX5_DEBUG 153 uint32_t *fcqs; /* Free completion queue (debug extended). */ 154 #else 155 uint16_t *fcqs; /* Free completion queue. */ 156 #endif 157 volatile struct mlx5_cqe *cqes; /* Completion queue. */ 158 volatile uint32_t *qp_db; /* Work queue doorbell. */ 159 volatile uint32_t *cq_db; /* Completion queue doorbell. */ 160 uint16_t port_id; /* Port ID of device. */ 161 uint16_t idx; /* Queue index. */ 162 uint64_t rt_timemask; /* Scheduling timestamp mask. */ 163 uint64_t ts_mask; /* Timestamp flag dynamic mask. */ 164 int32_t ts_offset; /* Timestamp field dynamic offset. */ 165 struct mlx5_dev_ctx_shared *sh; /* Shared context. */ 166 struct mlx5_txq_stats stats; /* TX queue counters. 
*/ 167 struct mlx5_uar_data uar_data; 168 struct rte_mbuf *elts[0]; 169 /* Storage for queued packets, must be the last field. */ 170 } __rte_cache_aligned; 171 172 enum mlx5_txq_type { 173 MLX5_TXQ_TYPE_STANDARD, /* Standard Tx queue. */ 174 MLX5_TXQ_TYPE_HAIRPIN, /* Hairpin Tx queue. */ 175 }; 176 177 /* TX queue control descriptor. */ 178 struct mlx5_txq_ctrl { 179 LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */ 180 uint32_t refcnt; /* Reference counter. */ 181 unsigned int socket; /* CPU socket ID for allocations. */ 182 enum mlx5_txq_type type; /* The txq ctrl type. */ 183 unsigned int max_inline_data; /* Max inline data. */ 184 unsigned int max_tso_header; /* Max TSO header size. */ 185 struct mlx5_txq_obj *obj; /* Verbs/DevX queue object. */ 186 struct mlx5_priv *priv; /* Back pointer to private data. */ 187 off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */ 188 uint16_t dump_file_n; /* Number of dump files. */ 189 struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */ 190 uint32_t hairpin_status; /* Hairpin binding status. */ 191 struct mlx5_txq_data txq; /* Data path structure. */ 192 /* Must be the last field in the structure, contains elts[]. */ 193 }; 194 195 /* mlx5_txq.c */ 196 197 int mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id); 198 int mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id); 199 int mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t queue_id); 200 int mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t queue_id); 201 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 202 unsigned int socket, const struct rte_eth_txconf *conf); 203 int mlx5_tx_hairpin_queue_setup 204 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 205 const struct rte_eth_hairpin_conf *hairpin_conf); 206 void mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid); 207 int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd); 208 void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev); 209 int mlx5_txq_obj_verify(struct rte_eth_dev *dev); 210 struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, 211 uint16_t desc, unsigned int socket, 212 const struct rte_eth_txconf *conf); 213 struct mlx5_txq_ctrl *mlx5_txq_hairpin_new 214 (struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 215 const struct rte_eth_hairpin_conf *hairpin_conf); 216 struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx); 217 int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx); 218 int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx); 219 int mlx5_txq_verify(struct rte_eth_dev *dev); 220 void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl); 221 void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl); 222 uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev); 223 void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev); 224 225 /* mlx5_tx.c */ 226 227 void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq, 228 unsigned int olx __rte_unused); 229 int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset); 230 void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, 231 struct rte_eth_txq_info *qinfo); 232 int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t tx_queue_id, 233 struct rte_eth_burst_mode *mode); 234 235 /* mlx5_tx_empw.c */ 236 237 MLX5_TXOFF_PRE_DECL(full_empw); 238 MLX5_TXOFF_PRE_DECL(none_empw); 239 MLX5_TXOFF_PRE_DECL(md_empw); 240 MLX5_TXOFF_PRE_DECL(mt_empw); 241 
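/*
 * Illustrative sketch (not part of the driver): how the MLX5_TXOFF_DECL()
 * template expansion produces specialized burst routines. Because 'olx' is
 * a compile-time constant in every generated routine, each
 * MLX5_TXOFF_CONFIG(...) test is folded away and only the requested offload
 * paths remain. The demo_* names are hypothetical; mlx5_tx_burst_tmpl() is
 * the real template invoked by MLX5_TXOFF_DECL().
 */
#if 0
static __rte_always_inline uint16_t
demo_tx_burst_tmpl(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
		   uint16_t pkts_n, unsigned int olx)
{
	/* Constant-folded branch: kept only in TSO-enabled variants. */
	if (MLX5_TXOFF_CONFIG(TSO)) {
		/* TSO-specific processing would be compiled in here. */
	}
	(void)txq;
	(void)pkts;
	return pkts_n;
}

/*
 * Expands to uint16_t mlx5_tx_burst_demo(void *, struct rte_mbuf **, uint16_t),
 * a variant with only the METADATA and EMPW branches compiled in.
 */
MLX5_TXOFF_DECL(demo, MLX5_TXOFF_CONFIG_METADATA | MLX5_TXOFF_CONFIG_EMPW);
#endif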
MLX5_TXOFF_PRE_DECL(mtsc_empw); 242 MLX5_TXOFF_PRE_DECL(mti_empw); 243 MLX5_TXOFF_PRE_DECL(mtv_empw); 244 MLX5_TXOFF_PRE_DECL(mtiv_empw); 245 MLX5_TXOFF_PRE_DECL(sc_empw); 246 MLX5_TXOFF_PRE_DECL(sci_empw); 247 MLX5_TXOFF_PRE_DECL(scv_empw); 248 MLX5_TXOFF_PRE_DECL(sciv_empw); 249 MLX5_TXOFF_PRE_DECL(i_empw); 250 MLX5_TXOFF_PRE_DECL(v_empw); 251 MLX5_TXOFF_PRE_DECL(iv_empw); 252 253 /* mlx5_tx_nompw.c */ 254 255 MLX5_TXOFF_PRE_DECL(full); 256 MLX5_TXOFF_PRE_DECL(none); 257 MLX5_TXOFF_PRE_DECL(md); 258 MLX5_TXOFF_PRE_DECL(mt); 259 MLX5_TXOFF_PRE_DECL(mtsc); 260 MLX5_TXOFF_PRE_DECL(mti); 261 MLX5_TXOFF_PRE_DECL(mtv); 262 MLX5_TXOFF_PRE_DECL(mtiv); 263 MLX5_TXOFF_PRE_DECL(sc); 264 MLX5_TXOFF_PRE_DECL(sci); 265 MLX5_TXOFF_PRE_DECL(scv); 266 MLX5_TXOFF_PRE_DECL(sciv); 267 MLX5_TXOFF_PRE_DECL(i); 268 MLX5_TXOFF_PRE_DECL(v); 269 MLX5_TXOFF_PRE_DECL(iv); 270 271 /* mlx5_tx_txpp.c */ 272 273 MLX5_TXOFF_PRE_DECL(full_ts_nompw); 274 MLX5_TXOFF_PRE_DECL(full_ts_nompwi); 275 MLX5_TXOFF_PRE_DECL(full_ts); 276 MLX5_TXOFF_PRE_DECL(full_ts_noi); 277 MLX5_TXOFF_PRE_DECL(none_ts); 278 MLX5_TXOFF_PRE_DECL(mdi_ts); 279 MLX5_TXOFF_PRE_DECL(mti_ts); 280 MLX5_TXOFF_PRE_DECL(mtiv_ts); 281 282 /* mlx5_tx_mpw.c */ 283 284 MLX5_TXOFF_PRE_DECL(none_mpw); 285 MLX5_TXOFF_PRE_DECL(mci_mpw); 286 MLX5_TXOFF_PRE_DECL(mc_mpw); 287 MLX5_TXOFF_PRE_DECL(i_mpw); 288 289 static __rte_always_inline struct mlx5_uar_data * 290 mlx5_tx_bfreg(struct mlx5_txq_data *txq) 291 { 292 return &MLX5_PROC_PRIV(txq->port_id)->uar_table[txq->idx]; 293 } 294 295 /** 296 * Ring TX queue doorbell and flush the update by write memory barrier. 297 * 298 * @param txq 299 * Pointer to TX queue structure. 300 * @param wqe 301 * Pointer to the last WQE posted in the NIC. 302 */ 303 static __rte_always_inline void 304 mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe) 305 { 306 mlx5_doorbell_ring(mlx5_tx_bfreg(txq), *(volatile uint64_t *)wqe, 307 txq->wqe_ci, txq->qp_db, 1); 308 } 309 310 /** 311 * Convert timestamp from mbuf format to linear counter 312 * of Clock Queue completions (24 bits). 313 * 314 * @param sh 315 * Pointer to the device shared context to fetch Tx 316 * packet pacing timestamp and parameters. 317 * @param ts 318 * Timestamp from mbuf to convert. 319 * @return 320 * Positive or zero value - completion ID to wait for. 321 * Negative value - conversion error. 322 */ 323 static __rte_always_inline int32_t 324 mlx5_txpp_convert_tx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t mts) 325 { 326 uint64_t ts, ci; 327 uint32_t tick; 328 329 do { 330 /* 331 * Read the two uint64_t fields atomically and compare the LSB bits. 332 * If there is no match, the timestamp was updated by 333 * the service thread and the data should be re-read. 334 */ 335 rte_compiler_barrier(); 336 ci = __atomic_load_n(&sh->txpp.ts.ci_ts, __ATOMIC_RELAXED); 337 ts = __atomic_load_n(&sh->txpp.ts.ts, __ATOMIC_RELAXED); 338 rte_compiler_barrier(); 339 if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH))) 340 break; 341 } while (true); 342 /* Perform the skew correction, positive value to send earlier. */ 343 mts -= sh->txpp.skew; 344 mts -= ts; 345 if (unlikely(mts >= UINT64_MAX / 2)) { 346 /* The difference is negative, mts is in the past. */ 347 __atomic_fetch_add(&sh->txpp.err_ts_past, 348 1, __ATOMIC_RELAXED); 349 return -1; 350 } 351 tick = sh->txpp.tick; 352 MLX5_ASSERT(tick); 353 /* Convert delta to completions, round up. */ 354 mts = (mts + tick - 1) / tick; 355 if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) { 356 /* The mts timestamp is too far in the future.
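		 * For illustration only, assuming the 24-bit Clock Queue index
		 * noted above and, say, a 1 usec tick: deltas beyond roughly
		 * 2^23 ticks, i.e. about 8.4 seconds, cannot be represented
		 * and are rejected here.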
*/ 357 __atomic_fetch_add(&sh->txpp.err_ts_future, 358 1, __ATOMIC_RELAXED); 359 return -1; 360 } 361 mts <<= 64 - MLX5_CQ_INDEX_WIDTH; 362 ci += mts; 363 ci >>= 64 - MLX5_CQ_INDEX_WIDTH; 364 return ci; 365 } 366 367 /** 368 * Set Software Parser flags and offsets in Ethernet Segment of WQE. 369 * Flags must be preliminary initialized to zero. 370 * 371 * @param loc 372 * Pointer to burst routine local context. 373 * @param swp_flags 374 * Pointer to store Software Parser flags. 375 * @param olx 376 * Configured Tx offloads mask. It is fully defined at 377 * compile time and may be used for optimization. 378 * 379 * @return 380 * Software Parser offsets packed in dword. 381 * Software Parser flags are set by pointer. 382 */ 383 static __rte_always_inline uint32_t 384 txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc, 385 uint8_t *swp_flags, 386 unsigned int olx) 387 { 388 uint64_t ol, tunnel; 389 unsigned int idx, off; 390 uint32_t set; 391 392 if (!MLX5_TXOFF_CONFIG(SWP)) 393 return 0; 394 ol = loc->mbuf->ol_flags; 395 tunnel = ol & RTE_MBUF_F_TX_TUNNEL_MASK; 396 /* 397 * Check whether Software Parser is required. 398 * Only customized tunnels may ask for. 399 */ 400 if (likely(tunnel != RTE_MBUF_F_TX_TUNNEL_UDP && tunnel != RTE_MBUF_F_TX_TUNNEL_IP)) 401 return 0; 402 /* 403 * The index should have: 404 * bit[0:1] = RTE_MBUF_F_TX_L4_MASK 405 * bit[4] = RTE_MBUF_F_TX_IPV6 406 * bit[8] = RTE_MBUF_F_TX_OUTER_IPV6 407 * bit[9] = RTE_MBUF_F_TX_OUTER_UDP 408 */ 409 idx = (ol & (RTE_MBUF_F_TX_L4_MASK | RTE_MBUF_F_TX_IPV6 | RTE_MBUF_F_TX_OUTER_IPV6)) >> 52; 410 idx |= (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) ? (1 << 9) : 0; 411 *swp_flags = mlx5_swp_types_table[idx]; 412 /* 413 * Set offsets for SW parser. Since ConnectX-5, SW parser just 414 * complements HW parser. SW parser starts to engage only if HW parser 415 * can't reach a header. For the older devices, HW parser will not kick 416 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 417 * should be set regardless of HW offload. 418 */ 419 off = loc->mbuf->outer_l2_len; 420 if (MLX5_TXOFF_CONFIG(VLAN) && ol & RTE_MBUF_F_TX_VLAN) 421 off += sizeof(struct rte_vlan_hdr); 422 set = (off >> 1) << 8; /* Outer L3 offset. */ 423 off += loc->mbuf->outer_l3_len; 424 if (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) 425 set |= off >> 1; /* Outer L4 offset. */ 426 if (ol & (RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IPV6)) { /* Inner IP. */ 427 const uint64_t csum = ol & RTE_MBUF_F_TX_L4_MASK; 428 off += loc->mbuf->l2_len; 429 set |= (off >> 1) << 24; /* Inner L3 offset. */ 430 if (csum == RTE_MBUF_F_TX_TCP_CKSUM || 431 csum == RTE_MBUF_F_TX_UDP_CKSUM || 432 (MLX5_TXOFF_CONFIG(TSO) && ol & RTE_MBUF_F_TX_TCP_SEG)) { 433 off += loc->mbuf->l3_len; 434 set |= (off >> 1) << 16; /* Inner L4 offset. */ 435 } 436 } 437 set = rte_cpu_to_le_32(set); 438 return set; 439 } 440 441 /** 442 * Convert the Checksum offloads to Verbs. 443 * 444 * @param buf 445 * Pointer to the mbuf. 446 * 447 * @return 448 * Converted checksum flags. 
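 *
 * Worked example (bit positions taken from the index layout documented
 * inside the function below, not re-derived from rte_mbuf.h): a plain
 * TCP packet with RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM and
 * no tunnel gives idx = ((1ULL << 54 | 1ULL << 52) >> 50) | (0 << 9) = 0x14,
 * so mlx5_cksum_table[0x14] supplies the checksum flags for the WQE.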
449 */ 450 static __rte_always_inline uint8_t 451 txq_ol_cksum_to_cs(struct rte_mbuf *buf) 452 { 453 uint32_t idx; 454 uint8_t is_tunnel = !!(buf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK); 455 const uint64_t ol_flags_mask = RTE_MBUF_F_TX_TCP_SEG | RTE_MBUF_F_TX_L4_MASK | 456 RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_OUTER_IP_CKSUM; 457 458 /* 459 * The index should have: 460 * bit[0] = RTE_MBUF_F_TX_TCP_SEG 461 * bit[2:3] = RTE_MBUF_F_TX_UDP_CKSUM, RTE_MBUF_F_TX_TCP_CKSUM 462 * bit[4] = RTE_MBUF_F_TX_IP_CKSUM 463 * bit[8] = RTE_MBUF_F_TX_OUTER_IP_CKSUM 464 * bit[9] = tunnel 465 */ 466 idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9); 467 return mlx5_cksum_table[idx]; 468 } 469 470 /** 471 * Free the mbufs from the linear array of pointers. 472 * 473 * @param txq 474 * Pointer to Tx queue structure. 475 * @param pkts 476 * Pointer to array of packets to be free. 477 * @param pkts_n 478 * Number of packets to be freed. 479 * @param olx 480 * Configured Tx offloads mask. It is fully defined at 481 * compile time and may be used for optimization. 482 */ 483 static __rte_always_inline void 484 mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 485 struct rte_mbuf **__rte_restrict pkts, 486 unsigned int pkts_n, 487 unsigned int olx __rte_unused) 488 { 489 struct rte_mempool *pool = NULL; 490 struct rte_mbuf **p_free = NULL; 491 struct rte_mbuf *mbuf; 492 unsigned int n_free = 0; 493 494 /* 495 * The implemented algorithm eliminates 496 * copying pointers to temporary array 497 * for rte_mempool_put_bulk() calls. 498 */ 499 MLX5_ASSERT(pkts); 500 MLX5_ASSERT(pkts_n); 501 /* 502 * Free mbufs directly to the pool in bulk 503 * if fast free offload is engaged 504 */ 505 if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) { 506 mbuf = *pkts; 507 pool = mbuf->pool; 508 rte_mempool_put_bulk(pool, (void *)pkts, pkts_n); 509 return; 510 } 511 for (;;) { 512 for (;;) { 513 /* 514 * Decrement mbuf reference counter, detach 515 * indirect and external buffers if needed. 516 */ 517 mbuf = rte_pktmbuf_prefree_seg(*pkts); 518 if (likely(mbuf != NULL)) { 519 MLX5_ASSERT(mbuf == *pkts); 520 if (likely(n_free != 0)) { 521 if (unlikely(pool != mbuf->pool)) 522 /* From different pool. */ 523 break; 524 } else { 525 /* Start new scan array. */ 526 pool = mbuf->pool; 527 p_free = pkts; 528 } 529 ++n_free; 530 ++pkts; 531 --pkts_n; 532 if (unlikely(pkts_n == 0)) { 533 mbuf = NULL; 534 break; 535 } 536 } else { 537 /* 538 * This happens if mbuf is still referenced. 539 * We can't put it back to the pool, skip. 540 */ 541 ++pkts; 542 --pkts_n; 543 if (unlikely(n_free != 0)) 544 /* There is some array to free.*/ 545 break; 546 if (unlikely(pkts_n == 0)) 547 /* Last mbuf, nothing to free. */ 548 return; 549 } 550 } 551 for (;;) { 552 /* 553 * This loop is implemented to avoid multiple 554 * inlining of rte_mempool_put_bulk(). 555 */ 556 MLX5_ASSERT(pool); 557 MLX5_ASSERT(p_free); 558 MLX5_ASSERT(n_free); 559 /* 560 * Free the array of pre-freed mbufs 561 * belonging to the same memory pool. 562 */ 563 rte_mempool_put_bulk(pool, (void *)p_free, n_free); 564 if (unlikely(mbuf != NULL)) { 565 /* There is the request to start new scan. */ 566 pool = mbuf->pool; 567 p_free = pkts++; 568 n_free = 1; 569 --pkts_n; 570 if (likely(pkts_n != 0)) 571 break; 572 /* 573 * This is the last mbuf to be freed. 574 * Do one more loop iteration to complete. 575 * This is rare case of the last unique mbuf. 
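			 * Overall effect of the two nested loops, shown as a
			 * hypothetical trace: for mbufs coming from pools
			 * A A B B B A the routine issues three bulk puts,
			 * put(A, 2), put(B, 3) and put(A, 1), instead of
			 * one mempool operation per mbuf.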
576 */ 577 mbuf = NULL; 578 continue; 579 } 580 if (likely(pkts_n == 0)) 581 return; 582 n_free = 0; 583 break; 584 } 585 } 586 } 587 588 /** 589 * No inline version to free buffers for optimal call 590 * on the tx_burst completion. 591 */ 592 static __rte_noinline void 593 __mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq, 594 struct rte_mbuf **__rte_restrict pkts, 595 unsigned int pkts_n, 596 unsigned int olx __rte_unused) 597 { 598 mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx); 599 } 600 601 /** 602 * Free the mbuf from the elts ring buffer till new tail. 603 * 604 * @param txq 605 * Pointer to Tx queue structure. 606 * @param tail 607 * Index in elts to free up to, becomes new elts tail. 608 * @param olx 609 * Configured Tx offloads mask. It is fully defined at 610 * compile time and may be used for optimization. 611 */ 612 static __rte_always_inline void 613 mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq, 614 uint16_t tail, 615 unsigned int olx __rte_unused) 616 { 617 uint16_t n_elts = tail - txq->elts_tail; 618 619 MLX5_ASSERT(n_elts); 620 MLX5_ASSERT(n_elts <= txq->elts_s); 621 /* 622 * Implement a loop to support ring buffer wraparound 623 * with single inlining of mlx5_tx_free_mbuf(). 624 */ 625 do { 626 unsigned int part; 627 628 part = txq->elts_s - (txq->elts_tail & txq->elts_m); 629 part = RTE_MIN(part, n_elts); 630 MLX5_ASSERT(part); 631 MLX5_ASSERT(part <= txq->elts_s); 632 mlx5_tx_free_mbuf(txq, 633 &txq->elts[txq->elts_tail & txq->elts_m], 634 part, olx); 635 txq->elts_tail += part; 636 n_elts -= part; 637 } while (n_elts); 638 } 639 640 /** 641 * Store the mbuf being sent into elts ring buffer. 642 * On Tx completion these mbufs will be freed. 643 * 644 * @param txq 645 * Pointer to Tx queue structure. 646 * @param pkts 647 * Pointer to array of packets to be stored. 648 * @param pkts_n 649 * Number of packets to be stored. 650 * @param olx 651 * Configured Tx offloads mask. It is fully defined at 652 * compile time and may be used for optimization. 653 */ 654 static __rte_always_inline void 655 mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq, 656 struct rte_mbuf **__rte_restrict pkts, 657 unsigned int pkts_n, 658 unsigned int olx __rte_unused) 659 { 660 unsigned int part; 661 struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts; 662 663 MLX5_ASSERT(pkts); 664 MLX5_ASSERT(pkts_n); 665 part = txq->elts_s - (txq->elts_head & txq->elts_m); 666 MLX5_ASSERT(part); 667 MLX5_ASSERT(part <= txq->elts_s); 668 /* This code is a good candidate for vectorizing with SIMD. */ 669 rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)), 670 (void *)pkts, 671 RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *)); 672 txq->elts_head += pkts_n; 673 if (unlikely(part < pkts_n)) 674 /* The copy is wrapping around the elts array. */ 675 rte_memcpy((void *)elts, (void *)(pkts + part), 676 (pkts_n - part) * sizeof(struct rte_mbuf *)); 677 } 678 679 /** 680 * Check if the completion request flag should be set in the last WQE. 681 * Both pushed mbufs and WQEs are monitored and the completion request 682 * flag is set if any of thresholds is reached. 683 * 684 * @param txq 685 * Pointer to TX queue structure. 686 * @param loc 687 * Pointer to burst routine local context. 688 * @param olx 689 * Configured Tx offloads mask. It is fully defined at 690 * compile time and may be used for optimization. 
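 *
 * A rough sketch of the condition evaluated below (simplified, not a
 * verbatim transcription of the code):
 *   pending mbufs = elts_head - elts_comp
 *                   (plus pkts_sent - pkts_copy when inlining is disabled)
 *   pending WQEs  = wqe_ci - wqe_comp (checked only when inlining is enabled)
 * and the completion is requested once the first count reaches
 * MLX5_TX_COMP_THRESH or the second one reaches wqe_thres.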
691 */ 692 static __rte_always_inline void 693 mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq, 694 struct mlx5_txq_local *__rte_restrict loc, 695 unsigned int olx) 696 { 697 uint16_t head = txq->elts_head; 698 unsigned int part; 699 700 part = MLX5_TXOFF_CONFIG(INLINE) ? 701 0 : loc->pkts_sent - loc->pkts_copy; 702 head += part; 703 if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH || 704 (MLX5_TXOFF_CONFIG(INLINE) && 705 (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) { 706 volatile struct mlx5_wqe *last = loc->wqe_last; 707 708 MLX5_ASSERT(last); 709 txq->elts_comp = head; 710 if (MLX5_TXOFF_CONFIG(INLINE)) 711 txq->wqe_comp = txq->wqe_ci; 712 /* Request unconditional completion on last WQE. */ 713 last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS << 714 MLX5_COMP_MODE_OFFSET); 715 /* Save elts_head in dedicated free on completion queue. */ 716 #ifdef RTE_LIBRTE_MLX5_DEBUG 717 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head | 718 (last->cseg.opcode >> 8) << 16; 719 #else 720 txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head; 721 #endif 722 /* A CQE slot must always be available. */ 723 MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s); 724 } 725 } 726 727 /** 728 * Build the Control Segment with specified opcode: 729 * - MLX5_OPCODE_SEND 730 * - MLX5_OPCODE_ENHANCED_MPSW 731 * - MLX5_OPCODE_TSO 732 * 733 * @param txq 734 * Pointer to TX queue structure. 735 * @param loc 736 * Pointer to burst routine local context. 737 * @param wqe 738 * Pointer to WQE to fill with built Control Segment. 739 * @param ds 740 * Supposed length of WQE in segments. 741 * @param opcode 742 * SQ WQE opcode to put into Control Segment. 743 * @param olx 744 * Configured Tx offloads mask. It is fully defined at 745 * compile time and may be used for optimization. 746 */ 747 static __rte_always_inline void 748 mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq, 749 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 750 struct mlx5_wqe *__rte_restrict wqe, 751 unsigned int ds, 752 unsigned int opcode, 753 unsigned int olx __rte_unused) 754 { 755 struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg; 756 757 /* For legacy MPW replace the EMPW by TSO with modifier. */ 758 if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW) 759 opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24; 760 cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode); 761 cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 762 cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR << 763 MLX5_COMP_MODE_OFFSET); 764 cs->misc = RTE_BE32(0); 765 } 766 767 /** 768 * Build the Synchronize Queue Segment with specified completion index. 769 * 770 * @param txq 771 * Pointer to TX queue structure. 772 * @param loc 773 * Pointer to burst routine local context. 774 * @param wqe 775 * Pointer to WQE to fill with built Control Segment. 776 * @param wci 777 * Completion index in Clock Queue to wait. 778 * @param olx 779 * Configured Tx offloads mask. It is fully defined at 780 * compile time and may be used for optimization. 
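 *
 * Typically paired with mlx5_tx_cseg_init() to emit a WAIT WQE, as done
 * in mlx5_tx_schedule_send() further below (abridged):
 *   mlx5_tx_cseg_init(txq, loc, wqe,
 *                     1 + sizeof(struct mlx5_wqe_qseg) / MLX5_WSEG_SIZE,
 *                     MLX5_OPCODE_WAIT | MLX5_OPC_MOD_WAIT_CQ_PI << 24, olx);
 *   mlx5_tx_qseg_init(txq, loc, wqe, wci, olx);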
781 */ 782 static __rte_always_inline void 783 mlx5_tx_qseg_init(struct mlx5_txq_data *restrict txq, 784 struct mlx5_txq_local *restrict loc __rte_unused, 785 struct mlx5_wqe *restrict wqe, 786 unsigned int wci, 787 unsigned int olx __rte_unused) 788 { 789 struct mlx5_wqe_qseg *qs; 790 791 qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 792 qs->max_index = rte_cpu_to_be_32(wci); 793 qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq_obj.cq->id); 794 qs->reserved0 = RTE_BE32(0); 795 qs->reserved1 = RTE_BE32(0); 796 } 797 798 /** 799 * Build the Wait on Time Segment with specified timestamp value. 800 * 801 * @param txq 802 * Pointer to TX queue structure. 803 * @param loc 804 * Pointer to burst routine local context. 805 * @param wqe 806 * Pointer to WQE to fill with built Control Segment. 807 * @param ts 808 * Timestamp value to wait for. 809 * @param olx 810 * Configured Tx offloads mask. It is fully defined at 811 * compile time and may be used for optimization. 812 */ 813 static __rte_always_inline void 814 mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq, 815 struct mlx5_txq_local *restrict loc __rte_unused, 816 struct mlx5_wqe *restrict wqe, 817 uint64_t ts, 818 unsigned int olx __rte_unused) 819 { 820 struct mlx5_wqe_wseg *ws; 821 822 ws = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE); 823 ws->operation = rte_cpu_to_be_32(MLX5_WAIT_COND_CYCLIC_BIGGER); 824 ws->lkey = RTE_BE32(0); 825 ws->va_high = RTE_BE32(0); 826 ws->va_low = RTE_BE32(0); 827 if (txq->rt_timestamp) { 828 ts = ts % (uint64_t)NS_PER_S 829 | (ts / (uint64_t)NS_PER_S) << 32; 830 } 831 ws->value = rte_cpu_to_be_64(ts); 832 ws->mask = txq->rt_timemask; 833 } 834 835 /** 836 * Build the Ethernet Segment without inlined data. 837 * Supports Software Parser, Checksums and VLAN insertion Tx offload features. 838 * 839 * @param txq 840 * Pointer to TX queue structure. 841 * @param loc 842 * Pointer to burst routine local context. 843 * @param wqe 844 * Pointer to WQE to fill with built Ethernet Segment. 845 * @param olx 846 * Configured Tx offloads mask. It is fully defined at 847 * compile time and may be used for optimization. 848 */ 849 static __rte_always_inline void 850 mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 851 struct mlx5_txq_local *__rte_restrict loc, 852 struct mlx5_wqe *__rte_restrict wqe, 853 unsigned int olx) 854 { 855 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 856 uint32_t csum; 857 858 /* 859 * Calculate and set check sum flags first, dword field 860 * in segment may be shared with Software Parser flags. 861 */ 862 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 863 es->flags = rte_cpu_to_le_32(csum); 864 /* 865 * Calculate and set Software Parser offsets and flags. 866 * These flags are set for custom UDP and IP tunnel packets. 867 */ 868 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 869 /* Fill metadata field if needed. */ 870 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 871 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 872 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 873 0 : 0; 874 /* Engage VLAN tag insertion feature if requested. */ 875 if (MLX5_TXOFF_CONFIG(VLAN) && 876 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 877 /* 878 * We should get here only if the device supports 879 * this feature correctly.
880 */ 881 MLX5_ASSERT(txq->vlan_en); 882 es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT | 883 loc->mbuf->vlan_tci); 884 } else { 885 es->inline_hdr = RTE_BE32(0); 886 } 887 } 888 889 /** 890 * Build the Ethernet Segment with minimal inlined data 891 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is 892 * used to fill the gap in single WQEBB WQEs. 893 * Supports Software Parser, Checksums and VLAN 894 * insertion Tx offload features. 895 * 896 * @param txq 897 * Pointer to TX queue structure. 898 * @param loc 899 * Pointer to burst routine local context. 900 * @param wqe 901 * Pointer to WQE to fill with built Ethernet Segment. 902 * @param vlan 903 * Length of VLAN tag insertion if any. 904 * @param olx 905 * Configured Tx offloads mask. It is fully defined at 906 * compile time and may be used for optimization. 907 */ 908 static __rte_always_inline void 909 mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused, 910 struct mlx5_txq_local *__rte_restrict loc, 911 struct mlx5_wqe *__rte_restrict wqe, 912 unsigned int vlan, 913 unsigned int olx) 914 { 915 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 916 uint32_t csum; 917 uint8_t *psrc, *pdst; 918 919 /* 920 * Calculate and set check sum flags first, dword field 921 * in segment may be shared with Software Parser flags. 922 */ 923 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 924 es->flags = rte_cpu_to_le_32(csum); 925 /* 926 * Calculate and set Software Parser offsets and flags. 927 * These flags a set for custom UDP and IP tunnel packets. 928 */ 929 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 930 /* Fill metadata field if needed. */ 931 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 932 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 933 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 934 0 : 0; 935 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 936 es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE); 937 es->inline_data = *(unaligned_uint16_t *)psrc; 938 psrc += sizeof(uint16_t); 939 pdst = (uint8_t *)(es + 1); 940 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 941 /* Implement VLAN tag insertion as part inline data. */ 942 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 943 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 944 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 945 /* Insert VLAN ethertype + VLAN tag. */ 946 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 947 ((RTE_ETHER_TYPE_VLAN << 16) | 948 loc->mbuf->vlan_tci); 949 pdst += sizeof(struct rte_vlan_hdr); 950 /* Copy the rest two bytes from packet data. */ 951 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 952 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 953 } else { 954 /* Fill the gap in the title WQEBB with inline data. */ 955 rte_mov16(pdst, psrc); 956 } 957 } 958 959 /** 960 * Build the Ethernet Segment with entire packet data inlining. Checks the 961 * boundary of WQEBB and ring buffer wrapping, supports Software Parser, 962 * Checksums and VLAN insertion Tx offload features. 963 * 964 * @param txq 965 * Pointer to TX queue structure. 966 * @param loc 967 * Pointer to burst routine local context. 968 * @param wqe 969 * Pointer to WQE to fill with built Ethernet Segment. 970 * @param vlan 971 * Length of VLAN tag insertion if any. 972 * @param inlen 973 * Length of data to inline (VLAN included, if any). 974 * @param tso 975 * TSO flag, set mss field from the packet. 976 * @param olx 977 * Configured Tx offloads mask. 
It is fully defined at 978 * compile time and may be used for optimization. 979 * 980 * @return 981 * Pointer to the next Data Segment (aligned and wrapped around). 982 */ 983 static __rte_always_inline struct mlx5_wqe_dseg * 984 mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq, 985 struct mlx5_txq_local *__rte_restrict loc, 986 struct mlx5_wqe *__rte_restrict wqe, 987 unsigned int vlan, 988 unsigned int inlen, 989 unsigned int tso, 990 unsigned int olx) 991 { 992 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 993 uint32_t csum; 994 uint8_t *psrc, *pdst; 995 unsigned int part; 996 997 /* 998 * Calculate and set check sum flags first, dword field 999 * in segment may be shared with Software Parser flags. 1000 */ 1001 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 1002 if (tso) { 1003 csum <<= 24; 1004 csum |= loc->mbuf->tso_segsz; 1005 es->flags = rte_cpu_to_be_32(csum); 1006 } else { 1007 es->flags = rte_cpu_to_le_32(csum); 1008 } 1009 /* 1010 * Calculate and set Software Parser offsets and flags. 1011 * These flags a set for custom UDP and IP tunnel packets. 1012 */ 1013 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1014 /* Fill metadata field if needed. */ 1015 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1016 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 1017 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1018 0 : 0; 1019 psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 1020 es->inline_hdr_sz = rte_cpu_to_be_16(inlen); 1021 es->inline_data = *(unaligned_uint16_t *)psrc; 1022 psrc += sizeof(uint16_t); 1023 pdst = (uint8_t *)(es + 1); 1024 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1025 /* Implement VLAN tag insertion as part inline data. */ 1026 memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t)); 1027 pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1028 psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t); 1029 /* Insert VLAN ethertype + VLAN tag. */ 1030 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1031 ((RTE_ETHER_TYPE_VLAN << 16) | 1032 loc->mbuf->vlan_tci); 1033 pdst += sizeof(struct rte_vlan_hdr); 1034 /* Copy the rest two bytes from packet data. */ 1035 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t))); 1036 *(uint16_t *)pdst = *(unaligned_uint16_t *)psrc; 1037 psrc += sizeof(uint16_t); 1038 } else { 1039 /* Fill the gap in the title WQEBB with inline data. */ 1040 rte_mov16(pdst, psrc); 1041 psrc += sizeof(rte_v128u32_t); 1042 } 1043 pdst = (uint8_t *)(es + 2); 1044 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 1045 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 1046 inlen -= MLX5_ESEG_MIN_INLINE_SIZE; 1047 if (!inlen) { 1048 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 1049 return (struct mlx5_wqe_dseg *)pdst; 1050 } 1051 /* 1052 * The WQEBB space availability is checked by caller. 1053 * Here we should be aware of WQE ring buffer wraparound only. 1054 */ 1055 part = (uint8_t *)txq->wqes_end - pdst; 1056 part = RTE_MIN(part, inlen); 1057 do { 1058 rte_memcpy(pdst, psrc, part); 1059 inlen -= part; 1060 if (likely(!inlen)) { 1061 /* 1062 * If return value is not used by the caller 1063 * the code below will be optimized out. 
1064 */ 1065 pdst += part; 1066 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1067 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 1068 pdst = (uint8_t *)txq->wqes; 1069 return (struct mlx5_wqe_dseg *)pdst; 1070 } 1071 pdst = (uint8_t *)txq->wqes; 1072 psrc += part; 1073 part = inlen; 1074 } while (true); 1075 } 1076 1077 /** 1078 * Copy data from a chain of mbufs to the specified linear buffer. 1079 * If the data from some mbuf is copied out completely, this mbuf 1080 * is freed. The local structure is used to keep the byte stream 1081 * state. 1082 * 1083 * @param pdst 1084 * Pointer to the destination linear buffer. 1085 * @param loc 1086 * Pointer to burst routine local context. 1087 * @param len 1088 * Length of data to be copied. 1089 * @param must 1090 * Length of data to be copied ignoring the no-inline hint. 1091 * @param olx 1092 * Configured Tx offloads mask. It is fully defined at 1093 * compile time and may be used for optimization. 1094 * 1095 * @return 1096 * Number of data bytes actually copied. This is always greater than or 1097 * equal to the must parameter and might be less than len if the 1098 * no-inline hint flag is encountered. 1099 */ 1100 static __rte_always_inline unsigned int 1101 mlx5_tx_mseg_memcpy(uint8_t *pdst, 1102 struct mlx5_txq_local *__rte_restrict loc, 1103 unsigned int len, 1104 unsigned int must, 1105 unsigned int olx __rte_unused) 1106 { 1107 struct rte_mbuf *mbuf; 1108 unsigned int part, dlen, copy = 0; 1109 uint8_t *psrc; 1110 1111 MLX5_ASSERT(len); 1112 do { 1113 /* Allow zero length packets, must check first. */ 1114 dlen = rte_pktmbuf_data_len(loc->mbuf); 1115 if (dlen <= loc->mbuf_off) { 1116 /* Exhausted packet, just free. */ 1117 mbuf = loc->mbuf; 1118 loc->mbuf = mbuf->next; 1119 rte_pktmbuf_free_seg(mbuf); 1120 loc->mbuf_off = 0; 1121 MLX5_ASSERT(loc->mbuf_nseg > 1); 1122 MLX5_ASSERT(loc->mbuf); 1123 --loc->mbuf_nseg; 1124 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1125 unsigned int diff; 1126 1127 if (copy >= must) { 1128 /* 1129 * We already copied the minimal 1130 * requested amount of data. 1131 */ 1132 return copy; 1133 } 1134 diff = must - copy; 1135 if (diff <= rte_pktmbuf_data_len(loc->mbuf)) { 1136 /* 1137 * Copy only the minimal required 1138 * part of the data buffer. Limit amount 1139 * of data to be copied to the length of 1140 * available space. 1141 */ 1142 len = RTE_MIN(len, diff); 1143 } 1144 } 1145 continue; 1146 } 1147 dlen -= loc->mbuf_off; 1148 psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1149 loc->mbuf_off); 1150 part = RTE_MIN(len, dlen); 1151 rte_memcpy(pdst, psrc, part); 1152 copy += part; 1153 loc->mbuf_off += part; 1154 len -= part; 1155 if (!len) { 1156 if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) { 1157 loc->mbuf_off = 0; 1158 /* Exhausted packet, just free. */ 1159 mbuf = loc->mbuf; 1160 loc->mbuf = mbuf->next; 1161 rte_pktmbuf_free_seg(mbuf); 1162 loc->mbuf_off = 0; 1163 MLX5_ASSERT(loc->mbuf_nseg >= 1); 1164 --loc->mbuf_nseg; 1165 } 1166 return copy; 1167 } 1168 pdst += part; 1169 } while (true); 1170 } 1171 1172 /** 1173 * Build the Ethernet Segment with inlined data from multi-segment packet. 1174 * Checks the boundary of WQEBB and ring buffer wrapping, supports Software 1175 * Parser, Checksums and VLAN insertion Tx offload features. 1176 * 1177 * @param txq 1178 * Pointer to TX queue structure. 1179 * @param loc 1180 * Pointer to burst routine local context. 1181 * @param wqe 1182 * Pointer to WQE to fill with built Ethernet Segment.
1183 * @param vlan 1184 * Length of VLAN tag insertion if any. 1185 * @param inlen 1186 * Length of data to inline (VLAN included, if any). 1187 * @param tso 1188 * TSO flag, set mss field from the packet. 1189 * @param olx 1190 * Configured Tx offloads mask. It is fully defined at 1191 * compile time and may be used for optimization. 1192 * 1193 * @return 1194 * Pointer to the next Data Segment (aligned and possible NOT wrapped 1195 * around - caller should do wrapping check on its own). 1196 */ 1197 static __rte_always_inline struct mlx5_wqe_dseg * 1198 mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq, 1199 struct mlx5_txq_local *__rte_restrict loc, 1200 struct mlx5_wqe *__rte_restrict wqe, 1201 unsigned int vlan, 1202 unsigned int inlen, 1203 unsigned int tso, 1204 unsigned int olx) 1205 { 1206 struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg; 1207 uint32_t csum; 1208 uint8_t *pdst; 1209 unsigned int part, tlen = 0; 1210 1211 /* 1212 * Calculate and set check sum flags first, uint32_t field 1213 * in segment may be shared with Software Parser flags. 1214 */ 1215 csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0; 1216 if (tso) { 1217 csum <<= 24; 1218 csum |= loc->mbuf->tso_segsz; 1219 es->flags = rte_cpu_to_be_32(csum); 1220 } else { 1221 es->flags = rte_cpu_to_le_32(csum); 1222 } 1223 /* 1224 * Calculate and set Software Parser offsets and flags. 1225 * These flags a set for custom UDP and IP tunnel packets. 1226 */ 1227 es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx); 1228 /* Fill metadata field if needed. */ 1229 es->metadata = MLX5_TXOFF_CONFIG(METADATA) ? 1230 loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 1231 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 1232 0 : 0; 1233 MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE); 1234 pdst = (uint8_t *)&es->inline_data; 1235 if (MLX5_TXOFF_CONFIG(VLAN) && vlan) { 1236 /* Implement VLAN tag insertion as part inline data. */ 1237 mlx5_tx_mseg_memcpy(pdst, loc, 1238 2 * RTE_ETHER_ADDR_LEN, 1239 2 * RTE_ETHER_ADDR_LEN, olx); 1240 pdst += 2 * RTE_ETHER_ADDR_LEN; 1241 *(unaligned_uint32_t *)pdst = rte_cpu_to_be_32 1242 ((RTE_ETHER_TYPE_VLAN << 16) | 1243 loc->mbuf->vlan_tci); 1244 pdst += sizeof(struct rte_vlan_hdr); 1245 tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr); 1246 } 1247 MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end); 1248 /* 1249 * The WQEBB space availability is checked by caller. 1250 * Here we should be aware of WQE ring buffer wraparound only. 1251 */ 1252 part = (uint8_t *)txq->wqes_end - pdst; 1253 part = RTE_MIN(part, inlen - tlen); 1254 MLX5_ASSERT(part); 1255 do { 1256 unsigned int copy; 1257 1258 /* 1259 * Copying may be interrupted inside the routine 1260 * if run into no inline hint flag. 1261 */ 1262 copy = tso ? inlen : txq->inlen_mode; 1263 copy = tlen >= copy ? 0 : (copy - tlen); 1264 copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx); 1265 tlen += copy; 1266 if (likely(inlen <= tlen) || copy < part) { 1267 es->inline_hdr_sz = rte_cpu_to_be_16(tlen); 1268 pdst += copy; 1269 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1270 return (struct mlx5_wqe_dseg *)pdst; 1271 } 1272 pdst = (uint8_t *)txq->wqes; 1273 part = inlen - tlen; 1274 } while (true); 1275 } 1276 1277 /** 1278 * Build the Data Segment of pointer type. 1279 * 1280 * @param txq 1281 * Pointer to TX queue structure. 1282 * @param loc 1283 * Pointer to burst routine local context. 1284 * @param dseg 1285 * Pointer to WQE to fill with built Data Segment. 
1286 * @param buf 1287 * Data buffer to point. 1288 * @param len 1289 * Data buffer length. 1290 * @param olx 1291 * Configured Tx offloads mask. It is fully defined at 1292 * compile time and may be used for optimization. 1293 */ 1294 static __rte_always_inline void 1295 mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq, 1296 struct mlx5_txq_local *__rte_restrict loc, 1297 struct mlx5_wqe_dseg *__rte_restrict dseg, 1298 uint8_t *buf, 1299 unsigned int len, 1300 unsigned int olx __rte_unused) 1301 1302 { 1303 MLX5_ASSERT(len); 1304 dseg->bcount = rte_cpu_to_be_32(len); 1305 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1306 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1307 } 1308 1309 /** 1310 * Build the Data Segment of pointer type or inline if data length is less than 1311 * buffer in minimal Data Segment size. 1312 * 1313 * @param txq 1314 * Pointer to TX queue structure. 1315 * @param loc 1316 * Pointer to burst routine local context. 1317 * @param dseg 1318 * Pointer to WQE to fill with built Data Segment. 1319 * @param buf 1320 * Data buffer to point. 1321 * @param len 1322 * Data buffer length. 1323 * @param olx 1324 * Configured Tx offloads mask. It is fully defined at 1325 * compile time and may be used for optimization. 1326 */ 1327 static __rte_always_inline void 1328 mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq, 1329 struct mlx5_txq_local *__rte_restrict loc, 1330 struct mlx5_wqe_dseg *__rte_restrict dseg, 1331 uint8_t *buf, 1332 unsigned int len, 1333 unsigned int olx __rte_unused) 1334 1335 { 1336 uintptr_t dst, src; 1337 1338 MLX5_ASSERT(len); 1339 if (len > MLX5_DSEG_MIN_INLINE_SIZE) { 1340 dseg->bcount = rte_cpu_to_be_32(len); 1341 dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf); 1342 dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf); 1343 1344 return; 1345 } 1346 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1347 /* Unrolled implementation of generic rte_memcpy. */ 1348 dst = (uintptr_t)&dseg->inline_data[0]; 1349 src = (uintptr_t)buf; 1350 if (len & 0x08) { 1351 #ifdef RTE_ARCH_STRICT_ALIGN 1352 MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t))); 1353 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1354 dst += sizeof(uint32_t); 1355 src += sizeof(uint32_t); 1356 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1357 dst += sizeof(uint32_t); 1358 src += sizeof(uint32_t); 1359 #else 1360 *(uint64_t *)dst = *(unaligned_uint64_t *)src; 1361 dst += sizeof(uint64_t); 1362 src += sizeof(uint64_t); 1363 #endif 1364 } 1365 if (len & 0x04) { 1366 *(uint32_t *)dst = *(unaligned_uint32_t *)src; 1367 dst += sizeof(uint32_t); 1368 src += sizeof(uint32_t); 1369 } 1370 if (len & 0x02) { 1371 *(uint16_t *)dst = *(unaligned_uint16_t *)src; 1372 dst += sizeof(uint16_t); 1373 src += sizeof(uint16_t); 1374 } 1375 if (len & 0x01) 1376 *(uint8_t *)dst = *(uint8_t *)src; 1377 } 1378 1379 /** 1380 * Build the Data Segment of inlined data from single 1381 * segment packet, no VLAN insertion. 1382 * 1383 * @param txq 1384 * Pointer to TX queue structure. 1385 * @param loc 1386 * Pointer to burst routine local context. 1387 * @param dseg 1388 * Pointer to WQE to fill with built Data Segment. 1389 * @param buf 1390 * Data buffer to point. 1391 * @param len 1392 * Data buffer length. 1393 * @param olx 1394 * Configured Tx offloads mask. It is fully defined at 1395 * compile time and may be used for optimization. 1396 * 1397 * @return 1398 * Pointer to the next Data Segment after inlined data. 1399 * Ring buffer wraparound check is needed. 
We do not do it here because it 1400 * may not be needed for the last packet in the eMPW session. 1401 */ 1402 static __rte_always_inline struct mlx5_wqe_dseg * 1403 mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq, 1404 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1405 struct mlx5_wqe_dseg *__rte_restrict dseg, 1406 uint8_t *buf, 1407 unsigned int len, 1408 unsigned int olx __rte_unused) 1409 { 1410 unsigned int part; 1411 uint8_t *pdst; 1412 1413 if (!MLX5_TXOFF_CONFIG(MPW)) { 1414 /* Store the descriptor byte counter for eMPW sessions. */ 1415 dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE); 1416 pdst = &dseg->inline_data[0]; 1417 } else { 1418 /* The entire legacy MPW session counter is stored on close. */ 1419 pdst = (uint8_t *)dseg; 1420 } 1421 /* 1422 * The WQEBB space availability is checked by caller. 1423 * Here we should be aware of WQE ring buffer wraparound only. 1424 */ 1425 part = (uint8_t *)txq->wqes_end - pdst; 1426 part = RTE_MIN(part, len); 1427 do { 1428 rte_memcpy(pdst, buf, part); 1429 len -= part; 1430 if (likely(!len)) { 1431 pdst += part; 1432 if (!MLX5_TXOFF_CONFIG(MPW)) 1433 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1434 /* Note: no final wraparound check here. */ 1435 return (struct mlx5_wqe_dseg *)pdst; 1436 } 1437 pdst = (uint8_t *)txq->wqes; 1438 buf += part; 1439 part = len; 1440 } while (true); 1441 } 1442 1443 /** 1444 * Build the Data Segment of inlined data from single 1445 * segment packet with VLAN insertion. 1446 * 1447 * @param txq 1448 * Pointer to TX queue structure. 1449 * @param loc 1450 * Pointer to burst routine local context. 1451 * @param dseg 1452 * Pointer to the dseg fill with built Data Segment. 1453 * @param buf 1454 * Data buffer to point. 1455 * @param len 1456 * Data buffer length. 1457 * @param olx 1458 * Configured Tx offloads mask. It is fully defined at 1459 * compile time and may be used for optimization. 1460 * 1461 * @return 1462 * Pointer to the next Data Segment after inlined data. 1463 * Ring buffer wraparound check is needed. 1464 */ 1465 static __rte_always_inline struct mlx5_wqe_dseg * 1466 mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq, 1467 struct mlx5_txq_local *__rte_restrict loc __rte_unused, 1468 struct mlx5_wqe_dseg *__rte_restrict dseg, 1469 uint8_t *buf, 1470 unsigned int len, 1471 unsigned int olx __rte_unused) 1472 1473 { 1474 unsigned int part; 1475 uint8_t *pdst; 1476 1477 MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE); 1478 if (!MLX5_TXOFF_CONFIG(MPW)) { 1479 /* Store the descriptor byte counter for eMPW sessions. */ 1480 dseg->bcount = rte_cpu_to_be_32 1481 ((len + sizeof(struct rte_vlan_hdr)) | 1482 MLX5_ETH_WQE_DATA_INLINE); 1483 pdst = &dseg->inline_data[0]; 1484 } else { 1485 /* The entire legacy MPW session counter is stored on close. */ 1486 pdst = (uint8_t *)dseg; 1487 } 1488 memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE); 1489 buf += MLX5_DSEG_MIN_INLINE_SIZE; 1490 pdst += MLX5_DSEG_MIN_INLINE_SIZE; 1491 len -= MLX5_DSEG_MIN_INLINE_SIZE; 1492 /* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */ 1493 MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE)); 1494 if (unlikely(pdst >= (uint8_t *)txq->wqes_end)) 1495 pdst = (uint8_t *)txq->wqes; 1496 *(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) | 1497 loc->mbuf->vlan_tci); 1498 pdst += sizeof(struct rte_vlan_hdr); 1499 /* 1500 * The WQEBB space availability is checked by caller. 1501 * Here we should be aware of WQE ring buffer wraparound only. 
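	 * For example (numbers are illustrative only): if 48 bytes remain
	 * before wqes_end and len is 60, the loop below copies 48 bytes,
	 * resets pdst to txq->wqes and then copies the remaining 12 bytes.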
1502 */ 1503 part = (uint8_t *)txq->wqes_end - pdst; 1504 part = RTE_MIN(part, len); 1505 do { 1506 rte_memcpy(pdst, buf, part); 1507 len -= part; 1508 if (likely(!len)) { 1509 pdst += part; 1510 if (!MLX5_TXOFF_CONFIG(MPW)) 1511 pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE); 1512 /* Note: no final wraparound check here. */ 1513 return (struct mlx5_wqe_dseg *)pdst; 1514 } 1515 pdst = (uint8_t *)txq->wqes; 1516 buf += part; 1517 part = len; 1518 } while (true); 1519 } 1520 1521 /** 1522 * Build the Ethernet Segment with optionally inlined data with 1523 * VLAN insertion and following Data Segments (if any) from 1524 * multi-segment packet. Used by ordinary send and TSO. 1525 * 1526 * @param txq 1527 * Pointer to TX queue structure. 1528 * @param loc 1529 * Pointer to burst routine local context. 1530 * @param wqe 1531 * Pointer to WQE to fill with built Ethernet/Data Segments. 1532 * @param vlan 1533 * Length of VLAN header to insert, 0 means no VLAN insertion. 1534 * @param inlen 1535 * Data length to inline. For TSO this parameter specifies exact value, 1536 * for ordinary send routine can be aligned by caller to provide better WQE 1537 * space saving and data buffer start address alignment. 1538 * This length includes VLAN header being inserted. 1539 * @param tso 1540 * Zero means ordinary send, inlined data can be extended, 1541 * otherwise this is TSO, inlined data length is fixed. 1542 * @param olx 1543 * Configured Tx offloads mask. It is fully defined at 1544 * compile time and may be used for optimization. 1545 * 1546 * @return 1547 * Actual size of built WQE in segments. 1548 */ 1549 static __rte_always_inline unsigned int 1550 mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq, 1551 struct mlx5_txq_local *__rte_restrict loc, 1552 struct mlx5_wqe *__rte_restrict wqe, 1553 unsigned int vlan, 1554 unsigned int inlen, 1555 unsigned int tso, 1556 unsigned int olx __rte_unused) 1557 { 1558 struct mlx5_wqe_dseg *__rte_restrict dseg; 1559 unsigned int ds; 1560 1561 MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen); 1562 loc->mbuf_nseg = NB_SEGS(loc->mbuf); 1563 loc->mbuf_off = 0; 1564 1565 dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx); 1566 if (!loc->mbuf_nseg) 1567 goto dseg_done; 1568 /* 1569 * There are still some mbuf remaining, not inlined. 1570 * The first mbuf may be partially inlined and we 1571 * must process the possible non-zero data offset. 1572 */ 1573 if (loc->mbuf_off) { 1574 unsigned int dlen; 1575 uint8_t *dptr; 1576 1577 /* 1578 * Exhausted packets must be dropped before. 1579 * Non-zero offset means there are some data 1580 * remained in the packet. 1581 */ 1582 MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf)); 1583 MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf)); 1584 dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *, 1585 loc->mbuf_off); 1586 dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off; 1587 /* 1588 * Build the pointer/minimal Data Segment. 1589 * Do ring buffer wrapping check in advance. 1590 */ 1591 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1592 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1593 mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx); 1594 /* Store the mbuf to be freed on completion. 
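		 * Each pointer Data Segment consumes one elts slot here,
		 * which is why the callers are expected to have checked
		 * loc->elts_free in advance (see the MLX5_ASSERT below).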
*/ 1595 MLX5_ASSERT(loc->elts_free); 1596 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1597 --loc->elts_free; 1598 ++dseg; 1599 if (--loc->mbuf_nseg == 0) 1600 goto dseg_done; 1601 loc->mbuf = loc->mbuf->next; 1602 loc->mbuf_off = 0; 1603 } 1604 do { 1605 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1606 struct rte_mbuf *mbuf; 1607 1608 /* Zero length segment found, just skip. */ 1609 mbuf = loc->mbuf; 1610 loc->mbuf = loc->mbuf->next; 1611 rte_pktmbuf_free_seg(mbuf); 1612 if (--loc->mbuf_nseg == 0) 1613 break; 1614 } else { 1615 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1616 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1617 mlx5_tx_dseg_iptr 1618 (txq, loc, dseg, 1619 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1620 rte_pktmbuf_data_len(loc->mbuf), olx); 1621 MLX5_ASSERT(loc->elts_free); 1622 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1623 --loc->elts_free; 1624 ++dseg; 1625 if (--loc->mbuf_nseg == 0) 1626 break; 1627 loc->mbuf = loc->mbuf->next; 1628 } 1629 } while (true); 1630 1631 dseg_done: 1632 /* Calculate actual segments used from the dseg pointer. */ 1633 if ((uintptr_t)wqe < (uintptr_t)dseg) 1634 ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE; 1635 else 1636 ds = (((uintptr_t)dseg - (uintptr_t)wqe) + 1637 txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE; 1638 return ds; 1639 } 1640 1641 /** 1642 * The routine checks timestamp flag in the current packet, 1643 * and push WAIT WQE into the queue if scheduling is required. 1644 * 1645 * @param txq 1646 * Pointer to TX queue structure. 1647 * @param loc 1648 * Pointer to burst routine local context. 1649 * @param olx 1650 * Configured Tx offloads mask. It is fully defined at 1651 * compile time and may be used for optimization. 1652 * 1653 * @return 1654 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1655 * MLX5_TXCMP_CODE_SINGLE - continue processing with the packet. 1656 * MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing. 1657 * Local context variables partially updated. 1658 */ 1659 static __rte_always_inline enum mlx5_txcmp_code 1660 mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq, 1661 struct mlx5_txq_local *restrict loc, 1662 unsigned int olx) 1663 { 1664 if (MLX5_TXOFF_CONFIG(TXPP) && 1665 loc->mbuf->ol_flags & txq->ts_mask) { 1666 struct mlx5_dev_ctx_shared *sh; 1667 struct mlx5_wqe *wqe; 1668 uint64_t ts; 1669 1670 /* 1671 * Estimate the required space quickly and roughly. 1672 * We would like to ensure the packet can be pushed 1673 * to the queue and we won't get the orphan WAIT WQE. 1674 */ 1675 if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE || 1676 loc->elts_free < NB_SEGS(loc->mbuf)) 1677 return MLX5_TXCMP_CODE_EXIT; 1678 /* Convert the timestamp into completion to wait. */ 1679 ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *); 1680 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1681 sh = txq->sh; 1682 if (txq->wait_on_time) { 1683 /* The wait on time capability should be used. */ 1684 ts -= sh->txpp.skew; 1685 mlx5_tx_cseg_init(txq, loc, wqe, 1686 1 + sizeof(struct mlx5_wqe_wseg) / 1687 MLX5_WSEG_SIZE, 1688 MLX5_OPCODE_WAIT | 1689 MLX5_OPC_MOD_WAIT_TIME << 24, olx); 1690 mlx5_tx_wseg_init(txq, loc, wqe, ts, olx); 1691 } else { 1692 /* Legacy cross-channel operation should be used. */ 1693 int32_t wci; 1694 1695 wci = mlx5_txpp_convert_tx_ts(sh, ts); 1696 if (unlikely(wci < 0)) 1697 return MLX5_TXCMP_CODE_SINGLE; 1698 /* Build the WAIT WQE with specified completion. 
*/ 1699 mlx5_tx_cseg_init(txq, loc, wqe, 1700 1 + sizeof(struct mlx5_wqe_qseg) / 1701 MLX5_WSEG_SIZE, 1702 MLX5_OPCODE_WAIT | 1703 MLX5_OPC_MOD_WAIT_CQ_PI << 24, olx); 1704 mlx5_tx_qseg_init(txq, loc, wqe, wci, olx); 1705 } 1706 ++txq->wqe_ci; 1707 --loc->wqe_free; 1708 return MLX5_TXCMP_CODE_MULTI; 1709 } 1710 return MLX5_TXCMP_CODE_SINGLE; 1711 } 1712 1713 /** 1714 * Tx one packet function for multi-segment TSO. Supports all 1715 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs, 1716 * sends one packet per WQE. 1717 * 1718 * This routine is responsible for storing processed mbuf 1719 * into elts ring buffer and update elts_head. 1720 * 1721 * @param txq 1722 * Pointer to TX queue structure. 1723 * @param loc 1724 * Pointer to burst routine local context. 1725 * @param olx 1726 * Configured Tx offloads mask. It is fully defined at 1727 * compile time and may be used for optimization. 1728 * 1729 * @return 1730 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1731 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1732 * Local context variables partially updated. 1733 */ 1734 static __rte_always_inline enum mlx5_txcmp_code 1735 mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq, 1736 struct mlx5_txq_local *__rte_restrict loc, 1737 unsigned int olx) 1738 { 1739 struct mlx5_wqe *__rte_restrict wqe; 1740 unsigned int ds, dlen, inlen, ntcp, vlan = 0; 1741 1742 if (MLX5_TXOFF_CONFIG(TXPP)) { 1743 enum mlx5_txcmp_code wret; 1744 1745 /* Generate WAIT for scheduling if requested. */ 1746 wret = mlx5_tx_schedule_send(txq, loc, olx); 1747 if (wret == MLX5_TXCMP_CODE_EXIT) 1748 return MLX5_TXCMP_CODE_EXIT; 1749 if (wret == MLX5_TXCMP_CODE_ERROR) 1750 return MLX5_TXCMP_CODE_ERROR; 1751 } 1752 /* 1753 * Calculate data length to be inlined to estimate 1754 * the required space in WQE ring buffer. 1755 */ 1756 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1757 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1758 vlan = sizeof(struct rte_vlan_hdr); 1759 inlen = loc->mbuf->l2_len + vlan + 1760 loc->mbuf->l3_len + loc->mbuf->l4_len; 1761 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 1762 return MLX5_TXCMP_CODE_ERROR; 1763 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 1764 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 1765 /* Packet must contain all TSO headers. */ 1766 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 1767 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 1768 inlen > (dlen + vlan))) 1769 return MLX5_TXCMP_CODE_ERROR; 1770 /* 1771 * Check whether there are enough free WQEBBs: 1772 * - Control Segment 1773 * - Ethernet Segment 1774 * - First Segment of inlined Ethernet data 1775 * - ... data continued ... 1776 * - Data Segments of pointer/min inline type 1777 */ 1778 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 1779 MLX5_ESEG_MIN_INLINE_SIZE + 1780 MLX5_WSEG_SIZE + 1781 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 1782 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1783 return MLX5_TXCMP_CODE_EXIT; 1784 /* Check for maximal WQE size. */ 1785 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 1786 return MLX5_TXCMP_CODE_ERROR; 1787 #ifdef MLX5_PMD_SOFT_COUNTERS 1788 /* Update sent data bytes/packets counters. */ 1789 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 1790 loc->mbuf->tso_segsz; 1791 /* 1792 * One will be added for mbuf itself at the end of the mlx5_tx_burst 1793 * from loc->pkts_sent field. 
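 * Worked example (numbers are illustrative only): dlen = 3000,
 * vlan = 0, inlen = 60 header bytes, tso_segsz = 1460. The TCP
 * payload is 3000 - 60 = 2940 bytes, so
 *   ntcp = (2940 + 1460 - 1) / 1460 = 3 segments on the wire.
 * After the decrement below opackets grows by 2 (the last one is
 * counted later via loc->pkts_sent) and obytes grows by
 * 3000 + 2 * 60 = 3120 bytes, i.e. the original data plus the two
 * extra replicated header copies.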
1794 */ 1795 --ntcp; 1796 txq->stats.opackets += ntcp; 1797 txq->stats.obytes += dlen + vlan + ntcp * inlen; 1798 #endif 1799 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1800 loc->wqe_last = wqe; 1801 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 1802 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 1803 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 1804 txq->wqe_ci += (ds + 3) / 4; 1805 loc->wqe_free -= (ds + 3) / 4; 1806 return MLX5_TXCMP_CODE_MULTI; 1807 } 1808 1809 /** 1810 * Tx one packet function for multi-segment SEND. Supports all types of Tx 1811 * offloads, uses MLX5_OPCODE_SEND to build WQEs, sends one packet per WQE, 1812 * without any data inlining in Ethernet Segment. 1813 * 1814 * This routine is responsible for storing processed mbuf 1815 * into elts ring buffer and update elts_head. 1816 * 1817 * @param txq 1818 * Pointer to TX queue structure. 1819 * @param loc 1820 * Pointer to burst routine local context. 1821 * @param olx 1822 * Configured Tx offloads mask. It is fully defined at 1823 * compile time and may be used for optimization. 1824 * 1825 * @return 1826 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1827 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1828 * Local context variables partially updated. 1829 */ 1830 static __rte_always_inline enum mlx5_txcmp_code 1831 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 1832 struct mlx5_txq_local *__rte_restrict loc, 1833 unsigned int olx) 1834 { 1835 struct mlx5_wqe_dseg *__rte_restrict dseg; 1836 struct mlx5_wqe *__rte_restrict wqe; 1837 unsigned int ds, nseg; 1838 1839 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1840 if (MLX5_TXOFF_CONFIG(TXPP)) { 1841 enum mlx5_txcmp_code wret; 1842 1843 /* Generate WAIT for scheduling if requested. */ 1844 wret = mlx5_tx_schedule_send(txq, loc, olx); 1845 if (wret == MLX5_TXCMP_CODE_EXIT) 1846 return MLX5_TXCMP_CODE_EXIT; 1847 if (wret == MLX5_TXCMP_CODE_ERROR) 1848 return MLX5_TXCMP_CODE_ERROR; 1849 } 1850 /* 1851 * No inline at all, it means the CPU cycles saving is prioritized at 1852 * configuration, we should not copy any packet data to WQE. 1853 */ 1854 nseg = NB_SEGS(loc->mbuf); 1855 ds = 2 + nseg; 1856 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1857 return MLX5_TXCMP_CODE_EXIT; 1858 /* Check for maximal WQE size. */ 1859 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 1860 return MLX5_TXCMP_CODE_ERROR; 1861 /* 1862 * Some Tx offloads may cause an error if packet is not long enough, 1863 * check against assumed minimal length. 1864 */ 1865 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 1866 return MLX5_TXCMP_CODE_ERROR; 1867 #ifdef MLX5_PMD_SOFT_COUNTERS 1868 /* Update sent data bytes counter. 
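 * For example, a 1500 byte packet sent with VLAN insertion
 * requested is accounted as 1500 + sizeof(struct rte_vlan_hdr) =
 * 1504 bytes, since the 4-byte tag is inserted by the hardware and
 * is not present in the mbuf data itself.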
*/ 1869 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 1870 if (MLX5_TXOFF_CONFIG(VLAN) && 1871 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1872 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 1873 #endif 1874 /* 1875 * SEND WQE, one WQEBB: 1876 * - Control Segment, SEND opcode 1877 * - Ethernet Segment, optional VLAN, no inline 1878 * - Data Segments, pointer only type 1879 */ 1880 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1881 loc->wqe_last = wqe; 1882 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 1883 mlx5_tx_eseg_none(txq, loc, wqe, olx); 1884 dseg = &wqe->dseg[0]; 1885 do { 1886 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1887 struct rte_mbuf *mbuf; 1888 1889 /* 1890 * Zero length segment found, have to correct total 1891 * size of WQE in segments. 1892 * It is supposed to be rare occasion, so in normal 1893 * case (no zero length segments) we avoid extra 1894 * writing to the Control Segment. 1895 */ 1896 --ds; 1897 wqe->cseg.sq_ds -= RTE_BE32(1); 1898 mbuf = loc->mbuf; 1899 loc->mbuf = mbuf->next; 1900 rte_pktmbuf_free_seg(mbuf); 1901 if (--nseg == 0) 1902 break; 1903 } else { 1904 mlx5_tx_dseg_ptr 1905 (txq, loc, dseg, 1906 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1907 rte_pktmbuf_data_len(loc->mbuf), olx); 1908 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1909 --loc->elts_free; 1910 if (--nseg == 0) 1911 break; 1912 ++dseg; 1913 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1914 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1915 loc->mbuf = loc->mbuf->next; 1916 } 1917 } while (true); 1918 txq->wqe_ci += (ds + 3) / 4; 1919 loc->wqe_free -= (ds + 3) / 4; 1920 return MLX5_TXCMP_CODE_MULTI; 1921 } 1922 1923 /** 1924 * Tx one packet function for multi-segment SEND. Supports all 1925 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 1926 * sends one packet per WQE, with data inlining in 1927 * Ethernet Segment and minimal Data Segments. 1928 * 1929 * This routine is responsible for storing processed mbuf 1930 * into elts ring buffer and update elts_head. 1931 * 1932 * @param txq 1933 * Pointer to TX queue structure. 1934 * @param loc 1935 * Pointer to burst routine local context. 1936 * @param olx 1937 * Configured Tx offloads mask. It is fully defined at 1938 * compile time and may be used for optimization. 1939 * 1940 * @return 1941 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1942 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1943 * Local context variables partially updated. 1944 */ 1945 static __rte_always_inline enum mlx5_txcmp_code 1946 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 1947 struct mlx5_txq_local *__rte_restrict loc, 1948 unsigned int olx) 1949 { 1950 struct mlx5_wqe *__rte_restrict wqe; 1951 unsigned int ds, inlen, dlen, vlan = 0; 1952 1953 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 1954 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1955 if (MLX5_TXOFF_CONFIG(TXPP)) { 1956 enum mlx5_txcmp_code wret; 1957 1958 /* Generate WAIT for scheduling if requested. */ 1959 wret = mlx5_tx_schedule_send(txq, loc, olx); 1960 if (wret == MLX5_TXCMP_CODE_EXIT) 1961 return MLX5_TXCMP_CODE_EXIT; 1962 if (wret == MLX5_TXCMP_CODE_ERROR) 1963 return MLX5_TXCMP_CODE_ERROR; 1964 } 1965 /* 1966 * First calculate data length to be inlined 1967 * to estimate the required space for WQE. 
1968 */ 1969 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1970 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1971 vlan = sizeof(struct rte_vlan_hdr); 1972 inlen = dlen + vlan; 1973 /* Check against minimal length. */ 1974 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 1975 return MLX5_TXCMP_CODE_ERROR; 1976 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 1977 if (inlen > txq->inlen_send || 1978 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1979 struct rte_mbuf *mbuf; 1980 unsigned int nxlen; 1981 uintptr_t start; 1982 1983 mbuf = loc->mbuf; 1984 nxlen = rte_pktmbuf_data_len(mbuf); 1985 /* 1986 * Packet length exceeds the allowed inline data length, 1987 * check whether the minimal inlining is required. 1988 */ 1989 if (txq->inlen_mode) { 1990 MLX5_ASSERT(txq->inlen_mode >= 1991 MLX5_ESEG_MIN_INLINE_SIZE); 1992 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 1993 inlen = RTE_MIN(txq->inlen_mode, inlen); 1994 } else if (vlan && !txq->vlan_en) { 1995 /* 1996 * VLAN insertion is requested and hardware does not 1997 * support the offload, will do with software inline. 1998 */ 1999 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 2000 } else if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE || 2001 nxlen > txq->inlen_send) { 2002 return mlx5_tx_packet_multi_send(txq, loc, olx); 2003 } else { 2004 goto do_first; 2005 } 2006 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2007 goto do_build; 2008 /* 2009 * Now we know the minimal amount of data is requested 2010 * to inline. Check whether we should inline the buffers 2011 * from the chain beginning to eliminate some mbufs. 2012 */ 2013 if (unlikely(nxlen <= txq->inlen_send)) { 2014 /* We can inline first mbuf at least. */ 2015 if (nxlen < inlen) { 2016 unsigned int smlen; 2017 2018 /* Scan mbufs till inlen filled. */ 2019 do { 2020 smlen = nxlen; 2021 mbuf = NEXT(mbuf); 2022 MLX5_ASSERT(mbuf); 2023 nxlen = rte_pktmbuf_data_len(mbuf); 2024 nxlen += smlen; 2025 } while (unlikely(nxlen < inlen)); 2026 if (unlikely(nxlen > txq->inlen_send)) { 2027 /* We cannot inline entire mbuf. */ 2028 smlen = inlen - smlen; 2029 start = rte_pktmbuf_mtod_offset 2030 (mbuf, uintptr_t, smlen); 2031 goto do_align; 2032 } 2033 } 2034 do_first: 2035 do { 2036 inlen = nxlen; 2037 mbuf = NEXT(mbuf); 2038 /* There should be not end of packet. */ 2039 MLX5_ASSERT(mbuf); 2040 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2041 break; 2042 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 2043 } while (unlikely(nxlen < txq->inlen_send)); 2044 } 2045 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 2046 /* 2047 * Check whether we can do inline to align start 2048 * address of data buffer to cacheline. 2049 */ 2050 do_align: 2051 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 2052 if (unlikely(start)) { 2053 start += inlen; 2054 if (start <= txq->inlen_send) 2055 inlen = start; 2056 } 2057 } 2058 /* 2059 * Check whether there are enough free WQEBBs: 2060 * - Control Segment 2061 * - Ethernet Segment 2062 * - First Segment of inlined Ethernet data 2063 * - ... data continued ... 2064 * - Data Segments of pointer/min inline type 2065 * 2066 * Estimate the number of Data Segments conservatively, 2067 * supposing no any mbufs is being freed during inlining. 
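 * Worked example (purely illustrative, assuming 16-byte WQE
 * segments and an 18-byte minimal Ethernet inline area): a chain of
 * 3 segments with inlen = 146 bytes to inline gives
 *   ds = 3 + 2 + (146 - 18 + 16 + 15) / 16 = 14
 * segments in total, so (14 + 3) / 4 = 4 WQEBBs must be available
 * before the WQE is built.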
2068 */ 2069 do_build: 2070 MLX5_ASSERT(inlen <= txq->inlen_send); 2071 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 2072 MLX5_ESEG_MIN_INLINE_SIZE + 2073 MLX5_WSEG_SIZE + 2074 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2075 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 2076 return MLX5_TXCMP_CODE_EXIT; 2077 /* Check for maximal WQE size. */ 2078 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 2079 return MLX5_TXCMP_CODE_ERROR; 2080 #ifdef MLX5_PMD_SOFT_COUNTERS 2081 /* Update sent data bytes/packets counters. */ 2082 txq->stats.obytes += dlen + vlan; 2083 #endif 2084 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2085 loc->wqe_last = wqe; 2086 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 2087 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 2088 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2089 txq->wqe_ci += (ds + 3) / 4; 2090 loc->wqe_free -= (ds + 3) / 4; 2091 return MLX5_TXCMP_CODE_MULTI; 2092 } 2093 2094 /** 2095 * Tx burst function for multi-segment packets. Supports all 2096 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 2097 * sends one packet per WQE. Function stops sending if it 2098 * encounters the single-segment packet. 2099 * 2100 * This routine is responsible for storing processed mbuf 2101 * into elts ring buffer and update elts_head. 2102 * 2103 * @param txq 2104 * Pointer to TX queue structure. 2105 * @param[in] pkts 2106 * Packets to transmit. 2107 * @param pkts_n 2108 * Number of packets in array. 2109 * @param loc 2110 * Pointer to burst routine local context. 2111 * @param olx 2112 * Configured Tx offloads mask. It is fully defined at 2113 * compile time and may be used for optimization. 2114 * 2115 * @return 2116 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2117 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2118 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2119 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 2120 * Local context variables updated. 2121 */ 2122 static __rte_always_inline enum mlx5_txcmp_code 2123 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 2124 struct rte_mbuf **__rte_restrict pkts, 2125 unsigned int pkts_n, 2126 struct mlx5_txq_local *__rte_restrict loc, 2127 unsigned int olx) 2128 { 2129 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2130 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2131 pkts += loc->pkts_sent + 1; 2132 pkts_n -= loc->pkts_sent; 2133 for (;;) { 2134 enum mlx5_txcmp_code ret; 2135 2136 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 2137 /* 2138 * Estimate the number of free elts quickly but conservatively. 2139 * Some segment may be fully inlined and freed, 2140 * ignore this here - precise estimation is costly. 2141 */ 2142 if (loc->elts_free < NB_SEGS(loc->mbuf)) 2143 return MLX5_TXCMP_CODE_EXIT; 2144 if (MLX5_TXOFF_CONFIG(TSO) && 2145 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { 2146 /* Proceed with multi-segment TSO. */ 2147 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 2148 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 2149 /* Proceed with multi-segment SEND with inlining. */ 2150 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 2151 } else { 2152 /* Proceed with multi-segment SEND w/o inlining. */ 2153 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 2154 } 2155 if (ret == MLX5_TXCMP_CODE_EXIT) 2156 return MLX5_TXCMP_CODE_EXIT; 2157 if (ret == MLX5_TXCMP_CODE_ERROR) 2158 return MLX5_TXCMP_CODE_ERROR; 2159 /* WQE is built, go to the next packet. 
*/ 2160 ++loc->pkts_sent; 2161 --pkts_n; 2162 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2163 return MLX5_TXCMP_CODE_EXIT; 2164 loc->mbuf = *pkts++; 2165 if (pkts_n > 1) 2166 rte_prefetch0(*pkts); 2167 if (likely(NB_SEGS(loc->mbuf) > 1)) 2168 continue; 2169 /* Here ends the series of multi-segment packets. */ 2170 if (MLX5_TXOFF_CONFIG(TSO) && 2171 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2172 return MLX5_TXCMP_CODE_TSO; 2173 return MLX5_TXCMP_CODE_SINGLE; 2174 } 2175 MLX5_ASSERT(false); 2176 } 2177 2178 /** 2179 * Tx burst function for single-segment packets with TSO. 2180 * Supports all types of Tx offloads, except multi-packets. 2181 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 2182 * Function stops sending if it encounters the multi-segment 2183 * packet or packet without TSO requested. 2184 * 2185 * The routine is responsible for storing processed mbuf into elts ring buffer 2186 * and update elts_head if inline offloads is requested due to possible early 2187 * freeing of the inlined mbufs (can not store pkts array in elts as a batch). 2188 * 2189 * @param txq 2190 * Pointer to TX queue structure. 2191 * @param[in] pkts 2192 * Packets to transmit. 2193 * @param pkts_n 2194 * Number of packets in array. 2195 * @param loc 2196 * Pointer to burst routine local context. 2197 * @param olx 2198 * Configured Tx offloads mask. It is fully defined at 2199 * compile time and may be used for optimization. 2200 * 2201 * @return 2202 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2203 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2204 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2205 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2206 * Local context variables updated. 2207 */ 2208 static __rte_always_inline enum mlx5_txcmp_code 2209 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 2210 struct rte_mbuf **__rte_restrict pkts, 2211 unsigned int pkts_n, 2212 struct mlx5_txq_local *__rte_restrict loc, 2213 unsigned int olx) 2214 { 2215 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2216 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2217 pkts += loc->pkts_sent + 1; 2218 pkts_n -= loc->pkts_sent; 2219 for (;;) { 2220 struct mlx5_wqe_dseg *__rte_restrict dseg; 2221 struct mlx5_wqe *__rte_restrict wqe; 2222 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 2223 uint8_t *dptr; 2224 2225 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2226 if (MLX5_TXOFF_CONFIG(TXPP)) { 2227 enum mlx5_txcmp_code wret; 2228 2229 /* Generate WAIT for scheduling if requested. */ 2230 wret = mlx5_tx_schedule_send(txq, loc, olx); 2231 if (wret == MLX5_TXCMP_CODE_EXIT) 2232 return MLX5_TXCMP_CODE_EXIT; 2233 if (wret == MLX5_TXCMP_CODE_ERROR) 2234 return MLX5_TXCMP_CODE_ERROR; 2235 } 2236 dlen = rte_pktmbuf_data_len(loc->mbuf); 2237 if (MLX5_TXOFF_CONFIG(VLAN) && 2238 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2239 vlan = sizeof(struct rte_vlan_hdr); 2240 } 2241 /* 2242 * First calculate the WQE size to check 2243 * whether we have enough space in ring buffer. 2244 */ 2245 hlen = loc->mbuf->l2_len + vlan + 2246 loc->mbuf->l3_len + loc->mbuf->l4_len; 2247 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 2248 return MLX5_TXCMP_CODE_ERROR; 2249 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 2250 hlen += loc->mbuf->outer_l2_len + 2251 loc->mbuf->outer_l3_len; 2252 /* Segment must contain all TSO headers. 
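 * For instance (illustrative numbers, assuming 16-byte segments and
 * an 18-byte minimal inline area): an untagged IPv4/TCP packet with
 * l2 = 14, l3 = 20 and l4 = 20 byte headers gives hlen = 54, and
 * the TSO WQE below takes
 *   ds = 4 + (54 - 18 + 15) / 16 = 7
 * segments, i.e. two WQEBBs: Control, Ethernet with the 54 header
 * bytes inlined, and one pointer Data Segment for the payload.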
*/ 2253 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 2254 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 2255 hlen > (dlen + vlan))) 2256 return MLX5_TXCMP_CODE_ERROR; 2257 /* 2258 * Check whether there are enough free WQEBBs: 2259 * - Control Segment 2260 * - Ethernet Segment 2261 * - First Segment of inlined Ethernet data 2262 * - ... data continued ... 2263 * - Finishing Data Segment of pointer type 2264 */ 2265 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 2266 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2267 if (loc->wqe_free < ((ds + 3) / 4)) 2268 return MLX5_TXCMP_CODE_EXIT; 2269 #ifdef MLX5_PMD_SOFT_COUNTERS 2270 /* Update sent data bytes/packets counters. */ 2271 ntcp = (dlen + vlan - hlen + 2272 loc->mbuf->tso_segsz - 1) / 2273 loc->mbuf->tso_segsz; 2274 /* 2275 * One will be added for mbuf itself at the end 2276 * of the mlx5_tx_burst from loc->pkts_sent field. 2277 */ 2278 --ntcp; 2279 txq->stats.opackets += ntcp; 2280 txq->stats.obytes += dlen + vlan + ntcp * hlen; 2281 #endif 2282 /* 2283 * Build the TSO WQE: 2284 * - Control Segment 2285 * - Ethernet Segment with hlen bytes inlined 2286 * - Data Segment of pointer type 2287 */ 2288 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2289 loc->wqe_last = wqe; 2290 mlx5_tx_cseg_init(txq, loc, wqe, ds, 2291 MLX5_OPCODE_TSO, olx); 2292 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 2293 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 2294 dlen -= hlen - vlan; 2295 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2296 /* 2297 * WQE is built, update the loop parameters 2298 * and go to the next packet. 2299 */ 2300 txq->wqe_ci += (ds + 3) / 4; 2301 loc->wqe_free -= (ds + 3) / 4; 2302 if (MLX5_TXOFF_CONFIG(INLINE)) 2303 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2304 --loc->elts_free; 2305 ++loc->pkts_sent; 2306 --pkts_n; 2307 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2308 return MLX5_TXCMP_CODE_EXIT; 2309 loc->mbuf = *pkts++; 2310 if (pkts_n > 1) 2311 rte_prefetch0(*pkts); 2312 if (MLX5_TXOFF_CONFIG(MULTI) && 2313 unlikely(NB_SEGS(loc->mbuf) > 1)) 2314 return MLX5_TXCMP_CODE_MULTI; 2315 if (likely(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG))) 2316 return MLX5_TXCMP_CODE_SINGLE; 2317 /* Continue with the next TSO packet. */ 2318 } 2319 MLX5_ASSERT(false); 2320 } 2321 2322 /** 2323 * Analyze the packet and select the best method to send. 2324 * 2325 * @param txq 2326 * Pointer to TX queue structure. 2327 * @param loc 2328 * Pointer to burst routine local context. 2329 * @param olx 2330 * Configured Tx offloads mask. It is fully defined at 2331 * compile time and may be used for optimization. 2332 * @param newp 2333 * The predefined flag whether do complete check for 2334 * multi-segment packets and TSO. 2335 * 2336 * @return 2337 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2338 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 2339 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 2340 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 2341 */ 2342 static __rte_always_inline enum mlx5_txcmp_code 2343 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 2344 struct mlx5_txq_local *__rte_restrict loc, 2345 unsigned int olx, 2346 bool newp) 2347 { 2348 /* Check for multi-segment packet. */ 2349 if (newp && 2350 MLX5_TXOFF_CONFIG(MULTI) && 2351 unlikely(NB_SEGS(loc->mbuf) > 1)) 2352 return MLX5_TXCMP_CODE_MULTI; 2353 /* Check for TSO packet. 
*/ 2354 if (newp && 2355 MLX5_TXOFF_CONFIG(TSO) && 2356 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2357 return MLX5_TXCMP_CODE_TSO; 2358 /* Check if eMPW is enabled at all. */ 2359 if (!MLX5_TXOFF_CONFIG(EMPW)) 2360 return MLX5_TXCMP_CODE_SINGLE; 2361 /* Check if eMPW can be engaged. */ 2362 if (MLX5_TXOFF_CONFIG(VLAN) && 2363 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) && 2364 (!MLX5_TXOFF_CONFIG(INLINE) || 2365 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 2366 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 2367 /* 2368 * eMPW does not support VLAN insertion offload, we have to 2369 * inline the entire packet but packet is too long for inlining. 2370 */ 2371 return MLX5_TXCMP_CODE_SINGLE; 2372 } 2373 return MLX5_TXCMP_CODE_EMPW; 2374 } 2375 2376 /** 2377 * Check the next packet attributes to match with the eMPW batch ones. 2378 * In addition, for legacy MPW the packet length is checked either. 2379 * 2380 * @param txq 2381 * Pointer to TX queue structure. 2382 * @param es 2383 * Pointer to Ethernet Segment of eMPW batch. 2384 * @param loc 2385 * Pointer to burst routine local context. 2386 * @param dlen 2387 * Length of previous packet in MPW descriptor. 2388 * @param olx 2389 * Configured Tx offloads mask. It is fully defined at 2390 * compile time and may be used for optimization. 2391 * 2392 * @return 2393 * true - packet match with eMPW batch attributes. 2394 * false - no match, eMPW should be restarted. 2395 */ 2396 static __rte_always_inline bool 2397 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, 2398 struct mlx5_wqe_eseg *__rte_restrict es, 2399 struct mlx5_txq_local *__rte_restrict loc, 2400 uint32_t dlen, 2401 unsigned int olx) 2402 { 2403 uint8_t swp_flags = 0; 2404 2405 /* Compare the checksum flags, if any. */ 2406 if (MLX5_TXOFF_CONFIG(CSUM) && 2407 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags) 2408 return false; 2409 /* Compare the Software Parser offsets and flags. */ 2410 if (MLX5_TXOFF_CONFIG(SWP) && 2411 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) || 2412 es->swp_flags != swp_flags)) 2413 return false; 2414 /* Fill metadata field if needed. */ 2415 if (MLX5_TXOFF_CONFIG(METADATA) && 2416 es->metadata != (loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 2417 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 0)) 2418 return false; 2419 /* Legacy MPW can send packets with the same length only. */ 2420 if (MLX5_TXOFF_CONFIG(MPW) && 2421 dlen != rte_pktmbuf_data_len(loc->mbuf)) 2422 return false; 2423 /* There must be no VLAN packets in eMPW loop. */ 2424 if (MLX5_TXOFF_CONFIG(VLAN)) 2425 MLX5_ASSERT(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN)); 2426 /* Check if the scheduling is requested. */ 2427 if (MLX5_TXOFF_CONFIG(TXPP) && 2428 loc->mbuf->ol_flags & txq->ts_mask) 2429 return false; 2430 return true; 2431 } 2432 2433 /** 2434 * Update send loop variables and WQE for eMPW loop without data inlining. 2435 * Number of Data Segments is equal to the number of sent packets. 2436 * 2437 * @param txq 2438 * Pointer to TX queue structure. 2439 * @param loc 2440 * Pointer to burst routine local context. 2441 * @param ds 2442 * Number of packets/Data Segments/Packets. 2443 * @param slen 2444 * Accumulated statistics, bytes sent. 2445 * @param olx 2446 * Configured Tx offloads mask. It is fully defined at 2447 * compile time and may be used for optimization. 2448 * 2449 * @return 2450 * true - packet match with eMPW batch attributes. 2451 * false - no match, eMPW should be restarted. 
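 * Accounting note (illustrative figures): for a batch of 14 packets
 * sent without inlining there are 14 pointer Data Segments plus the
 * Control and Ethernet Segments of the title WQEBB, so ds becomes
 * 16 and (16 + 3) / 4 = 4 WQEBBs are consumed from the ring.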
2452 */ 2453 static __rte_always_inline void 2454 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, 2455 struct mlx5_txq_local *__rte_restrict loc, 2456 unsigned int ds, 2457 unsigned int slen, 2458 unsigned int olx __rte_unused) 2459 { 2460 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2461 #ifdef MLX5_PMD_SOFT_COUNTERS 2462 /* Update sent data bytes counter. */ 2463 txq->stats.obytes += slen; 2464 #else 2465 (void)slen; 2466 #endif 2467 loc->elts_free -= ds; 2468 loc->pkts_sent += ds; 2469 ds += 2; 2470 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2471 txq->wqe_ci += (ds + 3) / 4; 2472 loc->wqe_free -= (ds + 3) / 4; 2473 } 2474 2475 /** 2476 * Update send loop variables and WQE for eMPW loop with data inlining. 2477 * Gets the size of pushed descriptors and data to the WQE. 2478 * 2479 * @param txq 2480 * Pointer to TX queue structure. 2481 * @param loc 2482 * Pointer to burst routine local context. 2483 * @param len 2484 * Total size of descriptor/data in bytes. 2485 * @param slen 2486 * Accumulated statistics, data bytes sent. 2487 * @param wqem 2488 * The base WQE for the eMPW/MPW descriptor. 2489 * @param olx 2490 * Configured Tx offloads mask. It is fully defined at 2491 * compile time and may be used for optimization. 2492 * 2493 * @return 2494 * true - packet match with eMPW batch attributes. 2495 * false - no match, eMPW should be restarted. 2496 */ 2497 static __rte_always_inline void 2498 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 2499 struct mlx5_txq_local *__rte_restrict loc, 2500 unsigned int len, 2501 unsigned int slen, 2502 struct mlx5_wqe *__rte_restrict wqem, 2503 unsigned int olx __rte_unused) 2504 { 2505 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 2506 2507 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2508 #ifdef MLX5_PMD_SOFT_COUNTERS 2509 /* Update sent data bytes counter. */ 2510 txq->stats.obytes += slen; 2511 #else 2512 (void)slen; 2513 #endif 2514 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 2515 /* 2516 * If the legacy MPW session contains the inline packets 2517 * we should set the only inline data segment length 2518 * and align the total length to the segment size. 2519 */ 2520 MLX5_ASSERT(len > sizeof(dseg->bcount)); 2521 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 2522 MLX5_ETH_WQE_DATA_INLINE); 2523 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 2524 } else { 2525 /* 2526 * The session is not legacy MPW or contains the 2527 * data buffer pointer segments. 2528 */ 2529 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 2530 len = len / MLX5_WSEG_SIZE + 2; 2531 } 2532 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 2533 txq->wqe_ci += (len + 3) / 4; 2534 loc->wqe_free -= (len + 3) / 4; 2535 loc->wqe_last = wqem; 2536 } 2537 2538 /** 2539 * The set of Tx burst functions for single-segment packets without TSO 2540 * and with Multi-Packet Writing feature support. 2541 * Supports all types of Tx offloads, except multi-packets and TSO. 2542 * 2543 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends as many packet 2544 * per WQE as it can. If eMPW is not configured or packet can not be sent with 2545 * eMPW (VLAN insertion) the ordinary SEND opcode is used and only one packet 2546 * placed in WQE. 2547 * 2548 * Functions stop sending if it encounters the multi-segment packet or packet 2549 * with TSO requested. 2550 * 2551 * The routines are responsible for storing processed mbuf into elts ring buffer 2552 * and update elts_head if inlining offload is requested. 
Otherwise the copying 2553 * mbufs to elts can be postponed and completed at the end of burst routine. 2554 * 2555 * @param txq 2556 * Pointer to TX queue structure. 2557 * @param[in] pkts 2558 * Packets to transmit. 2559 * @param pkts_n 2560 * Number of packets in array. 2561 * @param loc 2562 * Pointer to burst routine local context. 2563 * @param olx 2564 * Configured Tx offloads mask. It is fully defined at 2565 * compile time and may be used for optimization. 2566 * 2567 * @return 2568 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2569 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2570 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2571 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 2572 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 2573 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 2574 * 2575 * Local context variables updated. 2576 * 2577 * 2578 * The routine sends packets with MLX5_OPCODE_EMPW 2579 * without inlining, this is dedicated optimized branch. 2580 * No VLAN insertion is supported. 2581 */ 2582 static __rte_always_inline enum mlx5_txcmp_code 2583 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 2584 struct rte_mbuf **__rte_restrict pkts, 2585 unsigned int pkts_n, 2586 struct mlx5_txq_local *__rte_restrict loc, 2587 unsigned int olx) 2588 { 2589 /* 2590 * Subroutine is the part of mlx5_tx_burst_single() and sends 2591 * single-segment packet with eMPW opcode without data inlining. 2592 */ 2593 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2594 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2595 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2596 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2597 pkts += loc->pkts_sent + 1; 2598 pkts_n -= loc->pkts_sent; 2599 for (;;) { 2600 struct mlx5_wqe_dseg *__rte_restrict dseg; 2601 struct mlx5_wqe_eseg *__rte_restrict eseg; 2602 enum mlx5_txcmp_code ret; 2603 unsigned int part, loop; 2604 unsigned int slen = 0; 2605 2606 next_empw: 2607 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2608 if (MLX5_TXOFF_CONFIG(TXPP)) { 2609 enum mlx5_txcmp_code wret; 2610 2611 /* Generate WAIT for scheduling if requested. */ 2612 wret = mlx5_tx_schedule_send(txq, loc, olx); 2613 if (wret == MLX5_TXCMP_CODE_EXIT) 2614 return MLX5_TXCMP_CODE_EXIT; 2615 if (wret == MLX5_TXCMP_CODE_ERROR) 2616 return MLX5_TXCMP_CODE_ERROR; 2617 } 2618 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2619 MLX5_MPW_MAX_PACKETS : 2620 MLX5_EMPW_MAX_PACKETS); 2621 if (unlikely(loc->elts_free < part)) { 2622 /* We have no enough elts to save all mbufs. */ 2623 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 2624 return MLX5_TXCMP_CODE_EXIT; 2625 /* But we still able to send at least minimal eMPW. 
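 * Illustration (all figures assumed for the example): suppose 32
 * packets are pending (below the compile-time batch maximum) while
 * only 20 elts and 3 WQEBBs remain free. The assignment just below
 * trims the batch to the 20 free elts, and the following WQE check
 * reduces it further to part = 3 * 4 - 2 = 10, because 10 pointer
 * Data Segments plus the Control and Ethernet Segments fill the 3
 * available WQEBBs exactly.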
*/ 2626 part = loc->elts_free; 2627 } 2628 /* Check whether we have enough WQEs */ 2629 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 2630 if (unlikely(loc->wqe_free < 2631 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2632 return MLX5_TXCMP_CODE_EXIT; 2633 part = (loc->wqe_free * 4) - 2; 2634 } 2635 if (likely(part > 1)) 2636 rte_prefetch0(*pkts); 2637 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2638 /* 2639 * Build eMPW title WQEBB: 2640 * - Control Segment, eMPW opcode 2641 * - Ethernet Segment, no inline 2642 */ 2643 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 2644 MLX5_OPCODE_ENHANCED_MPSW, olx); 2645 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 2646 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2647 eseg = &loc->wqe_last->eseg; 2648 dseg = &loc->wqe_last->dseg[0]; 2649 loop = part; 2650 /* Store the packet length for legacy MPW. */ 2651 if (MLX5_TXOFF_CONFIG(MPW)) 2652 eseg->mss = rte_cpu_to_be_16 2653 (rte_pktmbuf_data_len(loc->mbuf)); 2654 for (;;) { 2655 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2656 #ifdef MLX5_PMD_SOFT_COUNTERS 2657 /* Update sent data bytes counter. */ 2658 slen += dlen; 2659 #endif 2660 mlx5_tx_dseg_ptr 2661 (txq, loc, dseg, 2662 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 2663 dlen, olx); 2664 if (unlikely(--loop == 0)) 2665 break; 2666 loc->mbuf = *pkts++; 2667 if (likely(loop > 1)) 2668 rte_prefetch0(*pkts); 2669 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2670 /* 2671 * Unroll the completion code to avoid 2672 * returning variable value - it results in 2673 * unoptimized sequent checking in caller. 2674 */ 2675 if (ret == MLX5_TXCMP_CODE_MULTI) { 2676 part -= loop; 2677 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2678 if (unlikely(!loc->elts_free || 2679 !loc->wqe_free)) 2680 return MLX5_TXCMP_CODE_EXIT; 2681 return MLX5_TXCMP_CODE_MULTI; 2682 } 2683 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2684 if (ret == MLX5_TXCMP_CODE_TSO) { 2685 part -= loop; 2686 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2687 if (unlikely(!loc->elts_free || 2688 !loc->wqe_free)) 2689 return MLX5_TXCMP_CODE_EXIT; 2690 return MLX5_TXCMP_CODE_TSO; 2691 } 2692 if (ret == MLX5_TXCMP_CODE_SINGLE) { 2693 part -= loop; 2694 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2695 if (unlikely(!loc->elts_free || 2696 !loc->wqe_free)) 2697 return MLX5_TXCMP_CODE_EXIT; 2698 return MLX5_TXCMP_CODE_SINGLE; 2699 } 2700 if (ret != MLX5_TXCMP_CODE_EMPW) { 2701 MLX5_ASSERT(false); 2702 part -= loop; 2703 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2704 return MLX5_TXCMP_CODE_ERROR; 2705 } 2706 /* 2707 * Check whether packet parameters coincide 2708 * within assumed eMPW batch: 2709 * - check sum settings 2710 * - metadata value 2711 * - software parser settings 2712 * - packets length (legacy MPW only) 2713 * - scheduling is not required 2714 */ 2715 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 2716 MLX5_ASSERT(loop); 2717 part -= loop; 2718 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2719 if (unlikely(!loc->elts_free || 2720 !loc->wqe_free)) 2721 return MLX5_TXCMP_CODE_EXIT; 2722 pkts_n -= part; 2723 goto next_empw; 2724 } 2725 /* Packet attributes match, continue the same eMPW. */ 2726 ++dseg; 2727 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2728 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2729 } 2730 /* eMPW is built successfully, update loop parameters. */ 2731 MLX5_ASSERT(!loop); 2732 MLX5_ASSERT(pkts_n >= part); 2733 #ifdef MLX5_PMD_SOFT_COUNTERS 2734 /* Update sent data bytes counter. 
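 * The statements below then close the batch: e.g. with part = 7
 * packets (an illustrative figure) the title WQEBB holds the
 * Control and Ethernet Segments plus the first two Data Segments,
 * and the ring counters advance by (2 + 7 + 3) / 4 = 3 WQEBBs.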
*/ 2735 txq->stats.obytes += slen; 2736 #endif 2737 loc->elts_free -= part; 2738 loc->pkts_sent += part; 2739 txq->wqe_ci += (2 + part + 3) / 4; 2740 loc->wqe_free -= (2 + part + 3) / 4; 2741 pkts_n -= part; 2742 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2743 return MLX5_TXCMP_CODE_EXIT; 2744 loc->mbuf = *pkts++; 2745 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2746 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 2747 return ret; 2748 /* Continue sending eMPW batches. */ 2749 } 2750 MLX5_ASSERT(false); 2751 } 2752 2753 /** 2754 * The routine sends packets with MLX5_OPCODE_EMPW 2755 * with inlining, optionally supports VLAN insertion. 2756 */ 2757 static __rte_always_inline enum mlx5_txcmp_code 2758 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 2759 struct rte_mbuf **__rte_restrict pkts, 2760 unsigned int pkts_n, 2761 struct mlx5_txq_local *__rte_restrict loc, 2762 unsigned int olx) 2763 { 2764 /* 2765 * Subroutine is the part of mlx5_tx_burst_single() and sends 2766 * single-segment packet with eMPW opcode with data inlining. 2767 */ 2768 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2769 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2770 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2771 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2772 pkts += loc->pkts_sent + 1; 2773 pkts_n -= loc->pkts_sent; 2774 for (;;) { 2775 struct mlx5_wqe_dseg *__rte_restrict dseg; 2776 struct mlx5_wqe *__rte_restrict wqem; 2777 enum mlx5_txcmp_code ret; 2778 unsigned int room, part, nlim; 2779 unsigned int slen = 0; 2780 2781 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2782 if (MLX5_TXOFF_CONFIG(TXPP)) { 2783 enum mlx5_txcmp_code wret; 2784 2785 /* Generate WAIT for scheduling if requested. */ 2786 wret = mlx5_tx_schedule_send(txq, loc, olx); 2787 if (wret == MLX5_TXCMP_CODE_EXIT) 2788 return MLX5_TXCMP_CODE_EXIT; 2789 if (wret == MLX5_TXCMP_CODE_ERROR) 2790 return MLX5_TXCMP_CODE_ERROR; 2791 } 2792 /* 2793 * Limits the amount of packets in one WQE 2794 * to improve CQE latency generation. 2795 */ 2796 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2797 MLX5_MPW_INLINE_MAX_PACKETS : 2798 MLX5_EMPW_MAX_PACKETS); 2799 /* Check whether we have minimal amount WQEs */ 2800 if (unlikely(loc->wqe_free < 2801 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2802 return MLX5_TXCMP_CODE_EXIT; 2803 if (likely(pkts_n > 1)) 2804 rte_prefetch0(*pkts); 2805 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2806 /* 2807 * Build eMPW title WQEBB: 2808 * - Control Segment, eMPW opcode, zero DS 2809 * - Ethernet Segment, no inline 2810 */ 2811 mlx5_tx_cseg_init(txq, loc, wqem, 0, 2812 MLX5_OPCODE_ENHANCED_MPSW, olx); 2813 mlx5_tx_eseg_none(txq, loc, wqem, 2814 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2815 dseg = &wqem->dseg[0]; 2816 /* Store the packet length for legacy MPW. */ 2817 if (MLX5_TXOFF_CONFIG(MPW)) 2818 wqem->eseg.mss = rte_cpu_to_be_16 2819 (rte_pktmbuf_data_len(loc->mbuf)); 2820 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 2821 loc->wqe_free) * MLX5_WQE_SIZE - 2822 MLX5_WQE_CSEG_SIZE - 2823 MLX5_WQE_ESEG_SIZE; 2824 /* Limit the room for legacy MPW sessions for performance. */ 2825 if (MLX5_TXOFF_CONFIG(MPW)) 2826 room = RTE_MIN(room, 2827 RTE_MAX(txq->inlen_empw + 2828 sizeof(dseg->bcount) + 2829 (MLX5_TXOFF_CONFIG(VLAN) ? 2830 sizeof(struct rte_vlan_hdr) : 0), 2831 MLX5_MPW_INLINE_MAX_PACKETS * 2832 MLX5_WQE_DSEG_SIZE)); 2833 /* Build WQE till we have space, packets and resources. 
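 * In the loop below "room" is the remaining byte budget for Data
 * Segments and inline data. As a rough illustration, assuming
 * 64-byte WQEBBs and 16-byte Control/Ethernet Segments, 8 free
 * WQEBBs (not above the per-WQE maximum) give room = 8 * 64 - 32 =
 * 480 bytes. "part" keeps that initial budget, so the bytes really
 * used by the session can later be derived as part - room.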
*/ 2834 part = room; 2835 for (;;) { 2836 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2837 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2838 unsigned int tlen; 2839 2840 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2841 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 2842 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 2843 /* 2844 * Some Tx offloads may cause an error if packet is not 2845 * long enough, check against assumed minimal length. 2846 */ 2847 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 2848 part -= room; 2849 if (unlikely(!part)) 2850 return MLX5_TXCMP_CODE_ERROR; 2851 /* 2852 * We have some successfully built 2853 * packet Data Segments to send. 2854 */ 2855 mlx5_tx_idone_empw(txq, loc, part, 2856 slen, wqem, olx); 2857 return MLX5_TXCMP_CODE_ERROR; 2858 } 2859 /* Inline or not inline - that's the Question. */ 2860 if (dlen > txq->inlen_empw || 2861 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2862 goto pointer_empw; 2863 if (MLX5_TXOFF_CONFIG(MPW)) { 2864 if (dlen > txq->inlen_send) 2865 goto pointer_empw; 2866 tlen = dlen; 2867 if (part == room) { 2868 /* Open new inline MPW session. */ 2869 tlen += sizeof(dseg->bcount); 2870 dseg->bcount = RTE_BE32(0); 2871 dseg = RTE_PTR_ADD 2872 (dseg, sizeof(dseg->bcount)); 2873 } else { 2874 /* 2875 * No pointer and inline descriptor 2876 * intermix for legacy MPW sessions. 2877 */ 2878 if (wqem->dseg[0].bcount) 2879 break; 2880 } 2881 } else { 2882 tlen = sizeof(dseg->bcount) + dlen; 2883 } 2884 /* Inline entire packet, optional VLAN insertion. */ 2885 if (MLX5_TXOFF_CONFIG(VLAN) && 2886 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2887 /* 2888 * The packet length must be checked in 2889 * mlx5_tx_able_to_empw() and packet 2890 * fits into inline length guaranteed. 2891 */ 2892 MLX5_ASSERT((dlen + 2893 sizeof(struct rte_vlan_hdr)) <= 2894 txq->inlen_empw); 2895 tlen += sizeof(struct rte_vlan_hdr); 2896 if (room < tlen) 2897 break; 2898 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 2899 dptr, dlen, olx); 2900 #ifdef MLX5_PMD_SOFT_COUNTERS 2901 /* Update sent data bytes counter. */ 2902 slen += sizeof(struct rte_vlan_hdr); 2903 #endif 2904 } else { 2905 if (room < tlen) 2906 break; 2907 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 2908 dptr, dlen, olx); 2909 } 2910 if (!MLX5_TXOFF_CONFIG(MPW)) 2911 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 2912 MLX5_ASSERT(room >= tlen); 2913 room -= tlen; 2914 /* 2915 * Packet data are completely inline, 2916 * we can try to free the packet. 2917 */ 2918 if (likely(loc->pkts_sent == loc->mbuf_free)) { 2919 /* 2920 * All the packets from the burst beginning 2921 * are inline, we can free mbufs directly 2922 * from the origin array on tx_burst exit(). 2923 */ 2924 loc->mbuf_free++; 2925 goto next_mbuf; 2926 } 2927 /* 2928 * In order no to call rte_pktmbuf_free_seg() here, 2929 * in the most inner loop (that might be very 2930 * expensive) we just save the mbuf in elts. 2931 */ 2932 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2933 loc->elts_free--; 2934 goto next_mbuf; 2935 pointer_empw: 2936 /* 2937 * No pointer and inline descriptor 2938 * intermix for legacy MPW sessions. 2939 */ 2940 if (MLX5_TXOFF_CONFIG(MPW) && 2941 part != room && 2942 wqem->dseg[0].bcount == RTE_BE32(0)) 2943 break; 2944 /* 2945 * Not inlinable VLAN packets are 2946 * proceeded outside of this routine. 
2947 */ 2948 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2949 if (MLX5_TXOFF_CONFIG(VLAN)) 2950 MLX5_ASSERT(!(loc->mbuf->ol_flags & 2951 RTE_MBUF_F_TX_VLAN)); 2952 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2953 /* We have to store mbuf in elts.*/ 2954 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2955 loc->elts_free--; 2956 room -= MLX5_WQE_DSEG_SIZE; 2957 /* Ring buffer wraparound is checked at the loop end.*/ 2958 ++dseg; 2959 next_mbuf: 2960 #ifdef MLX5_PMD_SOFT_COUNTERS 2961 /* Update sent data bytes counter. */ 2962 slen += dlen; 2963 #endif 2964 loc->pkts_sent++; 2965 pkts_n--; 2966 if (unlikely(!pkts_n || !loc->elts_free)) { 2967 /* 2968 * We have no resources/packets to 2969 * continue build descriptors. 2970 */ 2971 part -= room; 2972 mlx5_tx_idone_empw(txq, loc, part, 2973 slen, wqem, olx); 2974 return MLX5_TXCMP_CODE_EXIT; 2975 } 2976 loc->mbuf = *pkts++; 2977 if (likely(pkts_n > 1)) 2978 rte_prefetch0(*pkts); 2979 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2980 /* 2981 * Unroll the completion code to avoid 2982 * returning variable value - it results in 2983 * unoptimized sequent checking in caller. 2984 */ 2985 if (ret == MLX5_TXCMP_CODE_MULTI) { 2986 part -= room; 2987 mlx5_tx_idone_empw(txq, loc, part, 2988 slen, wqem, olx); 2989 if (unlikely(!loc->elts_free || 2990 !loc->wqe_free)) 2991 return MLX5_TXCMP_CODE_EXIT; 2992 return MLX5_TXCMP_CODE_MULTI; 2993 } 2994 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2995 if (ret == MLX5_TXCMP_CODE_TSO) { 2996 part -= room; 2997 mlx5_tx_idone_empw(txq, loc, part, 2998 slen, wqem, olx); 2999 if (unlikely(!loc->elts_free || 3000 !loc->wqe_free)) 3001 return MLX5_TXCMP_CODE_EXIT; 3002 return MLX5_TXCMP_CODE_TSO; 3003 } 3004 if (ret == MLX5_TXCMP_CODE_SINGLE) { 3005 part -= room; 3006 mlx5_tx_idone_empw(txq, loc, part, 3007 slen, wqem, olx); 3008 if (unlikely(!loc->elts_free || 3009 !loc->wqe_free)) 3010 return MLX5_TXCMP_CODE_EXIT; 3011 return MLX5_TXCMP_CODE_SINGLE; 3012 } 3013 if (ret != MLX5_TXCMP_CODE_EMPW) { 3014 MLX5_ASSERT(false); 3015 part -= room; 3016 mlx5_tx_idone_empw(txq, loc, part, 3017 slen, wqem, olx); 3018 return MLX5_TXCMP_CODE_ERROR; 3019 } 3020 /* Check if we have minimal room left. */ 3021 nlim--; 3022 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 3023 break; 3024 /* 3025 * Check whether packet parameters coincide 3026 * within assumed eMPW batch: 3027 * - check sum settings 3028 * - metadata value 3029 * - software parser settings 3030 * - packets length (legacy MPW only) 3031 * - scheduling is not required 3032 */ 3033 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 3034 loc, dlen, olx)) 3035 break; 3036 /* Packet attributes match, continue the same eMPW. */ 3037 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 3038 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 3039 } 3040 /* 3041 * We get here to close an existing eMPW 3042 * session and start the new one. 3043 */ 3044 MLX5_ASSERT(pkts_n); 3045 part -= room; 3046 if (unlikely(!part)) 3047 return MLX5_TXCMP_CODE_EXIT; 3048 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 3049 if (unlikely(!loc->elts_free || 3050 !loc->wqe_free)) 3051 return MLX5_TXCMP_CODE_EXIT; 3052 /* Continue the loop with new eMPW session. */ 3053 } 3054 MLX5_ASSERT(false); 3055 } 3056 3057 /** 3058 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 3059 * Data inlining and VLAN insertion are supported. 
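 * Roughly, each packet takes one of the paths below: complete
 * inlining when it fits into inlen_send, partial inlining of
 * inlen_mode bytes or of the minimal Ethernet Segment area when it
 * does not (or when VLAN must be inserted by software), and a plain
 * pointer Data Segment when inlining is disabled at configuration
 * time or the no-inline hint is set on the mbuf.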
3060 */ 3061 static __rte_always_inline enum mlx5_txcmp_code 3062 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 3063 struct rte_mbuf **__rte_restrict pkts, 3064 unsigned int pkts_n, 3065 struct mlx5_txq_local *__rte_restrict loc, 3066 unsigned int olx) 3067 { 3068 /* 3069 * Subroutine is the part of mlx5_tx_burst_single() 3070 * and sends single-segment packet with SEND opcode. 3071 */ 3072 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3073 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3074 pkts += loc->pkts_sent + 1; 3075 pkts_n -= loc->pkts_sent; 3076 for (;;) { 3077 struct mlx5_wqe *__rte_restrict wqe; 3078 enum mlx5_txcmp_code ret; 3079 3080 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3081 if (MLX5_TXOFF_CONFIG(TXPP)) { 3082 enum mlx5_txcmp_code wret; 3083 3084 /* Generate WAIT for scheduling if requested. */ 3085 wret = mlx5_tx_schedule_send(txq, loc, olx); 3086 if (wret == MLX5_TXCMP_CODE_EXIT) 3087 return MLX5_TXCMP_CODE_EXIT; 3088 if (wret == MLX5_TXCMP_CODE_ERROR) 3089 return MLX5_TXCMP_CODE_ERROR; 3090 } 3091 if (MLX5_TXOFF_CONFIG(INLINE)) { 3092 unsigned int inlen, vlan = 0; 3093 3094 inlen = rte_pktmbuf_data_len(loc->mbuf); 3095 if (MLX5_TXOFF_CONFIG(VLAN) && 3096 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 3097 vlan = sizeof(struct rte_vlan_hdr); 3098 inlen += vlan; 3099 } 3100 /* 3101 * If inlining is enabled at configuration time 3102 * the limit must be not less than minimal size. 3103 * Otherwise we would do extra check for data 3104 * size to avoid crashes due to length overflow. 3105 */ 3106 MLX5_ASSERT(txq->inlen_send >= 3107 MLX5_ESEG_MIN_INLINE_SIZE); 3108 if (inlen <= txq->inlen_send) { 3109 unsigned int seg_n, wqe_n; 3110 3111 rte_prefetch0(rte_pktmbuf_mtod 3112 (loc->mbuf, uint8_t *)); 3113 /* Check against minimal length. */ 3114 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3115 return MLX5_TXCMP_CODE_ERROR; 3116 if (loc->mbuf->ol_flags & 3117 RTE_MBUF_F_TX_DYNF_NOINLINE) { 3118 /* 3119 * The hint flag not to inline packet 3120 * data is set. Check whether we can 3121 * follow the hint. 3122 */ 3123 if ((!MLX5_TXOFF_CONFIG(EMPW) && 3124 txq->inlen_mode) || 3125 (MLX5_TXOFF_CONFIG(MPW) && 3126 txq->inlen_mode)) { 3127 if (inlen <= txq->inlen_send) 3128 goto single_inline; 3129 /* 3130 * The hardware requires the 3131 * minimal inline data header. 3132 */ 3133 goto single_min_inline; 3134 } 3135 if (MLX5_TXOFF_CONFIG(VLAN) && 3136 vlan && !txq->vlan_en) { 3137 /* 3138 * We must insert VLAN tag 3139 * by software means. 3140 */ 3141 goto single_part_inline; 3142 } 3143 goto single_no_inline; 3144 } 3145 single_inline: 3146 /* 3147 * Completely inlined packet data WQE: 3148 * - Control Segment, SEND opcode 3149 * - Ethernet Segment, no VLAN insertion 3150 * - Data inlined, VLAN optionally inserted 3151 * - Alignment to MLX5_WSEG_SIZE 3152 * Have to estimate amount of WQEBBs 3153 */ 3154 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 3155 MLX5_ESEG_MIN_INLINE_SIZE + 3156 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3157 /* Check if there are enough WQEBBs. */ 3158 wqe_n = (seg_n + 3) / 4; 3159 if (wqe_n > loc->wqe_free) 3160 return MLX5_TXCMP_CODE_EXIT; 3161 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3162 loc->wqe_last = wqe; 3163 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 3164 MLX5_OPCODE_SEND, olx); 3165 mlx5_tx_eseg_data(txq, loc, wqe, 3166 vlan, inlen, 0, olx); 3167 txq->wqe_ci += wqe_n; 3168 loc->wqe_free -= wqe_n; 3169 /* 3170 * Packet data are completely inlined, 3171 * free the packet immediately. 
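 * Sizing note (illustrative values, assuming 16-byte segments and
 * an 18-byte minimal inline area): fully inlining a 100-byte packet
 * needs seg_n = (100 + 48 - 18 + 15) / 16 = 9 segments, hence
 * wqe_n = (9 + 3) / 4 = 3 WQEBBs, and because the data now lives
 * entirely in the WQE the mbuf can be released right away instead
 * of waiting for a completion.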
3172 */ 3173 rte_pktmbuf_free_seg(loc->mbuf); 3174 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 3175 MLX5_TXOFF_CONFIG(MPW)) && 3176 txq->inlen_mode) { 3177 /* 3178 * If minimal inlining is requested the eMPW 3179 * feature should be disabled due to data is 3180 * inlined into Ethernet Segment, which can 3181 * not contain inlined data for eMPW due to 3182 * segment shared for all packets. 3183 */ 3184 struct mlx5_wqe_dseg *__rte_restrict dseg; 3185 unsigned int ds; 3186 uint8_t *dptr; 3187 3188 /* 3189 * The inline-mode settings require 3190 * to inline the specified amount of 3191 * data bytes to the Ethernet Segment. 3192 * We should check the free space in 3193 * WQE ring buffer to inline partially. 3194 */ 3195 single_min_inline: 3196 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 3197 MLX5_ASSERT(inlen > txq->inlen_mode); 3198 MLX5_ASSERT(txq->inlen_mode >= 3199 MLX5_ESEG_MIN_INLINE_SIZE); 3200 /* 3201 * Check whether there are enough free WQEBBs: 3202 * - Control Segment 3203 * - Ethernet Segment 3204 * - First Segment of inlined Ethernet data 3205 * - ... data continued ... 3206 * - Finishing Data Segment of pointer type 3207 */ 3208 ds = (MLX5_WQE_CSEG_SIZE + 3209 MLX5_WQE_ESEG_SIZE + 3210 MLX5_WQE_DSEG_SIZE + 3211 txq->inlen_mode - 3212 MLX5_ESEG_MIN_INLINE_SIZE + 3213 MLX5_WQE_DSEG_SIZE + 3214 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3215 if (loc->wqe_free < ((ds + 3) / 4)) 3216 return MLX5_TXCMP_CODE_EXIT; 3217 /* 3218 * Build the ordinary SEND WQE: 3219 * - Control Segment 3220 * - Ethernet Segment, inline inlen_mode bytes 3221 * - Data Segment of pointer type 3222 */ 3223 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3224 loc->wqe_last = wqe; 3225 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3226 MLX5_OPCODE_SEND, olx); 3227 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 3228 txq->inlen_mode, 3229 0, olx); 3230 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3231 txq->inlen_mode - vlan; 3232 inlen -= txq->inlen_mode; 3233 mlx5_tx_dseg_ptr(txq, loc, dseg, 3234 dptr, inlen, olx); 3235 /* 3236 * WQE is built, update the loop parameters 3237 * and got to the next packet. 3238 */ 3239 txq->wqe_ci += (ds + 3) / 4; 3240 loc->wqe_free -= (ds + 3) / 4; 3241 /* We have to store mbuf in elts.*/ 3242 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3243 txq->elts[txq->elts_head++ & txq->elts_m] = 3244 loc->mbuf; 3245 --loc->elts_free; 3246 } else { 3247 uint8_t *dptr; 3248 unsigned int dlen; 3249 3250 /* 3251 * Partially inlined packet data WQE, we have 3252 * some space in title WQEBB, we can fill it 3253 * with some packet data. It takes one WQEBB, 3254 * it is available, no extra space check: 3255 * - Control Segment, SEND opcode 3256 * - Ethernet Segment, no VLAN insertion 3257 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 3258 * - Data Segment, pointer type 3259 * 3260 * We also get here if VLAN insertion is not 3261 * supported by HW, the inline is enabled. 3262 */ 3263 single_part_inline: 3264 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3265 loc->wqe_last = wqe; 3266 mlx5_tx_cseg_init(txq, loc, wqe, 4, 3267 MLX5_OPCODE_SEND, olx); 3268 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 3269 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3270 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 3271 /* 3272 * The length check is performed above, by 3273 * comparing with txq->inlen_send. We should 3274 * not get overflow here. 
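 * Example with assumed figures (18-byte minimal inline area, no
 * VLAN insertion): for a 200-byte packet the first 18 bytes travel
 * inside the Ethernet Segment and the remaining dlen = 200 - 18 =
 * 182 bytes are referenced by the pointer Data Segment built just
 * below, so the whole WQE still fits into a single WQEBB.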
3275 */ 3276 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 3277 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 3278 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 3279 dptr, dlen, olx); 3280 ++txq->wqe_ci; 3281 --loc->wqe_free; 3282 /* We have to store mbuf in elts.*/ 3283 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3284 txq->elts[txq->elts_head++ & txq->elts_m] = 3285 loc->mbuf; 3286 --loc->elts_free; 3287 } 3288 #ifdef MLX5_PMD_SOFT_COUNTERS 3289 /* Update sent data bytes counter. */ 3290 txq->stats.obytes += vlan + 3291 rte_pktmbuf_data_len(loc->mbuf); 3292 #endif 3293 } else { 3294 /* 3295 * No inline at all, it means the CPU cycles saving 3296 * is prioritized at configuration, we should not 3297 * copy any packet data to WQE. 3298 * 3299 * SEND WQE, one WQEBB: 3300 * - Control Segment, SEND opcode 3301 * - Ethernet Segment, optional VLAN, no inline 3302 * - Data Segment, pointer type 3303 */ 3304 single_no_inline: 3305 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3306 loc->wqe_last = wqe; 3307 mlx5_tx_cseg_init(txq, loc, wqe, 3, 3308 MLX5_OPCODE_SEND, olx); 3309 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3310 mlx5_tx_dseg_ptr 3311 (txq, loc, &wqe->dseg[0], 3312 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3313 rte_pktmbuf_data_len(loc->mbuf), olx); 3314 ++txq->wqe_ci; 3315 --loc->wqe_free; 3316 /* 3317 * We should not store mbuf pointer in elts 3318 * if no inlining is configured, this is done 3319 * by calling routine in a batch copy. 3320 */ 3321 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 3322 --loc->elts_free; 3323 #ifdef MLX5_PMD_SOFT_COUNTERS 3324 /* Update sent data bytes counter. */ 3325 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 3326 if (MLX5_TXOFF_CONFIG(VLAN) && 3327 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 3328 txq->stats.obytes += 3329 sizeof(struct rte_vlan_hdr); 3330 #endif 3331 } 3332 ++loc->pkts_sent; 3333 --pkts_n; 3334 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3335 return MLX5_TXCMP_CODE_EXIT; 3336 loc->mbuf = *pkts++; 3337 if (pkts_n > 1) 3338 rte_prefetch0(*pkts); 3339 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3340 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 3341 return ret; 3342 } 3343 MLX5_ASSERT(false); 3344 } 3345 3346 static __rte_always_inline enum mlx5_txcmp_code 3347 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 3348 struct rte_mbuf **__rte_restrict pkts, 3349 unsigned int pkts_n, 3350 struct mlx5_txq_local *__rte_restrict loc, 3351 unsigned int olx) 3352 { 3353 enum mlx5_txcmp_code ret; 3354 3355 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 3356 if (ret == MLX5_TXCMP_CODE_SINGLE) 3357 goto ordinary_send; 3358 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 3359 for (;;) { 3360 /* Optimize for inline/no inline eMPW send. */ 3361 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 3362 mlx5_tx_burst_empw_inline 3363 (txq, pkts, pkts_n, loc, olx) : 3364 mlx5_tx_burst_empw_simple 3365 (txq, pkts, pkts_n, loc, olx); 3366 if (ret != MLX5_TXCMP_CODE_SINGLE) 3367 return ret; 3368 /* The resources to send one packet should remain. */ 3369 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3370 ordinary_send: 3371 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 3372 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 3373 if (ret != MLX5_TXCMP_CODE_EMPW) 3374 return ret; 3375 /* The resources to send one packet should remain. */ 3376 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3377 } 3378 } 3379 3380 /** 3381 * DPDK Tx callback template. This is configured template used to generate 3382 * routines optimized for specified offload setup. 
 * One of these generated functions is chosen at SQ configuration time.
 *
 * @param txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 * @param olx
 *   Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx
 *   values. Should be a compile-time constant to take advantage of the
 *   static configuration optimizations.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
static __rte_always_inline uint16_t
mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq,
		   struct rte_mbuf **__rte_restrict pkts,
		   uint16_t pkts_n,
		   unsigned int olx)
{
	struct mlx5_txq_local loc;
	enum mlx5_txcmp_code ret;
	unsigned int part;

	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (unlikely(!pkts_n))
		return 0;
	if (MLX5_TXOFF_CONFIG(INLINE))
		loc.mbuf_free = 0;
	loc.pkts_sent = 0;
	loc.pkts_copy = 0;
	loc.wqe_last = NULL;

send_loop:
	loc.pkts_loop = loc.pkts_sent;
	/*
	 * Check if there are some CQEs, if any:
	 * - process any encountered errors
	 * - process the completed WQEs
	 * - free related mbufs
	 * - doorbell the NIC about processed CQEs
	 */
	rte_prefetch0(*(pkts + loc.pkts_sent));
	mlx5_tx_handle_completion(txq, olx);
	/*
	 * Calculate the number of available resources - elts and WQEs.
	 * There are two possible different scenarios:
	 * - no data inlining into WQEs, one WQEBB may contain up to
	 *   four packets, in this case elts become the scarce resource
	 * - data inlining into WQEs, one packet may require multiple
	 *   WQEBBs, the WQEs become the limiting factor.
	 */
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	loc.elts_free = txq->elts_s -
				(uint16_t)(txq->elts_head - txq->elts_tail);
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	loc.wqe_free = txq->wqe_s -
				(uint16_t)(txq->wqe_ci - txq->wqe_pi);
	if (unlikely(!loc.elts_free || !loc.wqe_free))
		goto burst_exit;
	for (;;) {
		/*
		 * Fetch the packet from the array. Usually this is the first
		 * packet in a series of multi/single segment packets.
		 */
		loc.mbuf = *(pkts + loc.pkts_sent);
		/* Dedicated branch for multi-segment packets. */
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    unlikely(NB_SEGS(loc.mbuf) > 1)) {
			/*
			 * Multi-segment packet encountered.
			 * Hardware is able to process it only
			 * with SEND/TSO opcodes, one packet
			 * per WQE, do it in a dedicated routine.
			 */
enter_send_multi:
			MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy);
			part = loc.pkts_sent - loc.pkts_copy;
			if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
				/*
				 * There are some single-segment mbufs not
				 * stored in elts. The mbufs must be in the
				 * same order as WQEs, so we must copy the
				 * mbufs to elts here, before the mbufs of
				 * the coming multi-segment packet are
				 * appended.
				 */
				mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy,
						  part, olx);
				loc.pkts_copy = loc.pkts_sent;
			}
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx);
			if (!MLX5_TXOFF_CONFIG(INLINE))
				loc.pkts_copy = loc.pkts_sent;
			/*
			 * These return code checks are supposed
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT) {
				/*
				 * The routine returns this code when all
				 * packets are sent or there are not enough
				 * resources to complete the request.
				 */
				break;
			}
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				/*
				 * The routine returns this code when an error
				 * in the incoming packet format occurred.
				 */
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE) {
				/*
				 * A single-segment packet was encountered
				 * in the array, try to send it in the most
				 * optimized way, possibly engaging eMPW.
				 */
				goto enter_send_single;
			}
			if (MLX5_TXOFF_CONFIG(TSO) &&
			    ret == MLX5_TXCMP_CODE_TSO) {
				/*
				 * A single-segment TSO packet was
				 * encountered in the array.
				 */
				goto enter_send_tso;
			}
			/* We must not get here. Something is going wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/* Dedicated branch for single-segment TSO packets. */
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    unlikely(loc.mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
			/*
			 * TSO might require a special way of inlining
			 * (dedicated parameters) and is sent only with
			 * the MLX5_OPCODE_TSO opcode, so handle it in
			 * a dedicated branch.
			 */
enter_send_tso:
			MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1);
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx);
			/*
			 * These return code checks are supposed
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT)
				break;
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE)
				goto enter_send_single;
			if (MLX5_TXOFF_CONFIG(MULTI) &&
			    ret == MLX5_TXCMP_CODE_MULTI) {
				/*
				 * A multi-segment packet was
				 * encountered in the array.
				 */
				goto enter_send_multi;
			}
			/* We must not get here. Something is going wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/*
		 * The dedicated branch for single-segment packets without
		 * TSO. Often these can be sent using MLX5_OPCODE_EMPW with
		 * multiple packets in one WQE. The routine builds the WQEs
		 * until it encounters a TSO or multi-segment packet (if
		 * these offloads are requested at SQ configuration time).
		 */
enter_send_single:
		MLX5_ASSERT(pkts_n > loc.pkts_sent);
		ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx);
		/*
		 * These return code checks are supposed
		 * to be optimized out due to routine inlining.
		 */
		if (ret == MLX5_TXCMP_CODE_EXIT)
			break;
		if (ret == MLX5_TXCMP_CODE_ERROR) {
			txq->stats.oerrors++;
			break;
		}
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    ret == MLX5_TXCMP_CODE_MULTI) {
			/*
			 * A multi-segment packet was
			 * encountered in the array.
			 */
			goto enter_send_multi;
		}
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    ret == MLX5_TXCMP_CODE_TSO) {
			/*
			 * A single-segment TSO packet was
			 * encountered in the array.
			 */
			goto enter_send_tso;
		}
		/* We must not get here. Something is going wrong. */
		MLX5_ASSERT(false);
		txq->stats.oerrors++;
		break;
	}
	/*
	 * Main Tx loop is completed, do the rest:
	 * - set completion request if thresholds are reached
	 * - doorbell the hardware
	 * - copy the rest of mbufs to elts (if any)
	 */
	MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) ||
		    loc.pkts_sent >= loc.pkts_copy);
	/* Take a shortcut if nothing is sent. */
	if (unlikely(loc.pkts_sent == loc.pkts_loop))
		goto burst_exit;
	/* Request CQE generation if limits are reached. */
	mlx5_tx_request_completion(txq, &loc, olx);
	/*
	 * Ring the QP doorbell immediately after WQE building completion
	 * to improve latencies. The purely software-related data treatment
	 * can be completed after the doorbell. Tx CQEs for this SQ are
	 * processed in this thread only by polling.
	 *
	 * The rdma core library can map the doorbell register in two ways,
	 * depending on the environment variable "MLX5_SHUT_UP_BF":
	 *
	 * - as regular cached memory, the variable is either missing or
	 *   set to zero. This type of mapping may cause significant
	 *   doorbell register write latency and requires an explicit
	 *   memory write barrier to mitigate this issue and prevent
	 *   write combining.
	 *
	 * - as non-cached memory, the variable is present and set to a
	 *   non-zero value. This type of mapping may cause a performance
	 *   impact under heavy loading conditions but the explicit write
	 *   memory barrier is not required and it may improve core
	 *   performance.
	 *
	 * - the legacy behaviour (prior to the 19.08 release) was to use
	 *   some heuristics to decide whether the write memory barrier
	 *   should be performed. This behavior is supported by specifying
	 *   tx_db_nc=2; the write barrier is skipped if the application
	 *   provides the full recommended burst of packets, supposing the
	 *   next packets are coming and the write barrier will be issued
	 *   on the next burst (after descriptor writing, at least).
	 */
	mlx5_doorbell_ring(mlx5_tx_bfreg(txq),
			   *(volatile uint64_t *)loc.wqe_last, txq->wqe_ci,
			   txq->qp_db, !txq->db_nc &&
			   (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
	/* Not all of the mbufs may be stored into elts yet. */
	part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
	if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
		/*
		 * There are some single-segment mbufs not stored in elts.
		 * This can only happen if the last packet was single-segment.
		 * The copying is gathered into one place because it is
		 * a good opportunity to optimize it with SIMD.
		 * Unfortunately, if inlining is enabled, gaps in the pointer
		 * array may occur due to early freeing of the inlined mbufs.
		 */
		mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx);
		loc.pkts_copy = loc.pkts_sent;
	}
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (pkts_n > loc.pkts_sent) {
		/*
		 * If the burst size is large there might not be enough CQEs
		 * fetched from the completion queue and not enough resources
		 * freed to send all the packets.
		 */
		goto send_loop;
	}
burst_exit:
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += loc.pkts_sent;
#endif
	if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
		__mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
	return loc.pkts_sent;
}

#endif /* RTE_PMD_MLX5_TX_H_ */
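
/*
 * Illustrative usage sketch (editor's example, kept out of compilation):
 * how a concrete Tx burst routine is generated from mlx5_tx_burst_tmpl()
 * with the MLX5_TXOFF_DECL() macro. The routine name "full_empw" and the
 * offload mask below are assumptions for this example only; the actual set
 * of generated routines lives in the PMD sources. Because "olx" is a
 * compile-time constant, every MLX5_TXOFF_CONFIG(...) check inside the
 * template is resolved by the compiler and the unused offload branches are
 * optimized out.
 */
#if 0
MLX5_TXOFF_DECL(full_empw,
		MLX5_TXOFF_CONFIG_FULL | MLX5_TXOFF_CONFIG_EMPW)

/*
 * The generated mlx5_tx_burst_full_empw() matches the Tx burst prototype
 * expected by the ethdev layer and can be installed as the device Tx burst
 * callback when it best matches the offloads requested at SQ configuration
 * time, as described in the template documentation above.
 */
#endif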