/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2021 6WIND S.A.
 * Copyright 2021 Mellanox Technologies, Ltd
 */

#ifndef RTE_PMD_MLX5_TX_H_
#define RTE_PMD_MLX5_TX_H_

#include <stdint.h>
#include <sys/queue.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_common.h>
#include <rte_spinlock.h>

#include <mlx5_common.h>
#include <mlx5_common_mr.h>

#include "mlx5.h"
#include "mlx5_autoconf.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
	MLX5_TXCMP_CODE_EXIT = 0,
	MLX5_TXCMP_CODE_ERROR,
	MLX5_TXCMP_CODE_SINGLE,
	MLX5_TXCMP_CODE_MULTI,
	MLX5_TXCMP_CODE_TSO,
	MLX5_TXCMP_CODE_EMPW,
};

/*
 * These defines are used to configure the set of Tx burst routine options
 * supported at compile time. Options that are not specified are optimized
 * out, because the related if conditions can be evaluated at compile time.
 * The offloads with a bigger runtime check overhead (requiring more CPU
 * cycles to skip) should have the bigger index - this is needed to select
 * the better matching routine if there is no exact match and some offloads
 * are not actually requested.
 */
#define MLX5_TXOFF_CONFIG_MULTI (1u << 0) /* Multi-segment packets. */
#define MLX5_TXOFF_CONFIG_TSO (1u << 1) /* TCP send offload supported. */
#define MLX5_TXOFF_CONFIG_SWP (1u << 2) /* Tunnels/SW Parser offloads. */
#define MLX5_TXOFF_CONFIG_CSUM (1u << 3) /* Check Sums offloaded. */
#define MLX5_TXOFF_CONFIG_INLINE (1u << 4) /* Data inlining supported. */
#define MLX5_TXOFF_CONFIG_VLAN (1u << 5) /* VLAN insertion supported. */
#define MLX5_TXOFF_CONFIG_METADATA (1u << 6) /* Flow metadata. */
#define MLX5_TXOFF_CONFIG_EMPW (1u << 8) /* Enhanced MPW supported. */
#define MLX5_TXOFF_CONFIG_MPW (1u << 9) /* Legacy MPW supported. */
#define MLX5_TXOFF_CONFIG_TXPP (1u << 10) /* Scheduling on timestamp. */

/* The most common offloads groups. */
#define MLX5_TXOFF_CONFIG_NONE 0
#define MLX5_TXOFF_CONFIG_FULL (MLX5_TXOFF_CONFIG_MULTI | \
				MLX5_TXOFF_CONFIG_TSO | \
				MLX5_TXOFF_CONFIG_SWP | \
				MLX5_TXOFF_CONFIG_CSUM | \
				MLX5_TXOFF_CONFIG_INLINE | \
				MLX5_TXOFF_CONFIG_VLAN | \
				MLX5_TXOFF_CONFIG_METADATA)

#define MLX5_TXOFF_CONFIG(mask) (olx & MLX5_TXOFF_CONFIG_##mask)

#define MLX5_TXOFF_PRE_DECL(func) \
uint16_t mlx5_tx_burst_##func(void *txq, \
			      struct rte_mbuf **pkts, \
			      uint16_t pkts_n)

#define MLX5_TXOFF_DECL(func, olx) \
uint16_t mlx5_tx_burst_##func(void *txq, \
			      struct rte_mbuf **pkts, \
			      uint16_t pkts_n) \
{ \
	return mlx5_tx_burst_tmpl((struct mlx5_txq_data *)txq, \
		    pkts, pkts_n, (olx)); \
}
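/*
 * Illustrative note (not part of the original header): MLX5_TXOFF_DECL()
 * instantiates a specialized burst routine from the common template with a
 * compile-time constant offload mask, and MLX5_TXOFF_CONFIG() is the test
 * the template applies against that mask. For example, a hypothetical
 * declaration such as
 *
 *   MLX5_TXOFF_DECL(md, MLX5_TXOFF_CONFIG_EMPW | MLX5_TXOFF_CONFIG_METADATA)
 *
 * would expand to an mlx5_tx_burst_md() wrapper that calls
 * mlx5_tx_burst_tmpl(txq, pkts, pkts_n, olx) with olx fixed, so that branches
 * guarded by MLX5_TXOFF_CONFIG(INLINE), MLX5_TXOFF_CONFIG(TSO), etc. are
 * removed by the compiler for this variant.
 */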
/* Mbuf dynamic flag offset for inline. */
extern uint64_t rte_net_mlx5_dynf_inline_mask;
#define RTE_MBUF_F_TX_DYNF_NOINLINE rte_net_mlx5_dynf_inline_mask

extern uint32_t mlx5_ptype_table[] __rte_cache_aligned;
extern uint8_t mlx5_cksum_table[1 << 10] __rte_cache_aligned;
extern uint8_t mlx5_swp_types_table[1 << 10] __rte_cache_aligned;

struct mlx5_txq_stats {
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint64_t opackets; /**< Total of successfully sent packets. */
	uint64_t obytes; /**< Total of successfully sent bytes. */
#endif
	uint64_t oerrors; /**< Total number of failed transmitted packets. */
};

/* TX queue send local data. */
__extension__
struct mlx5_txq_local {
	struct mlx5_wqe *wqe_last; /* last sent WQE pointer. */
	struct rte_mbuf *mbuf; /* first mbuf to process. */
	uint16_t pkts_copy; /* packets copied to elts. */
	uint16_t pkts_sent; /* packets sent. */
	uint16_t pkts_loop; /* packets sent on loop entry. */
	uint16_t elts_free; /* available elts remain. */
	uint16_t wqe_free; /* available wqe remain. */
	uint16_t mbuf_off; /* data offset in current mbuf. */
	uint16_t mbuf_nseg; /* number of remaining mbufs. */
	uint16_t mbuf_free; /* number of inline mbufs to free. */
};

/* TX queue descriptor. */
__extension__
struct mlx5_txq_data {
	uint16_t elts_head; /* Current counter in (*elts)[]. */
	uint16_t elts_tail; /* Counter of first element awaiting completion. */
	uint16_t elts_comp; /* elts index since last completion request. */
	uint16_t elts_s; /* Number of mbuf elements. */
	uint16_t elts_m; /* Mask for mbuf elements indices. */
	/* Fields related to elts mbuf storage. */
	uint16_t wqe_ci; /* Consumer index for work queue. */
	uint16_t wqe_pi; /* Producer index for work queue. */
	uint16_t wqe_s; /* Number of WQ elements. */
	uint16_t wqe_m; /* Mask for WQ element indices. */
	uint16_t wqe_comp; /* WQE index since last completion request. */
	uint16_t wqe_thres; /* WQE threshold to request completion in CQ. */
	/* WQ related fields. */
	uint16_t cq_ci; /* Consumer index for completion queue. */
	uint16_t cq_pi; /* Producer index for completion queue. */
	uint16_t cqe_s; /* Number of CQ elements. */
	uint16_t cqe_m; /* Mask for CQ indices. */
	/* CQ related fields. */
	uint16_t elts_n:4; /* elts[] length (in log2). */
	uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
	uint16_t wqe_n:4; /* Number of WQ elements (in log2). */
	uint16_t tso_en:1; /* When set hardware TSO is enabled. */
	uint16_t tunnel_en:1;
	/* When set TX offload for tunneled packets are supported. */
	uint16_t swp_en:1; /* Whether SW parser is enabled. */
	uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */
	uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */
	uint16_t db_heu:1; /* Doorbell heuristic write barrier. */
	uint16_t rt_timestamp:1; /* Realtime timestamp format. */
	uint16_t wait_on_time:1; /* WQE with timestamp is supported. */
	uint16_t fast_free:1; /* mbuf fast free on Tx is enabled. */
	uint16_t inlen_send; /* Ordinary send data inline size. */
	uint16_t inlen_empw; /* eMPW max packet size to inline. */
	uint16_t inlen_mode; /* Minimal data length to inline. */
	uint32_t qp_num_8s; /* QP number shifted by 8. */
	uint64_t offloads; /* Offloads for Tx Queue. */
	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
	struct mlx5_wqe *wqes; /* Work queue. */
	struct mlx5_wqe *wqes_end; /* Work queue array limit. */
#ifdef RTE_LIBRTE_MLX5_DEBUG
	uint32_t *fcqs; /* Free completion queue (debug extended). */
#else
	uint16_t *fcqs; /* Free completion queue. */
#endif
	volatile struct mlx5_cqe *cqes; /* Completion queue. */
	volatile uint32_t *qp_db; /* Work queue doorbell. */
	volatile uint32_t *cq_db; /* Completion queue doorbell. */
	uint16_t port_id; /* Port ID of device. */
	uint16_t idx; /* Queue index. */
	uint64_t rt_timemask; /* Scheduling timestamp mask. */
	uint64_t ts_mask; /* Timestamp flag dynamic mask. */
	int32_t ts_offset; /* Timestamp field dynamic offset. */
	struct mlx5_dev_ctx_shared *sh; /* Shared context. */
	struct mlx5_txq_stats stats; /* TX queue counters. */
	struct mlx5_uar_data uar_data;
	struct rte_mbuf *elts[0];
	/* Storage for queued packets, must be the last field. */
} __rte_cache_aligned;

enum mlx5_txq_type {
	MLX5_TXQ_TYPE_STANDARD, /* Standard Tx queue. */
	MLX5_TXQ_TYPE_HAIRPIN, /* Hairpin Tx queue. */
};

/* TX queue control descriptor. */
struct mlx5_txq_ctrl {
	LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */
	uint32_t refcnt; /* Reference counter. */
	unsigned int socket; /* CPU socket ID for allocations. */
	enum mlx5_txq_type type; /* The txq ctrl type. */
	unsigned int max_inline_data; /* Max inline data. */
	unsigned int max_tso_header; /* Max TSO header size. */
	struct mlx5_txq_obj *obj; /* Verbs/DevX queue object. */
	struct mlx5_priv *priv; /* Back pointer to private data. */
	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
	uint16_t dump_file_n; /* Number of dump files. */
	struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */
	uint32_t hairpin_status; /* Hairpin binding status. */
	struct mlx5_txq_data txq; /* Data path structure. */
	/* Must be the last field in the structure, contains elts[]. */
};

/* mlx5_txq.c */

int mlx5_tx_queue_start(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_tx_queue_stop(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_tx_queue_start_primary(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_tx_queue_stop_primary(struct rte_eth_dev *dev, uint16_t queue_id);
int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
			unsigned int socket, const struct rte_eth_txconf *conf);
int mlx5_tx_hairpin_queue_setup
	(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	 const struct rte_eth_hairpin_conf *hairpin_conf);
void mlx5_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid);
int mlx5_tx_uar_init_secondary(struct rte_eth_dev *dev, int fd);
void mlx5_tx_uar_uninit_secondary(struct rte_eth_dev *dev);
int mlx5_txq_obj_verify(struct rte_eth_dev *dev);
struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx,
				   uint16_t desc, unsigned int socket,
				   const struct rte_eth_txconf *conf);
struct mlx5_txq_ctrl *mlx5_txq_hairpin_new
	(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
	 const struct rte_eth_hairpin_conf *hairpin_conf);
struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_verify(struct rte_eth_dev *dev);
void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl);
void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl);
uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev);
void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev);

/* mlx5_tx.c */

void mlx5_tx_handle_completion(struct mlx5_txq_data *__rte_restrict txq,
			       unsigned int olx __rte_unused);
int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset);
void mlx5_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
		       struct rte_eth_txq_info *qinfo);
int mlx5_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t tx_queue_id,
			   struct rte_eth_burst_mode *mode);

/* mlx5_tx_empw.c */

MLX5_TXOFF_PRE_DECL(full_empw);
MLX5_TXOFF_PRE_DECL(none_empw);
MLX5_TXOFF_PRE_DECL(md_empw);
MLX5_TXOFF_PRE_DECL(mt_empw);
MLX5_TXOFF_PRE_DECL(mtsc_empw);
MLX5_TXOFF_PRE_DECL(mti_empw);
MLX5_TXOFF_PRE_DECL(mtv_empw);
MLX5_TXOFF_PRE_DECL(mtiv_empw);
MLX5_TXOFF_PRE_DECL(sc_empw);
MLX5_TXOFF_PRE_DECL(sci_empw);
MLX5_TXOFF_PRE_DECL(scv_empw);
MLX5_TXOFF_PRE_DECL(sciv_empw);
MLX5_TXOFF_PRE_DECL(i_empw);
MLX5_TXOFF_PRE_DECL(v_empw);
MLX5_TXOFF_PRE_DECL(iv_empw);

/* mlx5_tx_nompw.c */

MLX5_TXOFF_PRE_DECL(full);
MLX5_TXOFF_PRE_DECL(none);
MLX5_TXOFF_PRE_DECL(md);
MLX5_TXOFF_PRE_DECL(mt);
MLX5_TXOFF_PRE_DECL(mtsc);
MLX5_TXOFF_PRE_DECL(mti);
MLX5_TXOFF_PRE_DECL(mtv);
MLX5_TXOFF_PRE_DECL(mtiv);
MLX5_TXOFF_PRE_DECL(sc);
MLX5_TXOFF_PRE_DECL(sci);
MLX5_TXOFF_PRE_DECL(scv);
MLX5_TXOFF_PRE_DECL(sciv);
MLX5_TXOFF_PRE_DECL(i);
MLX5_TXOFF_PRE_DECL(v);
MLX5_TXOFF_PRE_DECL(iv);

/* mlx5_tx_txpp.c */

MLX5_TXOFF_PRE_DECL(full_ts_nompw);
MLX5_TXOFF_PRE_DECL(full_ts_nompwi);
MLX5_TXOFF_PRE_DECL(full_ts);
MLX5_TXOFF_PRE_DECL(full_ts_noi);
MLX5_TXOFF_PRE_DECL(none_ts);
MLX5_TXOFF_PRE_DECL(mdi_ts);
MLX5_TXOFF_PRE_DECL(mti_ts);
MLX5_TXOFF_PRE_DECL(mtiv_ts);

/* mlx5_tx_mpw.c */

MLX5_TXOFF_PRE_DECL(none_mpw);
MLX5_TXOFF_PRE_DECL(mci_mpw);
MLX5_TXOFF_PRE_DECL(mc_mpw);
MLX5_TXOFF_PRE_DECL(i_mpw);

static __rte_always_inline struct mlx5_uar_data *
mlx5_tx_bfreg(struct mlx5_txq_data *txq)
{
	return &MLX5_PROC_PRIV(txq->port_id)->uar_table[txq->idx];
}

/**
 * Ring TX queue doorbell and flush the update by write memory barrier.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param wqe
 *   Pointer to the last WQE posted in the NIC.
 */
static __rte_always_inline void
mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
{
	mlx5_doorbell_ring(mlx5_tx_bfreg(txq), *(volatile uint64_t *)wqe,
			   txq->wqe_ci, txq->qp_db, 1);
}

/**
 * Convert timestamp from mbuf format to linear counter
 * of Clock Queue completions (24 bits).
 *
 * @param sh
 *   Pointer to the device shared context to fetch Tx
 *   packet pacing timestamp and parameters.
 * @param ts
 *   Timestamp from mbuf to convert.
 * @return
 *   positive or zero value - completion ID to wait.
 *   negative value - conversion error.
 */
static __rte_always_inline int32_t
mlx5_txpp_convert_tx_ts(struct mlx5_dev_ctx_shared *sh, uint64_t mts)
{
	uint64_t ts, ci;
	uint32_t tick;

	do {
		/*
		 * Read atomically two uint64_t fields and compare lsb bits.
		 * If there is no match - the timestamp was updated in
		 * the service thread, data should be re-read.
		 */
		rte_compiler_barrier();
		ci = __atomic_load_n(&sh->txpp.ts.ci_ts, __ATOMIC_RELAXED);
		ts = __atomic_load_n(&sh->txpp.ts.ts, __ATOMIC_RELAXED);
		rte_compiler_barrier();
		if (!((ts ^ ci) << (64 - MLX5_CQ_INDEX_WIDTH)))
			break;
	} while (true);
	/* Perform the skew correction, positive value to send earlier. */
	mts -= sh->txpp.skew;
	mts -= ts;
	if (unlikely(mts >= UINT64_MAX / 2)) {
		/* We have a negative integer, mts is in the past. */
		__atomic_fetch_add(&sh->txpp.err_ts_past,
				   1, __ATOMIC_RELAXED);
		return -1;
	}
	tick = sh->txpp.tick;
	MLX5_ASSERT(tick);
	/* Convert delta to completions, round up. */
	mts = (mts + tick - 1) / tick;
	if (unlikely(mts >= (1 << MLX5_CQ_INDEX_WIDTH) / 2 - 1)) {
		/* The requested timestamp is too far in the future. */
		__atomic_fetch_add(&sh->txpp.err_ts_future,
				   1, __ATOMIC_RELAXED);
		return -1;
	}
	mts <<= 64 - MLX5_CQ_INDEX_WIDTH;
	ci += mts;
	ci >>= 64 - MLX5_CQ_INDEX_WIDTH;
	return ci;
}
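/*
 * Illustrative walk-through (not part of the original header), using
 * assumed example values: suppose the Clock Queue tick corresponds to 500
 * time units and the mbuf requests a timestamp 2250 units ahead of the
 * last snapshot ts (after the skew correction). Then (2250 + 500 - 1) / 500
 * gives 5 completions to wait, this delta is added to the Clock Queue
 * completion index recovered from ci_ts, and the result modulo the
 * MLX5_CQ_INDEX_WIDTH (24-bit) range is returned for the WAIT WQE.
 * Requests that fall behind ts or farther than half of the 24-bit index
 * range are rejected and counted in err_ts_past/err_ts_future.
 */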
/**
 * Set Software Parser flags and offsets in Ethernet Segment of WQE.
 * Flags must be preliminary initialized to zero.
 *
 * @param loc
 *   Pointer to burst routine local context.
 * @param swp_flags
 *   Pointer to store Software Parser flags.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Software Parser offsets packed in dword.
 *   Software Parser flags are set by pointer.
 */
static __rte_always_inline uint32_t
txq_mbuf_to_swp(struct mlx5_txq_local *__rte_restrict loc,
		uint8_t *swp_flags,
		unsigned int olx)
{
	uint64_t ol, tunnel;
	unsigned int idx, off;
	uint32_t set;

	if (!MLX5_TXOFF_CONFIG(SWP))
		return 0;
	ol = loc->mbuf->ol_flags;
	tunnel = ol & RTE_MBUF_F_TX_TUNNEL_MASK;
	/*
	 * Check whether Software Parser is required.
	 * Only customized tunnels may ask for it.
	 */
	if (likely(tunnel != RTE_MBUF_F_TX_TUNNEL_UDP &&
		   tunnel != RTE_MBUF_F_TX_TUNNEL_IP))
		return 0;
	/*
	 * The index should have:
	 * bit[0:1] = RTE_MBUF_F_TX_L4_MASK
	 * bit[4] = RTE_MBUF_F_TX_IPV6
	 * bit[8] = RTE_MBUF_F_TX_OUTER_IPV6
	 * bit[9] = RTE_MBUF_F_TX_OUTER_UDP
	 */
	idx = (ol & (RTE_MBUF_F_TX_L4_MASK | RTE_MBUF_F_TX_IPV6 |
		     RTE_MBUF_F_TX_OUTER_IPV6)) >> 52;
	idx |= (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP) ? (1 << 9) : 0;
	*swp_flags = mlx5_swp_types_table[idx];
	/*
	 * Set offsets for SW parser. Since ConnectX-5, SW parser just
	 * complements HW parser. SW parser starts to engage only if HW parser
	 * can't reach a header. For the older devices, HW parser will not kick
	 * in if any of SWP offsets is set. Therefore, all of the L3 offsets
	 * should be set regardless of HW offload.
	 */
	off = loc->mbuf->outer_l2_len;
	if (MLX5_TXOFF_CONFIG(VLAN) && ol & RTE_MBUF_F_TX_VLAN)
		off += sizeof(struct rte_vlan_hdr);
	set = (off >> 1) << 8; /* Outer L3 offset. */
	off += loc->mbuf->outer_l3_len;
	if (tunnel == RTE_MBUF_F_TX_TUNNEL_UDP)
		set |= off >> 1; /* Outer L4 offset. */
	if (ol & (RTE_MBUF_F_TX_IPV4 | RTE_MBUF_F_TX_IPV6)) { /* Inner IP. */
		const uint64_t csum = ol & RTE_MBUF_F_TX_L4_MASK;

		off += loc->mbuf->l2_len;
		set |= (off >> 1) << 24; /* Inner L3 offset. */
		if (csum == RTE_MBUF_F_TX_TCP_CKSUM ||
		    csum == RTE_MBUF_F_TX_UDP_CKSUM ||
		    (MLX5_TXOFF_CONFIG(TSO) && ol & RTE_MBUF_F_TX_TCP_SEG)) {
			off += loc->mbuf->l3_len;
			set |= (off >> 1) << 16; /* Inner L4 offset. */
		}
	}
	set = rte_cpu_to_le_32(set);
	return set;
}

/**
 * Convert the Checksum offloads to Verbs.
 *
 * @param buf
 *   Pointer to the mbuf.
 *
 * @return
 *   Converted checksum flags.
 */
static __rte_always_inline uint8_t
txq_ol_cksum_to_cs(struct rte_mbuf *buf)
{
	uint32_t idx;
	uint8_t is_tunnel = !!(buf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK);
	const uint64_t ol_flags_mask = RTE_MBUF_F_TX_TCP_SEG |
				       RTE_MBUF_F_TX_L4_MASK |
				       RTE_MBUF_F_TX_IP_CKSUM |
				       RTE_MBUF_F_TX_OUTER_IP_CKSUM;

	/*
	 * The index should have:
	 * bit[0] = RTE_MBUF_F_TX_TCP_SEG
	 * bit[2:3] = RTE_MBUF_F_TX_UDP_CKSUM, RTE_MBUF_F_TX_TCP_CKSUM
	 * bit[4] = RTE_MBUF_F_TX_IP_CKSUM
	 * bit[8] = RTE_MBUF_F_TX_OUTER_IP_CKSUM
	 * bit[9] = tunnel
	 */
	idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9);
	return mlx5_cksum_table[idx];
}
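/*
 * Illustrative note (not part of the original header): assuming the bit
 * layout documented above, a plain TCP packet requesting
 * RTE_MBUF_F_TX_IP_CKSUM | RTE_MBUF_F_TX_TCP_CKSUM yields an index of
 * (1 << 4) | (1 << 2) = 0x14, and mlx5_cksum_table[0x14] provides the
 * corresponding hardware checksum flags for the Ethernet Segment.
 */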
/**
 * Free the mbufs from the linear array of pointers.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param pkts
 *   Pointer to array of packets to be freed.
 * @param pkts_n
 *   Number of packets to be freed.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq,
		  struct rte_mbuf **__rte_restrict pkts,
		  unsigned int pkts_n,
		  unsigned int olx __rte_unused)
{
	struct rte_mempool *pool = NULL;
	struct rte_mbuf **p_free = NULL;
	struct rte_mbuf *mbuf;
	unsigned int n_free = 0;

	/*
	 * The implemented algorithm eliminates
	 * copying pointers to temporary array
	 * for rte_mempool_put_bulk() calls.
	 */
	MLX5_ASSERT(pkts);
	MLX5_ASSERT(pkts_n);
	/*
	 * Free mbufs directly to the pool in bulk
	 * if fast free offload is engaged.
	 */
	if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) {
		mbuf = *pkts;
		pool = mbuf->pool;
		rte_mempool_put_bulk(pool, (void *)pkts, pkts_n);
		return;
	}
	for (;;) {
		for (;;) {
			/*
			 * Decrement mbuf reference counter, detach
			 * indirect and external buffers if needed.
			 */
			mbuf = rte_pktmbuf_prefree_seg(*pkts);
			if (likely(mbuf != NULL)) {
				MLX5_ASSERT(mbuf == *pkts);
				if (likely(n_free != 0)) {
					if (unlikely(pool != mbuf->pool))
						/* From different pool. */
						break;
				} else {
					/* Start new scan array. */
					pool = mbuf->pool;
					p_free = pkts;
				}
				++n_free;
				++pkts;
				--pkts_n;
				if (unlikely(pkts_n == 0)) {
					mbuf = NULL;
					break;
				}
			} else {
				/*
				 * This happens if mbuf is still referenced.
				 * We can't put it back to the pool, skip.
				 */
				++pkts;
				--pkts_n;
				if (unlikely(n_free != 0))
					/* There is some array to free. */
					break;
				if (unlikely(pkts_n == 0))
					/* Last mbuf, nothing to free. */
					return;
			}
		}
		for (;;) {
			/*
			 * This loop is implemented to avoid multiple
			 * inlining of rte_mempool_put_bulk().
			 */
			MLX5_ASSERT(pool);
			MLX5_ASSERT(p_free);
			MLX5_ASSERT(n_free);
			/*
			 * Free the array of pre-freed mbufs
			 * belonging to the same memory pool.
			 */
			rte_mempool_put_bulk(pool, (void *)p_free, n_free);
			if (unlikely(mbuf != NULL)) {
				/* There is the request to start new scan. */
				pool = mbuf->pool;
				p_free = pkts++;
				n_free = 1;
				--pkts_n;
				if (likely(pkts_n != 0))
					break;
				/*
				 * This is the last mbuf to be freed.
				 * Do one more loop iteration to complete.
				 * This is rare case of the last unique mbuf.
				 */
				mbuf = NULL;
				continue;
			}
			if (likely(pkts_n == 0))
				return;
			n_free = 0;
			break;
		}
	}
}

/**
 * Non-inlined version of the buffer free routine, for an optimal call
 * on tx_burst completion.
 */
static __rte_noinline void
__mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq,
		    struct rte_mbuf **__rte_restrict pkts,
		    unsigned int pkts_n,
		    unsigned int olx __rte_unused)
{
	mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx);
}

/**
 * Free the mbufs from the elts ring buffer up to the new tail.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param tail
 *   Index in elts to free up to, becomes new elts tail.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_free_elts(struct mlx5_txq_data *__rte_restrict txq,
		  uint16_t tail,
		  unsigned int olx __rte_unused)
{
	uint16_t n_elts = tail - txq->elts_tail;

	MLX5_ASSERT(n_elts);
	MLX5_ASSERT(n_elts <= txq->elts_s);
	/*
	 * Implement a loop to support ring buffer wraparound
	 * with single inlining of mlx5_tx_free_mbuf().
	 */
	do {
		unsigned int part;

		part = txq->elts_s - (txq->elts_tail & txq->elts_m);
		part = RTE_MIN(part, n_elts);
		MLX5_ASSERT(part);
		MLX5_ASSERT(part <= txq->elts_s);
		mlx5_tx_free_mbuf(txq,
				  &txq->elts[txq->elts_tail & txq->elts_m],
				  part, olx);
		txq->elts_tail += part;
		n_elts -= part;
	} while (n_elts);
}

/**
 * Store the mbuf being sent into elts ring buffer.
 * On Tx completion these mbufs will be freed.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param pkts
 *   Pointer to array of packets to be stored.
 * @param pkts_n
 *   Number of packets to be stored.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_copy_elts(struct mlx5_txq_data *__rte_restrict txq,
		  struct rte_mbuf **__rte_restrict pkts,
		  unsigned int pkts_n,
		  unsigned int olx __rte_unused)
{
	unsigned int part;
	struct rte_mbuf **elts = (struct rte_mbuf **)txq->elts;

	MLX5_ASSERT(pkts);
	MLX5_ASSERT(pkts_n);
	part = txq->elts_s - (txq->elts_head & txq->elts_m);
	MLX5_ASSERT(part);
	MLX5_ASSERT(part <= txq->elts_s);
	/* This code is a good candidate for vectorizing with SIMD. */
	rte_memcpy((void *)(elts + (txq->elts_head & txq->elts_m)),
		   (void *)pkts,
		   RTE_MIN(part, pkts_n) * sizeof(struct rte_mbuf *));
	txq->elts_head += pkts_n;
	if (unlikely(part < pkts_n))
		/* The copy is wrapping around the elts array. */
		rte_memcpy((void *)elts, (void *)(pkts + part),
			   (pkts_n - part) * sizeof(struct rte_mbuf *));
}
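/*
 * Illustrative note (not part of the original header): elts is a power-of-2
 * ring addressed through free-running 16-bit counters. With an assumed
 * elts_s of 256 (so elts_m == 0xFF), elts_head == 250 and a burst of 10
 * packets, mlx5_tx_copy_elts() stores 6 pointers at indices 250..255 and
 * the remaining 4 at indices 0..3, then advances elts_head to 260. On the
 * matching completion, mlx5_tx_free_elts() walks elts_tail forward the same
 * way, splitting the free calls at the wraparound point.
 */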
/**
 * Check if the completion request flag should be set in the last WQE.
 * Both pushed mbufs and WQEs are monitored and the completion request
 * flag is set if any of thresholds is reached.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_request_completion(struct mlx5_txq_data *__rte_restrict txq,
			   struct mlx5_txq_local *__rte_restrict loc,
			   unsigned int olx)
{
	uint16_t head = txq->elts_head;
	unsigned int part;

	part = MLX5_TXOFF_CONFIG(INLINE) ?
	       0 : loc->pkts_sent - loc->pkts_copy;
	head += part;
	if ((uint16_t)(head - txq->elts_comp) >= MLX5_TX_COMP_THRESH ||
	    (MLX5_TXOFF_CONFIG(INLINE) &&
	     (uint16_t)(txq->wqe_ci - txq->wqe_comp) >= txq->wqe_thres)) {
		volatile struct mlx5_wqe *last = loc->wqe_last;

		MLX5_ASSERT(last);
		txq->elts_comp = head;
		if (MLX5_TXOFF_CONFIG(INLINE))
			txq->wqe_comp = txq->wqe_ci;
		/* Request unconditional completion on last WQE. */
		last->cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
					    MLX5_COMP_MODE_OFFSET);
		/* Save elts_head in dedicated free on completion queue. */
#ifdef RTE_LIBRTE_MLX5_DEBUG
		txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head |
			  (last->cseg.opcode >> 8) << 16;
#else
		txq->fcqs[txq->cq_pi++ & txq->cqe_m] = head;
#endif
		/* A CQE slot must always be available. */
		MLX5_ASSERT((txq->cq_pi - txq->cq_ci) <= txq->cqe_s);
	}
}

/**
 * Build the Control Segment with specified opcode:
 * - MLX5_OPCODE_SEND
 * - MLX5_OPCODE_ENHANCED_MPSW
 * - MLX5_OPCODE_TSO
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Control Segment.
 * @param ds
 *   Supposed length of WQE in segments.
 * @param opcode
 *   SQ WQE opcode to put into Control Segment.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_cseg_init(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc __rte_unused,
		  struct mlx5_wqe *__rte_restrict wqe,
		  unsigned int ds,
		  unsigned int opcode,
		  unsigned int olx __rte_unused)
{
	struct mlx5_wqe_cseg *__rte_restrict cs = &wqe->cseg;

	/* For legacy MPW replace the EMPW by TSO with modifier. */
	if (MLX5_TXOFF_CONFIG(MPW) && opcode == MLX5_OPCODE_ENHANCED_MPSW)
		opcode = MLX5_OPCODE_TSO | MLX5_OPC_MOD_MPW << 24;
	cs->opcode = rte_cpu_to_be_32((txq->wqe_ci << 8) | opcode);
	cs->sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds);
	cs->flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR <<
			     MLX5_COMP_MODE_OFFSET);
	cs->misc = RTE_BE32(0);
}
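/*
 * Illustrative note (not part of the original header): the ds argument is
 * the WQE size expressed in 16-byte (MLX5_WSEG_SIZE) units. For example, a
 * hypothetical non-inlined SEND built from one Control Segment, one
 * Ethernet Segment and one pointer Data Segment has ds == 3 and therefore
 * fits in a single WQEBB, which is why the burst routines advance wqe_ci
 * by (ds + 3) / 4.
 */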
/**
 * Build the Synchronize Queue Segment with specified completion index.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Control Segment.
 * @param wci
 *   Completion index in Clock Queue to wait.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_wseg_init(struct mlx5_txq_data *restrict txq,
		  struct mlx5_txq_local *restrict loc __rte_unused,
		  struct mlx5_wqe *restrict wqe,
		  unsigned int wci,
		  unsigned int olx __rte_unused)
{
	struct mlx5_wqe_qseg *qs;

	qs = RTE_PTR_ADD(wqe, MLX5_WSEG_SIZE);
	qs->max_index = rte_cpu_to_be_32(wci);
	qs->qpn_cqn = rte_cpu_to_be_32(txq->sh->txpp.clock_queue.cq_obj.cq->id);
	qs->reserved0 = RTE_BE32(0);
	qs->reserved1 = RTE_BE32(0);
}

/**
 * Build the Ethernet Segment without inlined data.
 * Supports Software Parser, Checksums and VLAN insertion Tx offload features.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet Segment.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_eseg_none(struct mlx5_txq_data *__rte_restrict txq __rte_unused,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe *__rte_restrict wqe,
		  unsigned int olx)
{
	struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg;
	uint32_t csum;

	/*
	 * Calculate and set check sum flags first, dword field
	 * in segment may be shared with Software Parser flags.
	 */
	csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0;
	es->flags = rte_cpu_to_le_32(csum);
	/*
	 * Calculate and set Software Parser offsets and flags.
	 * These flags are set for custom UDP and IP tunnel packets.
	 */
	es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx);
	/* Fill metadata field if needed. */
	es->metadata = MLX5_TXOFF_CONFIG(METADATA) ?
		       loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ?
		       rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) :
		       0 : 0;
	/* Engage VLAN tag insertion feature if requested. */
	if (MLX5_TXOFF_CONFIG(VLAN) &&
	    loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) {
		/*
		 * We should get here only if the device supports
		 * this feature correctly.
		 */
		MLX5_ASSERT(txq->vlan_en);
		es->inline_hdr = rte_cpu_to_be_32(MLX5_ETH_WQE_VLAN_INSERT |
						  loc->mbuf->vlan_tci);
	} else {
		es->inline_hdr = RTE_BE32(0);
	}
}

/**
 * Build the Ethernet Segment with minimal inlined data
 * of MLX5_ESEG_MIN_INLINE_SIZE bytes length. This is
 * used to fill the gap in single WQEBB WQEs.
 * Supports Software Parser, Checksums and VLAN
 * insertion Tx offload features.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet Segment.
 * @param vlan
 *   Length of VLAN tag insertion if any.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_eseg_dmin(struct mlx5_txq_data *__rte_restrict txq __rte_unused,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe *__rte_restrict wqe,
		  unsigned int vlan,
		  unsigned int olx)
{
	struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg;
	uint32_t csum;
	uint8_t *psrc, *pdst;

	/*
	 * Calculate and set check sum flags first, dword field
	 * in segment may be shared with Software Parser flags.
	 */
	csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0;
	es->flags = rte_cpu_to_le_32(csum);
	/*
	 * Calculate and set Software Parser offsets and flags.
	 * These flags are set for custom UDP and IP tunnel packets.
	 */
	es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx);
	/* Fill metadata field if needed. */
	es->metadata = MLX5_TXOFF_CONFIG(METADATA) ?
		       loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ?
		       rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) :
		       0 : 0;
	psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *);
	es->inline_hdr_sz = RTE_BE16(MLX5_ESEG_MIN_INLINE_SIZE);
	es->inline_data = *(unaligned_uint16_t *)psrc;
	psrc += sizeof(uint16_t);
	pdst = (uint8_t *)(es + 1);
	if (MLX5_TXOFF_CONFIG(VLAN) && vlan) {
		/* Implement VLAN tag insertion as part of inline data. */
		memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t));
		pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		/* Insert VLAN ethertype + VLAN tag. */
		*(unaligned_uint32_t *)pdst = rte_cpu_to_be_32
						((RTE_ETHER_TYPE_VLAN << 16) |
						 loc->mbuf->vlan_tci);
		pdst += sizeof(struct rte_vlan_hdr);
		/* Copy the remaining two bytes from packet data. */
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t)));
		*(uint16_t *)pdst = *(unaligned_uint16_t *)psrc;
	} else {
		/* Fill the gap in the title WQEBB with inline data. */
		rte_mov16(pdst, psrc);
	}
}

/**
 * Build the Ethernet Segment with entire packet data inlining. Checks the
 * boundary of WQEBB and ring buffer wrapping, supports Software Parser,
 * Checksums and VLAN insertion Tx offload features.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet Segment.
 * @param vlan
 *   Length of VLAN tag insertion if any.
 * @param inlen
 *   Length of data to inline (VLAN included, if any).
 * @param tso
 *   TSO flag, set mss field from the packet.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Pointer to the next Data Segment (aligned and wrapped around).
 */
static __rte_always_inline struct mlx5_wqe_dseg *
mlx5_tx_eseg_data(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe *__rte_restrict wqe,
		  unsigned int vlan,
		  unsigned int inlen,
		  unsigned int tso,
		  unsigned int olx)
{
	struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg;
	uint32_t csum;
	uint8_t *psrc, *pdst;
	unsigned int part;

	/*
	 * Calculate and set check sum flags first, dword field
	 * in segment may be shared with Software Parser flags.
	 */
	csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0;
	if (tso) {
		csum <<= 24;
		csum |= loc->mbuf->tso_segsz;
		es->flags = rte_cpu_to_be_32(csum);
	} else {
		es->flags = rte_cpu_to_le_32(csum);
	}
	/*
	 * Calculate and set Software Parser offsets and flags.
	 * These flags are set for custom UDP and IP tunnel packets.
	 */
	es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx);
	/* Fill metadata field if needed. */
	es->metadata = MLX5_TXOFF_CONFIG(METADATA) ?
		       loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ?
		       rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) :
		       0 : 0;
	psrc = rte_pktmbuf_mtod(loc->mbuf, uint8_t *);
	es->inline_hdr_sz = rte_cpu_to_be_16(inlen);
	es->inline_data = *(unaligned_uint16_t *)psrc;
	psrc += sizeof(uint16_t);
	pdst = (uint8_t *)(es + 1);
	if (MLX5_TXOFF_CONFIG(VLAN) && vlan) {
		/* Implement VLAN tag insertion as part of inline data. */
		memcpy(pdst, psrc, 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t));
		pdst += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		psrc += 2 * RTE_ETHER_ADDR_LEN - sizeof(uint16_t);
		/* Insert VLAN ethertype + VLAN tag. */
		*(unaligned_uint32_t *)pdst = rte_cpu_to_be_32
						((RTE_ETHER_TYPE_VLAN << 16) |
						 loc->mbuf->vlan_tci);
		pdst += sizeof(struct rte_vlan_hdr);
		/* Copy the remaining two bytes from packet data. */
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, sizeof(uint16_t)));
		*(uint16_t *)pdst = *(unaligned_uint16_t *)psrc;
		psrc += sizeof(uint16_t);
	} else {
		/* Fill the gap in the title WQEBB with inline data. */
		rte_mov16(pdst, psrc);
		psrc += sizeof(rte_v128u32_t);
	}
	pdst = (uint8_t *)(es + 2);
	MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE);
	MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end);
	inlen -= MLX5_ESEG_MIN_INLINE_SIZE;
	if (!inlen) {
		MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE));
		return (struct mlx5_wqe_dseg *)pdst;
	}
	/*
	 * The WQEBB space availability is checked by caller.
	 * Here we should be aware of WQE ring buffer wraparound only.
	 */
	part = (uint8_t *)txq->wqes_end - pdst;
	part = RTE_MIN(part, inlen);
	do {
		rte_memcpy(pdst, psrc, part);
		inlen -= part;
		if (likely(!inlen)) {
			/*
			 * If return value is not used by the caller
			 * the code below will be optimized out.
			 */
			pdst += part;
			pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
			if (unlikely(pdst >= (uint8_t *)txq->wqes_end))
				pdst = (uint8_t *)txq->wqes;
			return (struct mlx5_wqe_dseg *)pdst;
		}
		pdst = (uint8_t *)txq->wqes;
		psrc += part;
		part = inlen;
	} while (true);
}

/**
 * Copy data from chain of mbuf to the specified linear buffer.
 * If data from some mbuf is copied completely, this mbuf is freed.
 * Local structure is used to keep the byte stream state.
 *
 * @param pdst
 *   Pointer to the destination linear buffer.
 * @param loc
 *   Pointer to burst routine local context.
 * @param len
 *   Length of data to be copied.
 * @param must
 *   Length of data to be copied ignoring no inline hint.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Number of actually copied data bytes. This is always greater than or
 *   equal to the must parameter and might be less than len if the
 *   no-inline hint flag is encountered.
 */
static __rte_always_inline unsigned int
mlx5_tx_mseg_memcpy(uint8_t *pdst,
		    struct mlx5_txq_local *__rte_restrict loc,
		    unsigned int len,
		    unsigned int must,
		    unsigned int olx __rte_unused)
{
	struct rte_mbuf *mbuf;
	unsigned int part, dlen, copy = 0;
	uint8_t *psrc;

	MLX5_ASSERT(len);
	do {
		/* Allow zero length packets, must check first. */
		dlen = rte_pktmbuf_data_len(loc->mbuf);
		if (dlen <= loc->mbuf_off) {
			/* Exhausted packet, just free. */
			mbuf = loc->mbuf;
			loc->mbuf = mbuf->next;
			rte_pktmbuf_free_seg(mbuf);
			loc->mbuf_off = 0;
			MLX5_ASSERT(loc->mbuf_nseg > 1);
			MLX5_ASSERT(loc->mbuf);
			--loc->mbuf_nseg;
			if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) {
				unsigned int diff;

				if (copy >= must) {
					/*
					 * We already copied the minimal
					 * requested amount of data.
					 */
					return copy;
				}
				diff = must - copy;
				if (diff <= rte_pktmbuf_data_len(loc->mbuf)) {
					/*
					 * Copy only the minimal required
					 * part of the data buffer. Limit amount
					 * of data to be copied to the length of
					 * available space.
					 */
					len = RTE_MIN(len, diff);
				}
			}
			continue;
		}
		dlen -= loc->mbuf_off;
		psrc = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *,
					       loc->mbuf_off);
		part = RTE_MIN(len, dlen);
		rte_memcpy(pdst, psrc, part);
		copy += part;
		loc->mbuf_off += part;
		len -= part;
		if (!len) {
			if (loc->mbuf_off >= rte_pktmbuf_data_len(loc->mbuf)) {
				loc->mbuf_off = 0;
				/* Exhausted packet, just free. */
				mbuf = loc->mbuf;
				loc->mbuf = mbuf->next;
				rte_pktmbuf_free_seg(mbuf);
				loc->mbuf_off = 0;
				MLX5_ASSERT(loc->mbuf_nseg >= 1);
				--loc->mbuf_nseg;
			}
			return copy;
		}
		pdst += part;
	} while (true);
}

/**
 * Build the Ethernet Segment with inlined data from multi-segment packet.
 * Checks the boundary of WQEBB and ring buffer wrapping, supports Software
 * Parser, Checksums and VLAN insertion Tx offload features.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet Segment.
 * @param vlan
 *   Length of VLAN tag insertion if any.
 * @param inlen
 *   Length of data to inline (VLAN included, if any).
 * @param tso
 *   TSO flag, set mss field from the packet.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Pointer to the next Data Segment (aligned and possibly NOT wrapped
 *   around - caller should do wrapping check on its own).
 */
static __rte_always_inline struct mlx5_wqe_dseg *
mlx5_tx_eseg_mdat(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe *__rte_restrict wqe,
		  unsigned int vlan,
		  unsigned int inlen,
		  unsigned int tso,
		  unsigned int olx)
{
	struct mlx5_wqe_eseg *__rte_restrict es = &wqe->eseg;
	uint32_t csum;
	uint8_t *pdst;
	unsigned int part, tlen = 0;

	/*
	 * Calculate and set check sum flags first, uint32_t field
	 * in segment may be shared with Software Parser flags.
	 */
	csum = MLX5_TXOFF_CONFIG(CSUM) ? txq_ol_cksum_to_cs(loc->mbuf) : 0;
	if (tso) {
		csum <<= 24;
		csum |= loc->mbuf->tso_segsz;
		es->flags = rte_cpu_to_be_32(csum);
	} else {
		es->flags = rte_cpu_to_le_32(csum);
	}
	/*
	 * Calculate and set Software Parser offsets and flags.
	 * These flags are set for custom UDP and IP tunnel packets.
	 */
	es->swp_offs = txq_mbuf_to_swp(loc, &es->swp_flags, olx);
	/* Fill metadata field if needed. */
	es->metadata = MLX5_TXOFF_CONFIG(METADATA) ?
		       loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ?
		       rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) :
		       0 : 0;
	MLX5_ASSERT(inlen >= MLX5_ESEG_MIN_INLINE_SIZE);
	pdst = (uint8_t *)&es->inline_data;
	if (MLX5_TXOFF_CONFIG(VLAN) && vlan) {
		/* Implement VLAN tag insertion as part of inline data. */
		mlx5_tx_mseg_memcpy(pdst, loc,
				    2 * RTE_ETHER_ADDR_LEN,
				    2 * RTE_ETHER_ADDR_LEN, olx);
		pdst += 2 * RTE_ETHER_ADDR_LEN;
		*(unaligned_uint32_t *)pdst = rte_cpu_to_be_32
						((RTE_ETHER_TYPE_VLAN << 16) |
						 loc->mbuf->vlan_tci);
		pdst += sizeof(struct rte_vlan_hdr);
		tlen += 2 * RTE_ETHER_ADDR_LEN + sizeof(struct rte_vlan_hdr);
	}
	MLX5_ASSERT(pdst < (uint8_t *)txq->wqes_end);
	/*
	 * The WQEBB space availability is checked by caller.
	 * Here we should be aware of WQE ring buffer wraparound only.
	 */
	part = (uint8_t *)txq->wqes_end - pdst;
	part = RTE_MIN(part, inlen - tlen);
	MLX5_ASSERT(part);
	do {
		unsigned int copy;

		/*
		 * Copying may be interrupted inside the routine
		 * if run into no inline hint flag.
		 */
		copy = tso ? inlen : txq->inlen_mode;
		copy = tlen >= copy ? 0 : (copy - tlen);
		copy = mlx5_tx_mseg_memcpy(pdst, loc, part, copy, olx);
		tlen += copy;
		if (likely(inlen <= tlen) || copy < part) {
			es->inline_hdr_sz = rte_cpu_to_be_16(tlen);
			pdst += copy;
			pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
			return (struct mlx5_wqe_dseg *)pdst;
		}
		pdst = (uint8_t *)txq->wqes;
		part = inlen - tlen;
	} while (true);
}

/**
 * Build the Data Segment of pointer type.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param dseg
 *   Pointer to WQE to fill with built Data Segment.
 * @param buf
 *   Data buffer to point.
 * @param len
 *   Data buffer length.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_dseg_ptr(struct mlx5_txq_data *__rte_restrict txq,
		 struct mlx5_txq_local *__rte_restrict loc,
		 struct mlx5_wqe_dseg *__rte_restrict dseg,
		 uint8_t *buf,
		 unsigned int len,
		 unsigned int olx __rte_unused)

{
	MLX5_ASSERT(len);
	dseg->bcount = rte_cpu_to_be_32(len);
	dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf);
	dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf);
}

/**
 * Build the Data Segment of pointer type or inline the data if its length
 * is less than the minimal Data Segment size.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param dseg
 *   Pointer to WQE to fill with built Data Segment.
 * @param buf
 *   Data buffer to point.
 * @param len
 *   Data buffer length.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 */
static __rte_always_inline void
mlx5_tx_dseg_iptr(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc,
		  struct mlx5_wqe_dseg *__rte_restrict dseg,
		  uint8_t *buf,
		  unsigned int len,
		  unsigned int olx __rte_unused)

{
	uintptr_t dst, src;

	MLX5_ASSERT(len);
	if (len > MLX5_DSEG_MIN_INLINE_SIZE) {
		dseg->bcount = rte_cpu_to_be_32(len);
		dseg->lkey = mlx5_mr_mb2mr(&txq->mr_ctrl, loc->mbuf);
		dseg->pbuf = rte_cpu_to_be_64((uintptr_t)buf);

		return;
	}
	dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE);
	/* Unrolled implementation of generic rte_memcpy. */
	dst = (uintptr_t)&dseg->inline_data[0];
	src = (uintptr_t)buf;
	if (len & 0x08) {
#ifdef RTE_ARCH_STRICT_ALIGN
		MLX5_ASSERT(dst == RTE_PTR_ALIGN(dst, sizeof(uint32_t)));
		*(uint32_t *)dst = *(unaligned_uint32_t *)src;
		dst += sizeof(uint32_t);
		src += sizeof(uint32_t);
		*(uint32_t *)dst = *(unaligned_uint32_t *)src;
		dst += sizeof(uint32_t);
		src += sizeof(uint32_t);
#else
		*(uint64_t *)dst = *(unaligned_uint64_t *)src;
		dst += sizeof(uint64_t);
		src += sizeof(uint64_t);
#endif
	}
	if (len & 0x04) {
		*(uint32_t *)dst = *(unaligned_uint32_t *)src;
		dst += sizeof(uint32_t);
		src += sizeof(uint32_t);
	}
	if (len & 0x02) {
		*(uint16_t *)dst = *(unaligned_uint16_t *)src;
		dst += sizeof(uint16_t);
		src += sizeof(uint16_t);
	}
	if (len & 0x01)
		*(uint8_t *)dst = *(uint8_t *)src;
}
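/*
 * Illustrative note (not part of the original header), assuming
 * MLX5_DSEG_MIN_INLINE_SIZE is 12 bytes: a hypothetical 10-byte trailing
 * fragment handled by mlx5_tx_dseg_iptr() is copied into the Data Segment
 * itself (bcount carries MLX5_ETH_WQE_DATA_INLINE plus the length, and the
 * unrolled branches above copy it as 8 + 2 bytes), while anything longer
 * is described by the usual bcount/lkey/pbuf pointer triplet.
 */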
/**
 * Build the Data Segment of inlined data from single
 * segment packet, no VLAN insertion.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param dseg
 *   Pointer to WQE to fill with built Data Segment.
 * @param buf
 *   Data buffer to point.
 * @param len
 *   Data buffer length.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Pointer to the next Data Segment after inlined data.
 *   Ring buffer wraparound check is needed. We do not do it here because it
 *   may not be needed for the last packet in the eMPW session.
 */
static __rte_always_inline struct mlx5_wqe_dseg *
mlx5_tx_dseg_empw(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc __rte_unused,
		  struct mlx5_wqe_dseg *__rte_restrict dseg,
		  uint8_t *buf,
		  unsigned int len,
		  unsigned int olx __rte_unused)
{
	unsigned int part;
	uint8_t *pdst;

	if (!MLX5_TXOFF_CONFIG(MPW)) {
		/* Store the descriptor byte counter for eMPW sessions. */
		dseg->bcount = rte_cpu_to_be_32(len | MLX5_ETH_WQE_DATA_INLINE);
		pdst = &dseg->inline_data[0];
	} else {
		/* The entire legacy MPW session counter is stored on close. */
		pdst = (uint8_t *)dseg;
	}
	/*
	 * The WQEBB space availability is checked by caller.
	 * Here we should be aware of WQE ring buffer wraparound only.
	 */
	part = (uint8_t *)txq->wqes_end - pdst;
	part = RTE_MIN(part, len);
	do {
		rte_memcpy(pdst, buf, part);
		len -= part;
		if (likely(!len)) {
			pdst += part;
			if (!MLX5_TXOFF_CONFIG(MPW))
				pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
			/* Note: no final wraparound check here. */
			return (struct mlx5_wqe_dseg *)pdst;
		}
		pdst = (uint8_t *)txq->wqes;
		buf += part;
		part = len;
	} while (true);
}

/**
 * Build the Data Segment of inlined data from single
 * segment packet with VLAN insertion.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param dseg
 *   Pointer to the dseg to fill with built Data Segment.
 * @param buf
 *   Data buffer to point.
 * @param len
 *   Data buffer length.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Pointer to the next Data Segment after inlined data.
 *   Ring buffer wraparound check is needed.
 */
static __rte_always_inline struct mlx5_wqe_dseg *
mlx5_tx_dseg_vlan(struct mlx5_txq_data *__rte_restrict txq,
		  struct mlx5_txq_local *__rte_restrict loc __rte_unused,
		  struct mlx5_wqe_dseg *__rte_restrict dseg,
		  uint8_t *buf,
		  unsigned int len,
		  unsigned int olx __rte_unused)

{
	unsigned int part;
	uint8_t *pdst;

	MLX5_ASSERT(len > MLX5_ESEG_MIN_INLINE_SIZE);
	if (!MLX5_TXOFF_CONFIG(MPW)) {
		/* Store the descriptor byte counter for eMPW sessions. */
		dseg->bcount = rte_cpu_to_be_32
				((len + sizeof(struct rte_vlan_hdr)) |
				 MLX5_ETH_WQE_DATA_INLINE);
		pdst = &dseg->inline_data[0];
	} else {
		/* The entire legacy MPW session counter is stored on close. */
		pdst = (uint8_t *)dseg;
	}
	memcpy(pdst, buf, MLX5_DSEG_MIN_INLINE_SIZE);
	buf += MLX5_DSEG_MIN_INLINE_SIZE;
	pdst += MLX5_DSEG_MIN_INLINE_SIZE;
	len -= MLX5_DSEG_MIN_INLINE_SIZE;
	/* Insert VLAN ethertype + VLAN tag. Pointer is aligned. */
	MLX5_ASSERT(pdst == RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE));
	if (unlikely(pdst >= (uint8_t *)txq->wqes_end))
		pdst = (uint8_t *)txq->wqes;
	*(uint32_t *)pdst = rte_cpu_to_be_32((RTE_ETHER_TYPE_VLAN << 16) |
					     loc->mbuf->vlan_tci);
	pdst += sizeof(struct rte_vlan_hdr);
	/*
	 * The WQEBB space availability is checked by caller.
	 * Here we should be aware of WQE ring buffer wraparound only.
	 */
	part = (uint8_t *)txq->wqes_end - pdst;
	part = RTE_MIN(part, len);
	do {
		rte_memcpy(pdst, buf, part);
		len -= part;
		if (likely(!len)) {
			pdst += part;
			if (!MLX5_TXOFF_CONFIG(MPW))
				pdst = RTE_PTR_ALIGN(pdst, MLX5_WSEG_SIZE);
			/* Note: no final wraparound check here. */
			return (struct mlx5_wqe_dseg *)pdst;
		}
		pdst = (uint8_t *)txq->wqes;
		buf += part;
		part = len;
	} while (true);
}

/**
 * Build the Ethernet Segment with optionally inlined data with
 * VLAN insertion and following Data Segments (if any) from
 * multi-segment packet. Used by ordinary send and TSO.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param wqe
 *   Pointer to WQE to fill with built Ethernet/Data Segments.
 * @param vlan
 *   Length of VLAN header to insert, 0 means no VLAN insertion.
 * @param inlen
 *   Data length to inline. For TSO this parameter specifies the exact
 *   value, for the ordinary send routine it can be aligned by the caller
 *   to provide better WQE space saving and data buffer start address
 *   alignment. This length includes VLAN header being inserted.
 * @param tso
 *   Zero means ordinary send, inlined data can be extended,
 *   otherwise this is TSO, inlined data length is fixed.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   Actual size of built WQE in segments.
 */
static __rte_always_inline unsigned int
mlx5_tx_mseg_build(struct mlx5_txq_data *__rte_restrict txq,
		   struct mlx5_txq_local *__rte_restrict loc,
		   struct mlx5_wqe *__rte_restrict wqe,
		   unsigned int vlan,
		   unsigned int inlen,
		   unsigned int tso,
		   unsigned int olx __rte_unused)
{
	struct mlx5_wqe_dseg *__rte_restrict dseg;
	unsigned int ds;

	MLX5_ASSERT((rte_pktmbuf_pkt_len(loc->mbuf) + vlan) >= inlen);
	loc->mbuf_nseg = NB_SEGS(loc->mbuf);
	loc->mbuf_off = 0;

	dseg = mlx5_tx_eseg_mdat(txq, loc, wqe, vlan, inlen, tso, olx);
	if (!loc->mbuf_nseg)
		goto dseg_done;
	/*
	 * There are still some mbufs remaining, not inlined.
	 * The first mbuf may be partially inlined and we
	 * must process the possible non-zero data offset.
	 */
	if (loc->mbuf_off) {
		unsigned int dlen;
		uint8_t *dptr;

		/*
		 * Exhausted packets must be dropped before.
		 * Non-zero offset means there is some data
		 * remaining in the packet.
		 */
		MLX5_ASSERT(loc->mbuf_off < rte_pktmbuf_data_len(loc->mbuf));
		MLX5_ASSERT(rte_pktmbuf_data_len(loc->mbuf));
		dptr = rte_pktmbuf_mtod_offset(loc->mbuf, uint8_t *,
					       loc->mbuf_off);
		dlen = rte_pktmbuf_data_len(loc->mbuf) - loc->mbuf_off;
		/*
		 * Build the pointer/minimal Data Segment.
		 * Do ring buffer wrapping check in advance.
		 */
		if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end)
			dseg = (struct mlx5_wqe_dseg *)txq->wqes;
		mlx5_tx_dseg_iptr(txq, loc, dseg, dptr, dlen, olx);
		/* Store the mbuf to be freed on completion. */
		MLX5_ASSERT(loc->elts_free);
		txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf;
		--loc->elts_free;
		++dseg;
		if (--loc->mbuf_nseg == 0)
			goto dseg_done;
		loc->mbuf = loc->mbuf->next;
		loc->mbuf_off = 0;
	}
	do {
		if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) {
			struct rte_mbuf *mbuf;

			/* Zero length segment found, just skip. */
			mbuf = loc->mbuf;
			loc->mbuf = loc->mbuf->next;
			rte_pktmbuf_free_seg(mbuf);
			if (--loc->mbuf_nseg == 0)
				break;
		} else {
			if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end)
				dseg = (struct mlx5_wqe_dseg *)txq->wqes;
			mlx5_tx_dseg_iptr
				(txq, loc, dseg,
				 rte_pktmbuf_mtod(loc->mbuf, uint8_t *),
				 rte_pktmbuf_data_len(loc->mbuf), olx);
			MLX5_ASSERT(loc->elts_free);
			txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf;
			--loc->elts_free;
			++dseg;
			if (--loc->mbuf_nseg == 0)
				break;
			loc->mbuf = loc->mbuf->next;
		}
	} while (true);

dseg_done:
	/* Calculate actual segments used from the dseg pointer. */
	if ((uintptr_t)wqe < (uintptr_t)dseg)
		ds = ((uintptr_t)dseg - (uintptr_t)wqe) / MLX5_WSEG_SIZE;
	else
		ds = (((uintptr_t)dseg - (uintptr_t)wqe) +
		      txq->wqe_s * MLX5_WQE_SIZE) / MLX5_WSEG_SIZE;
	return ds;
}

/**
 * The routine checks the timestamp flag in the current packet,
 * and pushes a WAIT WQE into the queue if scheduling is required.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   MLX5_TXCMP_CODE_EXIT - sending is done or impossible.
 *   MLX5_TXCMP_CODE_SINGLE - continue processing with the packet.
 *   MLX5_TXCMP_CODE_MULTI - the WAIT inserted, continue processing.
 * Local context variables partially updated.
 */
static __rte_always_inline enum mlx5_txcmp_code
mlx5_tx_schedule_send(struct mlx5_txq_data *restrict txq,
		      struct mlx5_txq_local *restrict loc,
		      unsigned int olx)
{
	if (MLX5_TXOFF_CONFIG(TXPP) &&
	    loc->mbuf->ol_flags & txq->ts_mask) {
		struct mlx5_wqe *wqe;
		uint64_t ts;
		int32_t wci;

		/*
		 * Estimate the required space quickly and roughly.
		 * We would like to ensure the packet can be pushed
		 * to the queue and we won't get the orphan WAIT WQE.
		 */
		if (loc->wqe_free <= MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE ||
		    loc->elts_free < NB_SEGS(loc->mbuf))
			return MLX5_TXCMP_CODE_EXIT;
		/* Convert the timestamp into completion to wait. */
		ts = *RTE_MBUF_DYNFIELD(loc->mbuf, txq->ts_offset, uint64_t *);
		wci = mlx5_txpp_convert_tx_ts(txq->sh, ts);
		if (unlikely(wci < 0))
			return MLX5_TXCMP_CODE_SINGLE;
		/* Build the WAIT WQE with specified completion. */
		wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m);
		mlx5_tx_cseg_init(txq, loc, wqe, 2, MLX5_OPCODE_WAIT, olx);
		mlx5_tx_wseg_init(txq, loc, wqe, wci, olx);
		++txq->wqe_ci;
		--loc->wqe_free;
		return MLX5_TXCMP_CODE_MULTI;
	}
	return MLX5_TXCMP_CODE_SINGLE;
}

/**
 * Tx one packet function for multi-segment TSO. Supports all
 * types of Tx offloads, uses MLX5_OPCODE_TSO to build WQEs,
 * sends one packet per WQE.
 *
 * This routine is responsible for storing the processed mbuf
 * into the elts ring buffer and updating elts_head.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param loc
 *   Pointer to burst routine local context.
 * @param olx
 *   Configured Tx offloads mask. It is fully defined at
 *   compile time and may be used for optimization.
 *
 * @return
 *   MLX5_TXCMP_CODE_EXIT - sending is done or impossible.
 *   MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred.
 * Local context variables partially updated.
 */
static __rte_always_inline enum mlx5_txcmp_code
mlx5_tx_packet_multi_tso(struct mlx5_txq_data *__rte_restrict txq,
			 struct mlx5_txq_local *__rte_restrict loc,
			 unsigned int olx)
{
	struct mlx5_wqe *__rte_restrict wqe;
	unsigned int ds, dlen, inlen, ntcp, vlan = 0;

	if (MLX5_TXOFF_CONFIG(TXPP)) {
		enum mlx5_txcmp_code wret;

		/* Generate WAIT for scheduling if requested. */
		wret = mlx5_tx_schedule_send(txq, loc, olx);
		if (wret == MLX5_TXCMP_CODE_EXIT)
			return MLX5_TXCMP_CODE_EXIT;
		if (wret == MLX5_TXCMP_CODE_ERROR)
			return MLX5_TXCMP_CODE_ERROR;
	}
	/*
	 * Calculate data length to be inlined to estimate
	 * the required space in WQE ring buffer.
1699 */ 1700 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1701 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1702 vlan = sizeof(struct rte_vlan_hdr); 1703 inlen = loc->mbuf->l2_len + vlan + 1704 loc->mbuf->l3_len + loc->mbuf->l4_len; 1705 if (unlikely((!inlen || !loc->mbuf->tso_segsz))) 1706 return MLX5_TXCMP_CODE_ERROR; 1707 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 1708 inlen += loc->mbuf->outer_l2_len + loc->mbuf->outer_l3_len; 1709 /* Packet must contain all TSO headers. */ 1710 if (unlikely(inlen > MLX5_MAX_TSO_HEADER || 1711 inlen <= MLX5_ESEG_MIN_INLINE_SIZE || 1712 inlen > (dlen + vlan))) 1713 return MLX5_TXCMP_CODE_ERROR; 1714 /* 1715 * Check whether there are enough free WQEBBs: 1716 * - Control Segment 1717 * - Ethernet Segment 1718 * - First Segment of inlined Ethernet data 1719 * - ... data continued ... 1720 * - Data Segments of pointer/min inline type 1721 */ 1722 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 1723 MLX5_ESEG_MIN_INLINE_SIZE + 1724 MLX5_WSEG_SIZE + 1725 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 1726 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1727 return MLX5_TXCMP_CODE_EXIT; 1728 /* Check for maximal WQE size. */ 1729 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 1730 return MLX5_TXCMP_CODE_ERROR; 1731 #ifdef MLX5_PMD_SOFT_COUNTERS 1732 /* Update sent data bytes/packets counters. */ 1733 ntcp = (dlen - (inlen - vlan) + loc->mbuf->tso_segsz - 1) / 1734 loc->mbuf->tso_segsz; 1735 /* 1736 * One will be added for mbuf itself at the end of the mlx5_tx_burst 1737 * from loc->pkts_sent field. 1738 */ 1739 --ntcp; 1740 txq->stats.opackets += ntcp; 1741 txq->stats.obytes += dlen + vlan + ntcp * inlen; 1742 #endif 1743 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1744 loc->wqe_last = wqe; 1745 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_TSO, olx); 1746 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 1, olx); 1747 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 1748 txq->wqe_ci += (ds + 3) / 4; 1749 loc->wqe_free -= (ds + 3) / 4; 1750 return MLX5_TXCMP_CODE_MULTI; 1751 } 1752 1753 /** 1754 * Tx one packet function for multi-segment SEND. Supports all types of Tx 1755 * offloads, uses MLX5_OPCODE_SEND to build WQEs, sends one packet per WQE, 1756 * without any data inlining in Ethernet Segment. 1757 * 1758 * This routine is responsible for storing processed mbuf 1759 * into elts ring buffer and update elts_head. 1760 * 1761 * @param txq 1762 * Pointer to TX queue structure. 1763 * @param loc 1764 * Pointer to burst routine local context. 1765 * @param olx 1766 * Configured Tx offloads mask. It is fully defined at 1767 * compile time and may be used for optimization. 1768 * 1769 * @return 1770 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1771 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1772 * Local context variables partially updated. 1773 */ 1774 static __rte_always_inline enum mlx5_txcmp_code 1775 mlx5_tx_packet_multi_send(struct mlx5_txq_data *__rte_restrict txq, 1776 struct mlx5_txq_local *__rte_restrict loc, 1777 unsigned int olx) 1778 { 1779 struct mlx5_wqe_dseg *__rte_restrict dseg; 1780 struct mlx5_wqe *__rte_restrict wqe; 1781 unsigned int ds, nseg; 1782 1783 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1784 if (MLX5_TXOFF_CONFIG(TXPP)) { 1785 enum mlx5_txcmp_code wret; 1786 1787 /* Generate WAIT for scheduling if requested. 
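 * A successfully built WAIT WQE occupies one extra WQEBB of its own,
 * see mlx5_tx_schedule_send() above.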
*/ 1788 wret = mlx5_tx_schedule_send(txq, loc, olx); 1789 if (wret == MLX5_TXCMP_CODE_EXIT) 1790 return MLX5_TXCMP_CODE_EXIT; 1791 if (wret == MLX5_TXCMP_CODE_ERROR) 1792 return MLX5_TXCMP_CODE_ERROR; 1793 } 1794 /* 1795 * No inline at all, it means the CPU cycles saving is prioritized at 1796 * configuration, we should not copy any packet data to WQE. 1797 */ 1798 nseg = NB_SEGS(loc->mbuf); 1799 ds = 2 + nseg; 1800 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 1801 return MLX5_TXCMP_CODE_EXIT; 1802 /* Check for maximal WQE size. */ 1803 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 1804 return MLX5_TXCMP_CODE_ERROR; 1805 /* 1806 * Some Tx offloads may cause an error if packet is not long enough, 1807 * check against assumed minimal length. 1808 */ 1809 if (rte_pktmbuf_pkt_len(loc->mbuf) <= MLX5_ESEG_MIN_INLINE_SIZE) 1810 return MLX5_TXCMP_CODE_ERROR; 1811 #ifdef MLX5_PMD_SOFT_COUNTERS 1812 /* Update sent data bytes counter. */ 1813 txq->stats.obytes += rte_pktmbuf_pkt_len(loc->mbuf); 1814 if (MLX5_TXOFF_CONFIG(VLAN) && 1815 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1816 txq->stats.obytes += sizeof(struct rte_vlan_hdr); 1817 #endif 1818 /* 1819 * SEND WQE, one WQEBB: 1820 * - Control Segment, SEND opcode 1821 * - Ethernet Segment, optional VLAN, no inline 1822 * - Data Segments, pointer only type 1823 */ 1824 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 1825 loc->wqe_last = wqe; 1826 mlx5_tx_cseg_init(txq, loc, wqe, ds, MLX5_OPCODE_SEND, olx); 1827 mlx5_tx_eseg_none(txq, loc, wqe, olx); 1828 dseg = &wqe->dseg[0]; 1829 do { 1830 if (unlikely(!rte_pktmbuf_data_len(loc->mbuf))) { 1831 struct rte_mbuf *mbuf; 1832 1833 /* 1834 * Zero length segment found, have to correct total 1835 * size of WQE in segments. 1836 * It is supposed to be rare occasion, so in normal 1837 * case (no zero length segments) we avoid extra 1838 * writing to the Control Segment. 1839 */ 1840 --ds; 1841 wqe->cseg.sq_ds -= RTE_BE32(1); 1842 mbuf = loc->mbuf; 1843 loc->mbuf = mbuf->next; 1844 rte_pktmbuf_free_seg(mbuf); 1845 if (--nseg == 0) 1846 break; 1847 } else { 1848 mlx5_tx_dseg_ptr 1849 (txq, loc, dseg, 1850 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 1851 rte_pktmbuf_data_len(loc->mbuf), olx); 1852 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 1853 --loc->elts_free; 1854 if (--nseg == 0) 1855 break; 1856 ++dseg; 1857 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 1858 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 1859 loc->mbuf = loc->mbuf->next; 1860 } 1861 } while (true); 1862 txq->wqe_ci += (ds + 3) / 4; 1863 loc->wqe_free -= (ds + 3) / 4; 1864 return MLX5_TXCMP_CODE_MULTI; 1865 } 1866 1867 /** 1868 * Tx one packet function for multi-segment SEND. Supports all 1869 * types of Tx offloads, uses MLX5_OPCODE_SEND to build WQEs, 1870 * sends one packet per WQE, with data inlining in 1871 * Ethernet Segment and minimal Data Segments. 1872 * 1873 * This routine is responsible for storing processed mbuf 1874 * into elts ring buffer and update elts_head. 1875 * 1876 * @param txq 1877 * Pointer to TX queue structure. 1878 * @param loc 1879 * Pointer to burst routine local context. 1880 * @param olx 1881 * Configured Tx offloads mask. It is fully defined at 1882 * compile time and may be used for optimization. 1883 * 1884 * @return 1885 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 1886 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 1887 * Local context variables partially updated. 
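 *
 * If the packet exceeds the configured inline limits or carries the
 * no-inline hint, the routine may inline only the minimal txq->inlen_mode
 * amount of data or fall back to mlx5_tx_packet_multi_send(), see the
 * checks below.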
1888 */ 1889 static __rte_always_inline enum mlx5_txcmp_code 1890 mlx5_tx_packet_multi_inline(struct mlx5_txq_data *__rte_restrict txq, 1891 struct mlx5_txq_local *__rte_restrict loc, 1892 unsigned int olx) 1893 { 1894 struct mlx5_wqe *__rte_restrict wqe; 1895 unsigned int ds, inlen, dlen, vlan = 0; 1896 1897 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 1898 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 1899 if (MLX5_TXOFF_CONFIG(TXPP)) { 1900 enum mlx5_txcmp_code wret; 1901 1902 /* Generate WAIT for scheduling if requested. */ 1903 wret = mlx5_tx_schedule_send(txq, loc, olx); 1904 if (wret == MLX5_TXCMP_CODE_EXIT) 1905 return MLX5_TXCMP_CODE_EXIT; 1906 if (wret == MLX5_TXCMP_CODE_ERROR) 1907 return MLX5_TXCMP_CODE_ERROR; 1908 } 1909 /* 1910 * First calculate data length to be inlined 1911 * to estimate the required space for WQE. 1912 */ 1913 dlen = rte_pktmbuf_pkt_len(loc->mbuf); 1914 if (MLX5_TXOFF_CONFIG(VLAN) && loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 1915 vlan = sizeof(struct rte_vlan_hdr); 1916 inlen = dlen + vlan; 1917 /* Check against minimal length. */ 1918 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 1919 return MLX5_TXCMP_CODE_ERROR; 1920 MLX5_ASSERT(txq->inlen_send >= MLX5_ESEG_MIN_INLINE_SIZE); 1921 if (inlen > txq->inlen_send || 1922 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) { 1923 struct rte_mbuf *mbuf; 1924 unsigned int nxlen; 1925 uintptr_t start; 1926 1927 mbuf = loc->mbuf; 1928 nxlen = rte_pktmbuf_data_len(mbuf); 1929 /* 1930 * Packet length exceeds the allowed inline data length, 1931 * check whether the minimal inlining is required. 1932 */ 1933 if (txq->inlen_mode) { 1934 MLX5_ASSERT(txq->inlen_mode >= 1935 MLX5_ESEG_MIN_INLINE_SIZE); 1936 MLX5_ASSERT(txq->inlen_mode <= txq->inlen_send); 1937 inlen = RTE_MIN(txq->inlen_mode, inlen); 1938 } else if (vlan && !txq->vlan_en) { 1939 /* 1940 * VLAN insertion is requested and hardware does not 1941 * support the offload, will do with software inline. 1942 */ 1943 inlen = MLX5_ESEG_MIN_INLINE_SIZE; 1944 } else if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE || 1945 nxlen > txq->inlen_send) { 1946 return mlx5_tx_packet_multi_send(txq, loc, olx); 1947 } else { 1948 goto do_first; 1949 } 1950 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 1951 goto do_build; 1952 /* 1953 * Now we know the minimal amount of data is requested 1954 * to inline. Check whether we should inline the buffers 1955 * from the chain beginning to eliminate some mbufs. 1956 */ 1957 if (unlikely(nxlen <= txq->inlen_send)) { 1958 /* We can inline first mbuf at least. */ 1959 if (nxlen < inlen) { 1960 unsigned int smlen; 1961 1962 /* Scan mbufs till inlen filled. */ 1963 do { 1964 smlen = nxlen; 1965 mbuf = NEXT(mbuf); 1966 MLX5_ASSERT(mbuf); 1967 nxlen = rte_pktmbuf_data_len(mbuf); 1968 nxlen += smlen; 1969 } while (unlikely(nxlen < inlen)); 1970 if (unlikely(nxlen > txq->inlen_send)) { 1971 /* We cannot inline entire mbuf. */ 1972 smlen = inlen - smlen; 1973 start = rte_pktmbuf_mtod_offset 1974 (mbuf, uintptr_t, smlen); 1975 goto do_align; 1976 } 1977 } 1978 do_first: 1979 do { 1980 inlen = nxlen; 1981 mbuf = NEXT(mbuf); 1982 /* There should be not end of packet. */ 1983 MLX5_ASSERT(mbuf); 1984 if (mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 1985 break; 1986 nxlen = inlen + rte_pktmbuf_data_len(mbuf); 1987 } while (unlikely(nxlen < txq->inlen_send)); 1988 } 1989 start = rte_pktmbuf_mtod(mbuf, uintptr_t); 1990 /* 1991 * Check whether we can do inline to align start 1992 * address of data buffer to cacheline. 
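 * For example (illustrative numbers, assuming a 64-byte RTE_CACHE_LINE_SIZE):
 * if the first byte that is not inlined would reside at an address ending
 * in 0x28, the expression below yields 24, so up to 24 extra bytes are
 * inlined to make the remaining pointed-to data begin on a cache line
 * boundary, provided the extended length still fits into txq->inlen_send.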
1993 */ 1994 do_align: 1995 start = (~start + 1) & (RTE_CACHE_LINE_SIZE - 1); 1996 if (unlikely(start)) { 1997 start += inlen; 1998 if (start <= txq->inlen_send) 1999 inlen = start; 2000 } 2001 } 2002 /* 2003 * Check whether there are enough free WQEBBs: 2004 * - Control Segment 2005 * - Ethernet Segment 2006 * - First Segment of inlined Ethernet data 2007 * - ... data continued ... 2008 * - Data Segments of pointer/min inline type 2009 * 2010 * Estimate the number of Data Segments conservatively, 2011 * supposing no any mbufs is being freed during inlining. 2012 */ 2013 do_build: 2014 MLX5_ASSERT(inlen <= txq->inlen_send); 2015 ds = NB_SEGS(loc->mbuf) + 2 + (inlen - 2016 MLX5_ESEG_MIN_INLINE_SIZE + 2017 MLX5_WSEG_SIZE + 2018 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2019 if (unlikely(loc->wqe_free < ((ds + 3) / 4))) 2020 return MLX5_TXCMP_CODE_EXIT; 2021 /* Check for maximal WQE size. */ 2022 if (unlikely((MLX5_WQE_SIZE_MAX / MLX5_WSEG_SIZE) < ((ds + 3) / 4))) 2023 return MLX5_TXCMP_CODE_ERROR; 2024 #ifdef MLX5_PMD_SOFT_COUNTERS 2025 /* Update sent data bytes/packets counters. */ 2026 txq->stats.obytes += dlen + vlan; 2027 #endif 2028 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2029 loc->wqe_last = wqe; 2030 mlx5_tx_cseg_init(txq, loc, wqe, 0, MLX5_OPCODE_SEND, olx); 2031 ds = mlx5_tx_mseg_build(txq, loc, wqe, vlan, inlen, 0, olx); 2032 wqe->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2033 txq->wqe_ci += (ds + 3) / 4; 2034 loc->wqe_free -= (ds + 3) / 4; 2035 return MLX5_TXCMP_CODE_MULTI; 2036 } 2037 2038 /** 2039 * Tx burst function for multi-segment packets. Supports all 2040 * types of Tx offloads, uses MLX5_OPCODE_SEND/TSO to build WQEs, 2041 * sends one packet per WQE. Function stops sending if it 2042 * encounters the single-segment packet. 2043 * 2044 * This routine is responsible for storing processed mbuf 2045 * into elts ring buffer and update elts_head. 2046 * 2047 * @param txq 2048 * Pointer to TX queue structure. 2049 * @param[in] pkts 2050 * Packets to transmit. 2051 * @param pkts_n 2052 * Number of packets in array. 2053 * @param loc 2054 * Pointer to burst routine local context. 2055 * @param olx 2056 * Configured Tx offloads mask. It is fully defined at 2057 * compile time and may be used for optimization. 2058 * 2059 * @return 2060 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2061 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2062 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2063 * MLX5_TXCMP_CODE_TSO - TSO single-segment packet encountered. 2064 * Local context variables updated. 2065 */ 2066 static __rte_always_inline enum mlx5_txcmp_code 2067 mlx5_tx_burst_mseg(struct mlx5_txq_data *__rte_restrict txq, 2068 struct rte_mbuf **__rte_restrict pkts, 2069 unsigned int pkts_n, 2070 struct mlx5_txq_local *__rte_restrict loc, 2071 unsigned int olx) 2072 { 2073 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2074 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2075 pkts += loc->pkts_sent + 1; 2076 pkts_n -= loc->pkts_sent; 2077 for (;;) { 2078 enum mlx5_txcmp_code ret; 2079 2080 MLX5_ASSERT(NB_SEGS(loc->mbuf) > 1); 2081 /* 2082 * Estimate the number of free elts quickly but conservatively. 2083 * Some segment may be fully inlined and freed, 2084 * ignore this here - precise estimation is costly. 2085 */ 2086 if (loc->elts_free < NB_SEGS(loc->mbuf)) 2087 return MLX5_TXCMP_CODE_EXIT; 2088 if (MLX5_TXOFF_CONFIG(TSO) && 2089 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) { 2090 /* Proceed with multi-segment TSO. 
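 * TSO takes precedence over the inlining and non-inlining SEND branches
 * below.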
*/ 2091 ret = mlx5_tx_packet_multi_tso(txq, loc, olx); 2092 } else if (MLX5_TXOFF_CONFIG(INLINE)) { 2093 /* Proceed with multi-segment SEND with inlining. */ 2094 ret = mlx5_tx_packet_multi_inline(txq, loc, olx); 2095 } else { 2096 /* Proceed with multi-segment SEND w/o inlining. */ 2097 ret = mlx5_tx_packet_multi_send(txq, loc, olx); 2098 } 2099 if (ret == MLX5_TXCMP_CODE_EXIT) 2100 return MLX5_TXCMP_CODE_EXIT; 2101 if (ret == MLX5_TXCMP_CODE_ERROR) 2102 return MLX5_TXCMP_CODE_ERROR; 2103 /* WQE is built, go to the next packet. */ 2104 ++loc->pkts_sent; 2105 --pkts_n; 2106 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2107 return MLX5_TXCMP_CODE_EXIT; 2108 loc->mbuf = *pkts++; 2109 if (pkts_n > 1) 2110 rte_prefetch0(*pkts); 2111 if (likely(NB_SEGS(loc->mbuf) > 1)) 2112 continue; 2113 /* Here ends the series of multi-segment packets. */ 2114 if (MLX5_TXOFF_CONFIG(TSO) && 2115 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2116 return MLX5_TXCMP_CODE_TSO; 2117 return MLX5_TXCMP_CODE_SINGLE; 2118 } 2119 MLX5_ASSERT(false); 2120 } 2121 2122 /** 2123 * Tx burst function for single-segment packets with TSO. 2124 * Supports all types of Tx offloads, except multi-packets. 2125 * Uses MLX5_OPCODE_TSO to build WQEs, sends one packet per WQE. 2126 * Function stops sending if it encounters the multi-segment 2127 * packet or packet without TSO requested. 2128 * 2129 * The routine is responsible for storing processed mbuf into elts ring buffer 2130 * and update elts_head if inline offloads is requested due to possible early 2131 * freeing of the inlined mbufs (can not store pkts array in elts as a batch). 2132 * 2133 * @param txq 2134 * Pointer to TX queue structure. 2135 * @param[in] pkts 2136 * Packets to transmit. 2137 * @param pkts_n 2138 * Number of packets in array. 2139 * @param loc 2140 * Pointer to burst routine local context. 2141 * @param olx 2142 * Configured Tx offloads mask. It is fully defined at 2143 * compile time and may be used for optimization. 2144 * 2145 * @return 2146 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2147 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2148 * MLX5_TXCMP_CODE_SINGLE - single-segment packet encountered. 2149 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2150 * Local context variables updated. 2151 */ 2152 static __rte_always_inline enum mlx5_txcmp_code 2153 mlx5_tx_burst_tso(struct mlx5_txq_data *__rte_restrict txq, 2154 struct rte_mbuf **__rte_restrict pkts, 2155 unsigned int pkts_n, 2156 struct mlx5_txq_local *__rte_restrict loc, 2157 unsigned int olx) 2158 { 2159 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2160 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2161 pkts += loc->pkts_sent + 1; 2162 pkts_n -= loc->pkts_sent; 2163 for (;;) { 2164 struct mlx5_wqe_dseg *__rte_restrict dseg; 2165 struct mlx5_wqe *__rte_restrict wqe; 2166 unsigned int ds, dlen, hlen, ntcp, vlan = 0; 2167 uint8_t *dptr; 2168 2169 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2170 if (MLX5_TXOFF_CONFIG(TXPP)) { 2171 enum mlx5_txcmp_code wret; 2172 2173 /* Generate WAIT for scheduling if requested. 
*/ 2174 wret = mlx5_tx_schedule_send(txq, loc, olx); 2175 if (wret == MLX5_TXCMP_CODE_EXIT) 2176 return MLX5_TXCMP_CODE_EXIT; 2177 if (wret == MLX5_TXCMP_CODE_ERROR) 2178 return MLX5_TXCMP_CODE_ERROR; 2179 } 2180 dlen = rte_pktmbuf_data_len(loc->mbuf); 2181 if (MLX5_TXOFF_CONFIG(VLAN) && 2182 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2183 vlan = sizeof(struct rte_vlan_hdr); 2184 } 2185 /* 2186 * First calculate the WQE size to check 2187 * whether we have enough space in ring buffer. 2188 */ 2189 hlen = loc->mbuf->l2_len + vlan + 2190 loc->mbuf->l3_len + loc->mbuf->l4_len; 2191 if (unlikely((!hlen || !loc->mbuf->tso_segsz))) 2192 return MLX5_TXCMP_CODE_ERROR; 2193 if (loc->mbuf->ol_flags & RTE_MBUF_F_TX_TUNNEL_MASK) 2194 hlen += loc->mbuf->outer_l2_len + 2195 loc->mbuf->outer_l3_len; 2196 /* Segment must contain all TSO headers. */ 2197 if (unlikely(hlen > MLX5_MAX_TSO_HEADER || 2198 hlen <= MLX5_ESEG_MIN_INLINE_SIZE || 2199 hlen > (dlen + vlan))) 2200 return MLX5_TXCMP_CODE_ERROR; 2201 /* 2202 * Check whether there are enough free WQEBBs: 2203 * - Control Segment 2204 * - Ethernet Segment 2205 * - First Segment of inlined Ethernet data 2206 * - ... data continued ... 2207 * - Finishing Data Segment of pointer type 2208 */ 2209 ds = 4 + (hlen - MLX5_ESEG_MIN_INLINE_SIZE + 2210 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 2211 if (loc->wqe_free < ((ds + 3) / 4)) 2212 return MLX5_TXCMP_CODE_EXIT; 2213 #ifdef MLX5_PMD_SOFT_COUNTERS 2214 /* Update sent data bytes/packets counters. */ 2215 ntcp = (dlen + vlan - hlen + 2216 loc->mbuf->tso_segsz - 1) / 2217 loc->mbuf->tso_segsz; 2218 /* 2219 * One will be added for mbuf itself at the end 2220 * of the mlx5_tx_burst from loc->pkts_sent field. 2221 */ 2222 --ntcp; 2223 txq->stats.opackets += ntcp; 2224 txq->stats.obytes += dlen + vlan + ntcp * hlen; 2225 #endif 2226 /* 2227 * Build the TSO WQE: 2228 * - Control Segment 2229 * - Ethernet Segment with hlen bytes inlined 2230 * - Data Segment of pointer type 2231 */ 2232 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2233 loc->wqe_last = wqe; 2234 mlx5_tx_cseg_init(txq, loc, wqe, ds, 2235 MLX5_OPCODE_TSO, olx); 2236 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, hlen, 1, olx); 2237 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + hlen - vlan; 2238 dlen -= hlen - vlan; 2239 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2240 /* 2241 * WQE is built, update the loop parameters 2242 * and go to the next packet. 2243 */ 2244 txq->wqe_ci += (ds + 3) / 4; 2245 loc->wqe_free -= (ds + 3) / 4; 2246 if (MLX5_TXOFF_CONFIG(INLINE)) 2247 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2248 --loc->elts_free; 2249 ++loc->pkts_sent; 2250 --pkts_n; 2251 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2252 return MLX5_TXCMP_CODE_EXIT; 2253 loc->mbuf = *pkts++; 2254 if (pkts_n > 1) 2255 rte_prefetch0(*pkts); 2256 if (MLX5_TXOFF_CONFIG(MULTI) && 2257 unlikely(NB_SEGS(loc->mbuf) > 1)) 2258 return MLX5_TXCMP_CODE_MULTI; 2259 if (likely(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG))) 2260 return MLX5_TXCMP_CODE_SINGLE; 2261 /* Continue with the next TSO packet. */ 2262 } 2263 MLX5_ASSERT(false); 2264 } 2265 2266 /** 2267 * Analyze the packet and select the best method to send. 2268 * 2269 * @param txq 2270 * Pointer to TX queue structure. 2271 * @param loc 2272 * Pointer to burst routine local context. 2273 * @param olx 2274 * Configured Tx offloads mask. It is fully defined at 2275 * compile time and may be used for optimization. 
2276 * @param newp 2277 * The predefined flag whether do complete check for 2278 * multi-segment packets and TSO. 2279 * 2280 * @return 2281 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2282 * MLX5_TXCMP_CODE_TSO - TSO required, use TSO/LSO. 2283 * MLX5_TXCMP_CODE_SINGLE - single-segment packet, use SEND. 2284 * MLX5_TXCMP_CODE_EMPW - single-segment packet, use MPW. 2285 */ 2286 static __rte_always_inline enum mlx5_txcmp_code 2287 mlx5_tx_able_to_empw(struct mlx5_txq_data *__rte_restrict txq, 2288 struct mlx5_txq_local *__rte_restrict loc, 2289 unsigned int olx, 2290 bool newp) 2291 { 2292 /* Check for multi-segment packet. */ 2293 if (newp && 2294 MLX5_TXOFF_CONFIG(MULTI) && 2295 unlikely(NB_SEGS(loc->mbuf) > 1)) 2296 return MLX5_TXCMP_CODE_MULTI; 2297 /* Check for TSO packet. */ 2298 if (newp && 2299 MLX5_TXOFF_CONFIG(TSO) && 2300 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) 2301 return MLX5_TXCMP_CODE_TSO; 2302 /* Check if eMPW is enabled at all. */ 2303 if (!MLX5_TXOFF_CONFIG(EMPW)) 2304 return MLX5_TXCMP_CODE_SINGLE; 2305 /* Check if eMPW can be engaged. */ 2306 if (MLX5_TXOFF_CONFIG(VLAN) && 2307 unlikely(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) && 2308 (!MLX5_TXOFF_CONFIG(INLINE) || 2309 unlikely((rte_pktmbuf_data_len(loc->mbuf) + 2310 sizeof(struct rte_vlan_hdr)) > txq->inlen_empw))) { 2311 /* 2312 * eMPW does not support VLAN insertion offload, we have to 2313 * inline the entire packet but packet is too long for inlining. 2314 */ 2315 return MLX5_TXCMP_CODE_SINGLE; 2316 } 2317 return MLX5_TXCMP_CODE_EMPW; 2318 } 2319 2320 /** 2321 * Check the next packet attributes to match with the eMPW batch ones. 2322 * In addition, for legacy MPW the packet length is checked either. 2323 * 2324 * @param txq 2325 * Pointer to TX queue structure. 2326 * @param es 2327 * Pointer to Ethernet Segment of eMPW batch. 2328 * @param loc 2329 * Pointer to burst routine local context. 2330 * @param dlen 2331 * Length of previous packet in MPW descriptor. 2332 * @param olx 2333 * Configured Tx offloads mask. It is fully defined at 2334 * compile time and may be used for optimization. 2335 * 2336 * @return 2337 * true - packet match with eMPW batch attributes. 2338 * false - no match, eMPW should be restarted. 2339 */ 2340 static __rte_always_inline bool 2341 mlx5_tx_match_empw(struct mlx5_txq_data *__rte_restrict txq, 2342 struct mlx5_wqe_eseg *__rte_restrict es, 2343 struct mlx5_txq_local *__rte_restrict loc, 2344 uint32_t dlen, 2345 unsigned int olx) 2346 { 2347 uint8_t swp_flags = 0; 2348 2349 /* Compare the checksum flags, if any. */ 2350 if (MLX5_TXOFF_CONFIG(CSUM) && 2351 txq_ol_cksum_to_cs(loc->mbuf) != es->cs_flags) 2352 return false; 2353 /* Compare the Software Parser offsets and flags. */ 2354 if (MLX5_TXOFF_CONFIG(SWP) && 2355 (es->swp_offs != txq_mbuf_to_swp(loc, &swp_flags, olx) || 2356 es->swp_flags != swp_flags)) 2357 return false; 2358 /* Fill metadata field if needed. */ 2359 if (MLX5_TXOFF_CONFIG(METADATA) && 2360 es->metadata != (loc->mbuf->ol_flags & RTE_MBUF_DYNFLAG_TX_METADATA ? 2361 rte_cpu_to_be_32(*RTE_FLOW_DYNF_METADATA(loc->mbuf)) : 0)) 2362 return false; 2363 /* Legacy MPW can send packets with the same length only. */ 2364 if (MLX5_TXOFF_CONFIG(MPW) && 2365 dlen != rte_pktmbuf_data_len(loc->mbuf)) 2366 return false; 2367 /* There must be no VLAN packets in eMPW loop. */ 2368 if (MLX5_TXOFF_CONFIG(VLAN)) 2369 MLX5_ASSERT(!(loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN)); 2370 /* Check if the scheduling is requested. 
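 * A packet carrying the send scheduling timestamp is not appended to an
 * open eMPW session; the batch is closed so that the WAIT WQE can be
 * built when a new session is started.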
*/ 2371 if (MLX5_TXOFF_CONFIG(TXPP) && 2372 loc->mbuf->ol_flags & txq->ts_mask) 2373 return false; 2374 return true; 2375 } 2376 2377 /** 2378 * Update send loop variables and WQE for eMPW loop without data inlining. 2379 * Number of Data Segments is equal to the number of sent packets. 2380 * 2381 * @param txq 2382 * Pointer to TX queue structure. 2383 * @param loc 2384 * Pointer to burst routine local context. 2385 * @param ds 2386 * Number of packets/Data Segments/Packets. 2387 * @param slen 2388 * Accumulated statistics, bytes sent. 2389 * @param olx 2390 * Configured Tx offloads mask. It is fully defined at 2391 * compile time and may be used for optimization. 2392 * 2393 * @return 2394 * true - packet match with eMPW batch attributes. 2395 * false - no match, eMPW should be restarted. 2396 */ 2397 static __rte_always_inline void 2398 mlx5_tx_sdone_empw(struct mlx5_txq_data *__rte_restrict txq, 2399 struct mlx5_txq_local *__rte_restrict loc, 2400 unsigned int ds, 2401 unsigned int slen, 2402 unsigned int olx __rte_unused) 2403 { 2404 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2405 #ifdef MLX5_PMD_SOFT_COUNTERS 2406 /* Update sent data bytes counter. */ 2407 txq->stats.obytes += slen; 2408 #else 2409 (void)slen; 2410 #endif 2411 loc->elts_free -= ds; 2412 loc->pkts_sent += ds; 2413 ds += 2; 2414 loc->wqe_last->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | ds); 2415 txq->wqe_ci += (ds + 3) / 4; 2416 loc->wqe_free -= (ds + 3) / 4; 2417 } 2418 2419 /** 2420 * Update send loop variables and WQE for eMPW loop with data inlining. 2421 * Gets the size of pushed descriptors and data to the WQE. 2422 * 2423 * @param txq 2424 * Pointer to TX queue structure. 2425 * @param loc 2426 * Pointer to burst routine local context. 2427 * @param len 2428 * Total size of descriptor/data in bytes. 2429 * @param slen 2430 * Accumulated statistics, data bytes sent. 2431 * @param wqem 2432 * The base WQE for the eMPW/MPW descriptor. 2433 * @param olx 2434 * Configured Tx offloads mask. It is fully defined at 2435 * compile time and may be used for optimization. 2436 * 2437 * @return 2438 * true - packet match with eMPW batch attributes. 2439 * false - no match, eMPW should be restarted. 2440 */ 2441 static __rte_always_inline void 2442 mlx5_tx_idone_empw(struct mlx5_txq_data *__rte_restrict txq, 2443 struct mlx5_txq_local *__rte_restrict loc, 2444 unsigned int len, 2445 unsigned int slen, 2446 struct mlx5_wqe *__rte_restrict wqem, 2447 unsigned int olx __rte_unused) 2448 { 2449 struct mlx5_wqe_dseg *dseg = &wqem->dseg[0]; 2450 2451 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2452 #ifdef MLX5_PMD_SOFT_COUNTERS 2453 /* Update sent data bytes counter. */ 2454 txq->stats.obytes += slen; 2455 #else 2456 (void)slen; 2457 #endif 2458 if (MLX5_TXOFF_CONFIG(MPW) && dseg->bcount == RTE_BE32(0)) { 2459 /* 2460 * If the legacy MPW session contains the inline packets 2461 * we should set the only inline data segment length 2462 * and align the total length to the segment size. 2463 */ 2464 MLX5_ASSERT(len > sizeof(dseg->bcount)); 2465 dseg->bcount = rte_cpu_to_be_32((len - sizeof(dseg->bcount)) | 2466 MLX5_ETH_WQE_DATA_INLINE); 2467 len = (len + MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE + 2; 2468 } else { 2469 /* 2470 * The session is not legacy MPW or contains the 2471 * data buffer pointer segments. 
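 * In this case len is a byte count of the built Data Segments and is a
 * multiple of MLX5_WSEG_SIZE. Illustrative example (assuming the usual
 * 16-byte MLX5_WSEG_SIZE): three pointer descriptors give len == 48, so
 * the DS count written below is 48 / 16 + 2 == 5, the extra 2 accounting
 * for the Control and Ethernet Segments of the title WQEBB.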
2472 */ 2473 MLX5_ASSERT((len % MLX5_WSEG_SIZE) == 0); 2474 len = len / MLX5_WSEG_SIZE + 2; 2475 } 2476 wqem->cseg.sq_ds = rte_cpu_to_be_32(txq->qp_num_8s | len); 2477 txq->wqe_ci += (len + 3) / 4; 2478 loc->wqe_free -= (len + 3) / 4; 2479 loc->wqe_last = wqem; 2480 } 2481 2482 /** 2483 * The set of Tx burst functions for single-segment packets without TSO 2484 * and with Multi-Packet Writing feature support. 2485 * Supports all types of Tx offloads, except multi-packets and TSO. 2486 * 2487 * Uses MLX5_OPCODE_EMPW to build WQEs if possible and sends as many packet 2488 * per WQE as it can. If eMPW is not configured or packet can not be sent with 2489 * eMPW (VLAN insertion) the ordinary SEND opcode is used and only one packet 2490 * placed in WQE. 2491 * 2492 * Functions stop sending if it encounters the multi-segment packet or packet 2493 * with TSO requested. 2494 * 2495 * The routines are responsible for storing processed mbuf into elts ring buffer 2496 * and update elts_head if inlining offload is requested. Otherwise the copying 2497 * mbufs to elts can be postponed and completed at the end of burst routine. 2498 * 2499 * @param txq 2500 * Pointer to TX queue structure. 2501 * @param[in] pkts 2502 * Packets to transmit. 2503 * @param pkts_n 2504 * Number of packets in array. 2505 * @param loc 2506 * Pointer to burst routine local context. 2507 * @param olx 2508 * Configured Tx offloads mask. It is fully defined at 2509 * compile time and may be used for optimization. 2510 * 2511 * @return 2512 * MLX5_TXCMP_CODE_EXIT - sending is done or impossible. 2513 * MLX5_TXCMP_CODE_ERROR - some unrecoverable error occurred. 2514 * MLX5_TXCMP_CODE_MULTI - multi-segment packet encountered. 2515 * MLX5_TXCMP_CODE_TSO - TSO packet encountered. 2516 * MLX5_TXCMP_CODE_SINGLE - used inside functions set. 2517 * MLX5_TXCMP_CODE_EMPW - used inside functions set. 2518 * 2519 * Local context variables updated. 2520 * 2521 * 2522 * The routine sends packets with MLX5_OPCODE_EMPW 2523 * without inlining, this is dedicated optimized branch. 2524 * No VLAN insertion is supported. 2525 */ 2526 static __rte_always_inline enum mlx5_txcmp_code 2527 mlx5_tx_burst_empw_simple(struct mlx5_txq_data *__rte_restrict txq, 2528 struct rte_mbuf **__rte_restrict pkts, 2529 unsigned int pkts_n, 2530 struct mlx5_txq_local *__rte_restrict loc, 2531 unsigned int olx) 2532 { 2533 /* 2534 * Subroutine is the part of mlx5_tx_burst_single() and sends 2535 * single-segment packet with eMPW opcode without data inlining. 2536 */ 2537 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 2538 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2539 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2540 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2541 pkts += loc->pkts_sent + 1; 2542 pkts_n -= loc->pkts_sent; 2543 for (;;) { 2544 struct mlx5_wqe_dseg *__rte_restrict dseg; 2545 struct mlx5_wqe_eseg *__rte_restrict eseg; 2546 enum mlx5_txcmp_code ret; 2547 unsigned int part, loop; 2548 unsigned int slen = 0; 2549 2550 next_empw: 2551 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2552 if (MLX5_TXOFF_CONFIG(TXPP)) { 2553 enum mlx5_txcmp_code wret; 2554 2555 /* Generate WAIT for scheduling if requested. */ 2556 wret = mlx5_tx_schedule_send(txq, loc, olx); 2557 if (wret == MLX5_TXCMP_CODE_EXIT) 2558 return MLX5_TXCMP_CODE_EXIT; 2559 if (wret == MLX5_TXCMP_CODE_ERROR) 2560 return MLX5_TXCMP_CODE_ERROR; 2561 } 2562 part = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 
2563 MLX5_MPW_MAX_PACKETS : 2564 MLX5_EMPW_MAX_PACKETS); 2565 if (unlikely(loc->elts_free < part)) { 2566 /* We have no enough elts to save all mbufs. */ 2567 if (unlikely(loc->elts_free < MLX5_EMPW_MIN_PACKETS)) 2568 return MLX5_TXCMP_CODE_EXIT; 2569 /* But we still able to send at least minimal eMPW. */ 2570 part = loc->elts_free; 2571 } 2572 /* Check whether we have enough WQEs */ 2573 if (unlikely(loc->wqe_free < ((2 + part + 3) / 4))) { 2574 if (unlikely(loc->wqe_free < 2575 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2576 return MLX5_TXCMP_CODE_EXIT; 2577 part = (loc->wqe_free * 4) - 2; 2578 } 2579 if (likely(part > 1)) 2580 rte_prefetch0(*pkts); 2581 loc->wqe_last = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2582 /* 2583 * Build eMPW title WQEBB: 2584 * - Control Segment, eMPW opcode 2585 * - Ethernet Segment, no inline 2586 */ 2587 mlx5_tx_cseg_init(txq, loc, loc->wqe_last, part + 2, 2588 MLX5_OPCODE_ENHANCED_MPSW, olx); 2589 mlx5_tx_eseg_none(txq, loc, loc->wqe_last, 2590 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2591 eseg = &loc->wqe_last->eseg; 2592 dseg = &loc->wqe_last->dseg[0]; 2593 loop = part; 2594 /* Store the packet length for legacy MPW. */ 2595 if (MLX5_TXOFF_CONFIG(MPW)) 2596 eseg->mss = rte_cpu_to_be_16 2597 (rte_pktmbuf_data_len(loc->mbuf)); 2598 for (;;) { 2599 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2600 #ifdef MLX5_PMD_SOFT_COUNTERS 2601 /* Update sent data bytes counter. */ 2602 slen += dlen; 2603 #endif 2604 mlx5_tx_dseg_ptr 2605 (txq, loc, dseg, 2606 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 2607 dlen, olx); 2608 if (unlikely(--loop == 0)) 2609 break; 2610 loc->mbuf = *pkts++; 2611 if (likely(loop > 1)) 2612 rte_prefetch0(*pkts); 2613 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2614 /* 2615 * Unroll the completion code to avoid 2616 * returning variable value - it results in 2617 * unoptimized sequent checking in caller. 2618 */ 2619 if (ret == MLX5_TXCMP_CODE_MULTI) { 2620 part -= loop; 2621 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2622 if (unlikely(!loc->elts_free || 2623 !loc->wqe_free)) 2624 return MLX5_TXCMP_CODE_EXIT; 2625 return MLX5_TXCMP_CODE_MULTI; 2626 } 2627 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2628 if (ret == MLX5_TXCMP_CODE_TSO) { 2629 part -= loop; 2630 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2631 if (unlikely(!loc->elts_free || 2632 !loc->wqe_free)) 2633 return MLX5_TXCMP_CODE_EXIT; 2634 return MLX5_TXCMP_CODE_TSO; 2635 } 2636 if (ret == MLX5_TXCMP_CODE_SINGLE) { 2637 part -= loop; 2638 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2639 if (unlikely(!loc->elts_free || 2640 !loc->wqe_free)) 2641 return MLX5_TXCMP_CODE_EXIT; 2642 return MLX5_TXCMP_CODE_SINGLE; 2643 } 2644 if (ret != MLX5_TXCMP_CODE_EMPW) { 2645 MLX5_ASSERT(false); 2646 part -= loop; 2647 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2648 return MLX5_TXCMP_CODE_ERROR; 2649 } 2650 /* 2651 * Check whether packet parameters coincide 2652 * within assumed eMPW batch: 2653 * - check sum settings 2654 * - metadata value 2655 * - software parser settings 2656 * - packets length (legacy MPW only) 2657 * - scheduling is not required 2658 */ 2659 if (!mlx5_tx_match_empw(txq, eseg, loc, dlen, olx)) { 2660 MLX5_ASSERT(loop); 2661 part -= loop; 2662 mlx5_tx_sdone_empw(txq, loc, part, slen, olx); 2663 if (unlikely(!loc->elts_free || 2664 !loc->wqe_free)) 2665 return MLX5_TXCMP_CODE_EXIT; 2666 pkts_n -= part; 2667 goto next_empw; 2668 } 2669 /* Packet attributes match, continue the same eMPW. 
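 * Only the Data Segment pointer advances here; the Control and Ethernet
 * Segments of the title WQEBB are shared by all packets of the batch.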
*/ 2670 ++dseg; 2671 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2672 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2673 } 2674 /* eMPW is built successfully, update loop parameters. */ 2675 MLX5_ASSERT(!loop); 2676 MLX5_ASSERT(pkts_n >= part); 2677 #ifdef MLX5_PMD_SOFT_COUNTERS 2678 /* Update sent data bytes counter. */ 2679 txq->stats.obytes += slen; 2680 #endif 2681 loc->elts_free -= part; 2682 loc->pkts_sent += part; 2683 txq->wqe_ci += (2 + part + 3) / 4; 2684 loc->wqe_free -= (2 + part + 3) / 4; 2685 pkts_n -= part; 2686 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 2687 return MLX5_TXCMP_CODE_EXIT; 2688 loc->mbuf = *pkts++; 2689 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2690 if (unlikely(ret != MLX5_TXCMP_CODE_EMPW)) 2691 return ret; 2692 /* Continue sending eMPW batches. */ 2693 } 2694 MLX5_ASSERT(false); 2695 } 2696 2697 /** 2698 * The routine sends packets with MLX5_OPCODE_EMPW 2699 * with inlining, optionally supports VLAN insertion. 2700 */ 2701 static __rte_always_inline enum mlx5_txcmp_code 2702 mlx5_tx_burst_empw_inline(struct mlx5_txq_data *__rte_restrict txq, 2703 struct rte_mbuf **__rte_restrict pkts, 2704 unsigned int pkts_n, 2705 struct mlx5_txq_local *__rte_restrict loc, 2706 unsigned int olx) 2707 { 2708 /* 2709 * Subroutine is the part of mlx5_tx_burst_single() and sends 2710 * single-segment packet with eMPW opcode with data inlining. 2711 */ 2712 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 2713 MLX5_ASSERT(MLX5_TXOFF_CONFIG(EMPW)); 2714 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 2715 MLX5_ASSERT(pkts_n > loc->pkts_sent); 2716 pkts += loc->pkts_sent + 1; 2717 pkts_n -= loc->pkts_sent; 2718 for (;;) { 2719 struct mlx5_wqe_dseg *__rte_restrict dseg; 2720 struct mlx5_wqe *__rte_restrict wqem; 2721 enum mlx5_txcmp_code ret; 2722 unsigned int room, part, nlim; 2723 unsigned int slen = 0; 2724 2725 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2726 if (MLX5_TXOFF_CONFIG(TXPP)) { 2727 enum mlx5_txcmp_code wret; 2728 2729 /* Generate WAIT for scheduling if requested. */ 2730 wret = mlx5_tx_schedule_send(txq, loc, olx); 2731 if (wret == MLX5_TXCMP_CODE_EXIT) 2732 return MLX5_TXCMP_CODE_EXIT; 2733 if (wret == MLX5_TXCMP_CODE_ERROR) 2734 return MLX5_TXCMP_CODE_ERROR; 2735 } 2736 /* 2737 * Limits the amount of packets in one WQE 2738 * to improve CQE latency generation. 2739 */ 2740 nlim = RTE_MIN(pkts_n, MLX5_TXOFF_CONFIG(MPW) ? 2741 MLX5_MPW_INLINE_MAX_PACKETS : 2742 MLX5_EMPW_MAX_PACKETS); 2743 /* Check whether we have minimal amount WQEs */ 2744 if (unlikely(loc->wqe_free < 2745 ((2 + MLX5_EMPW_MIN_PACKETS + 3) / 4))) 2746 return MLX5_TXCMP_CODE_EXIT; 2747 if (likely(pkts_n > 1)) 2748 rte_prefetch0(*pkts); 2749 wqem = txq->wqes + (txq->wqe_ci & txq->wqe_m); 2750 /* 2751 * Build eMPW title WQEBB: 2752 * - Control Segment, eMPW opcode, zero DS 2753 * - Ethernet Segment, no inline 2754 */ 2755 mlx5_tx_cseg_init(txq, loc, wqem, 0, 2756 MLX5_OPCODE_ENHANCED_MPSW, olx); 2757 mlx5_tx_eseg_none(txq, loc, wqem, 2758 olx & ~MLX5_TXOFF_CONFIG_VLAN); 2759 dseg = &wqem->dseg[0]; 2760 /* Store the packet length for legacy MPW. */ 2761 if (MLX5_TXOFF_CONFIG(MPW)) 2762 wqem->eseg.mss = rte_cpu_to_be_16 2763 (rte_pktmbuf_data_len(loc->mbuf)); 2764 room = RTE_MIN(MLX5_WQE_SIZE_MAX / MLX5_WQE_SIZE, 2765 loc->wqe_free) * MLX5_WQE_SIZE - 2766 MLX5_WQE_CSEG_SIZE - 2767 MLX5_WQE_ESEG_SIZE; 2768 /* Limit the room for legacy MPW sessions for performance. 
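 * The session is capped either by the maximal inline data size
 * (txq->inlen_empw plus the bcount field and the optional VLAN header)
 * or by MLX5_MPW_INLINE_MAX_PACKETS pointer descriptors, whichever is
 * larger.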
*/ 2769 if (MLX5_TXOFF_CONFIG(MPW)) 2770 room = RTE_MIN(room, 2771 RTE_MAX(txq->inlen_empw + 2772 sizeof(dseg->bcount) + 2773 (MLX5_TXOFF_CONFIG(VLAN) ? 2774 sizeof(struct rte_vlan_hdr) : 0), 2775 MLX5_MPW_INLINE_MAX_PACKETS * 2776 MLX5_WQE_DSEG_SIZE)); 2777 /* Build WQE till we have space, packets and resources. */ 2778 part = room; 2779 for (;;) { 2780 uint32_t dlen = rte_pktmbuf_data_len(loc->mbuf); 2781 uint8_t *dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *); 2782 unsigned int tlen; 2783 2784 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2785 MLX5_ASSERT((room % MLX5_WQE_DSEG_SIZE) == 0); 2786 MLX5_ASSERT((uintptr_t)dseg < (uintptr_t)txq->wqes_end); 2787 /* 2788 * Some Tx offloads may cause an error if packet is not 2789 * long enough, check against assumed minimal length. 2790 */ 2791 if (unlikely(dlen <= MLX5_ESEG_MIN_INLINE_SIZE)) { 2792 part -= room; 2793 if (unlikely(!part)) 2794 return MLX5_TXCMP_CODE_ERROR; 2795 /* 2796 * We have some successfully built 2797 * packet Data Segments to send. 2798 */ 2799 mlx5_tx_idone_empw(txq, loc, part, 2800 slen, wqem, olx); 2801 return MLX5_TXCMP_CODE_ERROR; 2802 } 2803 /* Inline or not inline - that's the Question. */ 2804 if (dlen > txq->inlen_empw || 2805 loc->mbuf->ol_flags & RTE_MBUF_F_TX_DYNF_NOINLINE) 2806 goto pointer_empw; 2807 if (MLX5_TXOFF_CONFIG(MPW)) { 2808 if (dlen > txq->inlen_send) 2809 goto pointer_empw; 2810 tlen = dlen; 2811 if (part == room) { 2812 /* Open new inline MPW session. */ 2813 tlen += sizeof(dseg->bcount); 2814 dseg->bcount = RTE_BE32(0); 2815 dseg = RTE_PTR_ADD 2816 (dseg, sizeof(dseg->bcount)); 2817 } else { 2818 /* 2819 * No pointer and inline descriptor 2820 * intermix for legacy MPW sessions. 2821 */ 2822 if (wqem->dseg[0].bcount) 2823 break; 2824 } 2825 } else { 2826 tlen = sizeof(dseg->bcount) + dlen; 2827 } 2828 /* Inline entire packet, optional VLAN insertion. */ 2829 if (MLX5_TXOFF_CONFIG(VLAN) && 2830 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 2831 /* 2832 * The packet length must be checked in 2833 * mlx5_tx_able_to_empw() and packet 2834 * fits into inline length guaranteed. 2835 */ 2836 MLX5_ASSERT((dlen + 2837 sizeof(struct rte_vlan_hdr)) <= 2838 txq->inlen_empw); 2839 tlen += sizeof(struct rte_vlan_hdr); 2840 if (room < tlen) 2841 break; 2842 dseg = mlx5_tx_dseg_vlan(txq, loc, dseg, 2843 dptr, dlen, olx); 2844 #ifdef MLX5_PMD_SOFT_COUNTERS 2845 /* Update sent data bytes counter. */ 2846 slen += sizeof(struct rte_vlan_hdr); 2847 #endif 2848 } else { 2849 if (room < tlen) 2850 break; 2851 dseg = mlx5_tx_dseg_empw(txq, loc, dseg, 2852 dptr, dlen, olx); 2853 } 2854 if (!MLX5_TXOFF_CONFIG(MPW)) 2855 tlen = RTE_ALIGN(tlen, MLX5_WSEG_SIZE); 2856 MLX5_ASSERT(room >= tlen); 2857 room -= tlen; 2858 /* 2859 * Packet data are completely inline, 2860 * we can try to free the packet. 2861 */ 2862 if (likely(loc->pkts_sent == loc->mbuf_free)) { 2863 /* 2864 * All the packets from the burst beginning 2865 * are inline, we can free mbufs directly 2866 * from the origin array on tx_burst exit(). 2867 */ 2868 loc->mbuf_free++; 2869 goto next_mbuf; 2870 } 2871 /* 2872 * In order no to call rte_pktmbuf_free_seg() here, 2873 * in the most inner loop (that might be very 2874 * expensive) we just save the mbuf in elts. 2875 */ 2876 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2877 loc->elts_free--; 2878 goto next_mbuf; 2879 pointer_empw: 2880 /* 2881 * No pointer and inline descriptor 2882 * intermix for legacy MPW sessions. 
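 * If an inline session is already open (the bcount field of the first
 * descriptor is still zero and is filled only on session close), a
 * pointer descriptor cannot be added and the session is closed to start
 * a new one.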
2883 */ 2884 if (MLX5_TXOFF_CONFIG(MPW) && 2885 part != room && 2886 wqem->dseg[0].bcount == RTE_BE32(0)) 2887 break; 2888 /* 2889 * Not inlinable VLAN packets are 2890 * proceeded outside of this routine. 2891 */ 2892 MLX5_ASSERT(room >= MLX5_WQE_DSEG_SIZE); 2893 if (MLX5_TXOFF_CONFIG(VLAN)) 2894 MLX5_ASSERT(!(loc->mbuf->ol_flags & 2895 RTE_MBUF_F_TX_VLAN)); 2896 mlx5_tx_dseg_ptr(txq, loc, dseg, dptr, dlen, olx); 2897 /* We have to store mbuf in elts.*/ 2898 txq->elts[txq->elts_head++ & txq->elts_m] = loc->mbuf; 2899 loc->elts_free--; 2900 room -= MLX5_WQE_DSEG_SIZE; 2901 /* Ring buffer wraparound is checked at the loop end.*/ 2902 ++dseg; 2903 next_mbuf: 2904 #ifdef MLX5_PMD_SOFT_COUNTERS 2905 /* Update sent data bytes counter. */ 2906 slen += dlen; 2907 #endif 2908 loc->pkts_sent++; 2909 pkts_n--; 2910 if (unlikely(!pkts_n || !loc->elts_free)) { 2911 /* 2912 * We have no resources/packets to 2913 * continue build descriptors. 2914 */ 2915 part -= room; 2916 mlx5_tx_idone_empw(txq, loc, part, 2917 slen, wqem, olx); 2918 return MLX5_TXCMP_CODE_EXIT; 2919 } 2920 loc->mbuf = *pkts++; 2921 if (likely(pkts_n > 1)) 2922 rte_prefetch0(*pkts); 2923 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 2924 /* 2925 * Unroll the completion code to avoid 2926 * returning variable value - it results in 2927 * unoptimized sequent checking in caller. 2928 */ 2929 if (ret == MLX5_TXCMP_CODE_MULTI) { 2930 part -= room; 2931 mlx5_tx_idone_empw(txq, loc, part, 2932 slen, wqem, olx); 2933 if (unlikely(!loc->elts_free || 2934 !loc->wqe_free)) 2935 return MLX5_TXCMP_CODE_EXIT; 2936 return MLX5_TXCMP_CODE_MULTI; 2937 } 2938 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 2939 if (ret == MLX5_TXCMP_CODE_TSO) { 2940 part -= room; 2941 mlx5_tx_idone_empw(txq, loc, part, 2942 slen, wqem, olx); 2943 if (unlikely(!loc->elts_free || 2944 !loc->wqe_free)) 2945 return MLX5_TXCMP_CODE_EXIT; 2946 return MLX5_TXCMP_CODE_TSO; 2947 } 2948 if (ret == MLX5_TXCMP_CODE_SINGLE) { 2949 part -= room; 2950 mlx5_tx_idone_empw(txq, loc, part, 2951 slen, wqem, olx); 2952 if (unlikely(!loc->elts_free || 2953 !loc->wqe_free)) 2954 return MLX5_TXCMP_CODE_EXIT; 2955 return MLX5_TXCMP_CODE_SINGLE; 2956 } 2957 if (ret != MLX5_TXCMP_CODE_EMPW) { 2958 MLX5_ASSERT(false); 2959 part -= room; 2960 mlx5_tx_idone_empw(txq, loc, part, 2961 slen, wqem, olx); 2962 return MLX5_TXCMP_CODE_ERROR; 2963 } 2964 /* Check if we have minimal room left. */ 2965 nlim--; 2966 if (unlikely(!nlim || room < MLX5_WQE_DSEG_SIZE)) 2967 break; 2968 /* 2969 * Check whether packet parameters coincide 2970 * within assumed eMPW batch: 2971 * - check sum settings 2972 * - metadata value 2973 * - software parser settings 2974 * - packets length (legacy MPW only) 2975 * - scheduling is not required 2976 */ 2977 if (!mlx5_tx_match_empw(txq, &wqem->eseg, 2978 loc, dlen, olx)) 2979 break; 2980 /* Packet attributes match, continue the same eMPW. */ 2981 if ((uintptr_t)dseg >= (uintptr_t)txq->wqes_end) 2982 dseg = (struct mlx5_wqe_dseg *)txq->wqes; 2983 } 2984 /* 2985 * We get here to close an existing eMPW 2986 * session and start the new one. 2987 */ 2988 MLX5_ASSERT(pkts_n); 2989 part -= room; 2990 if (unlikely(!part)) 2991 return MLX5_TXCMP_CODE_EXIT; 2992 mlx5_tx_idone_empw(txq, loc, part, slen, wqem, olx); 2993 if (unlikely(!loc->elts_free || 2994 !loc->wqe_free)) 2995 return MLX5_TXCMP_CODE_EXIT; 2996 /* Continue the loop with new eMPW session. */ 2997 } 2998 MLX5_ASSERT(false); 2999 } 3000 3001 /** 3002 * The routine sends packets with ordinary MLX5_OPCODE_SEND. 
3003 * Data inlining and VLAN insertion are supported. 3004 */ 3005 static __rte_always_inline enum mlx5_txcmp_code 3006 mlx5_tx_burst_single_send(struct mlx5_txq_data *__rte_restrict txq, 3007 struct rte_mbuf **__rte_restrict pkts, 3008 unsigned int pkts_n, 3009 struct mlx5_txq_local *__rte_restrict loc, 3010 unsigned int olx) 3011 { 3012 /* 3013 * Subroutine is the part of mlx5_tx_burst_single() 3014 * and sends single-segment packet with SEND opcode. 3015 */ 3016 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3017 MLX5_ASSERT(pkts_n > loc->pkts_sent); 3018 pkts += loc->pkts_sent + 1; 3019 pkts_n -= loc->pkts_sent; 3020 for (;;) { 3021 struct mlx5_wqe *__rte_restrict wqe; 3022 enum mlx5_txcmp_code ret; 3023 3024 MLX5_ASSERT(NB_SEGS(loc->mbuf) == 1); 3025 if (MLX5_TXOFF_CONFIG(TXPP)) { 3026 enum mlx5_txcmp_code wret; 3027 3028 /* Generate WAIT for scheduling if requested. */ 3029 wret = mlx5_tx_schedule_send(txq, loc, olx); 3030 if (wret == MLX5_TXCMP_CODE_EXIT) 3031 return MLX5_TXCMP_CODE_EXIT; 3032 if (wret == MLX5_TXCMP_CODE_ERROR) 3033 return MLX5_TXCMP_CODE_ERROR; 3034 } 3035 if (MLX5_TXOFF_CONFIG(INLINE)) { 3036 unsigned int inlen, vlan = 0; 3037 3038 inlen = rte_pktmbuf_data_len(loc->mbuf); 3039 if (MLX5_TXOFF_CONFIG(VLAN) && 3040 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) { 3041 vlan = sizeof(struct rte_vlan_hdr); 3042 inlen += vlan; 3043 } 3044 /* 3045 * If inlining is enabled at configuration time 3046 * the limit must be not less than minimal size. 3047 * Otherwise we would do extra check for data 3048 * size to avoid crashes due to length overflow. 3049 */ 3050 MLX5_ASSERT(txq->inlen_send >= 3051 MLX5_ESEG_MIN_INLINE_SIZE); 3052 if (inlen <= txq->inlen_send) { 3053 unsigned int seg_n, wqe_n; 3054 3055 rte_prefetch0(rte_pktmbuf_mtod 3056 (loc->mbuf, uint8_t *)); 3057 /* Check against minimal length. */ 3058 if (inlen <= MLX5_ESEG_MIN_INLINE_SIZE) 3059 return MLX5_TXCMP_CODE_ERROR; 3060 if (loc->mbuf->ol_flags & 3061 RTE_MBUF_F_TX_DYNF_NOINLINE) { 3062 /* 3063 * The hint flag not to inline packet 3064 * data is set. Check whether we can 3065 * follow the hint. 3066 */ 3067 if ((!MLX5_TXOFF_CONFIG(EMPW) && 3068 txq->inlen_mode) || 3069 (MLX5_TXOFF_CONFIG(MPW) && 3070 txq->inlen_mode)) { 3071 if (inlen <= txq->inlen_send) 3072 goto single_inline; 3073 /* 3074 * The hardware requires the 3075 * minimal inline data header. 3076 */ 3077 goto single_min_inline; 3078 } 3079 if (MLX5_TXOFF_CONFIG(VLAN) && 3080 vlan && !txq->vlan_en) { 3081 /* 3082 * We must insert VLAN tag 3083 * by software means. 3084 */ 3085 goto single_part_inline; 3086 } 3087 goto single_no_inline; 3088 } 3089 single_inline: 3090 /* 3091 * Completely inlined packet data WQE: 3092 * - Control Segment, SEND opcode 3093 * - Ethernet Segment, no VLAN insertion 3094 * - Data inlined, VLAN optionally inserted 3095 * - Alignment to MLX5_WSEG_SIZE 3096 * Have to estimate amount of WQEBBs 3097 */ 3098 seg_n = (inlen + 3 * MLX5_WSEG_SIZE - 3099 MLX5_ESEG_MIN_INLINE_SIZE + 3100 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3101 /* Check if there are enough WQEBBs. */ 3102 wqe_n = (seg_n + 3) / 4; 3103 if (wqe_n > loc->wqe_free) 3104 return MLX5_TXCMP_CODE_EXIT; 3105 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3106 loc->wqe_last = wqe; 3107 mlx5_tx_cseg_init(txq, loc, wqe, seg_n, 3108 MLX5_OPCODE_SEND, olx); 3109 mlx5_tx_eseg_data(txq, loc, wqe, 3110 vlan, inlen, 0, olx); 3111 txq->wqe_ci += wqe_n; 3112 loc->wqe_free -= wqe_n; 3113 /* 3114 * Packet data are completely inlined, 3115 * free the packet immediately. 
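 * The hardware works on the copy placed into the WQE and never references
 * the original mbuf memory, so no elts entry is needed for this packet.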
3116 */ 3117 rte_pktmbuf_free_seg(loc->mbuf); 3118 } else if ((!MLX5_TXOFF_CONFIG(EMPW) || 3119 MLX5_TXOFF_CONFIG(MPW)) && 3120 txq->inlen_mode) { 3121 /* 3122 * If minimal inlining is requested the eMPW 3123 * feature should be disabled due to data is 3124 * inlined into Ethernet Segment, which can 3125 * not contain inlined data for eMPW due to 3126 * segment shared for all packets. 3127 */ 3128 struct mlx5_wqe_dseg *__rte_restrict dseg; 3129 unsigned int ds; 3130 uint8_t *dptr; 3131 3132 /* 3133 * The inline-mode settings require 3134 * to inline the specified amount of 3135 * data bytes to the Ethernet Segment. 3136 * We should check the free space in 3137 * WQE ring buffer to inline partially. 3138 */ 3139 single_min_inline: 3140 MLX5_ASSERT(txq->inlen_send >= txq->inlen_mode); 3141 MLX5_ASSERT(inlen > txq->inlen_mode); 3142 MLX5_ASSERT(txq->inlen_mode >= 3143 MLX5_ESEG_MIN_INLINE_SIZE); 3144 /* 3145 * Check whether there are enough free WQEBBs: 3146 * - Control Segment 3147 * - Ethernet Segment 3148 * - First Segment of inlined Ethernet data 3149 * - ... data continued ... 3150 * - Finishing Data Segment of pointer type 3151 */ 3152 ds = (MLX5_WQE_CSEG_SIZE + 3153 MLX5_WQE_ESEG_SIZE + 3154 MLX5_WQE_DSEG_SIZE + 3155 txq->inlen_mode - 3156 MLX5_ESEG_MIN_INLINE_SIZE + 3157 MLX5_WQE_DSEG_SIZE + 3158 MLX5_WSEG_SIZE - 1) / MLX5_WSEG_SIZE; 3159 if (loc->wqe_free < ((ds + 3) / 4)) 3160 return MLX5_TXCMP_CODE_EXIT; 3161 /* 3162 * Build the ordinary SEND WQE: 3163 * - Control Segment 3164 * - Ethernet Segment, inline inlen_mode bytes 3165 * - Data Segment of pointer type 3166 */ 3167 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3168 loc->wqe_last = wqe; 3169 mlx5_tx_cseg_init(txq, loc, wqe, ds, 3170 MLX5_OPCODE_SEND, olx); 3171 dseg = mlx5_tx_eseg_data(txq, loc, wqe, vlan, 3172 txq->inlen_mode, 3173 0, olx); 3174 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3175 txq->inlen_mode - vlan; 3176 inlen -= txq->inlen_mode; 3177 mlx5_tx_dseg_ptr(txq, loc, dseg, 3178 dptr, inlen, olx); 3179 /* 3180 * WQE is built, update the loop parameters 3181 * and got to the next packet. 3182 */ 3183 txq->wqe_ci += (ds + 3) / 4; 3184 loc->wqe_free -= (ds + 3) / 4; 3185 /* We have to store mbuf in elts.*/ 3186 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3187 txq->elts[txq->elts_head++ & txq->elts_m] = 3188 loc->mbuf; 3189 --loc->elts_free; 3190 } else { 3191 uint8_t *dptr; 3192 unsigned int dlen; 3193 3194 /* 3195 * Partially inlined packet data WQE, we have 3196 * some space in title WQEBB, we can fill it 3197 * with some packet data. It takes one WQEBB, 3198 * it is available, no extra space check: 3199 * - Control Segment, SEND opcode 3200 * - Ethernet Segment, no VLAN insertion 3201 * - MLX5_ESEG_MIN_INLINE_SIZE bytes of Data 3202 * - Data Segment, pointer type 3203 * 3204 * We also get here if VLAN insertion is not 3205 * supported by HW, the inline is enabled. 3206 */ 3207 single_part_inline: 3208 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3209 loc->wqe_last = wqe; 3210 mlx5_tx_cseg_init(txq, loc, wqe, 4, 3211 MLX5_OPCODE_SEND, olx); 3212 mlx5_tx_eseg_dmin(txq, loc, wqe, vlan, olx); 3213 dptr = rte_pktmbuf_mtod(loc->mbuf, uint8_t *) + 3214 MLX5_ESEG_MIN_INLINE_SIZE - vlan; 3215 /* 3216 * The length check is performed above, by 3217 * comparing with txq->inlen_send. We should 3218 * not get overflow here. 
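 * The Ethernet Segment built above holds MLX5_ESEG_MIN_INLINE_SIZE bytes
 * of inline data (including the VLAN header if it is inserted by
 * software); the rest of the packet is attached below with a single
 * pointer Data Segment.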
3219 */ 3220 MLX5_ASSERT(inlen > MLX5_ESEG_MIN_INLINE_SIZE); 3221 dlen = inlen - MLX5_ESEG_MIN_INLINE_SIZE; 3222 mlx5_tx_dseg_ptr(txq, loc, &wqe->dseg[1], 3223 dptr, dlen, olx); 3224 ++txq->wqe_ci; 3225 --loc->wqe_free; 3226 /* We have to store mbuf in elts.*/ 3227 MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE)); 3228 txq->elts[txq->elts_head++ & txq->elts_m] = 3229 loc->mbuf; 3230 --loc->elts_free; 3231 } 3232 #ifdef MLX5_PMD_SOFT_COUNTERS 3233 /* Update sent data bytes counter. */ 3234 txq->stats.obytes += vlan + 3235 rte_pktmbuf_data_len(loc->mbuf); 3236 #endif 3237 } else { 3238 /* 3239 * No inline at all, it means the CPU cycles saving 3240 * is prioritized at configuration, we should not 3241 * copy any packet data to WQE. 3242 * 3243 * SEND WQE, one WQEBB: 3244 * - Control Segment, SEND opcode 3245 * - Ethernet Segment, optional VLAN, no inline 3246 * - Data Segment, pointer type 3247 */ 3248 single_no_inline: 3249 wqe = txq->wqes + (txq->wqe_ci & txq->wqe_m); 3250 loc->wqe_last = wqe; 3251 mlx5_tx_cseg_init(txq, loc, wqe, 3, 3252 MLX5_OPCODE_SEND, olx); 3253 mlx5_tx_eseg_none(txq, loc, wqe, olx); 3254 mlx5_tx_dseg_ptr 3255 (txq, loc, &wqe->dseg[0], 3256 rte_pktmbuf_mtod(loc->mbuf, uint8_t *), 3257 rte_pktmbuf_data_len(loc->mbuf), olx); 3258 ++txq->wqe_ci; 3259 --loc->wqe_free; 3260 /* 3261 * We should not store mbuf pointer in elts 3262 * if no inlining is configured, this is done 3263 * by calling routine in a batch copy. 3264 */ 3265 MLX5_ASSERT(!MLX5_TXOFF_CONFIG(INLINE)); 3266 --loc->elts_free; 3267 #ifdef MLX5_PMD_SOFT_COUNTERS 3268 /* Update sent data bytes counter. */ 3269 txq->stats.obytes += rte_pktmbuf_data_len(loc->mbuf); 3270 if (MLX5_TXOFF_CONFIG(VLAN) && 3271 loc->mbuf->ol_flags & RTE_MBUF_F_TX_VLAN) 3272 txq->stats.obytes += 3273 sizeof(struct rte_vlan_hdr); 3274 #endif 3275 } 3276 ++loc->pkts_sent; 3277 --pkts_n; 3278 if (unlikely(!pkts_n || !loc->elts_free || !loc->wqe_free)) 3279 return MLX5_TXCMP_CODE_EXIT; 3280 loc->mbuf = *pkts++; 3281 if (pkts_n > 1) 3282 rte_prefetch0(*pkts); 3283 ret = mlx5_tx_able_to_empw(txq, loc, olx, true); 3284 if (unlikely(ret != MLX5_TXCMP_CODE_SINGLE)) 3285 return ret; 3286 } 3287 MLX5_ASSERT(false); 3288 } 3289 3290 static __rte_always_inline enum mlx5_txcmp_code 3291 mlx5_tx_burst_single(struct mlx5_txq_data *__rte_restrict txq, 3292 struct rte_mbuf **__rte_restrict pkts, 3293 unsigned int pkts_n, 3294 struct mlx5_txq_local *__rte_restrict loc, 3295 unsigned int olx) 3296 { 3297 enum mlx5_txcmp_code ret; 3298 3299 ret = mlx5_tx_able_to_empw(txq, loc, olx, false); 3300 if (ret == MLX5_TXCMP_CODE_SINGLE) 3301 goto ordinary_send; 3302 MLX5_ASSERT(ret == MLX5_TXCMP_CODE_EMPW); 3303 for (;;) { 3304 /* Optimize for inline/no inline eMPW send. */ 3305 ret = (MLX5_TXOFF_CONFIG(INLINE)) ? 3306 mlx5_tx_burst_empw_inline 3307 (txq, pkts, pkts_n, loc, olx) : 3308 mlx5_tx_burst_empw_simple 3309 (txq, pkts, pkts_n, loc, olx); 3310 if (ret != MLX5_TXCMP_CODE_SINGLE) 3311 return ret; 3312 /* The resources to send one packet should remain. */ 3313 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3314 ordinary_send: 3315 ret = mlx5_tx_burst_single_send(txq, pkts, pkts_n, loc, olx); 3316 MLX5_ASSERT(ret != MLX5_TXCMP_CODE_SINGLE); 3317 if (ret != MLX5_TXCMP_CODE_EMPW) 3318 return ret; 3319 /* The resources to send one packet should remain. */ 3320 MLX5_ASSERT(loc->elts_free && loc->wqe_free); 3321 } 3322 } 3323 3324 /** 3325 * DPDK Tx callback template. This is configured template used to generate 3326 * routines optimized for specified offload setup. 
3327 * One of this generated functions is chosen at SQ configuration time. 3328 * 3329 * @param txq 3330 * Generic pointer to TX queue structure. 3331 * @param[in] pkts 3332 * Packets to transmit. 3333 * @param pkts_n 3334 * Number of packets in array. 3335 * @param olx 3336 * Configured offloads mask, presents the bits of MLX5_TXOFF_CONFIG_xxx 3337 * values. Should be static to take compile time static configuration 3338 * advantages. 3339 * 3340 * @return 3341 * Number of packets successfully transmitted (<= pkts_n). 3342 */ 3343 static __rte_always_inline uint16_t 3344 mlx5_tx_burst_tmpl(struct mlx5_txq_data *__rte_restrict txq, 3345 struct rte_mbuf **__rte_restrict pkts, 3346 uint16_t pkts_n, 3347 unsigned int olx) 3348 { 3349 struct mlx5_txq_local loc; 3350 enum mlx5_txcmp_code ret; 3351 unsigned int part; 3352 3353 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 3354 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 3355 if (unlikely(!pkts_n)) 3356 return 0; 3357 if (MLX5_TXOFF_CONFIG(INLINE)) 3358 loc.mbuf_free = 0; 3359 loc.pkts_sent = 0; 3360 loc.pkts_copy = 0; 3361 loc.wqe_last = NULL; 3362 3363 send_loop: 3364 loc.pkts_loop = loc.pkts_sent; 3365 /* 3366 * Check if there are some CQEs, if any: 3367 * - process an encountered errors 3368 * - process the completed WQEs 3369 * - free related mbufs 3370 * - doorbell the NIC about processed CQEs 3371 */ 3372 rte_prefetch0(*(pkts + loc.pkts_sent)); 3373 mlx5_tx_handle_completion(txq, olx); 3374 /* 3375 * Calculate the number of available resources - elts and WQEs. 3376 * There are two possible different scenarios: 3377 * - no data inlining into WQEs, one WQEBB may contains up to 3378 * four packets, in this case elts become scarce resource 3379 * - data inlining into WQEs, one packet may require multiple 3380 * WQEBBs, the WQEs become the limiting factor. 3381 */ 3382 MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail)); 3383 loc.elts_free = txq->elts_s - 3384 (uint16_t)(txq->elts_head - txq->elts_tail); 3385 MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi)); 3386 loc.wqe_free = txq->wqe_s - 3387 (uint16_t)(txq->wqe_ci - txq->wqe_pi); 3388 if (unlikely(!loc.elts_free || !loc.wqe_free)) 3389 goto burst_exit; 3390 for (;;) { 3391 /* 3392 * Fetch the packet from array. Usually this is the first 3393 * packet in series of multi/single segment packets. 3394 */ 3395 loc.mbuf = *(pkts + loc.pkts_sent); 3396 /* Dedicated branch for multi-segment packets. */ 3397 if (MLX5_TXOFF_CONFIG(MULTI) && 3398 unlikely(NB_SEGS(loc.mbuf) > 1)) { 3399 /* 3400 * Multi-segment packet encountered. 3401 * Hardware is able to process it only 3402 * with SEND/TSO opcodes, one packet 3403 * per WQE, do it in dedicated routine. 3404 */ 3405 enter_send_multi: 3406 MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy); 3407 part = loc.pkts_sent - loc.pkts_copy; 3408 if (!MLX5_TXOFF_CONFIG(INLINE) && part) { 3409 /* 3410 * There are some single-segment mbufs not 3411 * stored in elts. The mbufs must be in the 3412 * same order as WQEs, so we must copy the 3413 * mbufs to elts here, before the coming 3414 * multi-segment packet mbufs is appended. 
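 * The elts entries must follow the same order as the built WQEs,
 * otherwise the completion handling could free the wrong mbufs.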
	for (;;) {
		/*
		 * Fetch the packet from the array. Usually this is the first
		 * packet in a series of multi/single-segment packets.
		 */
		loc.mbuf = *(pkts + loc.pkts_sent);
		/* Dedicated branch for multi-segment packets. */
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    unlikely(NB_SEGS(loc.mbuf) > 1)) {
			/*
			 * Multi-segment packet encountered.
			 * Hardware is able to process it only
			 * with SEND/TSO opcodes, one packet
			 * per WQE, do it in a dedicated routine.
			 */
enter_send_multi:
			MLX5_ASSERT(loc.pkts_sent >= loc.pkts_copy);
			part = loc.pkts_sent - loc.pkts_copy;
			if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
				/*
				 * There are some single-segment mbufs not
				 * stored in elts. The mbufs must be in the
				 * same order as the WQEs, so we must copy
				 * the mbufs to elts here, before the mbufs
				 * of the coming multi-segment packet are
				 * appended.
				 */
				mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy,
						  part, olx);
				loc.pkts_copy = loc.pkts_sent;
			}
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_mseg(txq, pkts, pkts_n, &loc, olx);
			if (!MLX5_TXOFF_CONFIG(INLINE))
				loc.pkts_copy = loc.pkts_sent;
			/*
			 * These return code checks are expected
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT) {
				/*
				 * The routine returns this code when
				 * all packets are sent or there are not
				 * enough resources to complete the request.
				 */
				break;
			}
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				/*
				 * The routine returns this code when some
				 * error in the incoming packet format
				 * occurred.
				 */
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE) {
				/*
				 * A single-segment packet was encountered
				 * in the array, try to send it in the most
				 * optimized way, possibly engaging eMPW.
				 */
				goto enter_send_single;
			}
			if (MLX5_TXOFF_CONFIG(TSO) &&
			    ret == MLX5_TXCMP_CODE_TSO) {
				/*
				 * A single-segment TSO packet was
				 * encountered in the array.
				 */
				goto enter_send_tso;
			}
			/* We must not get here. Something is going wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/* Dedicated branch for single-segment TSO packets. */
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    unlikely(loc.mbuf->ol_flags & RTE_MBUF_F_TX_TCP_SEG)) {
			/*
			 * TSO might require a special way of inlining
			 * (dedicated parameters) and is sent with the
			 * MLX5_OPCODE_TSO opcode only, so handle it
			 * in a dedicated branch.
			 */
enter_send_tso:
			MLX5_ASSERT(NB_SEGS(loc.mbuf) == 1);
			MLX5_ASSERT(pkts_n > loc.pkts_sent);
			ret = mlx5_tx_burst_tso(txq, pkts, pkts_n, &loc, olx);
			/*
			 * These return code checks are expected
			 * to be optimized out due to routine inlining.
			 */
			if (ret == MLX5_TXCMP_CODE_EXIT)
				break;
			if (ret == MLX5_TXCMP_CODE_ERROR) {
				txq->stats.oerrors++;
				break;
			}
			if (ret == MLX5_TXCMP_CODE_SINGLE)
				goto enter_send_single;
			if (MLX5_TXOFF_CONFIG(MULTI) &&
			    ret == MLX5_TXCMP_CODE_MULTI) {
				/*
				 * A multi-segment packet was
				 * encountered in the array.
				 */
				goto enter_send_multi;
			}
			/* We must not get here. Something is going wrong. */
			MLX5_ASSERT(false);
			txq->stats.oerrors++;
			break;
		}
		/*
		 * The dedicated branch for single-segment packets
		 * without TSO. Often these can be sent using
		 * MLX5_OPCODE_EMPW with multiple packets in one WQE.
		 * The routine builds the WQEs until it encounters
		 * a TSO or multi-segment packet (in case these
		 * offloads are requested at SQ configuration time).
		 */
enter_send_single:
		MLX5_ASSERT(pkts_n > loc.pkts_sent);
		ret = mlx5_tx_burst_single(txq, pkts, pkts_n, &loc, olx);
		/*
		 * These return code checks are expected
		 * to be optimized out due to routine inlining.
		 */
		if (ret == MLX5_TXCMP_CODE_EXIT)
			break;
		if (ret == MLX5_TXCMP_CODE_ERROR) {
			txq->stats.oerrors++;
			break;
		}
		if (MLX5_TXOFF_CONFIG(MULTI) &&
		    ret == MLX5_TXCMP_CODE_MULTI) {
			/*
			 * A multi-segment packet was
			 * encountered in the array.
			 */
			goto enter_send_multi;
		}
		if (MLX5_TXOFF_CONFIG(TSO) &&
		    ret == MLX5_TXCMP_CODE_TSO) {
			/*
			 * A single-segment TSO packet was
			 * encountered in the array.
			 */
			goto enter_send_tso;
		}
		/* We must not get here. Something is going wrong. */
		MLX5_ASSERT(false);
		txq->stats.oerrors++;
		break;
	}
	/*
	 * The main Tx loop is completed, do the rest:
	 * - set the completion request if thresholds are reached
	 * - doorbell the hardware
	 * - copy the rest of the mbufs to elts (if any)
	 */
	MLX5_ASSERT(MLX5_TXOFF_CONFIG(INLINE) ||
		    loc.pkts_sent >= loc.pkts_copy);
	/* Take a shortcut if nothing was sent. */
	if (unlikely(loc.pkts_sent == loc.pkts_loop))
		goto burst_exit;
	/* Request CQE generation if limits are reached. */
	mlx5_tx_request_completion(txq, &loc, olx);
	/*
	 * Ring the QP doorbell immediately after WQE building completion
	 * to improve latency. The purely software-related data treatment
	 * can be completed after the doorbell. Tx CQEs for this SQ are
	 * processed in this thread only by polling.
	 *
	 * The rdma core library can map the doorbell register in two ways,
	 * depending on the environment variable "MLX5_SHUT_UP_BF":
	 *
	 * - as regular cached memory, if the variable is either missing or
	 *   set to zero. This type of mapping may cause significant latency
	 *   on doorbell register writes and requires an explicit memory
	 *   write barrier to mitigate this issue and prevent write combining.
	 *
	 * - as non-cached memory, if the variable is present and set to a
	 *   non-zero value. This type of mapping may impact performance under
	 *   heavy load, but the explicit write memory barrier is not
	 *   required, which may improve core performance.
	 *
	 * - the legacy behaviour (prior to the 19.08 release) was to use
	 *   heuristics to decide whether the write memory barrier should
	 *   be performed. This behaviour is selected by specifying
	 *   tx_db_nc=2; the write barrier is skipped if the application
	 *   provides the full recommended burst of packets, assuming the
	 *   next packets are coming and the write barrier will be issued on
	 *   the next burst (after descriptor writing, at least).
	 */
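	/*
	 * The last argument below encodes the write-barrier policy described
	 * above: the barrier is requested unless the doorbell is mapped
	 * non-cached (db_nc), or the heuristic (db_heu) applies and the
	 * application provided an exact multiple of MLX5_TX_DEFAULT_BURST
	 * packets.
	 */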
	mlx5_doorbell_ring(mlx5_tx_bfreg(txq),
			   *(volatile uint64_t *)loc.wqe_last, txq->wqe_ci,
			   txq->qp_db, !txq->db_nc &&
			   (!txq->db_heu || pkts_n % MLX5_TX_DEFAULT_BURST));
	/* Not all of the mbufs may be stored into elts yet. */
	part = MLX5_TXOFF_CONFIG(INLINE) ? 0 : loc.pkts_sent - loc.pkts_copy;
	if (!MLX5_TXOFF_CONFIG(INLINE) && part) {
		/*
		 * There are some single-segment mbufs not stored in elts.
		 * This can only happen if the last packet was single-segment.
		 * The copying is gathered into one place because it is
		 * a good opportunity to optimize it with SIMD.
		 * Unfortunately, if inlining is enabled, gaps in the pointer
		 * array may occur due to early freeing of inlined mbufs.
		 */
		mlx5_tx_copy_elts(txq, pkts + loc.pkts_copy, part, olx);
		loc.pkts_copy = loc.pkts_sent;
	}
	MLX5_ASSERT(txq->elts_s >= (uint16_t)(txq->elts_head - txq->elts_tail));
	MLX5_ASSERT(txq->wqe_s >= (uint16_t)(txq->wqe_ci - txq->wqe_pi));
	if (pkts_n > loc.pkts_sent) {
		/*
		 * If the burst size is large there might not be enough CQEs
		 * fetched from the completion queue and not enough resources
		 * freed to send all the packets.
		 */
		goto send_loop;
	}
burst_exit:
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += loc.pkts_sent;
#endif
	if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
		__mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
	return loc.pkts_sent;
}

#endif /* RTE_PMD_MLX5_TX_H_ */