/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2015 6WIND S.A.
 * Copyright 2015 Mellanox Technologies, Ltd
 */

#ifndef RTE_PMD_MLX5_RXTX_H_
#define RTE_PMD_MLX5_RXTX_H_

#include <stddef.h>
#include <stdint.h>
#include <sys/queue.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/mlx5dv.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_common.h>
#include <rte_hexdump.h>
#include <rte_atomic.h>

#include "mlx5_utils.h"
#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"

struct mlx5_rxq_stats {
	unsigned int idx; /**< Mapping index. */
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint64_t ipackets; /**< Total of successfully received packets. */
	uint64_t ibytes; /**< Total of successfully received bytes. */
#endif
	uint64_t idropped; /**< Total of packets dropped when RX ring full. */
	uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */
};

struct mlx5_txq_stats {
	unsigned int idx; /**< Mapping index. */
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint64_t opackets; /**< Total of successfully sent packets. */
	uint64_t obytes; /**< Total of successfully sent bytes. */
#endif
	uint64_t oerrors; /**< Total number of failed transmitted packets. */
};

struct priv;

/* Compressed CQE context. */
struct rxq_zip {
	uint16_t ai; /* Array index. */
	uint16_t ca; /* Current array index. */
	uint16_t na; /* Next array index. */
	uint16_t cq_ci; /* The next CQE. */
	uint32_t cqe_cnt; /* Number of CQEs. */
};

/* Multi-Packet RQ buffer header. */
struct mlx5_mprq_buf {
	struct rte_mempool *mp;
	rte_atomic16_t refcnt; /* Atomically accessed refcnt. */
	uint8_t pad[RTE_PKTMBUF_HEADROOM]; /* Headroom for the first packet. */
} __rte_cache_aligned;

/* Get pointer to the first stride. */
#define mlx5_mprq_buf_addr(ptr) ((ptr) + 1)

/* RX queue descriptor. */
struct mlx5_rxq_data {
	unsigned int csum:1; /* Enable checksum offloading. */
	unsigned int hw_timestamp:1; /* Enable HW timestamp. */
	unsigned int vlan_strip:1; /* Enable VLAN stripping. */
	unsigned int crc_present:1; /* CRC must be subtracted. */
	unsigned int sges_n:2; /* Log 2 of SGEs (max buffers per packet). */
	unsigned int cqe_n:4; /* Log 2 of CQ elements. */
	unsigned int elts_n:4; /* Log 2 of Mbufs. */
	unsigned int rss_hash:1; /* RSS hash result is enabled. */
	unsigned int mark:1; /* Marked flow available on the queue. */
	unsigned int strd_num_n:5; /* Log 2 of the number of strides. */
	unsigned int strd_sz_n:4; /* Log 2 of stride size. */
	unsigned int strd_shift_en:1; /* Enable 2-byte shift on a stride. */
	unsigned int :6; /* Remaining bits. */
	volatile uint32_t *rq_db;
	volatile uint32_t *cq_db;
	uint16_t port_id;
	uint16_t rq_ci;
	uint16_t strd_ci; /* Stride index in a WQE for Multi-Packet RQ. */
	uint16_t rq_pi;
	uint16_t cq_ci;
	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
	uint16_t mprq_max_memcpy_len; /* Maximum size of packet to memcpy. */
	volatile void *wqes;
	volatile struct mlx5_cqe(*cqes)[];
	struct rxq_zip zip; /* Compressed context. */
	RTE_STD_C11
	union {
		struct rte_mbuf *(*elts)[];
		struct mlx5_mprq_buf *(*mprq_bufs)[];
	};
	struct rte_mempool *mp;
	struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */
	struct mlx5_mprq_buf *mprq_repl; /* Stashed mbuf for replenish. */
	struct mlx5_rxq_stats stats;
	uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */
	struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
	void *cq_uar; /* CQ user access region. */
	uint32_t cqn; /* CQ number. */
	uint8_t cq_arm_sn; /* CQ arm seq number. */
	uint32_t tunnel; /* Tunnel information. */
} __rte_cache_aligned;
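
/*
 * Illustrative sketch, not part of the PMD API: how the address of a given
 * stride inside a Multi-Packet RQ buffer can be derived from the fields
 * above. Stride size is stored as a log2 value and strides are laid out
 * contiguously after the buffer header returned by mlx5_mprq_buf_addr().
 * The helper name is hypothetical; the real datapath is in the Rx burst
 * routines.
 */
static inline void *
mlx5_mprq_stride_addr_sketch(struct mlx5_rxq_data *rxq,
			     struct mlx5_mprq_buf *buf, uint16_t strd_idx)
{
	const uint32_t strd_sz = 1 << rxq->strd_sz_n;

	/* First stride starts right after the buffer header and headroom. */
	return (uint8_t *)mlx5_mprq_buf_addr(buf) + strd_idx * strd_sz;
}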

/* Verbs Rx queue elements. */
struct mlx5_rxq_ibv {
	LIST_ENTRY(mlx5_rxq_ibv) next; /* Pointer to the next element. */
	rte_atomic32_t refcnt; /* Reference counter. */
	struct mlx5_rxq_ctrl *rxq_ctrl; /* Back pointer to parent. */
	struct ibv_cq *cq; /* Completion Queue. */
	struct ibv_wq *wq; /* Work Queue. */
	struct ibv_comp_channel *channel;
};

/* RX queue control descriptor. */
struct mlx5_rxq_ctrl {
	LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
	rte_atomic32_t refcnt; /* Reference counter. */
	struct mlx5_rxq_ibv *ibv; /* Verbs elements. */
	struct priv *priv; /* Back pointer to private data. */
	struct mlx5_rxq_data rxq; /* Data path structure. */
	unsigned int socket; /* CPU socket ID for allocations. */
	uint32_t tunnel_types[16]; /* Tunnel type counter. */
	unsigned int irq:1; /* Whether IRQ is enabled. */
	uint16_t idx; /* Queue index. */
};

/* Indirection table. */
struct mlx5_ind_table_ibv {
	LIST_ENTRY(mlx5_ind_table_ibv) next; /* Pointer to the next element. */
	rte_atomic32_t refcnt; /* Reference counter. */
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	uint32_t queues_n; /**< Number of queues in the list. */
	uint16_t queues[]; /**< Queue list. */
};

/* Hash Rx queue. */
struct mlx5_hrxq {
	LIST_ENTRY(mlx5_hrxq) next; /* Pointer to the next element. */
	rte_atomic32_t refcnt; /* Reference counter. */
	struct mlx5_ind_table_ibv *ind_table; /* Indirection table. */
	struct ibv_qp *qp; /* Verbs queue pair. */
	uint64_t hash_fields; /* Verbs Hash fields. */
	uint32_t tunnel; /* Tunnel type. */
	uint32_t rss_level; /* RSS on tunnel level. */
	uint32_t rss_key_len; /* Hash key length in bytes. */
	uint8_t rss_key[]; /* Hash key. */
};
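
/*
 * Illustrative sketch, not part of the PMD API: the control objects above
 * are shared through simple reference counting on their refcnt field. A
 * hypothetical helper showing only the acquire side of that pattern,
 * assuming the caller already holds a valid pointer obtained from the
 * control path:
 */
static inline struct mlx5_ind_table_ibv *
mlx5_ind_table_acquire_sketch(struct mlx5_ind_table_ibv *ind_tbl)
{
	if (ind_tbl != NULL)
		rte_atomic32_inc(&ind_tbl->refcnt);
	return ind_tbl;
}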

/* TX queue descriptor. */
__extension__
struct mlx5_txq_data {
	uint16_t elts_head; /* Current counter in (*elts)[]. */
	uint16_t elts_tail; /* Counter of first element awaiting completion. */
	uint16_t elts_comp; /* Counter since last completion request. */
	uint16_t mpw_comp; /* WQ index since last completion request. */
	uint16_t cq_ci; /* Consumer index for completion queue. */
#ifndef NDEBUG
	uint16_t cq_pi; /* Producer index for completion queue. */
#endif
	uint16_t wqe_ci; /* Consumer index for work queue. */
	uint16_t wqe_pi; /* Producer index for work queue. */
	uint16_t elts_n:4; /* (*elts)[] length (in log2). */
	uint16_t cqe_n:4; /* Number of CQ elements (in log2). */
	uint16_t wqe_n:4; /* Number of WQ elements (in log2). */
	uint16_t tso_en:1; /* When set, hardware TSO is enabled. */
	uint16_t tunnel_en:1;
	/* When set, TX offloads for tunneled packets are supported. */
	uint16_t swp_en:1; /* Whether SW parser is enabled. */
	uint16_t mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
	uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
	uint16_t inline_max_packet_sz; /* Max packet size for inlining. */
	uint32_t qp_num_8s; /* QP number shifted by 8. */
	uint64_t offloads; /* Offloads for Tx Queue. */
	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
	volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
	volatile void *wqes; /* Work queue (use volatile to write into). */
	volatile uint32_t *qp_db; /* Work queue doorbell. */
	volatile uint32_t *cq_db; /* Completion queue doorbell. */
	volatile void *bf_reg; /* Blueflame register remapped. */
	struct rte_mbuf *(*elts)[]; /* TX elements. */
	struct mlx5_txq_stats stats; /* TX queue counters. */
} __rte_cache_aligned;

/* Verbs Tx queue elements. */
struct mlx5_txq_ibv {
	LIST_ENTRY(mlx5_txq_ibv) next; /* Pointer to the next element. */
	rte_atomic32_t refcnt; /* Reference counter. */
	struct mlx5_txq_ctrl *txq_ctrl; /* Pointer to the control queue. */
	struct ibv_cq *cq; /* Completion Queue. */
	struct ibv_qp *qp; /* Queue Pair. */
};

/* TX queue control descriptor. */
struct mlx5_txq_ctrl {
	LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */
	rte_atomic32_t refcnt; /* Reference counter. */
	unsigned int socket; /* CPU socket ID for allocations. */
	unsigned int max_inline_data; /* Max inline data. */
	unsigned int max_tso_header; /* Max TSO header size. */
	struct mlx5_txq_ibv *ibv; /* Verbs queue object. */
	struct priv *priv; /* Back pointer to private data. */
	struct mlx5_txq_data txq; /* Data path structure. */
	off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */
	volatile void *bf_reg_orig; /* Blueflame register from verbs. */
	uint16_t idx; /* Queue index. */
};
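
/*
 * Illustrative sketch, not part of the PMD API: elts_head and elts_tail are
 * free-running 16-bit counters, so the number of mbufs still held by the Tx
 * ring is their difference modulo 2^16, and the ring size is 1 << elts_n.
 * The helper name is hypothetical.
 */
static inline uint16_t
mlx5_txq_free_slots_sketch(const struct mlx5_txq_data *txq)
{
	const uint16_t elts_n = 1 << txq->elts_n;
	const uint16_t used = (uint16_t)(txq->elts_head - txq->elts_tail);

	return elts_n - used;
}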

/* mlx5_rxq.c */

extern uint8_t rss_hash_default_key[];
extern const size_t rss_hash_default_key_len;

int mlx5_check_mprq_support(struct rte_eth_dev *dev);
int mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq);
int mlx5_mprq_enabled(struct rte_eth_dev *dev);
int mlx5_mprq_free_mp(struct rte_eth_dev *dev);
int mlx5_mprq_alloc_mp(struct rte_eth_dev *dev);
void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl);
int mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
			unsigned int socket, const struct rte_eth_rxconf *conf,
			struct rte_mempool *mp);
void mlx5_rx_queue_release(void *dpdk_rxq);
int mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev);
void mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev);
int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id);
struct mlx5_rxq_ibv *mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx);
struct mlx5_rxq_ibv *mlx5_rxq_ibv_get(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_rxq_ibv_release(struct mlx5_rxq_ibv *rxq_ibv);
int mlx5_rxq_ibv_releasable(struct mlx5_rxq_ibv *rxq_ibv);
int mlx5_rxq_ibv_verify(struct rte_eth_dev *dev);
struct mlx5_rxq_ctrl *mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx,
				   uint16_t desc, unsigned int socket,
				   const struct rte_eth_rxconf *conf,
				   struct rte_mempool *mp);
struct mlx5_rxq_ctrl *mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_rxq_verify(struct rte_eth_dev *dev);
int rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl);
int rxq_alloc_mprq_buf(struct mlx5_rxq_ctrl *rxq_ctrl);
struct mlx5_ind_table_ibv *mlx5_ind_table_ibv_new(struct rte_eth_dev *dev,
						  const uint16_t *queues,
						  uint32_t queues_n);
struct mlx5_ind_table_ibv *mlx5_ind_table_ibv_get(struct rte_eth_dev *dev,
						  const uint16_t *queues,
						  uint32_t queues_n);
int mlx5_ind_table_ibv_release(struct rte_eth_dev *dev,
			       struct mlx5_ind_table_ibv *ind_tbl);
int mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev);
struct mlx5_hrxq *mlx5_hrxq_new(struct rte_eth_dev *dev,
				const uint8_t *rss_key, uint32_t rss_key_len,
				uint64_t hash_fields,
				const uint16_t *queues, uint32_t queues_n,
				uint32_t tunnel, uint32_t rss_level);
struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev,
				const uint8_t *rss_key, uint32_t rss_key_len,
				uint64_t hash_fields,
				const uint16_t *queues, uint32_t queues_n,
				uint32_t tunnel, uint32_t rss_level);
int mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hrxq);
int mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev);
uint64_t mlx5_get_rx_port_offloads(void);
uint64_t mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev);
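
/*
 * Illustrative sketch, not part of the PMD API: control objects declared
 * above are reference counted, so a typical caller pairs a *_get() with a
 * *_release() around the access. The helper name is hypothetical and only
 * shows the pattern; error handling is reduced to a NULL check.
 */
static inline int
mlx5_rxq_is_mprq_sketch(struct rte_eth_dev *dev, uint16_t idx)
{
	struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_get(dev, idx);
	int ret;

	if (rxq_ctrl == NULL)
		return -1;
	ret = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq);
	mlx5_rxq_release(dev, idx);
	return ret;
}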

/* mlx5_txq.c */

int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
			unsigned int socket, const struct rte_eth_txconf *conf);
void mlx5_tx_queue_release(void *dpdk_txq);
int mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd);
struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx);
struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv);
int mlx5_txq_ibv_releasable(struct mlx5_txq_ibv *txq_ibv);
int mlx5_txq_ibv_verify(struct rte_eth_dev *dev);
struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx,
				   uint16_t desc, unsigned int socket,
				   const struct rte_eth_txconf *conf);
struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx);
int mlx5_txq_verify(struct rte_eth_dev *dev);
void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl);
uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev);

/* mlx5_rxtx.c */

extern uint32_t mlx5_ptype_table[];
extern uint8_t mlx5_cksum_table[];
extern uint8_t mlx5_swp_types_table[];

void mlx5_set_ptype_table(void);
void mlx5_set_cksum_table(void);
void mlx5_set_swp_types_table(void);
uint16_t mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts,
		       uint16_t pkts_n);
uint16_t mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts,
			   uint16_t pkts_n);
uint16_t mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
				  uint16_t pkts_n);
uint16_t mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts,
			    uint16_t pkts_n);
uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
void mlx5_mprq_buf_free_cb(void *addr, void *opaque);
void mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf);
uint16_t mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts,
			    uint16_t pkts_n);
uint16_t removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts,
			  uint16_t pkts_n);
uint16_t removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts,
			  uint16_t pkts_n);
int mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset);
int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset);

/* Vectorized version of mlx5_rxtx.c */
int mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev);
int mlx5_check_vec_tx_support(struct rte_eth_dev *dev);
int mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq_data);
int mlx5_check_vec_rx_support(struct rte_eth_dev *dev);
uint16_t mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
			       uint16_t pkts_n);
uint16_t mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
			   uint16_t pkts_n);
uint16_t mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts,
			   uint16_t pkts_n);

/* mlx5_mr.c */

void mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl);
uint32_t mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr);
uint32_t mlx5_tx_addr2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr);
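
/*
 * Illustrative sketch, not part of the PMD API: one possible way the Rx
 * burst routines declared above could be selected, assuming vector support
 * takes precedence over Multi-Packet RQ. The real selection is done by the
 * driver's control path (mlx5_select_rx_function()); the typedef and helper
 * names below are hypothetical.
 */
typedef uint16_t (*mlx5_rx_burst_sketch_t)(void *, struct rte_mbuf **,
					   uint16_t);

static inline mlx5_rx_burst_sketch_t
mlx5_select_rx_burst_sketch(struct rte_eth_dev *dev)
{
	if (mlx5_check_vec_rx_support(dev) > 0)
		return mlx5_rx_burst_vec;
	if (mlx5_mprq_enabled(dev))
		return mlx5_rx_burst_mprq;
	return mlx5_rx_burst;
}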

#ifndef NDEBUG

/**
 * Verify or set magic value in CQE.
 *
 * @param cqe
 *   Pointer to CQE.
 *
 * @return
 *   0 the first time, 1 if the magic value was already set.
 */
static inline int
check_cqe_seen(volatile struct mlx5_cqe *cqe)
{
	static const uint8_t magic[] = "seen";
	volatile uint8_t (*buf)[sizeof(cqe->rsvd0)] = &cqe->rsvd0;
	int ret = 1;
	unsigned int i;

	for (i = 0; i < sizeof(magic) && i < sizeof(*buf); ++i)
		if (!ret || (*buf)[i] != magic[i]) {
			ret = 0;
			(*buf)[i] = magic[i];
		}
	return ret;
}

#endif /* NDEBUG */

/**
 * Check whether CQE is valid.
 *
 * @param cqe
 *   Pointer to CQE.
 * @param cqes_n
 *   Size of completion queue.
 * @param ci
 *   Consumer index.
 *
 * @return
 *   0 on success, 1 on failure.
 */
static __rte_always_inline int
check_cqe(volatile struct mlx5_cqe *cqe,
	  unsigned int cqes_n, const uint16_t ci)
{
	uint16_t idx = ci & cqes_n;
	uint8_t op_own = cqe->op_own;
	uint8_t op_owner = MLX5_CQE_OWNER(op_own);
	uint8_t op_code = MLX5_CQE_OPCODE(op_own);

	if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID)))
		return 1; /* No CQE. */
#ifndef NDEBUG
	if ((op_code == MLX5_CQE_RESP_ERR) ||
	    (op_code == MLX5_CQE_REQ_ERR)) {
		volatile struct mlx5_err_cqe *err_cqe = (volatile void *)cqe;
		uint8_t syndrome = err_cqe->syndrome;

		if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) ||
		    (syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR))
			return 0;
		if (!check_cqe_seen(cqe)) {
			DRV_LOG(ERR,
				"unexpected CQE error %u (0x%02x) syndrome"
				" 0x%02x",
				op_code, op_code, syndrome);
			rte_hexdump(stderr, "MLX5 Error CQE:",
				    (const void *)((uintptr_t)err_cqe),
				    sizeof(*err_cqe));
		}
		return 1;
	} else if ((op_code != MLX5_CQE_RESP_SEND) &&
		   (op_code != MLX5_CQE_REQ)) {
		if (!check_cqe_seen(cqe)) {
			DRV_LOG(ERR, "unexpected CQE opcode %u (0x%02x)",
				op_code, op_code);
			rte_hexdump(stderr, "MLX5 CQE:",
				    (const void *)((uintptr_t)cqe),
				    sizeof(*cqe));
		}
		return 1;
	}
#endif /* NDEBUG */
	return 0;
}
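
/*
 * Illustrative sketch, not part of the PMD API: how a poll loop can use
 * check_cqe() to detect whether the CQE at the current consumer index is
 * owned by software. Compressed CQE sessions (rxq->zip) are ignored here;
 * the helper name is hypothetical.
 */
static inline int
mlx5_rxq_cqe_pending_sketch(struct mlx5_rxq_data *rxq)
{
	const unsigned int cqe_n = 1 << rxq->cqe_n;
	const unsigned int cqe_cnt = cqe_n - 1;
	volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];

	/* check_cqe() returns 1 when no valid CQE is available. */
	return !check_cqe(cqe, cqe_n, rxq->cq_ci);
}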

/**
 * Return the address of the WQE.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param ci
 *   WQE consumer index.
 *
 * @return
 *   WQE address.
 */
static inline uintptr_t *
tx_mlx5_wqe(struct mlx5_txq_data *txq, uint16_t ci)
{
	ci &= ((1 << txq->wqe_n) - 1);
	return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
}

/**
 * Manage TX completions.
 *
 * When sending a burst, mlx5_tx_burst() posts several WRs.
 *
 * @param txq
 *   Pointer to TX queue structure.
 */
static __rte_always_inline void
mlx5_tx_complete(struct mlx5_txq_data *txq)
{
	const uint16_t elts_n = 1 << txq->elts_n;
	const uint16_t elts_m = elts_n - 1;
	const unsigned int cqe_n = 1 << txq->cqe_n;
	const unsigned int cqe_cnt = cqe_n - 1;
	uint16_t elts_free = txq->elts_tail;
	uint16_t elts_tail;
	uint16_t cq_ci = txq->cq_ci;
	volatile struct mlx5_cqe *cqe = NULL;
	volatile struct mlx5_wqe_ctrl *ctrl;
	struct rte_mbuf *m, *free[elts_n];
	struct rte_mempool *pool = NULL;
	unsigned int blk_n = 0;

	cqe = &(*txq->cqes)[cq_ci & cqe_cnt];
	if (unlikely(check_cqe(cqe, cqe_n, cq_ci)))
		return;
#ifndef NDEBUG
	if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) ||
	    (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) {
		if (!check_cqe_seen(cqe)) {
			DRV_LOG(ERR, "unexpected error CQE, Tx stopped");
			rte_hexdump(stderr, "MLX5 TXQ:",
				    (const void *)((uintptr_t)txq->wqes),
				    ((1 << txq->wqe_n) *
				     MLX5_WQE_SIZE));
		}
		return;
	}
#endif /* NDEBUG */
	++cq_ci;
	txq->wqe_pi = rte_be_to_cpu_16(cqe->wqe_counter);
	ctrl = (volatile struct mlx5_wqe_ctrl *)
		tx_mlx5_wqe(txq, txq->wqe_pi);
	elts_tail = ctrl->ctrl3;
	assert((elts_tail & elts_m) < (1 << txq->wqe_n));
	/* Free buffers. */
	while (elts_free != elts_tail) {
		m = rte_pktmbuf_prefree_seg((*txq->elts)[elts_free++ & elts_m]);
		if (likely(m != NULL)) {
			if (likely(m->pool == pool)) {
				free[blk_n++] = m;
			} else {
				if (likely(pool != NULL))
					rte_mempool_put_bulk(pool,
							     (void *)free,
							     blk_n);
				free[0] = m;
				pool = m->pool;
				blk_n = 1;
			}
		}
	}
	if (blk_n)
		rte_mempool_put_bulk(pool, (void *)free, blk_n);
#ifndef NDEBUG
	elts_free = txq->elts_tail;
	/* Poisoning. */
	while (elts_free != elts_tail) {
		memset(&(*txq->elts)[elts_free & elts_m],
		       0x66,
		       sizeof((*txq->elts)[elts_free & elts_m]));
		++elts_free;
	}
#endif
	txq->cq_ci = cq_ci;
	txq->elts_tail = elts_tail;
	/* Update the consumer index. */
	rte_compiler_barrier();
	*txq->cq_db = rte_cpu_to_be_32(cq_ci);
}
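
/*
 * Illustrative sketch, not part of the PMD API: a burst routine typically
 * starts by reclaiming completed mbufs and then bounds the burst by the
 * number of free element slots, roughly as the mlx5_tx_burst() variants do.
 * The helper name is hypothetical.
 */
static inline uint16_t
mlx5_txq_prepare_burst_sketch(struct mlx5_txq_data *txq, uint16_t pkts_n)
{
	const uint16_t elts_n = 1 << txq->elts_n;
	uint16_t max_elts;

	/* Reclaim mbufs of WQEs reported complete by the NIC. */
	mlx5_tx_complete(txq);
	max_elts = elts_n - (uint16_t)(txq->elts_head - txq->elts_tail);
	return RTE_MIN(pkts_n, max_elts);
}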

/**
 * Query LKey from a packet buffer for Rx. No need to flush local caches for Rx
 * as mempool is pre-configured and static.
 *
 * @param rxq
 *   Pointer to Rx queue structure.
 * @param addr
 *   Address to search.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static __rte_always_inline uint32_t
mlx5_rx_addr2mr(struct mlx5_rxq_data *rxq, uintptr_t addr)
{
	struct mlx5_mr_ctrl *mr_ctrl = &rxq->mr_ctrl;
	uint32_t lkey;

	/* Linear search on MR cache array. */
	lkey = mlx5_mr_lookup_cache(mr_ctrl->cache, &mr_ctrl->mru,
				    MLX5_MR_CACHE_N, addr);
	if (likely(lkey != UINT32_MAX))
		return lkey;
	/* Take slower bottom-half (binary search) on miss. */
	return mlx5_rx_addr2mr_bh(rxq, addr);
}

#define mlx5_rx_mb2mr(rxq, mb) mlx5_rx_addr2mr(rxq, (uintptr_t)((mb)->buf_addr))

/**
 * Query LKey from a packet buffer for Tx. If not found, add the mempool.
 *
 * @param txq
 *   Pointer to Tx queue structure.
 * @param addr
 *   Address to search.
 *
 * @return
 *   Searched LKey on success, UINT32_MAX on no match.
 */
static __rte_always_inline uint32_t
mlx5_tx_addr2mr(struct mlx5_txq_data *txq, uintptr_t addr)
{
	struct mlx5_mr_ctrl *mr_ctrl = &txq->mr_ctrl;
	uint32_t lkey;

	/* Check generation bit to see if there's any change on existing MRs. */
	if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
		mlx5_mr_flush_local_cache(mr_ctrl);
	/* Linear search on MR cache array. */
	lkey = mlx5_mr_lookup_cache(mr_ctrl->cache, &mr_ctrl->mru,
				    MLX5_MR_CACHE_N, addr);
	if (likely(lkey != UINT32_MAX))
		return lkey;
	/* Take slower bottom-half (binary search) on miss. */
	return mlx5_tx_addr2mr_bh(txq, addr);
}

#define mlx5_tx_mb2mr(txq, mb) mlx5_tx_addr2mr(txq, (uintptr_t)((mb)->buf_addr))

/**
 * Ring TX queue doorbell and flush the update if requested.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param wqe
 *   Pointer to the last WQE posted in the NIC.
 * @param cond
 *   Request for write memory barrier after BlueFlame update.
 */
static __rte_always_inline void
mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe,
		       int cond)
{
	uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
	volatile uint64_t *src = ((volatile uint64_t *)wqe);

	rte_cio_wmb();
	*txq->qp_db = rte_cpu_to_be_32(txq->wqe_ci);
	/* Ensure ordering between DB record and BF copy. */
	rte_wmb();
	*dst = *src;
	if (cond)
		rte_wmb();
}
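
/*
 * Illustrative sketch, not part of the PMD API: filling a single WQE data
 * segment for an mbuf and ringing the doorbell. The LKey is resolved through
 * the per-queue MR cache (mlx5_tx_mb2mr() above); the helper name is
 * hypothetical and assumes 'wqe' and 'dseg' already point into the work
 * queue and that one data segment describes the whole packet.
 */
static inline void
mlx5_tx_post_one_sketch(struct mlx5_txq_data *txq,
			volatile struct mlx5_wqe *wqe,
			volatile struct mlx5_wqe_data_seg *dseg,
			struct rte_mbuf *buf)
{
	dseg->byte_count = rte_cpu_to_be_32(rte_pktmbuf_data_len(buf));
	dseg->lkey = mlx5_tx_mb2mr(txq, buf);
	dseg->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(buf, uintptr_t));
	/* Flush the doorbell record and copy the WQE to the BlueFlame page. */
	mlx5_tx_dbrec_cond_wmb(txq, wqe, 1);
}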

/**
 * Ring TX queue doorbell and flush the update by write memory barrier.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param wqe
 *   Pointer to the last WQE posted in the NIC.
 */
static __rte_always_inline void
mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
{
	mlx5_tx_dbrec_cond_wmb(txq, wqe, 1);
}

/**
 * Convert mbuf to Verbs SWP.
 *
 * @param txq
 *   Pointer to the Tx queue.
 * @param buf
 *   Pointer to the mbuf.
 * @param offsets
 *   Pointer to the SWP header offsets.
 * @param swp_types
 *   Pointer to the SWP header types.
 */
static __rte_always_inline void
txq_mbuf_to_swp(struct mlx5_txq_data *txq, struct rte_mbuf *buf,
		uint8_t *offsets, uint8_t *swp_types)
{
	const uint64_t vlan = buf->ol_flags & PKT_TX_VLAN_PKT;
	const uint64_t tunnel = buf->ol_flags & PKT_TX_TUNNEL_MASK;
	const uint64_t tso = buf->ol_flags & PKT_TX_TCP_SEG;
	const uint64_t csum_flags = buf->ol_flags & PKT_TX_L4_MASK;
	const uint64_t inner_ip =
		buf->ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6);
	const uint64_t ol_flags_mask = PKT_TX_L4_MASK | PKT_TX_IPV6 |
				       PKT_TX_OUTER_IPV6;
	uint16_t idx;
	uint16_t off;

	if (likely(!txq->swp_en || (tunnel != PKT_TX_TUNNEL_UDP &&
				    tunnel != PKT_TX_TUNNEL_IP)))
		return;
	/*
	 * The index should have:
	 * bit[0:1] = PKT_TX_L4_MASK
	 * bit[4] = PKT_TX_IPV6
	 * bit[8] = PKT_TX_OUTER_IPV6
	 * bit[9] = PKT_TX_OUTER_UDP
	 */
	idx = (buf->ol_flags & ol_flags_mask) >> 52;
	if (tunnel == PKT_TX_TUNNEL_UDP)
		idx |= 1 << 9;
	*swp_types = mlx5_swp_types_table[idx];
	/*
	 * Set offsets for SW parser. Since ConnectX-5, SW parser just
	 * complements HW parser. SW parser starts to engage only if HW parser
	 * can't reach a header. For the older devices, HW parser will not kick
	 * in if any of SWP offsets is set. Therefore, all of the L3 offsets
	 * should be set regardless of HW offload.
	 */
	off = buf->outer_l2_len + (vlan ? sizeof(struct vlan_hdr) : 0);
	offsets[1] = off >> 1; /* Outer L3 offset. */
	off += buf->outer_l3_len;
	if (tunnel == PKT_TX_TUNNEL_UDP)
		offsets[0] = off >> 1; /* Outer L4 offset. */
	if (inner_ip) {
		off += buf->l2_len;
		offsets[3] = off >> 1; /* Inner L3 offset. */
		if (csum_flags == PKT_TX_TCP_CKSUM || tso ||
		    csum_flags == PKT_TX_UDP_CKSUM) {
			off += buf->l3_len;
			offsets[2] = off >> 1; /* Inner L4 offset. */
		}
	}
}

/**
 * Convert the Checksum offloads to Verbs.
 *
 * @param buf
 *   Pointer to the mbuf.
 *
 * @return
 *   Converted checksum flags.
 */
static __rte_always_inline uint8_t
txq_ol_cksum_to_cs(struct rte_mbuf *buf)
{
	uint32_t idx;
	uint8_t is_tunnel = !!(buf->ol_flags & PKT_TX_TUNNEL_MASK);
	const uint64_t ol_flags_mask = PKT_TX_TCP_SEG | PKT_TX_L4_MASK |
				       PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM;

	/*
	 * The index should have:
	 * bit[0] = PKT_TX_TCP_SEG
	 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM
	 * bit[4] = PKT_TX_IP_CKSUM
	 * bit[8] = PKT_TX_OUTER_IP_CKSUM
	 * bit[9] = tunnel
	 */
	idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9);
	return mlx5_cksum_table[idx];
}

/**
 * Count the number of contiguous single segment packets.
 *
 * @param pkts
 *   Pointer to array of packets.
 * @param pkts_n
 *   Number of packets.
 *
 * @return
 *   Number of contiguous single segment packets.
 */
static __rte_always_inline unsigned int
txq_count_contig_single_seg(struct rte_mbuf **pkts, uint16_t pkts_n)
{
	unsigned int pos;

	if (!pkts_n)
		return 0;
	/* Count the number of contiguous single segment packets. */
	for (pos = 0; pos < pkts_n; ++pos)
		if (NB_SEGS(pkts[pos]) > 1)
			break;
	return pos;
}

/**
 * Count the number of contiguous multi-segment packets.
 *
 * @param pkts
 *   Pointer to array of packets.
 * @param pkts_n
 *   Number of packets.
 *
 * @return
 *   Number of contiguous multi-segment packets.
 */
static __rte_always_inline unsigned int
txq_count_contig_multi_seg(struct rte_mbuf **pkts, uint16_t pkts_n)
{
	unsigned int pos;

	if (!pkts_n)
		return 0;
	/* Count the number of contiguous multi-segment packets. */
	for (pos = 0; pos < pkts_n; ++pos)
		if (NB_SEGS(pkts[pos]) == 1)
			break;
	return pos;
}

#endif /* RTE_PMD_MLX5_RXTX_H_ */