1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2015 6WIND S.A. 3 * Copyright 2015 Mellanox Technologies, Ltd 4 */ 5 6 #ifndef RTE_PMD_MLX5_RXTX_H_ 7 #define RTE_PMD_MLX5_RXTX_H_ 8 9 #include <stddef.h> 10 #include <stdint.h> 11 #include <sys/queue.h> 12 13 /* Verbs header. */ 14 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 15 #ifdef PEDANTIC 16 #pragma GCC diagnostic ignored "-Wpedantic" 17 #endif 18 #include <infiniband/verbs.h> 19 #include <infiniband/mlx5dv.h> 20 #ifdef PEDANTIC 21 #pragma GCC diagnostic error "-Wpedantic" 22 #endif 23 24 #include <rte_mbuf.h> 25 #include <rte_mempool.h> 26 #include <rte_common.h> 27 #include <rte_hexdump.h> 28 #include <rte_atomic.h> 29 #include <rte_spinlock.h> 30 #include <rte_io.h> 31 32 #include "mlx5_utils.h" 33 #include "mlx5.h" 34 #include "mlx5_mr.h" 35 #include "mlx5_autoconf.h" 36 #include "mlx5_defs.h" 37 #include "mlx5_prm.h" 38 39 /* Support tunnel matching. */ 40 #define MLX5_FLOW_TUNNEL 5 41 42 struct mlx5_rxq_stats { 43 unsigned int idx; /**< Mapping index. */ 44 #ifdef MLX5_PMD_SOFT_COUNTERS 45 uint64_t ipackets; /**< Total of successfully received packets. */ 46 uint64_t ibytes; /**< Total of successfully received bytes. */ 47 #endif 48 uint64_t idropped; /**< Total of packets dropped when RX ring full. */ 49 uint64_t rx_nombuf; /**< Total of RX mbuf allocation failures. */ 50 }; 51 52 struct mlx5_txq_stats { 53 unsigned int idx; /**< Mapping index. */ 54 #ifdef MLX5_PMD_SOFT_COUNTERS 55 uint64_t opackets; /**< Total of successfully sent packets. */ 56 uint64_t obytes; /**< Total of successfully sent bytes. */ 57 #endif 58 uint64_t oerrors; /**< Total number of failed transmitted packets. */ 59 }; 60 61 struct priv; 62 63 /* Compressed CQE context. */ 64 struct rxq_zip { 65 uint16_t ai; /* Array index. */ 66 uint16_t ca; /* Current array index. */ 67 uint16_t na; /* Next array index. */ 68 uint16_t cq_ci; /* The next CQE. */ 69 uint32_t cqe_cnt; /* Number of CQEs. */ 70 }; 71 72 /* Multi-Packet RQ buffer header. */ 73 struct mlx5_mprq_buf { 74 struct rte_mempool *mp; 75 rte_atomic16_t refcnt; /* Atomically accessed refcnt. */ 76 uint8_t pad[RTE_PKTMBUF_HEADROOM]; /* Headroom for the first packet. */ 77 } __rte_cache_aligned; 78 79 /* Get pointer to the first stride. */ 80 #define mlx5_mprq_buf_addr(ptr) ((ptr) + 1) 81 82 /* RX queue descriptor. */ 83 struct mlx5_rxq_data { 84 unsigned int csum:1; /* Enable checksum offloading. */ 85 unsigned int hw_timestamp:1; /* Enable HW timestamp. */ 86 unsigned int vlan_strip:1; /* Enable VLAN stripping. */ 87 unsigned int crc_present:1; /* CRC must be subtracted. */ 88 unsigned int sges_n:2; /* Log 2 of SGEs (max buffers per packet). */ 89 unsigned int cqe_n:4; /* Log 2 of CQ elements. */ 90 unsigned int elts_n:4; /* Log 2 of Mbufs. */ 91 unsigned int rss_hash:1; /* RSS hash result is enabled. */ 92 unsigned int mark:1; /* Marked flow available on the queue. */ 93 unsigned int strd_num_n:5; /* Log 2 of the number of stride. */ 94 unsigned int strd_sz_n:4; /* Log 2 of stride size. */ 95 unsigned int strd_shift_en:1; /* Enable 2bytes shift on a stride. */ 96 unsigned int :6; /* Remaining bits. */ 97 volatile uint32_t *rq_db; 98 volatile uint32_t *cq_db; 99 uint16_t port_id; 100 uint32_t rq_ci; 101 uint16_t consumed_strd; /* Number of consumed strides in WQE. */ 102 uint32_t rq_pi; 103 uint32_t cq_ci; 104 struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */ 105 uint16_t mprq_max_memcpy_len; /* Maximum size of packet to memcpy. */ 106 volatile void *wqes; 107 volatile struct mlx5_cqe(*cqes)[]; 108 struct rxq_zip zip; /* Compressed context. */ 109 RTE_STD_C11 110 union { 111 struct rte_mbuf *(*elts)[]; 112 struct mlx5_mprq_buf *(*mprq_bufs)[]; 113 }; 114 struct rte_mempool *mp; 115 struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */ 116 struct mlx5_mprq_buf *mprq_repl; /* Stashed mbuf for replenish. */ 117 struct mlx5_rxq_stats stats; 118 uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */ 119 struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */ 120 void *cq_uar; /* CQ user access region. */ 121 uint32_t cqn; /* CQ number. */ 122 uint8_t cq_arm_sn; /* CQ arm seq number. */ 123 #ifndef RTE_ARCH_64 124 rte_spinlock_t *uar_lock_cq; 125 /* CQ (UAR) access lock required for 32bit implementations */ 126 #endif 127 uint32_t tunnel; /* Tunnel information. */ 128 } __rte_cache_aligned; 129 130 /* Verbs Rx queue elements. */ 131 struct mlx5_rxq_ibv { 132 LIST_ENTRY(mlx5_rxq_ibv) next; /* Pointer to the next element. */ 133 rte_atomic32_t refcnt; /* Reference counter. */ 134 struct mlx5_rxq_ctrl *rxq_ctrl; /* Back pointer to parent. */ 135 struct ibv_cq *cq; /* Completion Queue. */ 136 struct ibv_wq *wq; /* Work Queue. */ 137 struct ibv_comp_channel *channel; 138 }; 139 140 /* RX queue control descriptor. */ 141 struct mlx5_rxq_ctrl { 142 LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */ 143 rte_atomic32_t refcnt; /* Reference counter. */ 144 struct mlx5_rxq_ibv *ibv; /* Verbs elements. */ 145 struct priv *priv; /* Back pointer to private data. */ 146 struct mlx5_rxq_data rxq; /* Data path structure. */ 147 unsigned int socket; /* CPU socket ID for allocations. */ 148 unsigned int irq:1; /* Whether IRQ is enabled. */ 149 uint16_t idx; /* Queue index. */ 150 uint32_t flow_mark_n; /* Number of Mark/Flag flows using this Queue. */ 151 uint32_t flow_tunnels_n[MLX5_FLOW_TUNNEL]; /* Tunnels counters. */ 152 }; 153 154 /* Indirection table. */ 155 struct mlx5_ind_table_ibv { 156 LIST_ENTRY(mlx5_ind_table_ibv) next; /* Pointer to the next element. */ 157 rte_atomic32_t refcnt; /* Reference counter. */ 158 struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */ 159 uint32_t queues_n; /**< Number of queues in the list. */ 160 uint16_t queues[]; /**< Queue list. */ 161 }; 162 163 /* Hash Rx queue. */ 164 struct mlx5_hrxq { 165 LIST_ENTRY(mlx5_hrxq) next; /* Pointer to the next element. */ 166 rte_atomic32_t refcnt; /* Reference counter. */ 167 struct mlx5_ind_table_ibv *ind_table; /* Indirection table. */ 168 struct ibv_qp *qp; /* Verbs queue pair. */ 169 uint64_t hash_fields; /* Verbs Hash fields. */ 170 uint32_t rss_key_len; /* Hash key length in bytes. */ 171 uint8_t rss_key[]; /* Hash key. */ 172 }; 173 174 /* TX queue descriptor. */ 175 __extension__ 176 struct mlx5_txq_data { 177 uint16_t elts_head; /* Current counter in (*elts)[]. */ 178 uint16_t elts_tail; /* Counter of first element awaiting completion. */ 179 uint16_t elts_comp; /* Counter since last completion request. */ 180 uint16_t mpw_comp; /* WQ index since last completion request. */ 181 uint16_t cq_ci; /* Consumer index for completion queue. */ 182 #ifndef NDEBUG 183 uint16_t cq_pi; /* Producer index for completion queue. */ 184 #endif 185 uint16_t wqe_ci; /* Consumer index for work queue. */ 186 uint16_t wqe_pi; /* Producer index for work queue. */ 187 uint16_t elts_n:4; /* (*elts)[] length (in log2). */ 188 uint16_t cqe_n:4; /* Number of CQ elements (in log2). */ 189 uint16_t wqe_n:4; /* Number of of WQ elements (in log2). */ 190 uint16_t tso_en:1; /* When set hardware TSO is enabled. */ 191 uint16_t tunnel_en:1; 192 /* When set TX offload for tunneled packets are supported. */ 193 uint16_t swp_en:1; /* Whether SW parser is enabled. */ 194 uint16_t mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */ 195 uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */ 196 uint16_t inline_max_packet_sz; /* Max packet size for inlining. */ 197 uint32_t qp_num_8s; /* QP number shifted by 8. */ 198 uint64_t offloads; /* Offloads for Tx Queue. */ 199 struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */ 200 volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */ 201 volatile void *wqes; /* Work queue (use volatile to write into). */ 202 volatile uint32_t *qp_db; /* Work queue doorbell. */ 203 volatile uint32_t *cq_db; /* Completion queue doorbell. */ 204 volatile void *bf_reg; /* Blueflame register remapped. */ 205 struct rte_mbuf *(*elts)[]; /* TX elements. */ 206 struct mlx5_txq_stats stats; /* TX queue counters. */ 207 #ifndef RTE_ARCH_64 208 rte_spinlock_t *uar_lock; 209 /* UAR access lock required for 32bit implementations */ 210 #endif 211 } __rte_cache_aligned; 212 213 /* Verbs Rx queue elements. */ 214 struct mlx5_txq_ibv { 215 LIST_ENTRY(mlx5_txq_ibv) next; /* Pointer to the next element. */ 216 rte_atomic32_t refcnt; /* Reference counter. */ 217 struct mlx5_txq_ctrl *txq_ctrl; /* Pointer to the control queue. */ 218 struct ibv_cq *cq; /* Completion Queue. */ 219 struct ibv_qp *qp; /* Queue Pair. */ 220 }; 221 222 /* TX queue control descriptor. */ 223 struct mlx5_txq_ctrl { 224 LIST_ENTRY(mlx5_txq_ctrl) next; /* Pointer to the next element. */ 225 rte_atomic32_t refcnt; /* Reference counter. */ 226 unsigned int socket; /* CPU socket ID for allocations. */ 227 unsigned int max_inline_data; /* Max inline data. */ 228 unsigned int max_tso_header; /* Max TSO header size. */ 229 struct mlx5_txq_ibv *ibv; /* Verbs queue object. */ 230 struct priv *priv; /* Back pointer to private data. */ 231 struct mlx5_txq_data txq; /* Data path structure. */ 232 off_t uar_mmap_offset; /* UAR mmap offset for non-primary process. */ 233 volatile void *bf_reg_orig; /* Blueflame register from verbs. */ 234 uint16_t idx; /* Queue index. */ 235 }; 236 237 /* mlx5_rxq.c */ 238 239 extern uint8_t rss_hash_default_key[]; 240 241 int mlx5_check_mprq_support(struct rte_eth_dev *dev); 242 int mlx5_rxq_mprq_enabled(struct mlx5_rxq_data *rxq); 243 int mlx5_mprq_enabled(struct rte_eth_dev *dev); 244 int mlx5_mprq_free_mp(struct rte_eth_dev *dev); 245 int mlx5_mprq_alloc_mp(struct rte_eth_dev *dev); 246 void mlx5_rxq_cleanup(struct mlx5_rxq_ctrl *rxq_ctrl); 247 int mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 248 unsigned int socket, const struct rte_eth_rxconf *conf, 249 struct rte_mempool *mp); 250 void mlx5_rx_queue_release(void *dpdk_rxq); 251 int mlx5_rx_intr_vec_enable(struct rte_eth_dev *dev); 252 void mlx5_rx_intr_vec_disable(struct rte_eth_dev *dev); 253 int mlx5_rx_intr_enable(struct rte_eth_dev *dev, uint16_t rx_queue_id); 254 int mlx5_rx_intr_disable(struct rte_eth_dev *dev, uint16_t rx_queue_id); 255 struct mlx5_rxq_ibv *mlx5_rxq_ibv_new(struct rte_eth_dev *dev, uint16_t idx); 256 struct mlx5_rxq_ibv *mlx5_rxq_ibv_get(struct rte_eth_dev *dev, uint16_t idx); 257 int mlx5_rxq_ibv_release(struct mlx5_rxq_ibv *rxq_ibv); 258 int mlx5_rxq_ibv_releasable(struct mlx5_rxq_ibv *rxq_ibv); 259 struct mlx5_rxq_ibv *mlx5_rxq_ibv_drop_new(struct rte_eth_dev *dev); 260 void mlx5_rxq_ibv_drop_release(struct rte_eth_dev *dev); 261 int mlx5_rxq_ibv_verify(struct rte_eth_dev *dev); 262 struct mlx5_rxq_ctrl *mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, 263 uint16_t desc, unsigned int socket, 264 const struct rte_eth_rxconf *conf, 265 struct rte_mempool *mp); 266 struct mlx5_rxq_ctrl *mlx5_rxq_get(struct rte_eth_dev *dev, uint16_t idx); 267 int mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx); 268 int mlx5_rxq_releasable(struct rte_eth_dev *dev, uint16_t idx); 269 int mlx5_rxq_verify(struct rte_eth_dev *dev); 270 int rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl); 271 int rxq_alloc_mprq_buf(struct mlx5_rxq_ctrl *rxq_ctrl); 272 struct mlx5_ind_table_ibv *mlx5_ind_table_ibv_new(struct rte_eth_dev *dev, 273 const uint16_t *queues, 274 uint32_t queues_n); 275 struct mlx5_ind_table_ibv *mlx5_ind_table_ibv_get(struct rte_eth_dev *dev, 276 const uint16_t *queues, 277 uint32_t queues_n); 278 int mlx5_ind_table_ibv_release(struct rte_eth_dev *dev, 279 struct mlx5_ind_table_ibv *ind_tbl); 280 int mlx5_ind_table_ibv_verify(struct rte_eth_dev *dev); 281 struct mlx5_ind_table_ibv *mlx5_ind_table_ibv_drop_new(struct rte_eth_dev *dev); 282 void mlx5_ind_table_ibv_drop_release(struct rte_eth_dev *dev); 283 struct mlx5_hrxq *mlx5_hrxq_new(struct rte_eth_dev *dev, 284 const uint8_t *rss_key, uint32_t rss_key_len, 285 uint64_t hash_fields, 286 const uint16_t *queues, uint32_t queues_n, 287 int tunnel __rte_unused); 288 struct mlx5_hrxq *mlx5_hrxq_get(struct rte_eth_dev *dev, 289 const uint8_t *rss_key, uint32_t rss_key_len, 290 uint64_t hash_fields, 291 const uint16_t *queues, uint32_t queues_n); 292 int mlx5_hrxq_release(struct rte_eth_dev *dev, struct mlx5_hrxq *hxrq); 293 int mlx5_hrxq_ibv_verify(struct rte_eth_dev *dev); 294 struct mlx5_hrxq *mlx5_hrxq_drop_new(struct rte_eth_dev *dev); 295 void mlx5_hrxq_drop_release(struct rte_eth_dev *dev); 296 uint64_t mlx5_get_rx_port_offloads(void); 297 uint64_t mlx5_get_rx_queue_offloads(struct rte_eth_dev *dev); 298 299 /* mlx5_txq.c */ 300 301 int mlx5_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc, 302 unsigned int socket, const struct rte_eth_txconf *conf); 303 void mlx5_tx_queue_release(void *dpdk_txq); 304 int mlx5_tx_uar_remap(struct rte_eth_dev *dev, int fd); 305 struct mlx5_txq_ibv *mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx); 306 struct mlx5_txq_ibv *mlx5_txq_ibv_get(struct rte_eth_dev *dev, uint16_t idx); 307 int mlx5_txq_ibv_release(struct mlx5_txq_ibv *txq_ibv); 308 int mlx5_txq_ibv_releasable(struct mlx5_txq_ibv *txq_ibv); 309 int mlx5_txq_ibv_verify(struct rte_eth_dev *dev); 310 struct mlx5_txq_ctrl *mlx5_txq_new(struct rte_eth_dev *dev, uint16_t idx, 311 uint16_t desc, unsigned int socket, 312 const struct rte_eth_txconf *conf); 313 struct mlx5_txq_ctrl *mlx5_txq_get(struct rte_eth_dev *dev, uint16_t idx); 314 int mlx5_txq_release(struct rte_eth_dev *dev, uint16_t idx); 315 int mlx5_txq_releasable(struct rte_eth_dev *dev, uint16_t idx); 316 int mlx5_txq_verify(struct rte_eth_dev *dev); 317 void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl); 318 uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev); 319 320 /* mlx5_rxtx.c */ 321 322 extern uint32_t mlx5_ptype_table[]; 323 extern uint8_t mlx5_cksum_table[]; 324 extern uint8_t mlx5_swp_types_table[]; 325 326 void mlx5_set_ptype_table(void); 327 void mlx5_set_cksum_table(void); 328 void mlx5_set_swp_types_table(void); 329 uint16_t mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 330 uint16_t pkts_n); 331 uint16_t mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, 332 uint16_t pkts_n); 333 uint16_t mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts, 334 uint16_t pkts_n); 335 uint16_t mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, 336 uint16_t pkts_n); 337 uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n); 338 void mlx5_mprq_buf_free_cb(void *addr, void *opaque); 339 void mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf); 340 uint16_t mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, 341 uint16_t pkts_n); 342 uint16_t removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, 343 uint16_t pkts_n); 344 uint16_t removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, 345 uint16_t pkts_n); 346 int mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset); 347 int mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset); 348 349 /* Vectorized version of mlx5_rxtx.c */ 350 int mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev); 351 int mlx5_check_vec_tx_support(struct rte_eth_dev *dev); 352 int mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq_data); 353 int mlx5_check_vec_rx_support(struct rte_eth_dev *dev); 354 uint16_t mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts, 355 uint16_t pkts_n); 356 uint16_t mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, 357 uint16_t pkts_n); 358 uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, 359 uint16_t pkts_n); 360 361 /* mlx5_mr.c */ 362 363 void mlx5_mr_flush_local_cache(struct mlx5_mr_ctrl *mr_ctrl); 364 uint32_t mlx5_rx_addr2mr_bh(struct mlx5_rxq_data *rxq, uintptr_t addr); 365 uint32_t mlx5_tx_addr2mr_bh(struct mlx5_txq_data *txq, uintptr_t addr); 366 uint32_t mlx5_tx_update_ext_mp(struct mlx5_txq_data *txq, uintptr_t addr, 367 struct rte_mempool *mp); 368 369 /** 370 * Provide safe 64bit store operation to mlx5 UAR region for both 32bit and 371 * 64bit architectures. 372 * 373 * @param val 374 * value to write in CPU endian format. 375 * @param addr 376 * Address to write to. 377 * @param lock 378 * Address of the lock to use for that UAR access. 379 */ 380 static __rte_always_inline void 381 __mlx5_uar_write64_relaxed(uint64_t val, volatile void *addr, 382 rte_spinlock_t *lock __rte_unused) 383 { 384 #ifdef RTE_ARCH_64 385 rte_write64_relaxed(val, addr); 386 #else /* !RTE_ARCH_64 */ 387 rte_spinlock_lock(lock); 388 rte_write32_relaxed(val, addr); 389 rte_io_wmb(); 390 rte_write32_relaxed(val >> 32, 391 (volatile void *)((volatile char *)addr + 4)); 392 rte_spinlock_unlock(lock); 393 #endif 394 } 395 396 /** 397 * Provide safe 64bit store operation to mlx5 UAR region for both 32bit and 398 * 64bit architectures while guaranteeing the order of execution with the 399 * code being executed. 400 * 401 * @param val 402 * value to write in CPU endian format. 403 * @param addr 404 * Address to write to. 405 * @param lock 406 * Address of the lock to use for that UAR access. 407 */ 408 static __rte_always_inline void 409 __mlx5_uar_write64(uint64_t val, volatile void *addr, rte_spinlock_t *lock) 410 { 411 rte_io_wmb(); 412 __mlx5_uar_write64_relaxed(val, addr, lock); 413 } 414 415 /* Assist macros, used instead of directly calling the functions they wrap. */ 416 #ifdef RTE_ARCH_64 417 #define mlx5_uar_write64_relaxed(val, dst, lock) \ 418 __mlx5_uar_write64_relaxed(val, dst, NULL) 419 #define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, NULL) 420 #else 421 #define mlx5_uar_write64_relaxed(val, dst, lock) \ 422 __mlx5_uar_write64_relaxed(val, dst, lock) 423 #define mlx5_uar_write64(val, dst, lock) __mlx5_uar_write64(val, dst, lock) 424 #endif 425 426 #ifndef NDEBUG 427 /** 428 * Verify or set magic value in CQE. 429 * 430 * @param cqe 431 * Pointer to CQE. 432 * 433 * @return 434 * 0 the first time. 435 */ 436 static inline int 437 check_cqe_seen(volatile struct mlx5_cqe *cqe) 438 { 439 static const uint8_t magic[] = "seen"; 440 volatile uint8_t (*buf)[sizeof(cqe->rsvd1)] = &cqe->rsvd1; 441 int ret = 1; 442 unsigned int i; 443 444 for (i = 0; i < sizeof(magic) && i < sizeof(*buf); ++i) 445 if (!ret || (*buf)[i] != magic[i]) { 446 ret = 0; 447 (*buf)[i] = magic[i]; 448 } 449 return ret; 450 } 451 #endif /* NDEBUG */ 452 453 /** 454 * Check whether CQE is valid. 455 * 456 * @param cqe 457 * Pointer to CQE. 458 * @param cqes_n 459 * Size of completion queue. 460 * @param ci 461 * Consumer index. 462 * 463 * @return 464 * 0 on success, 1 on failure. 465 */ 466 static __rte_always_inline int 467 check_cqe(volatile struct mlx5_cqe *cqe, 468 unsigned int cqes_n, const uint16_t ci) 469 { 470 uint16_t idx = ci & cqes_n; 471 uint8_t op_own = cqe->op_own; 472 uint8_t op_owner = MLX5_CQE_OWNER(op_own); 473 uint8_t op_code = MLX5_CQE_OPCODE(op_own); 474 475 if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID))) 476 return 1; /* No CQE. */ 477 #ifndef NDEBUG 478 if ((op_code == MLX5_CQE_RESP_ERR) || 479 (op_code == MLX5_CQE_REQ_ERR)) { 480 volatile struct mlx5_err_cqe *err_cqe = (volatile void *)cqe; 481 uint8_t syndrome = err_cqe->syndrome; 482 483 if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) || 484 (syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR)) 485 return 0; 486 if (!check_cqe_seen(cqe)) { 487 DRV_LOG(ERR, 488 "unexpected CQE error %u (0x%02x) syndrome" 489 " 0x%02x", 490 op_code, op_code, syndrome); 491 rte_hexdump(stderr, "MLX5 Error CQE:", 492 (const void *)((uintptr_t)err_cqe), 493 sizeof(*err_cqe)); 494 } 495 return 1; 496 } else if ((op_code != MLX5_CQE_RESP_SEND) && 497 (op_code != MLX5_CQE_REQ)) { 498 if (!check_cqe_seen(cqe)) { 499 DRV_LOG(ERR, "unexpected CQE opcode %u (0x%02x)", 500 op_code, op_code); 501 rte_hexdump(stderr, "MLX5 CQE:", 502 (const void *)((uintptr_t)cqe), 503 sizeof(*cqe)); 504 } 505 return 1; 506 } 507 #endif /* NDEBUG */ 508 return 0; 509 } 510 511 /** 512 * Return the address of the WQE. 513 * 514 * @param txq 515 * Pointer to TX queue structure. 516 * @param wqe_ci 517 * WQE consumer index. 518 * 519 * @return 520 * WQE address. 521 */ 522 static inline uintptr_t * 523 tx_mlx5_wqe(struct mlx5_txq_data *txq, uint16_t ci) 524 { 525 ci &= ((1 << txq->wqe_n) - 1); 526 return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE); 527 } 528 529 /** 530 * Manage TX completions. 531 * 532 * When sending a burst, mlx5_tx_burst() posts several WRs. 533 * 534 * @param txq 535 * Pointer to TX queue structure. 536 */ 537 static __rte_always_inline void 538 mlx5_tx_complete(struct mlx5_txq_data *txq) 539 { 540 const uint16_t elts_n = 1 << txq->elts_n; 541 const uint16_t elts_m = elts_n - 1; 542 const unsigned int cqe_n = 1 << txq->cqe_n; 543 const unsigned int cqe_cnt = cqe_n - 1; 544 uint16_t elts_free = txq->elts_tail; 545 uint16_t elts_tail; 546 uint16_t cq_ci = txq->cq_ci; 547 volatile struct mlx5_cqe *cqe = NULL; 548 volatile struct mlx5_wqe_ctrl *ctrl; 549 struct rte_mbuf *m, *free[elts_n]; 550 struct rte_mempool *pool = NULL; 551 unsigned int blk_n = 0; 552 553 cqe = &(*txq->cqes)[cq_ci & cqe_cnt]; 554 if (unlikely(check_cqe(cqe, cqe_n, cq_ci))) 555 return; 556 #ifndef NDEBUG 557 if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) || 558 (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) { 559 if (!check_cqe_seen(cqe)) { 560 DRV_LOG(ERR, "unexpected error CQE, Tx stopped"); 561 rte_hexdump(stderr, "MLX5 TXQ:", 562 (const void *)((uintptr_t)txq->wqes), 563 ((1 << txq->wqe_n) * 564 MLX5_WQE_SIZE)); 565 } 566 return; 567 } 568 #endif /* NDEBUG */ 569 ++cq_ci; 570 txq->wqe_pi = rte_be_to_cpu_16(cqe->wqe_counter); 571 ctrl = (volatile struct mlx5_wqe_ctrl *) 572 tx_mlx5_wqe(txq, txq->wqe_pi); 573 elts_tail = ctrl->ctrl3; 574 assert((elts_tail & elts_m) < (1 << txq->wqe_n)); 575 /* Free buffers. */ 576 while (elts_free != elts_tail) { 577 m = rte_pktmbuf_prefree_seg((*txq->elts)[elts_free++ & elts_m]); 578 if (likely(m != NULL)) { 579 if (likely(m->pool == pool)) { 580 free[blk_n++] = m; 581 } else { 582 if (likely(pool != NULL)) 583 rte_mempool_put_bulk(pool, 584 (void *)free, 585 blk_n); 586 free[0] = m; 587 pool = m->pool; 588 blk_n = 1; 589 } 590 } 591 } 592 if (blk_n) 593 rte_mempool_put_bulk(pool, (void *)free, blk_n); 594 #ifndef NDEBUG 595 elts_free = txq->elts_tail; 596 /* Poisoning. */ 597 while (elts_free != elts_tail) { 598 memset(&(*txq->elts)[elts_free & elts_m], 599 0x66, 600 sizeof((*txq->elts)[elts_free & elts_m])); 601 ++elts_free; 602 } 603 #endif 604 txq->cq_ci = cq_ci; 605 txq->elts_tail = elts_tail; 606 /* Update the consumer index. */ 607 rte_compiler_barrier(); 608 *txq->cq_db = rte_cpu_to_be_32(cq_ci); 609 } 610 611 /** 612 * Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which the 613 * cloned mbuf is allocated is returned instead. 614 * 615 * @param buf 616 * Pointer to mbuf. 617 * 618 * @return 619 * Memory pool where data is located for given mbuf. 620 */ 621 static struct rte_mempool * 622 mlx5_mb2mp(struct rte_mbuf *buf) 623 { 624 if (unlikely(RTE_MBUF_INDIRECT(buf))) 625 return rte_mbuf_from_indirect(buf)->pool; 626 return buf->pool; 627 } 628 629 /** 630 * Query LKey from a packet buffer for Rx. No need to flush local caches for Rx 631 * as mempool is pre-configured and static. 632 * 633 * @param rxq 634 * Pointer to Rx queue structure. 635 * @param addr 636 * Address to search. 637 * 638 * @return 639 * Searched LKey on success, UINT32_MAX on no match. 640 */ 641 static __rte_always_inline uint32_t 642 mlx5_rx_addr2mr(struct mlx5_rxq_data *rxq, uintptr_t addr) 643 { 644 struct mlx5_mr_ctrl *mr_ctrl = &rxq->mr_ctrl; 645 uint32_t lkey; 646 647 /* Linear search on MR cache array. */ 648 lkey = mlx5_mr_lookup_cache(mr_ctrl->cache, &mr_ctrl->mru, 649 MLX5_MR_CACHE_N, addr); 650 if (likely(lkey != UINT32_MAX)) 651 return lkey; 652 /* Take slower bottom-half (Binary Search) on miss. */ 653 return mlx5_rx_addr2mr_bh(rxq, addr); 654 } 655 656 #define mlx5_rx_mb2mr(rxq, mb) mlx5_rx_addr2mr(rxq, (uintptr_t)((mb)->buf_addr)) 657 658 /** 659 * Query LKey from a packet buffer for Tx. If not found, add the mempool. 660 * 661 * @param txq 662 * Pointer to Tx queue structure. 663 * @param addr 664 * Address to search. 665 * 666 * @return 667 * Searched LKey on success, UINT32_MAX on no match. 668 */ 669 static __rte_always_inline uint32_t 670 mlx5_tx_addr2mr(struct mlx5_txq_data *txq, uintptr_t addr) 671 { 672 struct mlx5_mr_ctrl *mr_ctrl = &txq->mr_ctrl; 673 uint32_t lkey; 674 675 /* Check generation bit to see if there's any change on existing MRs. */ 676 if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen)) 677 mlx5_mr_flush_local_cache(mr_ctrl); 678 /* Linear search on MR cache array. */ 679 lkey = mlx5_mr_lookup_cache(mr_ctrl->cache, &mr_ctrl->mru, 680 MLX5_MR_CACHE_N, addr); 681 if (likely(lkey != UINT32_MAX)) 682 return lkey; 683 /* Take slower bottom-half (binary search) on miss. */ 684 return mlx5_tx_addr2mr_bh(txq, addr); 685 } 686 687 static __rte_always_inline uint32_t 688 mlx5_tx_mb2mr(struct mlx5_txq_data *txq, struct rte_mbuf *mb) 689 { 690 uintptr_t addr = (uintptr_t)mb->buf_addr; 691 uint32_t lkey = mlx5_tx_addr2mr(txq, addr); 692 693 if (likely(lkey != UINT32_MAX)) 694 return lkey; 695 if (rte_errno == ENXIO) { 696 /* Mempool may have externally allocated memory. */ 697 lkey = mlx5_tx_update_ext_mp(txq, addr, mlx5_mb2mp(mb)); 698 } 699 return lkey; 700 } 701 702 /** 703 * Ring TX queue doorbell and flush the update if requested. 704 * 705 * @param txq 706 * Pointer to TX queue structure. 707 * @param wqe 708 * Pointer to the last WQE posted in the NIC. 709 * @param cond 710 * Request for write memory barrier after BlueFlame update. 711 */ 712 static __rte_always_inline void 713 mlx5_tx_dbrec_cond_wmb(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe, 714 int cond) 715 { 716 uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg); 717 volatile uint64_t *src = ((volatile uint64_t *)wqe); 718 719 rte_cio_wmb(); 720 *txq->qp_db = rte_cpu_to_be_32(txq->wqe_ci); 721 /* Ensure ordering between DB record and BF copy. */ 722 rte_wmb(); 723 mlx5_uar_write64_relaxed(*src, dst, txq->uar_lock); 724 if (cond) 725 rte_wmb(); 726 } 727 728 /** 729 * Ring TX queue doorbell and flush the update by write memory barrier. 730 * 731 * @param txq 732 * Pointer to TX queue structure. 733 * @param wqe 734 * Pointer to the last WQE posted in the NIC. 735 */ 736 static __rte_always_inline void 737 mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe) 738 { 739 mlx5_tx_dbrec_cond_wmb(txq, wqe, 1); 740 } 741 742 /** 743 * Convert mbuf to Verb SWP. 744 * 745 * @param txq_data 746 * Pointer to the Tx queue. 747 * @param buf 748 * Pointer to the mbuf. 749 * @param tso 750 * TSO offloads enabled. 751 * @param vlan 752 * VLAN offloads enabled 753 * @param offsets 754 * Pointer to the SWP header offsets. 755 * @param swp_types 756 * Pointer to the SWP header types. 757 */ 758 static __rte_always_inline void 759 txq_mbuf_to_swp(struct mlx5_txq_data *txq, struct rte_mbuf *buf, 760 uint8_t *offsets, uint8_t *swp_types) 761 { 762 const uint64_t vlan = buf->ol_flags & PKT_TX_VLAN_PKT; 763 const uint64_t tunnel = buf->ol_flags & PKT_TX_TUNNEL_MASK; 764 const uint64_t tso = buf->ol_flags & PKT_TX_TCP_SEG; 765 const uint64_t csum_flags = buf->ol_flags & PKT_TX_L4_MASK; 766 const uint64_t inner_ip = 767 buf->ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6); 768 const uint64_t ol_flags_mask = PKT_TX_L4_MASK | PKT_TX_IPV6 | 769 PKT_TX_OUTER_IPV6; 770 uint16_t idx; 771 uint16_t off; 772 773 if (likely(!txq->swp_en || (tunnel != PKT_TX_TUNNEL_UDP && 774 tunnel != PKT_TX_TUNNEL_IP))) 775 return; 776 /* 777 * The index should have: 778 * bit[0:1] = PKT_TX_L4_MASK 779 * bit[4] = PKT_TX_IPV6 780 * bit[8] = PKT_TX_OUTER_IPV6 781 * bit[9] = PKT_TX_OUTER_UDP 782 */ 783 idx = (buf->ol_flags & ol_flags_mask) >> 52; 784 if (tunnel == PKT_TX_TUNNEL_UDP) 785 idx |= 1 << 9; 786 *swp_types = mlx5_swp_types_table[idx]; 787 /* 788 * Set offsets for SW parser. Since ConnectX-5, SW parser just 789 * complements HW parser. SW parser starts to engage only if HW parser 790 * can't reach a header. For the older devices, HW parser will not kick 791 * in if any of SWP offsets is set. Therefore, all of the L3 offsets 792 * should be set regardless of HW offload. 793 */ 794 off = buf->outer_l2_len + (vlan ? sizeof(struct vlan_hdr) : 0); 795 offsets[1] = off >> 1; /* Outer L3 offset. */ 796 off += buf->outer_l3_len; 797 if (tunnel == PKT_TX_TUNNEL_UDP) 798 offsets[0] = off >> 1; /* Outer L4 offset. */ 799 if (inner_ip) { 800 off += buf->l2_len; 801 offsets[3] = off >> 1; /* Inner L3 offset. */ 802 if (csum_flags == PKT_TX_TCP_CKSUM || tso || 803 csum_flags == PKT_TX_UDP_CKSUM) { 804 off += buf->l3_len; 805 offsets[2] = off >> 1; /* Inner L4 offset. */ 806 } 807 } 808 } 809 810 /** 811 * Convert the Checksum offloads to Verbs. 812 * 813 * @param buf 814 * Pointer to the mbuf. 815 * 816 * @return 817 * Converted checksum flags. 818 */ 819 static __rte_always_inline uint8_t 820 txq_ol_cksum_to_cs(struct rte_mbuf *buf) 821 { 822 uint32_t idx; 823 uint8_t is_tunnel = !!(buf->ol_flags & PKT_TX_TUNNEL_MASK); 824 const uint64_t ol_flags_mask = PKT_TX_TCP_SEG | PKT_TX_L4_MASK | 825 PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM; 826 827 /* 828 * The index should have: 829 * bit[0] = PKT_TX_TCP_SEG 830 * bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM 831 * bit[4] = PKT_TX_IP_CKSUM 832 * bit[8] = PKT_TX_OUTER_IP_CKSUM 833 * bit[9] = tunnel 834 */ 835 idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9); 836 return mlx5_cksum_table[idx]; 837 } 838 839 /** 840 * Count the number of contiguous single segment packets. 841 * 842 * @param pkts 843 * Pointer to array of packets. 844 * @param pkts_n 845 * Number of packets. 846 * 847 * @return 848 * Number of contiguous single segment packets. 849 */ 850 static __rte_always_inline unsigned int 851 txq_count_contig_single_seg(struct rte_mbuf **pkts, uint16_t pkts_n) 852 { 853 unsigned int pos; 854 855 if (!pkts_n) 856 return 0; 857 /* Count the number of contiguous single segment packets. */ 858 for (pos = 0; pos < pkts_n; ++pos) 859 if (NB_SEGS(pkts[pos]) > 1) 860 break; 861 return pos; 862 } 863 864 /** 865 * Count the number of contiguous multi-segment packets. 866 * 867 * @param pkts 868 * Pointer to array of packets. 869 * @param pkts_n 870 * Number of packets. 871 * 872 * @return 873 * Number of contiguous multi-segment packets. 874 */ 875 static __rte_always_inline unsigned int 876 txq_count_contig_multi_seg(struct rte_mbuf **pkts, uint16_t pkts_n) 877 { 878 unsigned int pos; 879 880 if (!pkts_n) 881 return 0; 882 /* Count the number of contiguous multi-segment packets. */ 883 for (pos = 0; pos < pkts_n; ++pos) 884 if (NB_SEGS(pkts[pos]) == 1) 885 break; 886 return pos; 887 } 888 889 #endif /* RTE_PMD_MLX5_RXTX_H_ */ 890