1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2021 6WIND S.A. 3 * Copyright 2021 Mellanox Technologies, Ltd 4 */ 5 6 #include <stdint.h> 7 #include <string.h> 8 #include <stdlib.h> 9 10 #include <rte_mbuf.h> 11 #include <rte_mempool.h> 12 #include <rte_prefetch.h> 13 #include <rte_common.h> 14 #include <rte_branch_prediction.h> 15 #include <rte_ether.h> 16 #include <rte_cycles.h> 17 #include <rte_flow.h> 18 19 #include <mlx5_prm.h> 20 #include <mlx5_common.h> 21 #include <mlx5_common_mr.h> 22 #include <rte_pmd_mlx5.h> 23 24 #include "mlx5_autoconf.h" 25 #include "mlx5_defs.h" 26 #include "mlx5.h" 27 #include "mlx5_utils.h" 28 #include "mlx5_rxtx.h" 29 #include "mlx5_devx.h" 30 #include "mlx5_rx.h" 31 #ifdef HAVE_MLX5_MSTFLINT 32 #include <mstflint/mtcr.h> 33 #endif 34 35 36 static __rte_always_inline uint32_t 37 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 38 volatile struct mlx5_mini_cqe8 *mcqe); 39 40 static __rte_always_inline int 41 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 42 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe); 43 44 static __rte_always_inline uint32_t 45 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe); 46 47 static __rte_always_inline void 48 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 49 volatile struct mlx5_cqe *cqe, 50 volatile struct mlx5_mini_cqe8 *mcqe); 51 52 static inline void 53 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 54 volatile struct mlx5_cqe *__rte_restrict cqe, 55 uint32_t phcsum, uint8_t l4_type); 56 57 static inline void 58 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 59 volatile struct mlx5_cqe *__rte_restrict cqe, 60 volatile struct mlx5_mini_cqe8 *mcqe, 61 struct mlx5_rxq_data *rxq, uint32_t len); 62 63 64 /** 65 * Internal function to compute the number of used descriptors in an RX queue. 66 * 67 * @param rxq 68 * The Rx queue. 69 * 70 * @return 71 * The number of used Rx descriptor. 72 */ 73 static uint32_t 74 rx_queue_count(struct mlx5_rxq_data *rxq) 75 { 76 struct rxq_zip *zip = &rxq->zip; 77 volatile struct mlx5_cqe *cqe; 78 const unsigned int cqe_n = (1 << rxq->cqe_n); 79 const unsigned int sges_n = (1 << rxq->sges_n); 80 const unsigned int elts_n = (1 << rxq->elts_n); 81 const unsigned int strd_n = RTE_BIT32(rxq->log_strd_num); 82 const unsigned int cqe_cnt = cqe_n - 1; 83 unsigned int cq_ci, used; 84 85 /* if we are processing a compressed cqe */ 86 if (zip->ai) { 87 used = zip->cqe_cnt - zip->ai; 88 cq_ci = zip->cq_ci; 89 } else { 90 used = 0; 91 cq_ci = rxq->cq_ci; 92 } 93 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; 94 while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) { 95 int8_t op_own; 96 unsigned int n; 97 98 op_own = cqe->op_own; 99 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) 100 n = rte_be_to_cpu_32(cqe->byte_cnt); 101 else 102 n = 1; 103 cq_ci += n; 104 used += n; 105 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; 106 } 107 used = RTE_MIN(used * sges_n, elts_n * strd_n); 108 return used; 109 } 110 111 /** 112 * DPDK callback to check the status of a Rx descriptor. 113 * 114 * @param rx_queue 115 * The Rx queue. 116 * @param[in] offset 117 * The index of the descriptor in the ring. 118 * 119 * @return 120 * The status of the Rx descriptor. 121 */ 122 int 123 mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset) 124 { 125 struct mlx5_rxq_data *rxq = rx_queue; 126 127 if (offset >= (1 << rxq->cqe_n)) { 128 rte_errno = EINVAL; 129 return -rte_errno; 130 } 131 if (offset < rx_queue_count(rxq)) 132 return RTE_ETH_RX_DESC_DONE; 133 return RTE_ETH_RX_DESC_AVAIL; 134 } 135 136 /* Get rxq lwm percentage according to lwm number. */ 137 static uint8_t 138 mlx5_rxq_lwm_to_percentage(struct mlx5_rxq_priv *rxq) 139 { 140 struct mlx5_rxq_data *rxq_data = &rxq->ctrl->rxq; 141 uint32_t wqe_cnt = 1 << (rxq_data->elts_n - rxq_data->sges_n); 142 143 return rxq->lwm * 100 / wqe_cnt; 144 } 145 146 /** 147 * DPDK callback to get the RX queue information. 148 * 149 * @param dev 150 * Pointer to the device structure. 151 * 152 * @param rx_queue_id 153 * Rx queue identificator. 154 * 155 * @param qinfo 156 * Pointer to the RX queue information structure. 157 * 158 * @return 159 * None. 160 */ 161 162 void 163 mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id, 164 struct rte_eth_rxq_info *qinfo) 165 { 166 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, rx_queue_id); 167 struct mlx5_rxq_data *rxq = mlx5_rxq_data_get(dev, rx_queue_id); 168 struct mlx5_rxq_priv *rxq_priv = mlx5_rxq_get(dev, rx_queue_id); 169 170 if (!rxq) 171 return; 172 qinfo->mp = mlx5_rxq_mprq_enabled(rxq) ? 173 rxq->mprq_mp : rxq->mp; 174 qinfo->conf.rx_thresh.pthresh = 0; 175 qinfo->conf.rx_thresh.hthresh = 0; 176 qinfo->conf.rx_thresh.wthresh = 0; 177 qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh; 178 qinfo->conf.rx_drop_en = 1; 179 if (rxq_ctrl == NULL || rxq_ctrl->obj == NULL) 180 qinfo->conf.rx_deferred_start = 0; 181 else 182 qinfo->conf.rx_deferred_start = 1; 183 qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads; 184 qinfo->scattered_rx = dev->data->scattered_rx; 185 qinfo->nb_desc = mlx5_rxq_mprq_enabled(rxq) ? 186 RTE_BIT32(rxq->elts_n) * RTE_BIT32(rxq->log_strd_num) : 187 RTE_BIT32(rxq->elts_n); 188 qinfo->avail_thresh = rxq_priv ? 189 mlx5_rxq_lwm_to_percentage(rxq_priv) : 0; 190 } 191 192 /** 193 * DPDK callback to get the RX packet burst mode information. 194 * 195 * @param dev 196 * Pointer to the device structure. 197 * 198 * @param rx_queue_id 199 * Rx queue identification. 200 * 201 * @param mode 202 * Pointer to the burts mode information. 203 * 204 * @return 205 * 0 as success, -EINVAL as failure. 206 */ 207 int 208 mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, 209 uint16_t rx_queue_id __rte_unused, 210 struct rte_eth_burst_mode *mode) 211 { 212 eth_rx_burst_t pkt_burst = dev->rx_pkt_burst; 213 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id); 214 215 if (!rxq) { 216 rte_errno = EINVAL; 217 return -rte_errno; 218 } 219 if (pkt_burst == mlx5_rx_burst) { 220 snprintf(mode->info, sizeof(mode->info), "%s", "Scalar"); 221 } else if (pkt_burst == mlx5_rx_burst_mprq) { 222 snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ"); 223 } else if (pkt_burst == mlx5_rx_burst_vec) { 224 #if defined RTE_ARCH_X86_64 225 snprintf(mode->info, sizeof(mode->info), "%s", "Vector SSE"); 226 #elif defined RTE_ARCH_ARM64 227 snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon"); 228 #elif defined RTE_ARCH_PPC_64 229 snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec"); 230 #else 231 return -EINVAL; 232 #endif 233 } else if (pkt_burst == mlx5_rx_burst_mprq_vec) { 234 #if defined RTE_ARCH_X86_64 235 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector SSE"); 236 #elif defined RTE_ARCH_ARM64 237 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon"); 238 #elif defined RTE_ARCH_PPC_64 239 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec"); 240 #else 241 return -EINVAL; 242 #endif 243 } else { 244 return -EINVAL; 245 } 246 return 0; 247 } 248 249 /** 250 * DPDK callback to get the number of used descriptors in a RX queue. 251 * 252 * @param rx_queue 253 * The Rx queue pointer. 254 * 255 * @return 256 * The number of used rx descriptor. 257 * -EINVAL if the queue is invalid 258 */ 259 uint32_t 260 mlx5_rx_queue_count(void *rx_queue) 261 { 262 struct mlx5_rxq_data *rxq = rx_queue; 263 struct rte_eth_dev *dev; 264 265 if (!rxq) { 266 rte_errno = EINVAL; 267 return -rte_errno; 268 } 269 270 dev = &rte_eth_devices[rxq->port_id]; 271 272 if (dev->rx_pkt_burst == NULL || 273 dev->rx_pkt_burst == rte_eth_pkt_burst_dummy) { 274 rte_errno = ENOTSUP; 275 return -rte_errno; 276 } 277 278 return rx_queue_count(rxq); 279 } 280 281 #define CLB_VAL_IDX 0 282 #define CLB_MSK_IDX 1 283 static int 284 mlx5_monitor_callback(const uint64_t value, 285 const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ]) 286 { 287 const uint64_t m = opaque[CLB_MSK_IDX]; 288 const uint64_t v = opaque[CLB_VAL_IDX]; 289 290 return (value & m) == v ? -1 : 0; 291 } 292 293 int mlx5_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc) 294 { 295 struct mlx5_rxq_data *rxq = rx_queue; 296 const unsigned int cqe_num = 1 << rxq->cqe_n; 297 const unsigned int cqe_mask = cqe_num - 1; 298 const uint16_t idx = rxq->cq_ci & cqe_num; 299 volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask]; 300 301 if (unlikely(rxq->cqes == NULL)) { 302 rte_errno = EINVAL; 303 return -rte_errno; 304 } 305 pmc->addr = &cqe->op_own; 306 pmc->opaque[CLB_VAL_IDX] = !!idx; 307 pmc->opaque[CLB_MSK_IDX] = MLX5_CQE_OWNER_MASK; 308 pmc->fn = mlx5_monitor_callback; 309 pmc->size = sizeof(uint8_t); 310 return 0; 311 } 312 313 /** 314 * Translate RX completion flags to packet type. 315 * 316 * @param[in] rxq 317 * Pointer to RX queue structure. 318 * @param[in] cqe 319 * Pointer to CQE. 320 * 321 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 322 * 323 * @return 324 * Packet type for struct rte_mbuf. 325 */ 326 static inline uint32_t 327 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 328 volatile struct mlx5_mini_cqe8 *mcqe) 329 { 330 uint8_t idx; 331 uint8_t ptype; 332 uint8_t pinfo = (cqe->pkt_info & 0x3) << 6; 333 334 /* Get l3/l4 header from mini-CQE in case L3/L4 format*/ 335 if (mcqe == NULL || 336 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 337 ptype = (cqe->hdr_type_etc & 0xfc00) >> 10; 338 else 339 ptype = mcqe->hdr_type >> 2; 340 /* 341 * The index to the array should have: 342 * bit[1:0] = l3_hdr_type 343 * bit[4:2] = l4_hdr_type 344 * bit[5] = ip_frag 345 * bit[6] = tunneled 346 * bit[7] = outer_l3_type 347 */ 348 idx = pinfo | ptype; 349 return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6)); 350 } 351 352 /** 353 * Initialize Rx WQ and indexes. 354 * 355 * @param[in] rxq 356 * Pointer to RX queue structure. 357 */ 358 void 359 mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) 360 { 361 const unsigned int wqe_n = 1 << rxq->elts_n; 362 unsigned int i; 363 364 for (i = 0; (i != wqe_n); ++i) { 365 volatile struct mlx5_wqe_data_seg *scat; 366 uintptr_t addr; 367 uint32_t byte_count; 368 uint32_t lkey; 369 370 if (mlx5_rxq_mprq_enabled(rxq)) { 371 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i]; 372 373 scat = &((volatile struct mlx5_wqe_mprq *) 374 rxq->wqes)[i].dseg; 375 addr = (uintptr_t)mlx5_mprq_buf_addr 376 (buf, RTE_BIT32(rxq->log_strd_num)); 377 byte_count = RTE_BIT32(rxq->log_strd_sz) * 378 RTE_BIT32(rxq->log_strd_num); 379 lkey = mlx5_rx_addr2mr(rxq, addr); 380 } else { 381 struct rte_mbuf *buf = (*rxq->elts)[i]; 382 383 scat = &((volatile struct mlx5_wqe_data_seg *) 384 rxq->wqes)[i]; 385 addr = rte_pktmbuf_mtod(buf, uintptr_t); 386 byte_count = DATA_LEN(buf); 387 lkey = mlx5_rx_mb2mr(rxq, buf); 388 } 389 /* scat->addr must be able to store a pointer. */ 390 MLX5_ASSERT(sizeof(scat->addr) >= sizeof(uintptr_t)); 391 *scat = (struct mlx5_wqe_data_seg){ 392 .addr = rte_cpu_to_be_64(addr), 393 .byte_count = rte_cpu_to_be_32(byte_count), 394 .lkey = lkey, 395 }; 396 } 397 rxq->consumed_strd = 0; 398 rxq->decompressed = 0; 399 rxq->rq_pi = 0; 400 rxq->zip = (struct rxq_zip){ 401 .ai = 0, 402 }; 403 rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ? 404 (wqe_n >> rxq->sges_n) * RTE_BIT32(rxq->log_strd_num) : 0; 405 /* Update doorbell counter. */ 406 rxq->rq_ci = wqe_n >> rxq->sges_n; 407 rte_io_wmb(); 408 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 409 } 410 411 /** 412 * Handle a Rx error. 413 * The function inserts the RQ state to reset when the first error CQE is 414 * shown, then drains the CQ by the caller function loop. When the CQ is empty, 415 * it moves the RQ state to ready and initializes the RQ. 416 * Next CQE identification and error counting are in the caller responsibility. 417 * 418 * @param[in] rxq 419 * Pointer to RX queue structure. 420 * @param[in] vec 421 * 1 when called from vectorized Rx burst, need to prepare mbufs for the RQ. 422 * 0 when called from non-vectorized Rx burst. 423 * 424 * @return 425 * -1 in case of recovery error, otherwise the CQE status. 426 */ 427 int 428 mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec) 429 { 430 const uint16_t cqe_n = 1 << rxq->cqe_n; 431 const uint16_t cqe_mask = cqe_n - 1; 432 const uint16_t wqe_n = 1 << rxq->elts_n; 433 const uint16_t strd_n = RTE_BIT32(rxq->log_strd_num); 434 struct mlx5_rxq_ctrl *rxq_ctrl = 435 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 436 union { 437 volatile struct mlx5_cqe *cqe; 438 volatile struct mlx5_err_cqe *err_cqe; 439 } u = { 440 .cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask], 441 }; 442 struct mlx5_mp_arg_queue_state_modify sm; 443 int ret; 444 445 switch (rxq->err_state) { 446 case MLX5_RXQ_ERR_STATE_NO_ERROR: 447 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET; 448 /* Fall-through */ 449 case MLX5_RXQ_ERR_STATE_NEED_RESET: 450 sm.is_wq = 1; 451 sm.queue_id = rxq->idx; 452 sm.state = IBV_WQS_RESET; 453 if (mlx5_queue_state_modify(RXQ_DEV(rxq_ctrl), &sm)) 454 return -1; 455 if (rxq_ctrl->dump_file_n < 456 RXQ_PORT(rxq_ctrl)->config.max_dump_files_num) { 457 MKSTR(err_str, "Unexpected CQE error syndrome " 458 "0x%02x CQN = %u RQN = %u wqe_counter = %u" 459 " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome, 460 rxq->cqn, rxq_ctrl->wqn, 461 rte_be_to_cpu_16(u.err_cqe->wqe_counter), 462 rxq->rq_ci << rxq->sges_n, rxq->cq_ci); 463 MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u", 464 rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc()); 465 mlx5_dump_debug_information(name, NULL, err_str, 0); 466 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 467 (const void *)((uintptr_t) 468 rxq->cqes), 469 sizeof(*u.cqe) * cqe_n); 470 mlx5_dump_debug_information(name, "MLX5 Error RQ:", 471 (const void *)((uintptr_t) 472 rxq->wqes), 473 16 * wqe_n); 474 rxq_ctrl->dump_file_n++; 475 } 476 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY; 477 /* Fall-through */ 478 case MLX5_RXQ_ERR_STATE_NEED_READY: 479 ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci); 480 if (ret == MLX5_CQE_STATUS_HW_OWN) { 481 rte_io_wmb(); 482 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 483 rte_io_wmb(); 484 /* 485 * The RQ consumer index must be zeroed while moving 486 * from RESET state to RDY state. 487 */ 488 *rxq->rq_db = rte_cpu_to_be_32(0); 489 rte_io_wmb(); 490 sm.is_wq = 1; 491 sm.queue_id = rxq->idx; 492 sm.state = IBV_WQS_RDY; 493 if (mlx5_queue_state_modify(RXQ_DEV(rxq_ctrl), &sm)) 494 return -1; 495 if (vec) { 496 const uint32_t elts_n = 497 mlx5_rxq_mprq_enabled(rxq) ? 498 wqe_n * strd_n : wqe_n; 499 const uint32_t e_mask = elts_n - 1; 500 uint32_t elts_ci = 501 mlx5_rxq_mprq_enabled(rxq) ? 502 rxq->elts_ci : rxq->rq_ci; 503 uint32_t elt_idx; 504 struct rte_mbuf **elt; 505 int i; 506 unsigned int n = elts_n - (elts_ci - 507 rxq->rq_pi); 508 509 for (i = 0; i < (int)n; ++i) { 510 elt_idx = (elts_ci + i) & e_mask; 511 elt = &(*rxq->elts)[elt_idx]; 512 *elt = rte_mbuf_raw_alloc(rxq->mp); 513 if (!*elt) { 514 for (i--; i >= 0; --i) { 515 elt_idx = (elts_ci + 516 i) & elts_n; 517 elt = &(*rxq->elts) 518 [elt_idx]; 519 rte_pktmbuf_free_seg 520 (*elt); 521 } 522 return -1; 523 } 524 } 525 for (i = 0; i < (int)elts_n; ++i) { 526 elt = &(*rxq->elts)[i]; 527 DATA_LEN(*elt) = 528 (uint16_t)((*elt)->buf_len - 529 rte_pktmbuf_headroom(*elt)); 530 } 531 /* Padding with a fake mbuf for vec Rx. */ 532 for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i) 533 (*rxq->elts)[elts_n + i] = 534 &rxq->fake_mbuf; 535 } 536 mlx5_rxq_initialize(rxq); 537 rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; 538 } 539 return ret; 540 default: 541 return -1; 542 } 543 } 544 545 /** 546 * Get size of the next packet for a given CQE. For compressed CQEs, the 547 * consumer index is updated only once all packets of the current one have 548 * been processed. 549 * 550 * @param rxq 551 * Pointer to RX queue. 552 * @param cqe 553 * CQE to process. 554 * @param[out] mcqe 555 * Store pointer to mini-CQE if compressed. Otherwise, the pointer is not 556 * written. 557 * 558 * @return 559 * 0 in case of empty CQE, otherwise the packet size in bytes. 560 */ 561 static inline int 562 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 563 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe) 564 { 565 struct rxq_zip *zip = &rxq->zip; 566 uint16_t cqe_n = cqe_cnt + 1; 567 int len; 568 uint16_t idx, end; 569 570 do { 571 len = 0; 572 /* Process compressed data in the CQE and mini arrays. */ 573 if (zip->ai) { 574 volatile struct mlx5_mini_cqe8 (*mc)[8] = 575 (volatile struct mlx5_mini_cqe8 (*)[8]) 576 (uintptr_t)(&(*rxq->cqes)[zip->ca & 577 cqe_cnt].pkt_info); 578 len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt & 579 rxq->byte_mask); 580 *mcqe = &(*mc)[zip->ai & 7]; 581 if ((++zip->ai & 7) == 0) { 582 /* Invalidate consumed CQEs */ 583 idx = zip->ca; 584 end = zip->na; 585 while (idx != end) { 586 (*rxq->cqes)[idx & cqe_cnt].op_own = 587 MLX5_CQE_INVALIDATE; 588 ++idx; 589 } 590 /* 591 * Increment consumer index to skip the number 592 * of CQEs consumed. Hardware leaves holes in 593 * the CQ ring for software use. 594 */ 595 zip->ca = zip->na; 596 zip->na += 8; 597 } 598 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { 599 /* Invalidate the rest */ 600 idx = zip->ca; 601 end = zip->cq_ci; 602 603 while (idx != end) { 604 (*rxq->cqes)[idx & cqe_cnt].op_own = 605 MLX5_CQE_INVALIDATE; 606 ++idx; 607 } 608 rxq->cq_ci = zip->cq_ci; 609 zip->ai = 0; 610 } 611 /* 612 * No compressed data, get next CQE and verify if it is 613 * compressed. 614 */ 615 } else { 616 int ret; 617 int8_t op_own; 618 uint32_t cq_ci; 619 620 ret = check_cqe(cqe, cqe_n, rxq->cq_ci); 621 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 622 if (unlikely(ret == MLX5_CQE_STATUS_ERR || 623 rxq->err_state)) { 624 ret = mlx5_rx_err_handle(rxq, 0); 625 if (ret == MLX5_CQE_STATUS_HW_OWN || 626 ret == -1) 627 return 0; 628 } else { 629 return 0; 630 } 631 } 632 /* 633 * Introduce the local variable to have queue cq_ci 634 * index in queue structure always consistent with 635 * actual CQE boundary (not pointing to the middle 636 * of compressed CQE session). 637 */ 638 cq_ci = rxq->cq_ci + 1; 639 op_own = cqe->op_own; 640 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { 641 volatile struct mlx5_mini_cqe8 (*mc)[8] = 642 (volatile struct mlx5_mini_cqe8 (*)[8]) 643 (uintptr_t)(&(*rxq->cqes) 644 [cq_ci & cqe_cnt].pkt_info); 645 646 /* Fix endianness. */ 647 zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt); 648 /* 649 * Current mini array position is the one 650 * returned by check_cqe64(). 651 * 652 * If completion comprises several mini arrays, 653 * as a special case the second one is located 654 * 7 CQEs after the initial CQE instead of 8 655 * for subsequent ones. 656 */ 657 zip->ca = cq_ci; 658 zip->na = zip->ca + 7; 659 /* Compute the next non compressed CQE. */ 660 zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; 661 /* Get packet size to return. */ 662 len = rte_be_to_cpu_32((*mc)[0].byte_cnt & 663 rxq->byte_mask); 664 *mcqe = &(*mc)[0]; 665 zip->ai = 1; 666 /* Prefetch all to be invalidated */ 667 idx = zip->ca; 668 end = zip->cq_ci; 669 while (idx != end) { 670 rte_prefetch0(&(*rxq->cqes)[(idx) & 671 cqe_cnt]); 672 ++idx; 673 } 674 } else { 675 rxq->cq_ci = cq_ci; 676 len = rte_be_to_cpu_32(cqe->byte_cnt); 677 } 678 } 679 if (unlikely(rxq->err_state)) { 680 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 681 ++rxq->stats.idropped; 682 } else { 683 return len; 684 } 685 } while (1); 686 } 687 688 /** 689 * Translate RX completion flags to offload flags. 690 * 691 * @param[in] cqe 692 * Pointer to CQE. 693 * 694 * @return 695 * Offload flags (ol_flags) for struct rte_mbuf. 696 */ 697 static inline uint32_t 698 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe) 699 { 700 uint32_t ol_flags = 0; 701 uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc); 702 703 ol_flags = 704 TRANSPOSE(flags, 705 MLX5_CQE_RX_L3_HDR_VALID, 706 RTE_MBUF_F_RX_IP_CKSUM_GOOD) | 707 TRANSPOSE(flags, 708 MLX5_CQE_RX_L4_HDR_VALID, 709 RTE_MBUF_F_RX_L4_CKSUM_GOOD); 710 return ol_flags; 711 } 712 713 /** 714 * Fill in mbuf fields from RX completion flags. 715 * Note that pkt->ol_flags should be initialized outside of this function. 716 * 717 * @param rxq 718 * Pointer to RX queue. 719 * @param pkt 720 * mbuf to fill. 721 * @param cqe 722 * CQE to process. 723 * @param rss_hash_res 724 * Packet RSS Hash result. 725 */ 726 static inline void 727 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 728 volatile struct mlx5_cqe *cqe, 729 volatile struct mlx5_mini_cqe8 *mcqe) 730 { 731 /* Update packet information. */ 732 pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe, mcqe); 733 pkt->port = unlikely(rxq->shared) ? cqe->user_index_low : rxq->port_id; 734 735 if (rxq->rss_hash) { 736 uint32_t rss_hash_res = 0; 737 738 /* If compressed, take hash result from mini-CQE. */ 739 if (mcqe == NULL || 740 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH) 741 rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res); 742 else 743 rss_hash_res = rte_be_to_cpu_32(mcqe->rx_hash_result); 744 if (rss_hash_res) { 745 pkt->hash.rss = rss_hash_res; 746 pkt->ol_flags |= RTE_MBUF_F_RX_RSS_HASH; 747 } 748 } 749 if (rxq->mark) { 750 uint32_t mark = 0; 751 752 /* If compressed, take flow tag from mini-CQE. */ 753 if (mcqe == NULL || 754 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) 755 mark = cqe->sop_drop_qpn; 756 else 757 mark = ((mcqe->byte_cnt_flow & 0xff) << 8) | 758 (mcqe->flow_tag_high << 16); 759 if (MLX5_FLOW_MARK_IS_VALID(mark)) { 760 pkt->ol_flags |= RTE_MBUF_F_RX_FDIR; 761 if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) { 762 pkt->ol_flags |= RTE_MBUF_F_RX_FDIR_ID; 763 pkt->hash.fdir.hi = mlx5_flow_mark_get(mark); 764 } 765 } 766 } 767 if (rxq->dynf_meta) { 768 uint32_t meta = rte_be_to_cpu_32(cqe->flow_table_metadata) & 769 rxq->flow_meta_port_mask; 770 771 if (meta) { 772 pkt->ol_flags |= rxq->flow_meta_mask; 773 *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, 774 uint32_t *) = meta; 775 } 776 } 777 if (rxq->csum) 778 pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); 779 if (rxq->vlan_strip) { 780 bool vlan_strip; 781 782 if (mcqe == NULL || 783 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 784 vlan_strip = cqe->hdr_type_etc & 785 RTE_BE16(MLX5_CQE_VLAN_STRIPPED); 786 else 787 vlan_strip = mcqe->hdr_type & 788 RTE_BE16(MLX5_CQE_VLAN_STRIPPED); 789 if (vlan_strip) { 790 pkt->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED; 791 pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info); 792 } 793 } 794 if (rxq->hw_timestamp) { 795 uint64_t ts = rte_be_to_cpu_64(cqe->timestamp); 796 797 if (rxq->rt_timestamp) 798 ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts); 799 mlx5_timestamp_set(pkt, rxq->timestamp_offset, ts); 800 pkt->ol_flags |= rxq->timestamp_rx_flag; 801 } 802 } 803 804 /** 805 * DPDK callback for RX. 806 * 807 * @param dpdk_rxq 808 * Generic pointer to RX queue structure. 809 * @param[out] pkts 810 * Array to store received packets. 811 * @param pkts_n 812 * Maximum number of packets in array. 813 * 814 * @return 815 * Number of packets successfully received (<= pkts_n). 816 */ 817 uint16_t 818 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 819 { 820 struct mlx5_rxq_data *rxq = dpdk_rxq; 821 const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1; 822 const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1; 823 const unsigned int sges_n = rxq->sges_n; 824 struct rte_mbuf *pkt = NULL; 825 struct rte_mbuf *seg = NULL; 826 volatile struct mlx5_cqe *cqe = 827 &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 828 unsigned int i = 0; 829 unsigned int rq_ci = rxq->rq_ci << sges_n; 830 int len = 0; /* keep its value across iterations. */ 831 832 while (pkts_n) { 833 unsigned int idx = rq_ci & wqe_cnt; 834 volatile struct mlx5_wqe_data_seg *wqe = 835 &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx]; 836 struct rte_mbuf *rep = (*rxq->elts)[idx]; 837 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 838 839 if (pkt) 840 NEXT(seg) = rep; 841 seg = rep; 842 rte_prefetch0(seg); 843 rte_prefetch0(cqe); 844 rte_prefetch0(wqe); 845 /* Allocate the buf from the same pool. */ 846 rep = rte_mbuf_raw_alloc(seg->pool); 847 if (unlikely(rep == NULL)) { 848 ++rxq->stats.rx_nombuf; 849 if (!pkt) { 850 /* 851 * no buffers before we even started, 852 * bail out silently. 853 */ 854 break; 855 } 856 while (pkt != seg) { 857 MLX5_ASSERT(pkt != (*rxq->elts)[idx]); 858 rep = NEXT(pkt); 859 NEXT(pkt) = NULL; 860 NB_SEGS(pkt) = 1; 861 rte_mbuf_raw_free(pkt); 862 pkt = rep; 863 } 864 rq_ci >>= sges_n; 865 ++rq_ci; 866 rq_ci <<= sges_n; 867 break; 868 } 869 if (!pkt) { 870 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt]; 871 len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe); 872 if (!len) { 873 rte_mbuf_raw_free(rep); 874 break; 875 } 876 pkt = seg; 877 MLX5_ASSERT(len >= (rxq->crc_present << 2)); 878 pkt->ol_flags &= RTE_MBUF_F_EXTERNAL; 879 rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe); 880 if (rxq->crc_present) 881 len -= RTE_ETHER_CRC_LEN; 882 PKT_LEN(pkt) = len; 883 if (cqe->lro_num_seg > 1) { 884 mlx5_lro_update_hdr 885 (rte_pktmbuf_mtod(pkt, uint8_t *), cqe, 886 mcqe, rxq, len); 887 pkt->ol_flags |= RTE_MBUF_F_RX_LRO; 888 pkt->tso_segsz = len / cqe->lro_num_seg; 889 } 890 } 891 DATA_LEN(rep) = DATA_LEN(seg); 892 PKT_LEN(rep) = PKT_LEN(seg); 893 SET_DATA_OFF(rep, DATA_OFF(seg)); 894 PORT(rep) = PORT(seg); 895 (*rxq->elts)[idx] = rep; 896 /* 897 * Fill NIC descriptor with the new buffer. The lkey and size 898 * of the buffers are already known, only the buffer address 899 * changes. 900 */ 901 wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t)); 902 /* If there's only one MR, no need to replace LKey in WQE. */ 903 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) 904 wqe->lkey = mlx5_rx_mb2mr(rxq, rep); 905 if (len > DATA_LEN(seg)) { 906 len -= DATA_LEN(seg); 907 ++NB_SEGS(pkt); 908 ++rq_ci; 909 continue; 910 } 911 DATA_LEN(seg) = len; 912 #ifdef MLX5_PMD_SOFT_COUNTERS 913 /* Increment bytes counter. */ 914 rxq->stats.ibytes += PKT_LEN(pkt); 915 #endif 916 /* Return packet. */ 917 *(pkts++) = pkt; 918 pkt = NULL; 919 --pkts_n; 920 ++i; 921 /* Align consumer index to the next stride. */ 922 rq_ci >>= sges_n; 923 ++rq_ci; 924 rq_ci <<= sges_n; 925 } 926 if (unlikely(i == 0 && ((rq_ci >> sges_n) == rxq->rq_ci))) 927 return 0; 928 /* Update the consumer index. */ 929 rxq->rq_ci = rq_ci >> sges_n; 930 rte_io_wmb(); 931 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 932 rte_io_wmb(); 933 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 934 #ifdef MLX5_PMD_SOFT_COUNTERS 935 /* Increment packets counter. */ 936 rxq->stats.ipackets += i; 937 #endif 938 return i; 939 } 940 941 /** 942 * Update LRO packet TCP header. 943 * The HW LRO feature doesn't update the TCP header after coalescing the 944 * TCP segments but supplies information in CQE to fill it by SW. 945 * 946 * @param tcp 947 * Pointer to the TCP header. 948 * @param cqe 949 * Pointer to the completion entry. 950 * @param phcsum 951 * The L3 pseudo-header checksum. 952 */ 953 static inline void 954 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 955 volatile struct mlx5_cqe *__rte_restrict cqe, 956 uint32_t phcsum, uint8_t l4_type) 957 { 958 /* 959 * The HW calculates only the TCP payload checksum, need to complete 960 * the TCP header checksum and the L3 pseudo-header checksum. 961 */ 962 uint32_t csum = phcsum + cqe->csum; 963 964 if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK || 965 l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) { 966 tcp->tcp_flags |= RTE_TCP_ACK_FLAG; 967 tcp->recv_ack = cqe->lro_ack_seq_num; 968 tcp->rx_win = cqe->lro_tcp_win; 969 } 970 if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK) 971 tcp->tcp_flags |= RTE_TCP_PSH_FLAG; 972 tcp->cksum = 0; 973 csum += rte_raw_cksum(tcp, (tcp->data_off >> 4) * 4); 974 csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff); 975 csum = (~csum) & 0xffff; 976 if (csum == 0) 977 csum = 0xffff; 978 tcp->cksum = csum; 979 } 980 981 /** 982 * Update LRO packet headers. 983 * The HW LRO feature doesn't update the L3/TCP headers after coalescing the 984 * TCP segments but supply information in CQE to fill it by SW. 985 * 986 * @param padd 987 * The packet address. 988 * @param cqe 989 * Pointer to the completion entry. 990 * @param len 991 * The packet length. 992 */ 993 static inline void 994 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 995 volatile struct mlx5_cqe *__rte_restrict cqe, 996 volatile struct mlx5_mini_cqe8 *mcqe, 997 struct mlx5_rxq_data *rxq, uint32_t len) 998 { 999 union { 1000 struct rte_ether_hdr *eth; 1001 struct rte_vlan_hdr *vlan; 1002 struct rte_ipv4_hdr *ipv4; 1003 struct rte_ipv6_hdr *ipv6; 1004 struct rte_tcp_hdr *tcp; 1005 uint8_t *hdr; 1006 } h = { 1007 .hdr = padd, 1008 }; 1009 uint16_t proto = h.eth->ether_type; 1010 uint32_t phcsum; 1011 uint8_t l4_type; 1012 1013 h.eth++; 1014 while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) || 1015 proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) { 1016 proto = h.vlan->eth_proto; 1017 h.vlan++; 1018 } 1019 if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) { 1020 h.ipv4->time_to_live = cqe->lro_min_ttl; 1021 h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd)); 1022 h.ipv4->hdr_checksum = 0; 1023 h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4); 1024 phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0); 1025 h.ipv4++; 1026 } else { 1027 h.ipv6->hop_limits = cqe->lro_min_ttl; 1028 h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) - 1029 sizeof(*h.ipv6)); 1030 phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0); 1031 h.ipv6++; 1032 } 1033 if (mcqe == NULL || 1034 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 1035 l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) & 1036 MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; 1037 else 1038 l4_type = (rte_be_to_cpu_16(mcqe->hdr_type) & 1039 MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; 1040 mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum, l4_type); 1041 } 1042 1043 void 1044 mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf) 1045 { 1046 mlx5_mprq_buf_free_cb(NULL, buf); 1047 } 1048 1049 /** 1050 * DPDK callback for RX with Multi-Packet RQ support. 1051 * 1052 * @param dpdk_rxq 1053 * Generic pointer to RX queue structure. 1054 * @param[out] pkts 1055 * Array to store received packets. 1056 * @param pkts_n 1057 * Maximum number of packets in array. 1058 * 1059 * @return 1060 * Number of packets successfully received (<= pkts_n). 1061 */ 1062 uint16_t 1063 mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1064 { 1065 struct mlx5_rxq_data *rxq = dpdk_rxq; 1066 const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num); 1067 const uint32_t strd_sz = RTE_BIT32(rxq->log_strd_sz); 1068 const uint32_t cq_mask = (1 << rxq->cqe_n) - 1; 1069 const uint32_t wq_mask = (1 << rxq->elts_n) - 1; 1070 volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1071 unsigned int i = 0; 1072 uint32_t rq_ci = rxq->rq_ci; 1073 uint16_t consumed_strd = rxq->consumed_strd; 1074 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1075 1076 while (i < pkts_n) { 1077 struct rte_mbuf *pkt; 1078 int ret; 1079 uint32_t len; 1080 uint16_t strd_cnt; 1081 uint16_t strd_idx; 1082 uint32_t byte_cnt; 1083 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1084 enum mlx5_rqx_code rxq_code; 1085 1086 if (consumed_strd == strd_n) { 1087 /* Replace WQE if the buffer is still in use. */ 1088 mprq_buf_replace(rxq, rq_ci & wq_mask); 1089 /* Advance to the next WQE. */ 1090 consumed_strd = 0; 1091 ++rq_ci; 1092 buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1093 } 1094 cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1095 ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe); 1096 if (!ret) 1097 break; 1098 byte_cnt = ret; 1099 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 1100 MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); 1101 if (rxq->crc_present) 1102 len -= RTE_ETHER_CRC_LEN; 1103 if (mcqe && 1104 rxq->mcqe_format == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) 1105 strd_cnt = (len / strd_sz) + !!(len % strd_sz); 1106 else 1107 strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >> 1108 MLX5_MPRQ_STRIDE_NUM_SHIFT; 1109 MLX5_ASSERT(strd_cnt); 1110 consumed_strd += strd_cnt; 1111 if (byte_cnt & MLX5_MPRQ_FILLER_MASK) 1112 continue; 1113 strd_idx = rte_be_to_cpu_16(mcqe == NULL ? 1114 cqe->wqe_counter : 1115 mcqe->stride_idx); 1116 MLX5_ASSERT(strd_idx < strd_n); 1117 MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & 1118 wq_mask)); 1119 pkt = rte_pktmbuf_alloc(rxq->mp); 1120 if (unlikely(pkt == NULL)) { 1121 ++rxq->stats.rx_nombuf; 1122 break; 1123 } 1124 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 1125 MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); 1126 if (rxq->crc_present) 1127 len -= RTE_ETHER_CRC_LEN; 1128 rxq_code = mprq_buf_to_pkt(rxq, pkt, len, buf, 1129 strd_idx, strd_cnt); 1130 if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) { 1131 rte_pktmbuf_free_seg(pkt); 1132 if (rxq_code == MLX5_RXQ_CODE_DROPPED) { 1133 ++rxq->stats.idropped; 1134 continue; 1135 } 1136 if (rxq_code == MLX5_RXQ_CODE_NOMBUF) { 1137 ++rxq->stats.rx_nombuf; 1138 break; 1139 } 1140 } 1141 rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe); 1142 if (cqe->lro_num_seg > 1) { 1143 mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt, uint8_t *), 1144 cqe, mcqe, rxq, len); 1145 pkt->ol_flags |= RTE_MBUF_F_RX_LRO; 1146 pkt->tso_segsz = len / cqe->lro_num_seg; 1147 } 1148 PKT_LEN(pkt) = len; 1149 PORT(pkt) = rxq->port_id; 1150 #ifdef MLX5_PMD_SOFT_COUNTERS 1151 /* Increment bytes counter. */ 1152 rxq->stats.ibytes += PKT_LEN(pkt); 1153 #endif 1154 /* Return packet. */ 1155 *(pkts++) = pkt; 1156 ++i; 1157 } 1158 /* Update the consumer indexes. */ 1159 rxq->consumed_strd = consumed_strd; 1160 rte_io_wmb(); 1161 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1162 if (rq_ci != rxq->rq_ci) { 1163 rxq->rq_ci = rq_ci; 1164 rte_io_wmb(); 1165 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1166 } 1167 #ifdef MLX5_PMD_SOFT_COUNTERS 1168 /* Increment packets counter. */ 1169 rxq->stats.ipackets += i; 1170 #endif 1171 return i; 1172 } 1173 1174 /* 1175 * Vectorized Rx routines are not compiled in when required vector instructions 1176 * are not supported on a target architecture. 1177 * The following null stubs are needed for linkage when those are not included 1178 * outside of this file (e.g. mlx5_rxtx_vec_sse.c for x86). 1179 */ 1180 1181 __rte_weak uint16_t 1182 mlx5_rx_burst_vec(void *dpdk_rxq __rte_unused, 1183 struct rte_mbuf **pkts __rte_unused, 1184 uint16_t pkts_n __rte_unused) 1185 { 1186 return 0; 1187 } 1188 1189 __rte_weak uint16_t 1190 mlx5_rx_burst_mprq_vec(void *dpdk_rxq __rte_unused, 1191 struct rte_mbuf **pkts __rte_unused, 1192 uint16_t pkts_n __rte_unused) 1193 { 1194 return 0; 1195 } 1196 1197 __rte_weak int 1198 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused) 1199 { 1200 return -ENOTSUP; 1201 } 1202 1203 __rte_weak int 1204 mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused) 1205 { 1206 return -ENOTSUP; 1207 } 1208 1209 int 1210 mlx5_rx_queue_lwm_query(struct rte_eth_dev *dev, 1211 uint16_t *queue_id, uint8_t *lwm) 1212 { 1213 struct mlx5_priv *priv = dev->data->dev_private; 1214 unsigned int rxq_id, found = 0, n; 1215 struct mlx5_rxq_priv *rxq; 1216 1217 if (!queue_id) 1218 return -EINVAL; 1219 /* Query all the Rx queues of the port in a circular way. */ 1220 for (rxq_id = *queue_id, n = 0; n < priv->rxqs_n; n++) { 1221 rxq = mlx5_rxq_get(dev, rxq_id); 1222 if (rxq && rxq->lwm_event_pending) { 1223 pthread_mutex_lock(&priv->sh->lwm_config_lock); 1224 rxq->lwm_event_pending = 0; 1225 pthread_mutex_unlock(&priv->sh->lwm_config_lock); 1226 *queue_id = rxq_id; 1227 found = 1; 1228 if (lwm) 1229 *lwm = mlx5_rxq_lwm_to_percentage(rxq); 1230 break; 1231 } 1232 rxq_id = (rxq_id + 1) % priv->rxqs_n; 1233 } 1234 return found; 1235 } 1236 1237 /** 1238 * Rte interrupt handler for LWM event. 1239 * It first checks if the event arrives, if so process the callback for 1240 * RTE_ETH_EVENT_RX_LWM. 1241 * 1242 * @param args 1243 * Generic pointer to mlx5_priv. 1244 */ 1245 void 1246 mlx5_dev_interrupt_handler_lwm(void *args) 1247 { 1248 struct mlx5_priv *priv = args; 1249 struct mlx5_rxq_priv *rxq; 1250 struct rte_eth_dev *dev; 1251 int ret, rxq_idx = 0, port_id = 0; 1252 1253 ret = priv->obj_ops.rxq_event_get_lwm(priv, &rxq_idx, &port_id); 1254 if (unlikely(ret < 0)) { 1255 DRV_LOG(WARNING, "Cannot get LWM event context."); 1256 return; 1257 } 1258 DRV_LOG(INFO, "%s get LWM event, port_id:%d rxq_id:%d.", __func__, 1259 port_id, rxq_idx); 1260 dev = &rte_eth_devices[port_id]; 1261 rxq = mlx5_rxq_get(dev, rxq_idx); 1262 if (rxq) { 1263 pthread_mutex_lock(&priv->sh->lwm_config_lock); 1264 rxq->lwm_event_pending = 1; 1265 pthread_mutex_unlock(&priv->sh->lwm_config_lock); 1266 } 1267 rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_RX_AVAIL_THRESH, NULL); 1268 } 1269 1270 /** 1271 * DPDK callback to arm an Rx queue LWM(limit watermark) event. 1272 * While the Rx queue fullness reaches the LWM limit, the driver catches 1273 * an HW event and invokes the user event callback. 1274 * After the last event handling, the user needs to call this API again 1275 * to arm an additional event. 1276 * 1277 * @param dev 1278 * Pointer to the device structure. 1279 * @param[in] rx_queue_id 1280 * Rx queue identificator. 1281 * @param[in] lwm 1282 * The LWM value, is defined by a percentage of the Rx queue size. 1283 * [1-99] to set a new LWM (update the old value). 1284 * 0 to unarm the event. 1285 * 1286 * @return 1287 * 0 : operation success. 1288 * Otherwise: 1289 * - ENOMEM - not enough memory to create LWM event channel. 1290 * - EINVAL - the input Rxq is not created by devx. 1291 * - E2BIG - lwm is bigger than 99. 1292 */ 1293 int 1294 mlx5_rx_queue_lwm_set(struct rte_eth_dev *dev, uint16_t rx_queue_id, 1295 uint8_t lwm) 1296 { 1297 struct mlx5_priv *priv = dev->data->dev_private; 1298 uint16_t port_id = PORT_ID(priv); 1299 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id); 1300 uint16_t event_nums[1] = {MLX5_EVENT_TYPE_SRQ_LIMIT_REACHED}; 1301 struct mlx5_rxq_data *rxq_data; 1302 uint32_t wqe_cnt; 1303 uint64_t cookie; 1304 int ret = 0; 1305 1306 if (!rxq) { 1307 rte_errno = EINVAL; 1308 return -rte_errno; 1309 } 1310 rxq_data = &rxq->ctrl->rxq; 1311 /* Ensure the Rq is created by devx. */ 1312 if (priv->obj_ops.rxq_obj_new != devx_obj_ops.rxq_obj_new) { 1313 rte_errno = EINVAL; 1314 return -rte_errno; 1315 } 1316 if (lwm > 99) { 1317 DRV_LOG(WARNING, "Too big LWM configuration."); 1318 rte_errno = E2BIG; 1319 return -rte_errno; 1320 } 1321 /* Start config LWM. */ 1322 pthread_mutex_lock(&priv->sh->lwm_config_lock); 1323 if (rxq->lwm == 0 && lwm == 0) { 1324 /* Both old/new values are 0, do nothing. */ 1325 ret = 0; 1326 goto end; 1327 } 1328 wqe_cnt = 1 << (rxq_data->elts_n - rxq_data->sges_n); 1329 if (lwm) { 1330 if (!priv->sh->devx_channel_lwm) { 1331 ret = mlx5_lwm_setup(priv); 1332 if (ret) { 1333 DRV_LOG(WARNING, 1334 "Failed to create shared_lwm."); 1335 rte_errno = ENOMEM; 1336 ret = -rte_errno; 1337 goto end; 1338 } 1339 } 1340 if (!rxq->lwm_devx_subscribed) { 1341 cookie = ((uint32_t) 1342 (port_id << LWM_COOKIE_PORTID_OFFSET)) | 1343 (rx_queue_id << LWM_COOKIE_RXQID_OFFSET); 1344 ret = mlx5_os_devx_subscribe_devx_event 1345 (priv->sh->devx_channel_lwm, 1346 rxq->devx_rq.rq->obj, 1347 sizeof(event_nums), 1348 event_nums, 1349 cookie); 1350 if (ret) { 1351 rte_errno = rte_errno ? rte_errno : EINVAL; 1352 ret = -rte_errno; 1353 goto end; 1354 } 1355 rxq->lwm_devx_subscribed = 1; 1356 } 1357 } 1358 /* Save LWM to rxq and send modify_rq devx command. */ 1359 rxq->lwm = lwm * wqe_cnt / 100; 1360 /* Prevent integer division loss when switch lwm number to percentage. */ 1361 if (lwm && (lwm * wqe_cnt % 100)) { 1362 rxq->lwm = ((uint32_t)(rxq->lwm + 1) >= wqe_cnt) ? 1363 rxq->lwm : (rxq->lwm + 1); 1364 } 1365 if (lwm && !rxq->lwm) { 1366 /* With mprq, wqe_cnt may be < 100. */ 1367 DRV_LOG(WARNING, "Too small LWM configuration."); 1368 rte_errno = EINVAL; 1369 ret = -rte_errno; 1370 goto end; 1371 } 1372 ret = mlx5_devx_modify_rq(rxq, MLX5_RXQ_MOD_RDY2RDY); 1373 end: 1374 pthread_mutex_unlock(&priv->sh->lwm_config_lock); 1375 return ret; 1376 } 1377 1378 /** 1379 * Mlx5 access register function to configure host shaper. 1380 * It calls API in libmtcr_ul to access QSHR(Qos Shaper Host Register) 1381 * in firmware. 1382 * 1383 * @param dev 1384 * Pointer to rte_eth_dev. 1385 * @param lwm_triggered 1386 * Flag to enable/disable lwm_triggered bit in QSHR. 1387 * @param rate 1388 * Host shaper rate, unit is 100Mbps, set to 0 means disable the shaper. 1389 * @return 1390 * 0 : operation success. 1391 * Otherwise: 1392 * - ENOENT - no ibdev interface. 1393 * - EBUSY - the register access unit is busy. 1394 * - EIO - the register access command meets IO error. 1395 */ 1396 static int 1397 mlxreg_host_shaper_config(struct rte_eth_dev *dev, 1398 bool lwm_triggered, uint8_t rate) 1399 { 1400 #ifdef HAVE_MLX5_MSTFLINT 1401 struct mlx5_priv *priv = dev->data->dev_private; 1402 uint32_t data[MLX5_ST_SZ_DW(register_qshr)] = {0}; 1403 int rc, retry_count = 3; 1404 mfile *mf = NULL; 1405 int status; 1406 void *ptr; 1407 1408 mf = mopen(priv->sh->ibdev_name); 1409 if (!mf) { 1410 DRV_LOG(WARNING, "mopen failed\n"); 1411 rte_errno = ENOENT; 1412 return -rte_errno; 1413 } 1414 MLX5_SET(register_qshr, data, connected_host, 1); 1415 MLX5_SET(register_qshr, data, fast_response, lwm_triggered ? 1 : 0); 1416 MLX5_SET(register_qshr, data, local_port, 1); 1417 ptr = MLX5_ADDR_OF(register_qshr, data, global_config); 1418 MLX5_SET(ets_global_config_register, ptr, rate_limit_update, 1); 1419 MLX5_SET(ets_global_config_register, ptr, max_bw_units, 1420 rate ? ETS_GLOBAL_CONFIG_BW_UNIT_HUNDREDS_MBPS : 1421 ETS_GLOBAL_CONFIG_BW_UNIT_DISABLED); 1422 MLX5_SET(ets_global_config_register, ptr, max_bw_value, rate); 1423 do { 1424 rc = maccess_reg(mf, 1425 MLX5_QSHR_REGISTER_ID, 1426 MACCESS_REG_METHOD_SET, 1427 (u_int32_t *)&data[0], 1428 sizeof(data), 1429 sizeof(data), 1430 sizeof(data), 1431 &status); 1432 if ((rc != ME_ICMD_STATUS_IFC_BUSY && 1433 status != ME_REG_ACCESS_BAD_PARAM) || 1434 !(mf->flags & MDEVS_REM)) { 1435 break; 1436 } 1437 DRV_LOG(WARNING, "%s retry.", __func__); 1438 usleep(10000); 1439 } while (retry_count-- > 0); 1440 mclose(mf); 1441 rte_errno = (rc == ME_REG_ACCESS_DEV_BUSY) ? EBUSY : EIO; 1442 return rc ? -rte_errno : 0; 1443 #else 1444 (void)dev; 1445 (void)lwm_triggered; 1446 (void)rate; 1447 return -1; 1448 #endif 1449 } 1450 1451 int rte_pmd_mlx5_host_shaper_config(int port_id, uint8_t rate, 1452 uint32_t flags) 1453 { 1454 struct rte_eth_dev *dev = &rte_eth_devices[port_id]; 1455 struct mlx5_priv *priv = dev->data->dev_private; 1456 bool lwm_triggered = 1457 !!(flags & RTE_BIT32(MLX5_HOST_SHAPER_FLAG_AVAIL_THRESH_TRIGGERED)); 1458 1459 if (!lwm_triggered) { 1460 priv->sh->host_shaper_rate = rate; 1461 } else { 1462 switch (rate) { 1463 case 0: 1464 /* Rate 0 means disable lwm_triggered. */ 1465 priv->sh->lwm_triggered = 0; 1466 break; 1467 case 1: 1468 /* Rate 1 means enable lwm_triggered. */ 1469 priv->sh->lwm_triggered = 1; 1470 break; 1471 default: 1472 return -ENOTSUP; 1473 } 1474 } 1475 return mlxreg_host_shaper_config(dev, priv->sh->lwm_triggered, 1476 priv->sh->host_shaper_rate); 1477 } 1478