/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2021 6WIND S.A.
 * Copyright 2021 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#include <rte_cycles.h>
#include <rte_flow.h>

#include <mlx5_prm.h>
#include <mlx5_common.h>
#include <mlx5_common_mr.h>
#include <rte_pmd_mlx5.h>

#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_devx.h"
#include "mlx5_rx.h"
#ifdef HAVE_MLX5_MSTFLINT
#include <mstflint/mtcr.h>
#endif


static __rte_always_inline uint32_t
rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		   volatile struct mlx5_mini_cqe8 *mcqe);

static __rte_always_inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe);

static __rte_always_inline uint32_t
rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe);

static __rte_always_inline void
rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
	       volatile struct mlx5_cqe *cqe,
	       volatile struct mlx5_mini_cqe8 *mcqe);

static inline void
mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp,
			volatile struct mlx5_cqe *__rte_restrict cqe,
			uint32_t phcsum, uint8_t l4_type);

static inline void
mlx5_lro_update_hdr(uint8_t *__rte_restrict padd,
		    volatile struct mlx5_cqe *__rte_restrict cqe,
		    volatile struct mlx5_mini_cqe8 *mcqe,
		    struct mlx5_rxq_data *rxq, uint32_t len);


/**
 * Internal function to compute the number of used descriptors in an RX queue.
 *
 * @param rxq
 *   The Rx queue.
 *
 * @return
 *   The number of used Rx descriptors.
 */
static uint32_t
rx_queue_count(struct mlx5_rxq_data *rxq)
{
	struct rxq_zip *zip = &rxq->zip;
	volatile struct mlx5_cqe *cqe;
	const unsigned int cqe_n = (1 << rxq->cqe_n);
	const unsigned int sges_n = (1 << rxq->sges_n);
	const unsigned int elts_n = (1 << rxq->elts_n);
	const unsigned int strd_n = RTE_BIT32(rxq->log_strd_num);
	const unsigned int cqe_cnt = cqe_n - 1;
	unsigned int cq_ci, used;

	/* If we are processing a compressed CQE. */
	if (zip->ai) {
		used = zip->cqe_cnt - zip->ai;
		cq_ci = zip->cq_ci;
	} else {
		used = 0;
		cq_ci = rxq->cq_ci;
	}
	cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) {
		int8_t op_own;
		unsigned int n;

		op_own = cqe->op_own;
		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED)
			n = rte_be_to_cpu_32(cqe->byte_cnt);
		else
			n = 1;
		cq_ci += n;
		used += n;
		cqe = &(*rxq->cqes)[cq_ci & cqe_cnt];
	}
	used = RTE_MIN(used * sges_n, elts_n * strd_n);
	return used;
}
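/*
 * Illustrative sketch (not referenced by the driver), assuming an application
 * context: the descriptor status callback below is normally reached through
 * the generic rte_eth_rx_descriptor_status() wrapper. The helper name is
 * hypothetical.
 */
static __rte_unused int
example_rx_desc_is_done(uint16_t port_id, uint16_t queue_id, uint16_t offset)
{
	/* Resolves to mlx5_rx_descriptor_status() on mlx5 ports. */
	int status = rte_eth_rx_descriptor_status(port_id, queue_id, offset);

	/* DONE means the descriptor holds a packet not yet retrieved. */
	return status == RTE_ETH_RX_DESC_DONE;
}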
/**
 * DPDK callback to check the status of a Rx descriptor.
 *
 * @param rx_queue
 *   The Rx queue.
 * @param[in] offset
 *   The index of the descriptor in the ring.
 *
 * @return
 *   The status of the Rx descriptor.
 */
int
mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
{
	struct mlx5_rxq_data *rxq = rx_queue;

	if (offset >= (1 << rxq->cqe_n)) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (offset < rx_queue_count(rxq))
		return RTE_ETH_RX_DESC_DONE;
	return RTE_ETH_RX_DESC_AVAIL;
}

/* Convert the rxq LWM descriptor count to a percentage. */
static uint8_t
mlx5_rxq_lwm_to_percentage(struct mlx5_rxq_priv *rxq)
{
	struct mlx5_rxq_data *rxq_data = &rxq->ctrl->rxq;
	uint32_t wqe_cnt = 1 << (rxq_data->elts_n - rxq_data->sges_n);

	return rxq->lwm * 100 / wqe_cnt;
}

/**
 * DPDK callback to get the RX queue information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   Rx queue identifier.
 *
 * @param qinfo
 *   Pointer to the RX queue information structure.
 *
 * @return
 *   None.
 */

void
mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		  struct rte_eth_rxq_info *qinfo)
{
	struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, rx_queue_id);
	struct mlx5_rxq_data *rxq = mlx5_rxq_data_get(dev, rx_queue_id);
	struct mlx5_rxq_priv *rxq_priv = mlx5_rxq_get(dev, rx_queue_id);

	if (!rxq)
		return;
	qinfo->mp = mlx5_rxq_mprq_enabled(rxq) ?
					rxq->mprq_mp : rxq->mp;
	qinfo->conf.rx_thresh.pthresh = 0;
	qinfo->conf.rx_thresh.hthresh = 0;
	qinfo->conf.rx_thresh.wthresh = 0;
	qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh;
	qinfo->conf.rx_drop_en = 1;
	if (rxq_ctrl == NULL || rxq_ctrl->obj == NULL)
		qinfo->conf.rx_deferred_start = 0;
	else
		qinfo->conf.rx_deferred_start = 1;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
	qinfo->scattered_rx = dev->data->scattered_rx;
	qinfo->nb_desc = mlx5_rxq_mprq_enabled(rxq) ?
		RTE_BIT32(rxq->elts_n) * RTE_BIT32(rxq->log_strd_num) :
		RTE_BIT32(rxq->elts_n);
	qinfo->avail_thresh = rxq_priv ?
		mlx5_rxq_lwm_to_percentage(rxq_priv) : 0;
}

/**
 * DPDK callback to get the RX packet burst mode information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   Rx queue identifier.
 *
 * @param mode
 *   Pointer to the burst mode information.
 *
 * @return
 *   0 as success, -EINVAL as failure.
 */
int
mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
		       uint16_t rx_queue_id __rte_unused,
		       struct rte_eth_burst_mode *mode)
{
	eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
	struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id);

	if (!rxq) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (pkt_burst == mlx5_rx_burst) {
		snprintf(mode->info, sizeof(mode->info), "%s", "Scalar");
	} else if (pkt_burst == mlx5_rx_burst_mprq) {
		snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ");
	} else if (pkt_burst == mlx5_rx_burst_vec) {
#if defined RTE_ARCH_X86_64
		snprintf(mode->info, sizeof(mode->info), "%s", "Vector SSE");
#elif defined RTE_ARCH_ARM64
		snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon");
#elif defined RTE_ARCH_PPC_64
		snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec");
#else
		return -EINVAL;
#endif
	} else if (pkt_burst == mlx5_rx_burst_mprq_vec) {
#if defined RTE_ARCH_X86_64
		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector SSE");
#elif defined RTE_ARCH_ARM64
		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon");
#elif defined RTE_ARCH_PPC_64
		snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec");
#else
		return -EINVAL;
#endif
	} else {
		return -EINVAL;
	}
	return 0;
}

/**
 * DPDK callback to get the number of used descriptors in an Rx queue.
 *
 * @param rx_queue
 *   The Rx queue pointer.
 *
 * @return
 *   The number of used Rx descriptors.
 *   -EINVAL if the queue is invalid.
 */
uint32_t
mlx5_rx_queue_count(void *rx_queue)
{
	struct mlx5_rxq_data *rxq = rx_queue;
	struct rte_eth_dev *dev;

	if (!rxq) {
		rte_errno = EINVAL;
		return -rte_errno;
	}

	dev = &rte_eth_devices[rxq->port_id];

	if (dev->rx_pkt_burst == NULL ||
	    dev->rx_pkt_burst == rte_eth_pkt_burst_dummy) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}

	return rx_queue_count(rxq);
}

#define CLB_VAL_IDX 0
#define CLB_MSK_IDX 1
static int
mlx5_monitor_callback(const uint64_t value,
		const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ])
{
	const uint64_t m = opaque[CLB_MSK_IDX];
	const uint64_t v = opaque[CLB_VAL_IDX];

	return (value & m) == v ? -1 : 0;
}

int mlx5_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
{
	struct mlx5_rxq_data *rxq = rx_queue;
	const unsigned int cqe_num = 1 << rxq->cqe_n;
	const unsigned int cqe_mask = cqe_num - 1;
	const uint16_t idx = rxq->cq_ci & cqe_num;
	volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask];

	if (unlikely(rxq->cqes == NULL)) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	pmc->addr = &cqe->op_own;
	pmc->opaque[CLB_VAL_IDX] = !!idx;
	pmc->opaque[CLB_MSK_IDX] = MLX5_CQE_OWNER_MASK;
	pmc->fn = mlx5_monitor_callback;
	pmc->size = sizeof(uint8_t);
	return 0;
}
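/*
 * Illustrative sketch (not referenced by the driver), assuming an application
 * context: the condition filled in by mlx5_get_monitor_addr() above is meant
 * to be fetched through rte_eth_get_monitor_addr() and handed to the
 * rte_power_monitor() intrinsic, which returns once the armed CQE owner byte
 * changes or the TSC deadline expires. The helper name is hypothetical and
 * the sketch assumes the rte_power_intrinsics declarations are reachable
 * through the ethdev headers already pulled in by this file.
 */
static __rte_unused void
example_rx_monitor_wait(uint16_t port_id, uint16_t queue_id)
{
	struct rte_power_monitor_cond pmc;

	if (rte_eth_get_monitor_addr(port_id, queue_id, &pmc) != 0)
		return;
	/* Wait for roughly 1 ms worth of TSC cycles on the armed address. */
	(void)rte_power_monitor(&pmc, rte_get_tsc_cycles() +
				rte_get_tsc_hz() / 1000);
}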
/**
 * Translate RX completion flags to packet type.
 *
 * @param[in] rxq
 *   Pointer to RX queue structure.
 * @param[in] cqe
 *   Pointer to CQE.
 *
 * @note Keep mlx5_dev_supported_ptypes_get() in sync with any change here.
 *
 * @return
 *   Packet type for struct rte_mbuf.
 */
static inline uint32_t
rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		   volatile struct mlx5_mini_cqe8 *mcqe)
{
	uint8_t idx;
	uint8_t ptype;
	uint8_t pinfo = (cqe->pkt_info & 0x3) << 6;

	/* Get the L3/L4 header type from the mini-CQE when it uses the L3/L4 format. */
	if (mcqe == NULL ||
	    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
		ptype = (cqe->hdr_type_etc & 0xfc00) >> 10;
	else
		ptype = mcqe->hdr_type >> 2;
	/*
	 * The index to the array should have:
	 * bit[1:0] = l3_hdr_type
	 * bit[4:2] = l4_hdr_type
	 * bit[5] = ip_frag
	 * bit[6] = tunneled
	 * bit[7] = outer_l3_type
	 */
	idx = pinfo | ptype;
	return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6));
}
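/*
 * Illustrative sketch (not referenced by the driver): the table index built
 * above, assembled field by field according to the bit layout documented in
 * rxq_cq_to_pkt_type(). The helper name and parameters are hypothetical.
 */
static __rte_unused uint8_t
example_ptype_table_idx(uint8_t l3_hdr_type, uint8_t l4_hdr_type,
			uint8_t ip_frag, uint8_t tunneled,
			uint8_t outer_l3_type)
{
	return (l3_hdr_type & 0x3) |		/* bit[1:0] */
	       ((l4_hdr_type & 0x7) << 2) |	/* bit[4:2] */
	       ((ip_frag & 0x1) << 5) |		/* bit[5] */
	       ((tunneled & 0x1) << 6) |	/* bit[6] */
	       ((outer_l3_type & 0x1) << 7);	/* bit[7] */
}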
/**
 * Initialize Rx WQ and indexes.
 *
 * @param[in] rxq
 *   Pointer to RX queue structure.
 */
void
mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
{
	const unsigned int wqe_n = 1 << rxq->elts_n;
	unsigned int i;

	for (i = 0; (i != wqe_n); ++i) {
		volatile struct mlx5_wqe_data_seg *scat;
		uintptr_t addr;
		uint32_t byte_count;
		uint32_t lkey;

		if (mlx5_rxq_mprq_enabled(rxq)) {
			struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i];

			scat = &((volatile struct mlx5_wqe_mprq *)
				rxq->wqes)[i].dseg;
			addr = (uintptr_t)mlx5_mprq_buf_addr
					(buf, RTE_BIT32(rxq->log_strd_num));
			byte_count = RTE_BIT32(rxq->log_strd_sz) *
				     RTE_BIT32(rxq->log_strd_num);
			lkey = mlx5_rx_addr2mr(rxq, addr);
		} else {
			struct rte_mbuf *buf = (*rxq->elts)[i];

			scat = &((volatile struct mlx5_wqe_data_seg *)
					rxq->wqes)[i];
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			byte_count = DATA_LEN(buf);
			lkey = mlx5_rx_mb2mr(rxq, buf);
		}
		/* scat->addr must be able to store a pointer. */
		MLX5_ASSERT(sizeof(scat->addr) >= sizeof(uintptr_t));
		*scat = (struct mlx5_wqe_data_seg){
			.addr = rte_cpu_to_be_64(addr),
			.byte_count = rte_cpu_to_be_32(byte_count),
			.lkey = lkey,
		};
	}
	rxq->consumed_strd = 0;
	rxq->decompressed = 0;
	rxq->rq_pi = 0;
	rxq->zip = (struct rxq_zip){
		.ai = 0,
	};
	rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ?
		(wqe_n >> rxq->sges_n) * RTE_BIT32(rxq->log_strd_num) : 0;
	/* Update doorbell counter. */
	rxq->rq_ci = wqe_n >> rxq->sges_n;
	rte_io_wmb();
	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
}

/* Must be negative. */
#define MLX5_ERROR_CQE_RET (-1)
/* Must not be negative. */
#define MLX5_RECOVERY_ERROR_RET 0

/**
 * Handle an Rx error.
 * The function moves the RQ to the reset state when the first error CQE is
 * seen, then the CQ is drained by the caller function loop. When the CQ is
 * empty, it moves the RQ state to ready and reinitializes the RQ.
 * Next CQE identification and error counting are the caller's responsibility.
 *
 * @param[in] rxq
 *   Pointer to RX queue structure.
 * @param[in] vec
 *   1 when called from vectorized Rx burst, need to prepare mbufs for the RQ.
 *   0 when called from non-vectorized Rx burst.
 *
 * @return
 *   MLX5_RECOVERY_ERROR_RET in case of recovery error, otherwise the CQE status.
 */
int
mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
{
	const uint16_t cqe_n = 1 << rxq->cqe_n;
	const uint16_t cqe_mask = cqe_n - 1;
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint16_t strd_n = RTE_BIT32(rxq->log_strd_num);
	struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of(rxq, struct mlx5_rxq_ctrl, rxq);
	union {
		volatile struct mlx5_cqe *cqe;
		volatile struct mlx5_err_cqe *err_cqe;
	} u = {
		.cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask],
	};
	struct mlx5_mp_arg_queue_state_modify sm;
	int ret;

	switch (rxq->err_state) {
	case MLX5_RXQ_ERR_STATE_NO_ERROR:
		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET;
		/* Fall-through */
	case MLX5_RXQ_ERR_STATE_NEED_RESET:
		sm.is_wq = 1;
		sm.queue_id = rxq->idx;
		sm.state = IBV_WQS_RESET;
		if (mlx5_queue_state_modify(RXQ_DEV(rxq_ctrl), &sm))
			return MLX5_RECOVERY_ERROR_RET;
		if (rxq_ctrl->dump_file_n <
		    RXQ_PORT(rxq_ctrl)->config.max_dump_files_num) {
			MKSTR(err_str, "Unexpected CQE error syndrome "
			      "0x%02x CQN = %u RQN = %u wqe_counter = %u"
			      " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome,
			      rxq->cqn, rxq_ctrl->wqn,
			      rte_be_to_cpu_16(u.err_cqe->wqe_counter),
			      rxq->rq_ci << rxq->sges_n, rxq->cq_ci);
			MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u",
			      rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc());
			mlx5_dump_debug_information(name, NULL, err_str, 0);
			mlx5_dump_debug_information(name, "MLX5 Error CQ:",
						    (const void *)((uintptr_t)
								    rxq->cqes),
						    sizeof(*u.cqe) * cqe_n);
			mlx5_dump_debug_information(name, "MLX5 Error RQ:",
						    (const void *)((uintptr_t)
								    rxq->wqes),
						    16 * wqe_n);
			rxq_ctrl->dump_file_n++;
		}
		rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY;
		/* Fall-through */
	case MLX5_RXQ_ERR_STATE_NEED_READY:
		ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci);
		if (ret == MLX5_CQE_STATUS_HW_OWN) {
			rte_io_wmb();
			*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
			rte_io_wmb();
			/*
			 * The RQ consumer index must be zeroed while moving
			 * from RESET state to RDY state.
			 */
			*rxq->rq_db = rte_cpu_to_be_32(0);
			rte_io_wmb();
			sm.is_wq = 1;
			sm.queue_id = rxq->idx;
			sm.state = IBV_WQS_RDY;
			if (mlx5_queue_state_modify(RXQ_DEV(rxq_ctrl), &sm))
				return MLX5_RECOVERY_ERROR_RET;
			if (vec) {
				const uint32_t elts_n =
					mlx5_rxq_mprq_enabled(rxq) ?
						wqe_n * strd_n : wqe_n;
				const uint32_t e_mask = elts_n - 1;
				uint32_t elts_ci =
					mlx5_rxq_mprq_enabled(rxq) ?
						rxq->elts_ci : rxq->rq_ci;
				uint32_t elt_idx;
				struct rte_mbuf **elt;
				int i;
				unsigned int n = elts_n - (elts_ci -
							   rxq->rq_pi);

				for (i = 0; i < (int)n; ++i) {
					elt_idx = (elts_ci + i) & e_mask;
					elt = &(*rxq->elts)[elt_idx];
					*elt = rte_mbuf_raw_alloc(rxq->mp);
					if (!*elt) {
						for (i--; i >= 0; --i) {
							elt_idx = (elts_ci +
								   i) & e_mask;
							elt = &(*rxq->elts)
								[elt_idx];
							rte_pktmbuf_free_seg
								(*elt);
						}
						return MLX5_RECOVERY_ERROR_RET;
					}
				}
				for (i = 0; i < (int)elts_n; ++i) {
					elt = &(*rxq->elts)[i];
					DATA_LEN(*elt) =
						(uint16_t)((*elt)->buf_len -
						rte_pktmbuf_headroom(*elt));
				}
				/* Padding with a fake mbuf for vec Rx. */
				for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
					(*rxq->elts)[elts_n + i] =
								&rxq->fake_mbuf;
			}
			mlx5_rxq_initialize(rxq);
			rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR;
		}
		return ret;
	default:
		return MLX5_RECOVERY_ERROR_RET;
	}
}

/**
 * Get size of the next packet for a given CQE. For compressed CQEs, the
 * consumer index is updated only once all packets of the current one have
 * been processed.
 *
 * @param rxq
 *   Pointer to RX queue.
 * @param cqe
 *   CQE to process.
 * @param[out] mcqe
 *   Store pointer to mini-CQE if compressed. Otherwise, the pointer is not
 *   written.
 *
 * @return
 *   0 in case of empty CQE, MLX5_ERROR_CQE_RET in case of error CQE,
 *   otherwise the packet size in a regular Rx queue, or the striding byte
 *   count format in the MPRQ case.
 */
static inline int
mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe,
		 uint16_t cqe_cnt, volatile struct mlx5_mini_cqe8 **mcqe)
{
	struct rxq_zip *zip = &rxq->zip;
	uint16_t cqe_n = cqe_cnt + 1;
	int len;
	uint16_t idx, end;

	do {
		len = 0;
		/* Process compressed data in the CQE and mini arrays. */
		if (zip->ai) {
			volatile struct mlx5_mini_cqe8 (*mc)[8] =
				(volatile struct mlx5_mini_cqe8 (*)[8])
				(uintptr_t)(&(*rxq->cqes)[zip->ca &
							  cqe_cnt].pkt_info);
			len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt &
					       rxq->byte_mask);
			*mcqe = &(*mc)[zip->ai & 7];
			if ((++zip->ai & 7) == 0) {
				/* Invalidate consumed CQEs */
				idx = zip->ca;
				end = zip->na;
				while (idx != end) {
					(*rxq->cqes)[idx & cqe_cnt].op_own =
						MLX5_CQE_INVALIDATE;
					++idx;
				}
				/*
				 * Increment consumer index to skip the number
				 * of CQEs consumed. Hardware leaves holes in
				 * the CQ ring for software use.
				 */
				zip->ca = zip->na;
				zip->na += 8;
			}
			if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
				/* Invalidate the rest */
				idx = zip->ca;
				end = zip->cq_ci;

				while (idx != end) {
					(*rxq->cqes)[idx & cqe_cnt].op_own =
						MLX5_CQE_INVALIDATE;
					++idx;
				}
				rxq->cq_ci = zip->cq_ci;
				zip->ai = 0;
			}
		/*
		 * No compressed data, get next CQE and verify if it is
		 * compressed.
		 */
		} else {
			int ret;
			int8_t op_own;
			uint32_t cq_ci;

			ret = check_cqe(cqe, cqe_n, rxq->cq_ci);
			if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
				if (unlikely(ret == MLX5_CQE_STATUS_ERR ||
					     rxq->err_state)) {
					ret = mlx5_rx_err_handle(rxq, 0);
					if (ret == MLX5_CQE_STATUS_HW_OWN ||
					    ret == MLX5_RECOVERY_ERROR_RET)
						return MLX5_ERROR_CQE_RET;
				} else {
					return 0;
				}
			}
			/*
			 * Introduce the local variable to have queue cq_ci
			 * index in queue structure always consistent with
			 * actual CQE boundary (not pointing to the middle
			 * of compressed CQE session).
			 */
			cq_ci = rxq->cq_ci + 1;
			op_own = cqe->op_own;
			if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
				volatile struct mlx5_mini_cqe8 (*mc)[8] =
					(volatile struct mlx5_mini_cqe8 (*)[8])
					(uintptr_t)(&(*rxq->cqes)
						[cq_ci & cqe_cnt].pkt_info);

				/* Fix endianness. */
				zip->cqe_cnt = rte_be_to_cpu_32(cqe->byte_cnt);
				/*
				 * Current mini array position is the one
				 * returned by check_cqe64().
				 *
				 * If completion comprises several mini arrays,
				 * as a special case the second one is located
				 * 7 CQEs after the initial CQE instead of 8
				 * for subsequent ones.
				 */
				zip->ca = cq_ci;
				zip->na = zip->ca + 7;
				/* Compute the next non compressed CQE. */
				zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
				/* Get packet size to return. */
				len = rte_be_to_cpu_32((*mc)[0].byte_cnt &
						       rxq->byte_mask);
				*mcqe = &(*mc)[0];
				zip->ai = 1;
				/* Prefetch all to be invalidated */
				idx = zip->ca;
				end = zip->cq_ci;
				while (idx != end) {
					rte_prefetch0(&(*rxq->cqes)[(idx) &
								    cqe_cnt]);
					++idx;
				}
			} else {
				rxq->cq_ci = cq_ci;
				len = rte_be_to_cpu_32(cqe->byte_cnt);
			}
		}
		if (unlikely(rxq->err_state)) {
			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
			++rxq->stats.idropped;
		} else {
			return len;
		}
	} while (1);
}

/**
 * Translate RX completion flags to offload flags.
 *
 * @param[in] cqe
 *   Pointer to CQE.
 *
 * @return
 *   Offload flags (ol_flags) for struct rte_mbuf.
 */
static inline uint32_t
rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe)
{
	uint32_t ol_flags = 0;
	uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc);

	ol_flags =
		TRANSPOSE(flags,
			  MLX5_CQE_RX_L3_HDR_VALID,
			  RTE_MBUF_F_RX_IP_CKSUM_GOOD) |
		TRANSPOSE(flags,
			  MLX5_CQE_RX_L4_HDR_VALID,
			  RTE_MBUF_F_RX_L4_CKSUM_GOOD);
	return ol_flags;
}

/**
 * Fill in mbuf fields from RX completion flags.
 * Note that pkt->ol_flags should be initialized outside of this function.
 *
 * @param rxq
 *   Pointer to RX queue.
 * @param pkt
 *   mbuf to fill.
 * @param cqe
 *   CQE to process.
 * @param mcqe
 *   Pointer to the mini-CQE, or NULL if the CQE is not compressed.
 */
static inline void
rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
	       volatile struct mlx5_cqe *cqe,
	       volatile struct mlx5_mini_cqe8 *mcqe)
{
	/* Update packet information. */
	pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe, mcqe);
	pkt->port = unlikely(rxq->shared) ? cqe->user_index_low : rxq->port_id;

	if (rxq->rss_hash) {
		uint32_t rss_hash_res = 0;

		/* If compressed, take hash result from mini-CQE. */
		if (mcqe == NULL ||
		    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH)
			rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res);
		else
			rss_hash_res = rte_be_to_cpu_32(mcqe->rx_hash_result);
		if (rss_hash_res) {
			pkt->hash.rss = rss_hash_res;
			pkt->ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
		}
	}
	if (rxq->mark) {
		uint32_t mark = 0;

		/* If compressed, take flow tag from mini-CQE. */
		if (mcqe == NULL ||
		    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_FTAG_STRIDX)
			mark = cqe->sop_drop_qpn;
		else
			mark = ((mcqe->byte_cnt_flow & 0xff) << 8) |
				(mcqe->flow_tag_high << 16);
		if (MLX5_FLOW_MARK_IS_VALID(mark)) {
			pkt->ol_flags |= RTE_MBUF_F_RX_FDIR;
			if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) {
				pkt->ol_flags |= RTE_MBUF_F_RX_FDIR_ID;
				pkt->hash.fdir.hi = mlx5_flow_mark_get(mark);
			}
		}
	}
	if (rxq->dynf_meta) {
		uint32_t meta = rte_be_to_cpu_32(cqe->flow_table_metadata) &
			rxq->flow_meta_port_mask;

		if (meta) {
			pkt->ol_flags |= rxq->flow_meta_mask;
			*RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset,
						uint32_t *) = meta;
		}
	}
	if (rxq->csum)
		pkt->ol_flags |= rxq_cq_to_ol_flags(cqe);
	if (rxq->vlan_strip) {
		bool vlan_strip;

		if (mcqe == NULL ||
		    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
			vlan_strip = cqe->hdr_type_etc &
				     RTE_BE16(MLX5_CQE_VLAN_STRIPPED);
		else
			vlan_strip = mcqe->hdr_type &
				     RTE_BE16(MLX5_CQE_VLAN_STRIPPED);
		if (vlan_strip) {
			pkt->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED;
			pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info);
		}
	}
	if (rxq->hw_timestamp) {
		uint64_t ts = rte_be_to_cpu_64(cqe->timestamp);

		if (rxq->rt_timestamp)
			ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts);
		mlx5_timestamp_set(pkt, rxq->timestamp_offset, ts);
		pkt->ol_flags |= rxq->timestamp_rx_flag;
	}
}

/**
 * DPDK callback for RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
	const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
	const unsigned int sges_n = rxq->sges_n;
	struct rte_mbuf *pkt = NULL;
	struct rte_mbuf *seg = NULL;
	volatile struct mlx5_cqe *cqe =
		&(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
	unsigned int i = 0;
	unsigned int rq_ci = rxq->rq_ci << sges_n;
	int len = 0; /* keep its value across iterations. */

	while (pkts_n) {
		unsigned int idx = rq_ci & wqe_cnt;
		volatile struct mlx5_wqe_data_seg *wqe =
			&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx];
		struct rte_mbuf *rep = (*rxq->elts)[idx];
		volatile struct mlx5_mini_cqe8 *mcqe = NULL;

		if (pkt)
			NEXT(seg) = rep;
		seg = rep;
		rte_prefetch0(seg);
		rte_prefetch0(cqe);
		rte_prefetch0(wqe);
		/* Allocate the buf from the same pool. */
		rep = rte_mbuf_raw_alloc(seg->pool);
		if (unlikely(rep == NULL)) {
			++rxq->stats.rx_nombuf;
			if (!pkt) {
				/*
				 * no buffers before we even started,
				 * bail out silently.
				 */
				break;
			}
			while (pkt != seg) {
				MLX5_ASSERT(pkt != (*rxq->elts)[idx]);
				rep = NEXT(pkt);
				NEXT(pkt) = NULL;
				NB_SEGS(pkt) = 1;
				rte_mbuf_raw_free(pkt);
				pkt = rep;
			}
			rq_ci >>= sges_n;
			++rq_ci;
			rq_ci <<= sges_n;
			break;
		}
		if (!pkt) {
			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt];
			len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt, &mcqe);
			if (len <= 0) {
				rte_mbuf_raw_free(rep);
				if (unlikely(len == MLX5_ERROR_CQE_RET))
					rq_ci = rxq->rq_ci << sges_n;
				break;
			}
			pkt = seg;
			MLX5_ASSERT(len >= (rxq->crc_present << 2));
			pkt->ol_flags &= RTE_MBUF_F_EXTERNAL;
			rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe);
			if (rxq->crc_present)
				len -= RTE_ETHER_CRC_LEN;
			PKT_LEN(pkt) = len;
			if (cqe->lro_num_seg > 1) {
				mlx5_lro_update_hdr
					(rte_pktmbuf_mtod(pkt, uint8_t *), cqe,
					 mcqe, rxq, len);
				pkt->ol_flags |= RTE_MBUF_F_RX_LRO;
				pkt->tso_segsz = len / cqe->lro_num_seg;
			}
		}
		DATA_LEN(rep) = DATA_LEN(seg);
		PKT_LEN(rep) = PKT_LEN(seg);
		SET_DATA_OFF(rep, DATA_OFF(seg));
		PORT(rep) = PORT(seg);
		(*rxq->elts)[idx] = rep;
		/*
		 * Fill NIC descriptor with the new buffer. The lkey and size
		 * of the buffers are already known, only the buffer address
		 * changes.
		 */
		wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t));
		/* If there's only one MR, no need to replace LKey in WQE. */
		if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
			wqe->lkey = mlx5_rx_mb2mr(rxq, rep);
		if (len > DATA_LEN(seg)) {
			len -= DATA_LEN(seg);
			++NB_SEGS(pkt);
			++rq_ci;
			continue;
		}
		DATA_LEN(seg) = len;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment bytes counter. */
		rxq->stats.ibytes += PKT_LEN(pkt);
#endif
		/* Return packet. */
		*(pkts++) = pkt;
		pkt = NULL;
		--pkts_n;
		++i;
		/* Align consumer index to the next stride. */
		rq_ci >>= sges_n;
		++rq_ci;
		rq_ci <<= sges_n;
	}
	if (unlikely(i == 0 && ((rq_ci >> sges_n) == rxq->rq_ci)))
		return 0;
	/* Update the consumer index. */
	rxq->rq_ci = rq_ci >> sges_n;
	rte_io_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	rte_io_wmb();
	*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment packets counter. */
	rxq->stats.ipackets += i;
#endif
	return i;
}
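/*
 * Illustrative sketch (not referenced by the driver), assuming an application
 * context: a minimal polling step draining the burst routine above through
 * the generic rte_eth_rx_burst() wrapper. The helper name and the burst size
 * are hypothetical.
 */
static __rte_unused void
example_rx_drain_queue(uint16_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *bursts[32];
	uint16_t nb_rx;

	/* Resolves to mlx5_rx_burst() (or an MPRQ/vector variant) on mlx5. */
	nb_rx = rte_eth_rx_burst(port_id, queue_id, bursts, RTE_DIM(bursts));
	while (nb_rx--)
		rte_pktmbuf_free(bursts[nb_rx]);
}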
/**
 * Update LRO packet TCP header.
 * The HW LRO feature doesn't update the TCP header after coalescing the
 * TCP segments but supplies information in the CQE for SW to fill it in.
 *
 * @param tcp
 *   Pointer to the TCP header.
 * @param cqe
 *   Pointer to the completion entry.
 * @param phcsum
 *   The L3 pseudo-header checksum.
 * @param l4_type
 *   The L4 header type reported by the CQE.
 */
static inline void
mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp,
			volatile struct mlx5_cqe *__rte_restrict cqe,
			uint32_t phcsum, uint8_t l4_type)
{
	/*
	 * The HW calculates only the TCP payload checksum, need to complete
	 * the TCP header checksum and the L3 pseudo-header checksum.
	 */
	uint32_t csum = phcsum + cqe->csum;

	if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK ||
	    l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) {
		tcp->tcp_flags |= RTE_TCP_ACK_FLAG;
		tcp->recv_ack = cqe->lro_ack_seq_num;
		tcp->rx_win = cqe->lro_tcp_win;
	}
	if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK)
		tcp->tcp_flags |= RTE_TCP_PSH_FLAG;
	tcp->cksum = 0;
	csum += rte_raw_cksum(tcp, (tcp->data_off >> 4) * 4);
	csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff);
	csum = (~csum) & 0xffff;
	if (csum == 0)
		csum = 0xffff;
	tcp->cksum = csum;
}

/**
 * Update LRO packet headers.
 * The HW LRO feature doesn't update the L3/TCP headers after coalescing the
 * TCP segments but supplies information in the CQE for SW to fill them in.
 *
 * @param padd
 *   The packet address.
 * @param cqe
 *   Pointer to the completion entry.
 * @param mcqe
 *   Pointer to the mini-CQE, or NULL if the CQE is not compressed.
 * @param rxq
 *   Pointer to RX queue.
 * @param len
 *   The packet length.
 */
static inline void
mlx5_lro_update_hdr(uint8_t *__rte_restrict padd,
		    volatile struct mlx5_cqe *__rte_restrict cqe,
		    volatile struct mlx5_mini_cqe8 *mcqe,
		    struct mlx5_rxq_data *rxq, uint32_t len)
{
	union {
		struct rte_ether_hdr *eth;
		struct rte_vlan_hdr *vlan;
		struct rte_ipv4_hdr *ipv4;
		struct rte_ipv6_hdr *ipv6;
		struct rte_tcp_hdr *tcp;
		uint8_t *hdr;
	} h = {
		.hdr = padd,
	};
	uint16_t proto = h.eth->ether_type;
	uint32_t phcsum;
	uint8_t l4_type;

	h.eth++;
	while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) ||
	       proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) {
		proto = h.vlan->eth_proto;
		h.vlan++;
	}
	if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) {
		h.ipv4->time_to_live = cqe->lro_min_ttl;
		h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd));
		h.ipv4->hdr_checksum = 0;
		h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4);
		phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0);
		h.ipv4++;
	} else {
		h.ipv6->hop_limits = cqe->lro_min_ttl;
		h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) -
						       sizeof(*h.ipv6));
		phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0);
		h.ipv6++;
	}
	if (mcqe == NULL ||
	    rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX)
		l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) &
			   MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT;
	else
		l4_type = (rte_be_to_cpu_16(mcqe->hdr_type) &
			   MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT;
	mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum, l4_type);
}

void
mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf)
{
	mlx5_mprq_buf_free_cb(NULL, buf);
}
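/*
 * Illustrative sketch (not referenced by the driver): the 32-bit to 16-bit
 * one's-complement fold performed at the end of mlx5_lro_update_tcp_hdr()
 * above, with a zero result remapped to 0xffff exactly as that function does.
 * The helper name is hypothetical.
 */
static __rte_unused uint16_t
example_lro_csum_fold(uint32_t csum)
{
	/* Fold the carry bits back into the low 16 bits and complement. */
	csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff);
	csum = (~csum) & 0xffff;
	return csum ? (uint16_t)csum : (uint16_t)0xffff;
}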
/**
 * DPDK callback for RX with Multi-Packet RQ support.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t strd_sz = RTE_BIT32(rxq->log_strd_sz);
	const uint32_t cq_mask = (1 << rxq->cqe_n) - 1;
	const uint32_t wq_mask = (1 << rxq->elts_n) - 1;
	volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
	unsigned int i = 0;
	uint32_t rq_ci = rxq->rq_ci;
	uint16_t consumed_strd = rxq->consumed_strd;
	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];

	while (i < pkts_n) {
		struct rte_mbuf *pkt;
		int ret;
		uint32_t len;
		uint16_t strd_cnt;
		uint16_t strd_idx;
		uint32_t byte_cnt;
		volatile struct mlx5_mini_cqe8 *mcqe = NULL;
		enum mlx5_rqx_code rxq_code;

		if (consumed_strd == strd_n) {
			/* Replace WQE if the buffer is still in use. */
			mprq_buf_replace(rxq, rq_ci & wq_mask);
			/* Advance to the next WQE. */
			consumed_strd = 0;
			++rq_ci;
			buf = (*rxq->mprq_bufs)[rq_ci & wq_mask];
		}
		cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
		ret = mlx5_rx_poll_len(rxq, cqe, cq_mask, &mcqe);
		if (ret == 0)
			break;
		if (unlikely(ret == MLX5_ERROR_CQE_RET)) {
			rq_ci = rxq->rq_ci;
			consumed_strd = rxq->consumed_strd;
			break;
		}
		byte_cnt = ret;
		len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
		MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
		if (rxq->crc_present)
			len -= RTE_ETHER_CRC_LEN;
		if (mcqe &&
		    rxq->mcqe_format == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX)
			strd_cnt = (len / strd_sz) + !!(len % strd_sz);
		else
			strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
				   MLX5_MPRQ_STRIDE_NUM_SHIFT;
		MLX5_ASSERT(strd_cnt);
		consumed_strd += strd_cnt;
		if (byte_cnt & MLX5_MPRQ_FILLER_MASK)
			continue;
		strd_idx = rte_be_to_cpu_16(mcqe == NULL ?
					cqe->wqe_counter :
					mcqe->stride_idx);
		MLX5_ASSERT(strd_idx < strd_n);
		MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) &
			    wq_mask));
		pkt = rte_pktmbuf_alloc(rxq->mp);
		if (unlikely(pkt == NULL)) {
			++rxq->stats.rx_nombuf;
			break;
		}
		len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
		MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
		if (rxq->crc_present)
			len -= RTE_ETHER_CRC_LEN;
		rxq_code = mprq_buf_to_pkt(rxq, pkt, len, buf,
					   strd_idx, strd_cnt);
		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
			rte_pktmbuf_free_seg(pkt);
			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
				++rxq->stats.idropped;
				continue;
			}
			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
				++rxq->stats.rx_nombuf;
				break;
			}
		}
		rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe);
		if (cqe->lro_num_seg > 1) {
			mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt, uint8_t *),
					    cqe, mcqe, rxq, len);
			pkt->ol_flags |= RTE_MBUF_F_RX_LRO;
			pkt->tso_segsz = len / cqe->lro_num_seg;
		}
		PKT_LEN(pkt) = len;
		PORT(pkt) = rxq->port_id;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment bytes counter. */
		rxq->stats.ibytes += PKT_LEN(pkt);
#endif
		/* Return packet. */
		*(pkts++) = pkt;
		++i;
	}
	/* Update the consumer indexes. */
	rxq->consumed_strd = consumed_strd;
	rte_io_wmb();
	*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	if (rq_ci != rxq->rq_ci) {
		rxq->rq_ci = rq_ci;
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment packets counter. */
	rxq->stats.ipackets += i;
#endif
	return i;
}
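/*
 * Illustrative sketch (not referenced by the driver): how the striding-RQ
 * byte count returned by mlx5_rx_poll_len() is decoded in the burst routine
 * above into a packet length and a consumed stride count. The helper name is
 * hypothetical.
 */
static __rte_unused void
example_mprq_decode_byte_cnt(uint32_t byte_cnt, uint32_t *len,
			     uint16_t *strd_cnt)
{
	/* Packet length field. */
	*len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT;
	/* Number of strides consumed by the packet. */
	*strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >>
		    MLX5_MPRQ_STRIDE_NUM_SHIFT;
}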
/*
 * Vectorized Rx routines are not compiled in when the required vector
 * instructions are not supported on a target architecture.
 * The following null stubs are needed for linkage when the vectorized Rx
 * implementations (e.g. mlx5_rxtx_vec_sse.c for x86) are not built.
 */

__rte_weak uint16_t
mlx5_rx_burst_vec(void *dpdk_rxq __rte_unused,
		  struct rte_mbuf **pkts __rte_unused,
		  uint16_t pkts_n __rte_unused)
{
	return 0;
}

__rte_weak uint16_t
mlx5_rx_burst_mprq_vec(void *dpdk_rxq __rte_unused,
		       struct rte_mbuf **pkts __rte_unused,
		       uint16_t pkts_n __rte_unused)
{
	return 0;
}

__rte_weak int
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
{
	return -ENOTSUP;
}

__rte_weak int
mlx5_check_vec_rx_support(struct rte_eth_dev *dev __rte_unused)
{
	return -ENOTSUP;
}

int
mlx5_rx_queue_lwm_query(struct rte_eth_dev *dev,
			uint16_t *queue_id, uint8_t *lwm)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int rxq_id, found = 0, n;
	struct mlx5_rxq_priv *rxq;

	if (!queue_id)
		return -EINVAL;
	/* Query all the Rx queues of the port in a circular way. */
	for (rxq_id = *queue_id, n = 0; n < priv->rxqs_n; n++) {
		rxq = mlx5_rxq_get(dev, rxq_id);
		if (rxq && rxq->lwm_event_pending) {
			pthread_mutex_lock(&priv->sh->lwm_config_lock);
			rxq->lwm_event_pending = 0;
			pthread_mutex_unlock(&priv->sh->lwm_config_lock);
			*queue_id = rxq_id;
			found = 1;
			if (lwm)
				*lwm = mlx5_rxq_lwm_to_percentage(rxq);
			break;
		}
		rxq_id = (rxq_id + 1) % priv->rxqs_n;
	}
	return found;
}

/**
 * Interrupt handler for the LWM event.
 * It first retrieves the event context; if an event is pending, it marks the
 * Rx queue and invokes the RTE_ETH_EVENT_RX_AVAIL_THRESH callback.
 *
 * @param args
 *   Generic pointer to mlx5_priv.
 */
void
mlx5_dev_interrupt_handler_lwm(void *args)
{
	struct mlx5_priv *priv = args;
	struct mlx5_rxq_priv *rxq;
	struct rte_eth_dev *dev;
	int ret, rxq_idx = 0, port_id = 0;

	ret = priv->obj_ops.rxq_event_get_lwm(priv, &rxq_idx, &port_id);
	if (unlikely(ret < 0)) {
		DRV_LOG(WARNING, "Cannot get LWM event context.");
		return;
	}
	DRV_LOG(INFO, "%s get LWM event, port_id:%d rxq_id:%d.", __func__,
		port_id, rxq_idx);
	dev = &rte_eth_devices[port_id];
	rxq = mlx5_rxq_get(dev, rxq_idx);
	if (rxq) {
		pthread_mutex_lock(&priv->sh->lwm_config_lock);
		rxq->lwm_event_pending = 1;
		pthread_mutex_unlock(&priv->sh->lwm_config_lock);
	}
	rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_RX_AVAIL_THRESH, NULL);
}
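/*
 * Illustrative sketch (not referenced by the driver): the percentage to
 * descriptor-count conversion used by mlx5_rx_queue_lwm_set() below,
 * including the round-up that compensates for integer-division precision
 * loss; mlx5_rxq_lwm_to_percentage() performs the reverse conversion.
 * The helper name is hypothetical.
 */
static __rte_unused uint32_t
example_lwm_percentage_to_count(uint8_t lwm_percent, uint32_t wqe_cnt)
{
	uint32_t lwm = lwm_percent * wqe_cnt / 100;

	/* Round up when the division truncated, as the code below does. */
	if (lwm_percent && (lwm_percent * wqe_cnt % 100) && lwm + 1 < wqe_cnt)
		lwm += 1;
	return lwm;
}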
/**
 * DPDK callback to arm an Rx queue LWM (limit watermark) event.
 * When the Rx queue fullness reaches the LWM limit, the driver catches
 * an HW event and invokes the user event callback.
 * After the last event handling, the user needs to call this API again
 * to arm an additional event.
 *
 * @param dev
 *   Pointer to the device structure.
 * @param[in] rx_queue_id
 *   Rx queue identifier.
 * @param[in] lwm
 *   The LWM value, defined as a percentage of the Rx queue size.
 *   [1-99] to set a new LWM (update the old value).
 *   0 to unarm the event.
 *
 * @return
 *   0 : operation success.
 *   Otherwise:
 *   - ENOMEM - not enough memory to create LWM event channel.
 *   - EINVAL - the input Rxq is not created by devx.
 *   - E2BIG  - lwm is bigger than 99.
 */
int
mlx5_rx_queue_lwm_set(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		      uint8_t lwm)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint16_t port_id = PORT_ID(priv);
	struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id);
	uint16_t event_nums[1] = {MLX5_EVENT_TYPE_SRQ_LIMIT_REACHED};
	struct mlx5_rxq_data *rxq_data;
	uint32_t wqe_cnt;
	uint64_t cookie;
	int ret = 0;

	if (!rxq) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	rxq_data = &rxq->ctrl->rxq;
	/* Ensure the Rx queue is created by DevX. */
	if (priv->obj_ops.rxq_obj_new != devx_obj_ops.rxq_obj_new) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (lwm > 99) {
		DRV_LOG(WARNING, "Too big LWM configuration.");
		rte_errno = E2BIG;
		return -rte_errno;
	}
	/* Start config LWM. */
	pthread_mutex_lock(&priv->sh->lwm_config_lock);
	if (rxq->lwm == 0 && lwm == 0) {
		/* Both old/new values are 0, do nothing. */
		ret = 0;
		goto end;
	}
	wqe_cnt = 1 << (rxq_data->elts_n - rxq_data->sges_n);
	if (lwm) {
		if (!priv->sh->devx_channel_lwm) {
			ret = mlx5_lwm_setup(priv);
			if (ret) {
				DRV_LOG(WARNING,
					"Failed to create shared_lwm.");
				rte_errno = ENOMEM;
				ret = -rte_errno;
				goto end;
			}
		}
		if (!rxq->lwm_devx_subscribed) {
			cookie = ((uint32_t)
				  (port_id << LWM_COOKIE_PORTID_OFFSET)) |
				(rx_queue_id << LWM_COOKIE_RXQID_OFFSET);
			ret = mlx5_os_devx_subscribe_devx_event
				(priv->sh->devx_channel_lwm,
				 rxq->devx_rq.rq->obj,
				 sizeof(event_nums),
				 event_nums,
				 cookie);
			if (ret) {
				rte_errno = rte_errno ? rte_errno : EINVAL;
				ret = -rte_errno;
				goto end;
			}
			rxq->lwm_devx_subscribed = 1;
		}
	}
	/* Save LWM to rxq and send modify_rq devx command. */
	rxq->lwm = lwm * wqe_cnt / 100;
	/* Prevent integer division loss when converting the LWM to a count. */
	if (lwm && (lwm * wqe_cnt % 100)) {
		rxq->lwm = ((uint32_t)(rxq->lwm + 1) >= wqe_cnt) ?
			rxq->lwm : (rxq->lwm + 1);
	}
	if (lwm && !rxq->lwm) {
		/* With MPRQ, wqe_cnt may be < 100. */
		DRV_LOG(WARNING, "Too small LWM configuration.");
		rte_errno = EINVAL;
		ret = -rte_errno;
		goto end;
	}
	ret = mlx5_devx_modify_rq(rxq, MLX5_RXQ_MOD_RDY2RDY);
end:
	pthread_mutex_unlock(&priv->sh->lwm_config_lock);
	return ret;
}

/**
 * Mlx5 access register function to configure host shaper.
 * It calls API in libmtcr_ul to access QSHR (QoS Shaper Host Register)
 * in firmware.
 *
 * @param dev
 *   Pointer to rte_eth_dev.
 * @param lwm_triggered
 *   Flag to enable/disable lwm_triggered bit in QSHR.
 * @param rate
 *   Host shaper rate, unit is 100Mbps, set to 0 means disable the shaper.
 * @return
 *   0 : operation success.
 *   Otherwise:
 *   - ENOENT - no ibdev interface.
 *   - EBUSY - the register access unit is busy.
 *   - EIO - the register access command fails with an IO error.
 */
static int
mlxreg_host_shaper_config(struct rte_eth_dev *dev,
			  bool lwm_triggered, uint8_t rate)
{
#ifdef HAVE_MLX5_MSTFLINT
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t data[MLX5_ST_SZ_DW(register_qshr)] = {0};
	int rc, retry_count = 3;
	mfile *mf = NULL;
	int status;
	void *ptr;

	mf = mopen(priv->sh->ibdev_name);
	if (!mf) {
		DRV_LOG(WARNING, "mopen failed\n");
		rte_errno = ENOENT;
		return -rte_errno;
	}
	MLX5_SET(register_qshr, data, connected_host, 1);
	MLX5_SET(register_qshr, data, fast_response, lwm_triggered ? 1 : 0);
	MLX5_SET(register_qshr, data, local_port, 1);
	ptr = MLX5_ADDR_OF(register_qshr, data, global_config);
	MLX5_SET(ets_global_config_register, ptr, rate_limit_update, 1);
	MLX5_SET(ets_global_config_register, ptr, max_bw_units,
		 rate ? ETS_GLOBAL_CONFIG_BW_UNIT_HUNDREDS_MBPS :
		 ETS_GLOBAL_CONFIG_BW_UNIT_DISABLED);
	MLX5_SET(ets_global_config_register, ptr, max_bw_value, rate);
	do {
		rc = maccess_reg(mf,
				 MLX5_QSHR_REGISTER_ID,
				 MACCESS_REG_METHOD_SET,
				 (u_int32_t *)&data[0],
				 sizeof(data),
				 sizeof(data),
				 sizeof(data),
				 &status);
		if ((rc != ME_ICMD_STATUS_IFC_BUSY &&
		     status != ME_REG_ACCESS_BAD_PARAM) ||
		    !(mf->flags & MDEVS_REM)) {
			break;
		}
		DRV_LOG(WARNING, "%s retry.", __func__);
		usleep(10000);
	} while (retry_count-- > 0);
	mclose(mf);
	rte_errno = (rc == ME_REG_ACCESS_DEV_BUSY) ? EBUSY : EIO;
	return rc ? -rte_errno : 0;
#else
	(void)dev;
	(void)lwm_triggered;
	(void)rate;
	return -1;
#endif
}

int rte_pmd_mlx5_host_shaper_config(int port_id, uint8_t rate,
				    uint32_t flags)
{
	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
	struct mlx5_priv *priv = dev->data->dev_private;
	bool lwm_triggered =
	     !!(flags & RTE_BIT32(MLX5_HOST_SHAPER_FLAG_AVAIL_THRESH_TRIGGERED));

	if (!lwm_triggered) {
		priv->sh->host_shaper_rate = rate;
	} else {
		switch (rate) {
		case 0:
			/* Rate 0 means disable lwm_triggered. */
			priv->sh->lwm_triggered = 0;
			break;
		case 1:
			/* Rate 1 means enable lwm_triggered. */
			priv->sh->lwm_triggered = 1;
			break;
		default:
			return -ENOTSUP;
		}
	}
	return mlxreg_host_shaper_config(dev, priv->sh->lwm_triggered,
					 priv->sh->host_shaper_rate);
}
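/*
 * Illustrative sketch (not referenced by the driver), assuming an application
 * context: pair the available-descriptor threshold event with the
 * avail_thresh-triggered mode of the host shaper exposed by
 * rte_pmd_mlx5_host_shaper_config() above. The helper name and the chosen
 * threshold value are hypothetical.
 */
static __rte_unused int
example_arm_host_shaper(uint16_t port_id, uint16_t queue_id)
{
	int ret;

	/* Arm RTE_ETH_EVENT_RX_AVAIL_THRESH on this queue at a 70% threshold. */
	ret = rte_eth_rx_avail_thresh_set(port_id, queue_id, 70);
	if (ret)
		return ret;
	/* Enable the avail_thresh-triggered mode of the host shaper. */
	return rte_pmd_mlx5_host_shaper_config(port_id, 1,
		RTE_BIT32(MLX5_HOST_SHAPER_FLAG_AVAIL_THRESH_TRIGGERED));
}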