1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2021 6WIND S.A. 3 * Copyright 2021 Mellanox Technologies, Ltd 4 */ 5 6 #include <stdint.h> 7 #include <string.h> 8 #include <stdlib.h> 9 10 #include <rte_mbuf.h> 11 #include <rte_mempool.h> 12 #include <rte_prefetch.h> 13 #include <rte_common.h> 14 #include <rte_branch_prediction.h> 15 #include <rte_ether.h> 16 #include <rte_cycles.h> 17 #include <rte_flow.h> 18 19 #include <mlx5_prm.h> 20 #include <mlx5_common.h> 21 #include <mlx5_common_mr.h> 22 #include <rte_pmd_mlx5.h> 23 24 #include "mlx5_autoconf.h" 25 #include "mlx5_defs.h" 26 #include "mlx5.h" 27 #include "mlx5_utils.h" 28 #include "mlx5_rxtx.h" 29 #include "mlx5_devx.h" 30 #include "mlx5_rx.h" 31 #ifdef HAVE_MLX5_MSTFLINT 32 #include <mstflint/mtcr.h> 33 #endif 34 35 36 static __rte_always_inline uint32_t 37 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 38 volatile struct mlx5_mini_cqe8 *mcqe); 39 40 static __rte_always_inline int 41 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 42 uint16_t cqe_n, uint16_t cqe_mask, 43 volatile struct mlx5_mini_cqe8 **mcqe, 44 uint16_t *skip_cnt, bool mprq); 45 46 static __rte_always_inline uint32_t 47 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe); 48 49 static __rte_always_inline void 50 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 51 volatile struct mlx5_cqe *cqe, 52 volatile struct mlx5_mini_cqe8 *mcqe); 53 54 static inline void 55 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 56 volatile struct mlx5_cqe *__rte_restrict cqe, 57 uint32_t phcsum, uint8_t l4_type); 58 59 static inline void 60 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 61 volatile struct mlx5_cqe *__rte_restrict cqe, 62 volatile struct mlx5_mini_cqe8 *mcqe, 63 struct mlx5_rxq_data *rxq, uint32_t len); 64 65 66 /** 67 * Internal function to compute the number of used descriptors in an RX queue. 68 * 69 * @param rxq 70 * The Rx queue. 71 * 72 * @return 73 * The number of used Rx descriptor. 74 */ 75 static uint32_t 76 rx_queue_count(struct mlx5_rxq_data *rxq) 77 { 78 struct rxq_zip *zip = &rxq->zip; 79 volatile struct mlx5_cqe *cqe; 80 const unsigned int cqe_n = (1 << rxq->cqe_n); 81 const unsigned int sges_n = (1 << rxq->sges_n); 82 const unsigned int elts_n = (1 << rxq->elts_n); 83 const unsigned int strd_n = RTE_BIT32(rxq->log_strd_num); 84 const unsigned int cqe_cnt = cqe_n - 1; 85 unsigned int cq_ci, used; 86 87 /* if we are processing a compressed cqe */ 88 if (zip->ai) { 89 used = zip->cqe_cnt - zip->ai; 90 cq_ci = zip->cq_ci; 91 } else { 92 used = 0; 93 cq_ci = rxq->cq_ci; 94 } 95 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; 96 while (check_cqe(cqe, cqe_n, cq_ci) != MLX5_CQE_STATUS_HW_OWN) { 97 int8_t op_own; 98 unsigned int n; 99 100 op_own = cqe->op_own; 101 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) 102 n = rte_be_to_cpu_32(cqe->byte_cnt); 103 else 104 n = 1; 105 cq_ci += n; 106 used += n; 107 cqe = &(*rxq->cqes)[cq_ci & cqe_cnt]; 108 } 109 used = RTE_MIN(used * sges_n, elts_n * strd_n); 110 return used; 111 } 112 113 /** 114 * DPDK callback to check the status of a Rx descriptor. 115 * 116 * @param rx_queue 117 * The Rx queue. 118 * @param[in] offset 119 * The index of the descriptor in the ring. 120 * 121 * @return 122 * The status of the Rx descriptor. 
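 *
 * A minimal usage sketch from the application side (illustrative only; it
 * goes through the generic ethdev wrapper rather than this callback, and
 * assumes port_id/queue_id refer to a started mlx5 Rx queue):
 *
 * @code
 * #include <rte_ethdev.h>
 *
 * // Count completed (DONE) descriptors in the first half of the ring.
 * static unsigned int
 * count_done_descs(uint16_t port_id, uint16_t queue_id, uint16_t ring_size)
 * {
 *         unsigned int done = 0;
 *         uint16_t off;
 *
 *         for (off = 0; off < ring_size / 2; off++) {
 *                 int status = rte_eth_rx_descriptor_status(port_id,
 *                                                           queue_id, off);
 *
 *                 if (status == RTE_ETH_RX_DESC_DONE)
 *                         done++;
 *                 else if (status < 0)
 *                         break; // Out of range or not supported.
 *         }
 *         return done;
 * }
 * @endcode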
 */
int
mlx5_rx_descriptor_status(void *rx_queue, uint16_t offset)
{
	struct mlx5_rxq_data *rxq = rx_queue;

	if (offset >= (1 << rxq->cqe_n)) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (offset < rx_queue_count(rxq))
		return RTE_ETH_RX_DESC_DONE;
	return RTE_ETH_RX_DESC_AVAIL;
}

/* Get rxq lwm percentage according to lwm number. */
static uint8_t
mlx5_rxq_lwm_to_percentage(struct mlx5_rxq_priv *rxq)
{
	struct mlx5_rxq_data *rxq_data = &rxq->ctrl->rxq;
	uint32_t wqe_cnt = 1 << (rxq_data->elts_n - rxq_data->sges_n);

	return rxq->lwm * 100 / wqe_cnt;
}

/**
 * DPDK callback to get the RX queue information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   Rx queue identifier.
 *
 * @param qinfo
 *   Pointer to the RX queue information structure.
 *
 * @return
 *   None.
 */

void
mlx5_rxq_info_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		  struct rte_eth_rxq_info *qinfo)
{
	struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, rx_queue_id);
	struct mlx5_rxq_data *rxq = mlx5_rxq_data_get(dev, rx_queue_id);
	struct mlx5_rxq_priv *rxq_priv = mlx5_rxq_get(dev, rx_queue_id);

	if (!rxq)
		return;
	qinfo->mp = mlx5_rxq_mprq_enabled(rxq) ?
			rxq->mprq_mp : rxq->mp;
	qinfo->conf.rx_thresh.pthresh = 0;
	qinfo->conf.rx_thresh.hthresh = 0;
	qinfo->conf.rx_thresh.wthresh = 0;
	qinfo->conf.rx_free_thresh = rxq->rq_repl_thresh;
	qinfo->conf.rx_drop_en = 1;
	if (rxq_ctrl == NULL || rxq_ctrl->obj == NULL)
		qinfo->conf.rx_deferred_start = 0;
	else
		qinfo->conf.rx_deferred_start = 1;
	qinfo->conf.offloads = dev->data->dev_conf.rxmode.offloads;
	qinfo->scattered_rx = dev->data->scattered_rx;
	qinfo->nb_desc = mlx5_rxq_mprq_enabled(rxq) ?
		RTE_BIT32(rxq->elts_n) * RTE_BIT32(rxq->log_strd_num) :
		RTE_BIT32(rxq->elts_n);
	qinfo->avail_thresh = rxq_priv ?
		mlx5_rxq_lwm_to_percentage(rxq_priv) : 0;
}

/**
 * DPDK callback to get the RX packet burst mode information.
 *
 * @param dev
 *   Pointer to the device structure.
 *
 * @param rx_queue_id
 *   Rx queue identifier.
 *
 * @param mode
 *   Pointer to the burst mode information.
 *
 * @return
 *   0 as success, -EINVAL as failure.
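 *
 * A short usage sketch (illustrative only, via the generic ethdev API;
 * port_id/queue_id are assumed to identify a configured mlx5 Rx queue):
 *
 * @code
 * #include <stdio.h>
 * #include <rte_ethdev.h>
 *
 * static void
 * print_rx_burst_mode(uint16_t port_id, uint16_t queue_id)
 * {
 *         struct rte_eth_burst_mode mode;
 *
 *         // For mlx5 this reports "Scalar", "Multi-Packet RQ",
 *         // "Vector SSE", "MPRQ Vector Neon", etc.
 *         if (rte_eth_rx_burst_mode_get(port_id, queue_id, &mode) == 0)
 *                 printf("port %u rxq %u Rx burst mode: %s\n",
 *                        port_id, queue_id, mode.info);
 * }
 * @endcode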
208 */ 209 int 210 mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, 211 uint16_t rx_queue_id __rte_unused, 212 struct rte_eth_burst_mode *mode) 213 { 214 eth_rx_burst_t pkt_burst = dev->rx_pkt_burst; 215 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id); 216 217 if (!rxq) { 218 rte_errno = EINVAL; 219 return -rte_errno; 220 } 221 if (pkt_burst == mlx5_rx_burst) { 222 snprintf(mode->info, sizeof(mode->info), "%s", "Scalar"); 223 } else if (pkt_burst == mlx5_rx_burst_mprq) { 224 snprintf(mode->info, sizeof(mode->info), "%s", "Multi-Packet RQ"); 225 } else if (pkt_burst == mlx5_rx_burst_vec) { 226 #if defined RTE_ARCH_X86_64 227 snprintf(mode->info, sizeof(mode->info), "%s", "Vector SSE"); 228 #elif defined RTE_ARCH_ARM64 229 snprintf(mode->info, sizeof(mode->info), "%s", "Vector Neon"); 230 #elif defined RTE_ARCH_PPC_64 231 snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec"); 232 #else 233 return -EINVAL; 234 #endif 235 } else if (pkt_burst == mlx5_rx_burst_mprq_vec) { 236 #if defined RTE_ARCH_X86_64 237 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector SSE"); 238 #elif defined RTE_ARCH_ARM64 239 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon"); 240 #elif defined RTE_ARCH_PPC_64 241 snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec"); 242 #else 243 return -EINVAL; 244 #endif 245 } else { 246 return -EINVAL; 247 } 248 return 0; 249 } 250 251 /** 252 * DPDK callback to get the number of used descriptors in a RX queue. 253 * 254 * @param rx_queue 255 * The Rx queue pointer. 256 * 257 * @return 258 * The number of used rx descriptor. 259 * -EINVAL if the queue is invalid 260 */ 261 uint32_t 262 mlx5_rx_queue_count(void *rx_queue) 263 { 264 struct mlx5_rxq_data *rxq = rx_queue; 265 struct rte_eth_dev *dev; 266 267 if (!rxq) { 268 rte_errno = EINVAL; 269 return -rte_errno; 270 } 271 272 dev = &rte_eth_devices[rxq->port_id]; 273 274 if (dev->rx_pkt_burst == NULL || 275 dev->rx_pkt_burst == rte_eth_pkt_burst_dummy) { 276 rte_errno = ENOTSUP; 277 return -rte_errno; 278 } 279 280 return rx_queue_count(rxq); 281 } 282 283 #define CLB_VAL_IDX 0 284 #define CLB_MSK_IDX 1 285 static int 286 mlx5_monitor_callback(const uint64_t value, 287 const uint64_t opaque[RTE_POWER_MONITOR_OPAQUE_SZ]) 288 { 289 const uint64_t m = opaque[CLB_MSK_IDX]; 290 const uint64_t v = opaque[CLB_VAL_IDX]; 291 292 return (value & m) == v ? -1 : 0; 293 } 294 295 int mlx5_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc) 296 { 297 struct mlx5_rxq_data *rxq = rx_queue; 298 const unsigned int cqe_num = 1 << rxq->cqe_n; 299 const unsigned int cqe_mask = cqe_num - 1; 300 const uint16_t idx = rxq->cq_ci & cqe_num; 301 const uint8_t vic = rxq->cq_ci >> rxq->cqe_n; 302 volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask]; 303 304 if (unlikely(rxq->cqes == NULL)) { 305 rte_errno = EINVAL; 306 return -rte_errno; 307 } 308 if (rxq->cqe_comp_layout) { 309 pmc->addr = &cqe->validity_iteration_count; 310 pmc->opaque[CLB_VAL_IDX] = vic; 311 pmc->opaque[CLB_MSK_IDX] = MLX5_CQE_VIC_INIT; 312 } else { 313 pmc->addr = &cqe->op_own; 314 pmc->opaque[CLB_VAL_IDX] = !!idx; 315 pmc->opaque[CLB_MSK_IDX] = MLX5_CQE_OWNER_MASK; 316 } 317 pmc->fn = mlx5_monitor_callback; 318 pmc->size = sizeof(uint8_t); 319 return 0; 320 } 321 322 /** 323 * Translate RX completion flags to packet type. 324 * 325 * @param[in] rxq 326 * Pointer to RX queue structure. 327 * @param[in] cqe 328 * Pointer to CQE. 
329 * 330 * @note: fix mlx5_dev_supported_ptypes_get() if any change here. 331 * 332 * @return 333 * Packet type for struct rte_mbuf. 334 */ 335 static inline uint32_t 336 rxq_cq_to_pkt_type(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 337 volatile struct mlx5_mini_cqe8 *mcqe) 338 { 339 uint8_t idx; 340 uint8_t ptype; 341 uint8_t pinfo = (cqe->pkt_info & 0x3) << 6; 342 343 /* Get l3/l4 header from mini-CQE in case L3/L4 format*/ 344 if (mcqe == NULL || 345 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 346 ptype = (cqe->hdr_type_etc & 0xfc00) >> 10; 347 else 348 ptype = mcqe->hdr_type >> 2; 349 /* 350 * The index to the array should have: 351 * bit[1:0] = l3_hdr_type 352 * bit[4:2] = l4_hdr_type 353 * bit[5] = ip_frag 354 * bit[6] = tunneled 355 * bit[7] = outer_l3_type 356 */ 357 idx = pinfo | ptype; 358 return mlx5_ptype_table[idx] | rxq->tunnel * !!(idx & (1 << 6)); 359 } 360 361 /** 362 * Initialize Rx WQ and indexes. 363 * 364 * @param[in] rxq 365 * Pointer to RX queue structure. 366 */ 367 void 368 mlx5_rxq_initialize(struct mlx5_rxq_data *rxq) 369 { 370 const unsigned int wqe_n = 1 << rxq->elts_n; 371 unsigned int i; 372 373 for (i = 0; (i != wqe_n); ++i) { 374 volatile struct mlx5_wqe_data_seg *scat; 375 uintptr_t addr; 376 uint32_t byte_count; 377 uint32_t lkey; 378 379 if (mlx5_rxq_mprq_enabled(rxq)) { 380 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[i]; 381 382 scat = &((volatile struct mlx5_wqe_mprq *) 383 rxq->wqes)[i].dseg; 384 addr = (uintptr_t)mlx5_mprq_buf_addr 385 (buf, RTE_BIT32(rxq->log_strd_num)); 386 byte_count = RTE_BIT32(rxq->log_strd_sz) * 387 RTE_BIT32(rxq->log_strd_num); 388 lkey = mlx5_rx_addr2mr(rxq, addr); 389 } else { 390 struct rte_mbuf *buf = (*rxq->elts)[i]; 391 392 scat = &((volatile struct mlx5_wqe_data_seg *) 393 rxq->wqes)[i]; 394 addr = rte_pktmbuf_mtod(buf, uintptr_t); 395 byte_count = DATA_LEN(buf); 396 lkey = mlx5_rx_mb2mr(rxq, buf); 397 } 398 /* scat->addr must be able to store a pointer. */ 399 MLX5_ASSERT(sizeof(scat->addr) >= sizeof(uintptr_t)); 400 *scat = (struct mlx5_wqe_data_seg){ 401 .addr = rte_cpu_to_be_64(addr), 402 .byte_count = rte_cpu_to_be_32(byte_count), 403 .lkey = lkey, 404 }; 405 } 406 rxq->consumed_strd = 0; 407 rxq->decompressed = 0; 408 rxq->rq_pi = 0; 409 rxq->zip = (struct rxq_zip){ 410 .ai = 0, 411 }; 412 rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ? 413 (wqe_n >> rxq->sges_n) * RTE_BIT32(rxq->log_strd_num) : 0; 414 /* Update doorbell counter. */ 415 rxq->rq_ci = wqe_n >> rxq->sges_n; 416 rte_io_wmb(); 417 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 418 } 419 420 #define MLX5_ERROR_CQE_MASK 0x40000000 421 /* Must be negative. */ 422 #define MLX5_REGULAR_ERROR_CQE_RET (-5) 423 #define MLX5_CRITICAL_ERROR_CQE_RET (-4) 424 /* Must not be negative. */ 425 #define MLX5_RECOVERY_ERROR_RET 0 426 #define MLX5_RECOVERY_IGNORE_RET 1 427 #define MLX5_RECOVERY_COMPLETED_RET 2 428 429 /** 430 * Handle a Rx error. 431 * The function inserts the RQ state to reset when the first error CQE is 432 * shown, then drains the CQ by the caller function loop. When the CQ is empty, 433 * it moves the RQ state to ready and initializes the RQ. 434 * Next CQE identification and error counting are in the caller responsibility. 435 * 436 * @param[in] rxq 437 * Pointer to RX queue structure. 438 * @param[in] vec 439 * 1 when called from vectorized Rx burst, need to prepare mbufs for the RQ. 440 * 0 when called from non-vectorized Rx burst. 441 * @param[in] err_n 442 * Number of CQEs to check for an error. 
443 * 444 * @return 445 * MLX5_RECOVERY_ERROR_RET in case of recovery error, 446 * MLX5_RECOVERY_IGNORE_RET in case of non-critical error syndrome, 447 * MLX5_RECOVERY_COMPLETED_RET in case of recovery is completed, 448 * otherwise the CQE status after ignored error syndrome or queue reset. 449 */ 450 int 451 mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec, 452 uint16_t err_n, uint16_t *skip_cnt) 453 { 454 const uint16_t cqe_n = 1 << rxq->cqe_n; 455 const uint16_t cqe_mask = cqe_n - 1; 456 const uint16_t wqe_n = 1 << rxq->elts_n; 457 const uint16_t strd_n = RTE_BIT32(rxq->log_strd_num); 458 struct mlx5_rxq_ctrl *rxq_ctrl = 459 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 460 union { 461 volatile struct mlx5_cqe *cqe; 462 volatile struct mlx5_error_cqe *err_cqe; 463 } u = { 464 .cqe = &(*rxq->cqes)[(rxq->cq_ci - vec) & cqe_mask], 465 }; 466 struct mlx5_mp_arg_queue_state_modify sm; 467 bool critical_syndrome = false; 468 int ret, i; 469 470 switch (rxq->err_state) { 471 case MLX5_RXQ_ERR_STATE_IGNORE: 472 ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci - vec); 473 if (ret != MLX5_CQE_STATUS_ERR) { 474 rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; 475 return ret; 476 } 477 /* Fall-through */ 478 case MLX5_RXQ_ERR_STATE_NO_ERROR: 479 for (i = 0; i < (int)err_n; i++) { 480 u.cqe = &(*rxq->cqes)[(rxq->cq_ci - vec - i) & cqe_mask]; 481 if (MLX5_CQE_OPCODE(u.cqe->op_own) == MLX5_CQE_RESP_ERR) { 482 if (u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR || 483 u.err_cqe->syndrome == MLX5_CQE_SYNDROME_LOCAL_PROT_ERR || 484 u.err_cqe->syndrome == MLX5_CQE_SYNDROME_WR_FLUSH_ERR) 485 critical_syndrome = true; 486 break; 487 } 488 } 489 if (!critical_syndrome) { 490 if (rxq->err_state == MLX5_RXQ_ERR_STATE_NO_ERROR) { 491 *skip_cnt = 0; 492 if (i == err_n) 493 rxq->err_state = MLX5_RXQ_ERR_STATE_IGNORE; 494 } 495 return MLX5_RECOVERY_IGNORE_RET; 496 } 497 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_RESET; 498 /* Fall-through */ 499 case MLX5_RXQ_ERR_STATE_NEED_RESET: 500 sm.is_wq = 1; 501 sm.queue_id = rxq->idx; 502 sm.state = IBV_WQS_RESET; 503 if (mlx5_queue_state_modify(RXQ_DEV(rxq_ctrl), &sm)) 504 return MLX5_RECOVERY_ERROR_RET; 505 if (rxq_ctrl->dump_file_n < 506 RXQ_PORT(rxq_ctrl)->config.max_dump_files_num) { 507 MKSTR(err_str, "Unexpected CQE error syndrome " 508 "0x%02x CQN = %u RQN = %u wqe_counter = %u" 509 " rq_ci = %u cq_ci = %u", u.err_cqe->syndrome, 510 rxq->cqn, rxq_ctrl->wqn, 511 rte_be_to_cpu_16(u.err_cqe->wqe_counter), 512 rxq->rq_ci << rxq->sges_n, rxq->cq_ci); 513 MKSTR(name, "dpdk_mlx5_port_%u_rxq_%u_%u", 514 rxq->port_id, rxq->idx, (uint32_t)rte_rdtsc()); 515 mlx5_dump_debug_information(name, NULL, err_str, 0); 516 mlx5_dump_debug_information(name, "MLX5 Error CQ:", 517 (const void *)((uintptr_t) 518 rxq->cqes), 519 sizeof(*u.cqe) * cqe_n); 520 mlx5_dump_debug_information(name, "MLX5 Error RQ:", 521 (const void *)((uintptr_t) 522 rxq->wqes), 523 16 * wqe_n); 524 rxq_ctrl->dump_file_n++; 525 } 526 rxq->err_state = MLX5_RXQ_ERR_STATE_NEED_READY; 527 /* Fall-through */ 528 case MLX5_RXQ_ERR_STATE_NEED_READY: 529 ret = check_cqe(u.cqe, cqe_n, rxq->cq_ci); 530 if (ret == MLX5_CQE_STATUS_HW_OWN) { 531 rte_io_wmb(); 532 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 533 rte_io_wmb(); 534 /* 535 * The RQ consumer index must be zeroed while moving 536 * from RESET state to RDY state. 
537 */ 538 *rxq->rq_db = rte_cpu_to_be_32(0); 539 rte_io_wmb(); 540 sm.is_wq = 1; 541 sm.queue_id = rxq->idx; 542 sm.state = IBV_WQS_RDY; 543 if (mlx5_queue_state_modify(RXQ_DEV(rxq_ctrl), &sm)) 544 return MLX5_RECOVERY_ERROR_RET; 545 if (vec) { 546 const uint32_t elts_n = 547 mlx5_rxq_mprq_enabled(rxq) ? 548 wqe_n * strd_n : wqe_n; 549 const uint32_t e_mask = elts_n - 1; 550 uint32_t elts_ci = 551 mlx5_rxq_mprq_enabled(rxq) ? 552 rxq->elts_ci : rxq->rq_ci; 553 uint32_t elt_idx; 554 struct rte_mbuf **elt; 555 unsigned int n = elts_n - (elts_ci - 556 rxq->rq_pi); 557 558 for (i = 0; i < (int)n; ++i) { 559 elt_idx = (elts_ci + i) & e_mask; 560 elt = &(*rxq->elts)[elt_idx]; 561 *elt = rte_mbuf_raw_alloc(rxq->mp); 562 if (!*elt) { 563 for (i--; i >= 0; --i) { 564 elt_idx = (elts_ci + 565 i) & elts_n; 566 elt = &(*rxq->elts) 567 [elt_idx]; 568 rte_pktmbuf_free_seg 569 (*elt); 570 } 571 return MLX5_RECOVERY_ERROR_RET; 572 } 573 } 574 for (i = 0; i < (int)elts_n; ++i) { 575 elt = &(*rxq->elts)[i]; 576 DATA_LEN(*elt) = 577 (uint16_t)((*elt)->buf_len - 578 rte_pktmbuf_headroom(*elt)); 579 } 580 /* Padding with a fake mbuf for vec Rx. */ 581 for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i) 582 (*rxq->elts)[elts_n + i] = 583 &rxq->fake_mbuf; 584 } 585 mlx5_rxq_initialize(rxq); 586 rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; 587 return MLX5_RECOVERY_COMPLETED_RET; 588 } 589 return ret; 590 default: 591 return MLX5_RECOVERY_ERROR_RET; 592 } 593 } 594 595 /** 596 * Get size of the next packet for a given CQE. For compressed CQEs, the 597 * consumer index is updated only once all packets of the current one have 598 * been processed. 599 * 600 * @param rxq 601 * Pointer to RX queue. 602 * @param cqe 603 * CQE to process. 604 * @param cqe_n 605 * Completion queue count. 606 * @param cqe_mask 607 * Completion queue mask. 608 * @param[out] mcqe 609 * Store pointer to mini-CQE if compressed. Otherwise, the pointer is not 610 * written. 611 * @param[out] skip_cnt 612 * Number of packets skipped due to recoverable errors. 613 * @param mprq 614 * Indication if it is called from MPRQ. 615 * @return 616 * 0 in case of empty CQE, 617 * MLX5_REGULAR_ERROR_CQE_RET in case of error CQE, 618 * MLX5_CRITICAL_ERROR_CQE_RET in case of error CQE lead to Rx queue reset, 619 * otherwise the packet size in regular RxQ, 620 * and striding byte count format in mprq case. 621 */ 622 static inline int 623 mlx5_rx_poll_len(struct mlx5_rxq_data *rxq, volatile struct mlx5_cqe *cqe, 624 uint16_t cqe_n, uint16_t cqe_mask, 625 volatile struct mlx5_mini_cqe8 **mcqe, 626 uint16_t *skip_cnt, bool mprq) 627 { 628 struct rxq_zip *zip = &rxq->zip; 629 int len = 0, ret = 0; 630 uint32_t idx, end; 631 632 do { 633 len = 0; 634 /* Process compressed data in the CQE and mini arrays. */ 635 if (zip->ai) { 636 volatile struct mlx5_mini_cqe8 (*mc)[8] = 637 (volatile struct mlx5_mini_cqe8 (*)[8]) 638 (uintptr_t)(&(*rxq->cqes)[zip->ca & 639 cqe_mask].pkt_info); 640 len = rte_be_to_cpu_32((*mc)[zip->ai & 7].byte_cnt & 641 rxq->byte_mask); 642 *mcqe = &(*mc)[zip->ai & 7]; 643 if (rxq->cqe_comp_layout) { 644 zip->ai++; 645 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { 646 rxq->cq_ci = zip->cq_ci; 647 zip->ai = 0; 648 } 649 } else { 650 if ((++zip->ai & 7) == 0) { 651 /* Invalidate consumed CQEs */ 652 idx = zip->ca; 653 end = zip->na; 654 while (idx != end) { 655 (*rxq->cqes)[idx & cqe_mask].op_own = 656 MLX5_CQE_INVALIDATE; 657 ++idx; 658 } 659 /* 660 * Increment consumer index to skip the number 661 * of CQEs consumed. 
Hardware leaves holes in 662 * the CQ ring for software use. 663 */ 664 zip->ca = zip->na; 665 zip->na += 8; 666 } 667 if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) { 668 /* Invalidate the rest */ 669 idx = zip->ca; 670 end = zip->cq_ci; 671 672 while (idx != end) { 673 (*rxq->cqes)[idx & cqe_mask].op_own = 674 MLX5_CQE_INVALIDATE; 675 ++idx; 676 } 677 rxq->cq_ci = zip->cq_ci; 678 zip->ai = 0; 679 } 680 } 681 /* 682 * No compressed data, get next CQE and verify if it is 683 * compressed. 684 */ 685 } else { 686 int8_t op_own; 687 uint32_t cq_ci; 688 689 ret = (rxq->cqe_comp_layout) ? 690 check_cqe_iteration(cqe, rxq->cqe_n, rxq->cq_ci) : 691 check_cqe(cqe, cqe_n, rxq->cq_ci); 692 if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) { 693 if (unlikely(ret == MLX5_CQE_STATUS_ERR || 694 rxq->err_state)) { 695 ret = mlx5_rx_err_handle(rxq, 0, 1, skip_cnt); 696 if (ret == MLX5_CQE_STATUS_HW_OWN) 697 return MLX5_ERROR_CQE_MASK; 698 if (ret == MLX5_RECOVERY_ERROR_RET || 699 ret == MLX5_RECOVERY_COMPLETED_RET) 700 return MLX5_CRITICAL_ERROR_CQE_RET; 701 if (!mprq && ret == MLX5_RECOVERY_IGNORE_RET) { 702 *skip_cnt = 1; 703 ++rxq->cq_ci; 704 return MLX5_ERROR_CQE_MASK; 705 } 706 } else { 707 return 0; 708 } 709 } 710 /* 711 * Introduce the local variable to have queue cq_ci 712 * index in queue structure always consistent with 713 * actual CQE boundary (not pointing to the middle 714 * of compressed CQE session). 715 */ 716 cq_ci = rxq->cq_ci + !rxq->cqe_comp_layout; 717 op_own = cqe->op_own; 718 if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) { 719 volatile struct mlx5_mini_cqe8 (*mc)[8] = 720 (volatile struct mlx5_mini_cqe8 (*)[8]) 721 (uintptr_t)(&(*rxq->cqes) 722 [cq_ci & cqe_mask].pkt_info); 723 724 /* Fix endianness. */ 725 zip->cqe_cnt = rxq->cqe_comp_layout ? 726 (MLX5_CQE_NUM_MINIS(op_own) + 1U) : 727 rte_be_to_cpu_32(cqe->byte_cnt); 728 /* 729 * Current mini array position is the one 730 * returned by check_cqe64(). 731 * 732 * If completion comprises several mini arrays, 733 * as a special case the second one is located 734 * 7 CQEs after the initial CQE instead of 8 735 * for subsequent ones. 736 */ 737 zip->ca = cq_ci; 738 zip->na = zip->ca + 7; 739 /* Compute the next non compressed CQE. */ 740 zip->cq_ci = rxq->cq_ci + zip->cqe_cnt; 741 /* Get packet size to return. */ 742 len = rte_be_to_cpu_32((*mc)[0].byte_cnt & 743 rxq->byte_mask); 744 *mcqe = &(*mc)[0]; 745 if (rxq->cqe_comp_layout) { 746 if (MLX5_CQE_NUM_MINIS(op_own)) 747 zip->ai = 1; 748 else 749 rxq->cq_ci = zip->cq_ci; 750 } else { 751 zip->ai = 1; 752 /* Prefetch all to be invalidated */ 753 idx = zip->ca; 754 end = zip->cq_ci; 755 while (idx != end) { 756 rte_prefetch0(&(*rxq->cqes)[(idx) & cqe_mask]); 757 ++idx; 758 } 759 } 760 } else { 761 ++rxq->cq_ci; 762 len = rte_be_to_cpu_32(cqe->byte_cnt); 763 if (rxq->cqe_comp_layout) { 764 volatile struct mlx5_cqe *next; 765 766 next = &(*rxq->cqes)[rxq->cq_ci & cqe_mask]; 767 ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci); 768 if (ret != MLX5_CQE_STATUS_SW_OWN || 769 MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED) 770 rte_memcpy(&rxq->title_cqe, 771 (const void *)(uintptr_t)cqe, 772 sizeof(struct mlx5_cqe)); 773 } 774 } 775 } 776 if (unlikely(rxq->err_state)) { 777 if (rxq->err_state == MLX5_RXQ_ERR_STATE_IGNORE && 778 ret == MLX5_CQE_STATUS_SW_OWN) { 779 rxq->err_state = MLX5_RXQ_ERR_STATE_NO_ERROR; 780 return len & MLX5_ERROR_CQE_MASK; 781 } 782 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask]; 783 ++rxq->stats.idropped; 784 (*skip_cnt) += mprq ? 
(len & MLX5_MPRQ_STRIDE_NUM_MASK) >> 785 MLX5_MPRQ_STRIDE_NUM_SHIFT : 1; 786 } else { 787 return len; 788 } 789 } while (1); 790 } 791 792 /** 793 * Translate RX completion flags to offload flags. 794 * 795 * @param[in] cqe 796 * Pointer to CQE. 797 * 798 * @return 799 * Offload flags (ol_flags) for struct rte_mbuf. 800 */ 801 static inline uint32_t 802 rxq_cq_to_ol_flags(volatile struct mlx5_cqe *cqe) 803 { 804 uint32_t ol_flags = 0; 805 uint16_t flags = rte_be_to_cpu_16(cqe->hdr_type_etc); 806 807 ol_flags = 808 TRANSPOSE(flags, 809 MLX5_CQE_RX_L3_HDR_VALID, 810 RTE_MBUF_F_RX_IP_CKSUM_GOOD) | 811 TRANSPOSE(flags, 812 MLX5_CQE_RX_L4_HDR_VALID, 813 RTE_MBUF_F_RX_L4_CKSUM_GOOD); 814 return ol_flags; 815 } 816 817 /** 818 * Fill in mbuf fields from RX completion flags. 819 * Note that pkt->ol_flags should be initialized outside of this function. 820 * 821 * @param rxq 822 * Pointer to RX queue. 823 * @param pkt 824 * mbuf to fill. 825 * @param cqe 826 * CQE to process. 827 * @param rss_hash_res 828 * Packet RSS Hash result. 829 */ 830 static inline void 831 rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, 832 volatile struct mlx5_cqe *cqe, 833 volatile struct mlx5_mini_cqe8 *mcqe) 834 { 835 /* Update packet information. */ 836 pkt->packet_type = rxq_cq_to_pkt_type(rxq, cqe, mcqe); 837 pkt->port = unlikely(rxq->shared) ? cqe->user_index_low : rxq->port_id; 838 839 if (rxq->rss_hash) { 840 uint32_t rss_hash_res = 0; 841 842 /* If compressed, take hash result from mini-CQE. */ 843 if (mcqe == NULL || 844 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_HASH) 845 rss_hash_res = rte_be_to_cpu_32(cqe->rx_hash_res); 846 else 847 rss_hash_res = rte_be_to_cpu_32(mcqe->rx_hash_result); 848 if (rss_hash_res) { 849 pkt->hash.rss = rss_hash_res; 850 pkt->ol_flags |= RTE_MBUF_F_RX_RSS_HASH; 851 } 852 } 853 if (rxq->mark) { 854 uint32_t mark = 0; 855 856 /* If compressed, take flow tag from mini-CQE. */ 857 if (mcqe == NULL || 858 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) 859 mark = cqe->sop_drop_qpn; 860 else 861 mark = ((mcqe->byte_cnt_flow & 0xff) << 8) | 862 (mcqe->flow_tag_high << 16); 863 if (MLX5_FLOW_MARK_IS_VALID(mark)) { 864 pkt->ol_flags |= RTE_MBUF_F_RX_FDIR; 865 if (mark != RTE_BE32(MLX5_FLOW_MARK_DEFAULT)) { 866 pkt->ol_flags |= rxq->mark_flag; 867 pkt->hash.fdir.hi = mlx5_flow_mark_get(mark); 868 } 869 } 870 } 871 if (rxq->dynf_meta) { 872 uint32_t meta = rte_be_to_cpu_32(cqe->flow_table_metadata) & 873 rxq->flow_meta_port_mask; 874 875 if (meta) { 876 pkt->ol_flags |= rxq->flow_meta_mask; 877 *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset, 878 uint32_t *) = meta; 879 } 880 } 881 if (rxq->csum) 882 pkt->ol_flags |= rxq_cq_to_ol_flags(cqe); 883 if (rxq->vlan_strip) { 884 bool vlan_strip; 885 886 if (mcqe == NULL || 887 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 888 vlan_strip = cqe->hdr_type_etc & 889 RTE_BE16(MLX5_CQE_VLAN_STRIPPED); 890 else 891 vlan_strip = mcqe->hdr_type & 892 RTE_BE16(MLX5_CQE_VLAN_STRIPPED); 893 if (vlan_strip) { 894 pkt->ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED; 895 pkt->vlan_tci = rte_be_to_cpu_16(cqe->vlan_info); 896 } 897 } 898 if (rxq->hw_timestamp) { 899 uint64_t ts = rte_be_to_cpu_64(cqe->timestamp); 900 901 if (rxq->rt_timestamp) 902 ts = mlx5_txpp_convert_rx_ts(rxq->sh, ts); 903 mlx5_timestamp_set(pkt, rxq->timestamp_offset, ts); 904 pkt->ol_flags |= rxq->timestamp_rx_flag; 905 } 906 } 907 908 /** 909 * DPDK callback for RX. 910 * 911 * @param dpdk_rxq 912 * Generic pointer to RX queue structure. 
913 * @param[out] pkts 914 * Array to store received packets. 915 * @param pkts_n 916 * Maximum number of packets in array. 917 * 918 * @return 919 * Number of packets successfully received (<= pkts_n). 920 */ 921 uint16_t 922 mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 923 { 924 struct mlx5_rxq_data *rxq = dpdk_rxq; 925 const uint32_t wqe_n = 1 << rxq->elts_n; 926 const uint32_t wqe_mask = wqe_n - 1; 927 const uint32_t cqe_n = 1 << rxq->cqe_n; 928 const uint32_t cqe_mask = cqe_n - 1; 929 const unsigned int sges_n = rxq->sges_n; 930 struct rte_mbuf *pkt = NULL; 931 struct rte_mbuf *seg = NULL; 932 volatile struct mlx5_cqe *cqe = 933 &(*rxq->cqes)[rxq->cq_ci & cqe_mask]; 934 unsigned int i = 0; 935 unsigned int rq_ci = rxq->rq_ci << sges_n; 936 int len = 0; /* keep its value across iterations. */ 937 938 while (pkts_n) { 939 uint16_t skip_cnt; 940 unsigned int idx = rq_ci & wqe_mask; 941 volatile struct mlx5_wqe_data_seg *wqe = 942 &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[idx]; 943 struct rte_mbuf *rep = (*rxq->elts)[idx]; 944 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 945 946 if (pkt) 947 NEXT(seg) = rep; 948 seg = rep; 949 rte_prefetch0(seg); 950 rte_prefetch0(cqe); 951 rte_prefetch0(wqe); 952 /* Allocate the buf from the same pool. */ 953 rep = rte_mbuf_raw_alloc(seg->pool); 954 if (unlikely(rep == NULL)) { 955 ++rxq->stats.rx_nombuf; 956 if (!pkt) { 957 /* 958 * no buffers before we even started, 959 * bail out silently. 960 */ 961 break; 962 } 963 while (pkt != seg) { 964 MLX5_ASSERT(pkt != (*rxq->elts)[idx]); 965 rep = NEXT(pkt); 966 NEXT(pkt) = NULL; 967 NB_SEGS(pkt) = 1; 968 rte_mbuf_raw_free(pkt); 969 pkt = rep; 970 } 971 rq_ci >>= sges_n; 972 ++rq_ci; 973 rq_ci <<= sges_n; 974 break; 975 } 976 if (!pkt) { 977 cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_mask]; 978 len = mlx5_rx_poll_len(rxq, cqe, cqe_n, cqe_mask, &mcqe, &skip_cnt, false); 979 if (unlikely(len & MLX5_ERROR_CQE_MASK)) { 980 /* We drop packets with non-critical errors */ 981 rte_mbuf_raw_free(rep); 982 if (len == MLX5_CRITICAL_ERROR_CQE_RET) { 983 rq_ci = rxq->rq_ci << sges_n; 984 break; 985 } 986 /* Skip specified amount of error CQEs packets */ 987 rq_ci >>= sges_n; 988 rq_ci += skip_cnt; 989 rq_ci <<= sges_n; 990 MLX5_ASSERT(!pkt); 991 continue; 992 } 993 if (len == 0) { 994 rte_mbuf_raw_free(rep); 995 break; 996 } 997 pkt = seg; 998 MLX5_ASSERT(len >= (rxq->crc_present << 2)); 999 pkt->ol_flags &= RTE_MBUF_F_EXTERNAL; 1000 if (rxq->cqe_comp_layout && mcqe) 1001 cqe = &rxq->title_cqe; 1002 rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe); 1003 if (rxq->crc_present) 1004 len -= RTE_ETHER_CRC_LEN; 1005 PKT_LEN(pkt) = len; 1006 if (cqe->lro_num_seg > 1) { 1007 mlx5_lro_update_hdr 1008 (rte_pktmbuf_mtod(pkt, uint8_t *), cqe, 1009 mcqe, rxq, len); 1010 pkt->ol_flags |= RTE_MBUF_F_RX_LRO; 1011 pkt->tso_segsz = len / cqe->lro_num_seg; 1012 } 1013 } 1014 DATA_LEN(rep) = DATA_LEN(seg); 1015 PKT_LEN(rep) = PKT_LEN(seg); 1016 SET_DATA_OFF(rep, DATA_OFF(seg)); 1017 PORT(rep) = PORT(seg); 1018 (*rxq->elts)[idx] = rep; 1019 /* 1020 * Fill NIC descriptor with the new buffer. The lkey and size 1021 * of the buffers are already known, only the buffer address 1022 * changes. 1023 */ 1024 wqe->addr = rte_cpu_to_be_64(rte_pktmbuf_mtod(rep, uintptr_t)); 1025 /* If there's only one MR, no need to replace LKey in WQE. 
*/ 1026 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) 1027 wqe->lkey = mlx5_rx_mb2mr(rxq, rep); 1028 if (len > DATA_LEN(seg)) { 1029 len -= DATA_LEN(seg); 1030 ++NB_SEGS(pkt); 1031 ++rq_ci; 1032 continue; 1033 } 1034 DATA_LEN(seg) = len; 1035 #ifdef MLX5_PMD_SOFT_COUNTERS 1036 /* Increment bytes counter. */ 1037 rxq->stats.ibytes += PKT_LEN(pkt); 1038 #endif 1039 /* Return packet. */ 1040 *(pkts++) = pkt; 1041 pkt = NULL; 1042 --pkts_n; 1043 ++i; 1044 /* Align consumer index to the next stride. */ 1045 rq_ci >>= sges_n; 1046 ++rq_ci; 1047 rq_ci <<= sges_n; 1048 } 1049 if (unlikely(i == 0 && ((rq_ci >> sges_n) == rxq->rq_ci))) 1050 return 0; 1051 /* Update the consumer index. */ 1052 rxq->rq_ci = rq_ci >> sges_n; 1053 rte_io_wmb(); 1054 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1055 rte_io_wmb(); 1056 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1057 #ifdef MLX5_PMD_SOFT_COUNTERS 1058 /* Increment packets counter. */ 1059 rxq->stats.ipackets += i; 1060 #endif 1061 return i; 1062 } 1063 1064 /** 1065 * Update LRO packet TCP header. 1066 * The HW LRO feature doesn't update the TCP header after coalescing the 1067 * TCP segments but supplies information in CQE to fill it by SW. 1068 * 1069 * @param tcp 1070 * Pointer to the TCP header. 1071 * @param cqe 1072 * Pointer to the completion entry. 1073 * @param phcsum 1074 * The L3 pseudo-header checksum. 1075 */ 1076 static inline void 1077 mlx5_lro_update_tcp_hdr(struct rte_tcp_hdr *__rte_restrict tcp, 1078 volatile struct mlx5_cqe *__rte_restrict cqe, 1079 uint32_t phcsum, uint8_t l4_type) 1080 { 1081 /* 1082 * The HW calculates only the TCP payload checksum, need to complete 1083 * the TCP header checksum and the L3 pseudo-header checksum. 1084 */ 1085 uint32_t csum = phcsum + cqe->csum; 1086 1087 if (l4_type == MLX5_L4_HDR_TYPE_TCP_EMPTY_ACK || 1088 l4_type == MLX5_L4_HDR_TYPE_TCP_WITH_ACL) { 1089 tcp->tcp_flags |= RTE_TCP_ACK_FLAG; 1090 tcp->recv_ack = cqe->lro_ack_seq_num; 1091 tcp->rx_win = cqe->lro_tcp_win; 1092 } 1093 if (cqe->lro_tcppsh_abort_dupack & MLX5_CQE_LRO_PUSH_MASK) 1094 tcp->tcp_flags |= RTE_TCP_PSH_FLAG; 1095 tcp->cksum = 0; 1096 csum += rte_raw_cksum(tcp, (tcp->data_off >> 4) * 4); 1097 csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff); 1098 csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff); 1099 csum = (~csum) & 0xffff; 1100 if (csum == 0) 1101 csum = 0xffff; 1102 tcp->cksum = csum; 1103 } 1104 1105 /** 1106 * Update LRO packet headers. 1107 * The HW LRO feature doesn't update the L3/TCP headers after coalescing the 1108 * TCP segments but supply information in CQE to fill it by SW. 1109 * 1110 * @param padd 1111 * The packet address. 1112 * @param cqe 1113 * Pointer to the completion entry. 1114 * @param len 1115 * The packet length. 
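 *
 * For reference, the TCP checksum completion done in
 * mlx5_lro_update_tcp_hdr() above is a plain one's-complement fold of the
 * 32-bit partial sum; an equivalent standalone sketch (illustrative only):
 *
 * @code
 * // Fold a 32-bit partial checksum into the final 16-bit TCP checksum.
 * static inline uint16_t
 * fold_tcp_csum(uint32_t csum)
 * {
 *         csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff);
 *         csum = ((csum & 0xffff0000) >> 16) + (csum & 0xffff);
 *         csum = (~csum) & 0xffff;
 *         return csum == 0 ? 0xffff : csum;
 * }
 * @endcode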
1116 */ 1117 static inline void 1118 mlx5_lro_update_hdr(uint8_t *__rte_restrict padd, 1119 volatile struct mlx5_cqe *__rte_restrict cqe, 1120 volatile struct mlx5_mini_cqe8 *mcqe, 1121 struct mlx5_rxq_data *rxq, uint32_t len) 1122 { 1123 union { 1124 struct rte_ether_hdr *eth; 1125 struct rte_vlan_hdr *vlan; 1126 struct rte_ipv4_hdr *ipv4; 1127 struct rte_ipv6_hdr *ipv6; 1128 struct rte_tcp_hdr *tcp; 1129 uint8_t *hdr; 1130 } h = { 1131 .hdr = padd, 1132 }; 1133 uint16_t proto = h.eth->ether_type; 1134 uint32_t phcsum; 1135 uint8_t l4_type; 1136 1137 h.eth++; 1138 while (proto == RTE_BE16(RTE_ETHER_TYPE_VLAN) || 1139 proto == RTE_BE16(RTE_ETHER_TYPE_QINQ)) { 1140 proto = h.vlan->eth_proto; 1141 h.vlan++; 1142 } 1143 if (proto == RTE_BE16(RTE_ETHER_TYPE_IPV4)) { 1144 h.ipv4->time_to_live = cqe->lro_min_ttl; 1145 h.ipv4->total_length = rte_cpu_to_be_16(len - (h.hdr - padd)); 1146 h.ipv4->hdr_checksum = 0; 1147 h.ipv4->hdr_checksum = rte_ipv4_cksum(h.ipv4); 1148 phcsum = rte_ipv4_phdr_cksum(h.ipv4, 0); 1149 h.ipv4++; 1150 } else { 1151 h.ipv6->hop_limits = cqe->lro_min_ttl; 1152 h.ipv6->payload_len = rte_cpu_to_be_16(len - (h.hdr - padd) - 1153 sizeof(*h.ipv6)); 1154 phcsum = rte_ipv6_phdr_cksum(h.ipv6, 0); 1155 h.ipv6++; 1156 } 1157 if (mcqe == NULL || 1158 rxq->mcqe_format != MLX5_CQE_RESP_FORMAT_L34H_STRIDX) 1159 l4_type = (rte_be_to_cpu_16(cqe->hdr_type_etc) & 1160 MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; 1161 else 1162 l4_type = (rte_be_to_cpu_16(mcqe->hdr_type) & 1163 MLX5_CQE_L4_TYPE_MASK) >> MLX5_CQE_L4_TYPE_SHIFT; 1164 mlx5_lro_update_tcp_hdr(h.tcp, cqe, phcsum, l4_type); 1165 } 1166 1167 void 1168 mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf) 1169 { 1170 mlx5_mprq_buf_free_cb(NULL, buf); 1171 } 1172 1173 /** 1174 * DPDK callback for RX with Multi-Packet RQ support. 1175 * 1176 * @param dpdk_rxq 1177 * Generic pointer to RX queue structure. 1178 * @param[out] pkts 1179 * Array to store received packets. 1180 * @param pkts_n 1181 * Maximum number of packets in array. 1182 * 1183 * @return 1184 * Number of packets successfully received (<= pkts_n). 1185 */ 1186 uint16_t 1187 mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 1188 { 1189 struct mlx5_rxq_data *rxq = dpdk_rxq; 1190 const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num); 1191 const uint32_t strd_sz = RTE_BIT32(rxq->log_strd_sz); 1192 const uint32_t cqe_n = 1 << rxq->cqe_n; 1193 const uint32_t cq_mask = cqe_n - 1; 1194 const uint32_t wqe_n = 1 << rxq->elts_n; 1195 const uint32_t wq_mask = wqe_n - 1; 1196 volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1197 unsigned int i = 0; 1198 uint32_t rq_ci = rxq->rq_ci; 1199 uint16_t consumed_strd = rxq->consumed_strd; 1200 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1201 1202 while (i < pkts_n) { 1203 struct rte_mbuf *pkt; 1204 int ret; 1205 uint32_t len; 1206 uint16_t strd_cnt; 1207 uint16_t strd_idx; 1208 uint32_t byte_cnt; 1209 uint16_t skip_cnt; 1210 volatile struct mlx5_mini_cqe8 *mcqe = NULL; 1211 enum mlx5_rqx_code rxq_code; 1212 1213 if (consumed_strd == strd_n) { 1214 /* Replace WQE if the buffer is still in use. */ 1215 mprq_buf_replace(rxq, rq_ci & wq_mask); 1216 /* Advance to the next WQE. 
*/ 1217 consumed_strd = 0; 1218 ++rq_ci; 1219 buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1220 } 1221 cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1222 ret = mlx5_rx_poll_len(rxq, cqe, cqe_n, cq_mask, &mcqe, &skip_cnt, true); 1223 if (unlikely(ret & MLX5_ERROR_CQE_MASK)) { 1224 if (ret == MLX5_CRITICAL_ERROR_CQE_RET) { 1225 rq_ci = rxq->rq_ci; 1226 consumed_strd = rxq->consumed_strd; 1227 break; 1228 } 1229 consumed_strd += skip_cnt; 1230 while (consumed_strd >= strd_n) { 1231 /* Replace WQE if the buffer is still in use. */ 1232 mprq_buf_replace(rxq, rq_ci & wq_mask); 1233 /* Advance to the next WQE. */ 1234 consumed_strd -= strd_n; 1235 ++rq_ci; 1236 buf = (*rxq->mprq_bufs)[rq_ci & wq_mask]; 1237 } 1238 cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask]; 1239 } 1240 if (ret == 0) 1241 break; 1242 byte_cnt = ret; 1243 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 1244 MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); 1245 if (rxq->crc_present) 1246 len -= RTE_ETHER_CRC_LEN; 1247 if (mcqe && 1248 rxq->mcqe_format == MLX5_CQE_RESP_FORMAT_FTAG_STRIDX) 1249 strd_cnt = (len / strd_sz) + !!(len % strd_sz); 1250 else 1251 strd_cnt = (byte_cnt & MLX5_MPRQ_STRIDE_NUM_MASK) >> 1252 MLX5_MPRQ_STRIDE_NUM_SHIFT; 1253 MLX5_ASSERT(strd_cnt); 1254 consumed_strd += strd_cnt; 1255 if (byte_cnt & MLX5_MPRQ_FILLER_MASK) 1256 continue; 1257 if (rxq->cqe_comp_layout && mcqe) 1258 cqe = &rxq->title_cqe; 1259 strd_idx = rte_be_to_cpu_16(mcqe == NULL ? 1260 cqe->wqe_counter : 1261 mcqe->stride_idx); 1262 MLX5_ASSERT(strd_idx < strd_n); 1263 MLX5_ASSERT(!((rte_be_to_cpu_16(cqe->wqe_id) ^ rq_ci) & 1264 wq_mask)); 1265 pkt = rte_pktmbuf_alloc(rxq->mp); 1266 if (unlikely(pkt == NULL)) { 1267 ++rxq->stats.rx_nombuf; 1268 break; 1269 } 1270 len = (byte_cnt & MLX5_MPRQ_LEN_MASK) >> MLX5_MPRQ_LEN_SHIFT; 1271 MLX5_ASSERT((int)len >= (rxq->crc_present << 2)); 1272 if (rxq->crc_present) 1273 len -= RTE_ETHER_CRC_LEN; 1274 rxq_code = mprq_buf_to_pkt(rxq, pkt, len, buf, 1275 strd_idx, strd_cnt); 1276 if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) { 1277 rte_pktmbuf_free_seg(pkt); 1278 if (rxq_code == MLX5_RXQ_CODE_DROPPED) { 1279 ++rxq->stats.idropped; 1280 continue; 1281 } 1282 if (rxq_code == MLX5_RXQ_CODE_NOMBUF) { 1283 ++rxq->stats.rx_nombuf; 1284 break; 1285 } 1286 } 1287 rxq_cq_to_mbuf(rxq, pkt, cqe, mcqe); 1288 if (cqe->lro_num_seg > 1) { 1289 mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt, uint8_t *), 1290 cqe, mcqe, rxq, len); 1291 pkt->ol_flags |= RTE_MBUF_F_RX_LRO; 1292 pkt->tso_segsz = len / cqe->lro_num_seg; 1293 } 1294 PKT_LEN(pkt) = len; 1295 PORT(pkt) = rxq->port_id; 1296 #ifdef MLX5_PMD_SOFT_COUNTERS 1297 /* Increment bytes counter. */ 1298 rxq->stats.ibytes += PKT_LEN(pkt); 1299 #endif 1300 /* Return packet. */ 1301 *(pkts++) = pkt; 1302 ++i; 1303 } 1304 /* Update the consumer indexes. */ 1305 rxq->consumed_strd = consumed_strd; 1306 rte_io_wmb(); 1307 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 1308 if (rq_ci != rxq->rq_ci) { 1309 rxq->rq_ci = rq_ci; 1310 rte_io_wmb(); 1311 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 1312 } 1313 #ifdef MLX5_PMD_SOFT_COUNTERS 1314 /* Increment packets counter. */ 1315 rxq->stats.ipackets += i; 1316 #endif 1317 return i; 1318 } 1319 1320 int 1321 mlx5_rx_queue_lwm_query(struct rte_eth_dev *dev, 1322 uint16_t *queue_id, uint8_t *lwm) 1323 { 1324 struct mlx5_priv *priv = dev->data->dev_private; 1325 unsigned int rxq_id, found = 0, n; 1326 struct mlx5_rxq_priv *rxq; 1327 1328 if (!queue_id) 1329 return -EINVAL; 1330 /* Query all the Rx queues of the port in a circular way. 
 */
	for (rxq_id = *queue_id, n = 0; n < priv->rxqs_n; n++) {
		rxq = mlx5_rxq_get(dev, rxq_id);
		if (rxq && rxq->lwm_event_pending) {
			pthread_mutex_lock(&priv->sh->lwm_config_lock);
			rxq->lwm_event_pending = 0;
			pthread_mutex_unlock(&priv->sh->lwm_config_lock);
			*queue_id = rxq_id;
			found = 1;
			if (lwm)
				*lwm = mlx5_rxq_lwm_to_percentage(rxq);
			break;
		}
		rxq_id = (rxq_id + 1) % priv->rxqs_n;
	}
	return found;
}

/**
 * Interrupt handler for the LWM event.
 * It first checks whether the event has arrived and, if so, processes the
 * callback for RTE_ETH_EVENT_RX_AVAIL_THRESH.
 *
 * @param args
 *   Generic pointer to mlx5_priv.
 */
void
mlx5_dev_interrupt_handler_lwm(void *args)
{
	struct mlx5_priv *priv = args;
	struct mlx5_rxq_priv *rxq;
	struct rte_eth_dev *dev;
	int ret, rxq_idx = 0, port_id = 0;

	ret = priv->obj_ops.rxq_event_get_lwm(priv, &rxq_idx, &port_id);
	if (unlikely(ret < 0)) {
		DRV_LOG(WARNING, "Cannot get LWM event context.");
		return;
	}
	DRV_LOG(INFO, "%s get LWM event, port_id:%d rxq_id:%d.", __func__,
		port_id, rxq_idx);
	dev = &rte_eth_devices[port_id];
	rxq = mlx5_rxq_get(dev, rxq_idx);
	if (rxq) {
		pthread_mutex_lock(&priv->sh->lwm_config_lock);
		rxq->lwm_event_pending = 1;
		pthread_mutex_unlock(&priv->sh->lwm_config_lock);
	}
	rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_RX_AVAIL_THRESH, NULL);
}

/**
 * DPDK callback to arm an Rx queue LWM (limit watermark) event.
 * When the Rx queue fullness reaches the LWM limit, the driver catches
 * an HW event and invokes the user event callback.
 * After the last event handling, the user needs to call this API again
 * to arm an additional event.
 *
 * @param dev
 *   Pointer to the device structure.
 * @param[in] rx_queue_id
 *   Rx queue identifier.
 * @param[in] lwm
 *   The LWM value, defined as a percentage of the Rx queue size.
 *   [1-99] to set a new LWM (update the old value).
 *   0 to unarm the event.
 *
 * @return
 *   0 : operation success.
 *   Otherwise:
 *     - ENOMEM - not enough memory to create LWM event channel.
 *     - EINVAL - the input Rxq is not created by devx.
 *     - E2BIG  - lwm is bigger than 99.
 */
int
mlx5_rx_queue_lwm_set(struct rte_eth_dev *dev, uint16_t rx_queue_id,
		      uint8_t lwm)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint16_t port_id = PORT_ID(priv);
	struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id);
	uint16_t event_nums[1] = {MLX5_EVENT_TYPE_SRQ_LIMIT_REACHED};
	struct mlx5_rxq_data *rxq_data;
	uint32_t wqe_cnt;
	uint64_t cookie;
	int ret = 0;

	if (!rxq) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	rxq_data = &rxq->ctrl->rxq;
	/* Ensure the Rq is created by devx. */
	if (priv->obj_ops.rxq_obj_new != devx_obj_ops.rxq_obj_new) {
		rte_errno = EINVAL;
		return -rte_errno;
	}
	if (lwm > 99) {
		DRV_LOG(WARNING, "Too big LWM configuration.");
		rte_errno = E2BIG;
		return -rte_errno;
	}
	/* Start config LWM. */
	pthread_mutex_lock(&priv->sh->lwm_config_lock);
	if (rxq->lwm == 0 && lwm == 0) {
		/* Both old/new values are 0, do nothing.
*/ 1436 ret = 0; 1437 goto end; 1438 } 1439 wqe_cnt = 1 << (rxq_data->elts_n - rxq_data->sges_n); 1440 if (lwm) { 1441 if (!priv->sh->devx_channel_lwm) { 1442 ret = mlx5_lwm_setup(priv); 1443 if (ret) { 1444 DRV_LOG(WARNING, 1445 "Failed to create shared_lwm."); 1446 rte_errno = ENOMEM; 1447 ret = -rte_errno; 1448 goto end; 1449 } 1450 } 1451 if (!rxq->lwm_devx_subscribed) { 1452 cookie = ((uint32_t) 1453 (port_id << LWM_COOKIE_PORTID_OFFSET)) | 1454 (rx_queue_id << LWM_COOKIE_RXQID_OFFSET); 1455 ret = mlx5_os_devx_subscribe_devx_event 1456 (priv->sh->devx_channel_lwm, 1457 rxq->devx_rq.rq->obj, 1458 sizeof(event_nums), 1459 event_nums, 1460 cookie); 1461 if (ret) { 1462 rte_errno = rte_errno ? rte_errno : EINVAL; 1463 ret = -rte_errno; 1464 goto end; 1465 } 1466 rxq->lwm_devx_subscribed = 1; 1467 } 1468 } 1469 /* Save LWM to rxq and send modify_rq devx command. */ 1470 rxq->lwm = lwm * wqe_cnt / 100; 1471 /* Prevent integer division loss when switch lwm number to percentage. */ 1472 if (lwm && (lwm * wqe_cnt % 100)) { 1473 rxq->lwm = ((uint32_t)(rxq->lwm + 1) >= wqe_cnt) ? 1474 rxq->lwm : (rxq->lwm + 1); 1475 } 1476 if (lwm && !rxq->lwm) { 1477 /* With mprq, wqe_cnt may be < 100. */ 1478 DRV_LOG(WARNING, "Too small LWM configuration."); 1479 rte_errno = EINVAL; 1480 ret = -rte_errno; 1481 goto end; 1482 } 1483 ret = mlx5_devx_modify_rq(rxq, MLX5_RXQ_MOD_RDY2RDY); 1484 end: 1485 pthread_mutex_unlock(&priv->sh->lwm_config_lock); 1486 return ret; 1487 } 1488 1489 /** 1490 * Mlx5 access register function to configure host shaper. 1491 * It calls API in libmtcr_ul to access QSHR(Qos Shaper Host Register) 1492 * in firmware. 1493 * 1494 * @param dev 1495 * Pointer to rte_eth_dev. 1496 * @param lwm_triggered 1497 * Flag to enable/disable lwm_triggered bit in QSHR. 1498 * @param rate 1499 * Host shaper rate, unit is 100Mbps, set to 0 means disable the shaper. 1500 * @return 1501 * 0 : operation success. 1502 * Otherwise: 1503 * - ENOENT - no ibdev interface. 1504 * - EBUSY - the register access unit is busy. 1505 * - EIO - the register access command meets IO error. 1506 */ 1507 static int 1508 mlxreg_host_shaper_config(struct rte_eth_dev *dev, 1509 bool lwm_triggered, uint8_t rate) 1510 { 1511 #ifdef HAVE_MLX5_MSTFLINT 1512 struct mlx5_priv *priv = dev->data->dev_private; 1513 uint32_t data[MLX5_ST_SZ_DW(register_qshr)] = {0}; 1514 int rc, retry_count = 3; 1515 mfile *mf = NULL; 1516 int status; 1517 void *ptr; 1518 1519 mf = mopen(priv->sh->ibdev_name); 1520 if (!mf) { 1521 DRV_LOG(WARNING, "mopen failed\n"); 1522 rte_errno = ENOENT; 1523 return -rte_errno; 1524 } 1525 MLX5_SET(register_qshr, data, connected_host, 1); 1526 MLX5_SET(register_qshr, data, fast_response, lwm_triggered ? 1 : 0); 1527 MLX5_SET(register_qshr, data, local_port, 1); 1528 ptr = MLX5_ADDR_OF(register_qshr, data, global_config); 1529 MLX5_SET(ets_global_config_register, ptr, rate_limit_update, 1); 1530 MLX5_SET(ets_global_config_register, ptr, max_bw_units, 1531 rate ? 
ETS_GLOBAL_CONFIG_BW_UNIT_HUNDREDS_MBPS : 1532 ETS_GLOBAL_CONFIG_BW_UNIT_DISABLED); 1533 MLX5_SET(ets_global_config_register, ptr, max_bw_value, rate); 1534 do { 1535 rc = maccess_reg(mf, 1536 MLX5_QSHR_REGISTER_ID, 1537 MACCESS_REG_METHOD_SET, 1538 (u_int32_t *)&data[0], 1539 sizeof(data), 1540 sizeof(data), 1541 sizeof(data), 1542 &status); 1543 if ((rc != ME_ICMD_STATUS_IFC_BUSY && 1544 status != ME_REG_ACCESS_BAD_PARAM) || 1545 !(mf->flags & MDEVS_REM)) { 1546 break; 1547 } 1548 DRV_LOG(WARNING, "%s retry.", __func__); 1549 usleep(10000); 1550 } while (retry_count-- > 0); 1551 mclose(mf); 1552 rte_errno = (rc == ME_REG_ACCESS_DEV_BUSY) ? EBUSY : EIO; 1553 return rc ? -rte_errno : 0; 1554 #else 1555 (void)dev; 1556 (void)lwm_triggered; 1557 (void)rate; 1558 return -1; 1559 #endif 1560 } 1561 1562 int rte_pmd_mlx5_host_shaper_config(int port_id, uint8_t rate, 1563 uint32_t flags) 1564 { 1565 struct rte_eth_dev *dev = &rte_eth_devices[port_id]; 1566 struct mlx5_priv *priv = dev->data->dev_private; 1567 bool lwm_triggered = 1568 !!(flags & RTE_BIT32(RTE_PMD_MLX5_HOST_SHAPER_FLAG_AVAIL_THRESH_TRIGGERED)); 1569 1570 if (!lwm_triggered) { 1571 priv->sh->host_shaper_rate = rate; 1572 } else { 1573 switch (rate) { 1574 case 0: 1575 /* Rate 0 means disable lwm_triggered. */ 1576 priv->sh->lwm_triggered = 0; 1577 break; 1578 case 1: 1579 /* Rate 1 means enable lwm_triggered. */ 1580 priv->sh->lwm_triggered = 1; 1581 break; 1582 default: 1583 return -ENOTSUP; 1584 } 1585 } 1586 return mlxreg_host_shaper_config(dev, priv->sh->lwm_triggered, 1587 priv->sh->host_shaper_rate); 1588 } 1589 1590 /** 1591 * Dump RQ/CQ Context to a file. 1592 * 1593 * @param[in] port_id 1594 * Port ID 1595 * @param[in] queue_id 1596 * Queue ID 1597 * @param[in] filename 1598 * Name of file to dump the Rx Queue Context 1599 * 1600 * @return 1601 * 0 for Success, non-zero value depending on failure type 1602 */ 1603 int rte_pmd_mlx5_rxq_dump_contexts(uint16_t port_id, uint16_t queue_id, const char *filename) 1604 { 1605 struct rte_eth_dev *dev; 1606 struct mlx5_rxq_priv *rxq; 1607 struct mlx5_rxq_ctrl *rxq_ctrl; 1608 struct mlx5_rxq_obj *rxq_obj; 1609 struct mlx5_devx_rq *rq; 1610 struct mlx5_devx_cq *cq; 1611 struct mlx5_devx_obj *rq_devx_obj; 1612 struct mlx5_devx_obj *cq_devx_obj; 1613 1614 uint32_t rq_out[MLX5_ST_SZ_DW(query_rq_out)] = {0}; 1615 uint32_t cq_out[MLX5_ST_SZ_DW(query_cq_out)] = {0}; 1616 1617 int ret; 1618 FILE *fd; 1619 MKSTR(path, "./%s", filename); 1620 1621 if (!rte_eth_dev_is_valid_port(port_id)) 1622 return -ENODEV; 1623 1624 if (rte_eth_rx_queue_is_valid(port_id, queue_id)) 1625 return -EINVAL; 1626 1627 fd = fopen(path, "w"); 1628 if (!fd) { 1629 rte_errno = errno; 1630 return -EIO; 1631 } 1632 1633 dev = &rte_eth_devices[port_id]; 1634 rxq = mlx5_rxq_ref(dev, queue_id); 1635 rxq_ctrl = rxq->ctrl; 1636 rxq_obj = rxq_ctrl->obj; 1637 rq = &rxq->devx_rq; 1638 cq = &rxq_obj->cq_obj; 1639 rq_devx_obj = rq->rq; 1640 cq_devx_obj = cq->cq; 1641 1642 do { 1643 ret = mlx5_devx_cmd_query_rq(rq_devx_obj, rq_out, sizeof(rq_out)); 1644 if (ret) 1645 break; 1646 1647 /* Dump rq query output to file */ 1648 MKSTR(rq_headline, "RQ DevX ID = %u Port = %u Queue index = %u ", 1649 rq_devx_obj->id, port_id, queue_id); 1650 mlx5_dump_to_file(fd, NULL, rq_headline, 0); 1651 mlx5_dump_to_file(fd, "Query RQ Dump:", 1652 (const void *)((uintptr_t)rq_out), 1653 sizeof(rq_out)); 1654 1655 ret = mlx5_devx_cmd_query_cq(cq_devx_obj, cq_out, sizeof(cq_out)); 1656 if (ret) 1657 break; 1658 1659 /* Dump cq query output to file */ 
		MKSTR(cq_headline, "CQ DevX ID = %u Port = %u Queue index = %u ",
		      cq_devx_obj->id, port_id, queue_id);
		mlx5_dump_to_file(fd, NULL, cq_headline, 0);
		mlx5_dump_to_file(fd, "Query CQ Dump:",
				  (const void *)((uintptr_t)cq_out),
				  sizeof(cq_out));
	} while (false);

	fclose(fd);
	return ret;
}
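
/*
 * Usage sketch tying together the externally visible Rx helpers above
 * (illustrative only, not compiled as part of the driver; it assumes a
 * started mlx5 port and uses only APIs declared in rte_ethdev.h and
 * rte_pmd_mlx5.h):
 *
 *   #include <rte_ethdev.h>
 *   #include <rte_pmd_mlx5.h>
 *
 *   static int
 *   rx_monitor_example(uint16_t port_id, uint16_t queue_id)
 *   {
 *           int ret;
 *
 *           // Arm the Rx "available threshold" (LWM) event at 70% queue
 *           // fullness; mlx5_rx_queue_lwm_set() implements this callback.
 *           ret = rte_eth_rx_avail_thresh_set(port_id, queue_id, 70);
 *           if (ret < 0)
 *                   return ret;
 *           // Let the host shaper be driven by that event: with the
 *           // AVAIL_THRESH_TRIGGERED flag set, a rate argument of 1 simply
 *           // enables the triggered mode (see
 *           // rte_pmd_mlx5_host_shaper_config() above).
 *           ret = rte_pmd_mlx5_host_shaper_config(port_id, 1,
 *                   RTE_BIT32(RTE_PMD_MLX5_HOST_SHAPER_FLAG_AVAIL_THRESH_TRIGGERED));
 *           if (ret)
 *                   return ret;
 *           // Dump the RQ/CQ HW contexts of the queue for offline debugging.
 *           return rte_pmd_mlx5_rxq_dump_contexts(port_id, queue_id,
 *                                                 "rxq_ctx.dump");
 *   }
 */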