/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <stdint.h>
#include <string.h>
#include <stdlib.h>

#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_vect.h>

#include <mlx5_glue.h>
#include <mlx5_prm.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_rx.h"
#include "mlx5_rxtx_vec.h"
#include "mlx5_autoconf.h"

#if defined RTE_ARCH_X86_64
#include "mlx5_rxtx_vec_sse.h"
#elif defined RTE_ARCH_ARM64
#include "mlx5_rxtx_vec_neon.h"
#elif defined RTE_ARCH_PPC_64
#include "mlx5_rxtx_vec_altivec.h"
#else
#error "This should not be compiled if SIMD instructions are not supported."
#endif

/**
 * Skip error packets.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
static uint16_t
rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
			 uint16_t pkts_n)
{
	uint16_t n = 0;
	uint16_t skip_cnt;
	unsigned int i;
#ifdef MLX5_PMD_SOFT_COUNTERS
	uint32_t err_bytes = 0;
#endif

	for (i = 0; i < pkts_n; ++i) {
		struct rte_mbuf *pkt = pkts[i];

		if (pkt->packet_type == RTE_PTYPE_ALL_MASK || rxq->err_state) {
#ifdef MLX5_PMD_SOFT_COUNTERS
			err_bytes += PKT_LEN(pkt);
#endif
			rte_pktmbuf_free_seg(pkt);
		} else {
			pkts[n++] = pkt;
		}
	}
	rxq->stats.idropped += (pkts_n - n);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Correct counters of errored completions. */
	rxq->stats.ipackets -= (pkts_n - n);
	rxq->stats.ibytes -= err_bytes;
#endif
	mlx5_rx_err_handle(rxq, 1, pkts_n, &skip_cnt);
	return n;
}

/**
 * Replenish buffers for RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t q_n = 1 << rxq->elts_n;
	const uint16_t q_mask = q_n - 1;
	uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
	uint16_t elts_idx = rxq->rq_ci & q_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	volatile struct mlx5_wqe_data_seg *wq =
		&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
	unsigned int i;

	if (n >= rxq->rq_repl_thresh) {
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
			    MLX5_VPMD_DESCS_PER_LOOP);
		/* Not to cross queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) {
			for (i = 0; i < n; ++i) {
				/*
				 * To support mbufs with externally attached
				 * data buffers, the buf_addr pointer must be
				 * used instead of rte_mbuf_buf_addr(). This
				 * touches the mbuf itself and may impact
				 * performance.
				 */
				void *buf_addr = elts[i]->buf_addr;

				wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
							      RTE_PKTMBUF_HEADROOM);
				wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
			}
		} else {
			for (i = 0; i < n; ++i) {
				void *buf_addr = elts[i]->buf_addr;

				wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
							      RTE_PKTMBUF_HEADROOM);
			}
		}
		rxq->rq_ci += n;
		/* Prevent overflowing into consumed mbufs. */
		elts_idx = rxq->rq_ci & q_mask;
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
}

/**
 * Replenish buffers for MPRQ RX in bulk.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 */
static inline void
mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t wqe_mask = elts_n - 1;
	uint32_t n = elts_n - (rxq->elts_ci - rxq->rq_pi);
	uint32_t elts_idx = rxq->elts_ci & wqe_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	unsigned int i;

	if (n >= rxq->rq_repl_thresh &&
	    rxq->elts_ci - rxq->rq_pi <=
	    rxq->rq_repl_thresh + MLX5_VPMD_RX_MAX_BURST) {
		MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n));
		MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) >
			    MLX5_VPMD_DESCS_PER_LOOP);
		/* Not to cross queue end. */
		n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, elts_n - elts_idx);
		/* Limit replenish number to threshold value. */
		n = RTE_MIN(n, rxq->rq_repl_thresh);
		if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
			rxq->stats.rx_nombuf += n;
			return;
		}
		rxq->elts_ci += n;
		/* Prevent overflowing into consumed mbufs. */
		elts_idx = rxq->elts_ci & wqe_mask;
		for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
			(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
	}
}
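/*
 * Illustrative arithmetic for the replenish logic above (the configuration
 * below is an assumption, not taken from this file). With elts_n = 9 the
 * ring holds q_n = 512 descriptors and q_mask = 511:
 *
 *	rq_ci = 700, rq_pi = 300  =>  n = 512 - (700 - 300) = 112 free slots;
 *
 * the unsigned subtraction stays correct after the counters wrap around.
 * MLX5_VPMD_DESCS_PER_LOOP descriptors are held back, and the slots right
 * after the producer index are pointed at rxq->fake_mbuf, so the SIMD loop
 * can always load a full batch without touching mbufs already handed to
 * the application.
 */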

/**
 * Copy or attach MPRQ buffers to RX SW ring.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param pkts
 *   Pointer to array of packets to be stored.
 * @param pkts_n
 *   Number of packets to be stored.
 *
 * @return
 *   Number of packets successfully copied/attached (<= pkts_n).
 */
static inline uint16_t
rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq,
		     struct rte_mbuf **pkts, uint16_t pkts_n)
{
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint16_t wqe_mask = wqe_n - 1;
	const uint16_t strd_sz = RTE_BIT32(rxq->log_strd_sz);
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	uint32_t elts_idx = rxq->rq_pi & elts_mask;
	struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
	uint32_t rq_ci = rxq->rq_ci;
	struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
	uint16_t copied = 0;
	uint16_t i = 0;

	for (i = 0; i < pkts_n; ++i) {
		uint16_t strd_cnt;
		enum mlx5_rqx_code rxq_code;

		if (rxq->consumed_strd == strd_n) {
			/* Replace WQE if the buffer is still in use. */
			mprq_buf_replace(rxq, rq_ci & wqe_mask);
			/* Advance to the next WQE. */
			rxq->consumed_strd = 0;
			rq_ci++;
			buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
		}

		if (!elts[i]->pkt_len) {
			rxq->consumed_strd = strd_n;
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
#endif
			continue;
		}
		strd_cnt = (elts[i]->pkt_len / strd_sz) +
			   ((elts[i]->pkt_len % strd_sz) ? 1 : 0);
		rxq_code = mprq_buf_to_pkt(rxq, elts[i], elts[i]->pkt_len,
					   buf, rxq->consumed_strd, strd_cnt);
		rxq->consumed_strd += strd_cnt;
		if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
			rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
			rxq->stats.ipackets -= 1;
			rxq->stats.ibytes -= elts[i]->pkt_len;
#endif
			if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
				++rxq->stats.rx_nombuf;
				break;
			}
			if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
				++rxq->stats.idropped;
				continue;
			}
		}
		pkts[copied++] = elts[i];
	}
	rxq->rq_pi += i;
	rxq->cq_ci += i;
	if (rq_ci != rxq->rq_ci) {
		rxq->rq_ci = rq_ci;
		rte_io_wmb();
		*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
	}
	return copied;
}
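/*
 * Illustrative stride accounting for rxq_copy_mprq_mbuf_v() (the numbers
 * are assumed, not taken from this file). With log_strd_sz = 11 each
 * stride is strd_sz = 2048 bytes, so a 5000-byte packet consumes
 *
 *	strd_cnt = 5000 / 2048 + 1 = 3
 *
 * strides of the current MPRQ buffer. Once consumed_strd reaches strd_n,
 * the WQE is replaced by mprq_buf_replace() and rq_ci advances to the
 * next MPRQ buffer.
 */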

/**
 * Receive burst of packets. An errored completion also consumes an mbuf, but
 * the packet_type is set to RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
 * before returning to the application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set to a non-zero value if the pkts array has at least
 *   one error packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set to true if no new CQE is seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
	    uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t e_n = 1 << rxq->elts_n;
	const uint16_t e_mask = e_n - 1;
	volatile struct mlx5_cqe *cq, *next;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;
	int ret;

	MLX5_ASSERT(rxq->sges_n == 0);
	MLX5_ASSERT(rxq->cqe_n == rxq->elts_n);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_replenish_bulk_mbuf(rxq);
	/* See if there're unreturned mbufs from compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
				pkts, rcvd_pkt);
		rxq->rq_pi += rcvd_pkt;
		rxq->decompressed -= rcvd_pkt;
		pkts += rcvd_pkt;
	}
	elts_idx = rxq->rq_pi & e_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end. */
	pkts_n = RTE_MIN(pkts_n, q_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	/* Not to move past the allocated mbufs. */
	pkts_n = RTE_MIN(pkts_n, RTE_ALIGN_FLOOR(rxq->rq_ci - rxq->rq_pi,
						 MLX5_VPMD_DESCS_PER_LOOP));
	if (!pkts_n) {
		*no_cq = !rcvd_pkt;
		return rcvd_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Go directly to unzipping in case the first CQE is compressed. */
	if (rxq->cqe_comp_layout) {
		ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
		if (ret == MLX5_CQE_STATUS_SW_OWN &&
		    (MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED)) {
			comp_idx = 0;
			goto decompress;
		}
	}
	/* Process all the CQEs. */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return rcvd_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	rxq->cq_ci += nocmp_n;
	rxq->rq_pi += nocmp_n;
	rcvd_pkt += nocmp_n;
	/* Copy title packet for future compressed sessions. */
	if (rxq->cqe_comp_layout) {
		ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
		if (ret == MLX5_CQE_STATUS_SW_OWN &&
		    (MLX5_CQE_FORMAT(cq->op_own) != MLX5_COMPRESSED)) {
			next = &(*rxq->cqes)[rxq->cq_ci & q_mask];
			ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
			if (MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED ||
			    ret != MLX5_CQE_STATUS_SW_OWN)
				rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1],
					   sizeof(struct rte_mbuf));
		}
	}
decompress:
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n], true);
		rxq->cq_ci += rxq->decompressed;
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask],
					&pkts[nocmp_n], n);
			rxq->rq_pi += n;
			rcvd_pkt += n;
			rxq->decompressed -= n;
		}
	}
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

	do {
		err = 0;
		nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn,
				    &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
		rte_io_wmb();
		*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	} while (tn != pkts_n);
	return tn;
}
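/*
 * Illustrative application-side usage (a sketch, not part of the driver):
 * mlx5_rx_burst_vec() is installed as the device rx_pkt_burst callback and
 * is reached through the generic rte_eth_rx_burst() API, e.g.:
 *
 *	struct rte_mbuf *pkts[32];
 *	uint16_t nb = rte_eth_rx_burst(port_id, queue_id, pkts, 32);
 *
 *	for (uint16_t k = 0; k < nb; ++k) {
 *		// process the packet, then release it
 *		rte_pktmbuf_free(pkts[k]);
 *	}
 *
 * port_id and queue_id stand for an already configured port and RX queue.
 */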

/**
 * Receive burst of packets. An errored completion also consumes an mbuf, but
 * the packet_type is set to RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
 * before returning to the application.
 *
 * @param rxq
 *   Pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 * @param[out] err
 *   Pointer to a flag. Set to a non-zero value if the pkts array has at least
 *   one error packet to handle.
 * @param[out] no_cq
 *   Pointer to a boolean. Set to true if no new CQE is seen.
 *
 * @return
 *   Number of packets received including errors (<= pkts_n).
 */
static inline uint16_t
rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
		 uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
	const uint16_t q_n = 1 << rxq->cqe_n;
	const uint16_t q_mask = q_n - 1;
	const uint16_t wqe_n = 1 << rxq->elts_n;
	const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num);
	const uint32_t elts_n = wqe_n * strd_n;
	const uint32_t elts_mask = elts_n - 1;
	volatile struct mlx5_cqe *cq, *next;
	struct rte_mbuf **elts;
	uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
	uint16_t nocmp_n = 0;
	uint16_t rcvd_pkt = 0;
	uint16_t cp_pkt = 0;
	unsigned int cq_idx = rxq->cq_ci & q_mask;
	unsigned int elts_idx;
	int ret;

	MLX5_ASSERT(rxq->sges_n == 0);
	cq = &(*rxq->cqes)[cq_idx];
	rte_prefetch0(cq);
	rte_prefetch0(cq + 1);
	rte_prefetch0(cq + 2);
	rte_prefetch0(cq + 3);
	pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
	mlx5_rx_mprq_replenish_bulk_mbuf(rxq);
	/* Not to move past the allocated mbufs. */
	pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi);
	/* See if there're unreturned mbufs from compressed CQE. */
	rcvd_pkt = rxq->decompressed;
	if (rcvd_pkt > 0) {
		rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
		cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, rcvd_pkt);
		rxq->decompressed -= rcvd_pkt;
		pkts += cp_pkt;
	}
	elts_idx = rxq->rq_pi & elts_mask;
	elts = &(*rxq->elts)[elts_idx];
	/* Not to overflow pkts array. */
	pkts_n = RTE_ALIGN_FLOOR(pkts_n - cp_pkt, MLX5_VPMD_DESCS_PER_LOOP);
	/* Not to cross queue end. */
	pkts_n = RTE_MIN(pkts_n, elts_n - elts_idx);
	pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
	if (!pkts_n) {
		*no_cq = !cp_pkt;
		return cp_pkt;
	}
	/* At this point, there shouldn't be any remaining packets. */
	MLX5_ASSERT(rxq->decompressed == 0);
	/* Go directly to unzipping in case the first CQE is compressed. */
	if (rxq->cqe_comp_layout) {
		ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
		if (ret == MLX5_CQE_STATUS_SW_OWN &&
		    (MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED)) {
			comp_idx = 0;
			goto decompress;
		}
	}
	/* Process all the CQEs. */
	nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
	/* If no new CQE seen, return without updating cq_db. */
	if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
		*no_cq = true;
		return cp_pkt;
	}
	/* Update the consumer indexes for non-compressed CQEs. */
	MLX5_ASSERT(nocmp_n <= pkts_n);
	cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, nocmp_n);
	rcvd_pkt += cp_pkt;
	/* Copy title packet for future compressed sessions. */
	if (rxq->cqe_comp_layout) {
		ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci);
		if (ret == MLX5_CQE_STATUS_SW_OWN &&
		    (MLX5_CQE_FORMAT(cq->op_own) != MLX5_COMPRESSED)) {
			next = &(*rxq->cqes)[rxq->cq_ci & q_mask];
			ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci);
			if (MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED ||
			    ret != MLX5_CQE_STATUS_SW_OWN)
				rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1],
					   sizeof(struct rte_mbuf));
		}
	}
decompress:
	/* Decompress the last CQE if compressed. */
	if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
		MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
		rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
							&elts[nocmp_n], false);
		/* Return more packets if needed. */
		if (nocmp_n < pkts_n) {
			uint16_t n = rxq->decompressed;

			n = RTE_MIN(n, pkts_n - nocmp_n);
			cp_pkt = rxq_copy_mprq_mbuf_v(rxq, &pkts[cp_pkt], n);
			rcvd_pkt += cp_pkt;
			rxq->decompressed -= n;
		}
	}
	*no_cq = !rcvd_pkt;
	return rcvd_pkt;
}

/**
 * DPDK callback for vectorized MPRQ RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct mlx5_rxq_data *rxq = dpdk_rxq;
	uint16_t nb_rx = 0;
	uint16_t tn = 0;
	uint64_t err = 0;
	bool no_cq = false;

	do {
		err = 0;
		nb_rx = rxq_burst_mprq_v(rxq, pkts + tn, pkts_n - tn,
					 &err, &no_cq);
		if (unlikely(err | rxq->err_state))
			nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
		tn += nb_rx;
		if (unlikely(no_cq))
			break;
		rte_io_wmb();
		*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
	} while (tn != pkts_n);
	return tn;
}

/**
 * Check whether an RX queue can support vectorized RX.
 *
 * @param rxq
 *   Pointer to RX queue.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
{
	struct mlx5_rxq_ctrl *ctrl =
		container_of(rxq, struct mlx5_rxq_ctrl, rxq);

	if (!RXQ_PORT(ctrl)->config.rx_vec_en || rxq->sges_n != 0)
		return -ENOTSUP;
	if (rxq->lro)
		return -ENOTSUP;
	return 1;
}

/**
 * Check whether a device can support vectorized RX.
 *
 * @param dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   1 if supported, negative errno value if not.
 */
int __rte_cold
mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t i;

	if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128)
		return -ENOTSUP;
	if (!priv->config.rx_vec_en)
		return -ENOTSUP;
	/* All the configured queues must support vectorized RX. */
	for (i = 0; i < priv->rxqs_n; ++i) {
		struct mlx5_rxq_data *rxq_data = mlx5_rxq_data_get(dev, i);

		if (!rxq_data)
			continue;
		if (mlx5_rxq_check_vec_support(rxq_data) < 0)
			break;
	}
	if (i != priv->rxqs_n)
		return -ENOTSUP;
	return 1;
}
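/*
 * Note on the SIMD width gate in mlx5_check_vec_rx_support() (the EAL
 * option below belongs to the generic DPDK EAL, not to this file):
 * vectorized RX is only selected when rte_vect_get_max_simd_bitwidth()
 * reports at least RTE_VECT_SIMD_128. Lowering the limit at startup, e.g.
 * with the EAL argument --force-max-simd-bitwidth=64, makes this check
 * return -ENOTSUP so that the scalar burst function is used instead.
 */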