1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2017 6WIND S.A. 3 * Copyright 2017 Mellanox Technologies, Ltd 4 */ 5 6 #include <stdint.h> 7 #include <string.h> 8 #include <stdlib.h> 9 10 #include <rte_mbuf.h> 11 #include <rte_mempool.h> 12 #include <rte_prefetch.h> 13 #include <rte_vect.h> 14 15 #include <mlx5_glue.h> 16 #include <mlx5_prm.h> 17 18 #include "mlx5_defs.h" 19 #include "mlx5.h" 20 #include "mlx5_utils.h" 21 #include "mlx5_rxtx.h" 22 #include "mlx5_rx.h" 23 #include "mlx5_rxtx_vec.h" 24 #include "mlx5_autoconf.h" 25 26 #if defined RTE_ARCH_X86_64 27 #include "mlx5_rxtx_vec_sse.h" 28 #elif defined RTE_ARCH_ARM64 29 #include "mlx5_rxtx_vec_neon.h" 30 #elif defined RTE_ARCH_PPC_64 31 #include "mlx5_rxtx_vec_altivec.h" 32 #else 33 #error "This should not be compiled if SIMD instructions are not supported." 34 #endif 35 36 /** 37 * Skip error packets. 38 * 39 * @param rxq 40 * Pointer to RX queue structure. 41 * @param[out] pkts 42 * Array to store received packets. 43 * @param pkts_n 44 * Maximum number of packets in array. 45 * 46 * @return 47 * Number of packets successfully received (<= pkts_n). 48 */ 49 static uint16_t 50 rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, 51 uint16_t pkts_n) 52 { 53 uint16_t n = 0; 54 uint16_t skip_cnt; 55 unsigned int i; 56 #ifdef MLX5_PMD_SOFT_COUNTERS 57 uint32_t err_bytes = 0; 58 #endif 59 60 for (i = 0; i < pkts_n; ++i) { 61 struct rte_mbuf *pkt = pkts[i]; 62 63 if (pkt->packet_type == RTE_PTYPE_ALL_MASK || rxq->err_state) { 64 #ifdef MLX5_PMD_SOFT_COUNTERS 65 err_bytes += PKT_LEN(pkt); 66 #endif 67 rte_pktmbuf_free_seg(pkt); 68 } else { 69 pkts[n++] = pkt; 70 } 71 } 72 rxq->stats.idropped += (pkts_n - n); 73 #ifdef MLX5_PMD_SOFT_COUNTERS 74 /* Correct counters of errored completions. */ 75 rxq->stats.ipackets -= (pkts_n - n); 76 rxq->stats.ibytes -= err_bytes; 77 #endif 78 mlx5_rx_err_handle(rxq, 1, pkts_n, &skip_cnt); 79 return n; 80 } 81 82 /** 83 * Replenish buffers for RX in bulk. 84 * 85 * @param rxq 86 * Pointer to RX queue structure. 87 */ 88 static inline void 89 mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq) 90 { 91 const uint16_t q_n = 1 << rxq->elts_n; 92 const uint16_t q_mask = q_n - 1; 93 uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi); 94 uint16_t elts_idx = rxq->rq_ci & q_mask; 95 struct rte_mbuf **elts = &(*rxq->elts)[elts_idx]; 96 volatile struct mlx5_wqe_data_seg *wq = 97 &((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx]; 98 unsigned int i; 99 100 if (n >= rxq->rq_repl_thresh) { 101 MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n)); 102 MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) > 103 MLX5_VPMD_DESCS_PER_LOOP); 104 /* Not to cross queue end. */ 105 n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx); 106 if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) { 107 rxq->stats.rx_nombuf += n; 108 return; 109 } 110 if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1)) { 111 for (i = 0; i < n; ++i) { 112 /* 113 * In order to support the mbufs with external attached 114 * data buffer we should use the buf_addr pointer 115 * instead of rte_mbuf_buf_addr(). It touches the mbuf 116 * itself and may impact the performance. 117 */ 118 void *buf_addr = elts[i]->buf_addr; 119 120 wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr + 121 RTE_PKTMBUF_HEADROOM); 122 wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]); 123 } 124 } else { 125 for (i = 0; i < n; ++i) { 126 void *buf_addr = elts[i]->buf_addr; 127 128 wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr + 129 RTE_PKTMBUF_HEADROOM); 130 } 131 } 132 rxq->rq_ci += n; 133 /* Prevent overflowing into consumed mbufs. */ 134 elts_idx = rxq->rq_ci & q_mask; 135 for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i) 136 (*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf; 137 rte_io_wmb(); 138 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 139 } 140 } 141 142 /** 143 * Replenish buffers for MPRQ RX in bulk. 144 * 145 * @param rxq 146 * Pointer to RX queue structure. 147 */ 148 static inline void 149 mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq) 150 { 151 const uint16_t wqe_n = 1 << rxq->elts_n; 152 const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num); 153 const uint32_t elts_n = wqe_n * strd_n; 154 const uint32_t wqe_mask = elts_n - 1; 155 uint32_t n = elts_n - (rxq->elts_ci - rxq->rq_pi); 156 uint32_t elts_idx = rxq->elts_ci & wqe_mask; 157 struct rte_mbuf **elts = &(*rxq->elts)[elts_idx]; 158 unsigned int i; 159 160 if (n >= rxq->rq_repl_thresh && 161 rxq->elts_ci - rxq->rq_pi <= 162 rxq->rq_repl_thresh + MLX5_VPMD_RX_MAX_BURST) { 163 MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n)); 164 MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) > 165 MLX5_VPMD_DESCS_PER_LOOP); 166 /* Not to cross queue end. */ 167 n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, elts_n - elts_idx); 168 /* Limit replenish number to threshold value. */ 169 n = RTE_MIN(n, rxq->rq_repl_thresh); 170 if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) { 171 rxq->stats.rx_nombuf += n; 172 return; 173 } 174 rxq->elts_ci += n; 175 /* Prevent overflowing into consumed mbufs. */ 176 elts_idx = rxq->elts_ci & wqe_mask; 177 for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i) 178 (*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf; 179 } 180 } 181 182 /** 183 * Copy or attach MPRQ buffers to RX SW ring. 184 * 185 * @param rxq 186 * Pointer to RX queue structure. 187 * @param pkts 188 * Pointer to array of packets to be stored. 189 * @param pkts_n 190 * Number of packets to be stored. 191 * 192 * @return 193 * Number of packets successfully copied/attached (<= pkts_n). 194 */ 195 static inline uint16_t 196 rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq, 197 struct rte_mbuf **pkts, uint16_t pkts_n) 198 { 199 const uint16_t wqe_n = 1 << rxq->elts_n; 200 const uint16_t wqe_mask = wqe_n - 1; 201 const uint16_t strd_sz = RTE_BIT32(rxq->log_strd_sz); 202 const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num); 203 const uint32_t elts_n = wqe_n * strd_n; 204 const uint32_t elts_mask = elts_n - 1; 205 uint32_t elts_idx = rxq->rq_pi & elts_mask; 206 struct rte_mbuf **elts = &(*rxq->elts)[elts_idx]; 207 uint32_t rq_ci = rxq->rq_ci; 208 struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask]; 209 uint16_t copied = 0; 210 uint16_t i = 0; 211 212 for (i = 0; i < pkts_n; ++i) { 213 uint16_t strd_cnt; 214 enum mlx5_rqx_code rxq_code; 215 216 if (rxq->consumed_strd == strd_n) { 217 /* Replace WQE if the buffer is still in use. */ 218 mprq_buf_replace(rxq, rq_ci & wqe_mask); 219 /* Advance to the next WQE. */ 220 rxq->consumed_strd = 0; 221 rq_ci++; 222 buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask]; 223 } 224 225 if (!elts[i]->pkt_len) { 226 rxq->consumed_strd = strd_n; 227 rte_pktmbuf_free_seg(elts[i]); 228 #ifdef MLX5_PMD_SOFT_COUNTERS 229 rxq->stats.ipackets -= 1; 230 #endif 231 continue; 232 } 233 strd_cnt = (elts[i]->pkt_len / strd_sz) + 234 ((elts[i]->pkt_len % strd_sz) ? 1 : 0); 235 rxq_code = mprq_buf_to_pkt(rxq, elts[i], elts[i]->pkt_len, 236 buf, rxq->consumed_strd, strd_cnt); 237 rxq->consumed_strd += strd_cnt; 238 if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) { 239 rte_pktmbuf_free_seg(elts[i]); 240 #ifdef MLX5_PMD_SOFT_COUNTERS 241 rxq->stats.ipackets -= 1; 242 rxq->stats.ibytes -= elts[i]->pkt_len; 243 #endif 244 if (rxq_code == MLX5_RXQ_CODE_NOMBUF) { 245 ++rxq->stats.rx_nombuf; 246 break; 247 } 248 if (rxq_code == MLX5_RXQ_CODE_DROPPED) { 249 ++rxq->stats.idropped; 250 continue; 251 } 252 } 253 pkts[copied++] = elts[i]; 254 } 255 rxq->rq_pi += i; 256 rxq->cq_ci += i; 257 if (rq_ci != rxq->rq_ci) { 258 rxq->rq_ci = rq_ci; 259 rte_io_wmb(); 260 *rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci); 261 } 262 return copied; 263 } 264 265 /** 266 * Receive burst of packets. An errored completion also consumes a mbuf, but the 267 * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed 268 * before returning to application. 269 * 270 * @param rxq 271 * Pointer to RX queue structure. 272 * @param[out] pkts 273 * Array to store received packets. 274 * @param pkts_n 275 * Maximum number of packets in array. 276 * @param[out] err 277 * Pointer to a flag. Set non-zero value if pkts array has at least one error 278 * packet to handle. 279 * @param[out] no_cq 280 * Pointer to a boolean. Set true if no new CQE seen. 281 * 282 * @return 283 * Number of packets received including errors (<= pkts_n). 284 */ 285 static inline uint16_t 286 rxq_burst_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, 287 uint16_t pkts_n, uint64_t *err, bool *no_cq) 288 { 289 const uint16_t q_n = 1 << rxq->cqe_n; 290 const uint16_t q_mask = q_n - 1; 291 const uint16_t e_n = 1 << rxq->elts_n; 292 const uint16_t e_mask = e_n - 1; 293 volatile struct mlx5_cqe *cq, *next; 294 struct rte_mbuf **elts; 295 uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP; 296 uint16_t nocmp_n = 0; 297 uint16_t rcvd_pkt = 0; 298 unsigned int cq_idx = rxq->cq_ci & q_mask; 299 unsigned int elts_idx; 300 int ret; 301 302 MLX5_ASSERT(rxq->sges_n == 0); 303 MLX5_ASSERT(rxq->cqe_n == rxq->elts_n); 304 cq = &(*rxq->cqes)[cq_idx]; 305 rte_prefetch0(cq); 306 rte_prefetch0(cq + 1); 307 rte_prefetch0(cq + 2); 308 rte_prefetch0(cq + 3); 309 pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST); 310 mlx5_rx_replenish_bulk_mbuf(rxq); 311 /* See if there're unreturned mbufs from compressed CQE. */ 312 rcvd_pkt = rxq->decompressed; 313 if (rcvd_pkt > 0) { 314 rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n); 315 rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask], 316 pkts, rcvd_pkt); 317 rxq->rq_pi += rcvd_pkt; 318 rxq->decompressed -= rcvd_pkt; 319 pkts += rcvd_pkt; 320 } 321 elts_idx = rxq->rq_pi & e_mask; 322 elts = &(*rxq->elts)[elts_idx]; 323 /* Not to overflow pkts array. */ 324 pkts_n = RTE_ALIGN_FLOOR(pkts_n - rcvd_pkt, MLX5_VPMD_DESCS_PER_LOOP); 325 /* Not to cross queue end. */ 326 pkts_n = RTE_MIN(pkts_n, q_n - elts_idx); 327 pkts_n = RTE_MIN(pkts_n, q_n - cq_idx); 328 if (!pkts_n) { 329 *no_cq = !rcvd_pkt; 330 return rcvd_pkt; 331 } 332 /* At this point, there shouldn't be any remaining packets. */ 333 MLX5_ASSERT(rxq->decompressed == 0); 334 /* Go directly to unzipping in case the first CQE is compressed. */ 335 if (rxq->cqe_comp_layout) { 336 ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci); 337 if (ret == MLX5_CQE_STATUS_SW_OWN && 338 (MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED)) { 339 comp_idx = 0; 340 goto decompress; 341 } 342 } 343 /* Process all the CQEs */ 344 nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx); 345 /* If no new CQE seen, return without updating cq_db. */ 346 if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) { 347 *no_cq = true; 348 return rcvd_pkt; 349 } 350 /* Update the consumer indexes for non-compressed CQEs. */ 351 MLX5_ASSERT(nocmp_n <= pkts_n); 352 rxq->cq_ci += nocmp_n; 353 rxq->rq_pi += nocmp_n; 354 rcvd_pkt += nocmp_n; 355 /* Copy title packet for future compressed sessions. */ 356 if (rxq->cqe_comp_layout) { 357 ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci); 358 if (ret == MLX5_CQE_STATUS_SW_OWN && 359 (MLX5_CQE_FORMAT(cq->op_own) != MLX5_COMPRESSED)) { 360 next = &(*rxq->cqes)[rxq->cq_ci & q_mask]; 361 ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci); 362 if (MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED || 363 ret != MLX5_CQE_STATUS_SW_OWN) 364 rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1], 365 sizeof(struct rte_mbuf)); 366 } 367 } 368 decompress: 369 /* Decompress the last CQE if compressed. */ 370 if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) { 371 MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP)); 372 rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n], 373 &elts[nocmp_n], true); 374 rxq->cq_ci += rxq->decompressed; 375 /* Return more packets if needed. */ 376 if (nocmp_n < pkts_n) { 377 uint16_t n = rxq->decompressed; 378 379 n = RTE_MIN(n, pkts_n - nocmp_n); 380 rxq_copy_mbuf_v(&(*rxq->elts)[rxq->rq_pi & e_mask], 381 &pkts[nocmp_n], n); 382 rxq->rq_pi += n; 383 rcvd_pkt += n; 384 rxq->decompressed -= n; 385 } 386 } 387 *no_cq = !rcvd_pkt; 388 return rcvd_pkt; 389 } 390 391 /** 392 * DPDK callback for vectorized RX. 393 * 394 * @param dpdk_rxq 395 * Generic pointer to RX queue structure. 396 * @param[out] pkts 397 * Array to store received packets. 398 * @param pkts_n 399 * Maximum number of packets in array. 400 * 401 * @return 402 * Number of packets successfully received (<= pkts_n). 403 */ 404 uint16_t 405 mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 406 { 407 struct mlx5_rxq_data *rxq = dpdk_rxq; 408 uint16_t nb_rx = 0; 409 uint16_t tn = 0; 410 uint64_t err = 0; 411 bool no_cq = false; 412 413 do { 414 err = 0; 415 nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn, 416 &err, &no_cq); 417 if (unlikely(err | rxq->err_state)) 418 nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx); 419 tn += nb_rx; 420 if (unlikely(no_cq)) 421 break; 422 rte_io_wmb(); 423 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 424 } while (tn != pkts_n); 425 return tn; 426 } 427 428 /** 429 * Receive burst of packets. An errored completion also consumes a mbuf, but the 430 * packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed 431 * before returning to application. 432 * 433 * @param rxq 434 * Pointer to RX queue structure. 435 * @param[out] pkts 436 * Array to store received packets. 437 * @param pkts_n 438 * Maximum number of packets in array. 439 * @param[out] err 440 * Pointer to a flag. Set non-zero value if pkts array has at least one error 441 * packet to handle. 442 * @param[out] no_cq 443 * Pointer to a boolean. Set true if no new CQE seen. 444 * 445 * @return 446 * Number of packets received including errors (<= pkts_n). 447 */ 448 static inline uint16_t 449 rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts, 450 uint16_t pkts_n, uint64_t *err, bool *no_cq) 451 { 452 const uint16_t q_n = 1 << rxq->cqe_n; 453 const uint16_t q_mask = q_n - 1; 454 const uint16_t wqe_n = 1 << rxq->elts_n; 455 const uint32_t strd_n = RTE_BIT32(rxq->log_strd_num); 456 const uint32_t elts_n = wqe_n * strd_n; 457 const uint32_t elts_mask = elts_n - 1; 458 volatile struct mlx5_cqe *cq, *next; 459 struct rte_mbuf **elts; 460 uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP; 461 uint16_t nocmp_n = 0; 462 uint16_t rcvd_pkt = 0; 463 uint16_t cp_pkt = 0; 464 unsigned int cq_idx = rxq->cq_ci & q_mask; 465 unsigned int elts_idx; 466 int ret; 467 468 MLX5_ASSERT(rxq->sges_n == 0); 469 cq = &(*rxq->cqes)[cq_idx]; 470 rte_prefetch0(cq); 471 rte_prefetch0(cq + 1); 472 rte_prefetch0(cq + 2); 473 rte_prefetch0(cq + 3); 474 pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST); 475 mlx5_rx_mprq_replenish_bulk_mbuf(rxq); 476 /* Not to move past the allocated mbufs. */ 477 pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi); 478 /* See if there're unreturned mbufs from compressed CQE. */ 479 rcvd_pkt = rxq->decompressed; 480 if (rcvd_pkt > 0) { 481 rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n); 482 cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, rcvd_pkt); 483 rxq->decompressed -= rcvd_pkt; 484 pkts += cp_pkt; 485 } 486 elts_idx = rxq->rq_pi & elts_mask; 487 elts = &(*rxq->elts)[elts_idx]; 488 /* Not to overflow pkts array. */ 489 pkts_n = RTE_ALIGN_FLOOR(pkts_n - cp_pkt, MLX5_VPMD_DESCS_PER_LOOP); 490 /* Not to cross queue end. */ 491 pkts_n = RTE_MIN(pkts_n, elts_n - elts_idx); 492 pkts_n = RTE_MIN(pkts_n, q_n - cq_idx); 493 if (!pkts_n) { 494 *no_cq = !cp_pkt; 495 return cp_pkt; 496 } 497 /* At this point, there shouldn't be any remaining packets. */ 498 MLX5_ASSERT(rxq->decompressed == 0); 499 /* Go directly to unzipping in case the first CQE is compressed. */ 500 if (rxq->cqe_comp_layout) { 501 ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci); 502 if (ret == MLX5_CQE_STATUS_SW_OWN && 503 (MLX5_CQE_FORMAT(cq->op_own) == MLX5_COMPRESSED)) { 504 comp_idx = 0; 505 goto decompress; 506 } 507 } 508 /* Process all the CQEs */ 509 nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx); 510 /* If no new CQE seen, return without updating cq_db. */ 511 if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) { 512 *no_cq = true; 513 return cp_pkt; 514 } 515 /* Update the consumer indexes for non-compressed CQEs. */ 516 MLX5_ASSERT(nocmp_n <= pkts_n); 517 cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, nocmp_n); 518 rcvd_pkt += cp_pkt; 519 /* Copy title packet for future compressed sessions. */ 520 if (rxq->cqe_comp_layout) { 521 ret = check_cqe_iteration(cq, rxq->cqe_n, rxq->cq_ci); 522 if (ret == MLX5_CQE_STATUS_SW_OWN && 523 (MLX5_CQE_FORMAT(cq->op_own) != MLX5_COMPRESSED)) { 524 next = &(*rxq->cqes)[rxq->cq_ci & q_mask]; 525 ret = check_cqe_iteration(next, rxq->cqe_n, rxq->cq_ci); 526 if (MLX5_CQE_FORMAT(next->op_own) == MLX5_COMPRESSED || 527 ret != MLX5_CQE_STATUS_SW_OWN) 528 rte_memcpy(&rxq->title_pkt, elts[nocmp_n - 1], 529 sizeof(struct rte_mbuf)); 530 } 531 } 532 decompress: 533 /* Decompress the last CQE if compressed. */ 534 if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) { 535 MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP)); 536 rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n], 537 &elts[nocmp_n], false); 538 /* Return more packets if needed. */ 539 if (nocmp_n < pkts_n) { 540 uint16_t n = rxq->decompressed; 541 542 n = RTE_MIN(n, pkts_n - nocmp_n); 543 cp_pkt = rxq_copy_mprq_mbuf_v(rxq, &pkts[cp_pkt], n); 544 rcvd_pkt += cp_pkt; 545 rxq->decompressed -= n; 546 } 547 } 548 *no_cq = !rcvd_pkt; 549 return rcvd_pkt; 550 } 551 552 /** 553 * DPDK callback for vectorized MPRQ RX. 554 * 555 * @param dpdk_rxq 556 * Generic pointer to RX queue structure. 557 * @param[out] pkts 558 * Array to store received packets. 559 * @param pkts_n 560 * Maximum number of packets in array. 561 * 562 * @return 563 * Number of packets successfully received (<= pkts_n). 564 */ 565 uint16_t 566 mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n) 567 { 568 struct mlx5_rxq_data *rxq = dpdk_rxq; 569 uint16_t nb_rx = 0; 570 uint16_t tn = 0; 571 uint64_t err = 0; 572 bool no_cq = false; 573 574 do { 575 err = 0; 576 nb_rx = rxq_burst_mprq_v(rxq, pkts + tn, pkts_n - tn, 577 &err, &no_cq); 578 if (unlikely(err | rxq->err_state)) 579 nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx); 580 tn += nb_rx; 581 if (unlikely(no_cq)) 582 break; 583 rte_io_wmb(); 584 *rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci); 585 } while (tn != pkts_n); 586 return tn; 587 } 588 589 /** 590 * Check a RX queue can support vectorized RX. 591 * 592 * @param rxq 593 * Pointer to RX queue. 594 * 595 * @return 596 * 1 if supported, negative errno value if not. 597 */ 598 int __rte_cold 599 mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq) 600 { 601 struct mlx5_rxq_ctrl *ctrl = 602 container_of(rxq, struct mlx5_rxq_ctrl, rxq); 603 604 if (!RXQ_PORT(ctrl)->config.rx_vec_en || rxq->sges_n != 0) 605 return -ENOTSUP; 606 if (rxq->lro) 607 return -ENOTSUP; 608 return 1; 609 } 610 611 /** 612 * Check a device can support vectorized RX. 613 * 614 * @param dev 615 * Pointer to Ethernet device. 616 * 617 * @return 618 * 1 if supported, negative errno value if not. 619 */ 620 int __rte_cold 621 mlx5_check_vec_rx_support(struct rte_eth_dev *dev) 622 { 623 struct mlx5_priv *priv = dev->data->dev_private; 624 uint32_t i; 625 626 if (rte_vect_get_max_simd_bitwidth() < RTE_VECT_SIMD_128) 627 return -ENOTSUP; 628 if (!priv->config.rx_vec_en) 629 return -ENOTSUP; 630 /* All the configured queues should support. */ 631 for (i = 0; i < priv->rxqs_n; ++i) { 632 struct mlx5_rxq_data *rxq_data = mlx5_rxq_data_get(dev, i); 633 634 if (!rxq_data) 635 continue; 636 if (mlx5_rxq_check_vec_support(rxq_data) < 0) 637 break; 638 } 639 if (i != priv->rxqs_n) 640 return -ENOTSUP; 641 return 1; 642 } 643