/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2021 Mellanox Technologies, Ltd
 */

#include <rte_malloc.h>
#include <rte_log.h>
#include <rte_errno.h>
#include <bus_pci_driver.h>
#include <rte_spinlock.h>
#include <rte_comp.h>
#include <rte_compressdev.h>
#include <rte_compressdev_pmd.h>

#include <mlx5_glue.h>
#include <mlx5_common.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_common_os.h>
#include <mlx5_common_devx.h>
#include <mlx5_common_mr.h>
#include <mlx5_prm.h>

#include "mlx5_compress_utils.h"

#define MLX5_COMPRESS_DRIVER_NAME mlx5_compress
#define MLX5_COMPRESS_MAX_QPS 1024
#define MLX5_COMP_MAX_WIN_SIZE_CONF 6u
#define MLX5_COMP_NUM_SUP_ALGO 4

struct mlx5_compress_devarg_params {
	uint32_t log_block_sz;
};

struct mlx5_compress_xform {
	LIST_ENTRY(mlx5_compress_xform) next;
	enum rte_comp_xform_type type;
	enum rte_comp_checksum_type csum_type;
	uint32_t opcode;
	uint32_t gga_ctrl1; /* BE. */
};

struct mlx5_compress_priv {
	TAILQ_ENTRY(mlx5_compress_priv) next;
	struct rte_compressdev *compressdev;
	struct mlx5_common_device *cdev; /* Backend mlx5 device. */
	struct mlx5_uar uar;
	struct rte_compressdev_config dev_config;
	struct rte_compressdev_capabilities caps[MLX5_COMP_NUM_SUP_ALGO];
	LIST_HEAD(xform_list, mlx5_compress_xform) xform_list;
	rte_spinlock_t xform_sl;
	uint32_t log_block_sz;
	uint32_t crc32_opaq_offs;
};

struct mlx5_compress_qp {
	uint16_t qp_id;
	uint16_t entries_n;
	uint16_t pi;
	uint16_t ci;
	struct mlx5_mr_ctrl mr_ctrl;
	int socket_id;
	struct mlx5_devx_cq cq;
	struct mlx5_devx_qp qp;
	struct mlx5_pmd_mr opaque_mr;
	struct rte_comp_op **ops;
	struct mlx5_compress_priv *priv;
	struct rte_compressdev_stats stats;
};

TAILQ_HEAD(mlx5_compress_privs, mlx5_compress_priv) mlx5_compress_priv_list =
				TAILQ_HEAD_INITIALIZER(mlx5_compress_priv_list);
static pthread_mutex_t priv_list_lock = PTHREAD_MUTEX_INITIALIZER;

int mlx5_compress_logtype;

static void
mlx5_compress_dev_info_get(struct rte_compressdev *dev,
			   struct rte_compressdev_info *info)
{
	if (dev != NULL && info != NULL) {
		struct mlx5_compress_priv *priv = dev->data->dev_private;

		info->max_nb_queue_pairs = MLX5_COMPRESS_MAX_QPS;
		info->feature_flags = RTE_COMPDEV_FF_HW_ACCELERATED;
		info->capabilities = priv->caps;
	}
}

static int
mlx5_compress_dev_configure(struct rte_compressdev *dev,
			    struct rte_compressdev_config *config)
{
	struct mlx5_compress_priv *priv;

	if (dev == NULL || config == NULL)
		return -EINVAL;
	priv = dev->data->dev_private;
	priv->dev_config = *config;
	return 0;
}

static int
mlx5_compress_dev_close(struct rte_compressdev *dev)
{
	RTE_SET_USED(dev);
	return 0;
}

static int
mlx5_compress_qp_release(struct rte_compressdev *dev, uint16_t qp_id)
{
	struct mlx5_compress_qp *qp = dev->data->queue_pairs[qp_id];

	if (qp->qp.qp != NULL)
		mlx5_devx_qp_destroy(&qp->qp);
	if (qp->cq.cq != NULL)
		mlx5_devx_cq_destroy(&qp->cq);
	if (qp->opaque_mr.obj != NULL) {
		void *opaq = qp->opaque_mr.addr;

		mlx5_common_verbs_dereg_mr(&qp->opaque_mr);
		rte_free(opaq);
	}
	mlx5_mr_btree_free(&qp->mr_ctrl.cache_bh);
	rte_free(qp);
	dev->data->queue_pairs[qp_id] = NULL;
	return 0;
}

static void
mlx5_compress_init_qp(struct mlx5_compress_qp *qp)
{
	volatile struct mlx5_gga_wqe *restrict wqe =
				(volatile struct mlx5_gga_wqe *)qp->qp.wqes;
	volatile union mlx5_gga_compress_opaque *opaq = qp->opaque_mr.addr;
	const uint32_t sq_ds = rte_cpu_to_be_32((qp->qp.qp->id << 8) | 4u);
	const uint32_t flags = RTE_BE32(MLX5_COMP_ALWAYS <<
					MLX5_COMP_MODE_OFFSET);
	const uint32_t opaq_lkey = rte_cpu_to_be_32(qp->opaque_mr.lkey);
	int i;

	/* All the next fields state should stay constant. */
	for (i = 0; i < qp->entries_n; ++i, ++wqe) {
		wqe->sq_ds = sq_ds;
		wqe->flags = flags;
		wqe->opaque_lkey = opaq_lkey;
		wqe->opaque_vaddr = rte_cpu_to_be_64
					((uint64_t)(uintptr_t)&opaq[i]);
	}
}

static int
mlx5_compress_qp_setup(struct rte_compressdev *dev, uint16_t qp_id,
		       uint32_t max_inflight_ops, int socket_id)
{
	struct mlx5_compress_priv *priv = dev->data->dev_private;
	struct mlx5_hca_attr *attr = &priv->cdev->config.hca_attr;
	struct mlx5_compress_qp *qp;
	struct mlx5_devx_cq_attr cq_attr = {
		.uar_page_id = mlx5_os_get_devx_uar_page_id(priv->uar.obj),
	};
	struct mlx5_devx_qp_attr qp_attr = {
		.pd = priv->cdev->pdn,
		.uar_index = mlx5_os_get_devx_uar_page_id(priv->uar.obj),
		.user_index = qp_id,
	};
	uint32_t log_ops_n = rte_log2_u32(max_inflight_ops);
	uint32_t alloc_size = sizeof(*qp);
	void *opaq_buf;
	int ret;

	alloc_size = RTE_ALIGN(alloc_size, RTE_CACHE_LINE_SIZE);
	alloc_size += sizeof(struct rte_comp_op *) * (1u << log_ops_n);
	qp = rte_zmalloc_socket(__func__, alloc_size, RTE_CACHE_LINE_SIZE,
				socket_id);
	if (qp == NULL) {
		DRV_LOG(ERR, "Failed to allocate qp memory.");
		rte_errno = ENOMEM;
		return -rte_errno;
	}
	dev->data->queue_pairs[qp_id] = qp;
	if (mlx5_mr_ctrl_init(&qp->mr_ctrl, &priv->cdev->mr_scache.dev_gen,
			      priv->dev_config.socket_id)) {
		DRV_LOG(ERR, "Cannot allocate MR Btree for qp %u.",
			(uint32_t)qp_id);
		rte_errno = ENOMEM;
		goto err;
	}
	opaq_buf = rte_calloc(__func__, (size_t)1 << log_ops_n,
			      sizeof(union mlx5_gga_compress_opaque),
			      sizeof(union mlx5_gga_compress_opaque));
	if (opaq_buf == NULL) {
		DRV_LOG(ERR, "Failed to allocate opaque memory.");
		rte_errno = ENOMEM;
		goto err;
	}
	qp->entries_n = 1 << log_ops_n;
	qp->socket_id = socket_id;
	qp->qp_id = qp_id;
	qp->priv = priv;
	qp->ops = (struct rte_comp_op **)RTE_ALIGN((uintptr_t)(qp + 1),
						   RTE_CACHE_LINE_SIZE);
	if (mlx5_common_verbs_reg_mr(priv->cdev->pd, opaq_buf, qp->entries_n *
					sizeof(union mlx5_gga_compress_opaque),
					&qp->opaque_mr) != 0) {
		rte_free(opaq_buf);
		DRV_LOG(ERR, "Failed to register opaque MR.");
		rte_errno = ENOMEM;
		goto err;
	}
	ret = mlx5_devx_cq_create(priv->cdev->ctx, &qp->cq, log_ops_n, &cq_attr,
				  socket_id);
	if (ret != 0) {
		DRV_LOG(ERR, "Failed to create CQ.");
		goto err;
	}
	qp_attr.cqn = qp->cq.cq->id;
	qp_attr.ts_format = mlx5_ts_format_conv(attr->qp_ts_format);
	qp_attr.num_of_receive_wqes = 0;
	qp_attr.num_of_send_wqbbs = RTE_BIT32(log_ops_n);
	qp_attr.mmo = attr->mmo_compress_qp_en || attr->mmo_dma_qp_en ||
		      attr->decomp_lz4_checksum_en ||
		      attr->decomp_lz4_no_checksum_en ||
		      attr->decomp_deflate_v1_en || attr->decomp_deflate_v2_en;
	ret = mlx5_devx_qp_create(priv->cdev->ctx, &qp->qp,
				  qp_attr.num_of_send_wqbbs *
				  MLX5_WQE_SIZE, &qp_attr, socket_id);
	if (ret != 0) {
		DRV_LOG(ERR, "Failed to create QP.");
		goto err;
	}
	mlx5_compress_init_qp(qp);
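	/* Move the DevX QP to the ready-to-send state before posting WQEs. */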
	ret = mlx5_devx_qp2rts(&qp->qp, 0);
	if (ret)
		goto err;
	DRV_LOG(INFO, "QP %u: SQN=0x%X CQN=0x%X entries num = %u",
		(uint32_t)qp_id, qp->qp.qp->id, qp->cq.cq->id, qp->entries_n);
	return 0;
err:
	mlx5_compress_qp_release(dev, qp_id);
	return -1;
}

static int
mlx5_compress_xform_free(struct rte_compressdev *dev, void *xform)
{
	struct mlx5_compress_priv *priv = dev->data->dev_private;

	rte_spinlock_lock(&priv->xform_sl);
	LIST_REMOVE((struct mlx5_compress_xform *)xform, next);
	rte_spinlock_unlock(&priv->xform_sl);
	rte_free(xform);
	return 0;
}

static int
mlx5_compress_xform_validate(const struct rte_comp_xform *xform,
			     const struct mlx5_hca_attr *attr)
{
	switch (xform->type) {
	case RTE_COMP_COMPRESS:
		if (xform->compress.algo == RTE_COMP_ALGO_NULL &&
		    !attr->mmo_dma_qp_en && !attr->mmo_dma_sq_en) {
			DRV_LOG(ERR, "Not enough capabilities to support DMA operation, maybe old FW/OFED version?");
			return -ENOTSUP;
		} else if (!attr->mmo_compress_qp_en &&
			   !attr->mmo_compress_sq_en) {
			DRV_LOG(ERR, "Not enough capabilities to support compress operation.");
			return -ENOTSUP;
		}
		if (xform->compress.algo == RTE_COMP_ALGO_LZ4) {
			DRV_LOG(ERR, "LZ4 compression is not supported.");
			return -ENOTSUP;
		}
		if (xform->compress.level == RTE_COMP_LEVEL_NONE) {
			DRV_LOG(ERR, "Non-compressed block is not supported.");
			return -ENOTSUP;
		}
		if (xform->compress.hash_algo != RTE_COMP_HASH_ALGO_NONE) {
			DRV_LOG(ERR, "SHA is not supported.");
			return -ENOTSUP;
		}
		if (xform->compress.chksum == RTE_COMP_CHECKSUM_XXHASH32) {
			DRV_LOG(ERR, "xxHash32 checksum isn't supported in compress operation.");
			return -ENOTSUP;
		}
		break;
	case RTE_COMP_DECOMPRESS:
		switch (xform->decompress.algo) {
		case RTE_COMP_ALGO_NULL:
			if (!attr->mmo_dma_qp_en && !attr->mmo_dma_sq_en) {
				DRV_LOG(ERR, "Not enough capabilities to support DMA operation, maybe old FW/OFED version?");
				return -ENOTSUP;
			}
			break;
		case RTE_COMP_ALGO_DEFLATE:
			if (!attr->decomp_deflate_v1_en &&
			    !attr->decomp_deflate_v2_en &&
			    !attr->mmo_decompress_sq_en) {
				DRV_LOG(ERR, "Not enough capabilities to support decompress DEFLATE algorithm, maybe old FW/OFED version?");
				return -ENOTSUP;
			}
			switch (xform->decompress.chksum) {
			case RTE_COMP_CHECKSUM_NONE:
			case RTE_COMP_CHECKSUM_CRC32:
			case RTE_COMP_CHECKSUM_ADLER32:
			case RTE_COMP_CHECKSUM_CRC32_ADLER32:
				break;
			case RTE_COMP_CHECKSUM_XXHASH32:
			default:
				DRV_LOG(ERR, "DEFLATE algorithm doesn't support %u checksum.",
					xform->decompress.chksum);
				return -ENOTSUP;
			}
			break;
		case RTE_COMP_ALGO_LZ4:
			if (!attr->decomp_lz4_no_checksum_en &&
			    !attr->decomp_lz4_checksum_en) {
				DRV_LOG(ERR, "Not enough capabilities to support decompress LZ4 algorithm, maybe old FW/OFED version?");
				return -ENOTSUP;
			}
			if (xform->decompress.lz4.flags &
			    RTE_COMP_LZ4_FLAG_BLOCK_CHECKSUM) {
				if (!attr->decomp_lz4_checksum_en) {
					DRV_LOG(ERR, "Not enough capabilities to support decompress LZ4 block with checksum param, maybe old FW/OFED version?");
					return -ENOTSUP;
				}
			} else {
				if (!attr->decomp_lz4_no_checksum_en) {
					DRV_LOG(ERR, "Not enough capabilities to support decompress LZ4 block without checksum param, maybe old FW/OFED version?");
					return -ENOTSUP;
				}
			}
			if (xform->decompress.chksum !=
			    RTE_COMP_CHECKSUM_XXHASH32 &&
			    xform->decompress.chksum !=
			    RTE_COMP_CHECKSUM_NONE) {
				DRV_LOG(ERR, "LZ4 algorithm supports only xxHash32 checksum.");
				return -ENOTSUP;
			}
			break;
		default:
			DRV_LOG(ERR, "Algorithm %u is not supported.",
				xform->decompress.algo);
			return -ENOTSUP;
		}
		if (xform->decompress.hash_algo != RTE_COMP_HASH_ALGO_NONE) {
			DRV_LOG(ERR, "SHA is not supported.");
			return -ENOTSUP;
		}
		break;
	default:
		DRV_LOG(ERR, "Xform type should be compress/decompress");
		return -ENOTSUP;
	}
	return 0;
}

static int
mlx5_compress_xform_create(struct rte_compressdev *dev,
			   const struct rte_comp_xform *xform,
			   void **private_xform)
{
	struct mlx5_compress_priv *priv = dev->data->dev_private;
	struct mlx5_compress_xform *xfrm;
	uint32_t size;
	int ret;

	ret = mlx5_compress_xform_validate(xform, &priv->cdev->config.hca_attr);
	if (ret < 0)
		return ret;
	xfrm = rte_zmalloc_socket(__func__, sizeof(*xfrm), 0,
				  priv->dev_config.socket_id);
	if (xfrm == NULL)
		return -ENOMEM;
	xfrm->opcode = MLX5_OPCODE_MMO;
	xfrm->type = xform->type;
	switch (xform->type) {
	case RTE_COMP_COMPRESS:
		switch (xform->compress.algo) {
		case RTE_COMP_ALGO_NULL:
			xfrm->opcode += MLX5_OPC_MOD_MMO_DMA <<
						WQE_CSEG_OPC_MOD_OFFSET;
			break;
		case RTE_COMP_ALGO_DEFLATE:
			size = 1 << xform->compress.window_size;
			size /= MLX5_GGA_COMP_WIN_SIZE_UNITS;
			xfrm->gga_ctrl1 += RTE_MIN(rte_log2_u32(size),
					   MLX5_COMP_MAX_WIN_SIZE_CONF) <<
						WQE_GGA_COMP_WIN_SIZE_OFFSET;
			size = priv->log_block_sz;
			xfrm->gga_ctrl1 += size <<
						WQE_GGA_COMP_BLOCK_SIZE_OFFSET;
			xfrm->opcode += MLX5_OPC_MOD_MMO_COMP <<
						WQE_CSEG_OPC_MOD_OFFSET;
			size = xform->compress.deflate.huffman ==
				RTE_COMP_HUFFMAN_DYNAMIC ?
				MLX5_GGA_COMP_LOG_DYNAMIC_SIZE_MAX :
				MLX5_GGA_COMP_LOG_DYNAMIC_SIZE_MIN;
			xfrm->gga_ctrl1 += size <<
						WQE_GGA_COMP_DYNAMIC_SIZE_OFFSET;
			break;
		default:
			goto err;
		}
		xfrm->csum_type = xform->compress.chksum;
		break;
	case RTE_COMP_DECOMPRESS:
		switch (xform->decompress.algo) {
		case RTE_COMP_ALGO_NULL:
			xfrm->opcode += MLX5_OPC_MOD_MMO_DMA <<
						WQE_CSEG_OPC_MOD_OFFSET;
			break;
		case RTE_COMP_ALGO_DEFLATE:
			xfrm->opcode += MLX5_OPC_MOD_MMO_DECOMP <<
						WQE_CSEG_OPC_MOD_OFFSET;
			xfrm->gga_ctrl1 += WQE_GGA_DECOMP_DEFLATE <<
						WQE_GGA_DECOMP_TYPE_OFFSET;
			break;
		case RTE_COMP_ALGO_LZ4:
			xfrm->opcode += MLX5_OPC_MOD_MMO_DECOMP <<
						WQE_CSEG_OPC_MOD_OFFSET;
			xfrm->gga_ctrl1 += WQE_GGA_DECOMP_LZ4 <<
						WQE_GGA_DECOMP_TYPE_OFFSET;
			if (xform->decompress.lz4.flags &
			    RTE_COMP_LZ4_FLAG_BLOCK_CHECKSUM)
				xfrm->gga_ctrl1 +=
					MLX5_GGA_DECOMP_LZ4_BLOCK_WITH_CHECKSUM <<
						WQE_GGA_DECOMP_PARAMS_OFFSET;
			else
				xfrm->gga_ctrl1 +=
					MLX5_GGA_DECOMP_LZ4_BLOCK_WITHOUT_CHECKSUM
						<< WQE_GGA_DECOMP_PARAMS_OFFSET;
			if (xform->decompress.lz4.flags &
			    RTE_COMP_LZ4_FLAG_BLOCK_INDEPENDENCE)
				xfrm->gga_ctrl1 += 1u <<
					WQE_GGA_DECOMP_BLOCK_INDEPENDENT_OFFSET;
			break;
		default:
			goto err;
		}
		xfrm->csum_type = xform->decompress.chksum;
		break;
	default:
		DRV_LOG(ERR, "Operation %u is not supported.", xform->type);
		goto err;
	}
	DRV_LOG(DEBUG, "New xform: gga ctrl1 = 0x%08X opcode = 0x%08X csum "
		"type = %d.", xfrm->gga_ctrl1, xfrm->opcode, xfrm->csum_type);
	xfrm->gga_ctrl1 = rte_cpu_to_be_32(xfrm->gga_ctrl1);
	rte_spinlock_lock(&priv->xform_sl);
	LIST_INSERT_HEAD(&priv->xform_list, xfrm, next);
	rte_spinlock_unlock(&priv->xform_sl);
	*private_xform = xfrm;
	return 0;
err:
	rte_free(xfrm);
	return -ENOTSUP;
}

static void
mlx5_compress_dev_stop(struct rte_compressdev *dev)
{
	RTE_SET_USED(dev);
}

static int
mlx5_compress_dev_start(struct rte_compressdev *dev)
{
	struct mlx5_compress_priv *priv = dev->data->dev_private;

	return mlx5_dev_mempool_subscribe(priv->cdev);
}

static void
mlx5_compress_stats_get(struct rte_compressdev *dev,
			struct rte_compressdev_stats *stats)
{
	int qp_id;

	for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {
		struct mlx5_compress_qp *qp = dev->data->queue_pairs[qp_id];

		stats->enqueued_count += qp->stats.enqueued_count;
		stats->dequeued_count += qp->stats.dequeued_count;
		stats->enqueue_err_count += qp->stats.enqueue_err_count;
		stats->dequeue_err_count += qp->stats.dequeue_err_count;
	}
}

static void
mlx5_compress_stats_reset(struct rte_compressdev *dev)
{
	int qp_id;

	for (qp_id = 0; qp_id < dev->data->nb_queue_pairs; qp_id++) {
		struct mlx5_compress_qp *qp = dev->data->queue_pairs[qp_id];

		memset(&qp->stats, 0, sizeof(qp->stats));
	}
}

static struct rte_compressdev_ops mlx5_compress_ops = {
	.dev_configure = mlx5_compress_dev_configure,
	.dev_start = mlx5_compress_dev_start,
	.dev_stop = mlx5_compress_dev_stop,
	.dev_close = mlx5_compress_dev_close,
	.dev_infos_get = mlx5_compress_dev_info_get,
	.stats_get = mlx5_compress_stats_get,
	.stats_reset = mlx5_compress_stats_reset,
	.queue_pair_setup = mlx5_compress_qp_setup,
	.queue_pair_release = mlx5_compress_qp_release,
	.private_xform_create = mlx5_compress_xform_create,
	.private_xform_free = mlx5_compress_xform_free,
	.stream_create = NULL,
	.stream_free = NULL,
};

static __rte_always_inline uint32_t
mlx5_compress_dseg_set(struct mlx5_compress_qp *qp,
		       volatile struct mlx5_wqe_dseg *restrict dseg,
		       struct rte_mbuf *restrict mbuf,
		       uint32_t offset, uint32_t len)
{
	uintptr_t addr = rte_pktmbuf_mtod_offset(mbuf, uintptr_t, offset);

	dseg->bcount = rte_cpu_to_be_32(len);
	dseg->lkey = mlx5_mr_mb2mr(&qp->mr_ctrl, mbuf);
	dseg->pbuf = rte_cpu_to_be_64(addr);
	return dseg->lkey;
}

static uint16_t
mlx5_compress_enqueue_burst(void *queue_pair, struct rte_comp_op **ops,
			    uint16_t nb_ops)
{
	struct mlx5_compress_qp *qp = queue_pair;
	volatile struct mlx5_gga_wqe *wqes = (volatile struct mlx5_gga_wqe *)
							qp->qp.wqes, *wqe;
	struct mlx5_compress_xform *xform;
	struct rte_comp_op *op;
	uint16_t mask = qp->entries_n - 1;
	uint16_t remain = qp->entries_n - (qp->pi - qp->ci);
	uint16_t idx;
	bool invalid;

	if (remain < nb_ops)
		nb_ops = remain;
	else
		remain = nb_ops;
	if (unlikely(remain == 0))
		return 0;
	do {
		idx = qp->pi & mask;
		wqe = &wqes[idx];
		rte_prefetch0(&wqes[(qp->pi + 1) & mask]);
		op = *ops++;
		xform = op->private_xform;
		/*
		 * Check operation arguments and error cases:
		 *   - Operation type must be state-less.
		 *   - Compress operation flush flag must be FULL or FINAL.
		 *   - Source and destination buffers must be mapped internally.
		 */
		invalid = op->op_type != RTE_COMP_OP_STATELESS ||
					(xform->type == RTE_COMP_COMPRESS &&
					 op->flush_flag < RTE_COMP_FLUSH_FULL);
		if (unlikely(invalid ||
			     (mlx5_compress_dseg_set(qp, &wqe->gather,
						     op->m_src,
						     op->src.offset,
						     op->src.length) ==
								UINT32_MAX) ||
			     (mlx5_compress_dseg_set(qp, &wqe->scatter,
						     op->m_dst,
						     op->dst.offset,
						     rte_pktmbuf_pkt_len(op->m_dst) -
							     op->dst.offset) ==
								UINT32_MAX))) {
			op->status = invalid ?
					RTE_COMP_OP_STATUS_INVALID_ARGS :
					RTE_COMP_OP_STATUS_ERROR;
			nb_ops -= remain;
			if (unlikely(nb_ops == 0))
				return 0;
			break;
		}
		wqe->gga_ctrl1 = xform->gga_ctrl1;
		wqe->opcode = rte_cpu_to_be_32(xform->opcode + (qp->pi << 8));
		qp->ops[idx] = op;
		qp->pi++;
	} while (--remain);
	qp->stats.enqueued_count += nb_ops;
	mlx5_doorbell_ring(&qp->priv->uar.bf_db, *(volatile uint64_t *)wqe,
			   qp->pi, &qp->qp.db_rec[MLX5_SND_DBR],
			   !qp->priv->uar.dbnc);
	return nb_ops;
}

static void
mlx5_compress_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe,
			    volatile uint32_t *opaq)
{
	size_t i;

	DRV_LOG(ERR, "Error cqe:");
	for (i = 0; i < sizeof(struct mlx5_error_cqe) >> 2; i += 4)
		DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
			cqe[i + 2], cqe[i + 3]);
	DRV_LOG(ERR, "\nError wqe:");
	for (i = 0; i < sizeof(struct mlx5_gga_wqe) >> 2; i += 4)
		DRV_LOG(ERR, "%08X %08X %08X %08X", wqe[i], wqe[i + 1],
			wqe[i + 2], wqe[i + 3]);
	DRV_LOG(ERR, "\nError opaq:");
	for (i = 0; i < sizeof(union mlx5_gga_compress_opaque) >> 2; i += 4)
		DRV_LOG(ERR, "%08X %08X %08X %08X", opaq[i], opaq[i + 1],
			opaq[i + 2], opaq[i + 3]);
}

static void
mlx5_compress_cqe_err_handle(struct mlx5_compress_qp *qp,
			     struct rte_comp_op *op)
{
	const uint32_t idx = qp->ci & (qp->entries_n - 1);
	volatile struct mlx5_error_cqe *cqe = (volatile struct mlx5_error_cqe *)
							&qp->cq.cqes[idx];
	volatile struct mlx5_gga_wqe *wqes = (volatile struct mlx5_gga_wqe *)
							qp->qp.wqes;
	volatile union mlx5_gga_compress_opaque *opaq = qp->opaque_mr.addr;

	volatile uint32_t *synd_word = RTE_PTR_ADD(cqe, MLX5_ERROR_CQE_SYNDROME_OFFSET);
	switch (*synd_word) {
	case MLX5_GGA_COMP_OUT_OF_SPACE_SYNDROME_BE:
		op->status = RTE_COMP_OP_STATUS_OUT_OF_SPACE_TERMINATED;
		DRV_LOG(DEBUG, "OUT OF SPACE error, output is bigger than dst buffer.");
		break;
	case MLX5_GGA_COMP_MISSING_BFINAL_SYNDROME_BE:
		DRV_LOG(DEBUG, "The last compressed block missed the B-final flag; maybe the compressed data is not complete or garbaged?");
		/* fallthrough */
	default:
		op->status = RTE_COMP_OP_STATUS_ERROR;
	}
	op->consumed = 0;
	op->produced = 0;
	op->output_chksum = 0;
	op->debug_status = rte_be_to_cpu_32(opaq[idx].syndrome) |
			   ((uint64_t)rte_be_to_cpu_32(cqe->syndrome) << 32);
	mlx5_compress_dump_err_objs((volatile uint32_t *)cqe,
				    (volatile uint32_t *)&wqes[idx],
				    (volatile uint32_t *)&opaq[idx]);
	qp->stats.dequeue_err_count++;
}

static uint16_t
mlx5_compress_dequeue_burst(void *queue_pair, struct rte_comp_op **ops,
			    uint16_t nb_ops)
{
	struct mlx5_compress_qp *qp = queue_pair;
	volatile struct mlx5_compress_xform *restrict xform;
	volatile struct mlx5_cqe *restrict cqe;
	volatile union mlx5_gga_compress_opaque *opaq = qp->opaque_mr.addr;
	struct rte_comp_op *restrict op;
	const unsigned int cq_size = qp->entries_n;
	const unsigned int mask = cq_size - 1;
	uint32_t idx;
	uint32_t next_idx = qp->ci & mask;
	const uint16_t max = RTE_MIN((uint16_t)(qp->pi - qp->ci), nb_ops);
	uint32_t crc32_idx = qp->priv->crc32_opaq_offs;
	uint16_t i = 0;
	int ret;

	if (unlikely(max == 0))
		return 0;
	do {
		idx = next_idx;
		next_idx = (qp->ci + 1) & mask;
		rte_prefetch0(&qp->cq.cqes[next_idx]);
		rte_prefetch0(qp->ops[next_idx]);
		op = qp->ops[idx];
		cqe = &qp->cq.cqes[idx];
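		/*
		 * Classify the CQE: owned by SW (completion ready), still
		 * owned by HW (stop polling), or reporting an error.
		 */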
		ret = check_cqe(cqe, cq_size, qp->ci);
		/*
		 * Be sure owner read is done before any other cookie field or
		 * opaque field.
		 */
		rte_io_rmb();
		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
				break;
			mlx5_compress_cqe_err_handle(qp, op);
		} else {
			xform = op->private_xform;
			op->status = RTE_COMP_OP_STATUS_SUCCESS;
			op->consumed = op->src.length;
			op->produced = rte_be_to_cpu_32(cqe->byte_cnt);
			MLX5_ASSERT(cqe->byte_cnt ==
				    opaq[idx].scattered_length);
			switch (xform->csum_type) {
			case RTE_COMP_CHECKSUM_CRC32:
				op->output_chksum = (uint64_t)rte_be_to_cpu_32
						(opaq[idx].data[crc32_idx]);
				break;
			case RTE_COMP_CHECKSUM_ADLER32:
				op->output_chksum = (uint64_t)rte_be_to_cpu_32
						(opaq[idx].data[crc32_idx + 1]);
				break;
			case RTE_COMP_CHECKSUM_CRC32_ADLER32:
				op->output_chksum = (uint64_t)rte_be_to_cpu_32
						(opaq[idx].data[crc32_idx]) |
						((uint64_t)rte_be_to_cpu_32
						(opaq[idx].data[crc32_idx + 1]) << 32);
				break;
			case RTE_COMP_CHECKSUM_XXHASH32:
				op->output_chksum = (uint64_t)rte_be_to_cpu_32
						(opaq[idx].v2.xxh32);
				break;
			default:
				break;
			}
		}
		ops[i++] = op;
		qp->ci++;
	} while (i < max);
	if (likely(i != 0)) {
		rte_io_wmb();
		qp->cq.db_rec[0] = rte_cpu_to_be_32(qp->ci);
		qp->stats.dequeued_count += i;
	}
	return i;
}

static int
mlx5_compress_args_check_handler(const char *key, const char *val, void *opaque)
{
	struct mlx5_compress_devarg_params *devarg_prms = opaque;

	if (strcmp(key, "log-block-size") == 0) {
		errno = 0;
		devarg_prms->log_block_sz = (uint32_t)strtoul(val, NULL, 10);
		if (errno) {
			DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.",
				key, val);
			return -errno;
		}
	}
	return 0;
}

static int
mlx5_compress_handle_devargs(struct mlx5_kvargs_ctrl *mkvlist,
			     struct mlx5_compress_devarg_params *devarg_prms,
			     struct mlx5_hca_attr *att)
{
	const char **params = (const char *[]){
		"log-block-size",
		NULL,
	};

	devarg_prms->log_block_sz = MLX5_GGA_COMP_LOG_BLOCK_SIZE_MAX;
	if (mkvlist == NULL)
		return 0;
	if (mlx5_kvargs_process(mkvlist, params,
				mlx5_compress_args_check_handler,
				devarg_prms) != 0) {
		DRV_LOG(ERR, "Devargs handler function Failed.");
		rte_errno = EINVAL;
		return -1;
	}
	if (devarg_prms->log_block_sz > MLX5_GGA_COMP_LOG_BLOCK_SIZE_MAX ||
	    devarg_prms->log_block_sz < att->compress_min_block_size) {
		DRV_LOG(WARNING, "Log block size provided is out of range("
			"%u); default it to %u.",
			devarg_prms->log_block_sz,
			MLX5_GGA_COMP_LOG_BLOCK_SIZE_MAX);
		devarg_prms->log_block_sz = MLX5_GGA_COMP_LOG_BLOCK_SIZE_MAX;
	}
	return 0;
}

static void
mlx5_compress_fill_caps(struct mlx5_compress_priv *priv,
			const struct mlx5_hca_attr *attr)
{
	struct rte_compressdev_capabilities caps[] = {
		{
			.algo = RTE_COMP_ALGO_NULL,
			.comp_feature_flags = RTE_COMP_FF_ADLER32_CHECKSUM |
					      RTE_COMP_FF_CRC32_CHECKSUM |
					      RTE_COMP_FF_CRC32_ADLER32_CHECKSUM |
					      RTE_COMP_FF_SHAREABLE_PRIV_XFORM,
		},
		{
			.algo = RTE_COMP_ALGO_DEFLATE,
			.comp_feature_flags = RTE_COMP_FF_ADLER32_CHECKSUM |
					      RTE_COMP_FF_CRC32_CHECKSUM |
					      RTE_COMP_FF_CRC32_ADLER32_CHECKSUM |
					      RTE_COMP_FF_SHAREABLE_PRIV_XFORM |
					      RTE_COMP_FF_HUFFMAN_FIXED |
					      RTE_COMP_FF_HUFFMAN_DYNAMIC,
			.window_size = {.min = 10, .max = 15, .increment = 1},
		},
		{
			.algo = RTE_COMP_ALGO_LZ4,
			.comp_feature_flags = RTE_COMP_FF_XXHASH32_CHECKSUM |
					      RTE_COMP_FF_SHAREABLE_PRIV_XFORM |
					      RTE_COMP_FF_LZ4_BLOCK_INDEPENDENCE,
			.window_size = {.min = 1, .max = 15, .increment = 1},
		},
		RTE_COMP_END_OF_CAPABILITIES_LIST()
	};
	priv->caps[0] = caps[0];
	priv->caps[1] = caps[1];
	if (attr->decomp_lz4_checksum_en || attr->decomp_lz4_no_checksum_en) {
		priv->caps[2] = caps[2];
		if (attr->decomp_lz4_checksum_en)
			priv->caps[2].comp_feature_flags |=
					RTE_COMP_FF_LZ4_BLOCK_WITH_CHECKSUM;
		priv->caps[3] = caps[3];
	} else
		priv->caps[2] = caps[3];
}

static int
mlx5_compress_dev_probe(struct mlx5_common_device *cdev,
			struct mlx5_kvargs_ctrl *mkvlist)
{
	struct rte_compressdev *compressdev;
	struct mlx5_compress_priv *priv;
	struct mlx5_hca_attr *attr = &cdev->config.hca_attr;
	struct mlx5_compress_devarg_params devarg_prms = {0};
	struct rte_compressdev_pmd_init_params init_params = {
		.name = "",
		.socket_id = cdev->dev->numa_node,
	};
	const char *ibdev_name = mlx5_os_get_ctx_device_name(cdev->ctx);
	uint32_t crc32_opaq_offset;

	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
		DRV_LOG(ERR, "Non-primary process type is not supported.");
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	if (!attr->decomp_lz4_checksum_en && !attr->decomp_lz4_no_checksum_en &&
	    !attr->decomp_deflate_v1_en && !attr->decomp_deflate_v2_en &&
	    !attr->mmo_decompress_sq_en && !attr->mmo_compress_qp_en &&
	    !attr->mmo_compress_sq_en && !attr->mmo_dma_qp_en &&
	    !attr->mmo_dma_sq_en) {
		DRV_LOG(ERR, "Not enough capabilities to support compress operations, maybe old FW/OFED version?");
		rte_errno = ENOTSUP;
		return -ENOTSUP;
	}
	mlx5_compress_handle_devargs(mkvlist, &devarg_prms, attr);
	compressdev = rte_compressdev_pmd_create(ibdev_name, cdev->dev,
						 sizeof(*priv), &init_params);
	if (compressdev == NULL) {
		DRV_LOG(ERR, "Failed to create device \"%s\".", ibdev_name);
		return -ENODEV;
	}
	DRV_LOG(INFO,
		"Compress device %s was created successfully.", ibdev_name);
	compressdev->dev_ops = &mlx5_compress_ops;
	compressdev->dequeue_burst = mlx5_compress_dequeue_burst;
	compressdev->enqueue_burst = mlx5_compress_enqueue_burst;
	compressdev->feature_flags = RTE_COMPDEV_FF_HW_ACCELERATED;
	priv = compressdev->data->dev_private;
	priv->log_block_sz = devarg_prms.log_block_sz;
	if (attr->decomp_deflate_v2_en || attr->decomp_lz4_checksum_en ||
	    attr->decomp_lz4_no_checksum_en)
		crc32_opaq_offset = offsetof(union mlx5_gga_compress_opaque,
					     v2.crc32);
	else
		crc32_opaq_offset = offsetof(union mlx5_gga_compress_opaque,
					     v1.crc32);
	MLX5_ASSERT((crc32_opaq_offset % 4) == 0);
	priv->crc32_opaq_offs = crc32_opaq_offset / 4;
	priv->cdev = cdev;
	priv->compressdev = compressdev;
	mlx5_compress_fill_caps(priv, attr);
	if (mlx5_devx_uar_prepare(cdev, &priv->uar) != 0) {
		rte_compressdev_pmd_destroy(priv->compressdev);
		return -1;
	}
	pthread_mutex_lock(&priv_list_lock);
	TAILQ_INSERT_TAIL(&mlx5_compress_priv_list, priv, next);
	pthread_mutex_unlock(&priv_list_lock);
	return 0;
}

static int
mlx5_compress_dev_remove(struct mlx5_common_device *cdev)
{
	struct mlx5_compress_priv *priv = NULL;

	pthread_mutex_lock(&priv_list_lock);
	TAILQ_FOREACH(priv, &mlx5_compress_priv_list, next)
		if (priv->compressdev->device == cdev->dev)
			break;
	if (priv)
		TAILQ_REMOVE(&mlx5_compress_priv_list, priv, next);
	pthread_mutex_unlock(&priv_list_lock);
	if (priv) {
		mlx5_devx_uar_release(&priv->uar);
		rte_compressdev_pmd_destroy(priv->compressdev);
	}
	return 0;
}

static const struct rte_pci_id mlx5_compress_pci_id_map[] = {
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_BLUEFIELD2)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_BLUEFIELD3)
	},
	{
		RTE_PCI_DEVICE(PCI_VENDOR_ID_MELLANOX,
			       PCI_DEVICE_ID_MELLANOX_CONNECTXVF)
	},
	{
		.vendor_id = 0
	}
};

static struct mlx5_class_driver mlx5_compress_driver = {
	.drv_class = MLX5_CLASS_COMPRESS,
	.name = RTE_STR(MLX5_COMPRESS_DRIVER_NAME),
	.id_table = mlx5_compress_pci_id_map,
	.probe = mlx5_compress_dev_probe,
	.remove = mlx5_compress_dev_remove,
};

RTE_INIT(rte_mlx5_compress_init)
{
	mlx5_common_init();
	if (mlx5_glue != NULL)
		mlx5_class_driver_register(&mlx5_compress_driver);
}

RTE_LOG_REGISTER_DEFAULT(mlx5_compress_logtype, NOTICE)
RTE_PMD_EXPORT_NAME(MLX5_COMPRESS_DRIVER_NAME, __COUNTER__);
RTE_PMD_REGISTER_PCI_TABLE(MLX5_COMPRESS_DRIVER_NAME, mlx5_compress_pci_id_map);
RTE_PMD_REGISTER_KMOD_DEP(MLX5_COMPRESS_DRIVER_NAME, "* ib_uverbs & mlx5_core & mlx5_ib");