/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2020 Mellanox Technologies, Ltd
 */

#include <unistd.h>
#include <strings.h>
#include <stdint.h>
#include <sys/mman.h>

#include <rte_malloc.h>
#include <rte_log.h>
#include <rte_errno.h>
#include <bus_pci_driver.h>
#include <rte_pci.h>
#include <rte_regexdev_driver.h>
#include <rte_mbuf.h>

#include <infiniband/mlx5dv.h>
#include <mlx5_glue.h>
#include <mlx5_common.h>
#include <mlx5_prm.h>

#include "mlx5_regex_utils.h"
#include "mlx5_rxp.h"
#include "mlx5_regex.h"

#define MLX5_REGEX_MAX_WQE_INDEX 0xffff
#define MLX5_REGEX_METADATA_SIZE ((size_t)64)
#define MLX5_REGEX_MAX_OUTPUT (((size_t)1) << 11)
#define MLX5_REGEX_WQE_CTRL_OFFSET 12
#define MLX5_REGEX_WQE_METADATA_OFFSET 16
#define MLX5_REGEX_WQE_GATHER_OFFSET 32
#define MLX5_REGEX_WQE_SCATTER_OFFSET 48
#define MLX5_REGEX_METADATA_OFF 32
#define MLX5_REGEX_UMR_WQE_SIZE 192
/* The maximum number of KLMs that can be added to one UMR indirect mkey. */
#define MLX5_REGEX_MAX_KLM_NUM 128
/* The KLM array size for one job. */
#define MLX5_REGEX_KLMS_SIZE \
	((MLX5_REGEX_MAX_KLM_NUM) * sizeof(struct mlx5_klm))
/* In WQE set mode, the pi wraps at a quarter of MLX5_REGEX_MAX_WQE_INDEX. */
#define MLX5_REGEX_UMR_QP_PI_IDX(pi, ops) \
	(((pi) + (ops)) & (MLX5_REGEX_MAX_WQE_INDEX >> 2))
#ifdef RTE_LIBRTE_MLX5_DEBUG
#define MLX5_REGEX_DEBUG 0
#endif
#ifdef HAVE_MLX5_UMR_IMKEY
static uint16_t max_nb_segs = MLX5_REGEX_MAX_KLM_NUM;
#else
static uint16_t max_nb_segs = 1;
#endif

uint16_t
mlx5_regexdev_max_segs_get(void)
{
	return max_nb_segs;
}

#ifdef MLX5_REGEX_DEBUG
static inline uint16_t
validate_ops(struct rte_regex_ops **ops, uint16_t nb_ops)
{
	uint16_t nb_left = nb_ops;
	struct rte_mbuf *mbuf;

	while (nb_left--) {
		mbuf = ops[nb_left]->mbuf;
		if ((mbuf->pkt_len > MLX5_RXP_MAX_JOB_LENGTH) ||
		    (mbuf->nb_segs > max_nb_segs)) {
			DRV_LOG(ERR, "Failed to validate regex ops");
			return 1;
		}
	}
	return 0;
}
#endif

static inline uint32_t
qp_size_get(struct mlx5_regex_hw_qp *qp)
{
	return (1U << qp->log_nb_desc);
}

static inline uint32_t
cq_size_get(struct mlx5_regex_cq *cq)
{
	return (1U << cq->log_nb_desc);
}

struct mlx5_regex_job {
	uint64_t user_id;
	volatile uint8_t *output;
	volatile uint8_t *metadata;
	struct mlx5_klm *imkey_array; /* Indirect mkey's KLM array. */
	struct mlx5_devx_obj *imkey; /* UMR WQE's indirect mkey. */
} __rte_cache_aligned;

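/*
 * SQ layout, derived from the offsets above and from setup_qps() below:
 * without UMR support each descriptor slot holds a single 64B RegEx WQE;
 * with UMR support each slot is a 4-WQEBB (256B) WQE set, i.e. a 192B
 * UMR (or NOP placeholder) WQE followed by the 64B RegEx WQE. The WQE
 * index of the RegEx WQE in slot pi is therefore pi in the former case
 * and pi * 4 + 3 in the latter.
 */
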
static inline void
set_data_seg(struct mlx5_wqe_data_seg *seg,
	     uint32_t length, uint32_t lkey,
	     uintptr_t address)
{
	seg->byte_count = rte_cpu_to_be_32(length);
	seg->lkey = rte_cpu_to_be_32(lkey);
	seg->addr = rte_cpu_to_be_64(address);
}

static inline void
set_metadata_seg(struct mlx5_wqe_metadata_seg *seg,
		 uint32_t mmo_control_31_0, uint32_t lkey,
		 uintptr_t address)
{
	seg->mmo_control_31_0 = htobe32(mmo_control_31_0);
	seg->lkey = rte_cpu_to_be_32(lkey);
	seg->addr = rte_cpu_to_be_64(address);
}

static inline void
set_regex_ctrl_seg(void *seg, uint8_t le, uint16_t subset_id0,
		   uint16_t subset_id1, uint16_t subset_id2,
		   uint16_t subset_id3, uint8_t ctrl)
{
	MLX5_SET(regexp_mmo_control, seg, le, le);
	MLX5_SET(regexp_mmo_control, seg, ctrl, ctrl);
	MLX5_SET(regexp_mmo_control, seg, subset_id_0, subset_id0);
	MLX5_SET(regexp_mmo_control, seg, subset_id_1, subset_id1);
	MLX5_SET(regexp_mmo_control, seg, subset_id_2, subset_id2);
	MLX5_SET(regexp_mmo_control, seg, subset_id_3, subset_id3);
}

static inline void
set_wqe_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg, uint16_t pi, uint8_t opcode,
		 uint8_t opmod, uint32_t qp_num, uint8_t fm_ce_se, uint8_t ds,
		 uint8_t signature, uint32_t imm)
{
	seg->opmod_idx_opcode = rte_cpu_to_be_32(((uint32_t)opmod << 24) |
						 ((uint32_t)pi << 8) |
						 opcode);
	seg->qpn_ds = rte_cpu_to_be_32((qp_num << 8) | ds);
	seg->fm_ce_se = fm_ce_se;
	seg->signature = signature;
	seg->imm = imm;
}

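/*
 * Fill in the RegEx WQE at descriptor slot pi: the control segment, the
 * RegEx control segment (subset IDs and control flags taken from the op's
 * req_flags) and the input data segment described by klm. The metadata
 * and output segments are pre-programmed by setup_qps() and left intact.
 */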
static inline void
__prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_hw_qp *qp_obj,
	   struct rte_regex_ops *op, struct mlx5_regex_job *job,
	   size_t pi, struct mlx5_klm *klm)
{
	size_t wqe_offset = (pi & (qp_size_get(qp_obj) - 1)) *
			    (MLX5_SEND_WQE_BB << (priv->has_umr ? 2 : 0)) +
			    (priv->has_umr ? MLX5_REGEX_UMR_WQE_SIZE : 0);
	uint16_t group0 = op->req_flags & RTE_REGEX_OPS_REQ_GROUP_ID0_VALID_F ?
				op->group_id0 : 0;
	uint16_t group1 = op->req_flags & RTE_REGEX_OPS_REQ_GROUP_ID1_VALID_F ?
				op->group_id1 : 0;
	uint16_t group2 = op->req_flags & RTE_REGEX_OPS_REQ_GROUP_ID2_VALID_F ?
				op->group_id2 : 0;
	uint16_t group3 = op->req_flags & RTE_REGEX_OPS_REQ_GROUP_ID3_VALID_F ?
				op->group_id3 : 0;
	uint8_t control = 0x0;

	if (op->req_flags & RTE_REGEX_OPS_REQ_MATCH_HIGH_PRIORITY_F)
		control = 0x1;
	else if (op->req_flags & RTE_REGEX_OPS_REQ_STOP_ON_MATCH_F)
		control = 0x2;

	/* For backward compatibility. */
	if (!(op->req_flags & (RTE_REGEX_OPS_REQ_GROUP_ID0_VALID_F |
			       RTE_REGEX_OPS_REQ_GROUP_ID1_VALID_F |
			       RTE_REGEX_OPS_REQ_GROUP_ID2_VALID_F |
			       RTE_REGEX_OPS_REQ_GROUP_ID3_VALID_F)))
		group0 = op->group_id0;
	uint8_t *wqe = (uint8_t *)(uintptr_t)qp_obj->qp_obj.wqes + wqe_offset;
	int ds = 4; /* ctrl + meta + input + output */

	set_wqe_ctrl_seg((struct mlx5_wqe_ctrl_seg *)wqe,
			 (priv->has_umr ? (pi * 4 + 3) : pi),
			 MLX5_OPCODE_MMO, MLX5_OPC_MOD_MMO_REGEX,
			 qp_obj->qp_obj.qp->id, 0, ds, 0, 0);
	set_regex_ctrl_seg(wqe + MLX5_REGEX_WQE_CTRL_OFFSET, 0, group0, group1,
			   group2, group3, control);
	struct mlx5_wqe_data_seg *input_seg =
		(struct mlx5_wqe_data_seg *)(wqe +
					     MLX5_REGEX_WQE_GATHER_OFFSET);
	input_seg->byte_count = rte_cpu_to_be_32(klm->byte_count);
	input_seg->addr = rte_cpu_to_be_64(klm->address);
	input_seg->lkey = klm->mkey;
	job->user_id = op->user_id;
}

static inline void
prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
	 struct mlx5_regex_hw_qp *qp_obj, struct rte_regex_ops *op,
	 struct mlx5_regex_job *job)
{
	struct mlx5_klm klm;

	klm.byte_count = rte_pktmbuf_data_len(op->mbuf);
	klm.mkey = mlx5_mr_mb2mr(&qp->mr_ctrl, op->mbuf);
	klm.address = rte_pktmbuf_mtod(op->mbuf, uintptr_t);
	__prep_one(priv, qp_obj, op, job, qp_obj->pi, &klm);
	qp_obj->db_pi = qp_obj->pi;
	qp_obj->pi = (qp_obj->pi + 1) & MLX5_REGEX_MAX_WQE_INDEX;
}

static inline void
send_doorbell(struct mlx5_regex_priv *priv, struct mlx5_regex_hw_qp *qp)
{
	size_t wqe_offset = (qp->db_pi & (qp_size_get(qp) - 1)) *
			    (MLX5_SEND_WQE_BB << (priv->has_umr ? 2 : 0)) +
			    (priv->has_umr ? MLX5_REGEX_UMR_WQE_SIZE : 0);
	uint8_t *wqe = (uint8_t *)(uintptr_t)qp->qp_obj.wqes + wqe_offset;
	uint32_t actual_pi = (priv->has_umr ? ((1 + qp->db_pi) * 4) : qp->db_pi)
			     & MLX5_REGEX_MAX_WQE_INDEX;

	/* OR into fm_ce_se instead of assigning, so the fence flag set by
	 * complete_umr_wqe() is not cleared.
	 */
	((struct mlx5_wqe_ctrl_seg *)wqe)->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
	mlx5_doorbell_ring(&priv->uar.bf_db, *(volatile uint64_t *)wqe,
			   actual_pi, &qp->qp_obj.db_rec[MLX5_SND_DBR],
			   !priv->uar.dbnc);
}

static inline int
get_free(struct mlx5_regex_hw_qp *qp, uint8_t has_umr)
{
	return (qp_size_get(qp) - ((qp->pi - qp->ci) &
			(has_umr ? (MLX5_REGEX_MAX_WQE_INDEX >> 2) :
			MLX5_REGEX_MAX_WQE_INDEX)));
}

static inline uint32_t
job_id_get(uint32_t qid, size_t qp_size, size_t index)
{
	return qid * qp_size + (index & (qp_size - 1));
}

#ifdef HAVE_MLX5_UMR_IMKEY
static inline int
mkey_klm_available(struct mlx5_klm *klm, uint32_t pos, uint32_t new)
{
	return (klm && ((pos + new) <= MLX5_REGEX_MAX_KLM_NUM));
}

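/*
 * Convert the NOP WQE leading the WQE set at umr_index into a UMR WQE
 * that maps the job's KLM array through its indirect mkey, then set the
 * initiator small-fence flag on the RegEx WQE that follows so it is not
 * started before the UMR takes effect.
 */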
static inline void
complete_umr_wqe(struct mlx5_regex_qp *qp, struct mlx5_regex_hw_qp *qp_obj,
		 struct mlx5_regex_job *mkey_job,
		 size_t umr_index, uint32_t klm_size, uint32_t total_len)
{
	size_t wqe_offset = (umr_index & (qp_size_get(qp_obj) - 1)) *
		(MLX5_SEND_WQE_BB * 4);
	struct mlx5_wqe_ctrl_seg *wqe = (struct mlx5_wqe_ctrl_seg *)((uint8_t *)
				   (uintptr_t)qp_obj->qp_obj.wqes + wqe_offset);
	struct mlx5_wqe_umr_ctrl_seg *ucseg =
				(struct mlx5_wqe_umr_ctrl_seg *)(wqe + 1);
	struct mlx5_wqe_mkey_context_seg *mkc =
				(struct mlx5_wqe_mkey_context_seg *)(ucseg + 1);
	struct mlx5_klm *iklm = (struct mlx5_klm *)(mkc + 1);
	uint16_t klm_align = RTE_ALIGN(klm_size, 4);

	memset(wqe, 0, MLX5_REGEX_UMR_WQE_SIZE);
	/* Set WQE control seg. Non-inline KLM UMR WQE size must be 9 WQE_DS. */
	set_wqe_ctrl_seg(wqe, (umr_index * 4), MLX5_OPCODE_UMR,
			 0, qp_obj->qp_obj.qp->id, 0, 9, 0,
			 rte_cpu_to_be_32(mkey_job->imkey->id));
	/* Set UMR WQE control seg. */
	ucseg->mkey_mask |= rte_cpu_to_be_64(MLX5_WQE_UMR_CTRL_MKEY_MASK_LEN |
				MLX5_WQE_UMR_CTRL_FLAG_TRNSLATION_OFFSET |
				MLX5_WQE_UMR_CTRL_MKEY_MASK_ACCESS_LOCAL_WRITE);
	ucseg->klm_octowords = rte_cpu_to_be_16(klm_align);
	/* Set mkey context seg. */
	mkc->len = rte_cpu_to_be_64(total_len);
	mkc->qpn_mkey = rte_cpu_to_be_32(0xffffff00 |
					(mkey_job->imkey->id & 0xff));
	/* Set UMR pointer to data seg. */
	iklm->address = rte_cpu_to_be_64
				((uintptr_t)((char *)mkey_job->imkey_array));
	iklm->mkey = rte_cpu_to_be_32(qp->imkey_addr->lkey);
	iklm->byte_count = rte_cpu_to_be_32(klm_align);
	/* Clear the padding memory. */
	memset((uint8_t *)&mkey_job->imkey_array[klm_size], 0,
	       sizeof(struct mlx5_klm) * (klm_align - klm_size));

	/* Add the following RegEx WQE with fence. */
	wqe = (struct mlx5_wqe_ctrl_seg *)
				(((uint8_t *)wqe) + MLX5_REGEX_UMR_WQE_SIZE);
	wqe->fm_ce_se |= MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
}

static inline void
prep_nop_regex_wqe_set(struct mlx5_regex_priv *priv,
		       struct mlx5_regex_hw_qp *qp, struct rte_regex_ops *op,
		       struct mlx5_regex_job *job, size_t pi,
		       struct mlx5_klm *klm)
{
	size_t wqe_offset = (pi & (qp_size_get(qp) - 1)) *
			    (MLX5_SEND_WQE_BB << 2);
	struct mlx5_wqe_ctrl_seg *wqe = (struct mlx5_wqe_ctrl_seg *)((uint8_t *)
				   (uintptr_t)qp->qp_obj.wqes + wqe_offset);

	/* Clear the WQE memory used as UMR WQE previously. */
	if ((rte_be_to_cpu_32(wqe->opmod_idx_opcode) & 0xff) != MLX5_OPCODE_NOP)
		memset(wqe, 0, MLX5_REGEX_UMR_WQE_SIZE);
	/* UMR WQE size is 9 DS, align the NOP WQE to 3 WQEBBs (12 DS). */
	set_wqe_ctrl_seg(wqe, pi * 4, MLX5_OPCODE_NOP, 0, qp->qp_obj.qp->id,
			 0, 12, 0, 0);
	__prep_one(priv, qp, op, job, pi, klm);
}

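/*
 * Prepare one WQE set (NOP/UMR WQE + RegEx WQE) per op. A single-segment
 * mbuf is described directly by one KLM; a multi-segment mbuf gets one
 * KLM per segment appended to a shared indirect mkey, which the RegEx
 * WQE then references through an offset-based address.
 */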
static inline void
prep_regex_umr_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
		       struct mlx5_regex_hw_qp *qp_obj,
		       struct rte_regex_ops **op, size_t nb_ops)
{
	struct mlx5_regex_job *job = NULL;
	size_t hw_qpid = qp_obj->qpn, mkey_job_id = 0;
	size_t left_ops = nb_ops;
	uint32_t klm_num = 0;
	uint32_t len = 0;
	struct mlx5_klm *mkey_klm = NULL;
	struct mlx5_klm klm;
	uintptr_t addr;

	while (left_ops--)
		rte_prefetch0(op[left_ops]);
	left_ops = nb_ops;
	/*
	 * Build the WQE set in reverse order. If the burst spans multiple
	 * mkeys, building in order would make it hard to address the last
	 * mkey's index, since the last RegEx WQE's index is only known
	 * once the build finishes.
	 */
	while (left_ops--) {
		struct rte_mbuf *mbuf = op[left_ops]->mbuf;
		size_t pi = MLX5_REGEX_UMR_QP_PI_IDX(qp_obj->pi, left_ops);

		if (mbuf->nb_segs > 1) {
			size_t scatter_size = 0;

			if (!mkey_klm_available(mkey_klm, klm_num,
						mbuf->nb_segs)) {
				/*
				 * The mkey's KLM is full, create the UMR
				 * WQE in the next WQE set.
				 */
				if (mkey_klm)
					complete_umr_wqe(qp, qp_obj,
						&qp->jobs[mkey_job_id],
						MLX5_REGEX_UMR_QP_PI_IDX(pi, 1),
						klm_num, len);
				/*
				 * Get the indirect mkey and KLM array index
				 * from the last WQE set.
				 */
				mkey_job_id = job_id_get(hw_qpid,
						qp_size_get(qp_obj), pi);
				mkey_klm = qp->jobs[mkey_job_id].imkey_array;
				klm_num = 0;
				len = 0;
			}
			/* Build RegEx WQE's data segment KLM. */
			klm.address = len;
			klm.mkey = rte_cpu_to_be_32
					(qp->jobs[mkey_job_id].imkey->id);
			while (mbuf) {
				addr = rte_pktmbuf_mtod(mbuf, uintptr_t);
				/* Build indirect mkey seg's KLM. */
				mkey_klm->mkey = mlx5_mr_mb2mr(&qp->mr_ctrl,
							       mbuf);
				mkey_klm->address = rte_cpu_to_be_64(addr);
				mkey_klm->byte_count = rte_cpu_to_be_32
						(rte_pktmbuf_data_len(mbuf));
				/*
				 * Save the mbuf's total size for the RegEx
				 * data segment.
				 */
				scatter_size += rte_pktmbuf_data_len(mbuf);
				mkey_klm++;
				klm_num++;
				mbuf = mbuf->next;
			}
			len += scatter_size;
			klm.byte_count = scatter_size;
		} else {
			/* The single mbuf case. Build the KLM directly. */
			klm.mkey = mlx5_mr_mb2mr(&qp->mr_ctrl, mbuf);
			klm.address = rte_pktmbuf_mtod(mbuf, uintptr_t);
			klm.byte_count = rte_pktmbuf_data_len(mbuf);
		}
		job = &qp->jobs[job_id_get(hw_qpid, qp_size_get(qp_obj), pi)];
		/*
		 * Build the NOP + RegEx WQE set by default. The first NOP WQE
		 * will be updated later as a UMR WQE if scattered mbufs exist.
		 */
		prep_nop_regex_wqe_set(priv, qp_obj, op[left_ops], job, pi,
					&klm);
	}
	/*
	 * Scattered mbufs have been added to the KLM array. Complete the
	 * build of the UMR WQE, updating the first NOP WQE as a UMR WQE.
	 */
	if (mkey_klm)
		complete_umr_wqe(qp, qp_obj, &qp->jobs[mkey_job_id], qp_obj->pi,
				 klm_num, len);
	qp_obj->db_pi = MLX5_REGEX_UMR_QP_PI_IDX(qp_obj->pi, nb_ops - 1);
	qp_obj->pi = MLX5_REGEX_UMR_QP_PI_IDX(qp_obj->pi, nb_ops);
}

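/**
 * DPDK callback for enqueue in UMR WQE-set mode.
 *
 * @param dev
 *   Pointer to the regex dev structure.
 * @param qp_id
 *   The queue to enqueue the traffic to.
 * @param ops
 *   List of regex ops to enqueue.
 * @param nb_ops
 *   Number of ops in ops parameter.
 *
 * @return
 *   Number of ops successfully enqueued (<= nb_ops).
 */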
uint16_t
mlx5_regexdev_enqueue_gga(struct rte_regexdev *dev, uint16_t qp_id,
			  struct rte_regex_ops **ops, uint16_t nb_ops)
{
	struct mlx5_regex_priv *priv = dev->data->dev_private;
	struct mlx5_regex_qp *queue = &priv->qps[qp_id];
	struct mlx5_regex_hw_qp *qp_obj;
	size_t hw_qpid, nb_left = nb_ops, nb_desc;

#ifdef MLX5_REGEX_DEBUG
	if (validate_ops(ops, nb_ops))
		return 0;
#endif

	while ((hw_qpid = ffsll(queue->free_qps))) {
		hw_qpid--; /* ffsll returns 1 for bit 0 */
		qp_obj = &queue->qps[hw_qpid];
		nb_desc = get_free(qp_obj, priv->has_umr);
		if (nb_desc) {
			/* The number of ops handled cannot exceed nb_ops. */
			if (nb_desc > nb_left)
				nb_desc = nb_left;
			else
				queue->free_qps &= ~(1ULL << hw_qpid);
			prep_regex_umr_wqe_set(priv, queue, qp_obj, ops,
					       nb_desc);
			send_doorbell(priv, qp_obj);
			nb_left -= nb_desc;
		}
		if (!nb_left)
			break;
		ops += nb_desc;
	}
	nb_ops -= nb_left;
	queue->pi += nb_ops;
	return nb_ops;
}
#endif

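/**
 * DPDK callback for enqueue, one RegEx WQE per op.
 *
 * @param dev
 *   Pointer to the regex dev structure.
 * @param qp_id
 *   The queue to enqueue the traffic to.
 * @param ops
 *   List of regex ops to enqueue.
 * @param nb_ops
 *   Number of ops in ops parameter.
 *
 * @return
 *   Number of ops successfully enqueued (<= nb_ops).
 */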
uint16_t
mlx5_regexdev_enqueue(struct rte_regexdev *dev, uint16_t qp_id,
		      struct rte_regex_ops **ops, uint16_t nb_ops)
{
	struct mlx5_regex_priv *priv = dev->data->dev_private;
	struct mlx5_regex_qp *queue = &priv->qps[qp_id];
	struct mlx5_regex_hw_qp *qp_obj;
	size_t hw_qpid, job_id, i = 0;

#ifdef MLX5_REGEX_DEBUG
	if (validate_ops(ops, nb_ops))
		return 0;
#endif

	while ((hw_qpid = ffsll(queue->free_qps))) {
		hw_qpid--; /* ffsll returns 1 for bit 0 */
		qp_obj = &queue->qps[hw_qpid];
		while (get_free(qp_obj, priv->has_umr)) {
			job_id = job_id_get(hw_qpid, qp_size_get(qp_obj),
					    qp_obj->pi);
			prep_one(priv, queue, qp_obj, ops[i],
				 &queue->jobs[job_id]);
			i++;
			if (unlikely(i == nb_ops)) {
				send_doorbell(priv, qp_obj);
				goto out;
			}
		}
		queue->free_qps &= ~(1ULL << hw_qpid);
		send_doorbell(priv, qp_obj);
	}

out:
	queue->pi += i;
	return i;
}

#define MLX5_REGEX_RESP_SZ 8

static inline void
extract_result(struct rte_regex_ops *op, struct mlx5_regex_job *job)
{
	size_t j;
	size_t offset;
	uint16_t status;

	op->user_id = job->user_id;
	op->nb_matches = MLX5_GET_VOLATILE(regexp_metadata, job->metadata +
					   MLX5_REGEX_METADATA_OFF,
					   match_count);
	op->nb_actual_matches = MLX5_GET_VOLATILE(regexp_metadata,
						  job->metadata +
						  MLX5_REGEX_METADATA_OFF,
						  detected_match_count);
	for (j = 0; j < op->nb_matches; j++) {
		offset = MLX5_REGEX_RESP_SZ * j;
		op->matches[j].rule_id =
			MLX5_GET_VOLATILE(regexp_match_tuple,
					  (job->output + offset), rule_id);
		op->matches[j].start_offset =
			MLX5_GET_VOLATILE(regexp_match_tuple,
					  (job->output + offset), start_ptr);
		op->matches[j].len =
			MLX5_GET_VOLATILE(regexp_match_tuple,
					  (job->output + offset), length);
	}
	status = MLX5_GET_VOLATILE(regexp_metadata, job->metadata +
				   MLX5_REGEX_METADATA_OFF,
				   status);
	op->rsp_flags = 0;
	if (status & MLX5_RXP_RESP_STATUS_PMI_SOJ)
		op->rsp_flags |= RTE_REGEX_OPS_RSP_PMI_SOJ_F;
	if (status & MLX5_RXP_RESP_STATUS_PMI_EOJ)
		op->rsp_flags |= RTE_REGEX_OPS_RSP_PMI_EOJ_F;
	if (status & MLX5_RXP_RESP_STATUS_MAX_LATENCY)
		op->rsp_flags |= RTE_REGEX_OPS_RSP_MAX_SCAN_TIMEOUT_F;
	if (status & MLX5_RXP_RESP_STATUS_MAX_MATCH)
		op->rsp_flags |= RTE_REGEX_OPS_RSP_MAX_MATCH_F;
	if (status & MLX5_RXP_RESP_STATUS_MAX_PREFIX)
		op->rsp_flags |= RTE_REGEX_OPS_RSP_MAX_PREFIX_F;
	if (status & MLX5_RXP_RESP_STATUS_MAX_PRI_THREADS)
		op->rsp_flags |= RTE_REGEX_OPS_RSP_RESOURCE_LIMIT_REACHED_F;
	if (status & MLX5_RXP_RESP_STATUS_MAX_SEC_THREADS)
		op->rsp_flags |= RTE_REGEX_OPS_RSP_RESOURCE_LIMIT_REACHED_F;
}

static inline volatile struct mlx5_cqe *
poll_one(struct mlx5_regex_cq *cq)
{
	volatile struct mlx5_cqe *cqe;
	size_t next_cqe_offset;

	next_cqe_offset = (cq->ci & (cq_size_get(cq) - 1));
	cqe = (volatile struct mlx5_cqe *)(cq->cq_obj.cqes + next_cqe_offset);
	rte_io_wmb();

	int ret = check_cqe(cqe, cq_size_get(cq), cq->ci);

	if (unlikely(ret == MLX5_CQE_STATUS_ERR)) {
		DRV_LOG(ERR, "Completion with error on qp 0x%x", 0);
		return NULL;
	}

	if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN))
		return NULL;

	return cqe;
}

/**
 * DPDK callback for dequeue.
 *
 * @param dev
 *   Pointer to the regex dev structure.
 * @param qp_id
 *   The queue to dequeue the completed ops from.
 * @param ops
 *   List of regex ops to dequeue.
 * @param nb_ops
 *   Number of ops in ops parameter.
 *
 * @return
 *   Number of ops successfully dequeued (<= nb_ops).
 */
uint16_t
mlx5_regexdev_dequeue(struct rte_regexdev *dev, uint16_t qp_id,
		      struct rte_regex_ops **ops, uint16_t nb_ops)
{
	struct mlx5_regex_priv *priv = dev->data->dev_private;
	struct mlx5_regex_qp *queue = &priv->qps[qp_id];
	struct mlx5_regex_cq *cq = &queue->cq;
	volatile struct mlx5_cqe *cqe;
	size_t i = 0;

	while ((cqe = poll_one(cq))) {
		uint16_t wq_counter
			= (rte_be_to_cpu_16(cqe->wqe_counter) + 1) &
			  MLX5_REGEX_MAX_WQE_INDEX;
		size_t hw_qpid = cqe->user_index_bytes[2];
		struct mlx5_regex_hw_qp *qp_obj = &queue->qps[hw_qpid];

		/* In UMR mode the WQE counter moves per WQE set (4 WQEBBs). */
		if (priv->has_umr)
			wq_counter >>= 2;
		while (qp_obj->ci != wq_counter) {
			if (unlikely(i == nb_ops)) {
				/* Return without updating cq->ci. */
				goto out;
			}
			uint32_t job_id = job_id_get(hw_qpid,
					qp_size_get(qp_obj), qp_obj->ci);
			extract_result(ops[i], &queue->jobs[job_id]);
			qp_obj->ci = (qp_obj->ci + 1) & (priv->has_umr ?
				 (MLX5_REGEX_MAX_WQE_INDEX >> 2) :
				  MLX5_REGEX_MAX_WQE_INDEX);
			i++;
		}
		cq->ci = (cq->ci + 1) & 0xffffff;
		rte_wmb();
		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->ci);
		queue->free_qps |= (1ULL << hw_qpid);
	}

out:
	queue->ci += i;
	return i;
}

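/*
 * Program the static parts of every WQE slot once at setup time: a NOP
 * placeholder for the UMR WQE (UMR mode only) plus the metadata and
 * output data segments, so that the data path only needs to rewrite the
 * control and input segments.
 */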
static void
setup_qps(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *queue)
{
	size_t hw_qpid, entry;
	uint32_t job_id;

	for (hw_qpid = 0; hw_qpid < queue->nb_obj; hw_qpid++) {
		struct mlx5_regex_hw_qp *qp_obj = &queue->qps[hw_qpid];
		uint8_t *wqe = (uint8_t *)(uintptr_t)qp_obj->qp_obj.wqes;

		for (entry = 0; entry < qp_size_get(qp_obj); entry++) {
			job_id = hw_qpid * qp_size_get(qp_obj) + entry;
			struct mlx5_regex_job *job = &queue->jobs[job_id];

			/* Fill UMR WQE with NOP in advance. */
			if (priv->has_umr) {
				set_wqe_ctrl_seg
					((struct mlx5_wqe_ctrl_seg *)wqe,
					 entry * 2, MLX5_OPCODE_NOP, 0,
					 qp_obj->qp_obj.qp->id, 0, 12, 0, 0);
				wqe += MLX5_REGEX_UMR_WQE_SIZE;
			}
			set_metadata_seg((struct mlx5_wqe_metadata_seg *)
					 (wqe + MLX5_REGEX_WQE_METADATA_OFFSET),
					 0, queue->metadata->lkey,
					 (uintptr_t)job->metadata);
			set_data_seg((struct mlx5_wqe_data_seg *)
				     (wqe + MLX5_REGEX_WQE_SCATTER_OFFSET),
				     MLX5_REGEX_MAX_OUTPUT,
				     queue->outputs->lkey,
				     (uintptr_t)job->output);
			wqe += 64;
		}
		queue->free_qps |= 1ULL << hw_qpid;
	}
}

static int
setup_buffers(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp)
{
	struct ibv_pd *pd = priv->cdev->pd;
	uint32_t i;
	int err;

	void *ptr = rte_calloc(__func__, qp->nb_desc,
			       MLX5_REGEX_METADATA_SIZE,
			       MLX5_REGEX_METADATA_SIZE);
	if (!ptr)
		return -ENOMEM;

	qp->metadata = mlx5_glue->reg_mr(pd, ptr,
					 MLX5_REGEX_METADATA_SIZE * qp->nb_desc,
					 IBV_ACCESS_LOCAL_WRITE);
	if (!qp->metadata) {
		DRV_LOG(ERR, "Failed to register metadata");
		rte_free(ptr);
		return -EINVAL;
	}

	ptr = rte_calloc(__func__, qp->nb_desc,
			 MLX5_REGEX_MAX_OUTPUT,
			 MLX5_REGEX_MAX_OUTPUT);
	if (!ptr) {
		err = -ENOMEM;
		goto err_output;
	}
	qp->outputs = mlx5_glue->reg_mr(pd, ptr,
					MLX5_REGEX_MAX_OUTPUT * qp->nb_desc,
					IBV_ACCESS_LOCAL_WRITE);
	if (!qp->outputs) {
		rte_free(ptr);
		DRV_LOG(ERR, "Failed to register output");
		err = -EINVAL;
		goto err_output;
	}

	if (priv->has_umr) {
		ptr = rte_calloc(__func__, qp->nb_desc, MLX5_REGEX_KLMS_SIZE,
				 MLX5_REGEX_KLMS_SIZE);
		if (!ptr) {
			err = -ENOMEM;
			goto err_imkey;
		}
		qp->imkey_addr = mlx5_glue->reg_mr(pd, ptr,
					MLX5_REGEX_KLMS_SIZE * qp->nb_desc,
					IBV_ACCESS_LOCAL_WRITE);
		if (!qp->imkey_addr) {
			rte_free(ptr);
			DRV_LOG(ERR, "Failed to register KLM buffer");
			err = -EINVAL;
			goto err_imkey;
		}
	}

	/* Distribute buffers to jobs. */
	for (i = 0; i < qp->nb_desc; i++) {
		qp->jobs[i].output =
			(uint8_t *)qp->outputs->addr +
			(i % qp->nb_desc) * MLX5_REGEX_MAX_OUTPUT;
		qp->jobs[i].metadata =
			(uint8_t *)qp->metadata->addr +
			(i % qp->nb_desc) * MLX5_REGEX_METADATA_SIZE;
		if (qp->imkey_addr)
			qp->jobs[i].imkey_array = (struct mlx5_klm *)
				qp->imkey_addr->addr +
				(i % qp->nb_desc) * MLX5_REGEX_MAX_KLM_NUM;
	}

	return 0;

err_imkey:
	/* Deregister before freeing the underlying memory. */
	ptr = qp->outputs->addr;
	mlx5_glue->dereg_mr(qp->outputs);
	rte_free(ptr);
err_output:
	ptr = qp->metadata->addr;
	mlx5_glue->dereg_mr(qp->metadata);
	rte_free(ptr);
	return err;
}

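/*
 * Set up the fast path for one queue pair: allocate the job array and
 * its buffers, pre-program the WQEs and, when UMR is supported, create
 * one indirect mkey per job over its KLM array.
 */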
int
mlx5_regexdev_setup_fastpath(struct mlx5_regex_priv *priv, uint32_t qp_id)
{
	struct mlx5_regex_qp *qp = &priv->qps[qp_id];
	struct mlx5_klm klm = { 0 };
	struct mlx5_devx_mkey_attr attr = {
		.klm_array = &klm,
		.klm_num = 1,
		.umr_en = 1,
	};
	uint32_t i;
	int err = 0;

	qp->jobs = rte_calloc(__func__, qp->nb_desc, sizeof(*qp->jobs), 64);
	if (!qp->jobs)
		return -ENOMEM;
	err = setup_buffers(priv, qp);
	if (err) {
		rte_free(qp->jobs);
		qp->jobs = NULL;
		return err;
	}

	setup_qps(priv, qp);

	if (priv->has_umr) {
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
		attr.pd = priv->cdev->pdn;
#endif
		for (i = 0; i < qp->nb_desc; i++) {
			attr.klm_num = MLX5_REGEX_MAX_KLM_NUM;
			attr.klm_array = qp->jobs[i].imkey_array;
			qp->jobs[i].imkey = mlx5_devx_cmd_mkey_create
						(priv->cdev->ctx, &attr);
			if (!qp->jobs[i].imkey) {
				err = -rte_errno;
				DRV_LOG(ERR, "Failed to allocate imkey.");
				mlx5_regexdev_teardown_fastpath(priv, qp_id);
				/* Teardown has freed qp->jobs, stop here. */
				break;
			}
		}
	}
	return err;
}

static void
free_buffers(struct mlx5_regex_qp *qp)
{
	void *ptr;

	/* Save the buffer pointers before deregistering the MRs. */
	if (qp->imkey_addr) {
		ptr = qp->imkey_addr->addr;
		mlx5_glue->dereg_mr(qp->imkey_addr);
		rte_free(ptr);
	}
	if (qp->metadata) {
		ptr = qp->metadata->addr;
		mlx5_glue->dereg_mr(qp->metadata);
		rte_free(ptr);
	}
	if (qp->outputs) {
		ptr = qp->outputs->addr;
		mlx5_glue->dereg_mr(qp->outputs);
		rte_free(ptr);
	}
}

void
mlx5_regexdev_teardown_fastpath(struct mlx5_regex_priv *priv, uint32_t qp_id)
{
	struct mlx5_regex_qp *qp = &priv->qps[qp_id];
	uint32_t i;

	if (qp->jobs) {
		for (i = 0; i < qp->nb_desc; i++) {
			if (qp->jobs[i].imkey)
				claim_zero(mlx5_devx_cmd_destroy
						(qp->jobs[i].imkey));
		}
		free_buffers(qp);
		rte_free(qp->jobs);
		qp->jobs = NULL;
	}
}