/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2020 Mellanox Technologies, Ltd
 */
#include <mlx5_prm.h>
#include <rte_malloc.h>
#include <rte_cycles.h>
#include <rte_eal_paging.h>

#include <mlx5_malloc.h>
#include <mlx5_common_os.h>
#include <mlx5_common_devx.h>

#include "mlx5.h"
#include "mlx5_flow.h"

/**
 * Destroy Completion Queue used for ASO access.
 *
 * @param[in] cq
 *   ASO CQ to destroy.
 */
static void
mlx5_aso_cq_destroy(struct mlx5_aso_cq *cq)
{
	if (cq->cq_obj.cq)
		mlx5_devx_cq_destroy(&cq->cq_obj);
	memset(cq, 0, sizeof(*cq));
}

/**
 * Create Completion Queue used for ASO access.
 *
 * @param[in] ctx
 *   Context returned from mlx5 open_device() glue function.
 * @param[in/out] cq
 *   Pointer to CQ to create.
 * @param[in] log_desc_n
 *   Log of number of descriptors in queue.
 * @param[in] socket
 *   Socket to use for allocation.
 * @param[in] uar_page_id
 *   UAR page ID to use.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_aso_cq_create(void *ctx, struct mlx5_aso_cq *cq, uint16_t log_desc_n,
		   int socket, int uar_page_id)
{
	struct mlx5_devx_cq_attr attr = {
		.uar_page_id = uar_page_id,
	};

	cq->log_desc_n = log_desc_n;
	cq->cq_ci = 0;
	return mlx5_devx_cq_create(ctx, &cq->cq_obj, log_desc_n, &attr, socket);
}

/**
 * Free MR resources.
 *
 * @param[in] cdev
 *   Pointer to the mlx5 common device.
 * @param[in] mr
 *   MR to free.
 */
static void
mlx5_aso_dereg_mr(struct mlx5_common_device *cdev, struct mlx5_pmd_mr *mr)
{
	void *addr = mr->addr;

	cdev->mr_scache.dereg_mr_cb(mr);
	mlx5_free(addr);
	memset(mr, 0, sizeof(*mr));
}

/**
 * Register Memory Region.
 *
 * @param[in] cdev
 *   Pointer to the mlx5 common device.
 * @param[in] length
 *   Size of MR buffer.
 * @param[in/out] mr
 *   Pointer to MR to create.
 * @param[in] socket
 *   Socket to use for allocation.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_aso_reg_mr(struct mlx5_common_device *cdev, size_t length,
		struct mlx5_pmd_mr *mr, int socket)
{
	int ret;

	mr->addr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, length, 4096,
			       socket);
	if (!mr->addr) {
		DRV_LOG(ERR, "Failed to create ASO bits mem for MR.");
		return -1;
	}
	ret = cdev->mr_scache.reg_mr_cb(cdev->pd, mr->addr, length, mr);
	if (ret) {
		DRV_LOG(ERR, "Failed to create direct Mkey.");
		mlx5_free(mr->addr);
		return -1;
	}
	return 0;
}

/**
 * Destroy Send Queue used for ASO access.
 *
 * @param[in] sq
 *   ASO SQ to destroy.
 */
static void
mlx5_aso_destroy_sq(struct mlx5_aso_sq *sq)
{
	mlx5_devx_sq_destroy(&sq->sq_obj);
	mlx5_aso_cq_destroy(&sq->cq);
	memset(sq, 0, sizeof(*sq));
}

/**
 * Initialize Send Queue used for ASO access.
 *
 * @param[in] sq
 *   ASO SQ to initialize.
 */
static void
mlx5_aso_age_init_sq(struct mlx5_aso_sq *sq)
{
	volatile struct mlx5_aso_wqe *restrict wqe;
	int i;
	int size = 1 << sq->log_desc_n;
	uint64_t addr;

	/* All the next fields state should stay constant. */
	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
							   (sizeof(*wqe) >> 4));
		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
		addr = (uint64_t)((uint64_t *)sq->mr.addr + i *
					   MLX5_ASO_AGE_ACTIONS_PER_POOL / 64);
		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
		wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
			(0u |
			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
		wqe->aso_cseg.data_mask = RTE_BE64(UINT64_MAX);
	}
}

/**
 * Initialize Send Queue used for ASO flow meter access.
 *
 * @param[in] sq
 *   ASO SQ to initialize.
 */
static void
mlx5_aso_mtr_init_sq(struct mlx5_aso_sq *sq)
{
	volatile struct mlx5_aso_wqe *restrict wqe;
	int i;
	int size = 1 << sq->log_desc_n;

	/* All the next fields state should stay constant. */
	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
							   (sizeof(*wqe) >> 4));
		wqe->aso_cseg.operand_masks = RTE_BE32(0u |
			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
						   MLX5_COMP_MODE_OFFSET);
	}
}

/*
 * Initialize Send Queue used for ASO connection tracking.
 *
 * @param[in] sq
 *   ASO SQ to initialize.
 */
static void
mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
{
	volatile struct mlx5_aso_wqe *restrict wqe;
	int i;
	int size = 1 << sq->log_desc_n;
	uint64_t addr;

	/* All the next fields state should stay constant. */
	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
							   (sizeof(*wqe) >> 4));
		/* One unique MR for the query data. */
		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
		/* Magic number 64 represents the length of an ASO CT object. */
		addr = (uint64_t)((uintptr_t)sq->mr.addr + i * 64);
		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
		/*
		 * The values of operand_masks are different for modify
		 * and query.
		 * And data_mask may be different for each modification. In
		 * query, it could be zero and ignored.
		 * CQE generation is always needed, in order to decide when
		 * it is available to create the flow or read the data.
		 */
		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
						   MLX5_COMP_MODE_OFFSET);
	}
}

/**
 * Create Send Queue used for ASO access.
 *
 * @param[in] ctx
 *   Context returned from mlx5 open_device() glue function.
 * @param[in/out] sq
 *   Pointer to SQ to create.
 * @param[in] socket
 *   Socket to use for allocation.
 * @param[in] uar
 *   User Access Region object.
 * @param[in] pdn
 *   Protection Domain number to use.
 * @param[in] log_desc_n
 *   Log of number of descriptors in queue.
 * @param[in] ts_format
 *   Timestamp format supported by the queue.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket, void *uar,
		   uint32_t pdn, uint16_t log_desc_n, uint32_t ts_format)
{
	struct mlx5_devx_create_sq_attr attr = {
		.user_index = 0xFFFF,
		.wq_attr = (struct mlx5_devx_wq_attr){
			.pd = pdn,
			.uar_page = mlx5_os_get_devx_uar_page_id(uar),
		},
		.ts_format = mlx5_ts_format_conv(ts_format),
	};
	struct mlx5_devx_modify_sq_attr modify_attr = {
		.state = MLX5_SQC_STATE_RDY,
	};
	uint16_t log_wqbb_n;
	int ret;

	if (mlx5_aso_cq_create(ctx, &sq->cq, log_desc_n, socket,
			       mlx5_os_get_devx_uar_page_id(uar)))
		goto error;
	sq->log_desc_n = log_desc_n;
	attr.cqn = sq->cq.cq_obj.cq->id;
	/* The mlx5_aso_wqe is twice the size of mlx5_wqe, so double the WQEBB count. */
	log_wqbb_n = log_desc_n + 1;
	ret = mlx5_devx_sq_create(ctx, &sq->sq_obj, log_wqbb_n, &attr, socket);
	if (ret) {
		DRV_LOG(ERR, "Can't create SQ object.");
		rte_errno = ENOMEM;
		goto error;
	}
	ret = mlx5_devx_cmd_modify_sq(sq->sq_obj.sq, &modify_attr);
	if (ret) {
		DRV_LOG(ERR, "Can't change SQ state to ready.");
		rte_errno = ENOMEM;
		goto error;
	}
	sq->pi = 0;
	sq->head = 0;
	sq->tail = 0;
	sq->sqn = sq->sq_obj.sq->id;
	rte_spinlock_init(&sq->sqsl);
	return 0;
error:
	mlx5_aso_destroy_sq(sq);
	return -1;
}

/**
 * API to create and initialize Send Queue used for ASO access.
 *
 * @param[in] sh
 *   Pointer to shared device context.
 * @param[in] aso_opc_mod
 *   Mode of ASO feature.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
		    enum mlx5_access_aso_opc_mod aso_opc_mod)
{
	uint32_t sq_desc_n = 1 << MLX5_ASO_QUEUE_LOG_DESC;
	struct mlx5_common_device *cdev = sh->cdev;

	switch (aso_opc_mod) {
	case ASO_OPC_MOD_FLOW_HIT:
		if (mlx5_aso_reg_mr(cdev, (MLX5_ASO_AGE_ACTIONS_PER_POOL / 8) *
				    sq_desc_n, &sh->aso_age_mng->aso_sq.mr, 0))
			return -1;
		if (mlx5_aso_sq_create(cdev->ctx, &sh->aso_age_mng->aso_sq, 0,
				       sh->tx_uar.obj, cdev->pdn,
				       MLX5_ASO_QUEUE_LOG_DESC,
				       cdev->config.hca_attr.sq_ts_format)) {
			mlx5_aso_dereg_mr(cdev, &sh->aso_age_mng->aso_sq.mr);
			return -1;
		}
		mlx5_aso_age_init_sq(&sh->aso_age_mng->aso_sq);
		break;
	case ASO_OPC_MOD_POLICER:
		if (mlx5_aso_sq_create(cdev->ctx, &sh->mtrmng->pools_mng.sq, 0,
				       sh->tx_uar.obj, cdev->pdn,
				       MLX5_ASO_QUEUE_LOG_DESC,
				       cdev->config.hca_attr.sq_ts_format))
			return -1;
		mlx5_aso_mtr_init_sq(&sh->mtrmng->pools_mng.sq);
		break;
	case ASO_OPC_MOD_CONNECTION_TRACKING:
		/* 64B per object for query. */
		if (mlx5_aso_reg_mr(cdev, 64 * sq_desc_n,
				    &sh->ct_mng->aso_sq.mr, 0))
			return -1;
		if (mlx5_aso_sq_create(cdev->ctx, &sh->ct_mng->aso_sq, 0,
				       sh->tx_uar.obj, cdev->pdn,
				       MLX5_ASO_QUEUE_LOG_DESC,
				       cdev->config.hca_attr.sq_ts_format)) {
			mlx5_aso_dereg_mr(cdev, &sh->ct_mng->aso_sq.mr);
			return -1;
		}
		mlx5_aso_ct_init_sq(&sh->ct_mng->aso_sq);
		break;
	default:
		DRV_LOG(ERR, "Unknown ASO operation mode");
		return -1;
	}
	return 0;
}
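
/*
 * Illustrative usage sketch (an assumption about caller code, not taken from
 * this file): a caller owning the shared context is expected to pair the
 * init and uninit calls per ASO mode, e.g.:
 *
 *	if (mlx5_aso_queue_init(sh, ASO_OPC_MOD_POLICER))
 *		return -1;
 *	...
 *	mlx5_aso_queue_uninit(sh, ASO_OPC_MOD_POLICER);
 */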

/**
 * API to destroy Send Queue used for ASO access.
 *
 * @param[in] sh
 *   Pointer to shared device context.
 * @param[in] aso_opc_mod
 *   Mode of ASO feature.
 */
void
mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
		      enum mlx5_access_aso_opc_mod aso_opc_mod)
{
	struct mlx5_aso_sq *sq;

	switch (aso_opc_mod) {
	case ASO_OPC_MOD_FLOW_HIT:
		mlx5_aso_dereg_mr(sh->cdev, &sh->aso_age_mng->aso_sq.mr);
		sq = &sh->aso_age_mng->aso_sq;
		break;
	case ASO_OPC_MOD_POLICER:
		sq = &sh->mtrmng->pools_mng.sq;
		break;
	case ASO_OPC_MOD_CONNECTION_TRACKING:
		mlx5_aso_dereg_mr(sh->cdev, &sh->ct_mng->aso_sq.mr);
		sq = &sh->ct_mng->aso_sq;
		break;
	default:
		DRV_LOG(ERR, "Unknown ASO operation mode");
		return;
	}
	mlx5_aso_destroy_sq(sq);
}

/**
 * Write a burst of WQEs to ASO SQ.
 *
 * @param[in] sh
 *   Pointer to shared device context.
 * @param[in] n
 *   Index of the last valid pool.
 *
 * @return
 *   Number of WQEs in burst.
 */
static uint16_t
mlx5_aso_sq_enqueue_burst(struct mlx5_dev_ctx_shared *sh, uint16_t n)
{
	struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
	volatile struct mlx5_aso_wqe *wqe;
	struct mlx5_aso_sq *sq = &mng->aso_sq;
	struct mlx5_aso_age_pool *pool;
	uint16_t size = 1 << sq->log_desc_n;
	uint16_t mask = size - 1;
	uint16_t max;
	uint16_t start_head = sq->head;

	max = RTE_MIN(size - (uint16_t)(sq->head - sq->tail), n - sq->next);
	if (unlikely(!max))
		return 0;
	sq->elts[start_head & mask].burst_size = max;
	do {
		wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
		rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
		/* Fill next WQE. */
		rte_rwlock_read_lock(&mng->resize_rwl);
		pool = mng->pools[sq->next];
		rte_rwlock_read_unlock(&mng->resize_rwl);
		sq->elts[sq->head & mask].pool = pool;
		wqe->general_cseg.misc =
			rte_cpu_to_be_32(((struct mlx5_devx_obj *)
					 (pool->flow_hit_aso_obj))->id);
		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR <<
						   MLX5_COMP_MODE_OFFSET);
		wqe->general_cseg.opcode = rte_cpu_to_be_32
						(MLX5_OPCODE_ACCESS_ASO |
						 (ASO_OPC_MOD_FLOW_HIT <<
						  WQE_CSEG_OPC_MOD_OFFSET) |
						 (sq->pi <<
						  WQE_CSEG_WQE_INDEX_OFFSET));
		sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
		sq->head++;
		sq->next++;
		max--;
	} while (max);
	wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
					   MLX5_COMP_MODE_OFFSET);
	mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
			   !sh->tx_uar.dbnc);
	return sq->elts[start_head & mask].burst_size;
}

/**
 * Debug utility function. Dump contents of error CQE and WQE.
 *
 * @param[in] cqe
 *   Error CQE to dump.
 * @param[in] wqe
 *   Error WQE to dump.
 */
static void
mlx5_aso_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe)
{
	int i;

	DRV_LOG(ERR, "Error cqe:");
	for (i = 0; i < 16; i += 4)
		DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
			cqe[i + 2], cqe[i + 3]);
	DRV_LOG(ERR, "\nError wqe:");
	for (i = 0; i < (int)sizeof(struct mlx5_aso_wqe) / 4; i += 4)
		DRV_LOG(ERR, "%08X %08X %08X %08X", wqe[i], wqe[i + 1],
			wqe[i + 2], wqe[i + 3]);
}

/**
 * Handle case of error CQE.
 *
 * @param[in] sq
 *   ASO SQ to use.
 */
static void
mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq)
{
	struct mlx5_aso_cq *cq = &sq->cq;
	uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1);
	volatile struct mlx5_err_cqe *cqe =
			(volatile struct mlx5_err_cqe *)&cq->cq_obj.cqes[idx];

	cq->errors++;
	idx = rte_be_to_cpu_16(cqe->wqe_counter) & (1u << sq->log_desc_n);
	mlx5_aso_dump_err_objs((volatile uint32_t *)cqe,
			       (volatile uint32_t *)&sq->sq_obj.aso_wqes[idx]);
}

/**
 * Update ASO objects upon completion.
 *
 * @param[in] sh
 *   Shared device context.
 * @param[in] n
 *   Number of completed ASO objects.
 */
static void
mlx5_aso_age_action_update(struct mlx5_dev_ctx_shared *sh, uint16_t n)
{
	struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
	struct mlx5_aso_sq *sq = &mng->aso_sq;
	struct mlx5_age_info *age_info;
	const uint16_t size = 1 << sq->log_desc_n;
	const uint16_t mask = size - 1;
	const uint64_t curr = MLX5_CURR_TIME_SEC;
	uint16_t expected = AGE_CANDIDATE;
	uint16_t i;

	for (i = 0; i < n; ++i) {
		uint16_t idx = (sq->tail + i) & mask;
		struct mlx5_aso_age_pool *pool = sq->elts[idx].pool;
		uint64_t diff = curr - pool->time_of_last_age_check;
		uint64_t *addr = sq->mr.addr;
		int j;

		addr += idx * MLX5_ASO_AGE_ACTIONS_PER_POOL / 64;
		pool->time_of_last_age_check = curr;
		for (j = 0; j < MLX5_ASO_AGE_ACTIONS_PER_POOL; j++) {
			struct mlx5_aso_age_action *act = &pool->actions[j];
			struct mlx5_age_param *ap = &act->age_params;
			uint8_t byte;
			uint8_t offset;
			uint8_t *u8addr;
			uint8_t hit;

			if (__atomic_load_n(&ap->state, __ATOMIC_RELAXED) !=
					    AGE_CANDIDATE)
				continue;
			byte = 63 - (j / 8);
			offset = j % 8;
			u8addr = (uint8_t *)addr;
			hit = (u8addr[byte] >> offset) & 0x1;
			if (hit) {
				__atomic_store_n(&ap->sec_since_last_hit, 0,
						 __ATOMIC_RELAXED);
			} else {
				struct mlx5_priv *priv;

				__atomic_fetch_add(&ap->sec_since_last_hit,
						   diff, __ATOMIC_RELAXED);
				/* If timeout passed add to aged-out list. */
				if (ap->sec_since_last_hit <= ap->timeout)
					continue;
				priv =
				rte_eth_devices[ap->port_id].data->dev_private;
				age_info = GET_PORT_AGE_INFO(priv);
				rte_spinlock_lock(&age_info->aged_sl);
				if (__atomic_compare_exchange_n(&ap->state,
								&expected,
								AGE_TMOUT,
								false,
							       __ATOMIC_RELAXED,
							    __ATOMIC_RELAXED)) {
					LIST_INSERT_HEAD(&age_info->aged_aso,
							 act, next);
					MLX5_AGE_SET(age_info,
						     MLX5_AGE_EVENT_NEW);
				}
				rte_spinlock_unlock(&age_info->aged_sl);
			}
		}
	}
	mlx5_age_event_prepare(sh);
}

/**
 * Handle completions from WQEs sent to ASO SQ.
 *
 * @param[in] sh
 *   Shared device context.
 *
 * @return
 *   Number of CQEs handled.
 */
static uint16_t
mlx5_aso_completion_handle(struct mlx5_dev_ctx_shared *sh)
{
	struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
	struct mlx5_aso_sq *sq = &mng->aso_sq;
	struct mlx5_aso_cq *cq = &sq->cq;
	volatile struct mlx5_cqe *restrict cqe;
	const unsigned int cq_size = 1 << cq->log_desc_n;
	const unsigned int mask = cq_size - 1;
	uint32_t idx;
	uint32_t next_idx = cq->cq_ci & mask;
	const uint16_t max = (uint16_t)(sq->head - sq->tail);
	uint16_t i = 0;
	int ret;

	if (unlikely(!max))
		return 0;
	do {
		idx = next_idx;
		next_idx = (cq->cq_ci + 1) & mask;
		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
		cqe = &cq->cq_obj.cqes[idx];
		ret = check_cqe(cqe, cq_size, cq->cq_ci);
		/*
		 * Be sure owner read is done before any other cookie field or
		 * opaque field.
		 */
		rte_io_rmb();
		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
				break;
			mlx5_aso_cqe_err_handle(sq);
		} else {
			i += sq->elts[(sq->tail + i) & mask].burst_size;
		}
		cq->cq_ci++;
	} while (1);
	if (likely(i)) {
		mlx5_aso_age_action_update(sh, i);
		sq->tail += i;
		rte_io_wmb();
		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
	}
	return i;
}

/**
 * Periodically read CQEs and send WQEs to ASO SQ.
 *
 * @param[in] arg
 *   Shared device context containing the ASO SQ.
 */
static void
mlx5_flow_aso_alarm(void *arg)
{
	struct mlx5_dev_ctx_shared *sh = arg;
	struct mlx5_aso_sq *sq = &sh->aso_age_mng->aso_sq;
	uint32_t us = 100u;
	uint16_t n;

	rte_rwlock_read_lock(&sh->aso_age_mng->resize_rwl);
	n = sh->aso_age_mng->next;
	rte_rwlock_read_unlock(&sh->aso_age_mng->resize_rwl);
	mlx5_aso_completion_handle(sh);
	if (sq->next == n) {
		/* End of loop: wait 1 second. */
		us = US_PER_S;
		sq->next = 0;
	}
	mlx5_aso_sq_enqueue_burst(sh, n);
	if (rte_eal_alarm_set(us, mlx5_flow_aso_alarm, sh))
		DRV_LOG(ERR, "Cannot reinitialize aso alarm.");
}

/**
 * API to start ASO access using ASO SQ.
 *
 * @param[in] sh
 *   Pointer to shared device context.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_aso_flow_hit_queue_poll_start(struct mlx5_dev_ctx_shared *sh)
{
	if (rte_eal_alarm_set(US_PER_S, mlx5_flow_aso_alarm, sh)) {
		DRV_LOG(ERR, "Cannot reinitialize ASO age alarm.");
		return -rte_errno;
	}
	return 0;
}
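
/*
 * Note on the polling lifecycle (descriptive comment added for clarity):
 * mlx5_aso_flow_hit_queue_poll_start() arms the EAL alarm; each firing of
 * mlx5_flow_aso_alarm() drains completions, posts the next burst of flow-hit
 * queries and re-arms itself every 100 microseconds until all pools have
 * been visited, then waits one second before starting the next pass.
 * mlx5_aso_flow_hit_queue_poll_stop() below cancels the alarm.
 */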

/**
 * API to stop ASO access using ASO SQ.
 *
 * @param[in] sh
 *   Pointer to shared device context.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_aso_flow_hit_queue_poll_stop(struct mlx5_dev_ctx_shared *sh)
{
	int retries = 1024;

	if (!sh->aso_age_mng->aso_sq.sq_obj.sq)
		return -EINVAL;
	rte_errno = 0;
	while (--retries) {
		rte_eal_alarm_cancel(mlx5_flow_aso_alarm, sh);
		if (rte_errno != EINPROGRESS)
			break;
		rte_pause();
	}
	return -rte_errno;
}
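
/**
 * Fill a single WQE with the ASO meter configuration and ring the SQ doorbell.
 *
 * @param[in] sh
 *   Pointer to shared device context.
 * @param[in] sq
 *   Pointer to ASO meter SQ.
 * @param[in] aso_mtr
 *   Pointer to the ASO meter to be configured.
 *
 * @return
 *   1 on success (number of WQEs posted), 0 if the SQ is full.
 */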
static uint16_t
mlx5_aso_mtr_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
			       struct mlx5_aso_sq *sq,
			       struct mlx5_aso_mtr *aso_mtr)
{
	volatile struct mlx5_aso_wqe *wqe = NULL;
	struct mlx5_flow_meter_info *fm = NULL;
	struct mlx5_flow_meter_profile *fmp;
	uint16_t size = 1 << sq->log_desc_n;
	uint16_t mask = size - 1;
	uint16_t res;
	uint32_t dseg_idx = 0;
	struct mlx5_aso_mtr_pool *pool = NULL;

	rte_spinlock_lock(&sq->sqsl);
	res = size - (uint16_t)(sq->head - sq->tail);
	if (unlikely(!res)) {
		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
		rte_spinlock_unlock(&sq->sqsl);
		return 0;
	}
	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
	/* Fill next WQE. */
	fm = &aso_mtr->fm;
	sq->elts[sq->head & mask].mtr = aso_mtr;
	pool = container_of(aso_mtr, struct mlx5_aso_mtr_pool,
			    mtrs[aso_mtr->offset]);
	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
						  (aso_mtr->offset >> 1));
	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
						    (ASO_OPC_MOD_POLICER <<
						    WQE_CSEG_OPC_MOD_OFFSET) |
						    sq->pi <<
						    WQE_CSEG_WQE_INDEX_OFFSET);
	/* There are 2 meters in one ASO cache line. */
	dseg_idx = aso_mtr->offset & 0x1;
	wqe->aso_cseg.data_mask =
		RTE_BE64(MLX5_IFC_FLOW_METER_PARAM_MASK << (32 * !dseg_idx));
	if (fm->is_enable) {
		wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
			fm->profile->srtcm_prm.cbs_cir;
		wqe->aso_dseg.mtrs[dseg_idx].ebs_eir =
			fm->profile->srtcm_prm.ebs_eir;
	} else {
		wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
			RTE_BE32(MLX5_IFC_FLOW_METER_DISABLE_CBS_CIR_VAL);
		wqe->aso_dseg.mtrs[dseg_idx].ebs_eir = 0;
	}
	fmp = fm->profile;
	if (fmp->profile.packet_mode)
		wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
			RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
			(MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET) |
			(MLX5_METER_MODE_PKT << ASO_DSEG_MTR_MODE));
	else
		wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
			RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
			(MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET));
	switch (fmp->profile.alg) {
	case RTE_MTR_SRTCM_RFC2697:
		/* Only needed for RFC2697. */
		if (fm->profile->srtcm_prm.ebs_eir)
			wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
				RTE_BE32(1 << ASO_DSEG_BO_OFFSET);
		break;
	case RTE_MTR_TRTCM_RFC2698:
		wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
			RTE_BE32(1 << ASO_DSEG_BBOG_OFFSET);
		break;
	case RTE_MTR_TRTCM_RFC4115:
	default:
		break;
	}
	/*
	 * Note:
	 * Due to software performance reasons, the token fields are not set
	 * when posting the WQE to the ASO SQ. They are filled by the HW
	 * automatically.
	 */
	sq->head++;
	sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
	mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
			   !sh->tx_uar.dbnc);
	rte_spinlock_unlock(&sq->sqsl);
	return 1;
}

/**
 * Update the state of posted ASO meters to ready.
 *
 * @param[in] sq
 *   Pointer to ASO meter SQ.
 * @param[in] aso_mtrs_nums
 *   Number of completed ASO meters since the SQ tail.
 */
static void
mlx5_aso_mtrs_status_update(struct mlx5_aso_sq *sq, uint16_t aso_mtrs_nums)
{
	uint16_t size = 1 << sq->log_desc_n;
	uint16_t mask = size - 1;
	uint16_t i;
	struct mlx5_aso_mtr *aso_mtr = NULL;
	uint8_t exp_state = ASO_METER_WAIT;

	for (i = 0; i < aso_mtrs_nums; ++i) {
		aso_mtr = sq->elts[(sq->tail + i) & mask].mtr;
		MLX5_ASSERT(aso_mtr);
		(void)__atomic_compare_exchange_n(&aso_mtr->state,
				&exp_state, ASO_METER_READY,
				false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
	}
}

/**
 * Handle completions from WQEs sent to the ASO meter SQ.
 *
 * @param[in] sq
 *   Pointer to ASO meter SQ.
 */
static void
mlx5_aso_mtr_completion_handle(struct mlx5_aso_sq *sq)
{
	struct mlx5_aso_cq *cq = &sq->cq;
	volatile struct mlx5_cqe *restrict cqe;
	const unsigned int cq_size = 1 << cq->log_desc_n;
	const unsigned int mask = cq_size - 1;
	uint32_t idx;
	uint32_t next_idx = cq->cq_ci & mask;
	uint16_t max;
	uint16_t n = 0;
	int ret;

	rte_spinlock_lock(&sq->sqsl);
	max = (uint16_t)(sq->head - sq->tail);
	if (unlikely(!max)) {
		rte_spinlock_unlock(&sq->sqsl);
		return;
	}
	do {
		idx = next_idx;
		next_idx = (cq->cq_ci + 1) & mask;
		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
		cqe = &cq->cq_obj.cqes[idx];
		ret = check_cqe(cqe, cq_size, cq->cq_ci);
		/*
		 * Be sure owner read is done before any other cookie field or
		 * opaque field.
		 */
		rte_io_rmb();
		if (ret != MLX5_CQE_STATUS_SW_OWN) {
			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
				break;
			mlx5_aso_cqe_err_handle(sq);
		} else {
			n++;
		}
		cq->cq_ci++;
	} while (1);
	if (likely(n)) {
		mlx5_aso_mtrs_status_update(sq, n);
		sq->tail += n;
		rte_io_wmb();
		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
	}
	rte_spinlock_unlock(&sq->sqsl);
}

/**
 * Update meter parameter by send WQE.
 *
 * @param[in] sh
 *   Pointer to shared device context.
 * @param[in] mtr
 *   Pointer to ASO meter to be modified.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
			     struct mlx5_aso_mtr *mtr)
{
	struct mlx5_aso_sq *sq = &sh->mtrmng->pools_mng.sq;
	uint32_t poll_wqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;

	do {
		mlx5_aso_mtr_completion_handle(sq);
		if (mlx5_aso_mtr_sq_enqueue_single(sh, sq, mtr))
			return 0;
		/* Waiting for wqe resource. */
		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
	} while (--poll_wqe_times);
	DRV_LOG(ERR, "Fail to send WQE for ASO meter offset %d",
		mtr->offset);
	return -1;
}
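
/*
 * Illustrative call sequence (an assumption about caller code, not taken from
 * this file): a meter is first pushed to hardware and then awaited before it
 * is attached to a flow, e.g.:
 *
 *	if (mlx5_aso_meter_update_by_wqe(sh, aso_mtr))
 *		return -1;
 *	if (mlx5_aso_mtr_wait(sh, aso_mtr))
 *		return -1;
 */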

/**
 * Wait for meter to be ready.
 *
 * @param[in] sh
 *   Pointer to shared device context.
 * @param[in] mtr
 *   Pointer to ASO meter to wait for.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
		  struct mlx5_aso_mtr *mtr)
{
	struct mlx5_aso_sq *sq = &sh->mtrmng->pools_mng.sq;
	uint32_t poll_cqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;

	if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) ==
					    ASO_METER_READY)
		return 0;
	do {
		mlx5_aso_mtr_completion_handle(sq);
		if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) ==
					    ASO_METER_READY)
			return 0;
		/* Waiting for CQE ready. */
		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
	} while (--poll_cqe_times);
	DRV_LOG(ERR, "Fail to poll CQE ready for ASO meter offset %d",
		mtr->offset);
	return -1;
}

/*
 * Post a WQE to the ASO CT SQ to modify the context.
 *
 * @param[in] sh
 *   Pointer to shared device context.
 * @param[in] ct
 *   Pointer to the generic CT structure related to the context.
 * @param[in] profile
 *   Pointer to configuration profile.
 *
 * @return
 *   1 on success (WQE number), 0 on failure.
 */
static uint16_t
mlx5_aso_ct_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
			      struct mlx5_aso_ct_action *ct,
			      const struct rte_flow_action_conntrack *profile)
{
	volatile struct mlx5_aso_wqe *wqe = NULL;
	struct mlx5_aso_sq *sq = &sh->ct_mng->aso_sq;
	uint16_t size = 1 << sq->log_desc_n;
	uint16_t mask = size - 1;
	uint16_t res;
	struct mlx5_aso_ct_pool *pool;
	void *desg;
	void *orig_dir;
	void *reply_dir;

	rte_spinlock_lock(&sq->sqsl);
	/* Prevent other threads from updating the index. */
	res = size - (uint16_t)(sq->head - sq->tail);
	if (unlikely(!res)) {
		rte_spinlock_unlock(&sq->sqsl);
		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
		return 0;
	}
	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
	/* Fill next WQE. */
	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
	sq->elts[sq->head & mask].ct = ct;
	sq->elts[sq->head & mask].query_data = NULL;
	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
	/* Each WQE will have a single CT object. */
	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
						  ct->offset);
	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
			(ASO_OPC_MOD_CONNECTION_TRACKING <<
			 WQE_CSEG_OPC_MOD_OFFSET) |
			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
			(0u |
			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
	wqe->aso_cseg.data_mask = UINT64_MAX;
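	/*
	 * The data segment below translates the rte_flow_action_conntrack
	 * profile into the PRM conn_track_aso layout: context flags first,
	 * then the per-direction TCP window parameters.
	 */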
	/* To make compiler happy. */
	desg = (void *)(uintptr_t)wqe->aso_dseg.data;
	MLX5_SET(conn_track_aso, desg, valid, 1);
	MLX5_SET(conn_track_aso, desg, state, profile->state);
	MLX5_SET(conn_track_aso, desg, freeze_track, !profile->enable);
	MLX5_SET(conn_track_aso, desg, connection_assured,
		 profile->live_connection);
	MLX5_SET(conn_track_aso, desg, sack_permitted, profile->selective_ack);
	MLX5_SET(conn_track_aso, desg, challenged_acked,
		 profile->challenge_ack_passed);
	/* Heartbeat, retransmission_counter, retranmission_limit_exceeded: 0 */
	MLX5_SET(conn_track_aso, desg, heartbeat, 0);
	MLX5_SET(conn_track_aso, desg, max_ack_window,
		 profile->max_ack_window);
	MLX5_SET(conn_track_aso, desg, retransmission_counter, 0);
	MLX5_SET(conn_track_aso, desg, retranmission_limit_exceeded, 0);
	MLX5_SET(conn_track_aso, desg, retranmission_limit,
		 profile->retransmission_limit);
	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_scale,
		 profile->reply_dir.scale);
	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_close_initiated,
		 profile->reply_dir.close_initiated);
	/* Both directions will use the same liberal mode. */
	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_liberal_enabled,
		 profile->liberal_mode);
	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_data_unacked,
		 profile->reply_dir.data_unacked);
	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_max_ack,
		 profile->reply_dir.last_ack_seen);
	MLX5_SET(conn_track_aso, desg, original_direction_tcp_scale,
		 profile->original_dir.scale);
	MLX5_SET(conn_track_aso, desg, original_direction_tcp_close_initiated,
		 profile->original_dir.close_initiated);
	MLX5_SET(conn_track_aso, desg, original_direction_tcp_liberal_enabled,
		 profile->liberal_mode);
	MLX5_SET(conn_track_aso, desg, original_direction_tcp_data_unacked,
		 profile->original_dir.data_unacked);
	MLX5_SET(conn_track_aso, desg, original_direction_tcp_max_ack,
		 profile->original_dir.last_ack_seen);
	MLX5_SET(conn_track_aso, desg, last_win, profile->last_window);
	MLX5_SET(conn_track_aso, desg, last_dir, profile->last_direction);
	MLX5_SET(conn_track_aso, desg, last_index, profile->last_index);
	MLX5_SET(conn_track_aso, desg, last_seq, profile->last_seq);
	MLX5_SET(conn_track_aso, desg, last_ack, profile->last_ack);
	MLX5_SET(conn_track_aso, desg, last_end, profile->last_end);
	orig_dir = MLX5_ADDR_OF(conn_track_aso, desg, original_dir);
	MLX5_SET(tcp_window_params, orig_dir, sent_end,
		 profile->original_dir.sent_end);
	MLX5_SET(tcp_window_params, orig_dir, reply_end,
		 profile->original_dir.reply_end);
	MLX5_SET(tcp_window_params, orig_dir, max_win,
		 profile->original_dir.max_win);
	MLX5_SET(tcp_window_params, orig_dir, max_ack,
		 profile->original_dir.max_ack);
	reply_dir = MLX5_ADDR_OF(conn_track_aso, desg, reply_dir);
	MLX5_SET(tcp_window_params, reply_dir, sent_end,
		 profile->reply_dir.sent_end);
	MLX5_SET(tcp_window_params, reply_dir, reply_end,
		 profile->reply_dir.reply_end);
	MLX5_SET(tcp_window_params, reply_dir, max_win,
		 profile->reply_dir.max_win);
	MLX5_SET(tcp_window_params, reply_dir, max_ack,
		 profile->reply_dir.max_ack);
	sq->head++;
	sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
	mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
			   !sh->tx_uar.dbnc);
	rte_spinlock_unlock(&sq->sqsl);
	return 1;
}

/*
 * Update the status field of CTs to indicate ready to be used by flows.
 * The CTs handled here are contiguous since the last update.
 *
 * @param[in] sq
 *   Pointer to ASO CT SQ.
 * @param[in] num
 *   Number of CT structures to be updated.
 */
static void
mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
{
	uint16_t size = 1 << sq->log_desc_n;
	uint16_t mask = size - 1;
	uint16_t i;
	struct mlx5_aso_ct_action *ct = NULL;
	uint16_t idx;

	for (i = 0; i < num; i++) {
		idx = (uint16_t)((sq->tail + i) & mask);
		ct = sq->elts[idx].ct;
		MLX5_ASSERT(ct);
		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
		if (sq->elts[idx].query_data)
			rte_memcpy(sq->elts[idx].query_data,
				   (char *)((uintptr_t)sq->mr.addr + idx * 64),
				   64);
	}
}

/*
 * Post a WQE to the ASO CT SQ to query the current context.
 *
 * @param[in] sh
 *   Pointer to shared device context.
 * @param[in] ct
 *   Pointer to the generic CT structure related to the context.
 * @param[in] data
 *   Pointer to data area to be filled.
 *
 * @return
 *   1 on success (WQE number), 0 on failure.
 */
static int
mlx5_aso_ct_sq_query_single(struct mlx5_dev_ctx_shared *sh,
			    struct mlx5_aso_ct_action *ct, char *data)
{
	volatile struct mlx5_aso_wqe *wqe = NULL;
	struct mlx5_aso_sq *sq = &sh->ct_mng->aso_sq;
	uint16_t size = 1 << sq->log_desc_n;
	uint16_t mask = size - 1;
	uint16_t res;
	uint16_t wqe_idx;
	struct mlx5_aso_ct_pool *pool;
	enum mlx5_aso_ct_state state =
				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);

	if (state == ASO_CONNTRACK_FREE) {
		DRV_LOG(ERR, "Fail: No context to query");
		return -1;
	} else if (state == ASO_CONNTRACK_WAIT) {
		return 0;
	}
	rte_spinlock_lock(&sq->sqsl);
	res = size - (uint16_t)(sq->head - sq->tail);
	if (unlikely(!res)) {
		rte_spinlock_unlock(&sq->sqsl);
		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
		return 0;
	}
	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_QUERY);
	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
	/* Confirm the location and address of the prefetch instruction. */
	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
	/* Fill next WQE. */
	wqe_idx = sq->head & mask;
	sq->elts[wqe_idx].ct = ct;
	sq->elts[wqe_idx].query_data = data;
	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
	/* Each WQE will have a single CT object. */
	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
						  ct->offset);
	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
			(ASO_OPC_MOD_CONNECTION_TRACKING <<
			 WQE_CSEG_OPC_MOD_OFFSET) |
			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
	/*
	 * No write request is required.
	 * ASO_OPER_LOGICAL_AND and ASO_OP_ALWAYS_FALSE are both 0.
	 * "BYTEWISE_64BYTE" is needed for a whole context.
	 * Set to 0 directly to reduce an endian swap. (Modify should rewrite.)
	 * "data_mask" is ignored.
	 * Buffer address was already filled during initialization.
	 */
	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32(BYTEWISE_64BYTE <<
					ASO_CSEG_DATA_MASK_MODE_OFFSET);
	wqe->aso_cseg.data_mask = 0;
	sq->head++;
	/*
	 * Each WQE contains 2 WQEBB's, even though
	 * data segment is not used in this case.
	 */
	sq->pi += 2;
	mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
			   !sh->tx_uar.dbnc);
	rte_spinlock_unlock(&sq->sqsl);
	return 1;
}

/*
 * Handle completions from WQEs sent to ASO CT.
 *
 * @param[in] mng
 *   Pointer to the CT pools management structure.
 */
static void
mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
{
	struct mlx5_aso_sq *sq = &mng->aso_sq;
	struct mlx5_aso_cq *cq = &sq->cq;
	volatile struct mlx5_cqe *restrict cqe;
	const uint32_t cq_size = 1 << cq->log_desc_n;
	const uint32_t mask = cq_size - 1;
	uint32_t idx;
	uint32_t next_idx;
	uint16_t max;
	uint16_t n = 0;
	int ret;

	rte_spinlock_lock(&sq->sqsl);
	max = (uint16_t)(sq->head - sq->tail);
	if (unlikely(!max)) {
		rte_spinlock_unlock(&sq->sqsl);
		return;
	}
	next_idx = cq->cq_ci & mask;
	do {
		idx = next_idx;
		next_idx = (cq->cq_ci + 1) & mask;
		/* Need to confirm the position of the prefetch. */
		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
		cqe = &cq->cq_obj.cqes[idx];
		ret = check_cqe(cqe, cq_size, cq->cq_ci);
		/*
		 * Be sure owner read is done before any other cookie field or
		 * opaque field.
		 */
		rte_io_rmb();
		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
				break;
			mlx5_aso_cqe_err_handle(sq);
		} else {
			n++;
		}
		cq->cq_ci++;
	} while (1);
	if (likely(n)) {
		mlx5_aso_ct_status_update(sq, n);
		sq->tail += n;
		rte_io_wmb();
		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
	}
	rte_spinlock_unlock(&sq->sqsl);
}

/*
 * Update connection tracking ASO context by sending WQE.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 * @param[in] ct
 *   Pointer to connection tracking offload object.
 * @param[in] profile
 *   Pointer to connection tracking TCP parameter.
 *
 * @return
 *   0 on success, -1 on failure.
 */
int
mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
			  struct mlx5_aso_ct_action *ct,
			  const struct rte_flow_action_conntrack *profile)
{
	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
	struct mlx5_aso_ct_pool *pool;

	MLX5_ASSERT(ct);
	do {
		mlx5_aso_ct_completion_handle(sh->ct_mng);
		if (mlx5_aso_ct_sq_enqueue_single(sh, ct, profile))
			return 0;
		/* Waiting for wqe resource. */
		rte_delay_us_sleep(10u);
	} while (--poll_wqe_times);
	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
		ct->offset, pool->index);
	return -1;
}

/*
 * The routine is used to wait for WQE completion to continue with queried data.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 * @param[in] ct
 *   Pointer to connection tracking offload object.
 *
 * @return
 *   0 on success, -1 on failure.
 */
int
mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
		       struct mlx5_aso_ct_action *ct)
{
	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
	struct mlx5_aso_ct_pool *pool;

	if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
	    ASO_CONNTRACK_READY)
		return 0;
	do {
		mlx5_aso_ct_completion_handle(mng);
		if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
		    ASO_CONNTRACK_READY)
			return 0;
		/* Waiting for CQE ready, consider whether to block or to sleep. */
		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
	} while (--poll_cqe_times);
	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
	DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
		ct->offset, pool->index);
	return -1;
}

/*
 * Convert the hardware conntrack data format into the profile.
 *
 * @param[in] profile
 *   Pointer to conntrack profile to be filled after query.
 * @param[in] wdata
 *   Pointer to data fetched from hardware.
 */
static inline void
mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
			char *wdata)
{
	void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir);
	void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir);

	/* MLX5_GET16 should be taken into consideration. */
	profile->state = (enum rte_flow_conntrack_state)
			 MLX5_GET(conn_track_aso, wdata, state);
	profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track);
	profile->selective_ack = MLX5_GET(conn_track_aso, wdata,
					  sack_permitted);
	profile->live_connection = MLX5_GET(conn_track_aso, wdata,
					    connection_assured);
	profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata,
						 challenged_acked);
	profile->max_ack_window = MLX5_GET(conn_track_aso, wdata,
					   max_ack_window);
	profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata,
						 retranmission_limit);
	profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win);
	profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir);
	profile->last_index = (enum rte_flow_conntrack_tcp_last_index)
			      MLX5_GET(conn_track_aso, wdata, last_index);
	profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq);
	profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack);
	profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end);
	profile->liberal_mode = MLX5_GET(conn_track_aso, wdata,
				reply_direction_tcp_liberal_enabled) |
				MLX5_GET(conn_track_aso, wdata,
				original_direction_tcp_liberal_enabled);
	/* The RTE profile keeps a single liberal mode for both directions. */
	profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata,
					    reply_direction_tcp_scale);
	profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
					reply_direction_tcp_close_initiated);
	profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
					reply_direction_tcp_data_unacked);
	profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
					reply_direction_tcp_max_ack);
	profile->reply_dir.sent_end = MLX5_GET(tcp_window_params,
					       r_dir, sent_end);
	profile->reply_dir.reply_end = MLX5_GET(tcp_window_params,
						r_dir, reply_end);
	profile->reply_dir.max_win = MLX5_GET(tcp_window_params,
					      r_dir, max_win);
	profile->reply_dir.max_ack = MLX5_GET(tcp_window_params,
					      r_dir, max_ack);
	profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata,
					       original_direction_tcp_scale);
	profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
					original_direction_tcp_close_initiated);
	profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
					original_direction_tcp_data_unacked);
	profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
					original_direction_tcp_max_ack);
	profile->original_dir.sent_end = MLX5_GET(tcp_window_params,
						  o_dir, sent_end);
	profile->original_dir.reply_end = MLX5_GET(tcp_window_params,
						   o_dir, reply_end);
	profile->original_dir.max_win = MLX5_GET(tcp_window_params,
						 o_dir, max_win);
	profile->original_dir.max_ack = MLX5_GET(tcp_window_params,
						 o_dir, max_ack);
}

/*
 * Query connection tracking information parameter by send WQE.
 *
 * @param[in] sh
 *   Pointer to mlx5_dev_ctx_shared object.
 * @param[in] ct
 *   Pointer to connection tracking offload object.
 * @param[out] profile
 *   Pointer to connection tracking TCP information.
 *
 * @return
 *   0 on success, -1 on failure.
 */
int
mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
			 struct mlx5_aso_ct_action *ct,
			 struct rte_flow_action_conntrack *profile)
{
	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
	struct mlx5_aso_ct_pool *pool;
	char out_data[64 * 2];
	int ret;

	MLX5_ASSERT(ct);
	do {
		mlx5_aso_ct_completion_handle(sh->ct_mng);
		ret = mlx5_aso_ct_sq_query_single(sh, ct, out_data);
		if (ret < 0)
			return ret;
		else if (ret > 0)
			goto data_handle;
		/* Waiting for wqe resource or state. */
		else
			rte_delay_us_sleep(10u);
	} while (--poll_wqe_times);
	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
		ct->offset, pool->index);
	return -1;
data_handle:
	ret = mlx5_aso_ct_wait_ready(sh, ct);
	if (!ret)
		mlx5_aso_ct_obj_analyze(profile, out_data);
	return ret;
}
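
/*
 * Illustrative call sequence (an assumption about caller code, not taken from
 * this file; cur_profile is a hypothetical local struct
 * rte_flow_action_conntrack): a CT context is programmed and synchronized
 * before a flow rule references it, and can later be read back, e.g.:
 *
 *	if (mlx5_aso_ct_update_by_wqe(sh, ct, profile) ||
 *	    mlx5_aso_ct_available(sh, ct))
 *		return -1;
 *	...
 *	mlx5_aso_ct_query_by_wqe(sh, ct, &cur_profile);
 */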

/*
 * Make sure the conntrack context is synchronized with hardware before
 * creating a flow rule that uses it.
 *
 * @param[in] sh
 *   Pointer to shared device context.
 * @param[in] ct
 *   Pointer to connection tracking offload object.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
		      struct mlx5_aso_ct_action *ct)
{
	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
	enum mlx5_aso_ct_state state =
				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);

	if (state == ASO_CONNTRACK_FREE) {
		rte_errno = ENXIO;
		return -rte_errno;
	} else if (state == ASO_CONNTRACK_READY ||
		   state == ASO_CONNTRACK_QUERY) {
		return 0;
	}
	do {
		mlx5_aso_ct_completion_handle(mng);
		state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
		if (state == ASO_CONNTRACK_READY ||
		    state == ASO_CONNTRACK_QUERY)
			return 0;
		/* Waiting for CQE ready, consider whether to block or to sleep. */
		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
	} while (--poll_cqe_times);
	rte_errno = EBUSY;
	return -rte_errno;
}