1 /* SPDX-License-Identifier: BSD-3-Clause 2 * Copyright 2020 Mellanox Technologies, Ltd 3 */ 4 5 #include <stdint.h> 6 #include <rte_malloc.h> 7 #include <mlx5_malloc.h> 8 #include <rte_ring.h> 9 #include <mlx5_devx_cmds.h> 10 #include <rte_cycles.h> 11 #include <rte_eal_paging.h> 12 #include <rte_thread.h> 13 14 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H) 15 16 #include "mlx5_utils.h" 17 #include "mlx5_hws_cnt.h" 18 19 #define HWS_CNT_CACHE_SZ_DEFAULT 511 20 #define HWS_CNT_CACHE_PRELOAD_DEFAULT 254 21 #define HWS_CNT_CACHE_FETCH_DEFAULT 254 22 #define HWS_CNT_CACHE_THRESHOLD_DEFAULT 254 23 #define HWS_CNT_ALLOC_FACTOR_DEFAULT 20 24 25 static void 26 __hws_cnt_id_load(struct mlx5_hws_cnt_pool *cpool) 27 { 28 uint32_t cnt_num = mlx5_hws_cnt_pool_get_size(cpool); 29 uint32_t iidx; 30 31 /* 32 * Counter ID order is important for tracking the max number of in used 33 * counter for querying, which means counter internal index order must 34 * be from zero to the number user configured, i.e: 0 - 8000000. 35 * Need to load counter ID in this order into the cache firstly, 36 * and then the global free list. 37 * In the end, user fetch the counter from minimal to the maximum. 38 */ 39 for (iidx = 0; iidx < cnt_num; iidx++) { 40 cnt_id_t cnt_id = mlx5_hws_cnt_id_gen(cpool, iidx); 41 42 rte_ring_enqueue_elem(cpool->free_list, &cnt_id, 43 sizeof(cnt_id)); 44 } 45 } 46 47 static void 48 __mlx5_hws_cnt_svc(struct mlx5_dev_ctx_shared *sh, 49 struct mlx5_hws_cnt_pool *cpool) 50 { 51 struct rte_ring *reset_list = cpool->wait_reset_list; 52 struct rte_ring *reuse_list = cpool->reuse_list; 53 uint32_t reset_cnt_num; 54 struct rte_ring_zc_data zcdr = {0}; 55 struct rte_ring_zc_data zcdu = {0}; 56 uint32_t ret __rte_unused; 57 58 reset_cnt_num = rte_ring_count(reset_list); 59 cpool->query_gen++; 60 mlx5_aso_cnt_query(sh, cpool); 61 zcdr.n1 = 0; 62 zcdu.n1 = 0; 63 ret = rte_ring_enqueue_zc_burst_elem_start(reuse_list, 64 sizeof(cnt_id_t), 65 reset_cnt_num, &zcdu, 66 NULL); 67 MLX5_ASSERT(ret == reset_cnt_num); 68 ret = rte_ring_dequeue_zc_burst_elem_start(reset_list, 69 sizeof(cnt_id_t), 70 reset_cnt_num, &zcdr, 71 NULL); 72 MLX5_ASSERT(ret == reset_cnt_num); 73 __hws_cnt_r2rcpy(&zcdu, &zcdr, reset_cnt_num); 74 rte_ring_dequeue_zc_elem_finish(reset_list, reset_cnt_num); 75 rte_ring_enqueue_zc_elem_finish(reuse_list, reset_cnt_num); 76 77 if (rte_log_can_log(mlx5_logtype, RTE_LOG_DEBUG)) { 78 reset_cnt_num = rte_ring_count(reset_list); 79 DRV_LOG(DEBUG, "ibdev %s cpool %p wait_reset_cnt=%" PRIu32, 80 sh->ibdev_name, (void *)cpool, reset_cnt_num); 81 } 82 } 83 84 /** 85 * Release AGE parameter. 86 * 87 * @param priv 88 * Pointer to the port private data structure. 89 * @param own_cnt_index 90 * Counter ID to created only for this AGE to release. 91 * Zero means there is no such counter. 92 * @param age_ipool 93 * Pointer to AGE parameter indexed pool. 94 * @param idx 95 * Index of AGE parameter in the indexed pool. 96 */ 97 static void 98 mlx5_hws_age_param_free(struct mlx5_priv *priv, cnt_id_t own_cnt_index, 99 struct mlx5_indexed_pool *age_ipool, uint32_t idx) 100 { 101 if (own_cnt_index) { 102 struct mlx5_hws_cnt_pool *cpool = priv->hws_cpool; 103 104 MLX5_ASSERT(mlx5_hws_cnt_is_shared(cpool, own_cnt_index)); 105 mlx5_hws_cnt_shared_put(cpool, &own_cnt_index); 106 } 107 mlx5_ipool_free(age_ipool, idx); 108 } 109 110 /** 111 * Check and callback event for new aged flow in the HWS counter pool. 112 * 113 * @param[in] priv 114 * Pointer to port private object. 115 * @param[in] cpool 116 * Pointer to current counter pool. 117 */ 118 static void 119 mlx5_hws_aging_check(struct mlx5_priv *priv, struct mlx5_hws_cnt_pool *cpool) 120 { 121 struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv); 122 struct flow_counter_stats *stats = cpool->raw_mng->raw; 123 struct mlx5_hws_age_param *param; 124 struct rte_ring *r; 125 const uint64_t curr_time = MLX5_CURR_TIME_SEC; 126 const uint32_t time_delta = curr_time - cpool->time_of_last_age_check; 127 uint32_t nb_alloc_cnts = mlx5_hws_cnt_pool_get_size(cpool); 128 uint16_t expected1 = HWS_AGE_CANDIDATE; 129 uint16_t expected2 = HWS_AGE_CANDIDATE_INSIDE_RING; 130 uint32_t i; 131 132 cpool->time_of_last_age_check = curr_time; 133 for (i = 0; i < nb_alloc_cnts; ++i) { 134 uint32_t age_idx = cpool->pool[i].age_idx; 135 uint64_t hits; 136 137 if (!cpool->pool[i].in_used || age_idx == 0) 138 continue; 139 param = mlx5_ipool_get(age_info->ages_ipool, age_idx); 140 if (unlikely(param == NULL)) { 141 /* 142 * When AGE which used indirect counter it is user 143 * responsibility not using this indirect counter 144 * without this AGE. 145 * If this counter is used after the AGE was freed, the 146 * AGE index is invalid and using it here will cause a 147 * segmentation fault. 148 */ 149 DRV_LOG(WARNING, 150 "Counter %u is lost his AGE, it is unused.", i); 151 continue; 152 } 153 if (param->timeout == 0) 154 continue; 155 switch (rte_atomic_load_explicit(¶m->state, rte_memory_order_relaxed)) { 156 case HWS_AGE_AGED_OUT_NOT_REPORTED: 157 case HWS_AGE_AGED_OUT_REPORTED: 158 /* Already aged-out, no action is needed. */ 159 continue; 160 case HWS_AGE_CANDIDATE: 161 case HWS_AGE_CANDIDATE_INSIDE_RING: 162 /* This AGE candidate to be aged-out, go to checking. */ 163 break; 164 case HWS_AGE_FREE: 165 /* 166 * AGE parameter with state "FREE" couldn't be pointed 167 * by any counter since counter is destroyed first. 168 * Fall-through. 169 */ 170 default: 171 MLX5_ASSERT(0); 172 continue; 173 } 174 hits = rte_be_to_cpu_64(stats[i].hits); 175 if (param->nb_cnts == 1) { 176 if (hits != param->accumulator_last_hits) { 177 rte_atomic_store_explicit(¶m->sec_since_last_hit, 0, 178 rte_memory_order_relaxed); 179 param->accumulator_last_hits = hits; 180 continue; 181 } 182 } else { 183 param->accumulator_hits += hits; 184 param->accumulator_cnt++; 185 if (param->accumulator_cnt < param->nb_cnts) 186 continue; 187 param->accumulator_cnt = 0; 188 if (param->accumulator_last_hits != 189 param->accumulator_hits) { 190 rte_atomic_store_explicit(¶m->sec_since_last_hit, 191 0, rte_memory_order_relaxed); 192 param->accumulator_last_hits = 193 param->accumulator_hits; 194 param->accumulator_hits = 0; 195 continue; 196 } 197 param->accumulator_hits = 0; 198 } 199 if (rte_atomic_fetch_add_explicit(¶m->sec_since_last_hit, time_delta, 200 rte_memory_order_relaxed) + time_delta <= 201 rte_atomic_load_explicit(¶m->timeout, rte_memory_order_relaxed)) 202 continue; 203 /* Prepare the relevant ring for this AGE parameter */ 204 if (priv->hws_strict_queue) 205 r = age_info->hw_q_age->aged_lists[param->queue_id]; 206 else 207 r = age_info->hw_age.aged_list; 208 /* Changing the state atomically and insert it into the ring. */ 209 if (rte_atomic_compare_exchange_strong_explicit(¶m->state, &expected1, 210 HWS_AGE_AGED_OUT_NOT_REPORTED, 211 rte_memory_order_relaxed, 212 rte_memory_order_relaxed)) { 213 int ret = rte_ring_enqueue_burst_elem(r, &age_idx, 214 sizeof(uint32_t), 215 1, NULL); 216 217 /* 218 * The ring doesn't have enough room for this entry, 219 * it replace back the state for the next second. 220 * 221 * FIXME: if until next sec it get traffic, we are going 222 * to lose this "aged out", will be fixed later 223 * when optimise it to fill ring in bulks. 224 */ 225 expected2 = HWS_AGE_AGED_OUT_NOT_REPORTED; 226 if (ret == 0 && 227 !rte_atomic_compare_exchange_strong_explicit(¶m->state, 228 &expected2, expected1, 229 rte_memory_order_relaxed, 230 rte_memory_order_relaxed) && 231 expected2 == HWS_AGE_FREE) 232 mlx5_hws_age_param_free(priv, 233 param->own_cnt_index, 234 age_info->ages_ipool, 235 age_idx); 236 /* The event is irrelevant in strict queue mode. */ 237 if (!priv->hws_strict_queue) 238 MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW); 239 } else { 240 rte_atomic_compare_exchange_strong_explicit(¶m->state, &expected2, 241 HWS_AGE_AGED_OUT_NOT_REPORTED, 242 rte_memory_order_relaxed, 243 rte_memory_order_relaxed); 244 } 245 } 246 /* The event is irrelevant in strict queue mode. */ 247 if (!priv->hws_strict_queue) 248 mlx5_age_event_prepare(priv->sh); 249 } 250 251 static void 252 mlx5_hws_cnt_raw_data_free(struct mlx5_dev_ctx_shared *sh, 253 struct mlx5_hws_cnt_raw_data_mng *mng) 254 { 255 if (mng == NULL) 256 return; 257 sh->cdev->mr_scache.dereg_mr_cb(&mng->mr); 258 mlx5_free(mng->raw); 259 mlx5_free(mng); 260 } 261 262 __rte_unused 263 static struct mlx5_hws_cnt_raw_data_mng * 264 mlx5_hws_cnt_raw_data_alloc(struct mlx5_dev_ctx_shared *sh, uint32_t n, 265 struct rte_flow_error *error) 266 { 267 struct mlx5_hws_cnt_raw_data_mng *mng = NULL; 268 int ret; 269 size_t sz = n * sizeof(struct flow_counter_stats); 270 size_t pgsz = rte_mem_page_size(); 271 272 MLX5_ASSERT(pgsz > 0); 273 mng = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO, sizeof(*mng), 0, 274 SOCKET_ID_ANY); 275 if (mng == NULL) { 276 rte_flow_error_set(error, ENOMEM, 277 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 278 NULL, "failed to allocate counters memory manager"); 279 goto error; 280 } 281 mng->raw = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO, sz, pgsz, 282 SOCKET_ID_ANY); 283 if (mng->raw == NULL) { 284 rte_flow_error_set(error, ENOMEM, 285 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 286 NULL, "failed to allocate raw counters memory"); 287 goto error; 288 } 289 ret = sh->cdev->mr_scache.reg_mr_cb(sh->cdev->pd, mng->raw, sz, 290 &mng->mr); 291 if (ret) { 292 rte_flow_error_set(error, errno, 293 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 294 NULL, "failed to register counters memory region"); 295 goto error; 296 } 297 return mng; 298 error: 299 mlx5_hws_cnt_raw_data_free(sh, mng); 300 return NULL; 301 } 302 303 static uint32_t 304 mlx5_hws_cnt_svc(void *opaque) 305 { 306 struct mlx5_dev_ctx_shared *sh = 307 (struct mlx5_dev_ctx_shared *)opaque; 308 uint64_t interval = 309 (uint64_t)sh->cnt_svc->query_interval * (US_PER_S / MS_PER_S); 310 struct mlx5_hws_cnt_pool *hws_cpool; 311 uint64_t start_cycle, query_cycle = 0; 312 uint64_t query_us; 313 uint64_t sleep_us; 314 315 while (sh->cnt_svc->svc_running != 0) { 316 if (rte_spinlock_trylock(&sh->cpool_lock) == 0) 317 continue; 318 start_cycle = rte_rdtsc(); 319 /* 200ms for 16M counters. */ 320 LIST_FOREACH(hws_cpool, &sh->hws_cpool_list, next) { 321 struct mlx5_priv *opriv = hws_cpool->priv; 322 323 __mlx5_hws_cnt_svc(sh, hws_cpool); 324 if (opriv->hws_age_req) 325 mlx5_hws_aging_check(opriv, hws_cpool); 326 } 327 query_cycle = rte_rdtsc() - start_cycle; 328 rte_spinlock_unlock(&sh->cpool_lock); 329 query_us = query_cycle / (rte_get_timer_hz() / US_PER_S); 330 sleep_us = interval - query_us; 331 DRV_LOG(DEBUG, "ibdev %s counter service thread: " 332 "interval_us=%" PRIu64 " query_us=%" PRIu64 " " 333 "sleep_us=%" PRIu64, 334 sh->ibdev_name, interval, query_us, 335 interval > query_us ? sleep_us : 0); 336 if (interval > query_us) 337 rte_delay_us_sleep(sleep_us); 338 } 339 return 0; 340 } 341 342 static void 343 mlx5_hws_cnt_pool_deinit(struct mlx5_hws_cnt_pool * const cntp) 344 { 345 uint32_t qidx = 0; 346 if (cntp == NULL) 347 return; 348 rte_ring_free(cntp->free_list); 349 rte_ring_free(cntp->wait_reset_list); 350 rte_ring_free(cntp->reuse_list); 351 if (cntp->cache) { 352 for (qidx = 0; qidx < cntp->cache->q_num; qidx++) 353 rte_ring_free(cntp->cache->qcache[qidx]); 354 } 355 mlx5_free(cntp->cache); 356 mlx5_free(cntp->raw_mng); 357 mlx5_free(cntp->pool); 358 mlx5_free(cntp); 359 } 360 361 static bool 362 mlx5_hws_cnt_should_enable_cache(const struct mlx5_hws_cnt_pool_cfg *pcfg, 363 const struct mlx5_hws_cache_param *ccfg) 364 { 365 /* 366 * Enable cache if and only if there are enough counters requested 367 * to populate all of the caches. 368 */ 369 return pcfg->request_num >= ccfg->q_num * ccfg->size; 370 } 371 372 static struct mlx5_hws_cnt_pool_caches * 373 mlx5_hws_cnt_cache_init(const struct mlx5_hws_cnt_pool_cfg *pcfg, 374 const struct mlx5_hws_cache_param *ccfg) 375 { 376 struct mlx5_hws_cnt_pool_caches *cache; 377 char mz_name[RTE_MEMZONE_NAMESIZE]; 378 uint32_t qidx; 379 380 /* If counter pool is big enough, setup the counter pool cache. */ 381 cache = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO, 382 sizeof(*cache) + 383 sizeof(((struct mlx5_hws_cnt_pool_caches *)0)->qcache[0]) 384 * ccfg->q_num, 0, SOCKET_ID_ANY); 385 if (cache == NULL) 386 return NULL; 387 /* Store the necessary cache parameters. */ 388 cache->fetch_sz = ccfg->fetch_sz; 389 cache->preload_sz = ccfg->preload_sz; 390 cache->threshold = ccfg->threshold; 391 cache->q_num = ccfg->q_num; 392 for (qidx = 0; qidx < ccfg->q_num; qidx++) { 393 snprintf(mz_name, sizeof(mz_name), "%s_qc/%x", pcfg->name, qidx); 394 cache->qcache[qidx] = rte_ring_create(mz_name, ccfg->size, 395 SOCKET_ID_ANY, 396 RING_F_SP_ENQ | RING_F_SC_DEQ | 397 RING_F_EXACT_SZ); 398 if (cache->qcache[qidx] == NULL) 399 goto error; 400 } 401 return cache; 402 403 error: 404 while (qidx--) 405 rte_ring_free(cache->qcache[qidx]); 406 mlx5_free(cache); 407 return NULL; 408 } 409 410 static struct mlx5_hws_cnt_pool * 411 mlx5_hws_cnt_pool_init(struct mlx5_dev_ctx_shared *sh, 412 const struct mlx5_hws_cnt_pool_cfg *pcfg, 413 const struct mlx5_hws_cache_param *ccfg, 414 struct rte_flow_error *error) 415 { 416 char mz_name[RTE_MEMZONE_NAMESIZE]; 417 struct mlx5_hws_cnt_pool *cntp; 418 uint64_t cnt_num = 0; 419 420 MLX5_ASSERT(pcfg); 421 MLX5_ASSERT(ccfg); 422 cntp = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO, sizeof(*cntp), 0, 423 SOCKET_ID_ANY); 424 if (cntp == NULL) { 425 rte_flow_error_set(error, ENOMEM, 426 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, 427 "failed to allocate counter pool context"); 428 return NULL; 429 } 430 431 cntp->cfg = *pcfg; 432 if (cntp->cfg.host_cpool) 433 return cntp; 434 if (pcfg->request_num > sh->hws_max_nb_counters) { 435 DRV_LOG(ERR, "Counter number %u " 436 "is greater than the maximum supported (%u).", 437 pcfg->request_num, sh->hws_max_nb_counters); 438 rte_flow_error_set(error, EINVAL, 439 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, 440 "requested counters number exceeds supported capacity"); 441 goto error; 442 } 443 cnt_num = pcfg->request_num * (100 + pcfg->alloc_factor) / 100; 444 if (cnt_num > UINT32_MAX) { 445 DRV_LOG(ERR, "counter number %"PRIu64" is out of 32bit range", 446 cnt_num); 447 rte_flow_error_set(error, EINVAL, 448 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, 449 "counters number must fit in 32 bits"); 450 goto error; 451 } 452 /* 453 * When counter request number is supported, but the factor takes it 454 * out of size, the factor is reduced. 455 */ 456 cnt_num = RTE_MIN((uint32_t)cnt_num, sh->hws_max_nb_counters); 457 cntp->pool = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO, 458 sizeof(struct mlx5_hws_cnt) * cnt_num, 459 0, SOCKET_ID_ANY); 460 if (cntp->pool == NULL) { 461 rte_flow_error_set(error, ENOMEM, 462 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, 463 "failed to allocate counter pool context"); 464 goto error; 465 } 466 snprintf(mz_name, sizeof(mz_name), "%s_F_RING", pcfg->name); 467 cntp->free_list = rte_ring_create_elem(mz_name, sizeof(cnt_id_t), 468 (uint32_t)cnt_num, SOCKET_ID_ANY, 469 RING_F_MP_HTS_ENQ | RING_F_MC_HTS_DEQ | 470 RING_F_EXACT_SZ); 471 if (cntp->free_list == NULL) { 472 rte_flow_error_set(error, ENOMEM, 473 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, 474 "failed to allocate free counters ring"); 475 goto error; 476 } 477 snprintf(mz_name, sizeof(mz_name), "%s_R_RING", pcfg->name); 478 cntp->wait_reset_list = rte_ring_create_elem(mz_name, sizeof(cnt_id_t), 479 (uint32_t)cnt_num, SOCKET_ID_ANY, 480 RING_F_MP_HTS_ENQ | RING_F_SC_DEQ | RING_F_EXACT_SZ); 481 if (cntp->wait_reset_list == NULL) { 482 rte_flow_error_set(error, ENOMEM, 483 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, 484 "failed to allocate counters wait reset ring"); 485 goto error; 486 } 487 snprintf(mz_name, sizeof(mz_name), "%s_U_RING", pcfg->name); 488 cntp->reuse_list = rte_ring_create_elem(mz_name, sizeof(cnt_id_t), 489 (uint32_t)cnt_num, SOCKET_ID_ANY, 490 RING_F_MP_HTS_ENQ | RING_F_MC_HTS_DEQ | RING_F_EXACT_SZ); 491 if (cntp->reuse_list == NULL) { 492 rte_flow_error_set(error, ENOMEM, 493 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, 494 "failed to allocate counters reuse ring"); 495 goto error; 496 } 497 /* Allocate counter cache only if needed. */ 498 if (mlx5_hws_cnt_should_enable_cache(pcfg, ccfg)) { 499 cntp->cache = mlx5_hws_cnt_cache_init(pcfg, ccfg); 500 if (cntp->cache == NULL) { 501 rte_flow_error_set(error, ENOMEM, 502 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, 503 "failed to allocate counters cache"); 504 goto error; 505 } 506 } 507 /* Initialize the time for aging-out calculation. */ 508 cntp->time_of_last_age_check = MLX5_CURR_TIME_SEC; 509 return cntp; 510 error: 511 mlx5_hws_cnt_pool_deinit(cntp); 512 return NULL; 513 } 514 515 int 516 mlx5_hws_cnt_service_thread_create(struct mlx5_dev_ctx_shared *sh) 517 { 518 char name[RTE_THREAD_INTERNAL_NAME_SIZE]; 519 rte_thread_attr_t attr; 520 int ret; 521 uint32_t service_core = sh->cnt_svc->service_core; 522 523 ret = rte_thread_attr_init(&attr); 524 if (ret != 0) 525 goto error; 526 CPU_SET(service_core, &attr.cpuset); 527 sh->cnt_svc->svc_running = 1; 528 ret = rte_thread_create(&sh->cnt_svc->service_thread, 529 &attr, mlx5_hws_cnt_svc, sh); 530 if (ret != 0) 531 goto error; 532 snprintf(name, sizeof(name), "mlx5-cn%d", service_core); 533 rte_thread_set_prefixed_name(sh->cnt_svc->service_thread, name); 534 535 return 0; 536 error: 537 DRV_LOG(ERR, "Failed to create HW steering's counter service thread."); 538 return ret; 539 } 540 541 void 542 mlx5_hws_cnt_service_thread_destroy(struct mlx5_dev_ctx_shared *sh) 543 { 544 if (sh->cnt_svc->service_thread.opaque_id == 0) 545 return; 546 sh->cnt_svc->svc_running = 0; 547 rte_thread_join(sh->cnt_svc->service_thread, NULL); 548 sh->cnt_svc->service_thread.opaque_id = 0; 549 } 550 551 static int 552 mlx5_hws_cnt_pool_dcs_alloc(struct mlx5_dev_ctx_shared *sh, 553 struct mlx5_hws_cnt_pool *cpool, 554 struct rte_flow_error *error) 555 { 556 struct mlx5_hca_attr *hca_attr = &sh->cdev->config.hca_attr; 557 uint32_t max_log_bulk_sz = sh->hws_max_log_bulk_sz; 558 uint32_t log_bulk_sz; 559 uint32_t idx, alloc_candidate, alloced = 0; 560 unsigned int cnt_num = mlx5_hws_cnt_pool_get_size(cpool); 561 struct mlx5_devx_counter_attr attr = {0}; 562 struct mlx5_devx_obj *dcs; 563 564 MLX5_ASSERT(cpool->cfg.host_cpool == NULL); 565 if (hca_attr->flow_counter_bulk_log_max_alloc == 0) 566 return rte_flow_error_set(error, ENOTSUP, 567 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 568 NULL, "FW doesn't support bulk log max alloc"); 569 cnt_num = RTE_ALIGN_CEIL(cnt_num, 4); /* minimal 4 counter in bulk. */ 570 log_bulk_sz = RTE_MIN(max_log_bulk_sz, rte_log2_u32(cnt_num)); 571 attr.pd = sh->cdev->pdn; 572 attr.pd_valid = 1; 573 attr.bulk_log_max_alloc = 1; 574 attr.flow_counter_bulk_log_size = log_bulk_sz; 575 idx = 0; 576 dcs = mlx5_devx_cmd_flow_counter_alloc_general(sh->cdev->ctx, &attr); 577 if (dcs == NULL) { 578 rte_flow_error_set(error, rte_errno, 579 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 580 NULL, "FW failed to allocate counters"); 581 goto error; 582 } 583 cpool->dcs_mng.dcs[idx].obj = dcs; 584 cpool->dcs_mng.dcs[idx].batch_sz = (1 << log_bulk_sz); 585 cpool->dcs_mng.batch_total++; 586 idx++; 587 cpool->dcs_mng.dcs[0].iidx = 0; 588 alloced = cpool->dcs_mng.dcs[0].batch_sz; 589 if (cnt_num > cpool->dcs_mng.dcs[0].batch_sz) { 590 while (idx < MLX5_HWS_CNT_DCS_NUM) { 591 attr.flow_counter_bulk_log_size = --max_log_bulk_sz; 592 alloc_candidate = RTE_BIT32(max_log_bulk_sz); 593 if (alloced + alloc_candidate > sh->hws_max_nb_counters) 594 continue; 595 dcs = mlx5_devx_cmd_flow_counter_alloc_general 596 (sh->cdev->ctx, &attr); 597 if (dcs == NULL) { 598 rte_flow_error_set(error, rte_errno, 599 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 600 NULL, "FW failed to allocate counters"); 601 goto error; 602 } 603 cpool->dcs_mng.dcs[idx].obj = dcs; 604 cpool->dcs_mng.dcs[idx].batch_sz = alloc_candidate; 605 cpool->dcs_mng.dcs[idx].iidx = alloced; 606 alloced += cpool->dcs_mng.dcs[idx].batch_sz; 607 cpool->dcs_mng.batch_total++; 608 if (alloced >= cnt_num) 609 break; 610 idx++; 611 } 612 } 613 return 0; 614 error: 615 DRV_LOG(DEBUG, 616 "Cannot alloc device counter, allocated[%" PRIu32 "] request[%" PRIu32 "]", 617 alloced, cnt_num); 618 for (idx = 0; idx < cpool->dcs_mng.batch_total; idx++) { 619 mlx5_devx_cmd_destroy(cpool->dcs_mng.dcs[idx].obj); 620 cpool->dcs_mng.dcs[idx].obj = NULL; 621 cpool->dcs_mng.dcs[idx].batch_sz = 0; 622 cpool->dcs_mng.dcs[idx].iidx = 0; 623 } 624 cpool->dcs_mng.batch_total = 0; 625 return -1; 626 } 627 628 static void 629 mlx5_hws_cnt_pool_dcs_free(struct mlx5_dev_ctx_shared *sh, 630 struct mlx5_hws_cnt_pool *cpool) 631 { 632 uint32_t idx; 633 634 if (cpool == NULL) 635 return; 636 for (idx = 0; idx < MLX5_HWS_CNT_DCS_NUM; idx++) 637 mlx5_devx_cmd_destroy(cpool->dcs_mng.dcs[idx].obj); 638 if (cpool->raw_mng) { 639 mlx5_hws_cnt_raw_data_free(sh, cpool->raw_mng); 640 cpool->raw_mng = NULL; 641 } 642 } 643 644 static void 645 mlx5_hws_cnt_pool_action_destroy(struct mlx5_hws_cnt_pool *cpool) 646 { 647 uint32_t idx; 648 649 for (idx = 0; idx < cpool->dcs_mng.batch_total; idx++) { 650 struct mlx5_hws_cnt_dcs *dcs = &cpool->dcs_mng.dcs[idx]; 651 652 if (dcs->dr_action != NULL) { 653 mlx5dr_action_destroy(dcs->dr_action); 654 dcs->dr_action = NULL; 655 } 656 } 657 } 658 659 static int 660 mlx5_hws_cnt_pool_action_create(struct mlx5_priv *priv, 661 struct mlx5_hws_cnt_pool *cpool) 662 { 663 struct mlx5_hws_cnt_pool *hpool = mlx5_hws_cnt_host_pool(cpool); 664 uint32_t idx; 665 int ret = 0; 666 uint32_t flags; 667 668 flags = MLX5DR_ACTION_FLAG_HWS_RX | MLX5DR_ACTION_FLAG_HWS_TX; 669 if (priv->sh->config.dv_esw_en && priv->master) 670 flags |= MLX5DR_ACTION_FLAG_HWS_FDB; 671 for (idx = 0; idx < hpool->dcs_mng.batch_total; idx++) { 672 struct mlx5_hws_cnt_dcs *hdcs = &hpool->dcs_mng.dcs[idx]; 673 struct mlx5_hws_cnt_dcs *dcs = &cpool->dcs_mng.dcs[idx]; 674 675 dcs->dr_action = mlx5dr_action_create_counter(priv->dr_ctx, 676 (struct mlx5dr_devx_obj *)hdcs->obj, 677 flags); 678 if (dcs->dr_action == NULL) { 679 mlx5_hws_cnt_pool_action_destroy(cpool); 680 ret = -ENOSYS; 681 break; 682 } 683 } 684 return ret; 685 } 686 687 int 688 mlx5_hws_cnt_pool_create(struct rte_eth_dev *dev, 689 uint32_t nb_counters, uint16_t nb_queue, 690 struct mlx5_hws_cnt_pool *chost, 691 struct rte_flow_error *error) 692 { 693 struct mlx5_hws_cnt_pool *cpool = NULL; 694 struct mlx5_priv *priv = dev->data->dev_private; 695 struct mlx5_hws_cache_param cparam = {0}; 696 struct mlx5_hws_cnt_pool_cfg pcfg = {0}; 697 char *mp_name; 698 int ret = 0; 699 size_t sz; 700 701 mp_name = mlx5_malloc(MLX5_MEM_ZERO, RTE_MEMZONE_NAMESIZE, 0, SOCKET_ID_ANY); 702 if (mp_name == NULL) 703 goto error; 704 snprintf(mp_name, RTE_MEMZONE_NAMESIZE, "MLX5_HWS_CNT_P_%x", dev->data->port_id); 705 pcfg.name = mp_name; 706 pcfg.request_num = nb_counters; 707 pcfg.alloc_factor = HWS_CNT_ALLOC_FACTOR_DEFAULT; 708 if (chost) { 709 pcfg.host_cpool = chost; 710 cpool = mlx5_hws_cnt_pool_init(priv->sh, &pcfg, &cparam, error); 711 if (cpool == NULL) 712 goto error; 713 ret = mlx5_hws_cnt_pool_action_create(priv, cpool); 714 if (ret != 0) { 715 rte_flow_error_set(error, -ret, 716 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 717 NULL, "failed to allocate counter actions on guest port"); 718 goto error; 719 } 720 goto success; 721 } 722 /* init cnt service if not. */ 723 if (priv->sh->cnt_svc == NULL) { 724 ret = mlx5_hws_cnt_svc_init(priv->sh, error); 725 if (ret) 726 return ret; 727 } 728 cparam.fetch_sz = HWS_CNT_CACHE_FETCH_DEFAULT; 729 cparam.preload_sz = HWS_CNT_CACHE_PRELOAD_DEFAULT; 730 cparam.q_num = nb_queue; 731 cparam.threshold = HWS_CNT_CACHE_THRESHOLD_DEFAULT; 732 cparam.size = HWS_CNT_CACHE_SZ_DEFAULT; 733 cpool = mlx5_hws_cnt_pool_init(priv->sh, &pcfg, &cparam, error); 734 if (cpool == NULL) 735 goto error; 736 ret = mlx5_hws_cnt_pool_dcs_alloc(priv->sh, cpool, error); 737 if (ret != 0) 738 goto error; 739 sz = RTE_ALIGN_CEIL(mlx5_hws_cnt_pool_get_size(cpool), 4); 740 cpool->raw_mng = mlx5_hws_cnt_raw_data_alloc(priv->sh, sz, error); 741 if (cpool->raw_mng == NULL) 742 goto error; 743 __hws_cnt_id_load(cpool); 744 /* 745 * Bump query gen right after pool create so the 746 * pre-loaded counters can be used directly 747 * because they already have init value no need 748 * to wait for query. 749 */ 750 cpool->query_gen = 1; 751 ret = mlx5_hws_cnt_pool_action_create(priv, cpool); 752 if (ret != 0) { 753 rte_flow_error_set(error, -ret, 754 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 755 NULL, "failed to allocate counter actions"); 756 goto error; 757 } 758 priv->sh->cnt_svc->refcnt++; 759 cpool->priv = priv; 760 rte_spinlock_lock(&priv->sh->cpool_lock); 761 LIST_INSERT_HEAD(&priv->sh->hws_cpool_list, cpool, next); 762 rte_spinlock_unlock(&priv->sh->cpool_lock); 763 success: 764 priv->hws_cpool = cpool; 765 return 0; 766 error: 767 MLX5_ASSERT(ret); 768 mlx5_hws_cnt_pool_destroy(priv->sh, cpool); 769 priv->hws_cpool = NULL; 770 return ret; 771 } 772 773 void 774 mlx5_hws_cnt_pool_destroy(struct mlx5_dev_ctx_shared *sh, 775 struct mlx5_hws_cnt_pool *cpool) 776 { 777 if (cpool == NULL) 778 return; 779 /* 780 * 16M counter consumes 200ms to finish the query. 781 * Maybe blocked for at most 200ms here. 782 */ 783 rte_spinlock_lock(&sh->cpool_lock); 784 /* Try to remove cpool before it was added to list caused segfault. */ 785 if (!LIST_EMPTY(&sh->hws_cpool_list) && cpool->next.le_prev) 786 LIST_REMOVE(cpool, next); 787 rte_spinlock_unlock(&sh->cpool_lock); 788 if (cpool->cfg.host_cpool == NULL) { 789 if (--sh->cnt_svc->refcnt == 0) 790 mlx5_hws_cnt_svc_deinit(sh); 791 } 792 mlx5_hws_cnt_pool_action_destroy(cpool); 793 if (cpool->cfg.host_cpool == NULL) { 794 mlx5_hws_cnt_pool_dcs_free(sh, cpool); 795 mlx5_hws_cnt_raw_data_free(sh, cpool->raw_mng); 796 } 797 mlx5_free((void *)cpool->cfg.name); 798 mlx5_hws_cnt_pool_deinit(cpool); 799 } 800 801 int 802 mlx5_hws_cnt_svc_init(struct mlx5_dev_ctx_shared *sh, 803 struct rte_flow_error *error) 804 { 805 int ret; 806 807 sh->cnt_svc = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO, 808 sizeof(*sh->cnt_svc), 0, SOCKET_ID_ANY); 809 if (sh->cnt_svc == NULL) 810 goto err; 811 sh->cnt_svc->query_interval = sh->config.cnt_svc.cycle_time; 812 sh->cnt_svc->service_core = sh->config.cnt_svc.service_core; 813 ret = mlx5_aso_cnt_queue_init(sh); 814 if (ret != 0) { 815 mlx5_free(sh->cnt_svc); 816 sh->cnt_svc = NULL; 817 goto err; 818 } 819 ret = mlx5_hws_cnt_service_thread_create(sh); 820 if (ret != 0) { 821 mlx5_aso_cnt_queue_uninit(sh); 822 mlx5_free(sh->cnt_svc); 823 sh->cnt_svc = NULL; 824 } 825 return 0; 826 err: 827 return rte_flow_error_set(error, ENOMEM, 828 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, 829 NULL, "failed to init counters service"); 830 831 } 832 833 void 834 mlx5_hws_cnt_svc_deinit(struct mlx5_dev_ctx_shared *sh) 835 { 836 if (sh->cnt_svc == NULL) 837 return; 838 mlx5_hws_cnt_service_thread_destroy(sh); 839 mlx5_aso_cnt_queue_uninit(sh); 840 mlx5_free(sh->cnt_svc); 841 sh->cnt_svc = NULL; 842 } 843 844 /** 845 * Destroy AGE action. 846 * 847 * @param priv 848 * Pointer to the port private data structure. 849 * @param idx 850 * Index of AGE parameter. 851 * @param error 852 * Pointer to error structure. 853 * 854 * @return 855 * 0 on success, a negative errno value otherwise and rte_errno is set. 856 */ 857 int 858 mlx5_hws_age_action_destroy(struct mlx5_priv *priv, uint32_t idx, 859 struct rte_flow_error *error) 860 { 861 struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv); 862 struct mlx5_indexed_pool *ipool = age_info->ages_ipool; 863 struct mlx5_hws_age_param *param = mlx5_ipool_get(ipool, idx); 864 865 if (param == NULL) 866 return rte_flow_error_set(error, EINVAL, 867 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, 868 "invalid AGE parameter index"); 869 switch (rte_atomic_exchange_explicit(¶m->state, HWS_AGE_FREE, 870 rte_memory_order_relaxed)) { 871 case HWS_AGE_CANDIDATE: 872 case HWS_AGE_AGED_OUT_REPORTED: 873 mlx5_hws_age_param_free(priv, param->own_cnt_index, ipool, idx); 874 break; 875 case HWS_AGE_AGED_OUT_NOT_REPORTED: 876 case HWS_AGE_CANDIDATE_INSIDE_RING: 877 /* 878 * In both cases AGE is inside the ring. Change the state here 879 * and destroy it later when it is taken out of ring. 880 */ 881 break; 882 case HWS_AGE_FREE: 883 /* 884 * If index is valid and state is FREE, it says this AGE has 885 * been freed for the user but not for the PMD since it is 886 * inside the ring. 887 */ 888 return rte_flow_error_set(error, EINVAL, 889 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, 890 "this AGE has already been released"); 891 default: 892 MLX5_ASSERT(0); 893 break; 894 } 895 return 0; 896 } 897 898 /** 899 * Create AGE action parameter. 900 * 901 * @param[in] priv 902 * Pointer to the port private data structure. 903 * @param[in] queue_id 904 * Which HWS queue to be used. 905 * @param[in] shared 906 * Whether it indirect AGE action. 907 * @param[in] flow_idx 908 * Flow index from indexed pool. 909 * For indirect AGE action it doesn't affect. 910 * @param[in] age 911 * Pointer to the aging action configuration. 912 * @param[out] error 913 * Pointer to error structure. 914 * 915 * @return 916 * Index to AGE action parameter on success, 0 otherwise. 917 */ 918 uint32_t 919 mlx5_hws_age_action_create(struct mlx5_priv *priv, uint32_t queue_id, 920 bool shared, const struct rte_flow_action_age *age, 921 uint32_t flow_idx, struct rte_flow_error *error) 922 { 923 struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv); 924 struct mlx5_indexed_pool *ipool = age_info->ages_ipool; 925 struct mlx5_hws_age_param *param; 926 uint32_t age_idx; 927 928 param = mlx5_ipool_malloc(ipool, &age_idx); 929 if (param == NULL) { 930 rte_flow_error_set(error, ENOMEM, 931 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, 932 "cannot allocate AGE parameter"); 933 return 0; 934 } 935 MLX5_ASSERT(rte_atomic_load_explicit(¶m->state, 936 rte_memory_order_relaxed) == HWS_AGE_FREE); 937 if (shared) { 938 param->nb_cnts = 0; 939 param->accumulator_hits = 0; 940 param->accumulator_cnt = 0; 941 flow_idx = age_idx; 942 } else { 943 param->nb_cnts = 1; 944 } 945 param->context = age->context ? age->context : 946 (void *)(uintptr_t)flow_idx; 947 param->timeout = age->timeout; 948 param->queue_id = queue_id; 949 param->accumulator_last_hits = 0; 950 param->own_cnt_index = 0; 951 param->sec_since_last_hit = 0; 952 param->state = HWS_AGE_CANDIDATE; 953 return age_idx; 954 } 955 956 /** 957 * Update indirect AGE action parameter. 958 * 959 * @param[in] priv 960 * Pointer to the port private data structure. 961 * @param[in] idx 962 * Index of AGE parameter. 963 * @param[in] update 964 * Update value. 965 * @param[out] error 966 * Pointer to error structure. 967 * 968 * @return 969 * 0 on success, a negative errno value otherwise and rte_errno is set. 970 */ 971 int 972 mlx5_hws_age_action_update(struct mlx5_priv *priv, uint32_t idx, 973 const void *update, struct rte_flow_error *error) 974 { 975 const struct rte_flow_update_age *update_ade = update; 976 struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv); 977 struct mlx5_indexed_pool *ipool = age_info->ages_ipool; 978 struct mlx5_hws_age_param *param = mlx5_ipool_get(ipool, idx); 979 bool sec_since_last_hit_reset = false; 980 bool state_update = false; 981 982 if (param == NULL) 983 return rte_flow_error_set(error, EINVAL, 984 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, 985 "invalid AGE parameter index"); 986 if (update_ade->timeout_valid) { 987 uint32_t old_timeout = rte_atomic_exchange_explicit(¶m->timeout, 988 update_ade->timeout, 989 rte_memory_order_relaxed); 990 991 if (old_timeout == 0) 992 sec_since_last_hit_reset = true; 993 else if (old_timeout < update_ade->timeout || 994 update_ade->timeout == 0) 995 /* 996 * When timeout is increased, aged-out flows might be 997 * active again and state should be updated accordingly. 998 * When new timeout is 0, we update the state for not 999 * reporting aged-out stopped. 1000 */ 1001 state_update = true; 1002 } 1003 if (update_ade->touch) { 1004 sec_since_last_hit_reset = true; 1005 state_update = true; 1006 } 1007 if (sec_since_last_hit_reset) 1008 rte_atomic_store_explicit(¶m->sec_since_last_hit, 0, 1009 rte_memory_order_relaxed); 1010 if (state_update) { 1011 uint16_t expected = HWS_AGE_AGED_OUT_NOT_REPORTED; 1012 1013 /* 1014 * Change states of aged-out flows to active: 1015 * - AGED_OUT_NOT_REPORTED -> CANDIDATE_INSIDE_RING 1016 * - AGED_OUT_REPORTED -> CANDIDATE 1017 */ 1018 if (!rte_atomic_compare_exchange_strong_explicit(¶m->state, &expected, 1019 HWS_AGE_CANDIDATE_INSIDE_RING, 1020 rte_memory_order_relaxed, 1021 rte_memory_order_relaxed) && 1022 expected == HWS_AGE_AGED_OUT_REPORTED) 1023 rte_atomic_store_explicit(¶m->state, HWS_AGE_CANDIDATE, 1024 rte_memory_order_relaxed); 1025 } 1026 return 0; 1027 } 1028 1029 /** 1030 * Get the AGE context if the aged-out index is still valid. 1031 * 1032 * @param priv 1033 * Pointer to the port private data structure. 1034 * @param idx 1035 * Index of AGE parameter. 1036 * 1037 * @return 1038 * AGE context if the index is still aged-out, NULL otherwise. 1039 */ 1040 void * 1041 mlx5_hws_age_context_get(struct mlx5_priv *priv, uint32_t idx) 1042 { 1043 struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv); 1044 struct mlx5_indexed_pool *ipool = age_info->ages_ipool; 1045 struct mlx5_hws_age_param *param = mlx5_ipool_get(ipool, idx); 1046 uint16_t expected = HWS_AGE_AGED_OUT_NOT_REPORTED; 1047 1048 MLX5_ASSERT(param != NULL); 1049 if (rte_atomic_compare_exchange_strong_explicit(¶m->state, &expected, 1050 HWS_AGE_AGED_OUT_REPORTED, 1051 rte_memory_order_relaxed, rte_memory_order_relaxed)) 1052 return param->context; 1053 switch (expected) { 1054 case HWS_AGE_FREE: 1055 /* 1056 * This AGE couldn't have been destroyed since it was inside 1057 * the ring. Its state has updated, and now it is actually 1058 * destroyed. 1059 */ 1060 mlx5_hws_age_param_free(priv, param->own_cnt_index, ipool, idx); 1061 break; 1062 case HWS_AGE_CANDIDATE_INSIDE_RING: 1063 rte_atomic_store_explicit(¶m->state, HWS_AGE_CANDIDATE, 1064 rte_memory_order_relaxed); 1065 break; 1066 case HWS_AGE_CANDIDATE: 1067 /* 1068 * Only BG thread pushes to ring and it never pushes this state. 1069 * When AGE inside the ring becomes candidate, it has a special 1070 * state called HWS_AGE_CANDIDATE_INSIDE_RING. 1071 * Fall-through. 1072 */ 1073 case HWS_AGE_AGED_OUT_REPORTED: 1074 /* 1075 * Only this thread (doing query) may write this state, and it 1076 * happens only after the query thread takes it out of the ring. 1077 * Fall-through. 1078 */ 1079 case HWS_AGE_AGED_OUT_NOT_REPORTED: 1080 /* 1081 * In this case the compare return true and function return 1082 * the context immediately. 1083 * Fall-through. 1084 */ 1085 default: 1086 MLX5_ASSERT(0); 1087 break; 1088 } 1089 return NULL; 1090 } 1091 1092 #ifdef RTE_ARCH_64 1093 #define MLX5_HWS_AGED_OUT_RING_SIZE_MAX UINT32_MAX 1094 #else 1095 #define MLX5_HWS_AGED_OUT_RING_SIZE_MAX RTE_BIT32(8) 1096 #endif 1097 1098 /** 1099 * Get the size of aged out ring list for each queue. 1100 * 1101 * The size is one percent of nb_counters divided by nb_queues. 1102 * The ring size must be power of 2, so it align up to power of 2. 1103 * In 32 bit systems, the size is limited by 256. 1104 * 1105 * This function is called when RTE_FLOW_PORT_FLAG_STRICT_QUEUE is on. 1106 * 1107 * @param nb_counters 1108 * Final number of allocated counter in the pool. 1109 * @param nb_queues 1110 * Number of HWS queues in this port. 1111 * 1112 * @return 1113 * Size of aged out ring per queue. 1114 */ 1115 static __rte_always_inline uint32_t 1116 mlx5_hws_aged_out_q_ring_size_get(uint32_t nb_counters, uint32_t nb_queues) 1117 { 1118 uint32_t size = rte_align32pow2((nb_counters / 100) / nb_queues); 1119 uint32_t max_size = MLX5_HWS_AGED_OUT_RING_SIZE_MAX; 1120 1121 return RTE_MIN(size, max_size); 1122 } 1123 1124 /** 1125 * Get the size of the aged out ring list. 1126 * 1127 * The size is one percent of nb_counters. 1128 * The ring size must be power of 2, so it align up to power of 2. 1129 * In 32 bit systems, the size is limited by 256. 1130 * 1131 * This function is called when RTE_FLOW_PORT_FLAG_STRICT_QUEUE is off. 1132 * 1133 * @param nb_counters 1134 * Final number of allocated counter in the pool. 1135 * 1136 * @return 1137 * Size of the aged out ring list. 1138 */ 1139 static __rte_always_inline uint32_t 1140 mlx5_hws_aged_out_ring_size_get(uint32_t nb_counters) 1141 { 1142 uint32_t size = rte_align32pow2(nb_counters / 100); 1143 uint32_t max_size = MLX5_HWS_AGED_OUT_RING_SIZE_MAX; 1144 1145 return RTE_MIN(size, max_size); 1146 } 1147 1148 /** 1149 * Initialize the shared aging list information per port. 1150 * 1151 * @param dev 1152 * Pointer to the rte_eth_dev structure. 1153 * @param nb_queues 1154 * Number of HWS queues. 1155 * @param strict_queue 1156 * Indicator whether is strict_queue mode. 1157 * @param ring_size 1158 * Size of aged-out ring for creation. 1159 * 1160 * @return 1161 * 0 on success, a negative errno value otherwise and rte_errno is set. 1162 */ 1163 static int 1164 mlx5_hws_age_info_init(struct rte_eth_dev *dev, uint16_t nb_queues, 1165 bool strict_queue, uint32_t ring_size) 1166 { 1167 struct mlx5_priv *priv = dev->data->dev_private; 1168 struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv); 1169 uint32_t flags = RING_F_SP_ENQ | RING_F_SC_DEQ | RING_F_EXACT_SZ; 1170 char mz_name[RTE_MEMZONE_NAMESIZE]; 1171 struct rte_ring *r = NULL; 1172 uint32_t qidx; 1173 1174 age_info->flags = 0; 1175 if (strict_queue) { 1176 size_t size = sizeof(*age_info->hw_q_age) + 1177 sizeof(struct rte_ring *) * nb_queues; 1178 1179 age_info->hw_q_age = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO, 1180 size, 0, SOCKET_ID_ANY); 1181 if (age_info->hw_q_age == NULL) 1182 return -ENOMEM; 1183 for (qidx = 0; qidx < nb_queues; ++qidx) { 1184 snprintf(mz_name, sizeof(mz_name), 1185 "port_%u_queue_%u_aged_out_ring", 1186 dev->data->port_id, qidx); 1187 r = rte_ring_create(mz_name, ring_size, SOCKET_ID_ANY, 1188 flags); 1189 if (r == NULL) { 1190 DRV_LOG(ERR, "\"%s\" creation failed: %s", 1191 mz_name, rte_strerror(rte_errno)); 1192 goto error; 1193 } 1194 age_info->hw_q_age->aged_lists[qidx] = r; 1195 DRV_LOG(DEBUG, 1196 "\"%s\" is successfully created (size=%u).", 1197 mz_name, ring_size); 1198 } 1199 age_info->hw_q_age->nb_rings = nb_queues; 1200 } else { 1201 snprintf(mz_name, sizeof(mz_name), "port_%u_aged_out_ring", 1202 dev->data->port_id); 1203 r = rte_ring_create(mz_name, ring_size, SOCKET_ID_ANY, flags); 1204 if (r == NULL) { 1205 DRV_LOG(ERR, "\"%s\" creation failed: %s", mz_name, 1206 rte_strerror(rte_errno)); 1207 return -rte_errno; 1208 } 1209 age_info->hw_age.aged_list = r; 1210 DRV_LOG(DEBUG, "\"%s\" is successfully created (size=%u).", 1211 mz_name, ring_size); 1212 /* In non "strict_queue" mode, initialize the event. */ 1213 MLX5_AGE_SET(age_info, MLX5_AGE_TRIGGER); 1214 } 1215 return 0; 1216 error: 1217 MLX5_ASSERT(strict_queue); 1218 while (qidx--) 1219 rte_ring_free(age_info->hw_q_age->aged_lists[qidx]); 1220 mlx5_free(age_info->hw_q_age); 1221 return -1; 1222 } 1223 1224 /** 1225 * Cleanup aged-out ring before destroying. 1226 * 1227 * @param priv 1228 * Pointer to port private object. 1229 * @param r 1230 * Pointer to aged-out ring object. 1231 */ 1232 static void 1233 mlx5_hws_aged_out_ring_cleanup(struct mlx5_priv *priv, struct rte_ring *r) 1234 { 1235 int ring_size = rte_ring_count(r); 1236 1237 while (ring_size > 0) { 1238 uint32_t age_idx = 0; 1239 1240 if (rte_ring_dequeue_elem(r, &age_idx, sizeof(uint32_t)) < 0) 1241 break; 1242 /* get the AGE context if the aged-out index is still valid. */ 1243 mlx5_hws_age_context_get(priv, age_idx); 1244 ring_size--; 1245 } 1246 rte_ring_free(r); 1247 } 1248 1249 /** 1250 * Destroy the shared aging list information per port. 1251 * 1252 * @param priv 1253 * Pointer to port private object. 1254 */ 1255 static void 1256 mlx5_hws_age_info_destroy(struct mlx5_priv *priv) 1257 { 1258 struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv); 1259 uint16_t nb_queues = age_info->hw_q_age->nb_rings; 1260 struct rte_ring *r; 1261 1262 if (priv->hws_strict_queue) { 1263 uint32_t qidx; 1264 1265 for (qidx = 0; qidx < nb_queues; ++qidx) { 1266 r = age_info->hw_q_age->aged_lists[qidx]; 1267 mlx5_hws_aged_out_ring_cleanup(priv, r); 1268 } 1269 mlx5_free(age_info->hw_q_age); 1270 } else { 1271 r = age_info->hw_age.aged_list; 1272 mlx5_hws_aged_out_ring_cleanup(priv, r); 1273 } 1274 } 1275 1276 /** 1277 * Initialize the aging mechanism per port. 1278 * 1279 * @param dev 1280 * Pointer to the rte_eth_dev structure. 1281 * @param attr 1282 * Port configuration attributes. 1283 * @param nb_queues 1284 * Number of HWS queues. 1285 * 1286 * @return 1287 * 0 on success, a negative errno value otherwise and rte_errno is set. 1288 */ 1289 int 1290 mlx5_hws_age_pool_init(struct rte_eth_dev *dev, 1291 uint32_t nb_aging_objects, 1292 uint16_t nb_queues, 1293 bool strict_queue) 1294 { 1295 struct mlx5_priv *priv = dev->data->dev_private; 1296 struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv); 1297 struct mlx5_indexed_pool_config cfg = { 1298 .size = 1299 RTE_CACHE_LINE_ROUNDUP(sizeof(struct mlx5_hws_age_param)), 1300 .trunk_size = 1 << 12, 1301 .per_core_cache = 1 << 13, 1302 .need_lock = 1, 1303 .release_mem_en = !!priv->sh->config.reclaim_mode, 1304 .malloc = mlx5_malloc, 1305 .free = mlx5_free, 1306 .type = "mlx5_hws_age_pool", 1307 }; 1308 uint32_t nb_alloc_cnts; 1309 uint32_t rsize; 1310 uint32_t nb_ages_updated; 1311 int ret; 1312 1313 MLX5_ASSERT(priv->hws_cpool); 1314 nb_alloc_cnts = mlx5_hws_cnt_pool_get_size(priv->hws_cpool); 1315 if (strict_queue) { 1316 rsize = mlx5_hws_aged_out_q_ring_size_get(nb_alloc_cnts, 1317 nb_queues); 1318 nb_ages_updated = rsize * nb_queues + nb_aging_objects; 1319 } else { 1320 rsize = mlx5_hws_aged_out_ring_size_get(nb_alloc_cnts); 1321 nb_ages_updated = rsize + nb_aging_objects; 1322 } 1323 ret = mlx5_hws_age_info_init(dev, nb_queues, strict_queue, rsize); 1324 if (ret < 0) 1325 return ret; 1326 cfg.max_idx = rte_align32pow2(nb_ages_updated); 1327 if (cfg.max_idx <= cfg.trunk_size) { 1328 cfg.per_core_cache = 0; 1329 cfg.trunk_size = cfg.max_idx; 1330 } else if (cfg.max_idx <= MLX5_HW_IPOOL_SIZE_THRESHOLD) { 1331 cfg.per_core_cache = MLX5_HW_IPOOL_CACHE_MIN; 1332 } 1333 age_info->ages_ipool = mlx5_ipool_create(&cfg); 1334 if (age_info->ages_ipool == NULL) { 1335 mlx5_hws_age_info_destroy(priv); 1336 rte_errno = ENOMEM; 1337 return -rte_errno; 1338 } 1339 priv->hws_age_req = 1; 1340 return 0; 1341 } 1342 1343 /** 1344 * Cleanup all aging resources per port. 1345 * 1346 * @param priv 1347 * Pointer to port private object. 1348 */ 1349 void 1350 mlx5_hws_age_pool_destroy(struct mlx5_priv *priv) 1351 { 1352 struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv); 1353 1354 rte_spinlock_lock(&priv->sh->cpool_lock); 1355 MLX5_ASSERT(priv->hws_age_req); 1356 mlx5_hws_age_info_destroy(priv); 1357 mlx5_ipool_destroy(age_info->ages_ipool); 1358 age_info->ages_ipool = NULL; 1359 priv->hws_age_req = 0; 1360 rte_spinlock_unlock(&priv->sh->cpool_lock); 1361 } 1362 1363 #endif 1364