xref: /dpdk/drivers/net/mlx5/mlx5_hws_cnt.c (revision c0e29968294c92ca15fdb34ce63fbba01c4562a6)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  */
4 
5 #include <stdint.h>
6 #include <rte_malloc.h>
7 #include <mlx5_malloc.h>
8 #include <rte_ring.h>
9 #include <mlx5_devx_cmds.h>
10 #include <rte_cycles.h>
11 #include <rte_eal_paging.h>
12 #include <rte_thread.h>
13 
14 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
15 
16 #include "mlx5_utils.h"
17 #include "mlx5_hws_cnt.h"
18 
19 #define HWS_CNT_CACHE_SZ_DEFAULT 511
20 #define HWS_CNT_CACHE_PRELOAD_DEFAULT 254
21 #define HWS_CNT_CACHE_FETCH_DEFAULT 254
22 #define HWS_CNT_CACHE_THRESHOLD_DEFAULT 254
23 #define HWS_CNT_ALLOC_FACTOR_DEFAULT 20
24 
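/**
 * Load all counter IDs of the pool into the global free list.
 *
 * @param cpool
 *   Pointer to the HWS counter pool.
 */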
25 static void
26 __hws_cnt_id_load(struct mlx5_hws_cnt_pool *cpool)
27 {
28 	uint32_t cnt_num = mlx5_hws_cnt_pool_get_size(cpool);
29 	uint32_t iidx;
30 
31 	/*
32 	 * Counter ID order is important for tracking the maximum number of
33 	 * counters in use for querying, so the counter internal index order
34 	 * must run from zero up to the number the user configured, e.g. 0 - 8000000.
35 	 * The counter IDs need to be loaded in this order, into the cache first
36 	 * and then into the global free list, so that the user fetches counters
37 	 * from the minimal index to the maximum one.
38 	 */
39 	for (iidx = 0; iidx < cnt_num; iidx++) {
40 		cnt_id_t cnt_id  = mlx5_hws_cnt_id_gen(cpool, iidx);
41 
42 		rte_ring_enqueue_elem(cpool->free_list, &cnt_id,
43 				sizeof(cnt_id));
44 	}
45 }
46 
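/**
 * Run one query cycle for a counter pool.
 *
 * Bump the query generation, trigger an ASO counter query and move all
 * counters waiting for reset from the wait-reset ring to the reuse ring
 * using zero-copy ring operations.
 *
 * @param sh
 *   Pointer to the shared device context.
 * @param cpool
 *   Pointer to the HWS counter pool.
 */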
47 static void
48 __mlx5_hws_cnt_svc(struct mlx5_dev_ctx_shared *sh,
49 		   struct mlx5_hws_cnt_pool *cpool)
50 {
51 	struct rte_ring *reset_list = cpool->wait_reset_list;
52 	struct rte_ring *reuse_list = cpool->reuse_list;
53 	uint32_t reset_cnt_num;
54 	struct rte_ring_zc_data zcdr = {0};
55 	struct rte_ring_zc_data zcdu = {0};
56 	uint32_t ret __rte_unused;
57 
58 	reset_cnt_num = rte_ring_count(reset_list);
59 	cpool->query_gen++;
60 	mlx5_aso_cnt_query(sh, cpool);
61 	zcdr.n1 = 0;
62 	zcdu.n1 = 0;
63 	ret = rte_ring_enqueue_zc_burst_elem_start(reuse_list,
64 						   sizeof(cnt_id_t),
65 						   reset_cnt_num, &zcdu,
66 						   NULL);
67 	MLX5_ASSERT(ret == reset_cnt_num);
68 	ret = rte_ring_dequeue_zc_burst_elem_start(reset_list,
69 						   sizeof(cnt_id_t),
70 						   reset_cnt_num, &zcdr,
71 						   NULL);
72 	MLX5_ASSERT(ret == reset_cnt_num);
73 	__hws_cnt_r2rcpy(&zcdu, &zcdr, reset_cnt_num);
74 	rte_ring_dequeue_zc_elem_finish(reset_list, reset_cnt_num);
75 	rte_ring_enqueue_zc_elem_finish(reuse_list, reset_cnt_num);
76 
77 	if (rte_log_can_log(mlx5_logtype, RTE_LOG_DEBUG)) {
78 		reset_cnt_num = rte_ring_count(reset_list);
79 		DRV_LOG(DEBUG, "ibdev %s cpool %p wait_reset_cnt=%" PRIu32,
80 			       sh->ibdev_name, (void *)cpool, reset_cnt_num);
81 	}
82 }
83 
84 /**
85  * Release AGE parameter.
86  *
87  * @param priv
88  *   Pointer to the port private data structure.
89  * @param own_cnt_index
90  *   Counter ID created only for this AGE, to be released.
91  *   Zero means there is no such counter.
92  * @param age_ipool
93  *   Pointer to AGE parameter indexed pool.
94  * @param idx
95  *   Index of AGE parameter in the indexed pool.
96  */
97 static void
98 mlx5_hws_age_param_free(struct mlx5_priv *priv, cnt_id_t own_cnt_index,
99 			struct mlx5_indexed_pool *age_ipool, uint32_t idx)
100 {
101 	if (own_cnt_index) {
102 		struct mlx5_hws_cnt_pool *cpool = priv->hws_cpool;
103 
104 		MLX5_ASSERT(mlx5_hws_cnt_is_shared(cpool, own_cnt_index));
105 		mlx5_hws_cnt_shared_put(cpool, &own_cnt_index);
106 	}
107 	mlx5_ipool_free(age_ipool, idx);
108 }
109 
110 /**
111  * Check for new aged-out flows in the HWS counter pool and trigger the event callback.
112  *
113  * @param[in] priv
114  *   Pointer to port private object.
115  * @param[in] cpool
116  *   Pointer to current counter pool.
117  */
118 static void
119 mlx5_hws_aging_check(struct mlx5_priv *priv, struct mlx5_hws_cnt_pool *cpool)
120 {
121 	struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
122 	struct flow_counter_stats *stats = cpool->raw_mng->raw;
123 	struct mlx5_hws_age_param *param;
124 	struct rte_ring *r;
125 	const uint64_t curr_time = MLX5_CURR_TIME_SEC;
126 	const uint32_t time_delta = curr_time - cpool->time_of_last_age_check;
127 	uint32_t nb_alloc_cnts = mlx5_hws_cnt_pool_get_size(cpool);
128 	uint16_t expected1 = HWS_AGE_CANDIDATE;
129 	uint16_t expected2 = HWS_AGE_CANDIDATE_INSIDE_RING;
130 	uint32_t i;
131 
132 	cpool->time_of_last_age_check = curr_time;
133 	for (i = 0; i < nb_alloc_cnts; ++i) {
134 		uint32_t age_idx = cpool->pool[i].age_idx;
135 		uint64_t hits;
136 
137 		if (!cpool->pool[i].in_used || age_idx == 0)
138 			continue;
139 		param = mlx5_ipool_get(age_info->ages_ipool, age_idx);
140 		if (unlikely(param == NULL)) {
141 			/*
142 			 * When an AGE uses an indirect counter, it is the
143 			 * user's responsibility not to use this indirect
144 			 * counter without the AGE.
145 			 * If the counter is used after the AGE was freed, the
146 			 * AGE index is invalid and using it here would cause a
147 			 * segmentation fault.
148 			 */
149 			DRV_LOG(WARNING,
150 				"Counter %u has lost its AGE, skipping it.", i);
151 			continue;
152 		}
153 		if (param->timeout == 0)
154 			continue;
155 		switch (rte_atomic_load_explicit(&param->state, rte_memory_order_relaxed)) {
156 		case HWS_AGE_AGED_OUT_NOT_REPORTED:
157 		case HWS_AGE_AGED_OUT_REPORTED:
158 			/* Already aged-out, no action is needed. */
159 			continue;
160 		case HWS_AGE_CANDIDATE:
161 		case HWS_AGE_CANDIDATE_INSIDE_RING:
162 			/* This AGE is a candidate to be aged-out, go on to check it. */
163 			break;
164 		case HWS_AGE_FREE:
165 			/*
166 			 * An AGE parameter in "FREE" state cannot be pointed
167 			 * to by any counter, since the counter is destroyed
168 			 * first. Fall-through.
169 			 */
170 		default:
171 			MLX5_ASSERT(0);
172 			continue;
173 		}
174 		hits = rte_be_to_cpu_64(stats[i].hits);
175 		if (param->nb_cnts == 1) {
176 			if (hits != param->accumulator_last_hits) {
177 				rte_atomic_store_explicit(&param->sec_since_last_hit, 0,
178 						 rte_memory_order_relaxed);
179 				param->accumulator_last_hits = hits;
180 				continue;
181 			}
182 		} else {
183 			param->accumulator_hits += hits;
184 			param->accumulator_cnt++;
185 			if (param->accumulator_cnt < param->nb_cnts)
186 				continue;
187 			param->accumulator_cnt = 0;
188 			if (param->accumulator_last_hits !=
189 						param->accumulator_hits) {
190 				rte_atomic_store_explicit(&param->sec_since_last_hit,
191 						 0, rte_memory_order_relaxed);
192 				param->accumulator_last_hits =
193 							param->accumulator_hits;
194 				param->accumulator_hits = 0;
195 				continue;
196 			}
197 			param->accumulator_hits = 0;
198 		}
199 		if (rte_atomic_fetch_add_explicit(&param->sec_since_last_hit, time_delta,
200 				       rte_memory_order_relaxed) + time_delta <=
201 		   rte_atomic_load_explicit(&param->timeout, rte_memory_order_relaxed))
202 			continue;
203 		/* Prepare the relevant ring for this AGE parameter */
204 		if (priv->hws_strict_queue)
205 			r = age_info->hw_q_age->aged_lists[param->queue_id];
206 		else
207 			r = age_info->hw_age.aged_list;
208 		/* Change the state atomically and insert it into the ring. */
209 		if (rte_atomic_compare_exchange_strong_explicit(&param->state, &expected1,
210 						HWS_AGE_AGED_OUT_NOT_REPORTED,
211 						rte_memory_order_relaxed,
212 						rte_memory_order_relaxed)) {
213 			int ret = rte_ring_enqueue_burst_elem(r, &age_idx,
214 							      sizeof(uint32_t),
215 							      1, NULL);
216 
217 			/*
218 			 * If the ring doesn't have enough room for this entry,
219 			 * roll the state back so it is retried next second.
220 			 *
221 			 * FIXME: if the flow gets traffic before the next
222 			 *        second, this "aged out" event is lost; to be
223 			 *        fixed when the ring is filled in bulks.
224 			 */
225 			expected2 = HWS_AGE_AGED_OUT_NOT_REPORTED;
226 			if (ret == 0 &&
227 			    !rte_atomic_compare_exchange_strong_explicit(&param->state,
228 							 &expected2, expected1,
229 							 rte_memory_order_relaxed,
230 							 rte_memory_order_relaxed) &&
231 			    expected2 == HWS_AGE_FREE)
232 				mlx5_hws_age_param_free(priv,
233 							param->own_cnt_index,
234 							age_info->ages_ipool,
235 							age_idx);
236 			/* The event is irrelevant in strict queue mode. */
237 			if (!priv->hws_strict_queue)
238 				MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
239 		} else {
240 			rte_atomic_compare_exchange_strong_explicit(&param->state, &expected2,
241 						  HWS_AGE_AGED_OUT_NOT_REPORTED,
242 						  rte_memory_order_relaxed,
243 						  rte_memory_order_relaxed);
244 		}
245 	}
246 	/* The event is irrelevant in strict queue mode. */
247 	if (!priv->hws_strict_queue)
248 		mlx5_age_event_prepare(priv->sh);
249 }
250 
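/**
 * Free the raw counter data memory manager.
 *
 * Deregister the memory region and release the raw data buffer and the
 * manager itself.
 *
 * @param sh
 *   Pointer to the shared device context.
 * @param mng
 *   Pointer to the raw data memory manager, may be NULL.
 */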
251 static void
252 mlx5_hws_cnt_raw_data_free(struct mlx5_dev_ctx_shared *sh,
253 			   struct mlx5_hws_cnt_raw_data_mng *mng)
254 {
255 	if (mng == NULL)
256 		return;
257 	sh->cdev->mr_scache.dereg_mr_cb(&mng->mr);
258 	mlx5_free(mng->raw);
259 	mlx5_free(mng);
260 }
261 
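/**
 * Allocate and register the raw counter data memory.
 *
 * Allocate a zeroed, page-aligned buffer large enough to hold the statistics
 * of n counters and register it as a memory region.
 *
 * @param sh
 *   Pointer to the shared device context.
 * @param n
 *   Number of counters the raw buffer must hold.
 * @param error
 *   Pointer to error structure.
 *
 * @return
 *   Pointer to the raw data memory manager on success, NULL otherwise.
 */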
262 __rte_unused
263 static struct mlx5_hws_cnt_raw_data_mng *
264 mlx5_hws_cnt_raw_data_alloc(struct mlx5_dev_ctx_shared *sh, uint32_t n,
265 			    struct rte_flow_error *error)
266 {
267 	struct mlx5_hws_cnt_raw_data_mng *mng = NULL;
268 	int ret;
269 	size_t sz = n * sizeof(struct flow_counter_stats);
270 	size_t pgsz = rte_mem_page_size();
271 
272 	MLX5_ASSERT(pgsz > 0);
273 	mng = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO, sizeof(*mng), 0,
274 			SOCKET_ID_ANY);
275 	if (mng == NULL) {
276 		rte_flow_error_set(error, ENOMEM,
277 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
278 				   NULL, "failed to allocate counters memory manager");
279 		goto error;
280 	}
281 	mng->raw = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO, sz, pgsz,
282 			SOCKET_ID_ANY);
283 	if (mng->raw == NULL) {
284 		rte_flow_error_set(error, ENOMEM,
285 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
286 				   NULL, "failed to allocate raw counters memory");
287 		goto error;
288 	}
289 	ret = sh->cdev->mr_scache.reg_mr_cb(sh->cdev->pd, mng->raw, sz,
290 					    &mng->mr);
291 	if (ret) {
292 		rte_flow_error_set(error, errno,
293 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
294 				   NULL, "failed to register counters memory region");
295 		goto error;
296 	}
297 	return mng;
298 error:
299 	mlx5_hws_cnt_raw_data_free(sh, mng);
300 	return NULL;
301 }
302 
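/**
 * Counter service thread routine.
 *
 * Periodically query all counter pools attached to the shared context, run
 * the aging check for ports that requested it, and sleep for the remainder
 * of the configured query interval.
 *
 * @param opaque
 *   Pointer to the shared device context.
 *
 * @return
 *   Always 0.
 */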
303 static uint32_t
304 mlx5_hws_cnt_svc(void *opaque)
305 {
306 	struct mlx5_dev_ctx_shared *sh =
307 		(struct mlx5_dev_ctx_shared *)opaque;
308 	uint64_t interval =
309 		(uint64_t)sh->cnt_svc->query_interval * (US_PER_S / MS_PER_S);
310 	struct mlx5_hws_cnt_pool *hws_cpool;
311 	uint64_t start_cycle, query_cycle = 0;
312 	uint64_t query_us;
313 	uint64_t sleep_us;
314 
315 	while (sh->cnt_svc->svc_running != 0) {
316 		if (rte_spinlock_trylock(&sh->cpool_lock) == 0)
317 			continue;
318 		start_cycle = rte_rdtsc();
319 		/* Querying 16M counters takes about 200ms. */
320 		LIST_FOREACH(hws_cpool, &sh->hws_cpool_list, next) {
321 			struct mlx5_priv *opriv = hws_cpool->priv;
322 
323 			__mlx5_hws_cnt_svc(sh, hws_cpool);
324 			if (opriv->hws_age_req)
325 				mlx5_hws_aging_check(opriv, hws_cpool);
326 		}
327 		query_cycle = rte_rdtsc() - start_cycle;
328 		rte_spinlock_unlock(&sh->cpool_lock);
329 		query_us = query_cycle / (rte_get_timer_hz() / US_PER_S);
330 		sleep_us = interval - query_us;
331 		DRV_LOG(DEBUG, "ibdev %s counter service thread: "
332 			       "interval_us=%" PRIu64 " query_us=%" PRIu64 " "
333 			       "sleep_us=%" PRIu64,
334 			sh->ibdev_name, interval, query_us,
335 			interval > query_us ? sleep_us : 0);
336 		if (interval > query_us)
337 			rte_delay_us_sleep(sleep_us);
338 	}
339 	return 0;
340 }
341 
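/**
 * Release the rings, caches and memory of a counter pool.
 *
 * @param cntp
 *   Pointer to the HWS counter pool, may be NULL.
 */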
342 static void
343 mlx5_hws_cnt_pool_deinit(struct mlx5_hws_cnt_pool * const cntp)
344 {
345 	uint32_t qidx = 0;
346 	if (cntp == NULL)
347 		return;
348 	rte_ring_free(cntp->free_list);
349 	rte_ring_free(cntp->wait_reset_list);
350 	rte_ring_free(cntp->reuse_list);
351 	if (cntp->cache) {
352 		for (qidx = 0; qidx < cntp->cache->q_num; qidx++)
353 			rte_ring_free(cntp->cache->qcache[qidx]);
354 	}
355 	mlx5_free(cntp->cache);
356 	mlx5_free(cntp->raw_mng);
357 	mlx5_free(cntp->pool);
358 	mlx5_free(cntp);
359 }
360 
361 static bool
362 mlx5_hws_cnt_should_enable_cache(const struct mlx5_hws_cnt_pool_cfg *pcfg,
363 				 const struct mlx5_hws_cache_param *ccfg)
364 {
365 	/*
366 	 * Enable cache if and only if there are enough counters requested
367 	 * to populate all of the caches.
368 	 */
369 	return pcfg->request_num >= ccfg->q_num * ccfg->size;
370 }
371 
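/**
 * Create the per-queue counter caches.
 *
 * @param pcfg
 *   Pointer to the counter pool configuration.
 * @param ccfg
 *   Pointer to the cache configuration.
 *
 * @return
 *   Pointer to the cache context on success, NULL otherwise.
 */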
372 static struct mlx5_hws_cnt_pool_caches *
373 mlx5_hws_cnt_cache_init(const struct mlx5_hws_cnt_pool_cfg *pcfg,
374 			const struct mlx5_hws_cache_param *ccfg)
375 {
376 	struct mlx5_hws_cnt_pool_caches *cache;
377 	char mz_name[RTE_MEMZONE_NAMESIZE];
378 	uint32_t qidx;
379 
380 	/* If the counter pool is big enough, set up the counter pool cache. */
381 	cache = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO,
382 			sizeof(*cache) +
383 			sizeof(((struct mlx5_hws_cnt_pool_caches *)0)->qcache[0])
384 				* ccfg->q_num, 0, SOCKET_ID_ANY);
385 	if (cache == NULL)
386 		return NULL;
387 	/* Store the necessary cache parameters. */
388 	cache->fetch_sz = ccfg->fetch_sz;
389 	cache->preload_sz = ccfg->preload_sz;
390 	cache->threshold = ccfg->threshold;
391 	cache->q_num = ccfg->q_num;
392 	for (qidx = 0; qidx < ccfg->q_num; qidx++) {
393 		snprintf(mz_name, sizeof(mz_name), "%s_qc/%x", pcfg->name, qidx);
394 		cache->qcache[qidx] = rte_ring_create(mz_name, ccfg->size,
395 				SOCKET_ID_ANY,
396 				RING_F_SP_ENQ | RING_F_SC_DEQ |
397 				RING_F_EXACT_SZ);
398 		if (cache->qcache[qidx] == NULL)
399 			goto error;
400 	}
401 	return cache;
402 
403 error:
404 	while (qidx--)
405 		rte_ring_free(cache->qcache[qidx]);
406 	mlx5_free(cache);
407 	return NULL;
408 }
409 
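/**
 * Create a counter pool together with its free, wait-reset and reuse rings.
 *
 * For a pool on top of a host pool only the pool context is allocated.
 *
 * @param sh
 *   Pointer to the shared device context.
 * @param pcfg
 *   Pointer to the counter pool configuration.
 * @param ccfg
 *   Pointer to the cache configuration.
 * @param error
 *   Pointer to error structure.
 *
 * @return
 *   Pointer to the counter pool on success, NULL otherwise.
 */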
410 static struct mlx5_hws_cnt_pool *
411 mlx5_hws_cnt_pool_init(struct mlx5_dev_ctx_shared *sh,
412 		       const struct mlx5_hws_cnt_pool_cfg *pcfg,
413 		       const struct mlx5_hws_cache_param *ccfg,
414 		       struct rte_flow_error *error)
415 {
416 	char mz_name[RTE_MEMZONE_NAMESIZE];
417 	struct mlx5_hws_cnt_pool *cntp;
418 	uint64_t cnt_num = 0;
419 
420 	MLX5_ASSERT(pcfg);
421 	MLX5_ASSERT(ccfg);
422 	cntp = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO, sizeof(*cntp), 0,
423 			   SOCKET_ID_ANY);
424 	if (cntp == NULL) {
425 		rte_flow_error_set(error, ENOMEM,
426 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
427 				   "failed to allocate counter pool context");
428 		return NULL;
429 	}
430 
431 	cntp->cfg = *pcfg;
432 	if (cntp->cfg.host_cpool)
433 		return cntp;
434 	if (pcfg->request_num > sh->hws_max_nb_counters) {
435 		DRV_LOG(ERR, "Counter number %u "
436 			"is greater than the maximum supported (%u).",
437 			pcfg->request_num, sh->hws_max_nb_counters);
438 		rte_flow_error_set(error, EINVAL,
439 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
440 				   "requested counters number exceeds supported capacity");
441 		goto error;
442 	}
443 	cnt_num = pcfg->request_num * (100 + pcfg->alloc_factor) / 100;
444 	if (cnt_num > UINT32_MAX) {
445 		DRV_LOG(ERR, "counter number %"PRIu64" is out of 32bit range",
446 			cnt_num);
447 		rte_flow_error_set(error, EINVAL,
448 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
449 				   "counters number must fit in 32 bits");
450 		goto error;
451 	}
452 	/*
453 	 * The requested counter number is supported, but if the allocation
454 	 * factor takes it above the maximum, the factor is effectively reduced.
455 	 */
456 	cnt_num = RTE_MIN((uint32_t)cnt_num, sh->hws_max_nb_counters);
457 	cntp->pool = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO,
458 				 sizeof(struct mlx5_hws_cnt) * cnt_num,
459 				 0, SOCKET_ID_ANY);
460 	if (cntp->pool == NULL) {
461 		rte_flow_error_set(error, ENOMEM,
462 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
463 				   "failed to allocate counter pool context");
464 		goto error;
465 	}
466 	snprintf(mz_name, sizeof(mz_name), "%s_F_RING", pcfg->name);
467 	cntp->free_list = rte_ring_create_elem(mz_name, sizeof(cnt_id_t),
468 				(uint32_t)cnt_num, SOCKET_ID_ANY,
469 				RING_F_MP_HTS_ENQ | RING_F_MC_HTS_DEQ |
470 				RING_F_EXACT_SZ);
471 	if (cntp->free_list == NULL) {
472 		rte_flow_error_set(error, ENOMEM,
473 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
474 				   "failed to allocate free counters ring");
475 		goto error;
476 	}
477 	snprintf(mz_name, sizeof(mz_name), "%s_R_RING", pcfg->name);
478 	cntp->wait_reset_list = rte_ring_create_elem(mz_name, sizeof(cnt_id_t),
479 			(uint32_t)cnt_num, SOCKET_ID_ANY,
480 			RING_F_MP_HTS_ENQ | RING_F_SC_DEQ | RING_F_EXACT_SZ);
481 	if (cntp->wait_reset_list == NULL) {
482 		rte_flow_error_set(error, ENOMEM,
483 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
484 				   "failed to allocate counters wait reset ring");
485 		goto error;
486 	}
487 	snprintf(mz_name, sizeof(mz_name), "%s_U_RING", pcfg->name);
488 	cntp->reuse_list = rte_ring_create_elem(mz_name, sizeof(cnt_id_t),
489 			(uint32_t)cnt_num, SOCKET_ID_ANY,
490 			RING_F_MP_HTS_ENQ | RING_F_MC_HTS_DEQ | RING_F_EXACT_SZ);
491 	if (cntp->reuse_list == NULL) {
492 		rte_flow_error_set(error, ENOMEM,
493 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
494 				   "failed to allocate counters reuse ring");
495 		goto error;
496 	}
497 	/* Allocate counter cache only if needed. */
498 	if (mlx5_hws_cnt_should_enable_cache(pcfg, ccfg)) {
499 		cntp->cache = mlx5_hws_cnt_cache_init(pcfg, ccfg);
500 		if (cntp->cache == NULL) {
501 			rte_flow_error_set(error, ENOMEM,
502 					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
503 					   "failed to allocate counters cache");
504 			goto error;
505 		}
506 	}
507 	/* Initialize the time for aging-out calculation. */
508 	cntp->time_of_last_age_check = MLX5_CURR_TIME_SEC;
509 	return cntp;
510 error:
511 	mlx5_hws_cnt_pool_deinit(cntp);
512 	return NULL;
513 }
514 
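/**
 * Create the counter service thread pinned to the configured service core.
 *
 * @param sh
 *   Pointer to the shared device context.
 *
 * @return
 *   0 on success, a non-zero value otherwise.
 */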
515 int
516 mlx5_hws_cnt_service_thread_create(struct mlx5_dev_ctx_shared *sh)
517 {
518 	char name[RTE_THREAD_INTERNAL_NAME_SIZE];
519 	rte_thread_attr_t attr;
520 	int ret;
521 	uint32_t service_core = sh->cnt_svc->service_core;
522 
523 	ret = rte_thread_attr_init(&attr);
524 	if (ret != 0)
525 		goto error;
526 	CPU_SET(service_core, &attr.cpuset);
527 	sh->cnt_svc->svc_running = 1;
528 	ret = rte_thread_create(&sh->cnt_svc->service_thread,
529 			&attr, mlx5_hws_cnt_svc, sh);
530 	if (ret != 0)
531 		goto error;
532 	snprintf(name, sizeof(name), "mlx5-cn%d", service_core);
533 	rte_thread_set_prefixed_name(sh->cnt_svc->service_thread, name);
534 
535 	return 0;
536 error:
537 	DRV_LOG(ERR, "Failed to create HW steering's counter service thread.");
538 	return ret;
539 }
540 
541 void
542 mlx5_hws_cnt_service_thread_destroy(struct mlx5_dev_ctx_shared *sh)
543 {
544 	if (sh->cnt_svc->service_thread.opaque_id == 0)
545 		return;
546 	sh->cnt_svc->svc_running = 0;
547 	rte_thread_join(sh->cnt_svc->service_thread, NULL);
548 	sh->cnt_svc->service_thread.opaque_id = 0;
549 }
550 
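/**
 * Allocate DevX flow counter objects (DCS) in bulks for the pool.
 *
 * The first bulk covers as many counters as possible; any remaining counters
 * are allocated with progressively smaller bulk sizes.
 *
 * @param sh
 *   Pointer to the shared device context.
 * @param cpool
 *   Pointer to the HWS counter pool.
 * @param error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative value otherwise.
 */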
551 static int
552 mlx5_hws_cnt_pool_dcs_alloc(struct mlx5_dev_ctx_shared *sh,
553 			    struct mlx5_hws_cnt_pool *cpool,
554 			    struct rte_flow_error *error)
555 {
556 	struct mlx5_hca_attr *hca_attr = &sh->cdev->config.hca_attr;
557 	uint32_t max_log_bulk_sz = sh->hws_max_log_bulk_sz;
558 	uint32_t log_bulk_sz;
559 	uint32_t idx, alloc_candidate, alloced = 0;
560 	unsigned int cnt_num = mlx5_hws_cnt_pool_get_size(cpool);
561 	struct mlx5_devx_counter_attr attr = {0};
562 	struct mlx5_devx_obj *dcs;
563 
564 	MLX5_ASSERT(cpool->cfg.host_cpool == NULL);
565 	if (hca_attr->flow_counter_bulk_log_max_alloc == 0)
566 		return rte_flow_error_set(error, ENOTSUP,
567 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
568 					  NULL, "FW doesn't support bulk log max alloc");
569 	cnt_num = RTE_ALIGN_CEIL(cnt_num, 4); /* Minimum of 4 counters in a bulk. */
570 	log_bulk_sz = RTE_MIN(max_log_bulk_sz, rte_log2_u32(cnt_num));
571 	attr.pd = sh->cdev->pdn;
572 	attr.pd_valid = 1;
573 	attr.bulk_log_max_alloc = 1;
574 	attr.flow_counter_bulk_log_size = log_bulk_sz;
575 	idx = 0;
576 	dcs = mlx5_devx_cmd_flow_counter_alloc_general(sh->cdev->ctx, &attr);
577 	if (dcs == NULL) {
578 		rte_flow_error_set(error, rte_errno,
579 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
580 				   NULL, "FW failed to allocate counters");
581 		goto error;
582 	}
583 	cpool->dcs_mng.dcs[idx].obj = dcs;
584 	cpool->dcs_mng.dcs[idx].batch_sz = (1 << log_bulk_sz);
585 	cpool->dcs_mng.batch_total++;
586 	idx++;
587 	cpool->dcs_mng.dcs[0].iidx = 0;
588 	alloced = cpool->dcs_mng.dcs[0].batch_sz;
589 	if (cnt_num > cpool->dcs_mng.dcs[0].batch_sz) {
590 		while (idx < MLX5_HWS_CNT_DCS_NUM) {
591 			attr.flow_counter_bulk_log_size = --max_log_bulk_sz;
592 			alloc_candidate = RTE_BIT32(max_log_bulk_sz);
593 			if (alloced + alloc_candidate > sh->hws_max_nb_counters)
594 				continue;
595 			dcs = mlx5_devx_cmd_flow_counter_alloc_general
596 				(sh->cdev->ctx, &attr);
597 			if (dcs == NULL) {
598 				rte_flow_error_set(error, rte_errno,
599 						   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
600 						   NULL, "FW failed to allocate counters");
601 				goto error;
602 			}
603 			cpool->dcs_mng.dcs[idx].obj = dcs;
604 			cpool->dcs_mng.dcs[idx].batch_sz = alloc_candidate;
605 			cpool->dcs_mng.dcs[idx].iidx = alloced;
606 			alloced += cpool->dcs_mng.dcs[idx].batch_sz;
607 			cpool->dcs_mng.batch_total++;
608 			if (alloced >= cnt_num)
609 				break;
610 			idx++;
611 		}
612 	}
613 	return 0;
614 error:
615 	DRV_LOG(DEBUG,
616 		"Cannot alloc device counter, allocated[%" PRIu32 "] request[%" PRIu32 "]",
617 		alloced, cnt_num);
618 	for (idx = 0; idx < cpool->dcs_mng.batch_total; idx++) {
619 		mlx5_devx_cmd_destroy(cpool->dcs_mng.dcs[idx].obj);
620 		cpool->dcs_mng.dcs[idx].obj = NULL;
621 		cpool->dcs_mng.dcs[idx].batch_sz = 0;
622 		cpool->dcs_mng.dcs[idx].iidx = 0;
623 	}
624 	cpool->dcs_mng.batch_total = 0;
625 	return -1;
626 }
627 
628 static void
629 mlx5_hws_cnt_pool_dcs_free(struct mlx5_dev_ctx_shared *sh,
630 			   struct mlx5_hws_cnt_pool *cpool)
631 {
632 	uint32_t idx;
633 
634 	if (cpool == NULL)
635 		return;
636 	for (idx = 0; idx < MLX5_HWS_CNT_DCS_NUM; idx++)
637 		mlx5_devx_cmd_destroy(cpool->dcs_mng.dcs[idx].obj);
638 	if (cpool->raw_mng) {
639 		mlx5_hws_cnt_raw_data_free(sh, cpool->raw_mng);
640 		cpool->raw_mng = NULL;
641 	}
642 }
643 
644 static void
645 mlx5_hws_cnt_pool_action_destroy(struct mlx5_hws_cnt_pool *cpool)
646 {
647 	uint32_t idx;
648 
649 	for (idx = 0; idx < cpool->dcs_mng.batch_total; idx++) {
650 		struct mlx5_hws_cnt_dcs *dcs = &cpool->dcs_mng.dcs[idx];
651 
652 		if (dcs->dr_action != NULL) {
653 			mlx5dr_action_destroy(dcs->dr_action);
654 			dcs->dr_action = NULL;
655 		}
656 	}
657 }
658 
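/**
 * Create a DR counter action for each DCS bulk of the host pool.
 *
 * @param priv
 *   Pointer to the port private data structure.
 * @param cpool
 *   Pointer to the HWS counter pool.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */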
659 static int
660 mlx5_hws_cnt_pool_action_create(struct mlx5_priv *priv,
661 		struct mlx5_hws_cnt_pool *cpool)
662 {
663 	struct mlx5_hws_cnt_pool *hpool = mlx5_hws_cnt_host_pool(cpool);
664 	uint32_t idx;
665 	int ret = 0;
666 	uint32_t flags;
667 
668 	flags = MLX5DR_ACTION_FLAG_HWS_RX | MLX5DR_ACTION_FLAG_HWS_TX;
669 	if (priv->sh->config.dv_esw_en && priv->master)
670 		flags |= MLX5DR_ACTION_FLAG_HWS_FDB;
671 	for (idx = 0; idx < hpool->dcs_mng.batch_total; idx++) {
672 		struct mlx5_hws_cnt_dcs *hdcs = &hpool->dcs_mng.dcs[idx];
673 		struct mlx5_hws_cnt_dcs *dcs = &cpool->dcs_mng.dcs[idx];
674 
675 		dcs->dr_action = mlx5dr_action_create_counter(priv->dr_ctx,
676 					(struct mlx5dr_devx_obj *)hdcs->obj,
677 					flags);
678 		if (dcs->dr_action == NULL) {
679 			mlx5_hws_cnt_pool_action_destroy(cpool);
680 			ret = -ENOSYS;
681 			break;
682 		}
683 	}
684 	return ret;
685 }
686 
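/**
 * Create the HWS counter pool for a port.
 *
 * When a host pool is given, only a pool context and its DR actions are
 * created on top of the host DCS objects. Otherwise the counter service is
 * started if needed, device counters and raw data memory are allocated, and
 * the pool is registered in the shared context list.
 *
 * @param dev
 *   Pointer to the Ethernet device structure.
 * @param nb_counters
 *   Number of counters requested by the application.
 * @param nb_queue
 *   Number of HWS queues in this port.
 * @param chost
 *   Pointer to the host counter pool, NULL if this port owns its counters.
 * @param error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative value otherwise.
 */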
687 int
688 mlx5_hws_cnt_pool_create(struct rte_eth_dev *dev,
689 			 uint32_t nb_counters, uint16_t nb_queue,
690 			 struct mlx5_hws_cnt_pool *chost,
691 			 struct rte_flow_error *error)
692 {
693 	struct mlx5_hws_cnt_pool *cpool = NULL;
694 	struct mlx5_priv *priv = dev->data->dev_private;
695 	struct mlx5_hws_cache_param cparam = {0};
696 	struct mlx5_hws_cnt_pool_cfg pcfg = {0};
697 	char *mp_name;
698 	int ret = 0;
699 	size_t sz;
700 
701 	mp_name = mlx5_malloc(MLX5_MEM_ZERO, RTE_MEMZONE_NAMESIZE, 0, SOCKET_ID_ANY);
702 	if (mp_name == NULL)
703 		goto error;
704 	snprintf(mp_name, RTE_MEMZONE_NAMESIZE, "MLX5_HWS_CNT_P_%x", dev->data->port_id);
705 	pcfg.name = mp_name;
706 	pcfg.request_num = nb_counters;
707 	pcfg.alloc_factor = HWS_CNT_ALLOC_FACTOR_DEFAULT;
708 	if (chost) {
709 		pcfg.host_cpool = chost;
710 		cpool = mlx5_hws_cnt_pool_init(priv->sh, &pcfg, &cparam, error);
711 		if (cpool == NULL)
712 			goto error;
713 		ret = mlx5_hws_cnt_pool_action_create(priv, cpool);
714 		if (ret != 0) {
715 			rte_flow_error_set(error, -ret,
716 					   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
717 					   NULL, "failed to allocate counter actions on guest port");
718 			goto error;
719 		}
720 		goto success;
721 	}
722 	/* Initialize the counter service if not done yet. */
723 	if (priv->sh->cnt_svc == NULL) {
724 		ret = mlx5_hws_cnt_svc_init(priv->sh, error);
725 		if (ret)
726 			return ret;
727 	}
728 	cparam.fetch_sz = HWS_CNT_CACHE_FETCH_DEFAULT;
729 	cparam.preload_sz = HWS_CNT_CACHE_PRELOAD_DEFAULT;
730 	cparam.q_num = nb_queue;
731 	cparam.threshold = HWS_CNT_CACHE_THRESHOLD_DEFAULT;
732 	cparam.size = HWS_CNT_CACHE_SZ_DEFAULT;
733 	cpool = mlx5_hws_cnt_pool_init(priv->sh, &pcfg, &cparam, error);
734 	if (cpool == NULL)
735 		goto error;
736 	ret = mlx5_hws_cnt_pool_dcs_alloc(priv->sh, cpool, error);
737 	if (ret != 0)
738 		goto error;
739 	sz = RTE_ALIGN_CEIL(mlx5_hws_cnt_pool_get_size(cpool), 4);
740 	cpool->raw_mng = mlx5_hws_cnt_raw_data_alloc(priv->sh, sz, error);
741 	if (cpool->raw_mng == NULL)
742 		goto error;
743 	__hws_cnt_id_load(cpool);
744 	/*
745 	 * Bump the query generation right after pool creation so that
746 	 * the pre-loaded counters can be used directly:
747 	 * they already have an initial value, so there is
748 	 * no need to wait for a query.
749 	 */
750 	cpool->query_gen = 1;
751 	ret = mlx5_hws_cnt_pool_action_create(priv, cpool);
752 	if (ret != 0) {
753 		rte_flow_error_set(error, -ret,
754 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
755 				   NULL, "failed to allocate counter actions");
756 		goto error;
757 	}
758 	priv->sh->cnt_svc->refcnt++;
759 	cpool->priv = priv;
760 	rte_spinlock_lock(&priv->sh->cpool_lock);
761 	LIST_INSERT_HEAD(&priv->sh->hws_cpool_list, cpool, next);
762 	rte_spinlock_unlock(&priv->sh->cpool_lock);
763 success:
764 	priv->hws_cpool = cpool;
765 	return 0;
766 error:
767 	MLX5_ASSERT(ret);
768 	mlx5_hws_cnt_pool_destroy(priv->sh, cpool);
769 	priv->hws_cpool = NULL;
770 	return ret;
771 }
772 
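/**
 * Destroy a counter pool and release all its resources.
 *
 * @param sh
 *   Pointer to the shared device context.
 * @param cpool
 *   Pointer to the HWS counter pool, may be NULL.
 */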
773 void
774 mlx5_hws_cnt_pool_destroy(struct mlx5_dev_ctx_shared *sh,
775 		struct mlx5_hws_cnt_pool *cpool)
776 {
777 	if (cpool == NULL)
778 		return;
779 	/*
780 	 * Querying 16M counters takes about 200ms, so this function
781 	 * may be blocked here for up to 200ms.
782 	 */
783 	rte_spinlock_lock(&sh->cpool_lock);
784 	/* Removing a cpool that was never added to the list would cause a segfault. */
785 	if (!LIST_EMPTY(&sh->hws_cpool_list) && cpool->next.le_prev)
786 		LIST_REMOVE(cpool, next);
787 	rte_spinlock_unlock(&sh->cpool_lock);
788 	if (cpool->cfg.host_cpool == NULL) {
789 		if (--sh->cnt_svc->refcnt == 0)
790 			mlx5_hws_cnt_svc_deinit(sh);
791 	}
792 	mlx5_hws_cnt_pool_action_destroy(cpool);
793 	if (cpool->cfg.host_cpool == NULL) {
794 		mlx5_hws_cnt_pool_dcs_free(sh, cpool);
795 		mlx5_hws_cnt_raw_data_free(sh, cpool->raw_mng);
796 	}
797 	mlx5_free((void *)cpool->cfg.name);
798 	mlx5_hws_cnt_pool_deinit(cpool);
799 }
800 
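/**
 * Initialize the shared counter service: service context, ASO counter query
 * queue and service thread.
 *
 * @param sh
 *   Pointer to the shared device context.
 * @param error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */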
801 int
802 mlx5_hws_cnt_svc_init(struct mlx5_dev_ctx_shared *sh,
803 		      struct rte_flow_error *error)
804 {
805 	int ret;
806 
807 	sh->cnt_svc = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO,
808 			sizeof(*sh->cnt_svc), 0, SOCKET_ID_ANY);
809 	if (sh->cnt_svc == NULL)
810 		goto err;
811 	sh->cnt_svc->query_interval = sh->config.cnt_svc.cycle_time;
812 	sh->cnt_svc->service_core = sh->config.cnt_svc.service_core;
813 	ret = mlx5_aso_cnt_queue_init(sh);
814 	if (ret != 0) {
815 		mlx5_free(sh->cnt_svc);
816 		sh->cnt_svc = NULL;
817 		goto err;
818 	}
819 	ret = mlx5_hws_cnt_service_thread_create(sh);
820 	if (ret != 0) {
821 		mlx5_aso_cnt_queue_uninit(sh);
822 		mlx5_free(sh->cnt_svc);
823 		sh->cnt_svc = NULL;
824 		goto err;
825 	}
826 	return 0;
827 err:
828 	return rte_flow_error_set(error, ENOMEM,
829 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
830 				  NULL, "failed to init counters service");
831 }
832 
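/**
 * Stop the counter service thread and release the service resources.
 *
 * @param sh
 *   Pointer to the shared device context.
 */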
833 void
834 mlx5_hws_cnt_svc_deinit(struct mlx5_dev_ctx_shared *sh)
835 {
836 	if (sh->cnt_svc == NULL)
837 		return;
838 	mlx5_hws_cnt_service_thread_destroy(sh);
839 	mlx5_aso_cnt_queue_uninit(sh);
840 	mlx5_free(sh->cnt_svc);
841 	sh->cnt_svc = NULL;
842 }
843 
844 /**
845  * Destroy AGE action.
846  *
847  * @param priv
848  *   Pointer to the port private data structure.
849  * @param idx
850  *   Index of AGE parameter.
851  * @param error
852  *   Pointer to error structure.
853  *
854  * @return
855  *   0 on success, a negative errno value otherwise and rte_errno is set.
856  */
857 int
858 mlx5_hws_age_action_destroy(struct mlx5_priv *priv, uint32_t idx,
859 			    struct rte_flow_error *error)
860 {
861 	struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
862 	struct mlx5_indexed_pool *ipool = age_info->ages_ipool;
863 	struct mlx5_hws_age_param *param = mlx5_ipool_get(ipool, idx);
864 
865 	if (param == NULL)
866 		return rte_flow_error_set(error, EINVAL,
867 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
868 					  "invalid AGE parameter index");
869 	switch (rte_atomic_exchange_explicit(&param->state, HWS_AGE_FREE,
870 				    rte_memory_order_relaxed)) {
871 	case HWS_AGE_CANDIDATE:
872 	case HWS_AGE_AGED_OUT_REPORTED:
873 		mlx5_hws_age_param_free(priv, param->own_cnt_index, ipool, idx);
874 		break;
875 	case HWS_AGE_AGED_OUT_NOT_REPORTED:
876 	case HWS_AGE_CANDIDATE_INSIDE_RING:
877 		/*
878 		 * In both cases the AGE is inside the ring. Change the state here
879 		 * and destroy it later when it is taken out of the ring.
880 		 */
881 		break;
882 	case HWS_AGE_FREE:
883 		/*
884 		 * If the index is valid and the state is FREE, it means this AGE
885 		 * has been freed for the user but not for the PMD, since it is
886 		 * still inside the ring.
887 		 */
888 		return rte_flow_error_set(error, EINVAL,
889 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
890 					  "this AGE has already been released");
891 	default:
892 		MLX5_ASSERT(0);
893 		break;
894 	}
895 	return 0;
896 }
897 
898 /**
899  * Create AGE action parameter.
900  *
901  * @param[in] priv
902  *   Pointer to the port private data structure.
903  * @param[in] queue_id
904  *   Which HWS queue to be used.
905  * @param[in] shared
906  *   Whether it is an indirect AGE action.
907  * @param[in] flow_idx
908  *   Flow index from indexed pool.
909  *   It has no effect for an indirect AGE action.
910  * @param[in] age
911  *   Pointer to the aging action configuration.
912  * @param[out] error
913  *   Pointer to error structure.
914  *
915  * @return
916  *   Index to AGE action parameter on success, 0 otherwise.
917  */
918 uint32_t
919 mlx5_hws_age_action_create(struct mlx5_priv *priv, uint32_t queue_id,
920 			   bool shared, const struct rte_flow_action_age *age,
921 			   uint32_t flow_idx, struct rte_flow_error *error)
922 {
923 	struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
924 	struct mlx5_indexed_pool *ipool = age_info->ages_ipool;
925 	struct mlx5_hws_age_param *param;
926 	uint32_t age_idx;
927 
928 	param = mlx5_ipool_malloc(ipool, &age_idx);
929 	if (param == NULL) {
930 		rte_flow_error_set(error, ENOMEM,
931 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
932 				   "cannot allocate AGE parameter");
933 		return 0;
934 	}
935 	MLX5_ASSERT(rte_atomic_load_explicit(&param->state,
936 				    rte_memory_order_relaxed) == HWS_AGE_FREE);
937 	if (shared) {
938 		param->nb_cnts = 0;
939 		param->accumulator_hits = 0;
940 		param->accumulator_cnt = 0;
941 		flow_idx = age_idx;
942 	} else {
943 		param->nb_cnts = 1;
944 	}
945 	param->context = age->context ? age->context :
946 					(void *)(uintptr_t)flow_idx;
947 	param->timeout = age->timeout;
948 	param->queue_id = queue_id;
949 	param->accumulator_last_hits = 0;
950 	param->own_cnt_index = 0;
951 	param->sec_since_last_hit = 0;
952 	param->state = HWS_AGE_CANDIDATE;
953 	return age_idx;
954 }
955 
956 /**
957  * Update indirect AGE action parameter.
958  *
959  * @param[in] priv
960  *   Pointer to the port private data structure.
961  * @param[in] idx
962  *   Index of AGE parameter.
963  * @param[in] update
964  *   Update value.
965  * @param[out] error
966  *   Pointer to error structure.
967  *
968  * @return
969  *   0 on success, a negative errno value otherwise and rte_errno is set.
970  */
971 int
972 mlx5_hws_age_action_update(struct mlx5_priv *priv, uint32_t idx,
973 			   const void *update, struct rte_flow_error *error)
974 {
975 	const struct rte_flow_update_age *update_ade = update;
976 	struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
977 	struct mlx5_indexed_pool *ipool = age_info->ages_ipool;
978 	struct mlx5_hws_age_param *param = mlx5_ipool_get(ipool, idx);
979 	bool sec_since_last_hit_reset = false;
980 	bool state_update = false;
981 
982 	if (param == NULL)
983 		return rte_flow_error_set(error, EINVAL,
984 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
985 					  "invalid AGE parameter index");
986 	if (update_ade->timeout_valid) {
987 		uint32_t old_timeout = rte_atomic_exchange_explicit(&param->timeout,
988 							   update_ade->timeout,
989 							   rte_memory_order_relaxed);
990 
991 		if (old_timeout == 0)
992 			sec_since_last_hit_reset = true;
993 		else if (old_timeout < update_ade->timeout ||
994 			 update_ade->timeout == 0)
995 			/*
996 			 * When the timeout is increased, aged-out flows might become
997 			 * active again and the state should be updated accordingly.
998 			 * When the new timeout is 0, the state is updated so that
999 			 * aged-out flows are no longer reported.
1000 			 */
1001 			state_update = true;
1002 	}
1003 	if (update_ade->touch) {
1004 		sec_since_last_hit_reset = true;
1005 		state_update = true;
1006 	}
1007 	if (sec_since_last_hit_reset)
1008 		rte_atomic_store_explicit(&param->sec_since_last_hit, 0,
1009 				 rte_memory_order_relaxed);
1010 	if (state_update) {
1011 		uint16_t expected = HWS_AGE_AGED_OUT_NOT_REPORTED;
1012 
1013 		/*
1014 		 * Change states of aged-out flows to active:
1015 		 *  - AGED_OUT_NOT_REPORTED -> CANDIDATE_INSIDE_RING
1016 		 *  - AGED_OUT_REPORTED -> CANDIDATE
1017 		 */
1018 		if (!rte_atomic_compare_exchange_strong_explicit(&param->state, &expected,
1019 						 HWS_AGE_CANDIDATE_INSIDE_RING,
1020 						 rte_memory_order_relaxed,
1021 						 rte_memory_order_relaxed) &&
1022 		    expected == HWS_AGE_AGED_OUT_REPORTED)
1023 			rte_atomic_store_explicit(&param->state, HWS_AGE_CANDIDATE,
1024 					 rte_memory_order_relaxed);
1025 	}
1026 	return 0;
1027 }
1028 
1029 /**
1030  * Get the AGE context if the aged-out index is still valid.
1031  *
1032  * @param priv
1033  *   Pointer to the port private data structure.
1034  * @param idx
1035  *   Index of AGE parameter.
1036  *
1037  * @return
1038  *   AGE context if the index is still aged-out, NULL otherwise.
1039  */
1040 void *
1041 mlx5_hws_age_context_get(struct mlx5_priv *priv, uint32_t idx)
1042 {
1043 	struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
1044 	struct mlx5_indexed_pool *ipool = age_info->ages_ipool;
1045 	struct mlx5_hws_age_param *param = mlx5_ipool_get(ipool, idx);
1046 	uint16_t expected = HWS_AGE_AGED_OUT_NOT_REPORTED;
1047 
1048 	MLX5_ASSERT(param != NULL);
1049 	if (rte_atomic_compare_exchange_strong_explicit(&param->state, &expected,
1050 					HWS_AGE_AGED_OUT_REPORTED,
1051 					rte_memory_order_relaxed, rte_memory_order_relaxed))
1052 		return param->context;
1053 	switch (expected) {
1054 	case HWS_AGE_FREE:
1055 		/*
1056 		 * This AGE could not have been destroyed while it was inside
1057 		 * the ring. Its state has been updated, and now it is actually
1058 		 * destroyed.
1059 		 */
1060 		mlx5_hws_age_param_free(priv, param->own_cnt_index, ipool, idx);
1061 		break;
1062 	case HWS_AGE_CANDIDATE_INSIDE_RING:
1063 		rte_atomic_store_explicit(&param->state, HWS_AGE_CANDIDATE,
1064 				 rte_memory_order_relaxed);
1065 		break;
1066 	case HWS_AGE_CANDIDATE:
1067 		/*
1068 		 * Only the background thread pushes to the ring, and it never
1069 		 * pushes this state. When an AGE inside the ring becomes a
1070 		 * candidate, it gets the special state
1071 		 * HWS_AGE_CANDIDATE_INSIDE_RING. Fall-through.
1072 		 */
1073 	case HWS_AGE_AGED_OUT_REPORTED:
1074 		/*
1075 		 * Only this thread (doing query) may write this state, and it
1076 		 * happens only after the query thread takes it out of the ring.
1077 		 * Fall-through.
1078 		 */
1079 	case HWS_AGE_AGED_OUT_NOT_REPORTED:
1080 		/*
1081 		 * In this case the compare-exchange returns true and the
1082 		 * function returns the context immediately.
1083 		 * Fall-through.
1084 		 */
1085 	default:
1086 		MLX5_ASSERT(0);
1087 		break;
1088 	}
1089 	return NULL;
1090 }
1091 
1092 #ifdef RTE_ARCH_64
1093 #define MLX5_HWS_AGED_OUT_RING_SIZE_MAX UINT32_MAX
1094 #else
1095 #define MLX5_HWS_AGED_OUT_RING_SIZE_MAX RTE_BIT32(8)
1096 #endif
1097 
1098 /**
1099  * Get the size of aged out ring list for each queue.
1100  *
1101  * The size is one percent of nb_counters divided by nb_queues.
1102  * The ring size must be a power of 2, so it is aligned up to a power of 2.
1103  * On 32-bit systems, the size is limited to 256.
1104  *
1105  * This function is called when RTE_FLOW_PORT_FLAG_STRICT_QUEUE is on.
1106  *
1107  * @param nb_counters
1108  *   Final number of allocated counters in the pool.
1109  * @param nb_queues
1110  *   Number of HWS queues in this port.
1111  *
1112  * @return
1113  *   Size of aged out ring per queue.
1114  */
1115 static __rte_always_inline uint32_t
1116 mlx5_hws_aged_out_q_ring_size_get(uint32_t nb_counters, uint32_t nb_queues)
1117 {
1118 	uint32_t size = rte_align32pow2((nb_counters / 100) / nb_queues);
1119 	uint32_t max_size = MLX5_HWS_AGED_OUT_RING_SIZE_MAX;
1120 
1121 	return RTE_MIN(size, max_size);
1122 }
1123 
1124 /**
1125  * Get the size of the aged out ring list.
1126  *
1127  * The size is one percent of nb_counters.
1128  * The ring size must be a power of 2, so it is aligned up to a power of 2.
1129  * On 32-bit systems, the size is limited to 256.
1130  *
1131  * This function is called when RTE_FLOW_PORT_FLAG_STRICT_QUEUE is off.
1132  *
1133  * @param nb_counters
1134  *   Final number of allocated counters in the pool.
1135  *
1136  * @return
1137  *   Size of the aged out ring list.
1138  */
1139 static __rte_always_inline uint32_t
1140 mlx5_hws_aged_out_ring_size_get(uint32_t nb_counters)
1141 {
1142 	uint32_t size = rte_align32pow2(nb_counters / 100);
1143 	uint32_t max_size = MLX5_HWS_AGED_OUT_RING_SIZE_MAX;
1144 
1145 	return RTE_MIN(size, max_size);
1146 }
1147 
1148 /**
1149  * Initialize the shared aging list information per port.
1150  *
1151  * @param dev
1152  *   Pointer to the rte_eth_dev structure.
1153  * @param nb_queues
1154  *   Number of HWS queues.
1155  * @param strict_queue
1156  *   Indicator of whether strict_queue mode is enabled.
1157  * @param ring_size
1158  *   Size of aged-out ring for creation.
1159  *
1160  * @return
1161  *   0 on success, a negative errno value otherwise and rte_errno is set.
1162  */
1163 static int
1164 mlx5_hws_age_info_init(struct rte_eth_dev *dev, uint16_t nb_queues,
1165 		       bool strict_queue, uint32_t ring_size)
1166 {
1167 	struct mlx5_priv *priv = dev->data->dev_private;
1168 	struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
1169 	uint32_t flags = RING_F_SP_ENQ | RING_F_SC_DEQ | RING_F_EXACT_SZ;
1170 	char mz_name[RTE_MEMZONE_NAMESIZE];
1171 	struct rte_ring *r = NULL;
1172 	uint32_t qidx;
1173 
1174 	age_info->flags = 0;
1175 	if (strict_queue) {
1176 		size_t size = sizeof(*age_info->hw_q_age) +
1177 			      sizeof(struct rte_ring *) * nb_queues;
1178 
1179 		age_info->hw_q_age = mlx5_malloc(MLX5_MEM_ANY | MLX5_MEM_ZERO,
1180 						 size, 0, SOCKET_ID_ANY);
1181 		if (age_info->hw_q_age == NULL)
1182 			return -ENOMEM;
1183 		for (qidx = 0; qidx < nb_queues; ++qidx) {
1184 			snprintf(mz_name, sizeof(mz_name),
1185 				 "port_%u_queue_%u_aged_out_ring",
1186 				 dev->data->port_id, qidx);
1187 			r = rte_ring_create(mz_name, ring_size, SOCKET_ID_ANY,
1188 					    flags);
1189 			if (r == NULL) {
1190 				DRV_LOG(ERR, "\"%s\" creation failed: %s",
1191 					mz_name, rte_strerror(rte_errno));
1192 				goto error;
1193 			}
1194 			age_info->hw_q_age->aged_lists[qidx] = r;
1195 			DRV_LOG(DEBUG,
1196 				"\"%s\" is successfully created (size=%u).",
1197 				mz_name, ring_size);
1198 		}
1199 		age_info->hw_q_age->nb_rings = nb_queues;
1200 	} else {
1201 		snprintf(mz_name, sizeof(mz_name), "port_%u_aged_out_ring",
1202 			 dev->data->port_id);
1203 		r = rte_ring_create(mz_name, ring_size, SOCKET_ID_ANY, flags);
1204 		if (r == NULL) {
1205 			DRV_LOG(ERR, "\"%s\" creation failed: %s", mz_name,
1206 				rte_strerror(rte_errno));
1207 			return -rte_errno;
1208 		}
1209 		age_info->hw_age.aged_list = r;
1210 		DRV_LOG(DEBUG, "\"%s\" is successfully created (size=%u).",
1211 			mz_name, ring_size);
1212 		/* In non "strict_queue" mode, initialize the event. */
1213 		MLX5_AGE_SET(age_info, MLX5_AGE_TRIGGER);
1214 	}
1215 	return 0;
1216 error:
1217 	MLX5_ASSERT(strict_queue);
1218 	while (qidx--)
1219 		rte_ring_free(age_info->hw_q_age->aged_lists[qidx]);
1220 	mlx5_free(age_info->hw_q_age);
1221 	return -1;
1222 }
1223 
1224 /**
1225  * Cleanup aged-out ring before destroying.
1226  *
1227  * @param priv
1228  *   Pointer to port private object.
1229  * @param r
1230  *   Pointer to aged-out ring object.
1231  */
1232 static void
1233 mlx5_hws_aged_out_ring_cleanup(struct mlx5_priv *priv, struct rte_ring *r)
1234 {
1235 	int ring_size = rte_ring_count(r);
1236 
1237 	while (ring_size > 0) {
1238 		uint32_t age_idx = 0;
1239 
1240 		if (rte_ring_dequeue_elem(r, &age_idx, sizeof(uint32_t)) < 0)
1241 			break;
1242 		/* get the AGE context if the aged-out index is still valid. */
1243 		mlx5_hws_age_context_get(priv, age_idx);
1244 		ring_size--;
1245 	}
1246 	rte_ring_free(r);
1247 }
1248 
1249 /**
1250  * Destroy the shared aging list information per port.
1251  *
1252  * @param priv
1253  *   Pointer to port private object.
1254  */
1255 static void
1256 mlx5_hws_age_info_destroy(struct mlx5_priv *priv)
1257 {
1258 	struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
1259 	uint16_t nb_queues = age_info->hw_q_age->nb_rings;
1260 	struct rte_ring *r;
1261 
1262 	if (priv->hws_strict_queue) {
1263 		uint32_t qidx;
1264 
1265 		for (qidx = 0; qidx < nb_queues; ++qidx) {
1266 			r = age_info->hw_q_age->aged_lists[qidx];
1267 			mlx5_hws_aged_out_ring_cleanup(priv, r);
1268 		}
1269 		mlx5_free(age_info->hw_q_age);
1270 	} else {
1271 		r = age_info->hw_age.aged_list;
1272 		mlx5_hws_aged_out_ring_cleanup(priv, r);
1273 	}
1274 }
1275 
1276 /**
1277  * Initialize the aging mechanism per port.
1278  *
1279  * @param dev
1280  *   Pointer to the rte_eth_dev structure.
1281  * @param attr
1282  *   Port configuration attributes.
1283  * @param nb_queues
1284  *   Number of HWS queues.
1285  *
1286  * @return
1287  *   0 on success, a negative errno value otherwise and rte_errno is set.
1288  */
1289 int
1290 mlx5_hws_age_pool_init(struct rte_eth_dev *dev,
1291 		       uint32_t nb_aging_objects,
1292 		       uint16_t nb_queues,
1293 		       bool strict_queue)
1294 {
1295 	struct mlx5_priv *priv = dev->data->dev_private;
1296 	struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
1297 	struct mlx5_indexed_pool_config cfg = {
1298 		.size =
1299 		      RTE_CACHE_LINE_ROUNDUP(sizeof(struct mlx5_hws_age_param)),
1300 		.trunk_size = 1 << 12,
1301 		.per_core_cache = 1 << 13,
1302 		.need_lock = 1,
1303 		.release_mem_en = !!priv->sh->config.reclaim_mode,
1304 		.malloc = mlx5_malloc,
1305 		.free = mlx5_free,
1306 		.type = "mlx5_hws_age_pool",
1307 	};
1308 	uint32_t nb_alloc_cnts;
1309 	uint32_t rsize;
1310 	uint32_t nb_ages_updated;
1311 	int ret;
1312 
1313 	MLX5_ASSERT(priv->hws_cpool);
1314 	nb_alloc_cnts = mlx5_hws_cnt_pool_get_size(priv->hws_cpool);
1315 	if (strict_queue) {
1316 		rsize = mlx5_hws_aged_out_q_ring_size_get(nb_alloc_cnts,
1317 							  nb_queues);
1318 		nb_ages_updated = rsize * nb_queues + nb_aging_objects;
1319 	} else {
1320 		rsize = mlx5_hws_aged_out_ring_size_get(nb_alloc_cnts);
1321 		nb_ages_updated = rsize + nb_aging_objects;
1322 	}
1323 	ret = mlx5_hws_age_info_init(dev, nb_queues, strict_queue, rsize);
1324 	if (ret < 0)
1325 		return ret;
1326 	cfg.max_idx = rte_align32pow2(nb_ages_updated);
1327 	if (cfg.max_idx <= cfg.trunk_size) {
1328 		cfg.per_core_cache = 0;
1329 		cfg.trunk_size = cfg.max_idx;
1330 	} else if (cfg.max_idx <= MLX5_HW_IPOOL_SIZE_THRESHOLD) {
1331 		cfg.per_core_cache = MLX5_HW_IPOOL_CACHE_MIN;
1332 	}
1333 	age_info->ages_ipool = mlx5_ipool_create(&cfg);
1334 	if (age_info->ages_ipool == NULL) {
1335 		mlx5_hws_age_info_destroy(priv);
1336 		rte_errno = ENOMEM;
1337 		return -rte_errno;
1338 	}
1339 	priv->hws_age_req = 1;
1340 	return 0;
1341 }
1342 
1343 /**
1344  * Cleanup all aging resources per port.
1345  *
1346  * @param priv
1347  *   Pointer to port private object.
1348  */
1349 void
1350 mlx5_hws_age_pool_destroy(struct mlx5_priv *priv)
1351 {
1352 	struct mlx5_age_info *age_info = GET_PORT_AGE_INFO(priv);
1353 
1354 	rte_spinlock_lock(&priv->sh->cpool_lock);
1355 	MLX5_ASSERT(priv->hws_age_req);
1356 	mlx5_hws_age_info_destroy(priv);
1357 	mlx5_ipool_destroy(age_info->ages_ipool);
1358 	age_info->ages_ipool = NULL;
1359 	priv->hws_age_req = 0;
1360 	rte_spinlock_unlock(&priv->sh->cpool_lock);
1361 }
1362 
1363 #endif
1364