xref: /dpdk/drivers/net/mlx5/mlx5_flow_aso.c (revision 3cddeba0ca38b00c7dc646277484d08a4cb2d862)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  */
4 #include <mlx5_prm.h>
5 #include <rte_malloc.h>
6 #include <rte_cycles.h>
7 #include <rte_eal_paging.h>
8 
9 #include <mlx5_malloc.h>
10 #include <mlx5_common_os.h>
11 #include <mlx5_common_devx.h>
12 
13 #include "mlx5.h"
14 #include "mlx5_flow.h"
15 #include "mlx5_hws_cnt.h"
16 
17 #define MLX5_ASO_CNT_QUEUE_LOG_DESC 14
18 
19 /**
20  * Free MR resources.
21  *
22  * @param[in] cdev
23  *   Pointer to the mlx5 common device.
24  * @param[in] mr
25  *   MR to free.
26  */
27 static void
28 mlx5_aso_dereg_mr(struct mlx5_common_device *cdev, struct mlx5_pmd_mr *mr)
29 {
30 	void *addr = mr->addr;
31 
32 	cdev->mr_scache.dereg_mr_cb(mr);
33 	mlx5_free(addr);
34 	memset(mr, 0, sizeof(*mr));
35 }
36 
37 /**
38  * Register Memory Region.
39  *
40  * @param[in] cdev
41  *   Pointer to the mlx5 common device.
42  * @param[in] length
43  *   Size of MR buffer.
44  * @param[in,out] mr
45  *   Pointer to MR to create.
46  *
47  * @return
48  *   0 on success, a negative errno value otherwise and rte_errno is set.
49  */
50 static int
51 mlx5_aso_reg_mr(struct mlx5_common_device *cdev, size_t length,
52 		struct mlx5_pmd_mr *mr)
53 {
54 	int ret;
55 
56 	mr->addr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, length, 4096,
57 			       SOCKET_ID_ANY);
58 	if (!mr->addr) {
59 		DRV_LOG(ERR, "Failed to create ASO bits mem for MR.");
60 		return -1;
61 	}
62 	ret = cdev->mr_scache.reg_mr_cb(cdev->pd, mr->addr, length, mr);
63 	if (ret) {
64 		DRV_LOG(ERR, "Failed to create direct Mkey.");
65 		mlx5_free(mr->addr);
66 		return -1;
67 	}
68 	return 0;
69 }
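
/*
 * The buffer registered above is the host memory the device DMAs ASO data
 * into: the flow hit bitmaps used for aging and the 64B connection
 * tracking contexts returned by queries. The resulting lkey and virtual
 * address are written into the ASO control segment of every WQE by the
 * age/CT init_sq() helpers below, so the MR must stay registered for the
 * whole lifetime of the SQ that uses it.
 */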
70 
71 /**
72  * Destroy Send Queue used for ASO access.
73  *
74  * @param[in] sq
75  *   ASO SQ to destroy.
76  */
77 void
78 mlx5_aso_destroy_sq(struct mlx5_aso_sq *sq)
79 {
80 	mlx5_devx_sq_destroy(&sq->sq_obj);
81 	mlx5_devx_cq_destroy(&sq->cq.cq_obj);
82 	memset(sq, 0, sizeof(*sq));
83 }
84 
85 /**
86  * Initialize Send Queue used for ASO counter access.
87  *
88  * @param[in] sq
89  *   ASO SQ to initialize.
90  */
91 static void
92 mlx5_aso_cnt_init_sq(struct mlx5_aso_sq *sq)
93 {
94 	volatile struct mlx5_aso_wqe *restrict wqe;
95 	int i;
96 	int size = 1 << sq->log_desc_n;
97 
98 	/* The state of all the following fields should stay constant. */
99 	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
100 		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
101 							  (sizeof(*wqe) >> 4));
102 		wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
103 			(0u |
104 			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
105 			 (ASO_OP_ALWAYS_FALSE << ASO_CSEG_COND_1_OPER_OFFSET) |
106 			 (ASO_OP_ALWAYS_FALSE << ASO_CSEG_COND_0_OPER_OFFSET) |
107 			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
108 		wqe->aso_cseg.data_mask = RTE_BE64(UINT64_MAX);
109 	}
110 }
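
/*
 * All of the init_*_sq() helpers share the same template pattern:
 * general_cseg.sq_ds packs the SQ number into the upper bytes and the WQE
 * size in 16-byte units into the lowest byte (sizeof(*wqe) >> 4 == 8 for
 * the two-WQEBB ASO WQE), and aso_cseg.operand_masks selects the 64-byte
 * bytewise data mask mode. The counter variant programs ALWAYS_FALSE
 * conditions while the age and CT variants use ALWAYS_TRUE, presumably
 * because counters are only read whereas the other object types are
 * conditionally written by the device.
 */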
111 
112 /**
113  * Initialize Send Queue used for ASO flow hit (aging) access.
114  *
115  * @param[in] sq
116  *   ASO SQ to initialize.
117  */
118 static void
119 mlx5_aso_age_init_sq(struct mlx5_aso_sq *sq)
120 {
121 	volatile struct mlx5_aso_wqe *restrict wqe;
122 	int i;
123 	int size = 1 << sq->log_desc_n;
124 	uint64_t addr;
125 
126 	/* The state of all the following fields should stay constant. */
127 	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
128 		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
129 							  (sizeof(*wqe) >> 4));
130 		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
131 		addr = (uint64_t)((uint64_t *)sq->mr.addr + i *
132 					    MLX5_ASO_AGE_ACTIONS_PER_POOL / 64);
133 		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
134 		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
135 		wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
136 			(0u |
137 			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
138 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
139 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
140 			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
141 		wqe->aso_cseg.data_mask = RTE_BE64(UINT64_MAX);
142 	}
143 }
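
/*
 * MR layout for flow hit: descriptor i owns a bitmap of
 * MLX5_ASO_AGE_ACTIONS_PER_POOL / 8 bytes (one hit bit per age action of
 * the pool served by that descriptor), which is what the
 * "+ i * MLX5_ASO_AGE_ACTIONS_PER_POOL / 64" step on a uint64_t pointer
 * expresses, and what the (ACTIONS_PER_POOL / 8) * sq_desc_n sizing in
 * mlx5_aso_queue_init() provides. Going by the field name, the low bit
 * set in va_l_r ("| 1u") is the read-enable flag of the ASO control
 * segment.
 */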
144 
145 /**
146  * Initialize Send Queue used for ASO flow meter access.
147  *
148  * @param[in] sq
149  *   ASO SQ to initialize.
150  */
151 void
152 mlx5_aso_mtr_init_sq(struct mlx5_aso_sq *sq)
153 {
154 	volatile struct mlx5_aso_wqe *restrict wqe;
155 	int i;
156 	int size = 1 << sq->log_desc_n;
157 
158 	/* The state of all the following fields should stay constant. */
159 	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
160 		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
161 							  (sizeof(*wqe) >> 4));
162 		wqe->aso_cseg.operand_masks = RTE_BE32(0u |
163 			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
164 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
165 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
166 			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
167 		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
168 							 MLX5_COMP_MODE_OFFSET);
169 	}
170 }
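
/*
 * Unlike the age and CT templates, the meter template leaves lkey/VA
 * unset: a meter update carries its data inside the WQE data segment
 * (filled by mlx5_aso_mtr_sq_enqueue_single()) rather than referencing a
 * host buffer, and every WQE requests a CQE (MLX5_COMP_ALWAYS) since
 * completions are counted per WQE in mlx5_aso_mtr_completion_handle().
 */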
171 
172 /**
173  * Initialize Send Queue used for ASO connection tracking.
174  *
175  * @param[in] sq
176  *   ASO SQ to initialize.
177  */
178 static void
179 mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
180 {
181 	volatile struct mlx5_aso_wqe *restrict wqe;
182 	int i;
183 	int size = 1 << sq->log_desc_n;
184 	uint64_t addr;
185 
186 	/* The state of all the following fields should stay constant. */
187 	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
188 		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
189 							  (sizeof(*wqe) >> 4));
190 		/* One unique MR for the query data. */
191 		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
192 		/* Magic number 64 represents the length of an ASO CT object. */
193 		addr = (uint64_t)((uintptr_t)sq->mr.addr + i * 64);
194 		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
195 		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
196 		/*
197 		 * The values of operand_masks differ between modify and
198 		 * query.
199 		 * data_mask may also differ for each modification; in a query
200 		 * it can be zero and is ignored.
201 		 * CQE generation is always requested so the driver knows when
202 		 * the flow can be created or the data can be read.
203 		 */
204 		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
205 						   MLX5_COMP_MODE_OFFSET);
206 	}
207 }
208 
209 /**
210  * Create Send Queue used for ASO access.
211  *
212  * @param[in] cdev
213  *   Pointer to the mlx5 common device.
214  * @param[in,out] sq
215  *   Pointer to SQ to create.
216  * @param[in] uar
217  *   User Access Region object.
218  *
219  * @return
220  *   0 on success, a negative errno value otherwise and rte_errno is set.
221  */
222 int
223 mlx5_aso_sq_create(struct mlx5_common_device *cdev, struct mlx5_aso_sq *sq,
224 		   void *uar, uint16_t log_desc_n)
225 {
226 	struct mlx5_devx_cq_attr cq_attr = {
227 		.uar_page_id = mlx5_os_get_devx_uar_page_id(uar),
228 	};
229 	struct mlx5_devx_create_sq_attr sq_attr = {
230 		.user_index = 0xFFFF,
231 		.wq_attr = (struct mlx5_devx_wq_attr){
232 			.pd = cdev->pdn,
233 			.uar_page = mlx5_os_get_devx_uar_page_id(uar),
234 		},
235 		.ts_format =
236 			mlx5_ts_format_conv(cdev->config.hca_attr.sq_ts_format),
237 	};
238 	struct mlx5_devx_modify_sq_attr modify_attr = {
239 		.state = MLX5_SQC_STATE_RDY,
240 	};
241 	uint16_t log_wqbb_n;
242 	int ret;
243 
244 	if (mlx5_devx_cq_create(cdev->ctx, &sq->cq.cq_obj,
245 				log_desc_n, &cq_attr,
246 				SOCKET_ID_ANY))
247 		goto error;
248 	sq->cq.cq_ci = 0;
249 	sq->cq.log_desc_n = log_desc_n;
250 	sq->log_desc_n = log_desc_n;
251 	sq_attr.cqn = sq->cq.cq_obj.cq->id;
252 	/* An mlx5_aso_wqe is twice the size of an mlx5_wqe, i.e. 2 WQEBBs. */
253 	log_wqbb_n = sq->log_desc_n + 1;
254 	ret = mlx5_devx_sq_create(cdev->ctx, &sq->sq_obj, log_wqbb_n, &sq_attr,
255 				  SOCKET_ID_ANY);
256 	if (ret) {
257 		DRV_LOG(ERR, "Can't create SQ object.");
258 		rte_errno = ENOMEM;
259 		goto error;
260 	}
261 	ret = mlx5_devx_cmd_modify_sq(sq->sq_obj.sq, &modify_attr);
262 	if (ret) {
263 		DRV_LOG(ERR, "Can't change SQ state to ready.");
264 		rte_errno = ENOMEM;
265 		goto error;
266 	}
267 	sq->pi = 0;
268 	sq->head = 0;
269 	sq->tail = 0;
270 	sq->sqn = sq->sq_obj.sq->id;
271 	rte_spinlock_init(&sq->sqsl);
272 	return 0;
273 error:
274 	mlx5_aso_destroy_sq(sq);
275 	return -1;
276 }
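
/*
 * Illustrative pairing of the helpers above, mirroring what the queue
 * init functions below do (a sketch, not an additional code path); any
 * of the init_*_sq() templates can take the place of the meter one:
 *
 *	struct mlx5_aso_sq sq = { 0 };
 *
 *	if (mlx5_aso_sq_create(cdev, &sq, sh->tx_uar.obj,
 *			       MLX5_ASO_QUEUE_LOG_DESC))
 *		return -1;
 *	mlx5_aso_mtr_init_sq(&sq);
 *	...
 *	mlx5_aso_destroy_sq(&sq);
 *
 * mlx5_aso_sq_create() destroys any partially created CQ/SQ on its own
 * error path, so callers only need to unwind queues that were fully set
 * up.
 */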
277 
278 void
279 mlx5_aso_mtr_queue_uninit(struct mlx5_dev_ctx_shared *sh __rte_unused,
280 			  struct mlx5_aso_mtr_pool *hws_pool,
281 			  struct mlx5_aso_mtr_pools_mng *pool_mng)
282 {
283 	uint32_t i;
284 
285 	if (hws_pool) {
286 		for (i = 0; i < hws_pool->nb_sq; i++)
287 			mlx5_aso_destroy_sq(hws_pool->sq + i);
288 		mlx5_free(hws_pool->sq);
289 		return;
290 	}
291 	if (pool_mng)
292 		mlx5_aso_destroy_sq(&pool_mng->sq);
293 }
294 
295 int
296 mlx5_aso_mtr_queue_init(struct mlx5_dev_ctx_shared *sh,
297 				struct mlx5_aso_mtr_pool *hws_pool,
298 				struct mlx5_aso_mtr_pools_mng *pool_mng,
299 				uint32_t nb_queues)
300 {
301 	struct mlx5_common_device *cdev = sh->cdev;
302 	struct mlx5_aso_sq *sq;
303 	uint32_t i;
304 
305 	if (hws_pool) {
306 		sq = mlx5_malloc(MLX5_MEM_ZERO,
307 			sizeof(struct mlx5_aso_sq) * nb_queues,
308 			RTE_CACHE_LINE_SIZE, SOCKET_ID_ANY);
309 		if (!sq)
310 			return -1;
311 		hws_pool->sq = sq;
312 		for (i = 0; i < nb_queues; i++) {
313 			if (mlx5_aso_sq_create(cdev, hws_pool->sq + i,
314 					       sh->tx_uar.obj,
315 					       MLX5_ASO_QUEUE_LOG_DESC))
316 				goto error;
317 			mlx5_aso_mtr_init_sq(hws_pool->sq + i);
318 		}
319 		hws_pool->nb_sq = nb_queues;
320 	}
321 	if (pool_mng) {
322 		if (mlx5_aso_sq_create(cdev, &pool_mng->sq,
323 				       sh->tx_uar.obj,
324 				       MLX5_ASO_QUEUE_LOG_DESC))
325 			return -1;
326 		mlx5_aso_mtr_init_sq(&pool_mng->sq);
327 	}
328 	return 0;
329 error:
330 	do {
331 		mlx5_aso_destroy_sq(hws_pool->sq + i);
332 	} while (i--);
333 	return -1;
334 }
335 
336 /**
337  * API to create and initialize Send Queue used for ASO access.
338  *
339  * @param[in] sh
340  *   Pointer to shared device context.
341  * @param[in] aso_opc_mod
342  *   Mode of ASO feature.
343  * @param[in] nb_queues
344  *   Number of Send Queues to create.
345  *
346  * @return
347  *   0 on success, a negative errno value otherwise and rte_errno is set.
348  */
349 int
350 mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
351 		    enum mlx5_access_aso_opc_mod aso_opc_mod,
352 			uint32_t nb_queues)
353 {
354 	uint32_t sq_desc_n = 1 << MLX5_ASO_QUEUE_LOG_DESC;
355 	struct mlx5_common_device *cdev = sh->cdev;
356 
357 	switch (aso_opc_mod) {
358 	case ASO_OPC_MOD_FLOW_HIT:
359 		if (mlx5_aso_reg_mr(cdev, (MLX5_ASO_AGE_ACTIONS_PER_POOL / 8) *
360 				    sq_desc_n, &sh->aso_age_mng->aso_sq.mr))
361 			return -1;
362 		if (mlx5_aso_sq_create(cdev, &sh->aso_age_mng->aso_sq,
363 				       sh->tx_uar.obj,
364 				       MLX5_ASO_QUEUE_LOG_DESC)) {
365 			mlx5_aso_dereg_mr(cdev, &sh->aso_age_mng->aso_sq.mr);
366 			return -1;
367 		}
368 		mlx5_aso_age_init_sq(&sh->aso_age_mng->aso_sq);
369 		break;
370 	case ASO_OPC_MOD_POLICER:
371 		if (mlx5_aso_mtr_queue_init(sh, NULL,
372 					    &sh->mtrmng->pools_mng, nb_queues))
373 			return -1;
374 		break;
375 	case ASO_OPC_MOD_CONNECTION_TRACKING:
376 		if (mlx5_aso_ct_queue_init(sh, sh->ct_mng, MLX5_ASO_CT_SQ_NUM))
377 			return -1;
378 		break;
379 	default:
380 		DRV_LOG(ERR, "Unknown ASO operation mode");
381 		return -1;
382 	}
383 	return 0;
384 }
385 
386 /**
387  * API to destroy Send Queue used for ASO access.
388  *
389  * @param[in] sh
390  *   Pointer to shared device context.
391  * @param[in] aso_opc_mod
392  *   Mode of ASO feature.
393  */
394 void
395 mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
396 		      enum mlx5_access_aso_opc_mod aso_opc_mod)
397 {
398 	struct mlx5_aso_sq *sq = NULL;
399 
400 	switch (aso_opc_mod) {
401 	case ASO_OPC_MOD_FLOW_HIT:
402 		mlx5_aso_dereg_mr(sh->cdev, &sh->aso_age_mng->aso_sq.mr);
403 		sq = &sh->aso_age_mng->aso_sq;
404 		break;
405 	case ASO_OPC_MOD_POLICER:
406 		mlx5_aso_mtr_queue_uninit(sh, NULL, &sh->mtrmng->pools_mng);
407 		break;
408 	case ASO_OPC_MOD_CONNECTION_TRACKING:
409 		mlx5_aso_ct_queue_uninit(sh, sh->ct_mng);
410 		break;
411 	default:
412 		DRV_LOG(ERR, "Unknown ASO operation mode");
413 		return;
414 	}
415 	if (sq)
416 		mlx5_aso_destroy_sq(sq);
417 }
418 
419 /**
420  * Write a burst of WQEs to ASO SQ.
421  *
422  * @param[in] sh
423  *   Pointer to shared device context.
424  * @param[in] n
425  *   Index of the last valid pool.
426  *
427  * @return
428  *   Number of WQEs in burst.
429  */
430 static uint16_t
431 mlx5_aso_sq_enqueue_burst(struct mlx5_dev_ctx_shared *sh, uint16_t n)
432 {
433 	struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
434 	volatile struct mlx5_aso_wqe *wqe;
435 	struct mlx5_aso_sq *sq = &mng->aso_sq;
436 	struct mlx5_aso_age_pool *pool;
437 	uint16_t size = 1 << sq->log_desc_n;
438 	uint16_t mask = size - 1;
439 	uint16_t max;
440 	uint16_t start_head = sq->head;
441 
442 	max = RTE_MIN(size - (uint16_t)(sq->head - sq->tail), n - sq->next);
443 	if (unlikely(!max))
444 		return 0;
445 	sq->elts[start_head & mask].burst_size = max;
446 	do {
447 		wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
448 		rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
449 		/* Fill next WQE. */
450 		rte_rwlock_read_lock(&mng->resize_rwl);
451 		pool = mng->pools[sq->next];
452 		rte_rwlock_read_unlock(&mng->resize_rwl);
453 		sq->elts[sq->head & mask].pool = pool;
454 		wqe->general_cseg.misc =
455 				rte_cpu_to_be_32(((struct mlx5_devx_obj *)
456 						 (pool->flow_hit_aso_obj))->id);
457 		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR <<
458 							 MLX5_COMP_MODE_OFFSET);
459 		wqe->general_cseg.opcode = rte_cpu_to_be_32
460 						(MLX5_OPCODE_ACCESS_ASO |
461 						 (ASO_OPC_MOD_FLOW_HIT <<
462 						  WQE_CSEG_OPC_MOD_OFFSET) |
463 						 (sq->pi <<
464 						  WQE_CSEG_WQE_INDEX_OFFSET));
465 		sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
466 		sq->head++;
467 		sq->next++;
468 		max--;
469 	} while (max);
470 	wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
471 							 MLX5_COMP_MODE_OFFSET);
472 	mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
473 			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
474 			   !sh->tx_uar.dbnc);
475 	return sq->elts[start_head & mask].burst_size;
476 }
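
/*
 * Within a burst only the final WQE requests an unconditional CQE
 * (MLX5_COMP_ALWAYS); the others ask for one only on error
 * (MLX5_COMP_ONLY_FIRST_ERR). The burst size is recorded in the element
 * at the burst's starting head so the completion handler can credit the
 * whole burst from a single CQE, the producer index advances by 2 per
 * WQE because each ASO WQE occupies two WQEBBs, and the doorbell is rung
 * once with the last WQE of the burst.
 */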
477 
478 /**
479  * Debug utility function. Dump contents of error CQE and WQE.
480  *
481  * @param[in] cqe
482  *   Error CQE to dump.
483  * @param[in] wqe
484  *   Error WQE to dump.
485  */
486 static void
487 mlx5_aso_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe)
488 {
489 	int i;
490 
491 	DRV_LOG(ERR, "Error cqe:");
492 	for (i = 0; i < (int)sizeof(struct mlx5_error_cqe) / 4; i += 4)
493 		DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
494 			cqe[i + 2], cqe[i + 3]);
495 	DRV_LOG(ERR, "\nError wqe:");
496 	for (i = 0; i < (int)sizeof(struct mlx5_aso_wqe) / 4; i += 4)
497 		DRV_LOG(ERR, "%08X %08X %08X %08X", wqe[i], wqe[i + 1],
498 			wqe[i + 2], wqe[i + 3]);
499 }
500 
501 /**
502  * Handle case of error CQE.
503  *
504  * @param[in] sq
505  *   ASO SQ to use.
506  */
507 void
508 mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq)
509 {
510 	struct mlx5_aso_cq *cq = &sq->cq;
511 	uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1);
512 	volatile struct mlx5_error_cqe *cqe =
513 			(volatile struct mlx5_error_cqe *)&cq->cq_obj.cqes[idx];
514 
515 	cq->errors++;
516 	idx = rte_be_to_cpu_16(cqe->wqe_counter) & ((1u << sq->log_desc_n) - 1);
517 	mlx5_aso_dump_err_objs((volatile uint32_t *)cqe,
518 			       (volatile uint32_t *)&sq->sq_obj.aso_wqes[idx]);
519 }
520 
521 int
522 mlx5_aso_pull_completion(struct mlx5_aso_sq *sq,
523 			 struct rte_flow_op_result res[],
524 			 uint16_t n_res)
525 {
526 	struct mlx5_aso_cq *cq = &sq->cq;
527 	volatile struct mlx5_cqe *restrict cqe;
528 	const uint32_t cq_size = 1 << cq->log_desc_n;
529 	const uint32_t mask = cq_size - 1;
530 	uint32_t idx;
531 	uint32_t next_idx;
532 	uint16_t max;
533 	uint16_t n = 0;
534 	int ret;
535 
536 	max = (uint16_t)(sq->head - sq->tail);
537 	if (unlikely(!max || !n_res))
538 		return 0;
539 	next_idx = cq->cq_ci & mask;
540 	do {
541 		idx = next_idx;
542 		next_idx = (cq->cq_ci + 1) & mask;
543 		/* Need to confirm the position of the prefetch. */
544 		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
545 		cqe = &cq->cq_obj.cqes[idx];
546 		ret = check_cqe(cqe, cq_size, cq->cq_ci);
547 		/*
548 		 * Be sure owner read is done before any other cookie field or
549 		 * opaque field.
550 		 */
551 		rte_io_rmb();
552 		if (ret == MLX5_CQE_STATUS_HW_OWN)
553 			break;
554 		res[n].user_data = sq->elts[(uint16_t)((sq->tail + n) & mask)].user_data;
555 		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
556 			mlx5_aso_cqe_err_handle(sq);
557 			res[n].status = RTE_FLOW_OP_ERROR;
558 		} else {
559 			res[n].status = RTE_FLOW_OP_SUCCESS;
560 		}
561 		cq->cq_ci++;
562 		if (++n == n_res)
563 			break;
564 	} while (1);
565 	if (likely(n)) {
566 		sq->tail += n;
567 		rte_io_wmb();
568 		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
569 	}
570 	return n;
571 }
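
/*
 * Generic completion poller used by the asynchronous (HWS) paths: it
 * drains up to n_res CQEs, maps each back to the user_data recorded at
 * enqueue time, reports per-operation status and updates the CQ doorbell
 * record once at the end. A caller sketch, assuming a caller-defined
 * result array (see mlx5_aso_poll_cq_mtr_hws() below for a real user):
 *
 *	struct rte_flow_op_result res[4];
 *	int n;
 *
 *	do {
 *		n = mlx5_aso_pull_completion(sq, res, RTE_DIM(res));
 *		// handle res[0..n - 1].user_data and .status here
 *	} while (n == RTE_DIM(res));
 */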
572 
573 void
574 mlx5_aso_push_wqe(struct mlx5_dev_ctx_shared *sh,
575 		  struct mlx5_aso_sq *sq)
576 {
577 	if (sq->db_pi == sq->pi)
578 		return;
579 	mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)sq->db,
580 			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
581 			   !sh->tx_uar.dbnc);
582 	sq->db_pi = sq->pi;
583 }
584 
585 /**
586  * Update ASO objects upon completion.
587  *
588  * @param[in] sh
589  *   Shared device context.
590  * @param[in] n
591  *   Number of completed ASO objects.
592  */
593 static void
594 mlx5_aso_age_action_update(struct mlx5_dev_ctx_shared *sh, uint16_t n)
595 {
596 	struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
597 	struct mlx5_aso_sq *sq = &mng->aso_sq;
598 	struct mlx5_age_info *age_info;
599 	const uint16_t size = 1 << sq->log_desc_n;
600 	const uint16_t mask = size - 1;
601 	const uint64_t curr = MLX5_CURR_TIME_SEC;
602 	uint16_t expected = AGE_CANDIDATE;
603 	uint16_t i;
604 
605 	for (i = 0; i < n; ++i) {
606 		uint16_t idx = (sq->tail + i) & mask;
607 		struct mlx5_aso_age_pool *pool = sq->elts[idx].pool;
608 		uint64_t diff = curr - pool->time_of_last_age_check;
609 		uint64_t *addr = sq->mr.addr;
610 		int j;
611 
612 		addr += idx * MLX5_ASO_AGE_ACTIONS_PER_POOL / 64;
613 		pool->time_of_last_age_check = curr;
614 		for (j = 0; j < MLX5_ASO_AGE_ACTIONS_PER_POOL; j++) {
615 			struct mlx5_aso_age_action *act = &pool->actions[j];
616 			struct mlx5_age_param *ap = &act->age_params;
617 			uint8_t byte;
618 			uint8_t offset;
619 			uint8_t *u8addr;
620 			uint8_t hit;
621 
622 			if (rte_atomic_load_explicit(&ap->state, rte_memory_order_relaxed) !=
623 					    AGE_CANDIDATE)
624 				continue;
625 			byte = 63 - (j / 8);
626 			offset = j % 8;
627 			u8addr = (uint8_t *)addr;
628 			hit = (u8addr[byte] >> offset) & 0x1;
629 			if (hit) {
630 				rte_atomic_store_explicit(&ap->sec_since_last_hit, 0,
631 						 rte_memory_order_relaxed);
632 			} else {
633 				struct mlx5_priv *priv;
634 
635 				rte_atomic_fetch_add_explicit(&ap->sec_since_last_hit,
636 						   diff, rte_memory_order_relaxed);
637 				/* If timeout passed add to aged-out list. */
638 				if (ap->sec_since_last_hit <= ap->timeout)
639 					continue;
640 				priv =
641 				rte_eth_devices[ap->port_id].data->dev_private;
642 				age_info = GET_PORT_AGE_INFO(priv);
643 				rte_spinlock_lock(&age_info->aged_sl);
644 				if (rte_atomic_compare_exchange_strong_explicit(&ap->state,
645 								&expected,
646 								AGE_TMOUT,
647 							       rte_memory_order_relaxed,
648 							    rte_memory_order_relaxed)) {
649 					LIST_INSERT_HEAD(&age_info->aged_aso,
650 							 act, next);
651 					MLX5_AGE_SET(age_info,
652 						     MLX5_AGE_EVENT_NEW);
653 				}
654 				rte_spinlock_unlock(&age_info->aged_sl);
655 			}
656 		}
657 	}
658 	mlx5_age_event_prepare(sh);
659 }
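
/*
 * Bitmap decoding above: the hit bit of action j is read from byte
 * 63 - (j / 8), bit j % 8 of the 64-byte mask. Actions that were hit get
 * sec_since_last_hit reset to 0, the others accumulate the elapsed
 * seconds and, once past their timeout, are moved to the per-port
 * aged-out list under the aged_sl spinlock with a CAS from AGE_CANDIDATE
 * to AGE_TMOUT so that each action is reported only once.
 */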
660 
661 /**
662  * Handle completions from WQEs sent to ASO SQ.
663  *
664  * @param[in] sh
665  *   Shared device context.
666  *
667  * @return
668  *   Number of CQEs handled.
669  */
670 static uint16_t
671 mlx5_aso_completion_handle(struct mlx5_dev_ctx_shared *sh)
672 {
673 	struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
674 	struct mlx5_aso_sq *sq = &mng->aso_sq;
675 	struct mlx5_aso_cq *cq = &sq->cq;
676 	volatile struct mlx5_cqe *restrict cqe;
677 	const unsigned int cq_size = 1 << cq->log_desc_n;
678 	const unsigned int mask = cq_size - 1;
679 	uint32_t idx;
680 	uint32_t next_idx = cq->cq_ci & mask;
681 	const uint16_t max = (uint16_t)(sq->head - sq->tail);
682 	uint16_t i = 0;
683 	int ret;
684 	if (unlikely(!max))
685 		return 0;
686 	do {
687 		idx = next_idx;
688 		next_idx = (cq->cq_ci + 1) & mask;
689 		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
690 		cqe = &cq->cq_obj.cqes[idx];
691 		ret = check_cqe(cqe, cq_size, cq->cq_ci);
692 		/*
693 		 * Be sure owner read is done before any other cookie field or
694 		 * opaque field.
695 		 */
696 		rte_io_rmb();
697 		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
698 			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
699 				break;
700 			mlx5_aso_cqe_err_handle(sq);
701 		} else {
702 			i += sq->elts[(sq->tail + i) & mask].burst_size;
703 		}
704 		cq->cq_ci++;
705 	} while (1);
706 	if (likely(i)) {
707 		mlx5_aso_age_action_update(sh, i);
708 		sq->tail += i;
709 		rte_io_wmb();
710 		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
711 	}
712 	return i;
713 }
714 
715 /**
716  * Periodically read CQEs and send WQEs to ASO SQ.
717  *
718  * @param[in] arg
719  *   Shared device context containing the ASO SQ.
720  */
721 static void
722 mlx5_flow_aso_alarm(void *arg)
723 {
724 	struct mlx5_dev_ctx_shared *sh = arg;
725 	struct mlx5_aso_sq *sq = &sh->aso_age_mng->aso_sq;
726 	uint32_t us = 100u;
727 	uint16_t n;
728 
729 	rte_rwlock_read_lock(&sh->aso_age_mng->resize_rwl);
730 	n = sh->aso_age_mng->next;
731 	rte_rwlock_read_unlock(&sh->aso_age_mng->resize_rwl);
732 	mlx5_aso_completion_handle(sh);
733 	if (sq->next == n) {
734 		/* End of loop: wait 1 second. */
735 		us = US_PER_S;
736 		sq->next = 0;
737 	}
738 	mlx5_aso_sq_enqueue_burst(sh, n);
739 	if (rte_eal_alarm_set(us, mlx5_flow_aso_alarm, sh))
740 		DRV_LOG(ERR, "Cannot reinitialize aso alarm.");
741 }
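
/*
 * Polling cadence: while there are still pools left to scan the alarm
 * re-arms every 100 microseconds; once sq->next reaches the number of
 * pools (mng->next) the cursor is reset and the next pass starts after a
 * one second pause.
 */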
742 
743 /**
744  * API to start ASO access using ASO SQ.
745  *
746  * @param[in] sh
747  *   Pointer to shared device context.
748  *
749  * @return
750  *   0 on success, a negative errno value otherwise and rte_errno is set.
751  */
752 int
753 mlx5_aso_flow_hit_queue_poll_start(struct mlx5_dev_ctx_shared *sh)
754 {
755 	if (rte_eal_alarm_set(US_PER_S, mlx5_flow_aso_alarm, sh)) {
756 		DRV_LOG(ERR, "Cannot reinitialize ASO age alarm.");
757 		return -rte_errno;
758 	}
759 	return 0;
760 }
761 
762 /**
763  * API to stop ASO access using ASO SQ.
764  *
765  * @param[in] sh
766  *   Pointer to shared device context.
767  *
768  * @return
769  *   0 on success, a negative errno value otherwise and rte_errno is set.
770  */
771 int
772 mlx5_aso_flow_hit_queue_poll_stop(struct mlx5_dev_ctx_shared *sh)
773 {
774 	int retries = 1024;
775 
776 	if (!sh->aso_age_mng->aso_sq.sq_obj.sq)
777 		return -EINVAL;
778 	rte_errno = 0;
779 	while (--retries) {
780 		rte_eal_alarm_cancel(mlx5_flow_aso_alarm, sh);
781 		if (rte_errno != EINPROGRESS)
782 			break;
783 		rte_pause();
784 	}
785 	return -rte_errno;
786 }
787 
788 static uint16_t
789 mlx5_aso_mtr_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
790 			       struct mlx5_aso_sq *sq,
791 			       struct mlx5_aso_mtr *aso_mtr,
792 			       struct mlx5_mtr_bulk *bulk,
793 			       bool need_lock,
794 			       struct mlx5_hw_q_job *job,
795 			       bool push)
796 {
797 	volatile struct mlx5_aso_wqe *wqe = NULL;
798 	struct mlx5_flow_meter_info *fm = NULL;
799 	struct mlx5_flow_meter_profile *fmp;
800 	uint16_t size = 1 << sq->log_desc_n;
801 	uint16_t mask = size - 1;
802 	uint16_t res;
803 	uint32_t dseg_idx = 0;
804 	struct mlx5_aso_mtr_pool *pool = NULL;
805 	uint32_t param_le;
806 	int id;
807 
808 	if (need_lock)
809 		rte_spinlock_lock(&sq->sqsl);
810 	res = size - (uint16_t)(sq->head - sq->tail);
811 	if (unlikely(!res)) {
812 		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
813 		if (need_lock)
814 			rte_spinlock_unlock(&sq->sqsl);
815 		return 0;
816 	}
817 	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
818 	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
819 	/* Fill next WQE. */
820 	fm = &aso_mtr->fm;
821 	sq->elts[sq->head & mask].user_data = job ? job : (void *)aso_mtr;
822 	if (aso_mtr->type == ASO_METER_INDIRECT) {
823 		if (likely(sh->config.dv_flow_en == 2))
824 			pool = aso_mtr->pool;
825 		else
826 			pool = container_of(aso_mtr, struct mlx5_aso_mtr_pool,
827 					    mtrs[aso_mtr->offset]);
828 		id = pool->devx_obj->id;
829 	} else {
830 		id = bulk->devx_obj->id;
831 	}
832 	wqe->general_cseg.misc = rte_cpu_to_be_32(id +
833 						  (aso_mtr->offset >> 1));
834 	wqe->general_cseg.opcode =
835 		rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
836 			(ASO_OPC_MOD_POLICER << WQE_CSEG_OPC_MOD_OFFSET) |
837 			 sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
838 	/* There are 2 meters in one ASO cache line. */
839 	dseg_idx = aso_mtr->offset & 0x1;
840 	wqe->aso_cseg.data_mask =
841 		RTE_BE64(MLX5_IFC_FLOW_METER_PARAM_MASK << (32 * !dseg_idx));
842 	if (fm->is_enable) {
843 		wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
844 			fm->profile->srtcm_prm.cbs_cir;
845 		wqe->aso_dseg.mtrs[dseg_idx].ebs_eir =
846 			fm->profile->srtcm_prm.ebs_eir;
847 	} else {
848 		wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
849 			RTE_BE32(MLX5_IFC_FLOW_METER_DISABLE_CBS_CIR_VAL);
850 		wqe->aso_dseg.mtrs[dseg_idx].ebs_eir = 0;
851 	}
852 	fmp = fm->profile;
853 	param_le = (1 << ASO_DSEG_VALID_OFFSET);
854 	if (fm->color_aware)
855 		param_le |= (MLX5_FLOW_COLOR_UNDEFINED << ASO_DSEG_SC_OFFSET);
856 	else
857 		param_le |= (MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET);
858 	if (fmp->profile.packet_mode)
859 		param_le |= (MLX5_METER_MODE_PKT << ASO_DSEG_MTR_MODE);
860 	wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm = RTE_BE32(param_le);
861 	switch (fmp->profile.alg) {
862 	case RTE_MTR_SRTCM_RFC2697:
863 		/* Only needed for RFC2697. */
864 		if (fm->profile->srtcm_prm.ebs_eir)
865 			wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
866 					RTE_BE32(1 << ASO_DSEG_BO_OFFSET);
867 		break;
868 	case RTE_MTR_TRTCM_RFC2698:
869 		wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
870 				RTE_BE32(1 << ASO_DSEG_BBOG_OFFSET);
871 		break;
872 	case RTE_MTR_TRTCM_RFC4115:
873 		wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
874 				RTE_BE32(1 << ASO_DSEG_BO_OFFSET);
875 		break;
876 	default:
877 		break;
878 	}
879 	/*
880 	 * Note:
881 	 * Due to software performance reason, the token fields will not be
882 	 * For performance reasons, software does not set the token fields
883 	 * when posting the WQE to the ASO SQ; the HW fills them in
884 	 * automatically.
885 	sq->head++;
886 	sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
887 	if (push) {
888 		mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
889 			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
890 			   !sh->tx_uar.dbnc);
891 		sq->db_pi = sq->pi;
892 	}
893 	sq->db = wqe;
894 	if (need_lock)
895 		rte_spinlock_unlock(&sq->sqsl);
896 	return 1;
897 }
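
/*
 * Meter addressing above: one 64B ASO object holds two meters, so the
 * object id placed in general_cseg.misc is (base id + offset / 2) and
 * dseg_idx = offset & 1 selects both the meter slot in the WQE data
 * segment and which half of the object the shifted data_mask exposes for
 * writing.
 */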
898 
899 static void
900 mlx5_aso_mtr_completion_handle(struct mlx5_aso_sq *sq, bool need_lock)
901 {
902 	struct mlx5_aso_cq *cq = &sq->cq;
903 	volatile struct mlx5_cqe *restrict cqe;
904 	const unsigned int cq_size = 1 << cq->log_desc_n;
905 	const unsigned int mask = cq_size - 1;
906 	uint32_t idx;
907 	uint32_t next_idx = cq->cq_ci & mask;
908 	uint16_t max;
909 	uint16_t i, n = 0;
910 	int ret;
911 
912 	if (need_lock)
913 		rte_spinlock_lock(&sq->sqsl);
914 	max = (uint16_t)(sq->head - sq->tail);
915 	if (unlikely(!max)) {
916 		if (need_lock)
917 			rte_spinlock_unlock(&sq->sqsl);
918 		return;
919 	}
920 	do {
921 		idx = next_idx;
922 		next_idx = (cq->cq_ci + 1) & mask;
923 		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
924 		cqe = &cq->cq_obj.cqes[idx];
925 		ret = check_cqe(cqe, cq_size, cq->cq_ci);
926 		/*
927 		 * Be sure owner read is done before any other cookie field or
928 		 * opaque field.
929 		 */
930 		rte_io_rmb();
931 		if (ret != MLX5_CQE_STATUS_SW_OWN) {
932 			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
933 				break;
934 			mlx5_aso_cqe_err_handle(sq);
935 		} else {
936 			n++;
937 		}
938 		cq->cq_ci++;
939 	} while (1);
940 	if (likely(n)) {
941 		uint8_t exp_state = ASO_METER_WAIT;
942 		struct mlx5_aso_mtr *aso_mtr;
943 		__rte_unused bool verdict;
944 
945 		for (i = 0; i < n; ++i) {
946 			aso_mtr = sq->elts[(sq->tail + i) & mask].mtr;
947 			MLX5_ASSERT(aso_mtr);
948 			verdict = rte_atomic_compare_exchange_strong_explicit(&aso_mtr->state,
949 						    &exp_state, ASO_METER_READY,
950 						    rte_memory_order_relaxed,
951 						    rte_memory_order_relaxed);
952 			MLX5_ASSERT(verdict);
953 		}
954 		sq->tail += n;
955 		rte_io_wmb();
956 		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
957 	}
958 	if (need_lock)
959 		rte_spinlock_unlock(&sq->sqsl);
960 }
961 
962 static __rte_always_inline struct mlx5_aso_sq *
963 mlx5_aso_mtr_select_sq(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
964 		       struct mlx5_aso_mtr *mtr, bool *need_lock)
965 {
966 	struct mlx5_aso_sq *sq;
967 
968 	if (likely(sh->config.dv_flow_en == 2) &&
969 	    mtr->type == ASO_METER_INDIRECT) {
970 		if (queue == MLX5_HW_INV_QUEUE) {
971 			sq = &mtr->pool->sq[mtr->pool->nb_sq - 1];
972 			*need_lock = true;
973 		} else {
974 			sq = &mtr->pool->sq[queue];
975 			*need_lock = false;
976 		}
977 	} else {
978 		sq = &sh->mtrmng->pools_mng.sq;
979 		*need_lock = true;
980 	}
981 	return sq;
982 }
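
/*
 * SQ selection policy: with the HW steering engine (dv_flow_en == 2)
 * indirect meters own per-queue SQs that are used without locking from
 * their queue's context, the control path (MLX5_HW_INV_QUEUE) shares the
 * last SQ under the spinlock, and every other case falls back to the
 * single global meter SQ, which is also locked.
 */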
983 
984 #if defined(HAVE_MLX5_HWS_SUPPORT)
985 static void
986 mlx5_aso_poll_cq_mtr_hws(struct mlx5_priv *priv, struct mlx5_aso_sq *sq)
987 {
988 #define MLX5_HWS_MTR_CMPL_NUM 4
989 
990 	int i, ret;
991 	struct mlx5_aso_mtr *mtr;
992 	uint8_t exp_state = ASO_METER_WAIT;
993 	struct rte_flow_op_result res[MLX5_HWS_MTR_CMPL_NUM];
994 	__rte_unused bool verdict;
995 
996 	rte_spinlock_lock(&sq->sqsl);
997 repeat:
998 	ret = mlx5_aso_pull_completion(sq, res, MLX5_HWS_MTR_CMPL_NUM);
999 	if (ret) {
1000 		for (i = 0; i < ret; i++) {
1001 			struct mlx5_hw_q_job *job = res[i].user_data;
1002 
1003 			MLX5_ASSERT(job);
1004 			mtr = mlx5_ipool_get(priv->hws_mpool->idx_pool,
1005 					     MLX5_INDIRECT_ACTION_IDX_GET(job->action));
1006 			MLX5_ASSERT(mtr);
1007 			verdict = rte_atomic_compare_exchange_strong_explicit(&mtr->state,
1008 						    &exp_state, ASO_METER_READY,
1009 						    rte_memory_order_relaxed,
1010 						    rte_memory_order_relaxed);
1011 			MLX5_ASSERT(verdict);
1012 			flow_hw_job_put(priv, job, CTRL_QUEUE_ID(priv));
1013 		}
1014 		if (ret == MLX5_HWS_MTR_CMPL_NUM)
1015 			goto repeat;
1016 	}
1017 	rte_spinlock_unlock(&sq->sqsl);
1018 
1019 #undef MLX5_HWS_MTR_CMPL_NUM
1020 }
1021 #else
1022 static void
1023 mlx5_aso_poll_cq_mtr_hws(__rte_unused struct mlx5_priv *priv, __rte_unused struct mlx5_aso_sq *sq)
1024 {
1025 	MLX5_ASSERT(false);
1026 }
1027 #endif
1028 
1029 static void
1030 mlx5_aso_poll_cq_mtr_sws(__rte_unused struct mlx5_priv *priv,
1031 			 struct mlx5_aso_sq *sq)
1032 {
1033 	mlx5_aso_mtr_completion_handle(sq, true);
1034 }
1035 
1036 typedef void (*poll_cq_t)(struct mlx5_priv *, struct mlx5_aso_sq *);
1037 
1038 /**
1039  * Update meter parameter by send WQE.
1040  *
1041  * @param[in] dev
1042  *   Pointer to Ethernet device.
1043  * @param[in] priv
1044  *   Pointer to mlx5 private data structure.
1045  * @param[in] fm
1046  *   Pointer to flow meter to be modified.
1047  *
1048  * @return
1049  *   0 on success, a negative errno value otherwise and rte_errno is set.
1050  */
1051 int
1052 mlx5_aso_meter_update_by_wqe(struct mlx5_priv *priv, uint32_t queue,
1053 			     struct mlx5_aso_mtr *mtr,
1054 			     struct mlx5_mtr_bulk *bulk,
1055 			     struct mlx5_hw_q_job *job, bool push)
1056 {
1057 	bool need_lock;
1058 	struct mlx5_dev_ctx_shared *sh = priv->sh;
1059 	struct mlx5_aso_sq *sq =
1060 		mlx5_aso_mtr_select_sq(sh, queue, mtr, &need_lock);
1061 	uint32_t poll_wqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
1062 	poll_cq_t poll_mtr_cq =
1063 		job ? mlx5_aso_poll_cq_mtr_hws : mlx5_aso_poll_cq_mtr_sws;
1064 	int ret;
1065 
1066 	if (queue != MLX5_HW_INV_QUEUE) {
1067 		ret = mlx5_aso_mtr_sq_enqueue_single(sh, sq, mtr, bulk,
1068 						     need_lock, job, push);
1069 		return ret > 0 ? 0 : -1;
1070 	}
1071 	do {
1072 		poll_mtr_cq(priv, sq);
1073 		if (mlx5_aso_mtr_sq_enqueue_single(sh, sq, mtr, bulk,
1074 						   need_lock, job, true))
1075 			return 0;
1076 		/* Waiting for wqe resource. */
1077 		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1078 	} while (--poll_wqe_times);
1079 	DRV_LOG(ERR, "Fail to send WQE for ASO meter offset %d",
1080 			mtr->offset);
1081 	return -1;
1082 }
1083 
1084 /**
1085  * Wait for meter to be ready.
1086  *
1087  * @param[in] dev
1088  *   Pointer to Ethernet device.
1089  * @param[in] priv
1090  *   Pointer to mlx5 private data structure.
1091  * @param[in] fm
1092  *   Pointer to flow meter to be modified.
1093  *
1094  * @return
1095  *   0 on success, a negative errno value otherwise and rte_errno is set.
1096  */
1097 int
1098 mlx5_aso_mtr_wait(struct mlx5_priv *priv,
1099 		  struct mlx5_aso_mtr *mtr, bool is_tmpl_api)
1100 {
1101 	bool need_lock;
1102 	struct mlx5_aso_sq *sq;
1103 	struct mlx5_dev_ctx_shared *sh = priv->sh;
1104 	uint32_t poll_cqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
1105 	uint8_t state = rte_atomic_load_explicit(&mtr->state, rte_memory_order_relaxed);
1106 	poll_cq_t poll_mtr_cq =
1107 		is_tmpl_api ? mlx5_aso_poll_cq_mtr_hws : mlx5_aso_poll_cq_mtr_sws;
1108 
1109 	if (state == ASO_METER_READY || state == ASO_METER_WAIT_ASYNC)
1110 		return 0;
1111 	sq = mlx5_aso_mtr_select_sq(sh, MLX5_HW_INV_QUEUE, mtr, &need_lock);
1112 	do {
1113 		poll_mtr_cq(priv, sq);
1114 		if (rte_atomic_load_explicit(&mtr->state, rte_memory_order_relaxed) ==
1115 					    ASO_METER_READY)
1116 			return 0;
1117 		/* Waiting for CQE ready. */
1118 		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1119 	} while (--poll_cqe_times);
1120 	DRV_LOG(ERR, "Fail to poll CQE ready for ASO meter offset %d",
1121 			mtr->offset);
1122 	return -1;
1123 }
1124 
1125 static inline struct mlx5_aso_sq*
1126 __mlx5_aso_ct_get_sq_in_hws(uint32_t queue,
1127 			    struct mlx5_aso_ct_pool *pool)
1128 {
1129 	return (queue == MLX5_HW_INV_QUEUE) ?
1130 		pool->shared_sq : &pool->sq[queue];
1131 }
1132 
1133 static inline struct mlx5_aso_sq*
1134 __mlx5_aso_ct_get_sq_in_sws(struct mlx5_dev_ctx_shared *sh,
1135 			    struct mlx5_aso_ct_action *ct)
1136 {
1137 	return &sh->ct_mng->aso_sqs[ct->offset & (MLX5_ASO_CT_SQ_NUM - 1)];
1138 }
1139 
1140 static inline struct mlx5_aso_ct_pool*
1141 __mlx5_aso_ct_get_pool(struct mlx5_dev_ctx_shared *sh,
1142 		       struct mlx5_aso_ct_action *ct)
1143 {
1144 	if (likely(sh->config.dv_flow_en == 2))
1145 		return ct->pool;
1146 	return container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1147 }
1148 
1149 int
1150 mlx5_aso_ct_queue_uninit(struct mlx5_dev_ctx_shared *sh,
1151 			 struct mlx5_aso_ct_pools_mng *ct_mng)
1152 {
1153 	uint32_t i;
1154 
1155 	/* 64B per object for query. */
1156 	for (i = 0; i < ct_mng->nb_sq; i++) {
1157 		if (ct_mng->aso_sqs[i].mr.addr)
1158 			mlx5_aso_dereg_mr(sh->cdev, &ct_mng->aso_sqs[i].mr);
1159 		mlx5_aso_destroy_sq(&ct_mng->aso_sqs[i]);
1160 	}
1161 	return 0;
1162 }
1163 
1164 /**
1165  * API to create and initialize CT Send Queue used for ASO access.
1166  *
1167  * @param[in] sh
1168  *   Pointer to shared device context.
1169  * @param[in] ct_mng
1170  *   Pointer to the CT management struct.
1171  * @param[in] nb_queues
1172  *   Number of queues to be allocated.
1173  *
1174  * @return
1175  *   0 on success, a negative errno value otherwise and rte_errno is set.
1176  */
1177 int
1178 mlx5_aso_ct_queue_init(struct mlx5_dev_ctx_shared *sh,
1179 		       struct mlx5_aso_ct_pools_mng *ct_mng,
1180 		       uint32_t nb_queues)
1181 {
1182 	uint32_t i;
1183 
1184 	/* 64B per object for query. */
1185 	for (i = 0; i < nb_queues; i++) {
1186 		if (mlx5_aso_reg_mr(sh->cdev, 64 * (1 << MLX5_ASO_QUEUE_LOG_DESC),
1187 				    &ct_mng->aso_sqs[i].mr))
1188 			goto error;
1189 		if (mlx5_aso_sq_create(sh->cdev, &ct_mng->aso_sqs[i],
1190 				       sh->tx_uar.obj,
1191 				       MLX5_ASO_QUEUE_LOG_DESC))
1192 			goto error;
1193 		mlx5_aso_ct_init_sq(&ct_mng->aso_sqs[i]);
1194 	}
1195 	ct_mng->nb_sq = nb_queues;
1196 	return 0;
1197 error:
1198 	do {
1199 		if (ct_mng->aso_sqs[i].mr.addr)
1200 			mlx5_aso_dereg_mr(sh->cdev, &ct_mng->aso_sqs[i].mr);
1201 		mlx5_aso_destroy_sq(&ct_mng->aso_sqs[i]);
1202 	} while (i--);
1203 	ct_mng->nb_sq = 0;
1204 	return -1;
1205 }
1206 
1207 /*
1208  * Post a WQE to the ASO CT SQ to modify the context.
1209  *
1210  * @param[in] sh
1211  *   Pointer to shared device context.
1212  * @param[in] ct
1213  *   Pointer to the generic CT structure related to the context.
1214  * @param[in] profile
1215  *   Pointer to configuration profile.
1216  *
1217  * @return
1218  *   1 on success (WQE number), 0 on failure.
1219  */
1220 static uint16_t
1221 mlx5_aso_ct_sq_enqueue_single(struct mlx5_dev_ctx_shared *sh,
1222 			      struct mlx5_aso_sq *sq,
1223 			      struct mlx5_aso_ct_action *ct,
1224 			      const struct rte_flow_action_conntrack *profile,
1225 			      bool need_lock,
1226 			      void *user_data,
1227 			      bool push)
1228 {
1229 	volatile struct mlx5_aso_wqe *wqe = NULL;
1230 	uint16_t size = 1 << sq->log_desc_n;
1231 	uint16_t mask = size - 1;
1232 	uint16_t res;
1233 	struct mlx5_aso_ct_pool *pool;
1234 	void *desg;
1235 	void *orig_dir;
1236 	void *reply_dir;
1237 
1238 	if (need_lock)
1239 		rte_spinlock_lock(&sq->sqsl);
1240 	/* Prevent other threads from updating the index. */
1241 	res = size - (uint16_t)(sq->head - sq->tail);
1242 	if (unlikely(!res)) {
1243 		if (need_lock)
1244 			rte_spinlock_unlock(&sq->sqsl);
1245 		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
1246 		return 0;
1247 	}
1248 	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
1249 	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
1250 	/* Fill next WQE. */
1251 	MLX5_ASO_CT_UPDATE_STATE(ct,
1252 			user_data ? ASO_CONNTRACK_WAIT_ASYNC : ASO_CONNTRACK_WAIT);
1253 	if (user_data) {
1254 		sq->elts[sq->head & mask].user_data = user_data;
1255 	} else {
1256 		sq->elts[sq->head & mask].ct = ct;
1257 		sq->elts[sq->head & mask].query_data = NULL;
1258 	}
1259 	pool = __mlx5_aso_ct_get_pool(sh, ct);
1260 
1261 	/* Each WQE will have a single CT object. */
1262 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
1263 						  ct->offset);
1264 	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
1265 			(ASO_OPC_MOD_CONNECTION_TRACKING <<
1266 			 WQE_CSEG_OPC_MOD_OFFSET) |
1267 			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
1268 	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
1269 			(0u |
1270 			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
1271 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
1272 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
1273 			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
1274 	wqe->aso_cseg.data_mask = UINT64_MAX;
1275 	/* Cast away volatile to keep the compiler happy. */
1276 	desg = (void *)(uintptr_t)wqe->aso_dseg.data;
1277 	MLX5_SET(conn_track_aso, desg, valid, 1);
1278 	MLX5_SET(conn_track_aso, desg, state, profile->state);
1279 	MLX5_SET(conn_track_aso, desg, freeze_track, !profile->enable);
1280 	MLX5_SET(conn_track_aso, desg, connection_assured,
1281 		 profile->live_connection);
1282 	MLX5_SET(conn_track_aso, desg, sack_permitted, profile->selective_ack);
1283 	MLX5_SET(conn_track_aso, desg, challenged_acked,
1284 		 profile->challenge_ack_passed);
1285 	/* Heartbeat, retransmission_counter, retranmission_limit_exceeded: 0 */
1286 	MLX5_SET(conn_track_aso, desg, heartbeat, 0);
1287 	MLX5_SET(conn_track_aso, desg, max_ack_window,
1288 		 profile->max_ack_window);
1289 	MLX5_SET(conn_track_aso, desg, retransmission_counter, 0);
1290 	MLX5_SET(conn_track_aso, desg, retranmission_limit_exceeded, 0);
1291 	MLX5_SET(conn_track_aso, desg, retranmission_limit,
1292 		 profile->retransmission_limit);
1293 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_scale,
1294 		 profile->reply_dir.scale);
1295 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_close_initiated,
1296 		 profile->reply_dir.close_initiated);
1297 	/* Both directions will use the same liberal mode. */
1298 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_liberal_enabled,
1299 		 profile->liberal_mode);
1300 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_data_unacked,
1301 		 profile->reply_dir.data_unacked);
1302 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_max_ack,
1303 		 profile->reply_dir.last_ack_seen);
1304 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_scale,
1305 		 profile->original_dir.scale);
1306 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_close_initiated,
1307 		 profile->original_dir.close_initiated);
1308 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_liberal_enabled,
1309 		 profile->liberal_mode);
1310 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_data_unacked,
1311 		 profile->original_dir.data_unacked);
1312 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_max_ack,
1313 		 profile->original_dir.last_ack_seen);
1314 	MLX5_SET(conn_track_aso, desg, last_win, profile->last_window);
1315 	MLX5_SET(conn_track_aso, desg, last_dir, profile->last_direction);
1316 	MLX5_SET(conn_track_aso, desg, last_index, profile->last_index);
1317 	MLX5_SET(conn_track_aso, desg, last_seq, profile->last_seq);
1318 	MLX5_SET(conn_track_aso, desg, last_ack, profile->last_ack);
1319 	MLX5_SET(conn_track_aso, desg, last_end, profile->last_end);
1320 	orig_dir = MLX5_ADDR_OF(conn_track_aso, desg, original_dir);
1321 	MLX5_SET(tcp_window_params, orig_dir, sent_end,
1322 		 profile->original_dir.sent_end);
1323 	MLX5_SET(tcp_window_params, orig_dir, reply_end,
1324 		 profile->original_dir.reply_end);
1325 	MLX5_SET(tcp_window_params, orig_dir, max_win,
1326 		 profile->original_dir.max_win);
1327 	MLX5_SET(tcp_window_params, orig_dir, max_ack,
1328 		 profile->original_dir.max_ack);
1329 	reply_dir = MLX5_ADDR_OF(conn_track_aso, desg, reply_dir);
1330 	MLX5_SET(tcp_window_params, reply_dir, sent_end,
1331 		 profile->reply_dir.sent_end);
1332 	MLX5_SET(tcp_window_params, reply_dir, reply_end,
1333 		 profile->reply_dir.reply_end);
1334 	MLX5_SET(tcp_window_params, reply_dir, max_win,
1335 		 profile->reply_dir.max_win);
1336 	MLX5_SET(tcp_window_params, reply_dir, max_ack,
1337 		 profile->reply_dir.max_ack);
1338 	sq->head++;
1339 	sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
1340 	if (push) {
1341 		mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
1342 				   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
1343 				   !sh->tx_uar.dbnc);
1344 		sq->db_pi = sq->pi;
1345 	}
1346 	sq->db = wqe;
1347 	if (need_lock)
1348 		rte_spinlock_unlock(&sq->sqsl);
1349 	return 1;
1350 }
1351 
1352 /*
1353  * Update the status field of CTs to indicate they are ready to be used by
1354  * flows. A contiguous batch of CTs since the last update is handled.
1355  *
1356  * @param[in] sq
1357  *   Pointer to ASO CT SQ.
1358  * @param[in] num
1359  *   Number of CT structures to be updated.
1363  */
1364 static void
1365 mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
1366 {
1367 	uint16_t size = 1 << sq->log_desc_n;
1368 	uint16_t mask = size - 1;
1369 	uint16_t i;
1370 	struct mlx5_aso_ct_action *ct = NULL;
1371 	uint16_t idx;
1372 
1373 	for (i = 0; i < num; i++) {
1374 		idx = (uint16_t)((sq->tail + i) & mask);
1375 		ct = sq->elts[idx].ct;
1376 		MLX5_ASSERT(ct);
1377 		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
1378 		if (sq->elts[idx].query_data)
1379 			rte_memcpy(sq->elts[idx].query_data,
1380 				   (char *)((uintptr_t)sq->mr.addr + idx * 64),
1381 				   64);
1382 	}
1383 }
1384 
1385 /*
1386  * Post a WQE to the ASO CT SQ to query the current context.
1387  *
1388  * @param[in] sh
1389  *   Pointer to shared device context.
1390  * @param[in] ct
1391  *   Pointer to the generic CT structure related to the context.
1392  * @param[in] data
1393  *   Pointer to data area to be filled.
1394  *
1395  * @return
1396  *   1 on success (WQE number), 0 if the SQ is full or the context is still pending, -1 if there is no context to query.
1397  */
1398 static int
1399 mlx5_aso_ct_sq_query_single(struct mlx5_dev_ctx_shared *sh,
1400 			    struct mlx5_aso_sq *sq,
1401 			    struct mlx5_aso_ct_action *ct, char *data,
1402 			    bool need_lock,
1403 			    void *user_data,
1404 			    bool push)
1405 {
1406 	volatile struct mlx5_aso_wqe *wqe = NULL;
1407 	uint16_t size = 1 << sq->log_desc_n;
1408 	uint16_t mask = size - 1;
1409 	uint16_t res;
1410 	uint16_t wqe_idx;
1411 	struct mlx5_aso_ct_pool *pool;
1412 	enum mlx5_aso_ct_state state =
1413 				rte_atomic_load_explicit(&ct->state, rte_memory_order_relaxed);
1414 
1415 	if (state == ASO_CONNTRACK_FREE) {
1416 		DRV_LOG(ERR, "Fail: No context to query");
1417 		return -1;
1418 	} else if (state == ASO_CONNTRACK_WAIT) {
1419 		return 0;
1420 	}
1421 	if (need_lock)
1422 		rte_spinlock_lock(&sq->sqsl);
1423 	res = size - (uint16_t)(sq->head - sq->tail);
1424 	if (unlikely(!res)) {
1425 		if (need_lock)
1426 			rte_spinlock_unlock(&sq->sqsl);
1427 		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
1428 		return 0;
1429 	}
1430 	MLX5_ASO_CT_UPDATE_STATE(ct,
1431 			user_data ? ASO_CONNTRACK_WAIT_ASYNC : ASO_CONNTRACK_QUERY);
1432 	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
1433 	/* Confirm the location and address of the prefetch instruction. */
1434 	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
1435 	/* Fill next WQE. */
1436 	wqe_idx = sq->head & mask;
1437 	/* Check if this is async mode. */
1438 	if (user_data) {
1439 		struct mlx5_hw_q_job *job = (struct mlx5_hw_q_job *)user_data;
1440 
1441 		sq->elts[wqe_idx].ct = user_data;
1442 		job->query.hw = (char *)((uintptr_t)sq->mr.addr + wqe_idx * 64);
1443 	} else {
1444 		sq->elts[wqe_idx].query_data = data;
1445 		sq->elts[wqe_idx].ct = ct;
1446 	}
1447 	pool = __mlx5_aso_ct_get_pool(sh, ct);
1448 	/* Each WQE will have a single CT object. */
1449 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
1450 						  ct->offset);
1451 	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
1452 			(ASO_OPC_MOD_CONNECTION_TRACKING <<
1453 			 WQE_CSEG_OPC_MOD_OFFSET) |
1454 			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
1455 	/*
1456 	 * No write request is required.
1457 	 * ASO_OPER_LOGICAL_AND and ASO_OP_ALWAYS_FALSE are both 0.
1458 	 * "BYTEWISE_64BYTE" is needed for a whole context.
1459 	 * Set to 0 directly to reduce an endian swap. (Modify should rewrite.)
1460 	 * "data_mask" is ignored.
1461 	 * Buffer address was already filled during initialization.
1462 	 */
1463 	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32(BYTEWISE_64BYTE <<
1464 					ASO_CSEG_DATA_MASK_MODE_OFFSET);
1465 	wqe->aso_cseg.data_mask = 0;
1466 	sq->head++;
1467 	/*
1468 	 * Each WQE contains 2 WQEBB's, even though
1469 	 * data segment is not used in this case.
1470 	 */
1471 	sq->pi += 2;
1472 	if (push) {
1473 		mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
1474 				   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
1475 				   !sh->tx_uar.dbnc);
1476 		sq->db_pi = sq->pi;
1477 	}
1478 	sq->db = wqe;
1479 	if (need_lock)
1480 		rte_spinlock_unlock(&sq->sqsl);
1481 	return 1;
1482 }
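
/*
 * Query data path: in synchronous mode the caller's buffer is remembered
 * in elts[].query_data and mlx5_aso_ct_status_update() copies the 64B
 * context from the completed descriptor's MR slot into it; in
 * asynchronous mode the slot address is exposed through job->query.hw
 * instead, so the HWS completion path can parse it later without an
 * extra copy here.
 */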
1483 
1484 /*
1485  * Handle completions from WQEs sent to ASO CT.
1486  *
1487  * @param[in] mng
1488  *   Pointer to the CT pools management structure.
1489  */
1490 static void
1491 mlx5_aso_ct_completion_handle(struct mlx5_dev_ctx_shared *sh __rte_unused,
1492 			      struct mlx5_aso_sq *sq,
1493 			      bool need_lock)
1494 {
1495 	struct mlx5_aso_cq *cq = &sq->cq;
1496 	volatile struct mlx5_cqe *restrict cqe;
1497 	const uint32_t cq_size = 1 << cq->log_desc_n;
1498 	const uint32_t mask = cq_size - 1;
1499 	uint32_t idx;
1500 	uint32_t next_idx;
1501 	uint16_t max;
1502 	uint16_t n = 0;
1503 	int ret;
1504 
1505 	if (need_lock)
1506 		rte_spinlock_lock(&sq->sqsl);
1507 	max = (uint16_t)(sq->head - sq->tail);
1508 	if (unlikely(!max)) {
1509 		if (need_lock)
1510 			rte_spinlock_unlock(&sq->sqsl);
1511 		return;
1512 	}
1513 	next_idx = cq->cq_ci & mask;
1514 	do {
1515 		idx = next_idx;
1516 		next_idx = (cq->cq_ci + 1) & mask;
1517 		/* Need to confirm the position of the prefetch. */
1518 		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
1519 		cqe = &cq->cq_obj.cqes[idx];
1520 		ret = check_cqe(cqe, cq_size, cq->cq_ci);
1521 		/*
1522 		 * Be sure owner read is done before any other cookie field or
1523 		 * opaque field.
1524 		 */
1525 		rte_io_rmb();
1526 		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
1527 			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
1528 				break;
1529 			mlx5_aso_cqe_err_handle(sq);
1530 		} else {
1531 			n++;
1532 		}
1533 		cq->cq_ci++;
1534 	} while (1);
1535 	if (likely(n)) {
1536 		mlx5_aso_ct_status_update(sq, n);
1537 		sq->tail += n;
1538 		rte_io_wmb();
1539 		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
1540 	}
1541 	if (need_lock)
1542 		rte_spinlock_unlock(&sq->sqsl);
1543 }
1544 
1545 /*
1546  * Update connection tracking ASO context by sending WQE.
1547  *
1548  * @param[in] sh
1549  *   Pointer to mlx5_dev_ctx_shared object.
1550  * @param[in] queue
1551  *   The queue index.
1552  * @param[in] ct
1553  *   Pointer to connection tracking offload object.
1554  * @param[in] profile
1555  *   Pointer to connection tracking TCP parameter.
1556  *
1557  * @return
1558  *   0 on success, -1 on failure.
1559  */
1560 int
1561 mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
1562 			  uint32_t queue,
1563 			  struct mlx5_aso_ct_action *ct,
1564 			  const struct rte_flow_action_conntrack *profile,
1565 			  void *user_data,
1566 			  bool push)
1567 {
1568 	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1569 	struct mlx5_aso_ct_pool *pool = __mlx5_aso_ct_get_pool(sh, ct);
1570 	struct mlx5_aso_sq *sq;
1571 	bool need_lock = !!(queue == MLX5_HW_INV_QUEUE);
1572 	int ret;
1573 
1574 	if (sh->config.dv_flow_en == 2)
1575 		sq = __mlx5_aso_ct_get_sq_in_hws(queue, pool);
1576 	else
1577 		sq = __mlx5_aso_ct_get_sq_in_sws(sh, ct);
1578 	if (queue != MLX5_HW_INV_QUEUE) {
1579 		ret = mlx5_aso_ct_sq_enqueue_single(sh, sq, ct, profile,
1580 						    need_lock, user_data, push);
1581 		return ret > 0 ? 0 : -1;
1582 	}
1583 	do {
1584 		mlx5_aso_ct_completion_handle(sh, sq, need_lock);
1585 		if (mlx5_aso_ct_sq_enqueue_single(sh, sq, ct, profile,
1586 						  need_lock, NULL, true))
1587 			return 0;
1588 		/* Waiting for wqe resource. */
1589 		rte_delay_us_sleep(10u);
1590 	} while (--poll_wqe_times);
1591 	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
1592 		ct->offset, pool->index);
1593 	return -1;
1594 }
1595 
1596 /*
1597  * This routine waits for WQE completion before continuing with the queried data.
1598  *
1599  * @param[in] sh
1600  *   Pointer to mlx5_dev_ctx_shared object.
1601  * @param[in] queue
1602  *   The queue which the CT works on.
1603  * @param[in] ct
1604  *   Pointer to connection tracking offload object.
1605  *
1606  * @return
1607  *   0 on success, -1 on failure.
1608  */
1609 int
1610 mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh, uint32_t queue,
1611 		       struct mlx5_aso_ct_action *ct)
1612 {
1613 	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1614 	struct mlx5_aso_ct_pool *pool = __mlx5_aso_ct_get_pool(sh, ct);
1615 	struct mlx5_aso_sq *sq;
1616 	bool need_lock = !!(queue == MLX5_HW_INV_QUEUE);
1617 
1618 	if (sh->config.dv_flow_en == 2)
1619 		sq = __mlx5_aso_ct_get_sq_in_hws(queue, pool);
1620 	else
1621 		sq = __mlx5_aso_ct_get_sq_in_sws(sh, ct);
1622 	if (rte_atomic_load_explicit(&ct->state, rte_memory_order_relaxed) ==
1623 	    ASO_CONNTRACK_READY)
1624 		return 0;
1625 	do {
1626 		mlx5_aso_ct_completion_handle(sh, sq, need_lock);
1627 		if (rte_atomic_load_explicit(&ct->state, rte_memory_order_relaxed) ==
1628 		    ASO_CONNTRACK_READY)
1629 			return 0;
1630 		/* Waiting for the CQE to be ready; blocking vs. sleeping is a trade-off. */
1631 		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1632 	} while (--poll_cqe_times);
1633 	DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
1634 		ct->offset, pool->index);
1635 	return -1;
1636 }
1637 
1638 /*
1639  * Convert the hardware conntrack data format into the profile.
1640  *
1641  * @param[in] profile
1642  *   Pointer to conntrack profile to be filled after query.
1643  * @param[in] wdata
1644  *   Pointer to data fetched from hardware.
1645  */
1646 void
1647 mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
1648 			char *wdata)
1649 {
1650 	void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir);
1651 	void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir);
1652 
1653 	/* MLX5_GET16 should be taken into consideration. */
1654 	profile->state = (enum rte_flow_conntrack_state)
1655 			 MLX5_GET(conn_track_aso, wdata, state);
1656 	profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track);
1657 	profile->selective_ack = MLX5_GET(conn_track_aso, wdata,
1658 					  sack_permitted);
1659 	profile->live_connection = MLX5_GET(conn_track_aso, wdata,
1660 					    connection_assured);
1661 	profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata,
1662 						 challenged_acked);
1663 	profile->max_ack_window = MLX5_GET(conn_track_aso, wdata,
1664 					   max_ack_window);
1665 	profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata,
1666 						 retranmission_limit);
1667 	profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win);
1668 	profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir);
1669 	profile->last_index = (enum rte_flow_conntrack_tcp_last_index)
1670 			      MLX5_GET(conn_track_aso, wdata, last_index);
1671 	profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq);
1672 	profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack);
1673 	profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end);
1674 	profile->liberal_mode = MLX5_GET(conn_track_aso, wdata,
1675 				reply_direction_tcp_liberal_enabled) |
1676 				MLX5_GET(conn_track_aso, wdata,
1677 				original_direction_tcp_liberal_enabled);
1678 	/* The RTE profile has no per-direction liberal mode field. */
1679 	profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata,
1680 					    reply_direction_tcp_scale);
1681 	profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
1682 					reply_direction_tcp_close_initiated);
1683 	profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
1684 					reply_direction_tcp_data_unacked);
1685 	profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
1686 					reply_direction_tcp_max_ack);
1687 	profile->reply_dir.sent_end = MLX5_GET(tcp_window_params,
1688 					       r_dir, sent_end);
1689 	profile->reply_dir.reply_end = MLX5_GET(tcp_window_params,
1690 						r_dir, reply_end);
1691 	profile->reply_dir.max_win = MLX5_GET(tcp_window_params,
1692 					      r_dir, max_win);
1693 	profile->reply_dir.max_ack = MLX5_GET(tcp_window_params,
1694 					      r_dir, max_ack);
1695 	profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata,
1696 					       original_direction_tcp_scale);
1697 	profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
1698 					original_direction_tcp_close_initiated);
1699 	profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
1700 					original_direction_tcp_data_unacked);
1701 	profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
1702 					original_direction_tcp_max_ack);
1703 	profile->original_dir.sent_end = MLX5_GET(tcp_window_params,
1704 						  o_dir, sent_end);
1705 	profile->original_dir.reply_end = MLX5_GET(tcp_window_params,
1706 						   o_dir, reply_end);
1707 	profile->original_dir.max_win = MLX5_GET(tcp_window_params,
1708 						 o_dir, max_win);
1709 	profile->original_dir.max_ack = MLX5_GET(tcp_window_params,
1710 						 o_dir, max_ack);
1711 }
1712 
/*
 * Query connection tracking information parameters by sending a WQE.
 *
 * @param[in] sh
 *   Pointer to the shared device context.
 * @param[in] queue
 *   HWS queue index, or MLX5_HW_INV_QUEUE for synchronous access with
 *   locking on the shared SQ.
 * @param[in] ct
 *   Pointer to connection tracking offload object.
 * @param[out] profile
 *   Pointer to connection tracking TCP information.
 * @param[in] user_data
 *   Asynchronous job context, used only with a valid HWS queue.
 * @param[in] push
 *   Whether to push the WQE to hardware immediately.
 *
 * @return
 *   0 on success, -1 on failure.
 */
1726 int
1727 mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
1728 			 uint32_t queue,
1729 			 struct mlx5_aso_ct_action *ct,
1730 			 struct rte_flow_action_conntrack *profile,
1731 			 void *user_data, bool push)
1732 {
1733 	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1734 	struct mlx5_aso_ct_pool *pool = __mlx5_aso_ct_get_pool(sh, ct);
1735 	struct mlx5_aso_sq *sq;
1736 	bool need_lock = !!(queue == MLX5_HW_INV_QUEUE);
1737 	char out_data[64 * 2];
1738 	int ret;
1739 
1740 	if (sh->config.dv_flow_en == 2)
1741 		sq = __mlx5_aso_ct_get_sq_in_hws(queue, pool);
1742 	else
1743 		sq = __mlx5_aso_ct_get_sq_in_sws(sh, ct);
1744 	if (queue != MLX5_HW_INV_QUEUE) {
1745 		ret = mlx5_aso_ct_sq_query_single(sh, sq, ct, out_data,
1746 						  need_lock, user_data, push);
1747 		return ret > 0 ? 0 : -1;
1748 	}
1749 	do {
1750 		mlx5_aso_ct_completion_handle(sh, sq, need_lock);
1751 		ret = mlx5_aso_ct_sq_query_single(sh, sq, ct, out_data,
1752 				need_lock, NULL, true);
1753 		if (ret < 0)
1754 			return ret;
1755 		else if (ret > 0)
1756 			goto data_handle;
1757 		/* Waiting for wqe resource or state. */
1758 		else
1759 			rte_delay_us_sleep(10u);
1760 	} while (--poll_wqe_times);
	DRV_LOG(ERR, "Failed to send WQE for ASO CT %d in pool %d",
1762 		ct->offset, pool->index);
1763 	return -1;
1764 data_handle:
1765 	ret = mlx5_aso_ct_wait_ready(sh, queue, ct);
1766 	if (!ret)
1767 		mlx5_aso_ct_obj_analyze(profile, out_data);
1768 	return ret;
1769 }
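
/*
 * Illustrative sketch only, not part of the driver: a minimal synchronous
 * CT query wrapper. The helper name is hypothetical. It assumes the SWS
 * path, where MLX5_HW_INV_QUEUE selects the shared, lock-protected SQ and
 * the result is polled inline.
 */
static __rte_unused int
mlx5_aso_ct_query_sync_sketch(struct mlx5_dev_ctx_shared *sh,
			      struct mlx5_aso_ct_action *ct,
			      struct rte_flow_action_conntrack *profile)
{
	/* NULL user data and push == true request an inline, polled query. */
	return mlx5_aso_ct_query_by_wqe(sh, MLX5_HW_INV_QUEUE, ct,
					profile, NULL, true);
}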
1770 
1771 /*
1772  * Make sure the conntrack context is synchronized with hardware before
1773  * creating a flow rule that uses it.
1774  *
 * @param[in] sh
 *   Pointer to shared device context.
 * @param[in] queue
 *   HWS queue index, or MLX5_HW_INV_QUEUE for synchronous access with
 *   locking on the shared SQ.
 * @param[in] ct
 *   Pointer to connection tracking offload object.
1779  *
1780  * @return
1781  *   0 on success, a negative errno value otherwise and rte_errno is set.
1782  */
1783 int
1784 mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
1785 		      uint32_t queue,
1786 		      struct mlx5_aso_ct_action *ct)
1787 {
1788 	struct mlx5_aso_ct_pool *pool = __mlx5_aso_ct_get_pool(sh, ct);
1789 	struct mlx5_aso_sq *sq;
1790 	bool need_lock = !!(queue == MLX5_HW_INV_QUEUE);
1791 	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1792 	enum mlx5_aso_ct_state state =
1793 				rte_atomic_load_explicit(&ct->state, rte_memory_order_relaxed);
1794 
1795 	if (sh->config.dv_flow_en == 2)
1796 		sq = __mlx5_aso_ct_get_sq_in_hws(queue, pool);
1797 	else
1798 		sq = __mlx5_aso_ct_get_sq_in_sws(sh, ct);
1799 	if (state == ASO_CONNTRACK_FREE) {
1800 		rte_errno = ENXIO;
1801 		return -rte_errno;
1802 	} else if (state == ASO_CONNTRACK_READY ||
1803 		   state == ASO_CONNTRACK_QUERY ||
1804 		   state == ASO_CONNTRACK_WAIT_ASYNC) {
1805 		return 0;
1806 	}
1807 	do {
1808 		mlx5_aso_ct_completion_handle(sh, sq, need_lock);
1809 		state = rte_atomic_load_explicit(&ct->state, rte_memory_order_relaxed);
1810 		if (state == ASO_CONNTRACK_READY ||
1811 		    state == ASO_CONNTRACK_QUERY)
1812 			return 0;
		/* Wait for the CQE to be ready; blocking vs. sleeping is a trade-off. */
1814 		rte_delay_us_block(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1815 	} while (--poll_cqe_times);
1816 	rte_errno = EBUSY;
1817 	return -rte_errno;
1818 }
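
/*
 * Illustrative sketch only, not part of the driver: checking that a CT
 * object is usable before referencing it in a flow rule. The helper name
 * is hypothetical; it simply maps the EBUSY/ENXIO results onto a boolean.
 */
static __rte_unused bool
mlx5_aso_ct_ready_sketch(struct mlx5_dev_ctx_shared *sh,
			 struct mlx5_aso_ct_action *ct)
{
	/* MLX5_HW_INV_QUEUE selects the shared SQ and enables locking. */
	return mlx5_aso_ct_available(sh, MLX5_HW_INV_QUEUE, ct) == 0;
}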
1819 
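/*
 * Create and initialize the ASO SQs used for HWS counter queries.
 *
 * @param[in] sh
 *   Pointer to the shared device context.
 *
 * @return
 *   0 on success, -1 on failure.
 */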
1820 int
1821 mlx5_aso_cnt_queue_init(struct mlx5_dev_ctx_shared *sh)
1822 {
1823 	struct mlx5_hws_aso_mng *aso_mng = NULL;
1824 	uint8_t idx;
1825 	struct mlx5_aso_sq *sq;
1826 
1827 	MLX5_ASSERT(sh);
1828 	MLX5_ASSERT(sh->cnt_svc);
1829 	aso_mng = &sh->cnt_svc->aso_mng;
1830 	aso_mng->sq_num = HWS_CNT_ASO_SQ_NUM;
1831 	for (idx = 0; idx < HWS_CNT_ASO_SQ_NUM; idx++) {
1832 		sq = &aso_mng->sqs[idx];
1833 		if (mlx5_aso_sq_create(sh->cdev, sq, sh->tx_uar.obj,
1834 					MLX5_ASO_CNT_QUEUE_LOG_DESC))
1835 			goto error;
1836 		mlx5_aso_cnt_init_sq(sq);
1837 	}
1838 	return 0;
1839 error:
1840 	mlx5_aso_cnt_queue_uninit(sh);
1841 	return -1;
1842 }
1843 
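/*
 * Destroy all ASO SQs used for HWS counter queries.
 *
 * @param[in] sh
 *   Pointer to the shared device context.
 */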
1844 void
1845 mlx5_aso_cnt_queue_uninit(struct mlx5_dev_ctx_shared *sh)
1846 {
1847 	uint16_t idx;
1848 
1849 	for (idx = 0; idx < sh->cnt_svc->aso_mng.sq_num; idx++)
1850 		mlx5_aso_destroy_sq(&sh->cnt_svc->aso_mng.sqs[idx]);
1851 	sh->cnt_svc->aso_mng.sq_num = 0;
1852 }
1853 
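/*
 * Enqueue a burst of ASO counter-query WQEs on one SQ.
 *
 * Each WQE queries a batch of 4 counters and writes the result into the
 * pool raw buffer through the pre-registered MR.
 *
 * @param[in] cpool
 *   Pointer to the HWS counter pool.
 * @param[in] sh
 *   Pointer to the shared device context.
 * @param[in] sq
 *   ASO SQ to post the WQEs on.
 * @param[in] n
 *   Number of counters to query.
 * @param[in] offset
 *   Offset of the first counter inside the pool raw buffer.
 * @param[in] dcs_id_base
 *   Base DevX counter set object ID for this burst.
 *
 * @return
 *   Number of WQEs actually posted, 0 if the SQ has no free slots.
 */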
1854 static uint16_t
1855 mlx5_aso_cnt_sq_enqueue_burst(struct mlx5_hws_cnt_pool *cpool,
1856 		struct mlx5_dev_ctx_shared *sh,
1857 		struct mlx5_aso_sq *sq, uint32_t n,
1858 		uint32_t offset, uint32_t dcs_id_base)
1859 {
1860 	volatile struct mlx5_aso_wqe *wqe;
1861 	uint16_t size = 1 << sq->log_desc_n;
1862 	uint16_t mask = size - 1;
1863 	uint16_t max;
1864 	uint32_t upper_offset = offset;
1865 	uint64_t addr;
1866 	uint32_t ctrl_gen_id = 0;
1867 	uint8_t opcmod = sh->cdev->config.hca_attr.flow_access_aso_opc_mod;
1868 	rte_be32_t lkey = rte_cpu_to_be_32(cpool->raw_mng->mr.lkey);
1869 	uint16_t aso_n = (uint16_t)(RTE_ALIGN_CEIL(n, 4) / 4);
1870 	uint32_t ccntid;
1871 
1872 	max = RTE_MIN(size - (uint16_t)(sq->head - sq->tail), aso_n);
1873 	if (unlikely(!max))
1874 		return 0;
1875 	upper_offset += (max * 4);
	/* Only one burst is in flight at a time, so the same elt can be reused. */
1877 	sq->elts[0].burst_size = max;
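	/* Each WQE covers a batch of 4 counters, hence the base ID is divided by 4. */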
1878 	ctrl_gen_id = dcs_id_base;
1879 	ctrl_gen_id /= 4;
1880 	do {
1881 		ccntid = upper_offset - max * 4;
1882 		wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
1883 		rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
1884 		wqe->general_cseg.misc = rte_cpu_to_be_32(ctrl_gen_id);
1885 		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR <<
1886 							 MLX5_COMP_MODE_OFFSET);
1887 		wqe->general_cseg.opcode = rte_cpu_to_be_32
1888 						(MLX5_OPCODE_ACCESS_ASO |
1889 						 (opcmod <<
1890 						  WQE_CSEG_OPC_MOD_OFFSET) |
1891 						 (sq->pi <<
1892 						  WQE_CSEG_WQE_INDEX_OFFSET));
1893 		addr = (uint64_t)RTE_PTR_ADD(cpool->raw_mng->raw,
1894 				ccntid * sizeof(struct flow_counter_stats));
1895 		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
1896 		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
1897 		wqe->aso_cseg.lkey = lkey;
		sq->pi += 2; /* Each WQE contains 2 WQEBBs. */
1899 		sq->head++;
1900 		sq->next++;
1901 		ctrl_gen_id++;
1902 		max--;
1903 	} while (max);
1904 	wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
1905 							 MLX5_COMP_MODE_OFFSET);
1906 	mlx5_doorbell_ring(&sh->tx_uar.bf_db, *(volatile uint64_t *)wqe,
1907 			   sq->pi, &sq->sq_obj.db_rec[MLX5_SND_DBR],
1908 			   !sh->tx_uar.dbnc);
1909 	return sq->elts[0].burst_size;
1910 }
1911 
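/*
 * Poll the CQ of one counter-query SQ and release the completed burst.
 *
 * @param[in] sq
 *   ASO SQ whose completion queue is polled.
 *
 * @return
 *   Number of WQEs completed, 0 if no new CQE is ready.
 */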
1912 static uint16_t
1913 mlx5_aso_cnt_completion_handle(struct mlx5_aso_sq *sq)
1914 {
1915 	struct mlx5_aso_cq *cq = &sq->cq;
1916 	volatile struct mlx5_cqe *restrict cqe;
1917 	const unsigned int cq_size = 1 << cq->log_desc_n;
1918 	const unsigned int mask = cq_size - 1;
1919 	uint32_t idx;
1920 	uint32_t next_idx = cq->cq_ci & mask;
1921 	const uint16_t max = (uint16_t)(sq->head - sq->tail);
1922 	uint16_t i = 0;
1923 	int ret;
1924 	if (unlikely(!max))
1925 		return 0;
1926 	idx = next_idx;
1927 	next_idx = (cq->cq_ci + 1) & mask;
1928 	rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
1929 	cqe = &cq->cq_obj.cqes[idx];
1930 	ret = check_cqe(cqe, cq_size, cq->cq_ci);
1931 	/*
1932 	 * Be sure owner read is done before any other cookie field or
1933 	 * opaque field.
1934 	 */
1935 	rte_io_rmb();
1936 	if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
1937 		if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
1938 			return 0; /* return immediately. */
1939 		mlx5_aso_cqe_err_handle(sq);
1940 	}
1941 	i += sq->elts[0].burst_size;
1942 	sq->elts[0].burst_size = 0;
1943 	cq->cq_ci++;
1944 	if (likely(i)) {
1945 		sq->tail += i;
1946 		rte_io_wmb();
1947 		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
1948 	}
1949 	return i;
1950 }
1951 
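/*
 * Query all counters belonging to one DevX counter set (DCS) object.
 *
 * The counters are split into per-SQ bursts; each burst is posted on all
 * SQs and the CQs are busy-polled until every SQ has completed.
 *
 * @param[in] sh
 *   Pointer to the shared device context.
 * @param[in] cpool
 *   Pointer to the HWS counter pool.
 * @param[in] dcs_idx
 *   Index of the DCS object inside the pool.
 * @param[in] num
 *   Maximum number of counters to query.
 *
 * @return
 *   Number of counters queried.
 */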
1952 static uint16_t
1953 mlx5_aso_cnt_query_one_dcs(struct mlx5_dev_ctx_shared *sh,
1954 			   struct mlx5_hws_cnt_pool *cpool,
1955 			   uint8_t dcs_idx, uint32_t num)
1956 {
1957 	uint32_t dcs_id = cpool->dcs_mng.dcs[dcs_idx].obj->id;
1958 	uint64_t cnt_num = cpool->dcs_mng.dcs[dcs_idx].batch_sz;
1959 	uint64_t left;
1960 	uint32_t iidx = cpool->dcs_mng.dcs[dcs_idx].iidx;
1961 	uint32_t offset;
1962 	uint16_t mask;
1963 	uint16_t sq_idx;
1964 	uint64_t burst_sz = (uint64_t)(1 << MLX5_ASO_CNT_QUEUE_LOG_DESC) * 4 *
1965 		sh->cnt_svc->aso_mng.sq_num;
1966 	uint64_t qburst_sz = burst_sz / sh->cnt_svc->aso_mng.sq_num;
1967 	uint64_t n;
1968 	struct mlx5_aso_sq *sq;
1969 
1970 	cnt_num = RTE_MIN(num, cnt_num);
1971 	left = cnt_num;
1972 	while (left) {
1973 		mask = 0;
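		/* One bit per SQ: set when that SQ has no outstanding burst. */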
1974 		for (sq_idx = 0; sq_idx < sh->cnt_svc->aso_mng.sq_num;
1975 				sq_idx++) {
1976 			if (left == 0) {
1977 				mask |= (1 << sq_idx);
1978 				continue;
1979 			}
1980 			n = RTE_MIN(left, qburst_sz);
1981 			offset = cnt_num - left;
1982 			offset += iidx;
1983 			mlx5_aso_cnt_sq_enqueue_burst(cpool, sh,
1984 					&sh->cnt_svc->aso_mng.sqs[sq_idx], n,
1985 					offset, dcs_id);
1986 			left -= n;
1987 		}
1988 		do {
1989 			for (sq_idx = 0; sq_idx < sh->cnt_svc->aso_mng.sq_num;
1990 					sq_idx++) {
1991 				sq = &sh->cnt_svc->aso_mng.sqs[sq_idx];
1992 				if (mlx5_aso_cnt_completion_handle(sq))
1993 					mask |= (1 << sq_idx);
1994 			}
1995 		} while (mask < ((1 << sh->cnt_svc->aso_mng.sq_num) - 1));
1996 	}
1997 	return cnt_num;
1998 }
1999 
/*
 * Query FW counters via ASO WQEs.
 *
 * The ASO counter query works in _sync_ mode, meaning:
 * 1. Each SQ issues one burst consisting of several WQEs.
 * 2. A CQE is requested only for the last WQE of the burst.
 * 3. The CQ of each SQ is busy-polled.
 * 4. Once the CQEs of all SQs are received, go back to step 1 and issue
 *    the next burst.
 *
 * @param[in] sh
 *   Pointer to the shared device context.
 * @param[in] cpool
 *   Pointer to the counter pool.
 *
 * @return
 *   0 on success, -1 on failure.
 */
2017 int
2018 mlx5_aso_cnt_query(struct mlx5_dev_ctx_shared *sh,
2019 		   struct mlx5_hws_cnt_pool *cpool)
2020 {
2021 	uint32_t idx;
2022 	uint32_t num;
2023 	uint32_t cnt_num = mlx5_hws_cnt_pool_get_size(cpool) -
2024 		rte_ring_count(cpool->free_list);
2025 
2026 	for (idx = 0; idx < cpool->dcs_mng.batch_total; idx++) {
2027 		num = RTE_MIN(cnt_num, cpool->dcs_mng.dcs[idx].batch_sz);
2028 		mlx5_aso_cnt_query_one_dcs(sh, cpool, idx, num);
2029 		cnt_num -= num;
2030 		if (cnt_num == 0)
2031 			break;
2032 	}
2033 	return 0;
2034 }
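
/*
 * Illustrative sketch only, not part of the driver: the expected life cycle
 * of the counter-query path, assuming "sh" and "cpool" were set up elsewhere
 * and the counter service (sh->cnt_svc) is already allocated. In the driver
 * these calls are made from the HWS counter service, typically with the
 * queue init/uninit done once around the service lifetime rather than per
 * query. The helper name is hypothetical.
 */
static __rte_unused int
mlx5_aso_cnt_query_cycle_sketch(struct mlx5_dev_ctx_shared *sh,
				struct mlx5_hws_cnt_pool *cpool)
{
	int ret;

	if (mlx5_aso_cnt_queue_init(sh))
		return -1;
	/* Read the in-use counters of the pool into its raw buffer. */
	ret = mlx5_aso_cnt_query(sh, cpool);
	mlx5_aso_cnt_queue_uninit(sh);
	return ret;
}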
2035