xref: /dpdk/drivers/net/mlx5/mlx5_flow_aso.c (revision daa02b5cddbb8e11b31d41e2bf7bb1ae64dcae2f)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  */
4 #include <mlx5_prm.h>
5 #include <rte_malloc.h>
6 #include <rte_cycles.h>
7 #include <rte_eal_paging.h>
8 
9 #include <mlx5_malloc.h>
10 #include <mlx5_common_os.h>
11 #include <mlx5_common_devx.h>
12 
13 #include "mlx5.h"
14 #include "mlx5_flow.h"
15 
16 /**
17  * Destroy Completion Queue used for ASO access.
18  *
19  * @param[in] cq
20  *   ASO CQ to destroy.
21  */
22 static void
23 mlx5_aso_cq_destroy(struct mlx5_aso_cq *cq)
24 {
25 	if (cq->cq_obj.cq)
26 		mlx5_devx_cq_destroy(&cq->cq_obj);
27 	memset(cq, 0, sizeof(*cq));
28 }
29 
30 /**
31  * Create Completion Queue used for ASO access.
32  *
33  * @param[in] ctx
34  *   Context returned from mlx5 open_device() glue function.
35  * @param[in/out] cq
36  *   Pointer to CQ to create.
37  * @param[in] log_desc_n
38  *   Log of number of descriptors in queue.
39  * @param[in] socket
40  *   Socket to use for allocation.
41  * @param[in] uar_page_id
42  *   UAR page ID to use.
43  *
44  * @return
45  *   0 on success, a negative errno value otherwise and rte_errno is set.
46  */
47 static int
48 mlx5_aso_cq_create(void *ctx, struct mlx5_aso_cq *cq, uint16_t log_desc_n,
49 		   int socket, int uar_page_id)
50 {
51 	struct mlx5_devx_cq_attr attr = {
52 		.uar_page_id = uar_page_id,
53 	};
54 
55 	cq->log_desc_n = log_desc_n;
56 	cq->cq_ci = 0;
57 	return mlx5_devx_cq_create(ctx, &cq->cq_obj, log_desc_n, &attr, socket);
58 }
59 
60 /**
61  * Free MR resources.
62  *
63  * @param[in] cdev
64  *   Pointer to the mlx5 common device.
65  * @param[in] mr
66  *   MR to free.
67  */
68 static void
69 mlx5_aso_dereg_mr(struct mlx5_common_device *cdev, struct mlx5_pmd_mr *mr)
70 {
71 	void *addr = mr->addr;
72 
73 	cdev->mr_scache.dereg_mr_cb(mr);
74 	mlx5_free(addr);
75 	memset(mr, 0, sizeof(*mr));
76 }
77 
78 /**
79  * Register Memory Region.
80  *
81  * @param[in] cdev
82  *   Pointer to the mlx5 common device.
83  * @param[in] length
84  *   Size of MR buffer.
85  * @param[in/out] mr
86  *   Pointer to MR to create.
87  * @param[in] socket
88  *   Socket to use for allocation.
89  *
90  * @return
91  *   0 on success, a negative errno value otherwise and rte_errno is set.
92  */
93 static int
94 mlx5_aso_reg_mr(struct mlx5_common_device *cdev, size_t length,
95 		struct mlx5_pmd_mr *mr, int socket)
96 {
97 
98 	int ret;
99 
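	/* Align the query buffer to 4 KiB (typical page size) before MR registration. */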
100 	mr->addr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, length, 4096,
101 			       socket);
102 	if (!mr->addr) {
103 		DRV_LOG(ERR, "Failed to create ASO bits mem for MR.");
104 		return -1;
105 	}
106 	ret = cdev->mr_scache.reg_mr_cb(cdev->pd, mr->addr, length, mr);
107 	if (ret) {
108 		DRV_LOG(ERR, "Failed to create direct Mkey.");
109 		mlx5_free(mr->addr);
110 		return -1;
111 	}
112 	return 0;
113 }
114 
115 /**
116  * Destroy Send Queue used for ASO access.
117  *
118  * @param[in] sq
119  *   ASO SQ to destroy.
120  */
121 static void
122 mlx5_aso_destroy_sq(struct mlx5_aso_sq *sq)
123 {
124 	mlx5_devx_sq_destroy(&sq->sq_obj);
125 	mlx5_aso_cq_destroy(&sq->cq);
126 	memset(sq, 0, sizeof(*sq));
127 }
128 
129 /**
130  * Initialize Send Queue used for ASO access.
131  *
132  * @param[in] sq
133  *   ASO SQ to initialize.
134  */
135 static void
136 mlx5_aso_age_init_sq(struct mlx5_aso_sq *sq)
137 {
138 	volatile struct mlx5_aso_wqe *restrict wqe;
139 	int i;
140 	int size = 1 << sq->log_desc_n;
141 	uint64_t addr;
142 
143 	/* All the following WQE fields stay constant across posts. */
144 	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
145 		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
146 							  (sizeof(*wqe) >> 4));
147 		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
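		/*
		 * Each WQE reads back the hit bits of one age pool:
		 * MLX5_ASO_AGE_ACTIONS_PER_POOL bits, i.e.
		 * MLX5_ASO_AGE_ACTIONS_PER_POOL / 8 bytes, hence the stride of
		 * MLX5_ASO_AGE_ACTIONS_PER_POOL / 64 64-bit words below.
		 */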
148 		addr = (uint64_t)((uint64_t *)sq->mr.addr + i *
149 					    MLX5_ASO_AGE_ACTIONS_PER_POOL / 64);
150 		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
151 		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
152 		wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
153 			(0u |
154 			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
155 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
156 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
157 			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
158 		wqe->aso_cseg.data_mask = RTE_BE64(UINT64_MAX);
159 	}
160 }
161 
162 /**
163  * Initialize Send Queue used for ASO flow meter access.
164  *
165  * @param[in] sq
166  *   ASO SQ to initialize.
167  */
168 static void
169 mlx5_aso_mtr_init_sq(struct mlx5_aso_sq *sq)
170 {
171 	volatile struct mlx5_aso_wqe *restrict wqe;
172 	int i;
173 	int size = 1 << sq->log_desc_n;
174 
175 	/* All the following WQE fields stay constant across posts. */
176 	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
177 		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
178 							  (sizeof(*wqe) >> 4));
179 		wqe->aso_cseg.operand_masks = RTE_BE32(0u |
180 			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
181 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
182 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
183 			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
184 		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
185 							 MLX5_COMP_MODE_OFFSET);
186 	}
187 }
188 
189 /*
190  * Initialize Send Queue used for ASO connection tracking.
191  *
192  * @param[in] sq
193  *   ASO SQ to initialize.
194  */
195 static void
196 mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
197 {
198 	volatile struct mlx5_aso_wqe *restrict wqe;
199 	int i;
200 	int size = 1 << sq->log_desc_n;
201 	uint64_t addr;
202 
203 	/* All the following WQE fields stay constant across posts. */
204 	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
205 		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
206 							  (sizeof(*wqe) >> 4));
207 		/* One unique MR for the query data. */
208 		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
209 		/* Magic number 64 is the length of an ASO CT object in bytes. */
210 		addr = (uint64_t)((uintptr_t)sq->mr.addr + i * 64);
211 		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
212 		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
213 		/*
214 		 * The operand_masks value differs between modify and query,
215 		 * and data_mask may differ for each modification (for a query
216 		 * it can be zero and is ignored), so both are filled per WQE
217 		 * at post time rather than here.
218 		 * A CQE is always requested so that the caller knows when the
219 		 * flow can be created or the data read.
220 		 */
221 		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
222 						   MLX5_COMP_MODE_OFFSET);
223 	}
224 }
225 
226 /**
227  * Create Send Queue used for ASO access.
228  *
229  * @param[in] ctx
230  *   Context returned from mlx5 open_device() glue function.
231  * @param[in/out] sq
232  *   Pointer to SQ to create.
233  * @param[in] socket
234  *   Socket to use for allocation.
235  * @param[in] uar
236  *   User Access Region object.
237  * @param[in] pdn
238  *   Protection Domain number to use.
239  * @param[in] log_desc_n
240  *   Log of number of descriptors in queue.
241  * @param[in] ts_format
242  *   Timestamp format supported by the queue.
243  *
244  * @return
245  *   0 on success, a negative errno value otherwise and rte_errno is set.
246  */
247 static int
248 mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket, void *uar,
249 		   uint32_t pdn, uint16_t log_desc_n, uint32_t ts_format)
250 {
251 	struct mlx5_devx_create_sq_attr attr = {
252 		.user_index = 0xFFFF,
253 		.wq_attr = (struct mlx5_devx_wq_attr){
254 			.pd = pdn,
255 			.uar_page = mlx5_os_get_devx_uar_page_id(uar),
256 		},
257 		.ts_format = mlx5_ts_format_conv(ts_format),
258 	};
259 	struct mlx5_devx_modify_sq_attr modify_attr = {
260 		.state = MLX5_SQC_STATE_RDY,
261 	};
262 	uint16_t log_wqbb_n;
263 	int ret;
264 
265 	if (mlx5_aso_cq_create(ctx, &sq->cq, log_desc_n, socket,
266 			       mlx5_os_get_devx_uar_page_id(uar)))
267 		goto error;
268 	sq->log_desc_n = log_desc_n;
269 	attr.cqn = sq->cq.cq_obj.cq->id;
270 	/* Each mlx5_aso_wqe is twice the size of mlx5_wqe, so double the WQEBBs. */
271 	log_wqbb_n = log_desc_n + 1;
272 	ret = mlx5_devx_sq_create(ctx, &sq->sq_obj, log_wqbb_n, &attr, socket);
273 	if (ret) {
274 		DRV_LOG(ERR, "Can't create SQ object.");
275 		rte_errno = ENOMEM;
276 		goto error;
277 	}
278 	ret = mlx5_devx_cmd_modify_sq(sq->sq_obj.sq, &modify_attr);
279 	if (ret) {
280 		DRV_LOG(ERR, "Can't change SQ state to ready.");
281 		rte_errno = ENOMEM;
282 		goto error;
283 	}
284 	sq->pi = 0;
285 	sq->head = 0;
286 	sq->tail = 0;
287 	sq->sqn = sq->sq_obj.sq->id;
288 	sq->uar_addr = mlx5_os_get_devx_uar_reg_addr(uar);
289 	rte_spinlock_init(&sq->sqsl);
290 	return 0;
291 error:
292 	mlx5_aso_destroy_sq(sq);
293 	return -1;
294 }
295 
296 /**
297  * API to create and initialize Send Queue used for ASO access.
298  *
299  * @param[in] sh
300  *   Pointer to shared device context.
301  * @param[in] aso_opc_mod
302  *   Mode of ASO feature.
303  *
304  * @return
305  *   0 on success, a negative errno value otherwise and rte_errno is set.
306  */
307 int
308 mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
309 		    enum mlx5_access_aso_opc_mod aso_opc_mod)
310 {
311 	uint32_t sq_desc_n = 1 << MLX5_ASO_QUEUE_LOG_DESC;
312 	struct mlx5_common_device *cdev = sh->cdev;
313 
314 	switch (aso_opc_mod) {
315 	case ASO_OPC_MOD_FLOW_HIT:
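		/*
		 * One hit bit per age action, so each SQ descriptor needs
		 * MLX5_ASO_AGE_ACTIONS_PER_POOL / 8 bytes of readback buffer.
		 */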
316 		if (mlx5_aso_reg_mr(cdev, (MLX5_ASO_AGE_ACTIONS_PER_POOL / 8) *
317 				    sq_desc_n, &sh->aso_age_mng->aso_sq.mr, 0))
318 			return -1;
319 		if (mlx5_aso_sq_create(cdev->ctx, &sh->aso_age_mng->aso_sq, 0,
320 				       sh->tx_uar, cdev->pdn,
321 				       MLX5_ASO_QUEUE_LOG_DESC,
322 				       cdev->config.hca_attr.sq_ts_format)) {
323 			mlx5_aso_dereg_mr(cdev, &sh->aso_age_mng->aso_sq.mr);
324 			return -1;
325 		}
326 		mlx5_aso_age_init_sq(&sh->aso_age_mng->aso_sq);
327 		break;
328 	case ASO_OPC_MOD_POLICER:
329 		if (mlx5_aso_sq_create(cdev->ctx, &sh->mtrmng->pools_mng.sq, 0,
330 				       sh->tx_uar, cdev->pdn,
331 				       MLX5_ASO_QUEUE_LOG_DESC,
332 				       cdev->config.hca_attr.sq_ts_format))
333 			return -1;
334 		mlx5_aso_mtr_init_sq(&sh->mtrmng->pools_mng.sq);
335 		break;
336 	case ASO_OPC_MOD_CONNECTION_TRACKING:
337 		/* 64B per object for query. */
338 		if (mlx5_aso_reg_mr(cdev, 64 * sq_desc_n,
339 				    &sh->ct_mng->aso_sq.mr, 0))
340 			return -1;
341 		if (mlx5_aso_sq_create(cdev->ctx, &sh->ct_mng->aso_sq, 0,
342 				       sh->tx_uar, cdev->pdn,
343 				       MLX5_ASO_QUEUE_LOG_DESC,
344 				       cdev->config.hca_attr.sq_ts_format)) {
345 			mlx5_aso_dereg_mr(cdev, &sh->ct_mng->aso_sq.mr);
346 			return -1;
347 		}
348 		mlx5_aso_ct_init_sq(&sh->ct_mng->aso_sq);
349 		break;
350 	default:
351 		DRV_LOG(ERR, "Unknown ASO operation mode");
352 		return -1;
353 	}
354 	return 0;
355 }
356 
357 /**
358  * API to destroy Send Queue used for ASO access.
359  *
360  * @param[in] sh
361  *   Pointer to shared device context.
362  * @param[in] aso_opc_mod
363  *   Mode of ASO feature.
364  */
365 void
366 mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
367 		      enum mlx5_access_aso_opc_mod aso_opc_mod)
368 {
369 	struct mlx5_aso_sq *sq;
370 
371 	switch (aso_opc_mod) {
372 	case ASO_OPC_MOD_FLOW_HIT:
373 		mlx5_aso_dereg_mr(sh->cdev, &sh->aso_age_mng->aso_sq.mr);
374 		sq = &sh->aso_age_mng->aso_sq;
375 		break;
376 	case ASO_OPC_MOD_POLICER:
377 		sq = &sh->mtrmng->pools_mng.sq;
378 		break;
379 	case ASO_OPC_MOD_CONNECTION_TRACKING:
380 		mlx5_aso_dereg_mr(sh->cdev, &sh->ct_mng->aso_sq.mr);
381 		sq = &sh->ct_mng->aso_sq;
382 		break;
383 	default:
384 		DRV_LOG(ERR, "Unknown ASO operation mode");
385 		return;
386 	}
387 	mlx5_aso_destroy_sq(sq);
388 }
389 
390 /**
391  * Write a burst of WQEs to ASO SQ.
392  *
393  * @param[in] mng
394  *   ASO management data, contains the SQ.
395  * @param[in] n
396  *   Index of the last valid pool.
397  *
398  * @return
399  *   Number of WQEs in burst.
400  */
401 static uint16_t
402 mlx5_aso_sq_enqueue_burst(struct mlx5_aso_age_mng *mng, uint16_t n)
403 {
404 	volatile struct mlx5_aso_wqe *wqe;
405 	struct mlx5_aso_sq *sq = &mng->aso_sq;
406 	struct mlx5_aso_age_pool *pool;
407 	uint16_t size = 1 << sq->log_desc_n;
408 	uint16_t mask = size - 1;
409 	uint16_t max;
410 	uint16_t start_head = sq->head;
411 
412 	max = RTE_MIN(size - (uint16_t)(sq->head - sq->tail), n - sq->next);
413 	if (unlikely(!max))
414 		return 0;
415 	sq->elts[start_head & mask].burst_size = max;
416 	do {
417 		wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
418 		rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
419 		/* Fill next WQE. */
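		/*
		 * The pools array may be reallocated on resize (hence
		 * resize_sl), so read it under the lock.
		 */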
420 		rte_spinlock_lock(&mng->resize_sl);
421 		pool = mng->pools[sq->next];
422 		rte_spinlock_unlock(&mng->resize_sl);
423 		sq->elts[sq->head & mask].pool = pool;
424 		wqe->general_cseg.misc =
425 				rte_cpu_to_be_32(((struct mlx5_devx_obj *)
426 						 (pool->flow_hit_aso_obj))->id);
427 		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR <<
428 							 MLX5_COMP_MODE_OFFSET);
429 		wqe->general_cseg.opcode = rte_cpu_to_be_32
430 						(MLX5_OPCODE_ACCESS_ASO |
431 						 (ASO_OPC_MOD_FLOW_HIT <<
432 						  WQE_CSEG_OPC_MOD_OFFSET) |
433 						 (sq->pi <<
434 						  WQE_CSEG_WQE_INDEX_OFFSET));
435 		sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
436 		sq->head++;
437 		sq->next++;
438 		max--;
439 	} while (max);
440 	wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
441 							 MLX5_COMP_MODE_OFFSET);
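	/*
	 * Ring the doorbell: make the WQE contents visible before updating
	 * the doorbell record, then write the first 8 bytes of the last WQE
	 * to the UAR register.
	 */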
442 	rte_io_wmb();
443 	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
444 	rte_wmb();
445 	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
446 	rte_wmb();
447 	return sq->elts[start_head & mask].burst_size;
448 }
449 
450 /**
451  * Debug utility function. Dump contents of error CQE and WQE.
452  *
453  * @param[in] cqe
454  *   Error CQE to dump.
455  * @param[in] wqe
456  *   Error WQE to dump.
457  */
458 static void
459 mlx5_aso_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe)
460 {
461 	int i;
462 
463 	DRV_LOG(ERR, "Error cqe:");
464 	for (i = 0; i < 16; i += 4)
465 		DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
466 			cqe[i + 2], cqe[i + 3]);
467 	DRV_LOG(ERR, "\nError wqe:");
468 	for (i = 0; i < (int)sizeof(struct mlx5_aso_wqe) / 4; i += 4)
469 		DRV_LOG(ERR, "%08X %08X %08X %08X", wqe[i], wqe[i + 1],
470 			wqe[i + 2], wqe[i + 3]);
471 }
472 
473 /**
474  * Handle case of error CQE.
475  *
476  * @param[in] sq
477  *   ASO SQ to use.
478  */
479 static void
480 mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq)
481 {
482 	struct mlx5_aso_cq *cq = &sq->cq;
483 	uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1);
484 	volatile struct mlx5_err_cqe *cqe =
485 			(volatile struct mlx5_err_cqe *)&cq->cq_obj.cqes[idx];
486 
487 	cq->errors++;
488 	idx = rte_be_to_cpu_16(cqe->wqe_counter) & ((1u << sq->log_desc_n) - 1);
489 	mlx5_aso_dump_err_objs((volatile uint32_t *)cqe,
490 			       (volatile uint32_t *)&sq->sq_obj.aso_wqes[idx]);
491 }
492 
493 /**
494  * Update ASO objects upon completion.
495  *
496  * @param[in] sh
497  *   Shared device context.
498  * @param[in] n
499  *   Number of completed ASO objects.
500  */
501 static void
502 mlx5_aso_age_action_update(struct mlx5_dev_ctx_shared *sh, uint16_t n)
503 {
504 	struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
505 	struct mlx5_aso_sq *sq = &mng->aso_sq;
506 	struct mlx5_age_info *age_info;
507 	const uint16_t size = 1 << sq->log_desc_n;
508 	const uint16_t mask = size - 1;
509 	const uint64_t curr = MLX5_CURR_TIME_SEC;
510 	uint16_t expected = AGE_CANDIDATE;
511 	uint16_t i;
512 
513 	for (i = 0; i < n; ++i) {
514 		uint16_t idx = (sq->tail + i) & mask;
515 		struct mlx5_aso_age_pool *pool = sq->elts[idx].pool;
516 		uint64_t diff = curr - pool->time_of_last_age_check;
517 		uint64_t *addr = sq->mr.addr;
518 		int j;
519 
520 		addr += idx * MLX5_ASO_AGE_ACTIONS_PER_POOL / 64;
521 		pool->time_of_last_age_check = curr;
522 		for (j = 0; j < MLX5_ASO_AGE_ACTIONS_PER_POOL; j++) {
523 			struct mlx5_aso_age_action *act = &pool->actions[j];
524 			struct mlx5_age_param *ap = &act->age_params;
525 			uint8_t byte;
526 			uint8_t offset;
527 			uint8_t *u8addr;
528 			uint8_t hit;
529 
530 			if (__atomic_load_n(&ap->state, __ATOMIC_RELAXED) !=
531 					    AGE_CANDIDATE)
532 				continue;
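			/*
			 * Action 0 lives in the last byte of the 64-byte hit
			 * block: action j maps to byte 63 - j / 8, bit j % 8
			 * (e.g. j = 500 -> byte 1, bit 4).
			 */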
533 			byte = 63 - (j / 8);
534 			offset = j % 8;
535 			u8addr = (uint8_t *)addr;
536 			hit = (u8addr[byte] >> offset) & 0x1;
537 			if (hit) {
538 				__atomic_store_n(&ap->sec_since_last_hit, 0,
539 						 __ATOMIC_RELAXED);
540 			} else {
541 				struct mlx5_priv *priv;
542 
543 				__atomic_fetch_add(&ap->sec_since_last_hit,
544 						   diff, __ATOMIC_RELAXED);
545 				/* If timeout passed add to aged-out list. */
546 				if (ap->sec_since_last_hit <= ap->timeout)
547 					continue;
548 				priv =
549 				rte_eth_devices[ap->port_id].data->dev_private;
550 				age_info = GET_PORT_AGE_INFO(priv);
551 				rte_spinlock_lock(&age_info->aged_sl);
552 				if (__atomic_compare_exchange_n(&ap->state,
553 								&expected,
554 								AGE_TMOUT,
555 								false,
556 							       __ATOMIC_RELAXED,
557 							    __ATOMIC_RELAXED)) {
558 					LIST_INSERT_HEAD(&age_info->aged_aso,
559 							 act, next);
560 					MLX5_AGE_SET(age_info,
561 						     MLX5_AGE_EVENT_NEW);
562 				}
563 				rte_spinlock_unlock(&age_info->aged_sl);
564 			}
565 		}
566 	}
567 	mlx5_age_event_prepare(sh);
568 }
569 
570 /**
571  * Handle completions from WQEs sent to ASO SQ.
572  *
573  * @param[in] sh
574  *   Shared device context.
575  *
576  * @return
577  *   Number of CQEs handled.
578  */
579 static uint16_t
580 mlx5_aso_completion_handle(struct mlx5_dev_ctx_shared *sh)
581 {
582 	struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
583 	struct mlx5_aso_sq *sq = &mng->aso_sq;
584 	struct mlx5_aso_cq *cq = &sq->cq;
585 	volatile struct mlx5_cqe *restrict cqe;
586 	const unsigned int cq_size = 1 << cq->log_desc_n;
587 	const unsigned int mask = cq_size - 1;
588 	uint32_t idx;
589 	uint32_t next_idx = cq->cq_ci & mask;
590 	const uint16_t max = (uint16_t)(sq->head - sq->tail);
591 	uint16_t i = 0;
592 	int ret;
593 	if (unlikely(!max))
594 		return 0;
595 	do {
596 		idx = next_idx;
597 		next_idx = (cq->cq_ci + 1) & mask;
598 		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
599 		cqe = &cq->cq_obj.cqes[idx];
600 		ret = check_cqe(cqe, cq_size, cq->cq_ci);
601 		/*
602 		 * Be sure owner read is done before any other cookie field or
603 		 * opaque field.
604 		 */
605 		rte_io_rmb();
606 		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
607 			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
608 				break;
609 			mlx5_aso_cqe_err_handle(sq);
610 		} else {
611 			i += sq->elts[(sq->tail + i) & mask].burst_size;
612 		}
613 		cq->cq_ci++;
614 	} while (1);
615 	if (likely(i)) {
616 		mlx5_aso_age_action_update(sh, i);
617 		sq->tail += i;
618 		rte_io_wmb();
619 		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
620 	}
621 	return i;
622 }
623 
624 /**
625  * Periodically read CQEs and send WQEs to ASO SQ.
626  *
627  * @param[in] arg
628  *   Shared device context containing the ASO SQ.
629  */
630 static void
631 mlx5_flow_aso_alarm(void *arg)
632 {
633 	struct mlx5_dev_ctx_shared *sh = arg;
634 	struct mlx5_aso_sq *sq = &sh->aso_age_mng->aso_sq;
635 	uint32_t us = 100u;
636 	uint16_t n;
637 
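	/*
	 * Drain completions first, then post the next burst of flow hit
	 * queries. The alarm re-arms itself every 100us while walking the
	 * pools and backs off to one second once a full sweep is done.
	 */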
638 	rte_spinlock_lock(&sh->aso_age_mng->resize_sl);
639 	n = sh->aso_age_mng->next;
640 	rte_spinlock_unlock(&sh->aso_age_mng->resize_sl);
641 	mlx5_aso_completion_handle(sh);
642 	if (sq->next == n) {
643 		/* End of loop: wait 1 second. */
644 		us = US_PER_S;
645 		sq->next = 0;
646 	}
647 	mlx5_aso_sq_enqueue_burst(sh->aso_age_mng, n);
648 	if (rte_eal_alarm_set(us, mlx5_flow_aso_alarm, sh))
649 		DRV_LOG(ERR, "Cannot reinitialize aso alarm.");
650 }
651 
652 /**
653  * API to start ASO access using ASO SQ.
654  *
655  * @param[in] sh
656  *   Pointer to shared device context.
657  *
658  * @return
659  *   0 on success, a negative errno value otherwise and rte_errno is set.
660  */
661 int
662 mlx5_aso_flow_hit_queue_poll_start(struct mlx5_dev_ctx_shared *sh)
663 {
664 	if (rte_eal_alarm_set(US_PER_S, mlx5_flow_aso_alarm, sh)) {
665 		DRV_LOG(ERR, "Cannot reinitialize ASO age alarm.");
666 		return -rte_errno;
667 	}
668 	return 0;
669 }
670 
671 /**
672  * API to stop ASO access using ASO SQ.
673  *
674  * @param[in] sh
675  *   Pointer to shared device context.
676  *
677  * @return
678  *   0 on success, a negative errno value otherwise and rte_errno is set.
679  */
680 int
681 mlx5_aso_flow_hit_queue_poll_stop(struct mlx5_dev_ctx_shared *sh)
682 {
683 	int retries = 1024;
684 
685 	if (!sh->aso_age_mng->aso_sq.sq_obj.sq)
686 		return -EINVAL;
687 	rte_errno = 0;
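	/*
	 * rte_eal_alarm_cancel() sets rte_errno to EINPROGRESS while the
	 * alarm callback is still executing, so retry until it has finished.
	 */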
688 	while (--retries) {
689 		rte_eal_alarm_cancel(mlx5_flow_aso_alarm, sh);
690 		if (rte_errno != EINPROGRESS)
691 			break;
692 		rte_pause();
693 	}
694 	return -rte_errno;
695 }
696 
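/**
 * Post a WQE to the ASO meter SQ to modify a single meter.
 *
 * @param[in] sq
 *   ASO SQ to use.
 * @param[in] aso_mtr
 *   ASO meter to be modified.
 *
 * @return
 *   1 on success (number of WQEs posted), 0 on failure.
 */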
697 static uint16_t
698 mlx5_aso_mtr_sq_enqueue_single(struct mlx5_aso_sq *sq,
699 		struct mlx5_aso_mtr *aso_mtr)
700 {
701 	volatile struct mlx5_aso_wqe *wqe = NULL;
702 	struct mlx5_flow_meter_info *fm = NULL;
703 	struct mlx5_flow_meter_profile *fmp;
704 	uint16_t size = 1 << sq->log_desc_n;
705 	uint16_t mask = size - 1;
706 	uint16_t res;
707 	uint32_t dseg_idx = 0;
708 	struct mlx5_aso_mtr_pool *pool = NULL;
709 
710 	rte_spinlock_lock(&sq->sqsl);
711 	res = size - (uint16_t)(sq->head - sq->tail);
712 	if (unlikely(!res)) {
713 		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
714 		rte_spinlock_unlock(&sq->sqsl);
715 		return 0;
716 	}
717 	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
718 	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
719 	/* Fill next WQE. */
720 	fm = &aso_mtr->fm;
721 	sq->elts[sq->head & mask].mtr = aso_mtr;
722 	pool = container_of(aso_mtr, struct mlx5_aso_mtr_pool,
723 			mtrs[aso_mtr->offset]);
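	/* Two meters share one ASO flow meter object, hence offset / 2. */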
724 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
725 			(aso_mtr->offset >> 1));
726 	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
727 			(ASO_OPC_MOD_POLICER <<
728 			WQE_CSEG_OPC_MOD_OFFSET) |
729 			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
730 	/* There are 2 meters in one ASO cache line. */
731 	dseg_idx = aso_mtr->offset & 0x1;
732 	wqe->aso_cseg.data_mask =
733 		RTE_BE64(MLX5_IFC_FLOW_METER_PARAM_MASK << (32 * !dseg_idx));
734 	if (fm->is_enable) {
735 		wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
736 			fm->profile->srtcm_prm.cbs_cir;
737 		wqe->aso_dseg.mtrs[dseg_idx].ebs_eir =
738 			fm->profile->srtcm_prm.ebs_eir;
739 	} else {
740 		wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
741 			RTE_BE32(MLX5_IFC_FLOW_METER_DISABLE_CBS_CIR_VAL);
742 		wqe->aso_dseg.mtrs[dseg_idx].ebs_eir = 0;
743 	}
744 	fmp = fm->profile;
745 	if (fmp->profile.packet_mode)
746 		wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
747 				RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
748 				(MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET) |
749 				(MLX5_METER_MODE_PKT << ASO_DSEG_MTR_MODE));
750 	else
751 		wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
752 				RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
753 				(MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET));
754 	switch (fmp->profile.alg) {
755 	case RTE_MTR_SRTCM_RFC2697:
756 		/* Only needed for RFC2697. */
757 		if (fm->profile->srtcm_prm.ebs_eir)
758 			wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
759 					RTE_BE32(1 << ASO_DSEG_BO_OFFSET);
760 		break;
761 	case RTE_MTR_TRTCM_RFC2698:
762 		wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
763 				RTE_BE32(1 << ASO_DSEG_BBOG_OFFSET);
764 		break;
765 	case RTE_MTR_TRTCM_RFC4115:
766 	default:
767 		break;
768 	}
769 	/*
770 	 * Note:
771 	 * Due to software performance reason, the token fields will not be
772 	 * set when posting the WQE to ASO SQ. It will be filled by the HW
773 	 * automatically.
774 	 */
775 	sq->head++;
776 	sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
777 	rte_io_wmb();
778 	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
779 	rte_wmb();
780 	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
781 	rte_wmb();
782 	rte_spinlock_unlock(&sq->sqsl);
783 	return 1;
784 }
785 
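/**
 * Mark the ASO meters from the SQ tail as ready after WQE completion.
 *
 * @param[in] sq
 *   ASO SQ the meter WQEs were posted to.
 * @param[in] aso_mtrs_nums
 *   Number of completed meter WQEs to update.
 */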
786 static void
787 mlx5_aso_mtrs_status_update(struct mlx5_aso_sq *sq, uint16_t aso_mtrs_nums)
788 {
789 	uint16_t size = 1 << sq->log_desc_n;
790 	uint16_t mask = size - 1;
791 	uint16_t i;
792 	struct mlx5_aso_mtr *aso_mtr = NULL;
793 	uint8_t exp_state = ASO_METER_WAIT;
794 
795 	for (i = 0; i < aso_mtrs_nums; ++i) {
796 		aso_mtr = sq->elts[(sq->tail + i) & mask].mtr;
797 		MLX5_ASSERT(aso_mtr);
798 		(void)__atomic_compare_exchange_n(&aso_mtr->state,
799 				&exp_state, ASO_METER_READY,
800 				false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
801 	}
802 }
803 
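/**
 * Handle completions from WQEs sent to the ASO meter SQ.
 *
 * @param[in] sq
 *   ASO SQ to poll.
 */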
804 static void
805 mlx5_aso_mtr_completion_handle(struct mlx5_aso_sq *sq)
806 {
807 	struct mlx5_aso_cq *cq = &sq->cq;
808 	volatile struct mlx5_cqe *restrict cqe;
809 	const unsigned int cq_size = 1 << cq->log_desc_n;
810 	const unsigned int mask = cq_size - 1;
811 	uint32_t idx;
812 	uint32_t next_idx = cq->cq_ci & mask;
813 	uint16_t max;
814 	uint16_t n = 0;
815 	int ret;
816 
817 	rte_spinlock_lock(&sq->sqsl);
818 	max = (uint16_t)(sq->head - sq->tail);
819 	if (unlikely(!max)) {
820 		rte_spinlock_unlock(&sq->sqsl);
821 		return;
822 	}
823 	do {
824 		idx = next_idx;
825 		next_idx = (cq->cq_ci + 1) & mask;
826 		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
827 		cqe = &cq->cq_obj.cqes[idx];
828 		ret = check_cqe(cqe, cq_size, cq->cq_ci);
829 		/*
830 		 * Be sure owner read is done before any other cookie field or
831 		 * opaque field.
832 		 */
833 		rte_io_rmb();
834 		if (ret != MLX5_CQE_STATUS_SW_OWN) {
835 			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
836 				break;
837 			mlx5_aso_cqe_err_handle(sq);
838 		} else {
839 			n++;
840 		}
841 		cq->cq_ci++;
842 	} while (1);
843 	if (likely(n)) {
844 		mlx5_aso_mtrs_status_update(sq, n);
845 		sq->tail += n;
846 		rte_io_wmb();
847 		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
848 	}
849 	rte_spinlock_unlock(&sq->sqsl);
850 }
851 
852 /**
853  * Update meter parameters by sending a WQE.
854  *
855  * @param[in] sh
856  *   Pointer to shared device context.
857  * @param[in] mtr
858  *   Pointer to ASO meter to be modified.
859  *
860  * @return
861  *   0 on success, -1 otherwise.
864  */
865 int
866 mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
867 			struct mlx5_aso_mtr *mtr)
868 {
869 	struct mlx5_aso_sq *sq = &sh->mtrmng->pools_mng.sq;
870 	uint32_t poll_wqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
871 
872 	do {
873 		mlx5_aso_mtr_completion_handle(sq);
874 		if (mlx5_aso_mtr_sq_enqueue_single(sq, mtr))
875 			return 0;
876 		/* Waiting for wqe resource. */
877 		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
878 	} while (--poll_wqe_times);
879 	DRV_LOG(ERR, "Fail to send WQE for ASO meter offset %d",
880 			mtr->offset);
881 	return -1;
882 }
883 
884 /**
885  * Wait for an ASO meter to become ready (its last update completed).
886  *
887  * @param[in] sh
888  *   Pointer to shared device context.
889  * @param[in] mtr
890  *   Pointer to ASO meter to wait on.
891  *
892  * @return
893  *   0 on success, -1 otherwise.
896  */
897 int
898 mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
899 			struct mlx5_aso_mtr *mtr)
900 {
901 	struct mlx5_aso_sq *sq = &sh->mtrmng->pools_mng.sq;
902 	uint32_t poll_cqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
903 
904 	if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) ==
905 					    ASO_METER_READY)
906 		return 0;
907 	do {
908 		mlx5_aso_mtr_completion_handle(sq);
909 		if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) ==
910 					    ASO_METER_READY)
911 			return 0;
912 		/* Waiting for CQE ready. */
913 		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
914 	} while (--poll_cqe_times);
915 	DRV_LOG(ERR, "Fail to poll CQE ready for ASO meter offset %d",
916 			mtr->offset);
917 	return -1;
918 }
919 
920 /*
921  * Post a WQE to the ASO CT SQ to modify the context.
922  *
923  * @param[in] mng
924  *   Pointer to the CT pools management structure.
925  * @param[in] ct
926  *   Pointer to the generic CT structure related to the context.
927  * @param[in] profile
928  *   Pointer to configuration profile.
929  *
930  * @return
931  *   1 on success (WQE number), 0 on failure.
932  */
933 static uint16_t
934 mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
935 			      struct mlx5_aso_ct_action *ct,
936 			      const struct rte_flow_action_conntrack *profile)
937 {
938 	volatile struct mlx5_aso_wqe *wqe = NULL;
939 	struct mlx5_aso_sq *sq = &mng->aso_sq;
940 	uint16_t size = 1 << sq->log_desc_n;
941 	uint16_t mask = size - 1;
942 	uint16_t res;
943 	struct mlx5_aso_ct_pool *pool;
944 	void *desg;
945 	void *orig_dir;
946 	void *reply_dir;
947 
948 	rte_spinlock_lock(&sq->sqsl);
949 	/* Prevent other threads from updating the index. */
950 	res = size - (uint16_t)(sq->head - sq->tail);
951 	if (unlikely(!res)) {
952 		rte_spinlock_unlock(&sq->sqsl);
953 		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
954 		return 0;
955 	}
956 	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
957 	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
958 	/* Fill next WQE. */
959 	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
960 	sq->elts[sq->head & mask].ct = ct;
961 	sq->elts[sq->head & mask].query_data = NULL;
962 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
963 	/* Each WQE will have a single CT object. */
964 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
965 						  ct->offset);
966 	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
967 			(ASO_OPC_MOD_CONNECTION_TRACKING <<
968 			 WQE_CSEG_OPC_MOD_OFFSET) |
969 			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
970 	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
971 			(0u |
972 			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
973 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
974 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
975 			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
976 	wqe->aso_cseg.data_mask = UINT64_MAX;
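	/* An all-ones mask is endianness neutral, no byte swap is needed. */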
977 	/* To make compiler happy. */
978 	desg = (void *)(uintptr_t)wqe->aso_dseg.data;
979 	MLX5_SET(conn_track_aso, desg, valid, 1);
980 	MLX5_SET(conn_track_aso, desg, state, profile->state);
981 	MLX5_SET(conn_track_aso, desg, freeze_track, !profile->enable);
982 	MLX5_SET(conn_track_aso, desg, connection_assured,
983 		 profile->live_connection);
984 	MLX5_SET(conn_track_aso, desg, sack_permitted, profile->selective_ack);
985 	MLX5_SET(conn_track_aso, desg, challenged_acked,
986 		 profile->challenge_ack_passed);
987 	/* Heartbeat, retransmission_counter, retranmission_limit_exceeded: 0 */
988 	MLX5_SET(conn_track_aso, desg, heartbeat, 0);
989 	MLX5_SET(conn_track_aso, desg, max_ack_window,
990 		 profile->max_ack_window);
991 	MLX5_SET(conn_track_aso, desg, retransmission_counter, 0);
992 	MLX5_SET(conn_track_aso, desg, retranmission_limit_exceeded, 0);
993 	MLX5_SET(conn_track_aso, desg, retranmission_limit,
994 		 profile->retransmission_limit);
995 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_scale,
996 		 profile->reply_dir.scale);
997 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_close_initiated,
998 		 profile->reply_dir.close_initiated);
999 	/* Both directions will use the same liberal mode. */
1000 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_liberal_enabled,
1001 		 profile->liberal_mode);
1002 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_data_unacked,
1003 		 profile->reply_dir.data_unacked);
1004 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_max_ack,
1005 		 profile->reply_dir.last_ack_seen);
1006 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_scale,
1007 		 profile->original_dir.scale);
1008 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_close_initiated,
1009 		 profile->original_dir.close_initiated);
1010 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_liberal_enabled,
1011 		 profile->liberal_mode);
1012 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_data_unacked,
1013 		 profile->original_dir.data_unacked);
1014 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_max_ack,
1015 		 profile->original_dir.last_ack_seen);
1016 	MLX5_SET(conn_track_aso, desg, last_win, profile->last_window);
1017 	MLX5_SET(conn_track_aso, desg, last_dir, profile->last_direction);
1018 	MLX5_SET(conn_track_aso, desg, last_index, profile->last_index);
1019 	MLX5_SET(conn_track_aso, desg, last_seq, profile->last_seq);
1020 	MLX5_SET(conn_track_aso, desg, last_ack, profile->last_ack);
1021 	MLX5_SET(conn_track_aso, desg, last_end, profile->last_end);
1022 	orig_dir = MLX5_ADDR_OF(conn_track_aso, desg, original_dir);
1023 	MLX5_SET(tcp_window_params, orig_dir, sent_end,
1024 		 profile->original_dir.sent_end);
1025 	MLX5_SET(tcp_window_params, orig_dir, reply_end,
1026 		 profile->original_dir.reply_end);
1027 	MLX5_SET(tcp_window_params, orig_dir, max_win,
1028 		 profile->original_dir.max_win);
1029 	MLX5_SET(tcp_window_params, orig_dir, max_ack,
1030 		 profile->original_dir.max_ack);
1031 	reply_dir = MLX5_ADDR_OF(conn_track_aso, desg, reply_dir);
1032 	MLX5_SET(tcp_window_params, reply_dir, sent_end,
1033 		 profile->reply_dir.sent_end);
1034 	MLX5_SET(tcp_window_params, reply_dir, reply_end,
1035 		 profile->reply_dir.reply_end);
1036 	MLX5_SET(tcp_window_params, reply_dir, max_win,
1037 		 profile->reply_dir.max_win);
1038 	MLX5_SET(tcp_window_params, reply_dir, max_ack,
1039 		 profile->reply_dir.max_ack);
1040 	sq->head++;
1041 	sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
1042 	rte_io_wmb();
1043 	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
1044 	rte_wmb();
1045 	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
1046 	rte_wmb();
1047 	rte_spinlock_unlock(&sq->sqsl);
1048 	return 1;
1049 }
1050 
1051 /*
1052  * Update the status of a contiguous range of CT objects, starting at the
1053  * SQ tail, to indicate they are ready to be used by flows, and copy out
1054  * the query data when it was requested.
1055  *
1056  * @param[in] sq
1057  *   Pointer to ASO CT SQ.
1058  * @param[in] num
1059  *   Number of CT structures to be updated.
1062  */
1063 static void
1064 mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
1065 {
1066 	uint16_t size = 1 << sq->log_desc_n;
1067 	uint16_t mask = size - 1;
1068 	uint16_t i;
1069 	struct mlx5_aso_ct_action *ct = NULL;
1070 	uint16_t idx;
1071 
1072 	for (i = 0; i < num; i++) {
1073 		idx = (uint16_t)((sq->tail + i) & mask);
1074 		ct = sq->elts[idx].ct;
1075 		MLX5_ASSERT(ct);
1076 		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
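		/* Copy out the 64-byte query data saved for this WQE, if any. */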
1077 		if (sq->elts[idx].query_data)
1078 			rte_memcpy(sq->elts[idx].query_data,
1079 				   (char *)((uintptr_t)sq->mr.addr + idx * 64),
1080 				   64);
1081 	}
1082 }
1083 
1084 /*
1085  * Post a WQE to the ASO CT SQ to query the current context.
1086  *
1087  * @param[in] mng
1088  *   Pointer to the CT pools management structure.
1089  * @param[in] ct
1090  *   Pointer to the generic CT structure related to the context.
1091  * @param[in] data
1092  *   Pointer to data area to be filled.
1093  *
1094  * @return
1095  *   1 if a query WQE was posted, 0 if the context or SQ is busy, -1 on failure.
1096  */
1097 static int
1098 mlx5_aso_ct_sq_query_single(struct mlx5_aso_ct_pools_mng *mng,
1099 			    struct mlx5_aso_ct_action *ct, char *data)
1100 {
1101 	volatile struct mlx5_aso_wqe *wqe = NULL;
1102 	struct mlx5_aso_sq *sq = &mng->aso_sq;
1103 	uint16_t size = 1 << sq->log_desc_n;
1104 	uint16_t mask = size - 1;
1105 	uint16_t res;
1106 	uint16_t wqe_idx;
1107 	struct mlx5_aso_ct_pool *pool;
1108 	enum mlx5_aso_ct_state state =
1109 				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1110 
1111 	if (state == ASO_CONNTRACK_FREE) {
1112 		DRV_LOG(ERR, "Fail: No context to query");
1113 		return -1;
1114 	} else if (state == ASO_CONNTRACK_WAIT) {
1115 		return 0;
1116 	}
1117 	rte_spinlock_lock(&sq->sqsl);
1118 	res = size - (uint16_t)(sq->head - sq->tail);
1119 	if (unlikely(!res)) {
1120 		rte_spinlock_unlock(&sq->sqsl);
1121 		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
1122 		return 0;
1123 	}
1124 	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_QUERY);
1125 	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
1126 	/* Confirm the location and address of the prefetch instruction. */
1127 	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
1128 	/* Fill next WQE. */
1129 	wqe_idx = sq->head & mask;
1130 	sq->elts[wqe_idx].ct = ct;
1131 	sq->elts[wqe_idx].query_data = data;
1132 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1133 	/* Each WQE will have a single CT object. */
1134 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
1135 						  ct->offset);
1136 	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
1137 			(ASO_OPC_MOD_CONNECTION_TRACKING <<
1138 			 WQE_CSEG_OPC_MOD_OFFSET) |
1139 			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
1140 	/*
1141 	 * No write request is required; ASO_OPER_LOGICAL_AND and
1142 	 * ASO_OP_ALWAYS_FALSE are both 0, and only BYTEWISE_64BYTE is needed
1143 	 * to read back the whole context.
1144 	 * "data_mask" is ignored for a query, so it is set to 0 directly to
1145 	 * save an endian swap (a modify operation rewrites it anyway).
1146 	 * The buffer address was already filled during initialization.
1147 	 */
1148 	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32(BYTEWISE_64BYTE <<
1149 					ASO_CSEG_DATA_MASK_MODE_OFFSET);
1150 	wqe->aso_cseg.data_mask = 0;
1151 	sq->head++;
1152 	/*
1153 	 * Each WQE contains 2 WQEBB's, even though
1154 	 * data segment is not used in this case.
1155 	 */
1156 	sq->pi += 2;
1157 	rte_io_wmb();
1158 	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
1159 	rte_wmb();
1160 	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
1161 	rte_wmb();
1162 	rte_spinlock_unlock(&sq->sqsl);
1163 	return 1;
1164 }
1165 
1166 /*
1167  * Handle completions from WQEs sent to ASO CT.
1168  *
1169  * @param[in] mng
1170  *   Pointer to the CT pools management structure.
1171  */
1172 static void
1173 mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
1174 {
1175 	struct mlx5_aso_sq *sq = &mng->aso_sq;
1176 	struct mlx5_aso_cq *cq = &sq->cq;
1177 	volatile struct mlx5_cqe *restrict cqe;
1178 	const uint32_t cq_size = 1 << cq->log_desc_n;
1179 	const uint32_t mask = cq_size - 1;
1180 	uint32_t idx;
1181 	uint32_t next_idx;
1182 	uint16_t max;
1183 	uint16_t n = 0;
1184 	int ret;
1185 
1186 	rte_spinlock_lock(&sq->sqsl);
1187 	max = (uint16_t)(sq->head - sq->tail);
1188 	if (unlikely(!max)) {
1189 		rte_spinlock_unlock(&sq->sqsl);
1190 		return;
1191 	}
1192 	next_idx = cq->cq_ci & mask;
1193 	do {
1194 		idx = next_idx;
1195 		next_idx = (cq->cq_ci + 1) & mask;
1196 		/* Need to confirm the position of the prefetch. */
1197 		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
1198 		cqe = &cq->cq_obj.cqes[idx];
1199 		ret = check_cqe(cqe, cq_size, cq->cq_ci);
1200 		/*
1201 		 * Be sure owner read is done before any other cookie field or
1202 		 * opaque field.
1203 		 */
1204 		rte_io_rmb();
1205 		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
1206 			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
1207 				break;
1208 			mlx5_aso_cqe_err_handle(sq);
1209 		} else {
1210 			n++;
1211 		}
1212 		cq->cq_ci++;
1213 	} while (1);
1214 	if (likely(n)) {
1215 		mlx5_aso_ct_status_update(sq, n);
1216 		sq->tail += n;
1217 		rte_io_wmb();
1218 		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
1219 	}
1220 	rte_spinlock_unlock(&sq->sqsl);
1221 }
1222 
1223 /*
1224  * Update connection tracking ASO context by sending WQE.
1225  *
1226  * @param[in] sh
1227  *   Pointer to mlx5_dev_ctx_shared object.
1228  * @param[in] ct
1229  *   Pointer to connection tracking offload object.
1230  * @param[in] profile
1231  *   Pointer to connection tracking TCP parameter.
1232  *
1233  * @return
1234  *   0 on success, -1 on failure.
1235  */
1236 int
1237 mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
1238 			  struct mlx5_aso_ct_action *ct,
1239 			  const struct rte_flow_action_conntrack *profile)
1240 {
1241 	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1242 	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1243 	struct mlx5_aso_ct_pool *pool;
1244 
1245 	MLX5_ASSERT(ct);
1246 	do {
1247 		mlx5_aso_ct_completion_handle(mng);
1248 		if (mlx5_aso_ct_sq_enqueue_single(mng, ct, profile))
1249 			return 0;
1250 		/* Waiting for wqe resource. */
1251 		rte_delay_us_sleep(10u);
1252 	} while (--poll_wqe_times);
1253 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1254 	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
1255 		ct->offset, pool->index);
1256 	return -1;
1257 }
1258 
1259 /*
1260  * The routine is used to wait for WQE completion to continue with queried data.
1261  *
1262  * @param[in] sh
1263  *   Pointer to mlx5_dev_ctx_shared object.
1264  * @param[in] ct
1265  *   Pointer to connection tracking offload object.
1266  *
1267  * @return
1268  *   0 on success, -1 on failure.
1269  */
1270 int
1271 mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
1272 		       struct mlx5_aso_ct_action *ct)
1273 {
1274 	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1275 	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1276 	struct mlx5_aso_ct_pool *pool;
1277 
1278 	if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
1279 	    ASO_CONNTRACK_READY)
1280 		return 0;
1281 	do {
1282 		mlx5_aso_ct_completion_handle(mng);
1283 		if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
1284 		    ASO_CONNTRACK_READY)
1285 			return 0;
1286 		/* Waiting for CQE ready; consider whether to block or sleep. */
1287 		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1288 	} while (--poll_cqe_times);
1289 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1290 	DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
1291 		ct->offset, pool->index);
1292 	return -1;
1293 }
1294 
1295 /*
1296  * Convert the hardware conntrack data format into the profile.
1297  *
1298  * @param[in] profile
1299  *   Pointer to conntrack profile to be filled after query.
1300  * @param[in] wdata
1301  *   Pointer to data fetched from hardware.
1302  */
1303 static inline void
1304 mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
1305 			char *wdata)
1306 {
1307 	void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir);
1308 	void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir);
1309 
1310 	/* MLX5_GET16 should be taken into consideration. */
1311 	profile->state = (enum rte_flow_conntrack_state)
1312 			 MLX5_GET(conn_track_aso, wdata, state);
1313 	profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track);
1314 	profile->selective_ack = MLX5_GET(conn_track_aso, wdata,
1315 					  sack_permitted);
1316 	profile->live_connection = MLX5_GET(conn_track_aso, wdata,
1317 					    connection_assured);
1318 	profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata,
1319 						 challenged_acked);
1320 	profile->max_ack_window = MLX5_GET(conn_track_aso, wdata,
1321 					   max_ack_window);
1322 	profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata,
1323 						 retranmission_limit);
1324 	profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win);
1325 	profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir);
1326 	profile->last_index = (enum rte_flow_conntrack_tcp_last_index)
1327 			      MLX5_GET(conn_track_aso, wdata, last_index);
1328 	profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq);
1329 	profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack);
1330 	profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end);
1331 	profile->liberal_mode = MLX5_GET(conn_track_aso, wdata,
1332 				reply_direction_tcp_liberal_enabled) |
1333 				MLX5_GET(conn_track_aso, wdata,
1334 				original_direction_tcp_liberal_enabled);
1335 	/* The RTE profile has a single liberal flag, not one per direction. */
1336 	profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata,
1337 					    reply_direction_tcp_scale);
1338 	profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
1339 					reply_direction_tcp_close_initiated);
1340 	profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
1341 					reply_direction_tcp_data_unacked);
1342 	profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
1343 					reply_direction_tcp_max_ack);
1344 	profile->reply_dir.sent_end = MLX5_GET(tcp_window_params,
1345 					       r_dir, sent_end);
1346 	profile->reply_dir.reply_end = MLX5_GET(tcp_window_params,
1347 						r_dir, reply_end);
1348 	profile->reply_dir.max_win = MLX5_GET(tcp_window_params,
1349 					      r_dir, max_win);
1350 	profile->reply_dir.max_ack = MLX5_GET(tcp_window_params,
1351 					      r_dir, max_ack);
1352 	profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata,
1353 					       original_direction_tcp_scale);
1354 	profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
1355 					original_direction_tcp_close_initiated);
1356 	profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
1357 					original_direction_tcp_data_unacked);
1358 	profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
1359 					original_direction_tcp_max_ack);
1360 	profile->original_dir.sent_end = MLX5_GET(tcp_window_params,
1361 						  o_dir, sent_end);
1362 	profile->original_dir.reply_end = MLX5_GET(tcp_window_params,
1363 						   o_dir, reply_end);
1364 	profile->original_dir.max_win = MLX5_GET(tcp_window_params,
1365 						 o_dir, max_win);
1366 	profile->original_dir.max_ack = MLX5_GET(tcp_window_params,
1367 						 o_dir, max_ack);
1368 }
1369 
1370 /*
1371  * Query the connection tracking context by sending a WQE.
1372  *
1373  * @param[in] sh
1374  *   Pointer to mlx5_dev_ctx_shared object.
1375  * @param[in] ct
1376  *   Pointer to connection tracking offload object.
1377  * @param[out] profile
1378  *   Pointer to connection tracking TCP information.
1379  *
1380  * @return
1381  *   0 on success, -1 on failure.
1382  */
1383 int
1384 mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
1385 			 struct mlx5_aso_ct_action *ct,
1386 			 struct rte_flow_action_conntrack *profile)
1387 {
1388 	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1389 	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1390 	struct mlx5_aso_ct_pool *pool;
1391 	char out_data[64 * 2];
1392 	int ret;
1393 
1394 	MLX5_ASSERT(ct);
1395 	do {
1396 		mlx5_aso_ct_completion_handle(mng);
1397 		ret = mlx5_aso_ct_sq_query_single(mng, ct, out_data);
1398 		if (ret < 0)
1399 			return ret;
1400 		else if (ret > 0)
1401 			goto data_handle;
1402 		/* Waiting for wqe resource or state. */
1403 		else
1404 			rte_delay_us_sleep(10u);
1405 	} while (--poll_wqe_times);
1406 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1407 	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
1408 		ct->offset, pool->index);
1409 	return -1;
1410 data_handle:
1411 	ret = mlx5_aso_ct_wait_ready(sh, ct);
1412 	if (!ret)
1413 		mlx5_aso_ct_obj_analyze(profile, out_data);
1414 	return ret;
1415 }
1416 
1417 /*
1418  * Make sure the conntrack context is synchronized with hardware before
1419  * creating a flow rule that uses it.
1420  *
1421  * @param[in] sh
1422  *   Pointer to shared device context.
1423  * @param[in] ct
1424  *   Pointer to connection tracking offload object.
1425  *
1426  * @return
1427  *   0 on success, a negative errno value otherwise and rte_errno is set.
1428  */
1429 int
1430 mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
1431 		      struct mlx5_aso_ct_action *ct)
1432 {
1433 	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1434 	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1435 	enum mlx5_aso_ct_state state =
1436 				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1437 
1438 	if (state == ASO_CONNTRACK_FREE) {
1439 		rte_errno = ENXIO;
1440 		return -rte_errno;
1441 	} else if (state == ASO_CONNTRACK_READY ||
1442 		   state == ASO_CONNTRACK_QUERY) {
1443 		return 0;
1444 	}
1445 	do {
1446 		mlx5_aso_ct_completion_handle(mng);
1447 		state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1448 		if (state == ASO_CONNTRACK_READY ||
1449 		    state == ASO_CONNTRACK_QUERY)
1450 			return 0;
1451 		/* Waiting for CQE ready; consider whether to block or sleep. */
1452 		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1453 	} while (--poll_cqe_times);
1454 	rte_errno = EBUSY;
1455 	return -rte_errno;
1456 }
1457