1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2020 Mellanox Technologies, Ltd
3  */
4 #include <mlx5_prm.h>
5 #include <rte_malloc.h>
6 #include <rte_cycles.h>
7 #include <rte_eal_paging.h>
8 
9 #include <mlx5_malloc.h>
10 #include <mlx5_common_os.h>
11 #include <mlx5_common_devx.h>
12 
13 #include "mlx5.h"
14 #include "mlx5_flow.h"
15 
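/*
 * ASO (Advanced Steering Operation) queue infrastructure shared by the
 * flow-hit (age), flow meter (policer) and connection tracking features.
 * Each feature owns a dedicated DevX SQ/CQ pair: WQEs posted to the SQ ask
 * the hardware to read or modify ASO objects, completions are reported via
 * CQEs, and bulk data (flow-hit bitmaps, CT contexts) is exchanged through
 * a registered memory region.
 */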
16 /**
17  * Destroy Completion Queue used for ASO access.
18  *
19  * @param[in] cq
20  *   ASO CQ to destroy.
21  */
22 static void
23 mlx5_aso_cq_destroy(struct mlx5_aso_cq *cq)
24 {
25 	if (cq->cq_obj.cq)
26 		mlx5_devx_cq_destroy(&cq->cq_obj);
27 	memset(cq, 0, sizeof(*cq));
28 }
29 
30 /**
31  * Create Completion Queue used for ASO access.
32  *
33  * @param[in] ctx
34  *   Context returned from mlx5 open_device() glue function.
35  * @param[in/out] cq
36  *   Pointer to CQ to create.
37  * @param[in] log_desc_n
38  *   Log of number of descriptors in queue.
39  * @param[in] socket
40  *   Socket to use for allocation.
41  * @param[in] uar_page_id
42  *   UAR page ID to use.
43  *
44  * @return
45  *   0 on success, a negative errno value otherwise and rte_errno is set.
46  */
47 static int
48 mlx5_aso_cq_create(void *ctx, struct mlx5_aso_cq *cq, uint16_t log_desc_n,
49 		   int socket, int uar_page_id)
50 {
51 	struct mlx5_devx_cq_attr attr = {
52 		.uar_page_id = uar_page_id,
53 	};
54 
55 	cq->log_desc_n = log_desc_n;
56 	cq->cq_ci = 0;
57 	return mlx5_devx_cq_create(ctx, &cq->cq_obj, log_desc_n, &attr, socket);
58 }
59 
60 /**
61  * Free MR resources.
62  *
63  * @param[in] sh
64  *   Pointer to shared device context.
65  * @param[in] mr
66  *   MR to free.
67  */
68 static void
69 mlx5_aso_dereg_mr(struct mlx5_dev_ctx_shared *sh, struct mlx5_pmd_mr *mr)
70 {
71 	void *addr = mr->addr;
72 
73 	sh->share_cache.dereg_mr_cb(mr);
74 	mlx5_free(addr);
75 	memset(mr, 0, sizeof(*mr));
76 }
77 
78 /**
79  * Register Memory Region.
80  *
81  * @param[in] sh
82  *   Pointer to shared device context.
83  * @param[in] length
84  *   Size of MR buffer.
85  * @param[in/out] mr
86  *   Pointer to MR to create.
87  * @param[in] socket
88  *   Socket to use for allocation.
89  *
90  * @return
91  *   0 on success, a negative errno value otherwise and rte_errno is set.
92  */
93 static int
94 mlx5_aso_reg_mr(struct mlx5_dev_ctx_shared *sh, size_t length,
95 		struct mlx5_pmd_mr *mr, int socket)
96 {
97 
98 	int ret;
99 
100 	mr->addr = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, length, 4096,
101 			       socket);
102 	if (!mr->addr) {
103 		DRV_LOG(ERR, "Failed to create ASO bits mem for MR.");
104 		return -1;
105 	}
106 	ret = sh->share_cache.reg_mr_cb(sh->pd, mr->addr, length, mr);
107 	if (ret) {
108 		DRV_LOG(ERR, "Failed to create direct Mkey.");
109 		mlx5_free(mr->addr);
110 		return -1;
111 	}
112 	return 0;
113 }
114 
115 /**
116  * Destroy Send Queue used for ASO access.
117  *
118  * @param[in] sq
119  *   ASO SQ to destroy.
120  */
121 static void
122 mlx5_aso_destroy_sq(struct mlx5_aso_sq *sq)
123 {
124 	mlx5_devx_sq_destroy(&sq->sq_obj);
125 	mlx5_aso_cq_destroy(&sq->cq);
126 	memset(sq, 0, sizeof(*sq));
127 }
128 
129 /**
130  * Initialize Send Queue used for ASO access.
131  *
132  * @param[in] sq
133  *   ASO SQ to initialize.
134  */
135 static void
136 mlx5_aso_age_init_sq(struct mlx5_aso_sq *sq)
137 {
138 	volatile struct mlx5_aso_wqe *restrict wqe;
139 	int i;
140 	int size = 1 << sq->log_desc_n;
141 	uint64_t addr;
142 
143 	/* The fields set below stay constant and are not rewritten per WQE post. */
144 	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
145 		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
146 							  (sizeof(*wqe) >> 4));
147 		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
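		/*
		 * Bind each WQE to its own MLX5_ASO_AGE_ACTIONS_PER_POOL / 8
		 * byte slice of the registered memory: one flow-hit bit per
		 * age action in the pool.
		 */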
148 		addr = (uint64_t)((uint64_t *)sq->mr.addr + i *
149 					    MLX5_ASO_AGE_ACTIONS_PER_POOL / 64);
150 		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
151 		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
152 		wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
153 			(0u |
154 			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
155 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
156 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
157 			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
158 		wqe->aso_cseg.data_mask = RTE_BE64(UINT64_MAX);
159 	}
160 }
161 
162 /**
163  * Initialize Send Queue used for ASO flow meter access.
164  *
165  * @param[in] sq
166  *   ASO SQ to initialize.
167  */
168 static void
169 mlx5_aso_mtr_init_sq(struct mlx5_aso_sq *sq)
170 {
171 	volatile struct mlx5_aso_wqe *restrict wqe;
172 	int i;
173 	int size = 1 << sq->log_desc_n;
174 
175 	/* The fields set below stay constant and are not rewritten per WQE post. */
176 	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
177 		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
178 							  (sizeof(*wqe) >> 4));
179 		wqe->aso_cseg.operand_masks = RTE_BE32(0u |
180 			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
181 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
182 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
183 			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
184 		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
185 							 MLX5_COMP_MODE_OFFSET);
186 	}
187 }
188 
189 /*
190  * Initialize Send Queue used for ASO connection tracking.
191  *
192  * @param[in] sq
193  *   ASO SQ to initialize.
194  */
195 static void
196 mlx5_aso_ct_init_sq(struct mlx5_aso_sq *sq)
197 {
198 	volatile struct mlx5_aso_wqe *restrict wqe;
199 	int i;
200 	int size = 1 << sq->log_desc_n;
201 	uint64_t addr;
202 
203 	/* The fields set below stay constant and are not rewritten per WQE post. */
204 	for (i = 0, wqe = &sq->sq_obj.aso_wqes[0]; i < size; ++i, ++wqe) {
205 		wqe->general_cseg.sq_ds = rte_cpu_to_be_32((sq->sqn << 8) |
206 							  (sizeof(*wqe) >> 4));
207 		/* One unique MR for the query data. */
208 		wqe->aso_cseg.lkey = rte_cpu_to_be_32(sq->mr.lkey);
209 		/* The magic number 64 is the length of an ASO CT object. */
210 		addr = (uint64_t)((uintptr_t)sq->mr.addr + i * 64);
211 		wqe->aso_cseg.va_h = rte_cpu_to_be_32((uint32_t)(addr >> 32));
212 		wqe->aso_cseg.va_l_r = rte_cpu_to_be_32((uint32_t)addr | 1u);
213 		/*
214 		 * The operand_masks value differs between modify and query
215 		 * operations, and data_mask may differ for each modification;
216 		 * for a query it can be zero and is ignored.
217 		 * A CQE is always requested so that software can tell when the
218 		 * object is ready to be used by a flow or when the queried
219 		 * data can be read.
220 		 */
221 		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
222 						   MLX5_COMP_MODE_OFFSET);
223 	}
224 }
225 
226 /**
227  * Create Send Queue used for ASO access.
228  *
229  * @param[in] ctx
230  *   Context returned from mlx5 open_device() glue function.
231  * @param[in/out] sq
232  *   Pointer to SQ to create.
233  * @param[in] socket
234  *   Socket to use for allocation.
235  * @param[in] uar
236  *   User Access Region object.
237  * @param[in] pdn
238  *   Protection Domain number to use.
239  * @param[in] log_desc_n
240  *   Log of number of descriptors in queue.
241  * @param[in] ts_format
242  *   Timestamp format supported by the queue.
243  *
244  * @return
245  *   0 on success, a negative errno value otherwise and rte_errno is set.
246  */
247 static int
248 mlx5_aso_sq_create(void *ctx, struct mlx5_aso_sq *sq, int socket, void *uar,
249 		   uint32_t pdn, uint16_t log_desc_n, uint32_t ts_format)
250 {
251 	struct mlx5_devx_create_sq_attr attr = {
252 		.user_index = 0xFFFF,
253 		.wq_attr = (struct mlx5_devx_wq_attr){
254 			.pd = pdn,
255 			.uar_page = mlx5_os_get_devx_uar_page_id(uar),
256 		},
257 		.ts_format = mlx5_ts_format_conv(ts_format),
258 	};
259 	struct mlx5_devx_modify_sq_attr modify_attr = {
260 		.state = MLX5_SQC_STATE_RDY,
261 	};
262 	uint16_t log_wqbb_n;
263 	int ret;
264 
265 	if (mlx5_aso_cq_create(ctx, &sq->cq, log_desc_n, socket,
266 			       mlx5_os_get_devx_uar_page_id(uar)))
267 		goto error;
268 	sq->log_desc_n = log_desc_n;
269 	attr.cqn = sq->cq.cq_obj.cq->id;
270 	/* mlx5_aso_wqe is twice the size of mlx5_wqe, so double the WQEBB count. */
271 	log_wqbb_n = log_desc_n + 1;
272 	ret = mlx5_devx_sq_create(ctx, &sq->sq_obj, log_wqbb_n, &attr, socket);
273 	if (ret) {
274 		DRV_LOG(ERR, "Can't create SQ object.");
275 		rte_errno = ENOMEM;
276 		goto error;
277 	}
278 	ret = mlx5_devx_cmd_modify_sq(sq->sq_obj.sq, &modify_attr);
279 	if (ret) {
280 		DRV_LOG(ERR, "Can't change SQ state to ready.");
281 		rte_errno = ENOMEM;
282 		goto error;
283 	}
284 	sq->pi = 0;
285 	sq->head = 0;
286 	sq->tail = 0;
287 	sq->sqn = sq->sq_obj.sq->id;
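	/*
	 * uar_addr points into the device UAR page; the enqueue routines
	 * ring the doorbell by writing the first 8 bytes of the last WQE
	 * to this address.
	 */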
288 	sq->uar_addr = mlx5_os_get_devx_uar_reg_addr(uar);
289 	rte_spinlock_init(&sq->sqsl);
290 	return 0;
291 error:
292 	mlx5_aso_destroy_sq(sq);
293 	return -1;
294 }
295 
296 /**
297  * API to create and initialize Send Queue used for ASO access.
298  *
299  * @param[in] sh
300  *   Pointer to shared device context.
301  * @param[in] aso_opc_mod
302  *   Mode of ASO feature.
303  *
304  * @return
305  *   0 on success, a negative errno value otherwise and rte_errno is set.
306  */
307 int
308 mlx5_aso_queue_init(struct mlx5_dev_ctx_shared *sh,
309 		    enum mlx5_access_aso_opc_mod aso_opc_mod)
310 {
311 	uint32_t sq_desc_n = 1 << MLX5_ASO_QUEUE_LOG_DESC;
312 
313 	switch (aso_opc_mod) {
314 	case ASO_OPC_MOD_FLOW_HIT:
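		/*
		 * The MR holds one flow-hit bitmap of
		 * MLX5_ASO_AGE_ACTIONS_PER_POOL / 8 bytes per SQ descriptor,
		 * one bit per age action.
		 */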
315 		if (mlx5_aso_reg_mr(sh, (MLX5_ASO_AGE_ACTIONS_PER_POOL / 8) *
316 				    sq_desc_n, &sh->aso_age_mng->aso_sq.mr, 0))
317 			return -1;
318 		if (mlx5_aso_sq_create(sh->ctx, &sh->aso_age_mng->aso_sq, 0,
319 				  sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
320 				  sh->sq_ts_format)) {
321 			mlx5_aso_dereg_mr(sh, &sh->aso_age_mng->aso_sq.mr);
322 			return -1;
323 		}
324 		mlx5_aso_age_init_sq(&sh->aso_age_mng->aso_sq);
325 		break;
326 	case ASO_OPC_MOD_POLICER:
327 		if (mlx5_aso_sq_create(sh->ctx, &sh->mtrmng->pools_mng.sq, 0,
328 				  sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
329 				  sh->sq_ts_format))
330 			return -1;
331 		mlx5_aso_mtr_init_sq(&sh->mtrmng->pools_mng.sq);
332 		break;
333 	case ASO_OPC_MOD_CONNECTION_TRACKING:
334 		/* 64B per object for query. */
335 		if (mlx5_aso_reg_mr(sh, 64 * sq_desc_n,
336 				    &sh->ct_mng->aso_sq.mr, 0))
337 			return -1;
338 		if (mlx5_aso_sq_create(sh->ctx, &sh->ct_mng->aso_sq, 0,
339 				sh->tx_uar, sh->pdn, MLX5_ASO_QUEUE_LOG_DESC,
340 				sh->sq_ts_format)) {
341 			mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
342 			return -1;
343 		}
344 		mlx5_aso_ct_init_sq(&sh->ct_mng->aso_sq);
345 		break;
346 	default:
347 		DRV_LOG(ERR, "Unknown ASO operation mode");
348 		return -1;
349 	}
350 	return 0;
351 }
352 
353 /**
354  * API to destroy Send Queue used for ASO access.
355  *
356  * @param[in] sh
357  *   Pointer to shared device context.
358  * @param[in] aso_opc_mod
359  *   Mode of ASO feature.
360  */
361 void
362 mlx5_aso_queue_uninit(struct mlx5_dev_ctx_shared *sh,
363 		      enum mlx5_access_aso_opc_mod aso_opc_mod)
364 {
365 	struct mlx5_aso_sq *sq;
366 
367 	switch (aso_opc_mod) {
368 	case ASO_OPC_MOD_FLOW_HIT:
369 		mlx5_aso_dereg_mr(sh, &sh->aso_age_mng->aso_sq.mr);
370 		sq = &sh->aso_age_mng->aso_sq;
371 		break;
372 	case ASO_OPC_MOD_POLICER:
373 		sq = &sh->mtrmng->pools_mng.sq;
374 		break;
375 	case ASO_OPC_MOD_CONNECTION_TRACKING:
376 		mlx5_aso_dereg_mr(sh, &sh->ct_mng->aso_sq.mr);
377 		sq = &sh->ct_mng->aso_sq;
378 		break;
379 	default:
380 		DRV_LOG(ERR, "Unknown ASO operation mode");
381 		return;
382 	}
383 	mlx5_aso_destroy_sq(sq);
384 }
385 
386 /**
387  * Write a burst of WQEs to ASO SQ.
388  *
389  * @param[in] mng
390  *   ASO management data, contains the SQ.
391  * @param[in] n
392  *   Index of the last valid pool.
393  *
394  * @return
395  *   Number of WQEs in burst.
396  */
397 static uint16_t
398 mlx5_aso_sq_enqueue_burst(struct mlx5_aso_age_mng *mng, uint16_t n)
399 {
400 	volatile struct mlx5_aso_wqe *wqe;
401 	struct mlx5_aso_sq *sq = &mng->aso_sq;
402 	struct mlx5_aso_age_pool *pool;
403 	uint16_t size = 1 << sq->log_desc_n;
404 	uint16_t mask = size - 1;
405 	uint16_t max;
406 	uint16_t start_head = sq->head;
407 
408 	max = RTE_MIN(size - (uint16_t)(sq->head - sq->tail), n - sq->next);
409 	if (unlikely(!max))
410 		return 0;
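	/*
	 * The burst size is recorded only in the first element; the
	 * completion handler credits the whole burst at once, since only
	 * the last WQE of the burst unconditionally requests a CQE.
	 */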
411 	sq->elts[start_head & mask].burst_size = max;
412 	do {
413 		wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
414 		rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
415 		/* Fill next WQE. */
416 		rte_spinlock_lock(&mng->resize_sl);
417 		pool = mng->pools[sq->next];
418 		rte_spinlock_unlock(&mng->resize_sl);
419 		sq->elts[sq->head & mask].pool = pool;
420 		wqe->general_cseg.misc =
421 				rte_cpu_to_be_32(((struct mlx5_devx_obj *)
422 						 (pool->flow_hit_aso_obj))->id);
423 		wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ONLY_FIRST_ERR <<
424 							 MLX5_COMP_MODE_OFFSET);
425 		wqe->general_cseg.opcode = rte_cpu_to_be_32
426 						(MLX5_OPCODE_ACCESS_ASO |
427 						 (ASO_OPC_MOD_FLOW_HIT <<
428 						  WQE_CSEG_OPC_MOD_OFFSET) |
429 						 (sq->pi <<
430 						  WQE_CSEG_WQE_INDEX_OFFSET));
431 		sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
432 		sq->head++;
433 		sq->next++;
434 		max--;
435 	} while (max);
436 	wqe->general_cseg.flags = RTE_BE32(MLX5_COMP_ALWAYS <<
437 							 MLX5_COMP_MODE_OFFSET);
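	/*
	 * Doorbell sequence: make the WQEs visible in memory, update the
	 * doorbell record, then write the first 8 bytes of the last WQE to
	 * the UAR register to notify the hardware.
	 */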
438 	rte_io_wmb();
439 	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
440 	rte_wmb();
441 	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH.*/
442 	rte_wmb();
443 	return sq->elts[start_head & mask].burst_size;
444 }
445 
446 /**
447  * Debug utility function. Dump contents of error CQE and WQE.
448  *
449  * @param[in] cqe
450  *   Error CQE to dump.
451  * @param[in] wqe
452  *   Error WQE to dump.
453  */
454 static void
455 mlx5_aso_dump_err_objs(volatile uint32_t *cqe, volatile uint32_t *wqe)
456 {
457 	int i;
458 
459 	DRV_LOG(ERR, "Error cqe:");
460 	for (i = 0; i < 16; i += 4)
461 		DRV_LOG(ERR, "%08X %08X %08X %08X", cqe[i], cqe[i + 1],
462 			cqe[i + 2], cqe[i + 3]);
463 	DRV_LOG(ERR, "\nError wqe:");
464 	for (i = 0; i < (int)sizeof(struct mlx5_aso_wqe) / 4; i += 4)
465 		DRV_LOG(ERR, "%08X %08X %08X %08X", wqe[i], wqe[i + 1],
466 			wqe[i + 2], wqe[i + 3]);
467 }
468 
469 /**
470  * Handle case of error CQE.
471  *
472  * @param[in] sq
473  *   ASO SQ to use.
474  */
475 static void
476 mlx5_aso_cqe_err_handle(struct mlx5_aso_sq *sq)
477 {
478 	struct mlx5_aso_cq *cq = &sq->cq;
479 	uint32_t idx = cq->cq_ci & ((1 << cq->log_desc_n) - 1);
480 	volatile struct mlx5_err_cqe *cqe =
481 			(volatile struct mlx5_err_cqe *)&cq->cq_obj.cqes[idx];
482 
483 	cq->errors++;
484 	idx = rte_be_to_cpu_16(cqe->wqe_counter) & (1u << sq->log_desc_n);
485 	mlx5_aso_dump_err_objs((volatile uint32_t *)cqe,
486 			       (volatile uint32_t *)&sq->sq_obj.aso_wqes[idx]);
487 }
488 
489 /**
490  * Update ASO objects upon completion.
491  *
492  * @param[in] sh
493  *   Shared device context.
494  * @param[in] n
495  *   Number of completed ASO objects.
496  */
497 static void
498 mlx5_aso_age_action_update(struct mlx5_dev_ctx_shared *sh, uint16_t n)
499 {
500 	struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
501 	struct mlx5_aso_sq *sq = &mng->aso_sq;
502 	struct mlx5_age_info *age_info;
503 	const uint16_t size = 1 << sq->log_desc_n;
504 	const uint16_t mask = size - 1;
505 	const uint64_t curr = MLX5_CURR_TIME_SEC;
506 	uint16_t expected = AGE_CANDIDATE;
507 	uint16_t i;
508 
509 	for (i = 0; i < n; ++i) {
510 		uint16_t idx = (sq->tail + i) & mask;
511 		struct mlx5_aso_age_pool *pool = sq->elts[idx].pool;
512 		uint64_t diff = curr - pool->time_of_last_age_check;
513 		uint64_t *addr = sq->mr.addr;
514 		int j;
515 
516 		addr += idx * MLX5_ASO_AGE_ACTIONS_PER_POOL / 64;
517 		pool->time_of_last_age_check = curr;
518 		for (j = 0; j < MLX5_ASO_AGE_ACTIONS_PER_POOL; j++) {
519 			struct mlx5_aso_age_action *act = &pool->actions[j];
520 			struct mlx5_age_param *ap = &act->age_params;
521 			uint8_t byte;
522 			uint8_t offset;
523 			uint8_t *u8addr;
524 			uint8_t hit;
525 
526 			if (__atomic_load_n(&ap->state, __ATOMIC_RELAXED) !=
527 					    AGE_CANDIDATE)
528 				continue;
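			/*
			 * The 64-byte flow-hit data is a bitmap with one bit
			 * per action, stored most-significant byte first,
			 * hence byte index 63 - j / 8 for action j.
			 */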
529 			byte = 63 - (j / 8);
530 			offset = j % 8;
531 			u8addr = (uint8_t *)addr;
532 			hit = (u8addr[byte] >> offset) & 0x1;
533 			if (hit) {
534 				__atomic_store_n(&ap->sec_since_last_hit, 0,
535 						 __ATOMIC_RELAXED);
536 			} else {
537 				struct mlx5_priv *priv;
538 
539 				__atomic_fetch_add(&ap->sec_since_last_hit,
540 						   diff, __ATOMIC_RELAXED);
541 				/* If timeout passed add to aged-out list. */
542 				if (ap->sec_since_last_hit <= ap->timeout)
543 					continue;
544 				priv =
545 				rte_eth_devices[ap->port_id].data->dev_private;
546 				age_info = GET_PORT_AGE_INFO(priv);
547 				rte_spinlock_lock(&age_info->aged_sl);
548 				if (__atomic_compare_exchange_n(&ap->state,
549 								&expected,
550 								AGE_TMOUT,
551 								false,
552 							       __ATOMIC_RELAXED,
553 							    __ATOMIC_RELAXED)) {
554 					LIST_INSERT_HEAD(&age_info->aged_aso,
555 							 act, next);
556 					MLX5_AGE_SET(age_info,
557 						     MLX5_AGE_EVENT_NEW);
558 				}
559 				rte_spinlock_unlock(&age_info->aged_sl);
560 			}
561 		}
562 	}
563 	mlx5_age_event_prepare(sh);
564 }
565 
566 /**
567  * Handle completions from WQEs sent to ASO SQ.
568  *
569  * @param[in] sh
570  *   Shared device context.
571  *
572  * @return
573  *   Number of CQEs handled.
574  */
575 static uint16_t
576 mlx5_aso_completion_handle(struct mlx5_dev_ctx_shared *sh)
577 {
578 	struct mlx5_aso_age_mng *mng = sh->aso_age_mng;
579 	struct mlx5_aso_sq *sq = &mng->aso_sq;
580 	struct mlx5_aso_cq *cq = &sq->cq;
581 	volatile struct mlx5_cqe *restrict cqe;
582 	const unsigned int cq_size = 1 << cq->log_desc_n;
583 	const unsigned int mask = cq_size - 1;
584 	uint32_t idx;
585 	uint32_t next_idx = cq->cq_ci & mask;
586 	const uint16_t max = (uint16_t)(sq->head - sq->tail);
587 	uint16_t i = 0;
588 	int ret;
589 	if (unlikely(!max))
590 		return 0;
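	/*
	 * Poll CQEs until one still owned by the hardware is found. A
	 * successful CQE accounts for the whole burst recorded at enqueue
	 * time; error CQEs are dumped and skipped.
	 */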
591 	do {
592 		idx = next_idx;
593 		next_idx = (cq->cq_ci + 1) & mask;
594 		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
595 		cqe = &cq->cq_obj.cqes[idx];
596 		ret = check_cqe(cqe, cq_size, cq->cq_ci);
597 		/*
598 		 * Be sure owner read is done before any other cookie field or
599 		 * opaque field.
600 		 */
601 		rte_io_rmb();
602 		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
603 			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
604 				break;
605 			mlx5_aso_cqe_err_handle(sq);
606 		} else {
607 			i += sq->elts[(sq->tail + i) & mask].burst_size;
608 		}
609 		cq->cq_ci++;
610 	} while (1);
611 	if (likely(i)) {
612 		mlx5_aso_age_action_update(sh, i);
613 		sq->tail += i;
614 		rte_io_wmb();
615 		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
616 	}
617 	return i;
618 }
619 
620 /**
621  * Periodically read CQEs and send WQEs to ASO SQ.
622  *
623  * @param[in] arg
624  *   Shared device context containing the ASO SQ.
625  */
626 static void
627 mlx5_flow_aso_alarm(void *arg)
628 {
629 	struct mlx5_dev_ctx_shared *sh = arg;
630 	struct mlx5_aso_sq *sq = &sh->aso_age_mng->aso_sq;
631 	uint32_t us = 100u;
632 	uint16_t n;
633 
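	/*
	 * Every 100us: drain completions and post flow-hit queries for the
	 * next pools. Once all pools have been visited, rest for one second
	 * before starting the next pass.
	 */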
634 	rte_spinlock_lock(&sh->aso_age_mng->resize_sl);
635 	n = sh->aso_age_mng->next;
636 	rte_spinlock_unlock(&sh->aso_age_mng->resize_sl);
637 	mlx5_aso_completion_handle(sh);
638 	if (sq->next == n) {
639 		/* End of loop: wait 1 second. */
640 		us = US_PER_S;
641 		sq->next = 0;
642 	}
643 	mlx5_aso_sq_enqueue_burst(sh->aso_age_mng, n);
644 	if (rte_eal_alarm_set(us, mlx5_flow_aso_alarm, sh))
645 		DRV_LOG(ERR, "Cannot reinitialize aso alarm.");
646 }
647 
648 /**
649  * API to start ASO access using ASO SQ.
650  *
651  * @param[in] sh
652  *   Pointer to shared device context.
653  *
654  * @return
655  *   0 on success, a negative errno value otherwise and rte_errno is set.
656  */
657 int
658 mlx5_aso_flow_hit_queue_poll_start(struct mlx5_dev_ctx_shared *sh)
659 {
660 	if (rte_eal_alarm_set(US_PER_S, mlx5_flow_aso_alarm, sh)) {
661 		DRV_LOG(ERR, "Cannot reinitialize ASO age alarm.");
662 		return -rte_errno;
663 	}
664 	return 0;
665 }
666 
667 /**
668  * API to stop ASO access using ASO SQ.
669  *
670  * @param[in] sh
671  *   Pointer to shared device context.
672  *
673  * @return
674  *   0 on success, a negative errno value otherwise and rte_errno is set.
675  */
676 int
677 mlx5_aso_flow_hit_queue_poll_stop(struct mlx5_dev_ctx_shared *sh)
678 {
679 	int retries = 1024;
680 
681 	if (!sh->aso_age_mng->aso_sq.sq_obj.sq)
682 		return -EINVAL;
683 	rte_errno = 0;
684 	while (--retries) {
685 		rte_eal_alarm_cancel(mlx5_flow_aso_alarm, sh);
686 		if (rte_errno != EINPROGRESS)
687 			break;
688 		rte_pause();
689 	}
690 	return -rte_errno;
691 }
692 
693 static uint16_t
694 mlx5_aso_mtr_sq_enqueue_single(struct mlx5_aso_sq *sq,
695 		struct mlx5_aso_mtr *aso_mtr)
696 {
697 	volatile struct mlx5_aso_wqe *wqe = NULL;
698 	struct mlx5_flow_meter_info *fm = NULL;
699 	struct mlx5_flow_meter_profile *fmp;
700 	uint16_t size = 1 << sq->log_desc_n;
701 	uint16_t mask = size - 1;
702 	uint16_t res;
703 	uint32_t dseg_idx = 0;
704 	struct mlx5_aso_mtr_pool *pool = NULL;
705 
706 	rte_spinlock_lock(&sq->sqsl);
707 	res = size - (uint16_t)(sq->head - sq->tail);
708 	if (unlikely(!res)) {
709 		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
710 		rte_spinlock_unlock(&sq->sqsl);
711 		return 0;
712 	}
713 	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
714 	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
715 	/* Fill next WQE. */
716 	fm = &aso_mtr->fm;
717 	sq->elts[sq->head & mask].mtr = aso_mtr;
718 	pool = container_of(aso_mtr, struct mlx5_aso_mtr_pool,
719 			mtrs[aso_mtr->offset]);
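	/*
	 * Two meters share one ASO flow meter object, so the object ID
	 * advances every second meter, hence offset >> 1.
	 */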
720 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
721 			(aso_mtr->offset >> 1));
722 	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
723 			(ASO_OPC_MOD_POLICER <<
724 			WQE_CSEG_OPC_MOD_OFFSET) |
725 			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
726 	/* There are 2 meters in one ASO cache line. */
727 	dseg_idx = aso_mtr->offset & 0x1;
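	/*
	 * In BYTEWISE_64BYTE mode each data_mask bit enables one byte of the
	 * 64B data segment, so the shift below selects the 32B half that
	 * holds this meter.
	 */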
728 	wqe->aso_cseg.data_mask =
729 		RTE_BE64(MLX5_IFC_FLOW_METER_PARAM_MASK << (32 * !dseg_idx));
730 	if (fm->is_enable) {
731 		wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
732 			fm->profile->srtcm_prm.cbs_cir;
733 		wqe->aso_dseg.mtrs[dseg_idx].ebs_eir =
734 			fm->profile->srtcm_prm.ebs_eir;
735 	} else {
736 		wqe->aso_dseg.mtrs[dseg_idx].cbs_cir =
737 			RTE_BE32(MLX5_IFC_FLOW_METER_DISABLE_CBS_CIR_VAL);
738 		wqe->aso_dseg.mtrs[dseg_idx].ebs_eir = 0;
739 	}
740 	fmp = fm->profile;
741 	if (fmp->profile.packet_mode)
742 		wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
743 				RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
744 				(MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET) |
745 				(MLX5_METER_MODE_PKT << ASO_DSEG_MTR_MODE));
746 	else
747 		wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm =
748 				RTE_BE32((1 << ASO_DSEG_VALID_OFFSET) |
749 				(MLX5_FLOW_COLOR_GREEN << ASO_DSEG_SC_OFFSET));
750 	switch (fmp->profile.alg) {
751 	case RTE_MTR_SRTCM_RFC2697:
752 		/* Only needed for RFC2697. */
753 		if (fm->profile->srtcm_prm.ebs_eir)
754 			wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
755 					RTE_BE32(1 << ASO_DSEG_BO_OFFSET);
756 		break;
757 	case RTE_MTR_TRTCM_RFC2698:
758 		wqe->aso_dseg.mtrs[dseg_idx].v_bo_sc_bbog_mm |=
759 				RTE_BE32(1 << ASO_DSEG_BBOG_OFFSET);
760 		break;
761 	case RTE_MTR_TRTCM_RFC4115:
762 	default:
763 		break;
764 	}
765 	/*
766 	 * Note:
767 	 * For performance reasons, software does not set the token fields
768 	 * when posting the WQE to the ASO SQ; the hardware fills them in
769 	 * automatically.
770 	 */
771 	sq->head++;
772 	sq->pi += 2;/* Each WQE contains 2 WQEBB's. */
773 	rte_io_wmb();
774 	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
775 	rte_wmb();
776 	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
777 	rte_wmb();
778 	rte_spinlock_unlock(&sq->sqsl);
779 	return 1;
780 }
781 
782 static void
783 mlx5_aso_mtrs_status_update(struct mlx5_aso_sq *sq, uint16_t aso_mtrs_nums)
784 {
785 	uint16_t size = 1 << sq->log_desc_n;
786 	uint16_t mask = size - 1;
787 	uint16_t i;
788 	struct mlx5_aso_mtr *aso_mtr = NULL;
789 	uint8_t exp_state = ASO_METER_WAIT;
790 
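	/* Promote to READY only the meters that are still in WAIT state. */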
791 	for (i = 0; i < aso_mtrs_nums; ++i) {
792 		aso_mtr = sq->elts[(sq->tail + i) & mask].mtr;
793 		MLX5_ASSERT(aso_mtr);
794 		(void)__atomic_compare_exchange_n(&aso_mtr->state,
795 				&exp_state, ASO_METER_READY,
796 				false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
797 	}
798 }
799 
800 static void
801 mlx5_aso_mtr_completion_handle(struct mlx5_aso_sq *sq)
802 {
803 	struct mlx5_aso_cq *cq = &sq->cq;
804 	volatile struct mlx5_cqe *restrict cqe;
805 	const unsigned int cq_size = 1 << cq->log_desc_n;
806 	const unsigned int mask = cq_size - 1;
807 	uint32_t idx;
808 	uint32_t next_idx = cq->cq_ci & mask;
809 	uint16_t max;
810 	uint16_t n = 0;
811 	int ret;
812 
813 	rte_spinlock_lock(&sq->sqsl);
814 	max = (uint16_t)(sq->head - sq->tail);
815 	if (unlikely(!max)) {
816 		rte_spinlock_unlock(&sq->sqsl);
817 		return;
818 	}
819 	do {
820 		idx = next_idx;
821 		next_idx = (cq->cq_ci + 1) & mask;
822 		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
823 		cqe = &cq->cq_obj.cqes[idx];
824 		ret = check_cqe(cqe, cq_size, cq->cq_ci);
825 		/*
826 		 * Be sure owner read is done before any other cookie field or
827 		 * opaque field.
828 		 */
829 		rte_io_rmb();
830 		if (ret != MLX5_CQE_STATUS_SW_OWN) {
831 			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
832 				break;
833 			mlx5_aso_cqe_err_handle(sq);
834 		} else {
835 			n++;
836 		}
837 		cq->cq_ci++;
838 	} while (1);
839 	if (likely(n)) {
840 		mlx5_aso_mtrs_status_update(sq, n);
841 		sq->tail += n;
842 		rte_io_wmb();
843 		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
844 	}
845 	rte_spinlock_unlock(&sq->sqsl);
846 }
847 
848 /**
849  * Update ASO flow meter parameters by posting a WQE.
850  *
851  * @param[in] sh
852  *   Pointer to shared device context.
853  * @param[in] mtr
854  *   Pointer to ASO meter to be modified.
855  *
856  * @return
857  *   0 on success, -1 otherwise.
860  */
861 int
862 mlx5_aso_meter_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
863 			struct mlx5_aso_mtr *mtr)
864 {
865 	struct mlx5_aso_sq *sq = &sh->mtrmng->pools_mng.sq;
866 	uint32_t poll_wqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
867 
868 	do {
869 		mlx5_aso_mtr_completion_handle(sq);
870 		if (mlx5_aso_mtr_sq_enqueue_single(sq, mtr))
871 			return 0;
872 		/* Waiting for wqe resource. */
873 		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
874 	} while (--poll_wqe_times);
875 	DRV_LOG(ERR, "Fail to send WQE for ASO meter offset %d",
876 			mtr->offset);
877 	return -1;
878 }
879 
880 /**
881  * Wait for meter to be ready.
882  *
883  * @param[in] sh
884  *   Pointer to shared device context.
885  * @param[in] mtr
886  *   Pointer to ASO meter to wait for.
887  *
888  * @return
889  *   0 on success, -1 otherwise.
892  */
893 int
894 mlx5_aso_mtr_wait(struct mlx5_dev_ctx_shared *sh,
895 			struct mlx5_aso_mtr *mtr)
896 {
897 	struct mlx5_aso_sq *sq = &sh->mtrmng->pools_mng.sq;
898 	uint32_t poll_cqe_times = MLX5_MTR_POLL_WQE_CQE_TIMES;
899 
900 	if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) ==
901 					    ASO_METER_READY)
902 		return 0;
903 	do {
904 		mlx5_aso_mtr_completion_handle(sq);
905 		if (__atomic_load_n(&mtr->state, __ATOMIC_RELAXED) ==
906 					    ASO_METER_READY)
907 			return 0;
908 		/* Waiting for CQE ready. */
909 		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
910 	} while (--poll_cqe_times);
911 	DRV_LOG(ERR, "Fail to poll CQE ready for ASO meter offset %d",
912 			mtr->offset);
913 	return -1;
914 }
915 
916 /*
917  * Post a WQE to the ASO CT SQ to modify the context.
918  *
919  * @param[in] mng
920  *   Pointer to the CT pools management structure.
921  * @param[in] ct
922  *   Pointer to the generic CT structure related to the context.
923  * @param[in] profile
924  *   Pointer to configuration profile.
925  *
926  * @return
927  *   1 on success (WQE number), 0 on failure.
928  */
929 static uint16_t
930 mlx5_aso_ct_sq_enqueue_single(struct mlx5_aso_ct_pools_mng *mng,
931 			      struct mlx5_aso_ct_action *ct,
932 			      const struct rte_flow_action_conntrack *profile)
933 {
934 	volatile struct mlx5_aso_wqe *wqe = NULL;
935 	struct mlx5_aso_sq *sq = &mng->aso_sq;
936 	uint16_t size = 1 << sq->log_desc_n;
937 	uint16_t mask = size - 1;
938 	uint16_t res;
939 	struct mlx5_aso_ct_pool *pool;
940 	void *desg;
941 	void *orig_dir;
942 	void *reply_dir;
943 
944 	rte_spinlock_lock(&sq->sqsl);
945 	/* Prevent other threads from updating the index. */
946 	res = size - (uint16_t)(sq->head - sq->tail);
947 	if (unlikely(!res)) {
948 		rte_spinlock_unlock(&sq->sqsl);
949 		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
950 		return 0;
951 	}
952 	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
953 	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
954 	/* Fill next WQE. */
955 	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_WAIT);
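	/*
	 * The CT object stays in WAIT state until its completion is
	 * processed and mlx5_aso_ct_status_update() switches it to READY.
	 */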
956 	sq->elts[sq->head & mask].ct = ct;
957 	sq->elts[sq->head & mask].query_data = NULL;
958 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
959 	/* Each WQE will have a single CT object. */
960 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
961 						  ct->offset);
962 	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
963 			(ASO_OPC_MOD_CONNECTION_TRACKING <<
964 			 WQE_CSEG_OPC_MOD_OFFSET) |
965 			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
966 	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32
967 			(0u |
968 			 (ASO_OPER_LOGICAL_OR << ASO_CSEG_COND_OPER_OFFSET) |
969 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_1_OPER_OFFSET) |
970 			 (ASO_OP_ALWAYS_TRUE << ASO_CSEG_COND_0_OPER_OFFSET) |
971 			 (BYTEWISE_64BYTE << ASO_CSEG_DATA_MASK_MODE_OFFSET));
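	/* The all-ones mask is endianness-neutral, no byte swap is needed. */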
972 	wqe->aso_cseg.data_mask = UINT64_MAX;
973 	/* Cast away the volatile qualifier to make the compiler happy. */
974 	desg = (void *)(uintptr_t)wqe->aso_dseg.data;
975 	MLX5_SET(conn_track_aso, desg, valid, 1);
976 	MLX5_SET(conn_track_aso, desg, state, profile->state);
977 	MLX5_SET(conn_track_aso, desg, freeze_track, !profile->enable);
978 	MLX5_SET(conn_track_aso, desg, connection_assured,
979 		 profile->live_connection);
980 	MLX5_SET(conn_track_aso, desg, sack_permitted, profile->selective_ack);
981 	MLX5_SET(conn_track_aso, desg, challenged_acked,
982 		 profile->challenge_ack_passed);
983 	/* Heartbeat, retransmission_counter, retranmission_limit_exceeded: 0 */
984 	MLX5_SET(conn_track_aso, desg, heartbeat, 0);
985 	MLX5_SET(conn_track_aso, desg, max_ack_window,
986 		 profile->max_ack_window);
987 	MLX5_SET(conn_track_aso, desg, retransmission_counter, 0);
988 	MLX5_SET(conn_track_aso, desg, retranmission_limit_exceeded, 0);
989 	MLX5_SET(conn_track_aso, desg, retranmission_limit,
990 		 profile->retransmission_limit);
991 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_scale,
992 		 profile->reply_dir.scale);
993 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_close_initiated,
994 		 profile->reply_dir.close_initiated);
995 	/* Both directions will use the same liberal mode. */
996 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_liberal_enabled,
997 		 profile->liberal_mode);
998 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_data_unacked,
999 		 profile->reply_dir.data_unacked);
1000 	MLX5_SET(conn_track_aso, desg, reply_direction_tcp_max_ack,
1001 		 profile->reply_dir.last_ack_seen);
1002 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_scale,
1003 		 profile->original_dir.scale);
1004 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_close_initiated,
1005 		 profile->original_dir.close_initiated);
1006 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_liberal_enabled,
1007 		 profile->liberal_mode);
1008 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_data_unacked,
1009 		 profile->original_dir.data_unacked);
1010 	MLX5_SET(conn_track_aso, desg, original_direction_tcp_max_ack,
1011 		 profile->original_dir.last_ack_seen);
1012 	MLX5_SET(conn_track_aso, desg, last_win, profile->last_window);
1013 	MLX5_SET(conn_track_aso, desg, last_dir, profile->last_direction);
1014 	MLX5_SET(conn_track_aso, desg, last_index, profile->last_index);
1015 	MLX5_SET(conn_track_aso, desg, last_seq, profile->last_seq);
1016 	MLX5_SET(conn_track_aso, desg, last_ack, profile->last_ack);
1017 	MLX5_SET(conn_track_aso, desg, last_end, profile->last_end);
1018 	orig_dir = MLX5_ADDR_OF(conn_track_aso, desg, original_dir);
1019 	MLX5_SET(tcp_window_params, orig_dir, sent_end,
1020 		 profile->original_dir.sent_end);
1021 	MLX5_SET(tcp_window_params, orig_dir, reply_end,
1022 		 profile->original_dir.reply_end);
1023 	MLX5_SET(tcp_window_params, orig_dir, max_win,
1024 		 profile->original_dir.max_win);
1025 	MLX5_SET(tcp_window_params, orig_dir, max_ack,
1026 		 profile->original_dir.max_ack);
1027 	reply_dir = MLX5_ADDR_OF(conn_track_aso, desg, reply_dir);
1028 	MLX5_SET(tcp_window_params, reply_dir, sent_end,
1029 		 profile->reply_dir.sent_end);
1030 	MLX5_SET(tcp_window_params, reply_dir, reply_end,
1031 		 profile->reply_dir.reply_end);
1032 	MLX5_SET(tcp_window_params, reply_dir, max_win,
1033 		 profile->reply_dir.max_win);
1034 	MLX5_SET(tcp_window_params, reply_dir, max_ack,
1035 		 profile->reply_dir.max_ack);
1036 	sq->head++;
1037 	sq->pi += 2; /* Each WQE contains 2 WQEBB's. */
1038 	rte_io_wmb();
1039 	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
1040 	rte_wmb();
1041 	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
1042 	rte_wmb();
1043 	rte_spinlock_unlock(&sq->sqsl);
1044 	return 1;
1045 }
1046 
1047 /*
1048  * Update the status field of CTs to indicate they are ready to be used by
1049  * flows. The CTs are the contiguous entries completed since the last update.
1050  *
1051  * @param[in] sq
1052  *   Pointer to ASO CT SQ.
1053  * @param[in] num
1054  *   Number of CT structures to be updated.
1058  */
1059 static void
1060 mlx5_aso_ct_status_update(struct mlx5_aso_sq *sq, uint16_t num)
1061 {
1062 	uint16_t size = 1 << sq->log_desc_n;
1063 	uint16_t mask = size - 1;
1064 	uint16_t i;
1065 	struct mlx5_aso_ct_action *ct = NULL;
1066 	uint16_t idx;
1067 
1068 	for (i = 0; i < num; i++) {
1069 		idx = (uint16_t)((sq->tail + i) & mask);
1070 		ct = sq->elts[idx].ct;
1071 		MLX5_ASSERT(ct);
1072 		MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_READY);
1073 		if (sq->elts[idx].query_data)
1074 			rte_memcpy(sq->elts[idx].query_data,
1075 				   (char *)((uintptr_t)sq->mr.addr + idx * 64),
1076 				   64);
1077 	}
1078 }
1079 
1080 /*
1081  * Post a WQE to the ASO CT SQ to query the current context.
1082  *
1083  * @param[in] mng
1084  *   Pointer to the CT pools management structure.
1085  * @param[in] ct
1086  *   Pointer to the generic CT structure related to the context.
1087  * @param[in] data
1088  *   Pointer to data area to be filled.
1089  *
1090  * @return
1091  *   1 on success (WQE number), 0 on failure.
1092  */
1093 static int
1094 mlx5_aso_ct_sq_query_single(struct mlx5_aso_ct_pools_mng *mng,
1095 			    struct mlx5_aso_ct_action *ct, char *data)
1096 {
1097 	volatile struct mlx5_aso_wqe *wqe = NULL;
1098 	struct mlx5_aso_sq *sq = &mng->aso_sq;
1099 	uint16_t size = 1 << sq->log_desc_n;
1100 	uint16_t mask = size - 1;
1101 	uint16_t res;
1102 	uint16_t wqe_idx;
1103 	struct mlx5_aso_ct_pool *pool;
1104 	enum mlx5_aso_ct_state state =
1105 				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1106 
1107 	if (state == ASO_CONNTRACK_FREE) {
1108 		DRV_LOG(ERR, "Fail: No context to query");
1109 		return -1;
1110 	} else if (state == ASO_CONNTRACK_WAIT) {
1111 		return 0;
1112 	}
1113 	rte_spinlock_lock(&sq->sqsl);
1114 	res = size - (uint16_t)(sq->head - sq->tail);
1115 	if (unlikely(!res)) {
1116 		rte_spinlock_unlock(&sq->sqsl);
1117 		DRV_LOG(ERR, "Fail: SQ is full and no free WQE to send");
1118 		return 0;
1119 	}
1120 	MLX5_ASO_CT_UPDATE_STATE(ct, ASO_CONNTRACK_QUERY);
1121 	wqe = &sq->sq_obj.aso_wqes[sq->head & mask];
1122 	/* Confirm the location and address of the prefetch instruction. */
1123 	rte_prefetch0(&sq->sq_obj.aso_wqes[(sq->head + 1) & mask]);
1124 	/* Fill next WQE. */
1125 	wqe_idx = sq->head & mask;
1126 	sq->elts[wqe_idx].ct = ct;
1127 	sq->elts[wqe_idx].query_data = data;
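	/*
	 * The hardware writes the 64B context into the MR slot bound to this
	 * WQE index (set up in mlx5_aso_ct_init_sq()); it is copied into
	 * query_data by mlx5_aso_ct_status_update() once the CQE arrives.
	 */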
1128 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1129 	/* Each WQE will have a single CT object. */
1130 	wqe->general_cseg.misc = rte_cpu_to_be_32(pool->devx_obj->id +
1131 						  ct->offset);
1132 	wqe->general_cseg.opcode = rte_cpu_to_be_32(MLX5_OPCODE_ACCESS_ASO |
1133 			(ASO_OPC_MOD_CONNECTION_TRACKING <<
1134 			 WQE_CSEG_OPC_MOD_OFFSET) |
1135 			sq->pi << WQE_CSEG_WQE_INDEX_OFFSET);
1136 	/*
1137 	 * No write request is required.
1138 	 * ASO_OPER_LOGICAL_AND and ASO_OP_ALWAYS_FALSE are both 0.
1139 	 * "BYTEWISE_64BYTE" is needed to fetch a whole context.
1140 	 * Setting the masks to 0 directly saves an endian swap
1141 	 * (a later modify operation rewrites them); "data_mask" is ignored.
1142 	 * The buffer address was already filled during initialization.
1143 	 */
1144 	wqe->aso_cseg.operand_masks = rte_cpu_to_be_32(BYTEWISE_64BYTE <<
1145 					ASO_CSEG_DATA_MASK_MODE_OFFSET);
1146 	wqe->aso_cseg.data_mask = 0;
1147 	sq->head++;
1148 	/*
1149 	 * Each WQE contains 2 WQEBB's, even though
1150 	 * data segment is not used in this case.
1151 	 */
1152 	sq->pi += 2;
1153 	rte_io_wmb();
1154 	sq->sq_obj.db_rec[MLX5_SND_DBR] = rte_cpu_to_be_32(sq->pi);
1155 	rte_wmb();
1156 	*sq->uar_addr = *(volatile uint64_t *)wqe; /* Assume 64 bit ARCH. */
1157 	rte_wmb();
1158 	rte_spinlock_unlock(&sq->sqsl);
1159 	return 1;
1160 }
1161 
1162 /*
1163  * Handle completions from WQEs sent to ASO CT.
1164  *
1165  * @param[in] mng
1166  *   Pointer to the CT pools management structure.
1167  */
1168 static void
1169 mlx5_aso_ct_completion_handle(struct mlx5_aso_ct_pools_mng *mng)
1170 {
1171 	struct mlx5_aso_sq *sq = &mng->aso_sq;
1172 	struct mlx5_aso_cq *cq = &sq->cq;
1173 	volatile struct mlx5_cqe *restrict cqe;
1174 	const uint32_t cq_size = 1 << cq->log_desc_n;
1175 	const uint32_t mask = cq_size - 1;
1176 	uint32_t idx;
1177 	uint32_t next_idx;
1178 	uint16_t max;
1179 	uint16_t n = 0;
1180 	int ret;
1181 
1182 	rte_spinlock_lock(&sq->sqsl);
1183 	max = (uint16_t)(sq->head - sq->tail);
1184 	if (unlikely(!max)) {
1185 		rte_spinlock_unlock(&sq->sqsl);
1186 		return;
1187 	}
1188 	next_idx = cq->cq_ci & mask;
1189 	do {
1190 		idx = next_idx;
1191 		next_idx = (cq->cq_ci + 1) & mask;
1192 		/* Need to confirm the position of the prefetch. */
1193 		rte_prefetch0(&cq->cq_obj.cqes[next_idx]);
1194 		cqe = &cq->cq_obj.cqes[idx];
1195 		ret = check_cqe(cqe, cq_size, cq->cq_ci);
1196 		/*
1197 		 * Be sure owner read is done before any other cookie field or
1198 		 * opaque field.
1199 		 */
1200 		rte_io_rmb();
1201 		if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
1202 			if (likely(ret == MLX5_CQE_STATUS_HW_OWN))
1203 				break;
1204 			mlx5_aso_cqe_err_handle(sq);
1205 		} else {
1206 			n++;
1207 		}
1208 		cq->cq_ci++;
1209 	} while (1);
1210 	if (likely(n)) {
1211 		mlx5_aso_ct_status_update(sq, n);
1212 		sq->tail += n;
1213 		rte_io_wmb();
1214 		cq->cq_obj.db_rec[0] = rte_cpu_to_be_32(cq->cq_ci);
1215 	}
1216 	rte_spinlock_unlock(&sq->sqsl);
1217 }
1218 
1219 /*
1220  * Update connection tracking ASO context by sending WQE.
1221  * Update the connection tracking ASO context by sending a WQE.
1222  * @param[in] sh
1223  *   Pointer to mlx5_dev_ctx_shared object.
1224  * @param[in] ct
1225  *   Pointer to connection tracking offload object.
1226  * @param[in] profile
1227  *   Pointer to connection tracking TCP parameter.
1228  *
1229  * @return
1230  *   0 on success, -1 on failure.
1231  */
1232 int
1233 mlx5_aso_ct_update_by_wqe(struct mlx5_dev_ctx_shared *sh,
1234 			  struct mlx5_aso_ct_action *ct,
1235 			  const struct rte_flow_action_conntrack *profile)
1236 {
1237 	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1238 	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1239 	struct mlx5_aso_ct_pool *pool;
1240 
1241 	MLX5_ASSERT(ct);
1242 	do {
1243 		mlx5_aso_ct_completion_handle(mng);
1244 		if (mlx5_aso_ct_sq_enqueue_single(mng, ct, profile))
1245 			return 0;
1246 		/* Waiting for wqe resource. */
1247 		rte_delay_us_sleep(10u);
1248 	} while (--poll_wqe_times);
1249 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1250 	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
1251 		ct->offset, pool->index);
1252 	return -1;
1253 }
1254 
1255 /*
1256  * The routine is used to wait for WQE completion to continue with queried data.
1257  *
1258  * @param[in] sh
1259  *   Pointer to mlx5_dev_ctx_shared object.
1260  * @param[in] ct
1261  *   Pointer to connection tracking offload object.
1262  *
1263  * @return
1264  *   0 on success, -1 on failure.
1265  */
1266 int
1267 mlx5_aso_ct_wait_ready(struct mlx5_dev_ctx_shared *sh,
1268 		       struct mlx5_aso_ct_action *ct)
1269 {
1270 	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1271 	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1272 	struct mlx5_aso_ct_pool *pool;
1273 
1274 	if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
1275 	    ASO_CONNTRACK_READY)
1276 		return 0;
1277 	do {
1278 		mlx5_aso_ct_completion_handle(mng);
1279 		if (__atomic_load_n(&ct->state, __ATOMIC_RELAXED) ==
1280 		    ASO_CONNTRACK_READY)
1281 			return 0;
1282 		/* Wait for the CQE to be ready; whether to block or sleep here may need reconsideration. */
1283 		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1284 	} while (--poll_cqe_times);
1285 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1286 	DRV_LOG(ERR, "Fail to poll CQE for ASO CT %d in pool %d",
1287 		ct->offset, pool->index);
1288 	return -1;
1289 }
1290 
1291 /*
1292  * Convert the hardware conntrack data format into the profile.
1293  *
1294  * @param[in] profile
1295  *   Pointer to conntrack profile to be filled after query.
1296  * @param[in] wdata
1297  *   Pointer to data fetched from hardware.
1298  */
1299 static inline void
1300 mlx5_aso_ct_obj_analyze(struct rte_flow_action_conntrack *profile,
1301 			char *wdata)
1302 {
1303 	void *o_dir = MLX5_ADDR_OF(conn_track_aso, wdata, original_dir);
1304 	void *r_dir = MLX5_ADDR_OF(conn_track_aso, wdata, reply_dir);
1305 
1306 	/* Using MLX5_GET16 for the 16-bit fields could be considered. */
1307 	profile->state = (enum rte_flow_conntrack_state)
1308 			 MLX5_GET(conn_track_aso, wdata, state);
1309 	profile->enable = !MLX5_GET(conn_track_aso, wdata, freeze_track);
1310 	profile->selective_ack = MLX5_GET(conn_track_aso, wdata,
1311 					  sack_permitted);
1312 	profile->live_connection = MLX5_GET(conn_track_aso, wdata,
1313 					    connection_assured);
1314 	profile->challenge_ack_passed = MLX5_GET(conn_track_aso, wdata,
1315 						 challenged_acked);
1316 	profile->max_ack_window = MLX5_GET(conn_track_aso, wdata,
1317 					   max_ack_window);
1318 	profile->retransmission_limit = MLX5_GET(conn_track_aso, wdata,
1319 						 retranmission_limit);
1320 	profile->last_window = MLX5_GET(conn_track_aso, wdata, last_win);
1321 	profile->last_direction = MLX5_GET(conn_track_aso, wdata, last_dir);
1322 	profile->last_index = (enum rte_flow_conntrack_tcp_last_index)
1323 			      MLX5_GET(conn_track_aso, wdata, last_index);
1324 	profile->last_seq = MLX5_GET(conn_track_aso, wdata, last_seq);
1325 	profile->last_ack = MLX5_GET(conn_track_aso, wdata, last_ack);
1326 	profile->last_end = MLX5_GET(conn_track_aso, wdata, last_end);
1327 	profile->liberal_mode = MLX5_GET(conn_track_aso, wdata,
1328 				reply_direction_tcp_liberal_enabled) |
1329 				MLX5_GET(conn_track_aso, wdata,
1330 				original_direction_tcp_liberal_enabled);
1331 	/* The RTE profile has a single liberal flag covering both directions. */
1332 	profile->reply_dir.scale = MLX5_GET(conn_track_aso, wdata,
1333 					    reply_direction_tcp_scale);
1334 	profile->reply_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
1335 					reply_direction_tcp_close_initiated);
1336 	profile->reply_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
1337 					reply_direction_tcp_data_unacked);
1338 	profile->reply_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
1339 					reply_direction_tcp_max_ack);
1340 	profile->reply_dir.sent_end = MLX5_GET(tcp_window_params,
1341 					       r_dir, sent_end);
1342 	profile->reply_dir.reply_end = MLX5_GET(tcp_window_params,
1343 						r_dir, reply_end);
1344 	profile->reply_dir.max_win = MLX5_GET(tcp_window_params,
1345 					      r_dir, max_win);
1346 	profile->reply_dir.max_ack = MLX5_GET(tcp_window_params,
1347 					      r_dir, max_ack);
1348 	profile->original_dir.scale = MLX5_GET(conn_track_aso, wdata,
1349 					       original_direction_tcp_scale);
1350 	profile->original_dir.close_initiated = MLX5_GET(conn_track_aso, wdata,
1351 					original_direction_tcp_close_initiated);
1352 	profile->original_dir.data_unacked = MLX5_GET(conn_track_aso, wdata,
1353 					original_direction_tcp_data_unacked);
1354 	profile->original_dir.last_ack_seen = MLX5_GET(conn_track_aso, wdata,
1355 					original_direction_tcp_max_ack);
1356 	profile->original_dir.sent_end = MLX5_GET(tcp_window_params,
1357 						  o_dir, sent_end);
1358 	profile->original_dir.reply_end = MLX5_GET(tcp_window_params,
1359 						   o_dir, reply_end);
1360 	profile->original_dir.max_win = MLX5_GET(tcp_window_params,
1361 						 o_dir, max_win);
1362 	profile->original_dir.max_ack = MLX5_GET(tcp_window_params,
1363 						 o_dir, max_ack);
1364 }
1365 
1366 /*
1367  * Query the connection tracking context by sending a WQE.
1368  *
1369  * @param[in] sh
1370  *   Pointer to mlx5_dev_ctx_shared object.
1371  * @param[in] ct
1372  *   Pointer to connection tracking offload object.
1373  * @param[out] profile
1374  *   Pointer to connection tracking TCP information.
1375  *
1376  * @return
1377  *   0 on success, -1 on failure.
1378  */
1379 int
1380 mlx5_aso_ct_query_by_wqe(struct mlx5_dev_ctx_shared *sh,
1381 			 struct mlx5_aso_ct_action *ct,
1382 			 struct rte_flow_action_conntrack *profile)
1383 {
1384 	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1385 	uint32_t poll_wqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1386 	struct mlx5_aso_ct_pool *pool;
1387 	char out_data[64 * 2];
1388 	int ret;
1389 
1390 	MLX5_ASSERT(ct);
1391 	do {
1392 		mlx5_aso_ct_completion_handle(mng);
1393 		ret = mlx5_aso_ct_sq_query_single(mng, ct, out_data);
1394 		if (ret < 0)
1395 			return ret;
1396 		else if (ret > 0)
1397 			goto data_handle;
1398 		/* Waiting for wqe resource or state. */
1399 		else
1400 			rte_delay_us_sleep(10u);
1401 	} while (--poll_wqe_times);
1402 	pool = container_of(ct, struct mlx5_aso_ct_pool, actions[ct->offset]);
1403 	DRV_LOG(ERR, "Fail to send WQE for ASO CT %d in pool %d",
1404 		ct->offset, pool->index);
1405 	return -1;
1406 data_handle:
1407 	ret = mlx5_aso_ct_wait_ready(sh, ct);
1408 	if (!ret)
1409 		mlx5_aso_ct_obj_analyze(profile, out_data);
1410 	return ret;
1411 }
1412 
1413 /*
1414  * Make sure the conntrack context is synchronized with hardware before
1415  * creating a flow rule that uses it.
1416  *
1417  * @param[in] sh
1418  *   Pointer to shared device context.
1419  * @param[in] ct
1420  *   Pointer to connection tracking offload object.
1421  *
1422  * @return
1423  *   0 on success, a negative errno value otherwise and rte_errno is set.
1424  */
1425 int
1426 mlx5_aso_ct_available(struct mlx5_dev_ctx_shared *sh,
1427 		      struct mlx5_aso_ct_action *ct)
1428 {
1429 	struct mlx5_aso_ct_pools_mng *mng = sh->ct_mng;
1430 	uint32_t poll_cqe_times = MLX5_CT_POLL_WQE_CQE_TIMES;
1431 	enum mlx5_aso_ct_state state =
1432 				__atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1433 
1434 	if (state == ASO_CONNTRACK_FREE) {
1435 		rte_errno = ENXIO;
1436 		return -rte_errno;
1437 	} else if (state == ASO_CONNTRACK_READY ||
1438 		   state == ASO_CONNTRACK_QUERY) {
1439 		return 0;
1440 	}
1441 	do {
1442 		mlx5_aso_ct_completion_handle(mng);
1443 		state = __atomic_load_n(&ct->state, __ATOMIC_RELAXED);
1444 		if (state == ASO_CONNTRACK_READY ||
1445 		    state == ASO_CONNTRACK_QUERY)
1446 			return 0;
1447 		/* Wait for the CQE to be ready; whether to block or sleep here may need reconsideration. */
1448 		rte_delay_us_sleep(MLX5_ASO_WQE_CQE_RESPONSE_DELAY);
1449 	} while (--poll_cqe_times);
1450 	rte_errno = EBUSY;
1451 	return -rte_errno;
1452 }
1453