xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision 48fbb0e93d069d1beea2e06851f45dfce668bb08)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <stdalign.h>
7 #include <stdint.h>
8 #include <string.h>
9 #include <stdbool.h>
10 #include <sys/queue.h>
11 
12 #include <rte_common.h>
13 #include <rte_ether.h>
14 #include <ethdev_driver.h>
15 #include <rte_eal_paging.h>
16 #include <rte_flow.h>
17 #include <rte_cycles.h>
18 #include <rte_flow_driver.h>
19 #include <rte_malloc.h>
20 #include <rte_ip.h>
21 
22 #include <mlx5_glue.h>
23 #include <mlx5_devx_cmds.h>
24 #include <mlx5_prm.h>
25 #include <mlx5_malloc.h>
26 
27 #include "mlx5_defs.h"
28 #include "mlx5.h"
29 #include "mlx5_flow.h"
30 #include "mlx5_flow_os.h"
31 #include "mlx5_rx.h"
32 #include "mlx5_tx.h"
33 #include "mlx5_common_os.h"
34 #include "rte_pmd_mlx5.h"
35 
36 /*
37  * Shared array for quick translation between port_id and vport mask/values
38  * used for HWS rules.
39  */
40 struct flow_hw_port_info mlx5_flow_hw_port_infos[RTE_MAX_ETHPORTS];
41 
42 /*
43  * A global structure to save the REG_C_x registers available for tag usage.
44  * The meter color REG (ASO) and the last available one will be reserved
45  * for PMD internal usage.
46  * Since there is no "port" concept in the driver, the available tag set is
47  * assumed to be the minimum intersection across all ports:
48  * 3 - in FDB mode / 5 - in legacy mode
49  */
50 uint32_t mlx5_flow_hw_avl_tags_init_cnt;
51 enum modify_reg mlx5_flow_hw_avl_tags[MLX5_FLOW_HW_TAGS_MAX] = {REG_NON};
52 enum modify_reg mlx5_flow_hw_aso_tag;
53 
54 struct tunnel_default_miss_ctx {
55 	uint16_t *queue;
56 	__extension__
57 	union {
58 		struct rte_flow_action_rss action_rss;
59 		struct rte_flow_action_queue miss_queue;
60 		struct rte_flow_action_jump miss_jump;
61 		uint8_t raw[0];
62 	};
63 };
64 
65 static int
66 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
67 			     struct rte_flow *flow,
68 			     const struct rte_flow_attr *attr,
69 			     const struct rte_flow_action *app_actions,
70 			     uint32_t flow_idx,
71 			     const struct mlx5_flow_tunnel *tunnel,
72 			     struct tunnel_default_miss_ctx *ctx,
73 			     struct rte_flow_error *error);
74 static struct mlx5_flow_tunnel *
75 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id);
76 static void
77 mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel);
78 static uint32_t
79 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
80 				const struct mlx5_flow_tunnel *tunnel,
81 				uint32_t group, uint32_t *table,
82 				struct rte_flow_error *error);
83 
84 /** Device flow drivers. */
85 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
86 
87 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
88 
89 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
90 	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
91 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
92 	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
93 #endif
94 #ifdef HAVE_MLX5_HWS_SUPPORT
95 	[MLX5_FLOW_TYPE_HW] = &mlx5_flow_hw_drv_ops,
96 #endif
97 	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
98 	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
99 };
100 
101 /** Helper macro to build input graph for mlx5_flow_expand_rss(). */
102 #define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
103 	(const int []){ \
104 		__VA_ARGS__, 0, \
105 	}
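/*
 * Illustrative note (editorial addition): the macro above builds a
 * zero-terminated compound literal, so for example
 *
 *	MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6)
 *
 * expands to
 *
 *	(const int []){ MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6, 0, }
 *
 * which is why index 0 acts as the list terminator in the expansion graph
 * nodes defined below.
 */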
106 
107 /** Node object of input graph for mlx5_flow_expand_rss(). */
108 struct mlx5_flow_expand_node {
109 	const int *const next;
110 	/**<
111 	 * List of next node indexes. Index 0 is interpreted as a terminator.
112 	 */
113 	const enum rte_flow_item_type type;
114 	/**< Pattern item type of current node. */
115 	uint64_t rss_types;
116 	/**<
117 	 * RSS types bit-field associated with this node
118 	 * (see RTE_ETH_RSS_* definitions).
119 	 */
120 	uint64_t node_flags;
121 	/**<
122 	 *  Bit-fields that define how the node is used in the expansion.
123 	 * (see MLX5_EXPANSION_NODE_* definitions).
124 	 */
125 };
126 
127 /* Optional expansion node. The expansion algorithm will not go deeper past it. */
128 #define MLX5_EXPANSION_NODE_OPTIONAL (UINT64_C(1) << 0)
129 
130 /* The node is not added implicitly as an expansion of the flow pattern.
131  * If the node type does not match the flow pattern item type, the
132  * expansion algorithm goes deeper, to the node's next items.
133  * In the current implementation, the list of next node indexes can
134  * have at most one node with this flag set, and it has to be the last
135  * node index (before the list terminator).
136  */
137 #define MLX5_EXPANSION_NODE_EXPLICIT (UINT64_C(1) << 1)
138 
139 /** Object returned by mlx5_flow_expand_rss(). */
140 struct mlx5_flow_expand_rss {
141 	uint32_t entries;
142 	/**< Number of entries in @p entry[] (expanded patterns and priorities). */
143 	struct {
144 		struct rte_flow_item *pattern; /**< Expanded pattern array. */
145 		uint32_t priority; /**< Priority offset for each expansion. */
146 	} entry[];
147 };
148 
149 static void
150 mlx5_dbg__print_pattern(const struct rte_flow_item *item);
151 
152 static const struct mlx5_flow_expand_node *
153 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
154 		unsigned int item_idx,
155 		const struct mlx5_flow_expand_node graph[],
156 		const struct mlx5_flow_expand_node *node);
157 
158 static bool
159 mlx5_flow_is_rss_expandable_item(const struct rte_flow_item *item)
160 {
161 	switch (item->type) {
162 	case RTE_FLOW_ITEM_TYPE_ETH:
163 	case RTE_FLOW_ITEM_TYPE_VLAN:
164 	case RTE_FLOW_ITEM_TYPE_IPV4:
165 	case RTE_FLOW_ITEM_TYPE_IPV6:
166 	case RTE_FLOW_ITEM_TYPE_UDP:
167 	case RTE_FLOW_ITEM_TYPE_TCP:
168 	case RTE_FLOW_ITEM_TYPE_ESP:
169 	case RTE_FLOW_ITEM_TYPE_ICMP:
170 	case RTE_FLOW_ITEM_TYPE_ICMP6:
171 	case RTE_FLOW_ITEM_TYPE_VXLAN:
172 	case RTE_FLOW_ITEM_TYPE_NVGRE:
173 	case RTE_FLOW_ITEM_TYPE_GRE:
174 	case RTE_FLOW_ITEM_TYPE_GENEVE:
175 	case RTE_FLOW_ITEM_TYPE_MPLS:
176 	case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
177 	case RTE_FLOW_ITEM_TYPE_GRE_KEY:
178 	case RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT:
179 	case RTE_FLOW_ITEM_TYPE_GTP:
180 		return true;
181 	default:
182 		break;
183 	}
184 	return false;
185 }
186 
187 /**
188  * Network Service Header (NSH) and its next protocol values
189  * are described in RFC-8393.
190  */
191 static enum rte_flow_item_type
192 mlx5_nsh_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
193 {
194 	enum rte_flow_item_type type;
195 
196 	switch (proto_mask & proto_spec) {
197 	case 0:
198 		type = RTE_FLOW_ITEM_TYPE_VOID;
199 		break;
200 	case RTE_VXLAN_GPE_TYPE_IPV4:
201 		type = RTE_FLOW_ITEM_TYPE_IPV4;
202 		break;
203 	case RTE_VXLAN_GPE_TYPE_IPV6:
204 		type = RTE_FLOW_ITEM_TYPE_IPV6;
205 		break;
206 	case RTE_VXLAN_GPE_TYPE_ETH:
207 		type = RTE_FLOW_ITEM_TYPE_ETH;
208 		break;
209 	default:
210 		type = RTE_FLOW_ITEM_TYPE_END;
211 	}
212 	return type;
213 }
214 
215 static enum rte_flow_item_type
216 mlx5_inet_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
217 {
218 	enum rte_flow_item_type type;
219 
220 	switch (proto_mask & proto_spec) {
221 	case 0:
222 		type = RTE_FLOW_ITEM_TYPE_VOID;
223 		break;
224 	case IPPROTO_UDP:
225 		type = RTE_FLOW_ITEM_TYPE_UDP;
226 		break;
227 	case IPPROTO_TCP:
228 		type = RTE_FLOW_ITEM_TYPE_TCP;
229 		break;
230 	case IPPROTO_IPIP:
231 		type = RTE_FLOW_ITEM_TYPE_IPV4;
232 		break;
233 	case IPPROTO_IPV6:
234 		type = RTE_FLOW_ITEM_TYPE_IPV6;
235 		break;
236 	case IPPROTO_ESP:
237 		type = RTE_FLOW_ITEM_TYPE_ESP;
238 		break;
239 	default:
240 		type = RTE_FLOW_ITEM_TYPE_END;
241 	}
242 	return type;
243 }
244 
245 static enum rte_flow_item_type
246 mlx5_ethertype_to_item_type(rte_be16_t type_spec,
247 			    rte_be16_t type_mask, bool is_tunnel)
248 {
249 	enum rte_flow_item_type type;
250 
251 	switch (rte_be_to_cpu_16(type_spec & type_mask)) {
252 	case 0:
253 		type = RTE_FLOW_ITEM_TYPE_VOID;
254 		break;
255 	case RTE_ETHER_TYPE_TEB:
256 		type = is_tunnel ?
257 		       RTE_FLOW_ITEM_TYPE_ETH : RTE_FLOW_ITEM_TYPE_END;
258 		break;
259 	case RTE_ETHER_TYPE_VLAN:
260 		type = !is_tunnel ?
261 		       RTE_FLOW_ITEM_TYPE_VLAN : RTE_FLOW_ITEM_TYPE_END;
262 		break;
263 	case RTE_ETHER_TYPE_IPV4:
264 		type = RTE_FLOW_ITEM_TYPE_IPV4;
265 		break;
266 	case RTE_ETHER_TYPE_IPV6:
267 		type = RTE_FLOW_ITEM_TYPE_IPV6;
268 		break;
269 	default:
270 		type = RTE_FLOW_ITEM_TYPE_END;
271 	}
272 	return type;
273 }
274 
275 static enum rte_flow_item_type
276 mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
277 {
278 #define MLX5_XSET_ITEM_MASK_SPEC(type, fld)                              \
279 	do {                                                             \
280 		const void *m = item->mask;                              \
281 		const void *s = item->spec;                              \
282 		mask = m ?                                               \
283 			((const struct rte_flow_item_##type *)m)->fld :  \
284 			rte_flow_item_##type##_mask.fld;                 \
285 		spec = ((const struct rte_flow_item_##type *)s)->fld;    \
286 	} while (0)
287 
288 	enum rte_flow_item_type ret;
289 	uint16_t spec, mask;
290 
291 	if (item == NULL || item->spec == NULL)
292 		return RTE_FLOW_ITEM_TYPE_VOID;
293 	switch (item->type) {
294 	case RTE_FLOW_ITEM_TYPE_ETH:
295 		MLX5_XSET_ITEM_MASK_SPEC(eth, type);
296 		if (!mask)
297 			return RTE_FLOW_ITEM_TYPE_VOID;
298 		ret = mlx5_ethertype_to_item_type(spec, mask, false);
299 		break;
300 	case RTE_FLOW_ITEM_TYPE_VLAN:
301 		MLX5_XSET_ITEM_MASK_SPEC(vlan, inner_type);
302 		if (!mask)
303 			return RTE_FLOW_ITEM_TYPE_VOID;
304 		ret = mlx5_ethertype_to_item_type(spec, mask, false);
305 		break;
306 	case RTE_FLOW_ITEM_TYPE_IPV4:
307 		MLX5_XSET_ITEM_MASK_SPEC(ipv4, hdr.next_proto_id);
308 		if (!mask)
309 			return RTE_FLOW_ITEM_TYPE_VOID;
310 		ret = mlx5_inet_proto_to_item_type(spec, mask);
311 		break;
312 	case RTE_FLOW_ITEM_TYPE_IPV6:
313 		MLX5_XSET_ITEM_MASK_SPEC(ipv6, hdr.proto);
314 		if (!mask)
315 			return RTE_FLOW_ITEM_TYPE_VOID;
316 		ret = mlx5_inet_proto_to_item_type(spec, mask);
317 		break;
318 	case RTE_FLOW_ITEM_TYPE_GENEVE:
319 		MLX5_XSET_ITEM_MASK_SPEC(geneve, protocol);
320 		ret = mlx5_ethertype_to_item_type(spec, mask, true);
321 		break;
322 	case RTE_FLOW_ITEM_TYPE_GRE:
323 		MLX5_XSET_ITEM_MASK_SPEC(gre, protocol);
324 		ret = mlx5_ethertype_to_item_type(spec, mask, true);
325 		break;
326 	case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
327 		MLX5_XSET_ITEM_MASK_SPEC(vxlan_gpe, protocol);
328 		ret = mlx5_nsh_proto_to_item_type(spec, mask);
329 		break;
330 	default:
331 		ret = RTE_FLOW_ITEM_TYPE_VOID;
332 		break;
333 	}
334 	return ret;
335 #undef MLX5_XSET_ITEM_MASK_SPEC
336 }
337 
338 static const int *
339 mlx5_flow_expand_rss_skip_explicit(const struct mlx5_flow_expand_node graph[],
340 		const int *next_node)
341 {
342 	const struct mlx5_flow_expand_node *node = NULL;
343 	const int *next = next_node;
344 
345 	while (next && *next) {
346 		/*
347 		 * Skip the nodes with the MLX5_EXPANSION_NODE_EXPLICIT
348 		 * flag set, because they were not found in the flow pattern.
349 		 */
350 		node = &graph[*next];
351 		if (!(node->node_flags & MLX5_EXPANSION_NODE_EXPLICIT))
352 			break;
353 		next = node->next;
354 	}
355 	return next;
356 }
357 
358 #define MLX5_RSS_EXP_ELT_N 16
359 
360 /**
361  * Expand RSS flows into several possible flows according to the RSS hash
362  * fields requested and the driver capabilities.
363  *
364  * @param[out] buf
365  *   Buffer to store the result expansion.
366  * @param[in] size
367  *   Buffer size in bytes. If 0, @p buf can be NULL.
368  * @param[in] pattern
369  *   User flow pattern.
370  * @param[in] types
371  *   RSS types to expand (see RTE_ETH_RSS_* definitions).
372  * @param[in] graph
373  *   Input graph to expand @p pattern according to @p types.
374  * @param[in] graph_root_index
375  *   Index of root node in @p graph, typically 0.
376  *
377  * @return
378  *   A positive value representing the size of @p buf in bytes regardless of
379  *   @p size on success, a negative errno value otherwise (and rte_errno is
380  *   set). The following errors are defined:
381  *
382  *   -E2BIG: the expansion depth of @p graph is too large.
383  *   -EINVAL: @p size is not large enough for the expanded pattern.
384  */
385 static int
386 mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
387 		     const struct rte_flow_item *pattern, uint64_t types,
388 		     const struct mlx5_flow_expand_node graph[],
389 		     int graph_root_index)
390 {
391 	const struct rte_flow_item *item;
392 	const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
393 	const int *next_node;
394 	const int *stack[MLX5_RSS_EXP_ELT_N];
395 	int stack_pos = 0;
396 	struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N];
397 	unsigned int i, item_idx, last_expand_item_idx = 0;
398 	size_t lsize;
399 	size_t user_pattern_size = 0;
400 	void *addr = NULL;
401 	const struct mlx5_flow_expand_node *next = NULL;
402 	struct rte_flow_item missed_item;
403 	int missed = 0;
404 	int elt = 0;
405 	const struct rte_flow_item *last_expand_item = NULL;
406 
407 	memset(&missed_item, 0, sizeof(missed_item));
408 	lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
409 		MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]);
410 	if (lsize > size)
411 		return -EINVAL;
412 	buf->entry[0].priority = 0;
413 	buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N];
414 	buf->entries = 0;
415 	addr = buf->entry[0].pattern;
416 	for (item = pattern, item_idx = 0;
417 			item->type != RTE_FLOW_ITEM_TYPE_END;
418 			item++, item_idx++) {
419 		if (!mlx5_flow_is_rss_expandable_item(item)) {
420 			user_pattern_size += sizeof(*item);
421 			continue;
422 		}
423 		last_expand_item = item;
424 		last_expand_item_idx = item_idx;
425 		i = 0;
426 		while (node->next && node->next[i]) {
427 			next = &graph[node->next[i]];
428 			if (next->type == item->type)
429 				break;
430 			if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
431 				node = next;
432 				i = 0;
433 			} else {
434 				++i;
435 			}
436 		}
437 		if (next)
438 			node = next;
439 		user_pattern_size += sizeof(*item);
440 	}
441 	user_pattern_size += sizeof(*item); /* Handle END item. */
442 	lsize += user_pattern_size;
443 	if (lsize > size)
444 		return -EINVAL;
445 	/* Copy the user pattern in the first entry of the buffer. */
446 	rte_memcpy(addr, pattern, user_pattern_size);
447 	addr = (void *)(((uintptr_t)addr) + user_pattern_size);
448 	buf->entries = 1;
449 	/* Start expanding. */
450 	memset(flow_items, 0, sizeof(flow_items));
451 	user_pattern_size -= sizeof(*item);
452 	/*
453 	 * Check whether the last valid item has its spec set and, if needed,
454 	 * complete the pattern so it can be used for expansion.
455 	 */
456 	missed_item.type = mlx5_flow_expand_rss_item_complete(last_expand_item);
457 	if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
458 		/* Item type END indicates expansion is not required. */
459 		return lsize;
460 	}
461 	if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
462 		next = NULL;
463 		missed = 1;
464 		i = 0;
465 		while (node->next && node->next[i]) {
466 			next = &graph[node->next[i]];
467 			if (next->type == missed_item.type) {
468 				flow_items[0].type = missed_item.type;
469 				flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
470 				break;
471 			}
472 			if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
473 				node = next;
474 				i = 0;
475 			} else {
476 				++i;
477 			}
478 			next = NULL;
479 		}
480 	}
481 	if (next && missed) {
482 		elt = 2; /* missed item + item end. */
483 		node = next;
484 		lsize += elt * sizeof(*item) + user_pattern_size;
485 		if (lsize > size)
486 			return -EINVAL;
487 		if (node->rss_types & types) {
488 			buf->entry[buf->entries].priority = 1;
489 			buf->entry[buf->entries].pattern = addr;
490 			buf->entries++;
491 			rte_memcpy(addr, buf->entry[0].pattern,
492 				   user_pattern_size);
493 			addr = (void *)(((uintptr_t)addr) + user_pattern_size);
494 			rte_memcpy(addr, flow_items, elt * sizeof(*item));
495 			addr = (void *)(((uintptr_t)addr) +
496 					elt * sizeof(*item));
497 		}
498 	} else if (last_expand_item != NULL) {
499 		node = mlx5_flow_expand_rss_adjust_node(pattern,
500 				last_expand_item_idx, graph, node);
501 	}
502 	memset(flow_items, 0, sizeof(flow_items));
503 	next_node = mlx5_flow_expand_rss_skip_explicit(graph,
504 			node->next);
505 	stack[stack_pos] = next_node;
506 	node = next_node ? &graph[*next_node] : NULL;
507 	while (node) {
508 		flow_items[stack_pos].type = node->type;
509 		if (node->rss_types & types) {
510 			size_t n;
511 			/*
512 			 * Compute the number of items to copy from the
513 			 * expansion and copy them.
514 			 * When stack_pos is 0, there is one element in it,
515 			 * plus the additional END item.
516 			 */
517 			elt = stack_pos + 2;
518 			flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
519 			lsize += elt * sizeof(*item) + user_pattern_size;
520 			if (lsize > size)
521 				return -EINVAL;
522 			n = elt * sizeof(*item);
523 			buf->entry[buf->entries].priority =
524 				stack_pos + 1 + missed;
525 			buf->entry[buf->entries].pattern = addr;
526 			buf->entries++;
527 			rte_memcpy(addr, buf->entry[0].pattern,
528 				   user_pattern_size);
529 			addr = (void *)(((uintptr_t)addr) +
530 					user_pattern_size);
531 			rte_memcpy(addr, &missed_item,
532 				   missed * sizeof(*item));
533 			addr = (void *)(((uintptr_t)addr) +
534 				missed * sizeof(*item));
535 			rte_memcpy(addr, flow_items, n);
536 			addr = (void *)(((uintptr_t)addr) + n);
537 		}
538 		/* Go deeper. */
539 		if (!(node->node_flags & MLX5_EXPANSION_NODE_OPTIONAL) &&
540 				node->next) {
541 			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
542 					node->next);
543 			if (stack_pos++ == MLX5_RSS_EXP_ELT_N) {
544 				rte_errno = E2BIG;
545 				return -rte_errno;
546 			}
547 			stack[stack_pos] = next_node;
548 		} else if (*(next_node + 1)) {
549 			/* Follow up with the next possibility. */
550 			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
551 					++next_node);
552 		} else if (!stack_pos) {
553 			/*
554 			 * Traversal over the different paths is complete.
555 			 * Advance next_node to the terminator.
556 			 */
557 			++next_node;
558 		} else {
559 			/* Move to the next path. */
560 			while (stack_pos) {
561 				next_node = stack[--stack_pos];
562 				next_node++;
563 				if (*next_node)
564 					break;
565 			}
566 			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
567 					next_node);
568 			stack[stack_pos] = next_node;
569 		}
570 		node = next_node && *next_node ? &graph[*next_node] : NULL;
571 	}
572 	return lsize;
573 }
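/*
 * Usage sketch (editorial addition, not part of the original revision): a
 * caller typically reserves one flat buffer, expands the user pattern
 * against mlx5_support_expansion and then creates one device flow per
 * resulting entry. The buffer size, the items and rss_types variables and
 * the create_one_flow() helper below are assumptions for illustration only.
 *
 *	union {
 *		struct mlx5_flow_expand_rss buf;
 *		uint8_t buffer[4096];
 *	} expand_buffer;
 *	int ret;
 *	uint32_t i;
 *
 *	ret = mlx5_flow_expand_rss(&expand_buffer.buf,
 *				   sizeof(expand_buffer.buffer),
 *				   items, rss_types,
 *				   mlx5_support_expansion,
 *				   MLX5_EXPANSION_ROOT);
 *	if (ret < 0)
 *		return ret;
 *	for (i = 0; i < expand_buffer.buf.entries; ++i)
 *		create_one_flow(expand_buffer.buf.entry[i].pattern,
 *				expand_buffer.buf.entry[i].priority);
 */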
574 
575 enum mlx5_expansion {
576 	MLX5_EXPANSION_ROOT,
577 	MLX5_EXPANSION_ROOT_OUTER,
578 	MLX5_EXPANSION_OUTER_ETH,
579 	MLX5_EXPANSION_OUTER_VLAN,
580 	MLX5_EXPANSION_OUTER_IPV4,
581 	MLX5_EXPANSION_OUTER_IPV4_UDP,
582 	MLX5_EXPANSION_OUTER_IPV4_TCP,
583 	MLX5_EXPANSION_OUTER_IPV4_ESP,
584 	MLX5_EXPANSION_OUTER_IPV4_ICMP,
585 	MLX5_EXPANSION_OUTER_IPV6,
586 	MLX5_EXPANSION_OUTER_IPV6_UDP,
587 	MLX5_EXPANSION_OUTER_IPV6_TCP,
588 	MLX5_EXPANSION_OUTER_IPV6_ESP,
589 	MLX5_EXPANSION_OUTER_IPV6_ICMP6,
590 	MLX5_EXPANSION_VXLAN,
591 	MLX5_EXPANSION_STD_VXLAN,
592 	MLX5_EXPANSION_L3_VXLAN,
593 	MLX5_EXPANSION_VXLAN_GPE,
594 	MLX5_EXPANSION_GRE,
595 	MLX5_EXPANSION_NVGRE,
596 	MLX5_EXPANSION_GRE_KEY,
597 	MLX5_EXPANSION_MPLS,
598 	MLX5_EXPANSION_ETH,
599 	MLX5_EXPANSION_VLAN,
600 	MLX5_EXPANSION_IPV4,
601 	MLX5_EXPANSION_IPV4_UDP,
602 	MLX5_EXPANSION_IPV4_TCP,
603 	MLX5_EXPANSION_IPV4_ESP,
604 	MLX5_EXPANSION_IPV4_ICMP,
605 	MLX5_EXPANSION_IPV6,
606 	MLX5_EXPANSION_IPV6_UDP,
607 	MLX5_EXPANSION_IPV6_TCP,
608 	MLX5_EXPANSION_IPV6_ESP,
609 	MLX5_EXPANSION_IPV6_ICMP6,
610 	MLX5_EXPANSION_IPV6_FRAG_EXT,
611 	MLX5_EXPANSION_GTP,
612 	MLX5_EXPANSION_GENEVE,
613 };
614 
615 /** Supported expansion of items. */
616 static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
617 	[MLX5_EXPANSION_ROOT] = {
618 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
619 						  MLX5_EXPANSION_IPV4,
620 						  MLX5_EXPANSION_IPV6),
621 		.type = RTE_FLOW_ITEM_TYPE_END,
622 	},
623 	[MLX5_EXPANSION_ROOT_OUTER] = {
624 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
625 						  MLX5_EXPANSION_OUTER_IPV4,
626 						  MLX5_EXPANSION_OUTER_IPV6),
627 		.type = RTE_FLOW_ITEM_TYPE_END,
628 	},
629 	[MLX5_EXPANSION_OUTER_ETH] = {
630 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
631 		.type = RTE_FLOW_ITEM_TYPE_ETH,
632 		.rss_types = 0,
633 	},
634 	[MLX5_EXPANSION_OUTER_VLAN] = {
635 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
636 						  MLX5_EXPANSION_OUTER_IPV6),
637 		.type = RTE_FLOW_ITEM_TYPE_VLAN,
638 		.node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
639 	},
640 	[MLX5_EXPANSION_OUTER_IPV4] = {
641 		.next = MLX5_FLOW_EXPAND_RSS_NEXT
642 			(MLX5_EXPANSION_OUTER_IPV4_UDP,
643 			 MLX5_EXPANSION_OUTER_IPV4_TCP,
644 			 MLX5_EXPANSION_OUTER_IPV4_ESP,
645 			 MLX5_EXPANSION_OUTER_IPV4_ICMP,
646 			 MLX5_EXPANSION_GRE,
647 			 MLX5_EXPANSION_NVGRE,
648 			 MLX5_EXPANSION_IPV4,
649 			 MLX5_EXPANSION_IPV6),
650 		.type = RTE_FLOW_ITEM_TYPE_IPV4,
651 		.rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
652 			RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
653 	},
654 	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
655 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
656 						  MLX5_EXPANSION_VXLAN_GPE,
657 						  MLX5_EXPANSION_MPLS,
658 						  MLX5_EXPANSION_GENEVE,
659 						  MLX5_EXPANSION_GTP),
660 		.type = RTE_FLOW_ITEM_TYPE_UDP,
661 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
662 	},
663 	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
664 		.type = RTE_FLOW_ITEM_TYPE_TCP,
665 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
666 	},
667 	[MLX5_EXPANSION_OUTER_IPV4_ESP] = {
668 		.type = RTE_FLOW_ITEM_TYPE_ESP,
669 		.rss_types = RTE_ETH_RSS_ESP,
670 	},
671 	[MLX5_EXPANSION_OUTER_IPV4_ICMP] = {
672 		.type = RTE_FLOW_ITEM_TYPE_ICMP,
673 	},
674 	[MLX5_EXPANSION_OUTER_IPV6] = {
675 		.next = MLX5_FLOW_EXPAND_RSS_NEXT
676 			(MLX5_EXPANSION_OUTER_IPV6_UDP,
677 			 MLX5_EXPANSION_OUTER_IPV6_TCP,
678 			 MLX5_EXPANSION_OUTER_IPV6_ESP,
679 			 MLX5_EXPANSION_OUTER_IPV6_ICMP6,
680 			 MLX5_EXPANSION_IPV4,
681 			 MLX5_EXPANSION_IPV6,
682 			 MLX5_EXPANSION_GRE,
683 			 MLX5_EXPANSION_NVGRE),
684 		.type = RTE_FLOW_ITEM_TYPE_IPV6,
685 		.rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
686 			RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
687 	},
688 	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
689 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
690 						  MLX5_EXPANSION_VXLAN_GPE,
691 						  MLX5_EXPANSION_MPLS,
692 						  MLX5_EXPANSION_GENEVE,
693 						  MLX5_EXPANSION_GTP),
694 		.type = RTE_FLOW_ITEM_TYPE_UDP,
695 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
696 	},
697 	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
698 		.type = RTE_FLOW_ITEM_TYPE_TCP,
699 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
700 	},
701 	[MLX5_EXPANSION_OUTER_IPV6_ESP] = {
702 		.type = RTE_FLOW_ITEM_TYPE_ESP,
703 		.rss_types = RTE_ETH_RSS_ESP,
704 	},
705 	[MLX5_EXPANSION_OUTER_IPV6_ICMP6] = {
706 		.type = RTE_FLOW_ITEM_TYPE_ICMP6,
707 	},
708 	[MLX5_EXPANSION_VXLAN] = {
709 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
710 						  MLX5_EXPANSION_IPV4,
711 						  MLX5_EXPANSION_IPV6),
712 		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
713 	},
714 	[MLX5_EXPANSION_STD_VXLAN] = {
715 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
716 		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
717 	},
718 	[MLX5_EXPANSION_L3_VXLAN] = {
719 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
720 						  MLX5_EXPANSION_IPV6),
721 		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
722 	},
723 	[MLX5_EXPANSION_VXLAN_GPE] = {
724 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
725 						  MLX5_EXPANSION_IPV4,
726 						  MLX5_EXPANSION_IPV6),
727 		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
728 	},
729 	[MLX5_EXPANSION_GRE] = {
730 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
731 						  MLX5_EXPANSION_IPV4,
732 						  MLX5_EXPANSION_IPV6,
733 						  MLX5_EXPANSION_GRE_KEY,
734 						  MLX5_EXPANSION_MPLS),
735 		.type = RTE_FLOW_ITEM_TYPE_GRE,
736 	},
737 	[MLX5_EXPANSION_GRE_KEY] = {
738 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
739 						  MLX5_EXPANSION_IPV6,
740 						  MLX5_EXPANSION_MPLS),
741 		.type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
742 		.node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
743 	},
744 	[MLX5_EXPANSION_NVGRE] = {
745 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
746 		.type = RTE_FLOW_ITEM_TYPE_NVGRE,
747 	},
748 	[MLX5_EXPANSION_MPLS] = {
749 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
750 						  MLX5_EXPANSION_IPV6,
751 						  MLX5_EXPANSION_ETH),
752 		.type = RTE_FLOW_ITEM_TYPE_MPLS,
753 		.node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
754 	},
755 	[MLX5_EXPANSION_ETH] = {
756 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
757 		.type = RTE_FLOW_ITEM_TYPE_ETH,
758 	},
759 	[MLX5_EXPANSION_VLAN] = {
760 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
761 						  MLX5_EXPANSION_IPV6),
762 		.type = RTE_FLOW_ITEM_TYPE_VLAN,
763 		.node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
764 	},
765 	[MLX5_EXPANSION_IPV4] = {
766 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
767 						  MLX5_EXPANSION_IPV4_TCP,
768 						  MLX5_EXPANSION_IPV4_ESP,
769 						  MLX5_EXPANSION_IPV4_ICMP),
770 		.type = RTE_FLOW_ITEM_TYPE_IPV4,
771 		.rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
772 			RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
773 	},
774 	[MLX5_EXPANSION_IPV4_UDP] = {
775 		.type = RTE_FLOW_ITEM_TYPE_UDP,
776 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
777 	},
778 	[MLX5_EXPANSION_IPV4_TCP] = {
779 		.type = RTE_FLOW_ITEM_TYPE_TCP,
780 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
781 	},
782 	[MLX5_EXPANSION_IPV4_ESP] = {
783 		.type = RTE_FLOW_ITEM_TYPE_ESP,
784 		.rss_types = RTE_ETH_RSS_ESP,
785 	},
786 	[MLX5_EXPANSION_IPV4_ICMP] = {
787 		.type = RTE_FLOW_ITEM_TYPE_ICMP,
788 	},
789 	[MLX5_EXPANSION_IPV6] = {
790 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
791 						  MLX5_EXPANSION_IPV6_TCP,
792 						  MLX5_EXPANSION_IPV6_ESP,
793 						  MLX5_EXPANSION_IPV6_ICMP6,
794 						  MLX5_EXPANSION_IPV6_FRAG_EXT),
795 		.type = RTE_FLOW_ITEM_TYPE_IPV6,
796 		.rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
797 			RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
798 	},
799 	[MLX5_EXPANSION_IPV6_UDP] = {
800 		.type = RTE_FLOW_ITEM_TYPE_UDP,
801 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
802 	},
803 	[MLX5_EXPANSION_IPV6_TCP] = {
804 		.type = RTE_FLOW_ITEM_TYPE_TCP,
805 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
806 	},
807 	[MLX5_EXPANSION_IPV6_ESP] = {
808 		.type = RTE_FLOW_ITEM_TYPE_ESP,
809 		.rss_types = RTE_ETH_RSS_ESP,
810 	},
811 	[MLX5_EXPANSION_IPV6_FRAG_EXT] = {
812 		.type = RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
813 	},
814 	[MLX5_EXPANSION_IPV6_ICMP6] = {
815 		.type = RTE_FLOW_ITEM_TYPE_ICMP6,
816 	},
817 	[MLX5_EXPANSION_GTP] = {
818 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
819 						  MLX5_EXPANSION_IPV6),
820 		.type = RTE_FLOW_ITEM_TYPE_GTP,
821 	},
822 	[MLX5_EXPANSION_GENEVE] = {
823 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
824 						  MLX5_EXPANSION_IPV4,
825 						  MLX5_EXPANSION_IPV6),
826 		.type = RTE_FLOW_ITEM_TYPE_GENEVE,
827 	},
828 };
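/*
 * Walk-through (editorial addition): with the graph above, a user pattern
 * "eth / ipv4 / end" expanded with
 * types == (RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV4_TCP)
 * keeps the original pattern as entry[0] (priority offset 0) and adds:
 *
 *	entry[1]: eth / ipv4 / udp / end	(priority offset 1)
 *	entry[2]: eth / ipv4 / tcp / end	(priority offset 1)
 *
 * The ESP and ICMP branches are skipped because their rss_types do not
 * intersect the requested types.
 */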
829 
830 static struct rte_flow_action_handle *
831 mlx5_action_handle_create(struct rte_eth_dev *dev,
832 			  const struct rte_flow_indir_action_conf *conf,
833 			  const struct rte_flow_action *action,
834 			  struct rte_flow_error *error);
835 static int mlx5_action_handle_destroy
836 				(struct rte_eth_dev *dev,
837 				 struct rte_flow_action_handle *handle,
838 				 struct rte_flow_error *error);
839 static int mlx5_action_handle_update
840 				(struct rte_eth_dev *dev,
841 				 struct rte_flow_action_handle *handle,
842 				 const void *update,
843 				 struct rte_flow_error *error);
844 static int mlx5_action_handle_query
845 				(struct rte_eth_dev *dev,
846 				 const struct rte_flow_action_handle *handle,
847 				 void *data,
848 				 struct rte_flow_error *error);
849 static int
850 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
851 		    struct rte_flow_tunnel *app_tunnel,
852 		    struct rte_flow_action **actions,
853 		    uint32_t *num_of_actions,
854 		    struct rte_flow_error *error);
855 static int
856 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
857 		       struct rte_flow_tunnel *app_tunnel,
858 		       struct rte_flow_item **items,
859 		       uint32_t *num_of_items,
860 		       struct rte_flow_error *error);
861 static int
862 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
863 			      struct rte_flow_item *pmd_items,
864 			      uint32_t num_items, struct rte_flow_error *err);
865 static int
866 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
867 				struct rte_flow_action *pmd_actions,
868 				uint32_t num_actions,
869 				struct rte_flow_error *err);
870 static int
871 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
872 				  struct rte_mbuf *m,
873 				  struct rte_flow_restore_info *info,
874 				  struct rte_flow_error *err);
875 static struct rte_flow_item_flex_handle *
876 mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
877 			   const struct rte_flow_item_flex_conf *conf,
878 			   struct rte_flow_error *error);
879 static int
880 mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
881 			    const struct rte_flow_item_flex_handle *handle,
882 			    struct rte_flow_error *error);
883 static int
884 mlx5_flow_info_get(struct rte_eth_dev *dev,
885 		   struct rte_flow_port_info *port_info,
886 		   struct rte_flow_queue_info *queue_info,
887 		   struct rte_flow_error *error);
888 static int
889 mlx5_flow_port_configure(struct rte_eth_dev *dev,
890 			 const struct rte_flow_port_attr *port_attr,
891 			 uint16_t nb_queue,
892 			 const struct rte_flow_queue_attr *queue_attr[],
893 			 struct rte_flow_error *err);
894 
895 static struct rte_flow_pattern_template *
896 mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
897 		const struct rte_flow_pattern_template_attr *attr,
898 		const struct rte_flow_item items[],
899 		struct rte_flow_error *error);
900 
901 static int
902 mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
903 				   struct rte_flow_pattern_template *template,
904 				   struct rte_flow_error *error);
905 static struct rte_flow_actions_template *
906 mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
907 			const struct rte_flow_actions_template_attr *attr,
908 			const struct rte_flow_action actions[],
909 			const struct rte_flow_action masks[],
910 			struct rte_flow_error *error);
911 static int
912 mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
913 				   struct rte_flow_actions_template *template,
914 				   struct rte_flow_error *error);
915 
916 static struct rte_flow_template_table *
917 mlx5_flow_table_create(struct rte_eth_dev *dev,
918 		       const struct rte_flow_template_table_attr *attr,
919 		       struct rte_flow_pattern_template *item_templates[],
920 		       uint8_t nb_item_templates,
921 		       struct rte_flow_actions_template *action_templates[],
922 		       uint8_t nb_action_templates,
923 		       struct rte_flow_error *error);
924 static int
925 mlx5_flow_table_destroy(struct rte_eth_dev *dev,
926 			struct rte_flow_template_table *table,
927 			struct rte_flow_error *error);
928 static struct rte_flow *
929 mlx5_flow_async_flow_create(struct rte_eth_dev *dev,
930 			    uint32_t queue,
931 			    const struct rte_flow_op_attr *attr,
932 			    struct rte_flow_template_table *table,
933 			    const struct rte_flow_item items[],
934 			    uint8_t pattern_template_index,
935 			    const struct rte_flow_action actions[],
936 			    uint8_t action_template_index,
937 			    void *user_data,
938 			    struct rte_flow_error *error);
939 static int
940 mlx5_flow_async_flow_destroy(struct rte_eth_dev *dev,
941 			     uint32_t queue,
942 			     const struct rte_flow_op_attr *attr,
943 			     struct rte_flow *flow,
944 			     void *user_data,
945 			     struct rte_flow_error *error);
946 static int
947 mlx5_flow_pull(struct rte_eth_dev *dev,
948 	       uint32_t queue,
949 	       struct rte_flow_op_result res[],
950 	       uint16_t n_res,
951 	       struct rte_flow_error *error);
952 static int
953 mlx5_flow_push(struct rte_eth_dev *dev,
954 	       uint32_t queue,
955 	       struct rte_flow_error *error);
956 
957 static struct rte_flow_action_handle *
958 mlx5_flow_async_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
959 				 const struct rte_flow_op_attr *attr,
960 				 const struct rte_flow_indir_action_conf *conf,
961 				 const struct rte_flow_action *action,
962 				 void *user_data,
963 				 struct rte_flow_error *error);
964 
965 static int
966 mlx5_flow_async_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
967 				 const struct rte_flow_op_attr *attr,
968 				 struct rte_flow_action_handle *handle,
969 				 const void *update,
970 				 void *user_data,
971 				 struct rte_flow_error *error);
972 
973 static int
974 mlx5_flow_async_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
975 				  const struct rte_flow_op_attr *attr,
976 				  struct rte_flow_action_handle *handle,
977 				  void *user_data,
978 				  struct rte_flow_error *error);
979 
980 static const struct rte_flow_ops mlx5_flow_ops = {
981 	.validate = mlx5_flow_validate,
982 	.create = mlx5_flow_create,
983 	.destroy = mlx5_flow_destroy,
984 	.flush = mlx5_flow_flush,
985 	.isolate = mlx5_flow_isolate,
986 	.query = mlx5_flow_query,
987 	.dev_dump = mlx5_flow_dev_dump,
988 	.get_aged_flows = mlx5_flow_get_aged_flows,
989 	.action_handle_create = mlx5_action_handle_create,
990 	.action_handle_destroy = mlx5_action_handle_destroy,
991 	.action_handle_update = mlx5_action_handle_update,
992 	.action_handle_query = mlx5_action_handle_query,
993 	.tunnel_decap_set = mlx5_flow_tunnel_decap_set,
994 	.tunnel_match = mlx5_flow_tunnel_match,
995 	.tunnel_action_decap_release = mlx5_flow_tunnel_action_release,
996 	.tunnel_item_release = mlx5_flow_tunnel_item_release,
997 	.get_restore_info = mlx5_flow_tunnel_get_restore_info,
998 	.flex_item_create = mlx5_flow_flex_item_create,
999 	.flex_item_release = mlx5_flow_flex_item_release,
1000 	.info_get = mlx5_flow_info_get,
1001 	.pick_transfer_proxy = mlx5_flow_pick_transfer_proxy,
1002 	.configure = mlx5_flow_port_configure,
1003 	.pattern_template_create = mlx5_flow_pattern_template_create,
1004 	.pattern_template_destroy = mlx5_flow_pattern_template_destroy,
1005 	.actions_template_create = mlx5_flow_actions_template_create,
1006 	.actions_template_destroy = mlx5_flow_actions_template_destroy,
1007 	.template_table_create = mlx5_flow_table_create,
1008 	.template_table_destroy = mlx5_flow_table_destroy,
1009 	.async_create = mlx5_flow_async_flow_create,
1010 	.async_destroy = mlx5_flow_async_flow_destroy,
1011 	.pull = mlx5_flow_pull,
1012 	.push = mlx5_flow_push,
1013 	.async_action_handle_create = mlx5_flow_async_action_handle_create,
1014 	.async_action_handle_update = mlx5_flow_async_action_handle_update,
1015 	.async_action_handle_destroy = mlx5_flow_async_action_handle_destroy,
1016 };
1017 
1018 /* Tunnel information. */
1019 struct mlx5_flow_tunnel_info {
1020 	uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
1021 	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
1022 };
1023 
1024 static struct mlx5_flow_tunnel_info tunnels_info[] = {
1025 	{
1026 		.tunnel = MLX5_FLOW_LAYER_VXLAN,
1027 		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
1028 	},
1029 	{
1030 		.tunnel = MLX5_FLOW_LAYER_GENEVE,
1031 		.ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
1032 	},
1033 	{
1034 		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
1035 		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
1036 	},
1037 	{
1038 		.tunnel = MLX5_FLOW_LAYER_GRE,
1039 		.ptype = RTE_PTYPE_TUNNEL_GRE,
1040 	},
1041 	{
1042 		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
1043 		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
1044 	},
1045 	{
1046 		.tunnel = MLX5_FLOW_LAYER_MPLS,
1047 		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
1048 	},
1049 	{
1050 		.tunnel = MLX5_FLOW_LAYER_NVGRE,
1051 		.ptype = RTE_PTYPE_TUNNEL_NVGRE,
1052 	},
1053 	{
1054 		.tunnel = MLX5_FLOW_LAYER_IPIP,
1055 		.ptype = RTE_PTYPE_TUNNEL_IP,
1056 	},
1057 	{
1058 		.tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
1059 		.ptype = RTE_PTYPE_TUNNEL_IP,
1060 	},
1061 	{
1062 		.tunnel = MLX5_FLOW_LAYER_GTP,
1063 		.ptype = RTE_PTYPE_TUNNEL_GTPU,
1064 	},
1065 };
1066 
1067 
1068 
1069 /**
1070  * Translate tag ID to register.
1071  *
1072  * @param[in] dev
1073  *   Pointer to the Ethernet device structure.
1074  * @param[in] feature
1075  *   The feature that requests the register.
1076  * @param[in] id
1077  *   The requested register ID.
1078  * @param[out] error
1079  *   Error description in case of failure.
1080  *
1081  * @return
1082  *   The requested register on success, a negative errno
1083  *   value otherwise and rte_errno is set.
1084  */
1085 int
1086 mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
1087 		     enum mlx5_feature_name feature,
1088 		     uint32_t id,
1089 		     struct rte_flow_error *error)
1090 {
1091 	struct mlx5_priv *priv = dev->data->dev_private;
1092 	struct mlx5_sh_config *config = &priv->sh->config;
1093 	enum modify_reg start_reg;
1094 	bool skip_mtr_reg = false;
1095 
1096 	switch (feature) {
1097 	case MLX5_HAIRPIN_RX:
1098 		return REG_B;
1099 	case MLX5_HAIRPIN_TX:
1100 		return REG_A;
1101 	case MLX5_METADATA_RX:
1102 		switch (config->dv_xmeta_en) {
1103 		case MLX5_XMETA_MODE_LEGACY:
1104 			return REG_B;
1105 		case MLX5_XMETA_MODE_META16:
1106 			return REG_C_0;
1107 		case MLX5_XMETA_MODE_META32:
1108 			return REG_C_1;
1109 		case MLX5_XMETA_MODE_META32_HWS:
1110 			return REG_C_1;
1111 		}
1112 		break;
1113 	case MLX5_METADATA_TX:
1114 		return REG_A;
1115 	case MLX5_METADATA_FDB:
1116 		switch (config->dv_xmeta_en) {
1117 		case MLX5_XMETA_MODE_LEGACY:
1118 			return REG_NON;
1119 		case MLX5_XMETA_MODE_META16:
1120 			return REG_C_0;
1121 		case MLX5_XMETA_MODE_META32:
1122 			return REG_C_1;
1123 		case MLX5_XMETA_MODE_META32_HWS:
1124 			return REG_C_1;
1125 		}
1126 		break;
1127 	case MLX5_FLOW_MARK:
1128 		switch (config->dv_xmeta_en) {
1129 		case MLX5_XMETA_MODE_LEGACY:
1130 		case MLX5_XMETA_MODE_META32_HWS:
1131 			return REG_NON;
1132 		case MLX5_XMETA_MODE_META16:
1133 			return REG_C_1;
1134 		case MLX5_XMETA_MODE_META32:
1135 			return REG_C_0;
1136 		}
1137 		break;
1138 	case MLX5_MTR_ID:
1139 		/*
1140 		 * If meter color and meter id share one register, flow match
1141 		 * should use the meter color register for match.
1142 		 */
1143 		if (priv->mtr_reg_share)
1144 			return priv->mtr_color_reg;
1145 		else
1146 			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
1147 			       REG_C_3;
1148 	case MLX5_MTR_COLOR:
1149 	case MLX5_ASO_FLOW_HIT:
1150 	case MLX5_ASO_CONNTRACK:
1151 	case MLX5_SAMPLE_ID:
1152 		/* All features use the same REG_C. */
1153 		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
1154 		return priv->mtr_color_reg;
1155 	case MLX5_COPY_MARK:
1156 		/*
1157 		 * The metadata COPY_MARK register is used only in the meter
1158 		 * suffix sub-flow, so it is safe to share the same register.
1159 		 */
1160 		return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
1161 	case MLX5_APP_TAG:
1162 		/*
1163 		 * If the meter is enabled, it engages a register for color
1164 		 * match and flow match. If the meter color match does not use
1165 		 * REG_C_2, the REG_C_x used by the meter color match must be
1166 		 * skipped.
1167 		 * If the meter is disabled, all available registers can be used.
1168 		 */
1169 		start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
1170 			    (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
1171 		skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
1172 		if (id > (uint32_t)(REG_C_7 - start_reg))
1173 			return rte_flow_error_set(error, EINVAL,
1174 						  RTE_FLOW_ERROR_TYPE_ITEM,
1175 						  NULL, "invalid tag id");
1176 		if (priv->sh->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
1177 			return rte_flow_error_set(error, ENOTSUP,
1178 						  RTE_FLOW_ERROR_TYPE_ITEM,
1179 						  NULL, "unsupported tag id");
1180 		/*
1181 		 * This case means the meter is using a REG_C_x greater than 2.
1182 		 * Take care not to conflict with the meter color REG_C_x.
1183 		 * If the available index REG_C_y >= REG_C_x, skip the
1184 		 * color register.
1185 		 */
1186 		if (skip_mtr_reg && priv->sh->flow_mreg_c
1187 		    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
1188 			if (id >= (uint32_t)(REG_C_7 - start_reg))
1189 				return rte_flow_error_set(error, EINVAL,
1190 						       RTE_FLOW_ERROR_TYPE_ITEM,
1191 							NULL, "invalid tag id");
1192 			if (priv->sh->flow_mreg_c
1193 			    [id + 1 + start_reg - REG_C_0] != REG_NON)
1194 				return priv->sh->flow_mreg_c
1195 					       [id + 1 + start_reg - REG_C_0];
1196 			return rte_flow_error_set(error, ENOTSUP,
1197 						  RTE_FLOW_ERROR_TYPE_ITEM,
1198 						  NULL, "unsupported tag id");
1199 		}
1200 		return priv->sh->flow_mreg_c[id + start_reg - REG_C_0];
1201 	}
1202 	MLX5_ASSERT(false);
1203 	return rte_flow_error_set(error, EINVAL,
1204 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1205 				  NULL, "invalid feature name");
1206 }
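/*
 * Example (editorial addition): translating application TAG index 0 to a
 * hardware register before building a matcher; the surrounding context is
 * an assumption for illustration.
 *
 *	int reg = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
 *
 *	if (reg < 0)
 *		return reg;
 *
 * The returned value is one of REG_C_2..REG_C_7 depending on the meter
 * configuration handled above, or a negative errno with rte_errno set.
 */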
1207 
1208 /**
1209  * Check extensive flow metadata register support.
1210  *
1211  * @param dev
1212  *   Pointer to rte_eth_dev structure.
1213  *
1214  * @return
1215  *   True if device supports extensive flow metadata register, otherwise false.
1216  */
1217 bool
1218 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
1219 {
1220 	struct mlx5_priv *priv = dev->data->dev_private;
1221 
1222 	/*
1223 	 * Having an available reg_c can be regarded as supporting
1224 	 * extensive flow metadata registers, which could mean:
1225 	 * - metadata register copy action by modify header.
1226 	 * - 16 modify header actions are supported.
1227 	 * - reg_c's are preserved across different domains (FDB and NIC) on
1228 	 *   packet loopback by flow lookup miss.
1229 	 */
1230 	return priv->sh->flow_mreg_c[2] != REG_NON;
1231 }
1232 
1233 /**
1234  * Get the lowest priority.
1235  *
1236  * @param[in] dev
1237  *   Pointer to the Ethernet device structure.
1238  * @param[in] attr
1239  *   Pointer to device flow rule attributes.
1240  *
1241  * @return
1242  *   The lowest priority value for the flow.
1243  */
1244 uint32_t
1245 mlx5_get_lowest_priority(struct rte_eth_dev *dev,
1246 			  const struct rte_flow_attr *attr)
1247 {
1248 	struct mlx5_priv *priv = dev->data->dev_private;
1249 
1250 	if (!attr->group && !(attr->transfer && priv->fdb_def_rule))
1251 		return priv->sh->flow_max_priority - 2;
1252 	return MLX5_NON_ROOT_FLOW_MAX_PRIO - 1;
1253 }
1254 
1255 /**
1256  * Calculate matcher priority of the flow.
1257  *
1258  * @param[in] dev
1259  *   Pointer to the Ethernet device structure.
1260  * @param[in] attr
1261  *   Pointer to device flow rule attributes.
1262  * @param[in] subpriority
1263  *   The priority based on the items.
1264  * @param[in] external
1265  *   Flow is user flow.
1266  * @return
1267  *   The matcher priority of the flow.
1268  */
1269 uint16_t
1270 mlx5_get_matcher_priority(struct rte_eth_dev *dev,
1271 			  const struct rte_flow_attr *attr,
1272 			  uint32_t subpriority, bool external)
1273 {
1274 	uint16_t priority = (uint16_t)attr->priority;
1275 	struct mlx5_priv *priv = dev->data->dev_private;
1276 
1277 	/* NIC root rules */
1278 	if (!attr->group && !attr->transfer) {
1279 		if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1280 			priority = priv->sh->flow_max_priority - 1;
1281 		return mlx5_os_flow_adjust_priority(dev, priority, subpriority);
1282 	/* FDB root rules */
1283 	} else if (attr->transfer && (!external || !priv->fdb_def_rule) &&
1284 		   attr->group == 0 &&
1285 		   attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR) {
1286 		return (priv->sh->flow_max_priority - 1) * 3;
1287 	}
1288 	if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1289 		priority = MLX5_NON_ROOT_FLOW_MAX_PRIO;
1290 	return priority * 3 + subpriority;
1291 }
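/*
 * Worked example (editorial addition): for a non-root rule
 * (attr->group != 0) with attr->priority == 2 and subpriority == 1, the
 * matcher priority is 2 * 3 + 1 == 7. NIC root rules instead go through
 * mlx5_os_flow_adjust_priority(), and FDB root rules with the lowest
 * priority indicator use (flow_max_priority - 1) * 3.
 */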
1292 
1293 /**
1294  * Verify the @p item specifications (spec, last, mask) are compatible with the
1295  * NIC capabilities.
1296  *
1297  * @param[in] item
1298  *   Item specification.
1299  * @param[in] mask
1300  *   @p item->mask or flow default bit-masks.
1301  * @param[in] nic_mask
1302  *   Bit-masks covering supported fields by the NIC to compare with user mask.
1303  * @param[in] size
1304  *   Bit-masks size in bytes.
1305  * @param[in] range_accepted
1306  *   True if range of values is accepted for specific fields, false otherwise.
1307  * @param[out] error
1308  *   Pointer to error structure.
1309  *
1310  * @return
1311  *   0 on success, a negative errno value otherwise and rte_errno is set.
1312  */
1313 int
1314 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
1315 			  const uint8_t *mask,
1316 			  const uint8_t *nic_mask,
1317 			  unsigned int size,
1318 			  bool range_accepted,
1319 			  struct rte_flow_error *error)
1320 {
1321 	unsigned int i;
1322 
1323 	MLX5_ASSERT(nic_mask);
1324 	for (i = 0; i < size; ++i)
1325 		if ((nic_mask[i] | mask[i]) != nic_mask[i])
1326 			return rte_flow_error_set(error, ENOTSUP,
1327 						  RTE_FLOW_ERROR_TYPE_ITEM,
1328 						  item,
1329 						  "mask enables non supported"
1330 						  " bits");
1331 	if (!item->spec && (item->mask || item->last))
1332 		return rte_flow_error_set(error, EINVAL,
1333 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1334 					  "mask/last without a spec is not"
1335 					  " supported");
1336 	if (item->spec && item->last && !range_accepted) {
1337 		uint8_t spec[size];
1338 		uint8_t last[size];
1339 		unsigned int i;
1340 		int ret;
1341 
1342 		for (i = 0; i < size; ++i) {
1343 			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
1344 			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
1345 		}
1346 		ret = memcmp(spec, last, size);
1347 		if (ret != 0)
1348 			return rte_flow_error_set(error, EINVAL,
1349 						  RTE_FLOW_ERROR_TYPE_ITEM,
1350 						  item,
1351 						  "range is not valid");
1352 	}
1353 	return 0;
1354 }
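/*
 * Example (editorial addition): if @p nic_mask covers only
 * hdr.next_proto_id but the user mask also sets bits of hdr.src_addr, the
 * check (nic_mask[i] | mask[i]) != nic_mask[i] fails on the src_addr bytes
 * and the item is rejected with ENOTSUP ("mask enables non supported bits").
 */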
1355 
1356 /**
1357  * Adjust the hash fields according to the @p rss_desc information.
1358  *
1359  * @param[in] rss_desc
1360  *   Pointer to the RSS descriptor (struct mlx5_flow_rss_desc).
1361  * @param[in] tunnel
1362  *   1 when the hash field is for a tunnel item.
1363  * @param[in] layer_types
1364  *   RTE_ETH_RSS_* types.
1365  * @param[in] hash_fields
1366  *   Item hash fields.
1367  *
1368  * @return
1369  *   The hash fields that should be used.
1370  */
1371 uint64_t
1372 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
1373 			    int tunnel __rte_unused, uint64_t layer_types,
1374 			    uint64_t hash_fields)
1375 {
1376 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1377 	int rss_request_inner = rss_desc->level >= 2;
1378 
1379 	/* Check RSS hash level for tunnel. */
1380 	if (tunnel && rss_request_inner)
1381 		hash_fields |= IBV_RX_HASH_INNER;
1382 	else if (tunnel || rss_request_inner)
1383 		return 0;
1384 #endif
1385 	/* Check if requested layer matches RSS hash fields. */
1386 	if (!(rss_desc->types & layer_types))
1387 		return 0;
1388 	return hash_fields;
1389 }
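/*
 * Example (editorial addition): for an inner-RSS request
 * (rss_desc->level >= 2) on a tunnel sub-flow with
 * layer_types == RTE_ETH_RSS_NONFRAG_IPV4_UDP, the given hash_fields are
 * returned with IBV_RX_HASH_INNER added; if the requested RSS types do not
 * include the layer, 0 is returned and the layer does not contribute to
 * the hash.
 */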
1390 
1391 /**
1392  * Look up and set the tunnel ptype in the Rx queue data. Only a single
1393  * ptype can be used; if several tunnel rules are used on this queue, the
1394  * tunnel ptype is cleared.
1395  *
1396  * @param rxq_ctrl
1397  *   Rx queue to update.
1398  */
1399 static void
1400 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
1401 {
1402 	unsigned int i;
1403 	uint32_t tunnel_ptype = 0;
1404 
1405 	/* Look up for the ptype to use. */
1406 	for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
1407 		if (!rxq_ctrl->flow_tunnels_n[i])
1408 			continue;
1409 		if (!tunnel_ptype) {
1410 			tunnel_ptype = tunnels_info[i].ptype;
1411 		} else {
1412 			tunnel_ptype = 0;
1413 			break;
1414 		}
1415 	}
1416 	rxq_ctrl->rxq.tunnel = tunnel_ptype;
1417 }
1418 
1419 /**
1420  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
1421  * flow.
1422  *
1423  * @param[in] dev
1424  *   Pointer to the Ethernet device structure.
1425  * @param[in] dev_handle
1426  *   Pointer to device flow handle structure.
1427  */
1428 void
1429 flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
1430 		       struct mlx5_flow_handle *dev_handle)
1431 {
1432 	struct mlx5_priv *priv = dev->data->dev_private;
1433 	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1434 	struct mlx5_ind_table_obj *ind_tbl = NULL;
1435 	unsigned int i;
1436 
1437 	if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1438 		struct mlx5_hrxq *hrxq;
1439 
1440 		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1441 			      dev_handle->rix_hrxq);
1442 		if (hrxq)
1443 			ind_tbl = hrxq->ind_table;
1444 	} else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1445 		struct mlx5_shared_action_rss *shared_rss;
1446 
1447 		shared_rss = mlx5_ipool_get
1448 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1449 			 dev_handle->rix_srss);
1450 		if (shared_rss)
1451 			ind_tbl = shared_rss->ind_tbl;
1452 	}
1453 	if (!ind_tbl)
1454 		return;
1455 	for (i = 0; i != ind_tbl->queues_n; ++i) {
1456 		int idx = ind_tbl->queues[i];
1457 		struct mlx5_rxq_ctrl *rxq_ctrl;
1458 
1459 		if (mlx5_is_external_rxq(dev, idx))
1460 			continue;
1461 		rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
1462 		MLX5_ASSERT(rxq_ctrl != NULL);
1463 		if (rxq_ctrl == NULL)
1464 			continue;
1465 		/*
1466 		 * To support metadata register copy on Tx loopback,
1467 		 * this must always be enabled (metadata may arrive
1468 		 * from another port, not only from local flows).
1469 		 */
1470 		if (tunnel) {
1471 			unsigned int j;
1472 
1473 			/* Increase the counter matching the flow. */
1474 			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1475 				if ((tunnels_info[j].tunnel &
1476 				     dev_handle->layers) ==
1477 				    tunnels_info[j].tunnel) {
1478 					rxq_ctrl->flow_tunnels_n[j]++;
1479 					break;
1480 				}
1481 			}
1482 			flow_rxq_tunnel_ptype_update(rxq_ctrl);
1483 		}
1484 	}
1485 }
1486 
1487 static void
1488 flow_rxq_mark_flag_set(struct rte_eth_dev *dev)
1489 {
1490 	struct mlx5_priv *priv = dev->data->dev_private;
1491 	struct mlx5_rxq_ctrl *rxq_ctrl;
1492 	uint16_t port_id;
1493 
1494 	if (priv->sh->shared_mark_enabled)
1495 		return;
1496 	if (priv->master || priv->representor) {
1497 		MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
1498 			struct mlx5_priv *opriv =
1499 				rte_eth_devices[port_id].data->dev_private;
1500 
1501 			if (!opriv ||
1502 			    opriv->sh != priv->sh ||
1503 			    opriv->domain_id != priv->domain_id ||
1504 			    opriv->mark_enabled)
1505 				continue;
1506 			LIST_FOREACH(rxq_ctrl, &opriv->rxqsctrl, next) {
1507 				rxq_ctrl->rxq.mark = 1;
1508 			}
1509 			opriv->mark_enabled = 1;
1510 		}
1511 	} else {
1512 		LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
1513 			rxq_ctrl->rxq.mark = 1;
1514 		}
1515 		priv->mark_enabled = 1;
1516 	}
1517 	priv->sh->shared_mark_enabled = 1;
1518 }
1519 
1520 /**
1521  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
1522  *
1523  * @param[in] dev
1524  *   Pointer to the Ethernet device structure.
1525  * @param[in] flow
1526  *   Pointer to flow structure.
1527  */
1528 static void
1529 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1530 {
1531 	struct mlx5_priv *priv = dev->data->dev_private;
1532 	uint32_t handle_idx;
1533 	struct mlx5_flow_handle *dev_handle;
1534 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
1535 
1536 	MLX5_ASSERT(wks);
1537 	if (wks->mark)
1538 		flow_rxq_mark_flag_set(dev);
1539 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1540 		       handle_idx, dev_handle, next)
1541 		flow_drv_rxq_flags_set(dev, dev_handle);
1542 }
1543 
1544 /**
1545  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1546  * device flow if no other flow uses it with the same kind of request.
1547  *
1548  * @param dev
1549  *   Pointer to Ethernet device.
1550  * @param[in] dev_handle
1551  *   Pointer to the device flow handle structure.
1552  */
1553 static void
1554 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
1555 			struct mlx5_flow_handle *dev_handle)
1556 {
1557 	struct mlx5_priv *priv = dev->data->dev_private;
1558 	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1559 	struct mlx5_ind_table_obj *ind_tbl = NULL;
1560 	unsigned int i;
1561 
1562 	if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1563 		struct mlx5_hrxq *hrxq;
1564 
1565 		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1566 			      dev_handle->rix_hrxq);
1567 		if (hrxq)
1568 			ind_tbl = hrxq->ind_table;
1569 	} else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1570 		struct mlx5_shared_action_rss *shared_rss;
1571 
1572 		shared_rss = mlx5_ipool_get
1573 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1574 			 dev_handle->rix_srss);
1575 		if (shared_rss)
1576 			ind_tbl = shared_rss->ind_tbl;
1577 	}
1578 	if (!ind_tbl)
1579 		return;
1580 	MLX5_ASSERT(dev->data->dev_started);
1581 	for (i = 0; i != ind_tbl->queues_n; ++i) {
1582 		int idx = ind_tbl->queues[i];
1583 		struct mlx5_rxq_ctrl *rxq_ctrl;
1584 
1585 		if (mlx5_is_external_rxq(dev, idx))
1586 			continue;
1587 		rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
1588 		MLX5_ASSERT(rxq_ctrl != NULL);
1589 		if (rxq_ctrl == NULL)
1590 			continue;
1591 		if (tunnel) {
1592 			unsigned int j;
1593 
1594 			/* Decrease the counter matching the flow. */
1595 			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1596 				if ((tunnels_info[j].tunnel &
1597 				     dev_handle->layers) ==
1598 				    tunnels_info[j].tunnel) {
1599 					rxq_ctrl->flow_tunnels_n[j]--;
1600 					break;
1601 				}
1602 			}
1603 			flow_rxq_tunnel_ptype_update(rxq_ctrl);
1604 		}
1605 	}
1606 }
1607 
1608 /**
1609  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1610  * @p flow if no other flow uses it with the same kind of request.
1611  *
1612  * @param dev
1613  *   Pointer to Ethernet device.
1614  * @param[in] flow
1615  *   Pointer to the flow.
1616  */
1617 static void
1618 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1619 {
1620 	struct mlx5_priv *priv = dev->data->dev_private;
1621 	uint32_t handle_idx;
1622 	struct mlx5_flow_handle *dev_handle;
1623 
1624 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1625 		       handle_idx, dev_handle, next)
1626 		flow_drv_rxq_flags_trim(dev, dev_handle);
1627 }
1628 
1629 /**
1630  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
1631  *
1632  * @param dev
1633  *   Pointer to Ethernet device.
1634  */
1635 static void
1636 flow_rxq_flags_clear(struct rte_eth_dev *dev)
1637 {
1638 	struct mlx5_priv *priv = dev->data->dev_private;
1639 	unsigned int i;
1640 
1641 	for (i = 0; i != priv->rxqs_n; ++i) {
1642 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1643 		unsigned int j;
1644 
1645 		if (rxq == NULL || rxq->ctrl == NULL)
1646 			continue;
1647 		rxq->ctrl->rxq.mark = 0;
1648 		for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
1649 			rxq->ctrl->flow_tunnels_n[j] = 0;
1650 		rxq->ctrl->rxq.tunnel = 0;
1651 	}
1652 	priv->mark_enabled = 0;
1653 	priv->sh->shared_mark_enabled = 0;
1654 }
1655 
1656 /**
1657  * Set the Rx queue dynamic metadata (mask and offset) for a flow
1658  *
1659  * @param[in] dev
1660  *   Pointer to the Ethernet device structure.
1661  */
1662 void
1663 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
1664 {
1665 	struct mlx5_priv *priv = dev->data->dev_private;
1666 	unsigned int i;
1667 
1668 	for (i = 0; i != priv->rxqs_n; ++i) {
1669 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1670 		struct mlx5_rxq_data *data;
1671 
1672 		if (rxq == NULL || rxq->ctrl == NULL)
1673 			continue;
1674 		data = &rxq->ctrl->rxq;
1675 		if (!rte_flow_dynf_metadata_avail()) {
1676 			data->dynf_meta = 0;
1677 			data->flow_meta_mask = 0;
1678 			data->flow_meta_offset = -1;
1679 			data->flow_meta_port_mask = 0;
1680 		} else {
1681 			data->dynf_meta = 1;
1682 			data->flow_meta_mask = rte_flow_dynf_metadata_mask;
1683 			data->flow_meta_offset = rte_flow_dynf_metadata_offs;
1684 			data->flow_meta_port_mask = priv->sh->dv_meta_mask;
1685 		}
1686 	}
1687 }
1688 
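/*
 * Illustrative sketch, not part of the driver: the per-queue metadata fields
 * above are only filled in when the application has registered the dynamic
 * metadata mbuf field before the port is started. The helper name below is
 * an assumption made for the example.
 */
static __rte_unused int
example_enable_rx_metadata(void)
{
	/* Register the dynamic mbuf field/flag used for flow metadata. */
	int ret = rte_flow_dynf_metadata_register();

	if (ret < 0)
		return ret;
	/*
	 * Once the port is started, mlx5_flow_rxq_dynf_metadata_set() sees
	 * rte_flow_dynf_metadata_avail() != 0 and propagates the offset and
	 * mask to every Rx queue.
	 */
	return 0;
}
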
1689 /*
1690  * Return a pointer to the desired action in the list of actions.
1691  *
1692  * @param[in] actions
1693  *   The list of actions to search the action in.
1694  * @param[in] action
1695  *   The action to find.
1696  *
1697  * @return
1698  *   Pointer to the action in the list, if found. NULL otherwise.
1699  */
1700 const struct rte_flow_action *
1701 mlx5_flow_find_action(const struct rte_flow_action *actions,
1702 		      enum rte_flow_action_type action)
1703 {
1704 	if (actions == NULL)
1705 		return NULL;
1706 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
1707 		if (actions->type == action)
1708 			return actions;
1709 	return NULL;
1710 }
1711 
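/*
 * Minimal usage sketch for the helper above; the wrapper name is an
 * assumption. It extracts the RSS configuration, if any, from an action list.
 */
static __rte_unused const struct rte_flow_action_rss *
example_get_rss_conf(const struct rte_flow_action *actions)
{
	const struct rte_flow_action *act;

	act = mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);
	return act ? act->conf : NULL;
}
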
1712 /*
1713  * Validate the flag action.
1714  *
1715  * @param[in] action_flags
1716  *   Bit-fields that holds the actions detected until now.
1717  * @param[in] attr
1718  *   Attributes of flow that includes this action.
1719  * @param[out] error
1720  *   Pointer to error structure.
1721  *
1722  * @return
1723  *   0 on success, a negative errno value otherwise and rte_errno is set.
1724  */
1725 int
1726 mlx5_flow_validate_action_flag(uint64_t action_flags,
1727 			       const struct rte_flow_attr *attr,
1728 			       struct rte_flow_error *error)
1729 {
1730 	if (action_flags & MLX5_FLOW_ACTION_MARK)
1731 		return rte_flow_error_set(error, EINVAL,
1732 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1733 					  "can't mark and flag in same flow");
1734 	if (action_flags & MLX5_FLOW_ACTION_FLAG)
1735 		return rte_flow_error_set(error, EINVAL,
1736 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1737 					  "can't have 2 flag"
1738 					  " actions in same flow");
1739 	if (attr->egress)
1740 		return rte_flow_error_set(error, ENOTSUP,
1741 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1742 					  "flag action not supported for "
1743 					  "egress");
1744 	return 0;
1745 }
1746 
1747 /*
1748  * Validate the mark action.
1749  *
1750  * @param[in] action
1751  *   Pointer to the mark action.
1752  * @param[in] action_flags
1753  *   Bit-fields that holds the actions detected until now.
1754  * @param[in] attr
1755  *   Attributes of flow that includes this action.
1756  * @param[out] error
1757  *   Pointer to error structure.
1758  *
1759  * @return
1760  *   0 on success, a negative errno value otherwise and rte_errno is set.
1761  */
1762 int
1763 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
1764 			       uint64_t action_flags,
1765 			       const struct rte_flow_attr *attr,
1766 			       struct rte_flow_error *error)
1767 {
1768 	const struct rte_flow_action_mark *mark = action->conf;
1769 
1770 	if (!mark)
1771 		return rte_flow_error_set(error, EINVAL,
1772 					  RTE_FLOW_ERROR_TYPE_ACTION,
1773 					  action,
1774 					  "configuration cannot be null");
1775 	if (mark->id >= MLX5_FLOW_MARK_MAX)
1776 		return rte_flow_error_set(error, EINVAL,
1777 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1778 					  &mark->id,
1779 					  "mark id must be in 0 <= id < "
1780 					  RTE_STR(MLX5_FLOW_MARK_MAX));
1781 	if (action_flags & MLX5_FLOW_ACTION_FLAG)
1782 		return rte_flow_error_set(error, EINVAL,
1783 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1784 					  "can't flag and mark in same flow");
1785 	if (action_flags & MLX5_FLOW_ACTION_MARK)
1786 		return rte_flow_error_set(error, EINVAL,
1787 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1788 					  "can't have 2 mark actions in same"
1789 					  " flow");
1790 	if (attr->egress)
1791 		return rte_flow_error_set(error, ENOTSUP,
1792 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1793 					  "mark action not supported for "
1794 					  "egress");
1795 	return 0;
1796 }
1797 
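/*
 * Illustrative mark action accepted by the checks above; the id value is an
 * arbitrary assumption, any value below MLX5_FLOW_MARK_MAX passes the range
 * check.
 */
static const struct rte_flow_action_mark example_mark_conf = {
	.id = 0x1234,
};
static const struct rte_flow_action example_mark_action __rte_unused = {
	.type = RTE_FLOW_ACTION_TYPE_MARK,
	.conf = &example_mark_conf,
};
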
1798 /*
1799  * Validate the drop action.
1800  *
1801  * @param[in] action_flags
1802  *   Bit-fields that holds the actions detected until now.
1803  * @param[in] attr
1804  *   Attributes of flow that includes this action.
1805  * @param[out] error
1806  *   Pointer to error structure.
1807  *
1808  * @return
1809  *   0 on success, a negative errno value otherwise and rte_errno is set.
1810  */
1811 int
1812 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1813 			       const struct rte_flow_attr *attr,
1814 			       struct rte_flow_error *error)
1815 {
1816 	if (attr->egress)
1817 		return rte_flow_error_set(error, ENOTSUP,
1818 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1819 					  "drop action not supported for "
1820 					  "egress");
1821 	return 0;
1822 }
1823 
1824 /*
1825  * Validate the queue action.
1826  *
1827  * @param[in] action
1828  *   Pointer to the queue action.
1829  * @param[in] action_flags
1830  *   Bit-fields that holds the actions detected until now.
1831  * @param[in] dev
1832  *   Pointer to the Ethernet device structure.
1833  * @param[in] attr
1834  *   Attributes of flow that includes this action.
1835  * @param[out] error
1836  *   Pointer to error structure.
1837  *
1838  * @return
1839  *   0 on success, a negative errno value otherwise and rte_errno is set.
1840  */
1841 int
1842 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1843 				uint64_t action_flags,
1844 				struct rte_eth_dev *dev,
1845 				const struct rte_flow_attr *attr,
1846 				struct rte_flow_error *error)
1847 {
1848 	struct mlx5_priv *priv = dev->data->dev_private;
1849 	const struct rte_flow_action_queue *queue = action->conf;
1850 
1851 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1852 		return rte_flow_error_set(error, EINVAL,
1853 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1854 					  "can't have 2 fate actions in"
1855 					  " same flow");
1856 	if (attr->egress)
1857 		return rte_flow_error_set(error, ENOTSUP,
1858 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1859 					  "queue action not supported for egress.");
1860 	if (mlx5_is_external_rxq(dev, queue->index))
1861 		return 0;
1862 	if (!priv->rxqs_n)
1863 		return rte_flow_error_set(error, EINVAL,
1864 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1865 					  NULL, "No Rx queues configured");
1866 	if (queue->index >= priv->rxqs_n)
1867 		return rte_flow_error_set(error, EINVAL,
1868 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1869 					  &queue->index,
1870 					  "queue index out of range");
1871 	if (mlx5_rxq_get(dev, queue->index) == NULL)
1872 		return rte_flow_error_set(error, EINVAL,
1873 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1874 					  &queue->index,
1875 					  "queue is not configured");
1876 	return 0;
1877 }
1878 
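/*
 * Illustrative queue action accepted by the checks above on a port with at
 * least one configured Rx queue; queue index 0 is an assumption.
 */
static const struct rte_flow_action_queue example_queue_conf = {
	.index = 0,
};
static const struct rte_flow_action example_queue_action __rte_unused = {
	.type = RTE_FLOW_ACTION_TYPE_QUEUE,
	.conf = &example_queue_conf,
};
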
1879 /**
1880  * Validate queue numbers for device RSS.
1881  *
1882  * @param[in] dev
1883  *   Configured device.
1884  * @param[in] queues
1885  *   Array of queue numbers.
1886  * @param[in] queues_n
1887  *   Size of the @p queues array.
1888  * @param[out] error
1889  *   On error, filled with a textual error description.
1890  * @param[out] queue_idx
1891  *   On error, filled with an offending queue index in @p queues array.
1892  *
1893  * @return
1894  *   0 on success, a negative errno code on error.
1895  */
1896 static int
1897 mlx5_validate_rss_queues(struct rte_eth_dev *dev,
1898 			 const uint16_t *queues, uint32_t queues_n,
1899 			 const char **error, uint32_t *queue_idx)
1900 {
1901 	const struct mlx5_priv *priv = dev->data->dev_private;
1902 	bool is_hairpin = false;
1903 	bool is_ext_rss = false;
1904 	uint32_t i;
1905 
1906 	for (i = 0; i != queues_n; ++i) {
1907 		struct mlx5_rxq_ctrl *rxq_ctrl;
1908 
1909 		if (mlx5_is_external_rxq(dev, queues[i])) {
1910 			is_ext_rss = true;
1911 			continue;
1912 		}
1913 		if (is_ext_rss) {
1914 			*error = "Combining external and regular RSS queues is not supported";
1915 			*queue_idx = i;
1916 			return -ENOTSUP;
1917 		}
1918 		if (queues[i] >= priv->rxqs_n) {
1919 			*error = "queue index out of range";
1920 			*queue_idx = i;
1921 			return -EINVAL;
1922 		}
1923 		rxq_ctrl = mlx5_rxq_ctrl_get(dev, queues[i]);
1924 		if (rxq_ctrl == NULL) {
1925 			*error = "queue is not configured";
1926 			*queue_idx = i;
1927 			return -EINVAL;
1928 		}
1929 		if (i == 0 && rxq_ctrl->is_hairpin)
1930 			is_hairpin = true;
1931 		if (is_hairpin != rxq_ctrl->is_hairpin) {
1932 			*error = "combining hairpin and regular RSS queues is not supported";
1933 			*queue_idx = i;
1934 			return -ENOTSUP;
1935 		}
1936 	}
1937 	return 0;
1938 }
1939 
1940 /*
1941  * Validate the RSS action configuration.
1942  *
1943  * @param[in] dev
1944  *   Pointer to the Ethernet device structure.
1945  * @param[in] action
1946  *   Pointer to the RSS action.
1947  * @param[out] error
1948  *   Pointer to error structure.
1949  *
1950  * @return
1951  *   0 on success, a negative errno value otherwise and rte_errno is set.
1952  */
1953 int
1954 mlx5_validate_action_rss(struct rte_eth_dev *dev,
1955 			 const struct rte_flow_action *action,
1956 			 struct rte_flow_error *error)
1957 {
1958 	struct mlx5_priv *priv = dev->data->dev_private;
1959 	const struct rte_flow_action_rss *rss = action->conf;
1960 	int ret;
1961 	const char *message;
1962 	uint32_t queue_idx;
1963 
1964 	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1965 	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1966 		return rte_flow_error_set(error, ENOTSUP,
1967 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1968 					  &rss->func,
1969 					  "RSS hash function not supported");
1970 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1971 	if (rss->level > 2)
1972 #else
1973 	if (rss->level > 1)
1974 #endif
1975 		return rte_flow_error_set(error, ENOTSUP,
1976 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1977 					  &rss->level,
1978 					  "tunnel RSS is not supported");
1979 	/* allow RSS key_len 0 in case of NULL (default) RSS key. */
1980 	if (rss->key_len == 0 && rss->key != NULL)
1981 		return rte_flow_error_set(error, ENOTSUP,
1982 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1983 					  &rss->key_len,
1984 					  "RSS hash key length 0");
1985 	if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1986 		return rte_flow_error_set(error, ENOTSUP,
1987 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1988 					  &rss->key_len,
1989 					  "RSS hash key too small");
1990 	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1991 		return rte_flow_error_set(error, ENOTSUP,
1992 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1993 					  &rss->key_len,
1994 					  "RSS hash key too large");
1995 	if (rss->queue_num > priv->sh->dev_cap.ind_table_max_size)
1996 		return rte_flow_error_set(error, ENOTSUP,
1997 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1998 					  &rss->queue_num,
1999 					  "number of queues too large");
2000 	if (rss->types & MLX5_RSS_HF_MASK)
2001 		return rte_flow_error_set(error, ENOTSUP,
2002 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2003 					  &rss->types,
2004 					  "some RSS protocols are not"
2005 					  " supported");
2006 	if ((rss->types & (RTE_ETH_RSS_L3_SRC_ONLY | RTE_ETH_RSS_L3_DST_ONLY)) &&
2007 	    !(rss->types & RTE_ETH_RSS_IP))
2008 		return rte_flow_error_set(error, EINVAL,
2009 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2010 					  "L3 partial RSS requested but L3 RSS"
2011 					  " type not specified");
2012 	if ((rss->types & (RTE_ETH_RSS_L4_SRC_ONLY | RTE_ETH_RSS_L4_DST_ONLY)) &&
2013 	    !(rss->types & (RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP)))
2014 		return rte_flow_error_set(error, EINVAL,
2015 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2016 					  "L4 partial RSS requested but L4 RSS"
2017 					  " type not specified");
2018 	if (!priv->rxqs_n && priv->ext_rxqs == NULL)
2019 		return rte_flow_error_set(error, EINVAL,
2020 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2021 					  NULL, "No Rx queues configured");
2022 	if (!rss->queue_num)
2023 		return rte_flow_error_set(error, EINVAL,
2024 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2025 					  NULL, "No queues configured");
2026 	ret = mlx5_validate_rss_queues(dev, rss->queue, rss->queue_num,
2027 				       &message, &queue_idx);
2028 	if (ret != 0) {
2029 		return rte_flow_error_set(error, -ret,
2030 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2031 					  &rss->queue[queue_idx], message);
2032 	}
2033 	return 0;
2034 }
2035 
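/*
 * Illustrative helper building an RSS action configuration that passes the
 * checks above: Toeplitz hashing, outer-header level, and either no key or a
 * full MLX5_RSS_HASH_KEY_LEN key. The helper name and parameters are
 * assumptions made for the example.
 */
static __rte_unused void
example_fill_rss_action(struct rte_flow_action_rss *rss_conf,
			const uint8_t *hash_key,
			const uint16_t *queues, uint32_t n_queues)
{
	rss_conf->func = RTE_ETH_HASH_FUNCTION_TOEPLITZ;
	rss_conf->level = 1; /* Outer headers only. */
	rss_conf->types = RTE_ETH_RSS_IP | RTE_ETH_RSS_UDP;
	/* A NULL key selects the device default key, key_len must then be 0. */
	rss_conf->key = hash_key;
	rss_conf->key_len = hash_key ? MLX5_RSS_HASH_KEY_LEN : 0;
	rss_conf->queue = queues;
	rss_conf->queue_num = n_queues;
}
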
2036 /*
2037  * Validate the RSS action within a flow rule.
2038  *
2039  * @param[in] action
2040  *   Pointer to the RSS action.
2041  * @param[in] action_flags
2042  *   Bit-fields that holds the actions detected until now.
2043  * @param[in] dev
2044  *   Pointer to the Ethernet device structure.
2045  * @param[in] attr
2046  *   Attributes of flow that includes this action.
2047  * @param[in] item_flags
2048  *   Items that were detected.
2049  * @param[out] error
2050  *   Pointer to error structure.
2051  *
2052  * @return
2053  *   0 on success, a negative errno value otherwise and rte_errno is set.
2054  */
2055 int
2056 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
2057 			      uint64_t action_flags,
2058 			      struct rte_eth_dev *dev,
2059 			      const struct rte_flow_attr *attr,
2060 			      uint64_t item_flags,
2061 			      struct rte_flow_error *error)
2062 {
2063 	const struct rte_flow_action_rss *rss = action->conf;
2064 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2065 	int ret;
2066 
2067 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2068 		return rte_flow_error_set(error, EINVAL,
2069 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2070 					  "can't have 2 fate actions"
2071 					  " in same flow");
2072 	ret = mlx5_validate_action_rss(dev, action, error);
2073 	if (ret)
2074 		return ret;
2075 	if (attr->egress)
2076 		return rte_flow_error_set(error, ENOTSUP,
2077 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2078 					  "rss action not supported for "
2079 					  "egress");
2080 	if (rss->level > 1 && !tunnel)
2081 		return rte_flow_error_set(error, EINVAL,
2082 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2083 					  "inner RSS is not supported for "
2084 					  "non-tunnel flows");
2085 	if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
2086 	    !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
2087 		return rte_flow_error_set(error, EINVAL,
2088 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2089 					  "RSS on eCPRI is not supported now");
2090 	}
2091 	if ((item_flags & MLX5_FLOW_LAYER_MPLS) &&
2092 	    !(item_flags &
2093 	      (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3)) &&
2094 	    rss->level > 1)
2095 		return rte_flow_error_set(error, EINVAL,
2096 					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
2097 					  "MPLS inner RSS needs to specify inner L2/L3 items after MPLS in pattern");
2098 	return 0;
2099 }
2100 
2101 /*
2102  * Validate the default miss action.
2103  *
2104  * @param[in] action_flags
2105  *   Bit-fields that holds the actions detected until now.
2106  * @param[out] error
2107  *   Pointer to error structure.
2108  *
2109  * @return
2110  *   0 on success, a negative errno value otherwise and rte_errno is set.
2111  */
2112 int
2113 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
2114 				const struct rte_flow_attr *attr,
2115 				struct rte_flow_error *error)
2116 {
2117 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2118 		return rte_flow_error_set(error, EINVAL,
2119 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2120 					  "can't have 2 fate actions in"
2121 					  " same flow");
2122 	if (attr->egress)
2123 		return rte_flow_error_set(error, ENOTSUP,
2124 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2125 					  "default miss action not supported "
2126 					  "for egress");
2127 	if (attr->group)
2128 		return rte_flow_error_set(error, ENOTSUP,
2129 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
2130 					  "only group 0 is supported");
2131 	if (attr->transfer)
2132 		return rte_flow_error_set(error, ENOTSUP,
2133 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
2134 					  NULL, "transfer is not supported");
2135 	return 0;
2136 }
2137 
2138 /*
2139  * Validate the count action.
2140  *
2141  * @param[in] dev
2142  *   Pointer to the Ethernet device structure.
2143  * @param[in] attr
2144  *   Attributes of flow that includes this action.
2145  * @param[out] error
2146  *   Pointer to error structure.
2147  *
2148  * @return
2149  *   0 on success, a negative errno value otherwise and rte_errno is set.
2150  */
2151 int
2152 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
2153 				const struct rte_flow_attr *attr,
2154 				struct rte_flow_error *error)
2155 {
2156 	if (attr->egress)
2157 		return rte_flow_error_set(error, ENOTSUP,
2158 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2159 					  "count action not supported for "
2160 					  "egress");
2161 	return 0;
2162 }
2163 
2164 /*
2165  * Validate the ASO CT action.
2166  *
2167  * @param[in] dev
2168  *   Pointer to the Ethernet device structure.
2169  * @param[in] conntrack
2170  *   Pointer to the CT action profile.
2171  * @param[out] error
2172  *   Pointer to error structure.
2173  *
2174  * @return
2175  *   0 on success, a negative errno value otherwise and rte_errno is set.
2176  */
2177 int
2178 mlx5_validate_action_ct(struct rte_eth_dev *dev,
2179 			const struct rte_flow_action_conntrack *conntrack,
2180 			struct rte_flow_error *error)
2181 {
2182 	RTE_SET_USED(dev);
2183 
2184 	if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
2185 		return rte_flow_error_set(error, EINVAL,
2186 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2187 					  "Invalid CT state");
2188 	if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
2189 		return rte_flow_error_set(error, EINVAL,
2190 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2191 					  "Invalid last TCP packet flag");
2192 	return 0;
2193 }
2194 
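/*
 * Illustrative conntrack action profile kept within the state and last_index
 * ranges verified above; the chosen values are assumptions.
 */
static __rte_unused void
example_fill_ct_profile(struct rte_flow_action_conntrack *profile)
{
	memset(profile, 0, sizeof(*profile));
	profile->enable = 1;
	profile->state = RTE_FLOW_CONNTRACK_STATE_ESTABLISHED;
	profile->last_index = RTE_FLOW_CONNTRACK_FLAG_ACK;
}
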
2195 /**
2196  * Verify that the @p attributes will be correctly understood by the NIC and
2197  * reject any attribute that is not supported on this path.
2198  *
2199  * @param[in] dev
2200  *   Pointer to the Ethernet device structure.
2201  * @param[in] attributes
2202  *   Pointer to flow attributes
2203  * @param[out] error
2204  *   Pointer to error structure.
2205  *
2206  * @return
2207  *   0 on success, a negative errno value otherwise and rte_errno is set.
2208  */
2209 int
2210 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
2211 			      const struct rte_flow_attr *attributes,
2212 			      struct rte_flow_error *error)
2213 {
2214 	struct mlx5_priv *priv = dev->data->dev_private;
2215 	uint32_t priority_max = priv->sh->flow_max_priority - 1;
2216 
2217 	if (attributes->group)
2218 		return rte_flow_error_set(error, ENOTSUP,
2219 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
2220 					  NULL, "groups are not supported");
2221 	if (attributes->priority != MLX5_FLOW_LOWEST_PRIO_INDICATOR &&
2222 	    attributes->priority >= priority_max)
2223 		return rte_flow_error_set(error, ENOTSUP,
2224 					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
2225 					  NULL, "priority out of range");
2226 	if (attributes->egress)
2227 		return rte_flow_error_set(error, ENOTSUP,
2228 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2229 					  "egress is not supported");
2230 	if (attributes->transfer && !priv->sh->config.dv_esw_en)
2231 		return rte_flow_error_set(error, ENOTSUP,
2232 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
2233 					  NULL, "transfer is not supported");
2234 	if (!attributes->ingress)
2235 		return rte_flow_error_set(error, EINVAL,
2236 					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
2237 					  NULL,
2238 					  "ingress attribute is mandatory");
2239 	return 0;
2240 }
2241 
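/*
 * Illustrative attribute set accepted by the check above: ingress only,
 * group 0 and the highest (numerically lowest) priority. The values are
 * assumptions made for the example.
 */
static const struct rte_flow_attr example_flow_attr __rte_unused = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
};
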
2242 /**
2243  * Validate ICMP6 item.
2244  *
2245  * @param[in] item
2246  *   Item specification.
2247  * @param[in] item_flags
2248  *   Bit-fields that holds the items detected until now.
2249  * @param[in] target_protocol
2250  *   The next protocol in the previous item.
2251  * @param[out] error
2252  *   Pointer to error structure.
2253  *
2254  * @return
2255  *   0 on success, a negative errno value otherwise and rte_errno is set.
2256  */
2257 int
2258 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
2259 			       uint64_t item_flags,
2260 			       uint8_t target_protocol,
2261 			       struct rte_flow_error *error)
2262 {
2263 	const struct rte_flow_item_icmp6 *mask = item->mask;
2264 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2265 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
2266 				      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2267 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2268 				      MLX5_FLOW_LAYER_OUTER_L4;
2269 	int ret;
2270 
2271 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
2272 		return rte_flow_error_set(error, EINVAL,
2273 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2274 					  "protocol filtering not compatible"
2275 					  " with ICMP6 layer");
2276 	if (!(item_flags & l3m))
2277 		return rte_flow_error_set(error, EINVAL,
2278 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2279 					  "IPv6 is mandatory to filter on"
2280 					  " ICMP6");
2281 	if (item_flags & l4m)
2282 		return rte_flow_error_set(error, EINVAL,
2283 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2284 					  "multiple L4 layers not supported");
2285 	if (!mask)
2286 		mask = &rte_flow_item_icmp6_mask;
2287 	ret = mlx5_flow_item_acceptable
2288 		(item, (const uint8_t *)mask,
2289 		 (const uint8_t *)&rte_flow_item_icmp6_mask,
2290 		 sizeof(struct rte_flow_item_icmp6),
2291 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2292 	if (ret < 0)
2293 		return ret;
2294 	return 0;
2295 }
2296 
2297 /**
2298  * Validate ICMP item.
2299  *
2300  * @param[in] item
2301  *   Item specification.
2302  * @param[in] item_flags
2303  *   Bit-fields that holds the items detected until now.
2304  * @param[out] error
2305  *   Pointer to error structure.
2306  *
2307  * @return
2308  *   0 on success, a negative errno value otherwise and rte_errno is set.
2309  */
2310 int
2311 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
2312 			     uint64_t item_flags,
2313 			     uint8_t target_protocol,
2314 			     struct rte_flow_error *error)
2315 {
2316 	const struct rte_flow_item_icmp *mask = item->mask;
2317 	const struct rte_flow_item_icmp nic_mask = {
2318 		.hdr.icmp_type = 0xff,
2319 		.hdr.icmp_code = 0xff,
2320 		.hdr.icmp_ident = RTE_BE16(0xffff),
2321 		.hdr.icmp_seq_nb = RTE_BE16(0xffff),
2322 	};
2323 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2324 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
2325 				      MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2326 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2327 				      MLX5_FLOW_LAYER_OUTER_L4;
2328 	int ret;
2329 
2330 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
2331 		return rte_flow_error_set(error, EINVAL,
2332 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2333 					  "protocol filtering not compatible"
2334 					  " with ICMP layer");
2335 	if (!(item_flags & l3m))
2336 		return rte_flow_error_set(error, EINVAL,
2337 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2338 					  "IPv4 is mandatory to filter"
2339 					  " on ICMP");
2340 	if (item_flags & l4m)
2341 		return rte_flow_error_set(error, EINVAL,
2342 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2343 					  "multiple L4 layers not supported");
2344 	if (!mask)
2345 		mask = &nic_mask;
2346 	ret = mlx5_flow_item_acceptable
2347 		(item, (const uint8_t *)mask,
2348 		 (const uint8_t *)&nic_mask,
2349 		 sizeof(struct rte_flow_item_icmp),
2350 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2351 	if (ret < 0)
2352 		return ret;
2353 	return 0;
2354 }
2355 
2356 /**
2357  * Validate Ethernet item.
2358  *
2359  * @param[in] item
2360  *   Item specification.
2361  * @param[in] item_flags
2362  *   Bit-fields that holds the items detected until now.
2363  * @param[out] error
2364  *   Pointer to error structure.
2365  *
2366  * @return
2367  *   0 on success, a negative errno value otherwise and rte_errno is set.
2368  */
2369 int
2370 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
2371 			    uint64_t item_flags, bool ext_vlan_sup,
2372 			    struct rte_flow_error *error)
2373 {
2374 	const struct rte_flow_item_eth *mask = item->mask;
2375 	const struct rte_flow_item_eth nic_mask = {
2376 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2377 		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2378 		.type = RTE_BE16(0xffff),
2379 		.has_vlan = ext_vlan_sup ? 1 : 0,
2380 	};
2381 	int ret;
2382 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2383 	const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2	:
2384 				       MLX5_FLOW_LAYER_OUTER_L2;
2385 
2386 	if (item_flags & ethm)
2387 		return rte_flow_error_set(error, ENOTSUP,
2388 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2389 					  "multiple L2 layers not supported");
2390 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
2391 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
2392 		return rte_flow_error_set(error, EINVAL,
2393 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2394 					  "L2 layer should not follow "
2395 					  "L3 layers");
2396 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
2397 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
2398 		return rte_flow_error_set(error, EINVAL,
2399 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2400 					  "L2 layer should not follow VLAN");
2401 	if (item_flags & MLX5_FLOW_LAYER_GTP)
2402 		return rte_flow_error_set(error, EINVAL,
2403 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2404 					  "L2 layer should not follow GTP");
2405 	if (!mask)
2406 		mask = &rte_flow_item_eth_mask;
2407 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2408 					(const uint8_t *)&nic_mask,
2409 					sizeof(struct rte_flow_item_eth),
2410 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2411 	return ret;
2412 }
2413 
2414 /**
2415  * Validate VLAN item.
2416  *
2417  * @param[in] item
2418  *   Item specification.
2419  * @param[in] item_flags
2420  *   Bit-fields that holds the items detected until now.
2421  * @param[in] dev
2422  *   Ethernet device the flow is being created on.
2423  * @param[out] error
2424  *   Pointer to error structure.
2425  *
2426  * @return
2427  *   0 on success, a negative errno value otherwise and rte_errno is set.
2428  */
2429 int
2430 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
2431 			     uint64_t item_flags,
2432 			     struct rte_eth_dev *dev,
2433 			     struct rte_flow_error *error)
2434 {
2435 	const struct rte_flow_item_vlan *spec = item->spec;
2436 	const struct rte_flow_item_vlan *mask = item->mask;
2437 	const struct rte_flow_item_vlan nic_mask = {
2438 		.tci = RTE_BE16(UINT16_MAX),
2439 		.inner_type = RTE_BE16(UINT16_MAX),
2440 	};
2441 	uint16_t vlan_tag = 0;
2442 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2443 	int ret;
2444 	const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
2445 					MLX5_FLOW_LAYER_INNER_L4) :
2446 				       (MLX5_FLOW_LAYER_OUTER_L3 |
2447 					MLX5_FLOW_LAYER_OUTER_L4);
2448 	const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
2449 					MLX5_FLOW_LAYER_OUTER_VLAN;
2450 
2451 	if (item_flags & vlanm)
2452 		return rte_flow_error_set(error, EINVAL,
2453 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2454 					  "multiple VLAN layers not supported");
2455 	else if ((item_flags & l34m) != 0)
2456 		return rte_flow_error_set(error, EINVAL,
2457 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2458 					  "VLAN cannot follow L3/L4 layer");
2459 	if (!mask)
2460 		mask = &rte_flow_item_vlan_mask;
2461 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2462 					(const uint8_t *)&nic_mask,
2463 					sizeof(struct rte_flow_item_vlan),
2464 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2465 	if (ret)
2466 		return ret;
2467 	if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
2468 		struct mlx5_priv *priv = dev->data->dev_private;
2469 
2470 		if (priv->vmwa_context) {
2471 			/*
2472 			 * Non-NULL context means we have a virtual machine
2473 			 * and SR-IOV enabled, we have to create VLAN interface
2474 			 * to make hypervisor to setup E-Switch vport
2475 			 * context correctly. We avoid creating the multiple
2476 			 * VLAN interfaces, so we cannot support VLAN tag mask.
2477 			 */
2478 			return rte_flow_error_set(error, EINVAL,
2479 						  RTE_FLOW_ERROR_TYPE_ITEM,
2480 						  item,
2481 						  "VLAN tag mask is not"
2482 						  " supported in virtual"
2483 						  " environment");
2484 		}
2485 	}
2486 	if (spec) {
2487 		vlan_tag = spec->tci;
2488 		vlan_tag &= mask->tci;
2489 	}
2490 	/*
2491 	 * From verbs perspective an empty VLAN is equivalent
2492 	 * to a packet without VLAN layer.
2493 	 */
2494 	if (!vlan_tag)
2495 		return rte_flow_error_set(error, EINVAL,
2496 					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2497 					  item->spec,
2498 					  "VLAN cannot be empty");
2499 	return 0;
2500 }
2501 
2502 /**
2503  * Validate IPV4 item.
2504  *
2505  * @param[in] item
2506  *   Item specification.
2507  * @param[in] item_flags
2508  *   Bit-fields that holds the items detected until now.
2509  * @param[in] last_item
2510  *   Previous validated item in the pattern items.
2511  * @param[in] ether_type
2512  *   Type in the ethernet layer header (including dot1q).
2513  * @param[in] acc_mask
2514  *   Acceptable mask, if NULL default internal default mask
2515  *   will be used to check whether item fields are supported.
2516  * @param[in] range_accepted
2517  *   True if range of values is accepted for specific fields, false otherwise.
2518  * @param[out] error
2519  *   Pointer to error structure.
2520  *
2521  * @return
2522  *   0 on success, a negative errno value otherwise and rte_errno is set.
2523  */
2524 int
2525 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2526 			     uint64_t item_flags,
2527 			     uint64_t last_item,
2528 			     uint16_t ether_type,
2529 			     const struct rte_flow_item_ipv4 *acc_mask,
2530 			     bool range_accepted,
2531 			     struct rte_flow_error *error)
2532 {
2533 	const struct rte_flow_item_ipv4 *mask = item->mask;
2534 	const struct rte_flow_item_ipv4 *spec = item->spec;
2535 	const struct rte_flow_item_ipv4 nic_mask = {
2536 		.hdr = {
2537 			.src_addr = RTE_BE32(0xffffffff),
2538 			.dst_addr = RTE_BE32(0xffffffff),
2539 			.type_of_service = 0xff,
2540 			.next_proto_id = 0xff,
2541 		},
2542 	};
2543 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2544 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2545 				      MLX5_FLOW_LAYER_OUTER_L3;
2546 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2547 				      MLX5_FLOW_LAYER_OUTER_L4;
2548 	int ret;
2549 	uint8_t next_proto = 0xFF;
2550 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2551 				  MLX5_FLOW_LAYER_OUTER_VLAN |
2552 				  MLX5_FLOW_LAYER_INNER_VLAN);
2553 
2554 	if ((last_item & l2_vlan) && ether_type &&
2555 	    ether_type != RTE_ETHER_TYPE_IPV4)
2556 		return rte_flow_error_set(error, EINVAL,
2557 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2558 					  "IPv4 cannot follow L2/VLAN layer "
2559 					  "which ether type is not IPv4");
2560 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2561 		if (mask && spec)
2562 			next_proto = mask->hdr.next_proto_id &
2563 				     spec->hdr.next_proto_id;
2564 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2565 			return rte_flow_error_set(error, EINVAL,
2566 						  RTE_FLOW_ERROR_TYPE_ITEM,
2567 						  item,
2568 						  "multiple tunnel "
2569 						  "not supported");
2570 	}
2571 	if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
2572 		return rte_flow_error_set(error, EINVAL,
2573 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2574 					  "wrong tunnel type - IPv6 specified "
2575 					  "but IPv4 item provided");
2576 	if (item_flags & l3m)
2577 		return rte_flow_error_set(error, ENOTSUP,
2578 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2579 					  "multiple L3 layers not supported");
2580 	else if (item_flags & l4m)
2581 		return rte_flow_error_set(error, EINVAL,
2582 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2583 					  "L3 cannot follow an L4 layer.");
2584 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2585 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2586 		return rte_flow_error_set(error, EINVAL,
2587 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2588 					  "L3 cannot follow an NVGRE layer.");
2589 	if (!mask)
2590 		mask = &rte_flow_item_ipv4_mask;
2591 	else if (mask->hdr.next_proto_id != 0 &&
2592 		 mask->hdr.next_proto_id != 0xff)
2593 		return rte_flow_error_set(error, EINVAL,
2594 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2595 					  "partial mask is not supported"
2596 					  " for protocol");
2597 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2598 					acc_mask ? (const uint8_t *)acc_mask
2599 						 : (const uint8_t *)&nic_mask,
2600 					sizeof(struct rte_flow_item_ipv4),
2601 					range_accepted, error);
2602 	if (ret < 0)
2603 		return ret;
2604 	return 0;
2605 }
2606 
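/*
 * Illustrative IPv4 item mask accepted by the partial-protocol-mask check
 * above: next_proto_id is either fully masked or left unmatched. The /24
 * source prefix is an arbitrary assumption.
 */
static const struct rte_flow_item_ipv4 example_ipv4_mask __rte_unused = {
	.hdr = {
		.src_addr = RTE_BE32(0xffffff00),
		.next_proto_id = 0xff,
	},
};
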
2607 /**
2608  * Validate IPV6 item.
2609  *
2610  * @param[in] item
2611  *   Item specification.
2612  * @param[in] item_flags
2613  *   Bit-fields that holds the items detected until now.
2614  * @param[in] last_item
2615  *   Previous validated item in the pattern items.
2616  * @param[in] ether_type
2617  *   Type in the ethernet layer header (including dot1q).
2618  * @param[in] acc_mask
2619  *   Acceptable mask, if NULL default internal default mask
2620  *   will be used to check whether item fields are supported.
2621  * @param[out] error
2622  *   Pointer to error structure.
2623  *
2624  * @return
2625  *   0 on success, a negative errno value otherwise and rte_errno is set.
2626  */
2627 int
2628 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2629 			     uint64_t item_flags,
2630 			     uint64_t last_item,
2631 			     uint16_t ether_type,
2632 			     const struct rte_flow_item_ipv6 *acc_mask,
2633 			     struct rte_flow_error *error)
2634 {
2635 	const struct rte_flow_item_ipv6 *mask = item->mask;
2636 	const struct rte_flow_item_ipv6 *spec = item->spec;
2637 	const struct rte_flow_item_ipv6 nic_mask = {
2638 		.hdr = {
2639 			.src_addr =
2640 				"\xff\xff\xff\xff\xff\xff\xff\xff"
2641 				"\xff\xff\xff\xff\xff\xff\xff\xff",
2642 			.dst_addr =
2643 				"\xff\xff\xff\xff\xff\xff\xff\xff"
2644 				"\xff\xff\xff\xff\xff\xff\xff\xff",
2645 			.vtc_flow = RTE_BE32(0xffffffff),
2646 			.proto = 0xff,
2647 		},
2648 	};
2649 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2650 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2651 				      MLX5_FLOW_LAYER_OUTER_L3;
2652 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2653 				      MLX5_FLOW_LAYER_OUTER_L4;
2654 	int ret;
2655 	uint8_t next_proto = 0xFF;
2656 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2657 				  MLX5_FLOW_LAYER_OUTER_VLAN |
2658 				  MLX5_FLOW_LAYER_INNER_VLAN);
2659 
2660 	if ((last_item & l2_vlan) && ether_type &&
2661 	    ether_type != RTE_ETHER_TYPE_IPV6)
2662 		return rte_flow_error_set(error, EINVAL,
2663 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2664 					  "IPv6 cannot follow L2/VLAN layer "
2665 					  "which ether type is not IPv6");
2666 	if (mask && mask->hdr.proto == UINT8_MAX && spec)
2667 		next_proto = spec->hdr.proto;
2668 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2669 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2670 			return rte_flow_error_set(error, EINVAL,
2671 						  RTE_FLOW_ERROR_TYPE_ITEM,
2672 						  item,
2673 						  "multiple tunnel "
2674 						  "not supported");
2675 	}
2676 	if (next_proto == IPPROTO_HOPOPTS  ||
2677 	    next_proto == IPPROTO_ROUTING  ||
2678 	    next_proto == IPPROTO_FRAGMENT ||
2679 	    next_proto == IPPROTO_ESP	   ||
2680 	    next_proto == IPPROTO_AH	   ||
2681 	    next_proto == IPPROTO_DSTOPTS)
2682 		return rte_flow_error_set(error, EINVAL,
2683 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2684 					  "IPv6 proto (next header) should "
2685 					  "not be set as extension header");
2686 	if (item_flags & MLX5_FLOW_LAYER_IPIP)
2687 		return rte_flow_error_set(error, EINVAL,
2688 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2689 					  "wrong tunnel type - IPv4 specified "
2690 					  "but IPv6 item provided");
2691 	if (item_flags & l3m)
2692 		return rte_flow_error_set(error, ENOTSUP,
2693 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2694 					  "multiple L3 layers not supported");
2695 	else if (item_flags & l4m)
2696 		return rte_flow_error_set(error, EINVAL,
2697 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2698 					  "L3 cannot follow an L4 layer.");
2699 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2700 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2701 		return rte_flow_error_set(error, EINVAL,
2702 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2703 					  "L3 cannot follow an NVGRE layer.");
2704 	if (!mask)
2705 		mask = &rte_flow_item_ipv6_mask;
2706 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2707 					acc_mask ? (const uint8_t *)acc_mask
2708 						 : (const uint8_t *)&nic_mask,
2709 					sizeof(struct rte_flow_item_ipv6),
2710 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2711 	if (ret < 0)
2712 		return ret;
2713 	return 0;
2714 }
2715 
2716 /**
2717  * Validate UDP item.
2718  *
2719  * @param[in] item
2720  *   Item specification.
2721  * @param[in] item_flags
2722  *   Bit-fields that holds the items detected until now.
2723  * @param[in] target_protocol
2724  *   The next protocol in the previous item.
2727  * @param[out] error
2728  *   Pointer to error structure.
2729  *
2730  * @return
2731  *   0 on success, a negative errno value otherwise and rte_errno is set.
2732  */
2733 int
2734 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2735 			    uint64_t item_flags,
2736 			    uint8_t target_protocol,
2737 			    struct rte_flow_error *error)
2738 {
2739 	const struct rte_flow_item_udp *mask = item->mask;
2740 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2741 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2742 				      MLX5_FLOW_LAYER_OUTER_L3;
2743 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2744 				      MLX5_FLOW_LAYER_OUTER_L4;
2745 	int ret;
2746 
2747 	if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
2748 		return rte_flow_error_set(error, EINVAL,
2749 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2750 					  "protocol filtering not compatible"
2751 					  " with UDP layer");
2752 	if (!(item_flags & l3m))
2753 		return rte_flow_error_set(error, EINVAL,
2754 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2755 					  "L3 is mandatory to filter on L4");
2756 	if (item_flags & l4m)
2757 		return rte_flow_error_set(error, EINVAL,
2758 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2759 					  "multiple L4 layers not supported");
2760 	if (!mask)
2761 		mask = &rte_flow_item_udp_mask;
2762 	ret = mlx5_flow_item_acceptable
2763 		(item, (const uint8_t *)mask,
2764 		 (const uint8_t *)&rte_flow_item_udp_mask,
2765 		 sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2766 		 error);
2767 	if (ret < 0)
2768 		return ret;
2769 	return 0;
2770 }
2771 
2772 /**
2773  * Validate TCP item.
2774  *
2775  * @param[in] item
2776  *   Item specification.
2777  * @param[in] item_flags
2778  *   Bit-fields that holds the items detected until now.
2779  * @param[in] target_protocol
2780  *   The next protocol in the previous item.
2781  * @param[out] error
2782  *   Pointer to error structure.
2783  *
2784  * @return
2785  *   0 on success, a negative errno value otherwise and rte_errno is set.
2786  */
2787 int
2788 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2789 			    uint64_t item_flags,
2790 			    uint8_t target_protocol,
2791 			    const struct rte_flow_item_tcp *flow_mask,
2792 			    struct rte_flow_error *error)
2793 {
2794 	const struct rte_flow_item_tcp *mask = item->mask;
2795 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2796 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2797 				      MLX5_FLOW_LAYER_OUTER_L3;
2798 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2799 				      MLX5_FLOW_LAYER_OUTER_L4;
2800 	int ret;
2801 
2802 	MLX5_ASSERT(flow_mask);
2803 	if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
2804 		return rte_flow_error_set(error, EINVAL,
2805 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2806 					  "protocol filtering not compatible"
2807 					  " with TCP layer");
2808 	if (!(item_flags & l3m))
2809 		return rte_flow_error_set(error, EINVAL,
2810 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2811 					  "L3 is mandatory to filter on L4");
2812 	if (item_flags & l4m)
2813 		return rte_flow_error_set(error, EINVAL,
2814 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2815 					  "multiple L4 layers not supported");
2816 	if (!mask)
2817 		mask = &rte_flow_item_tcp_mask;
2818 	ret = mlx5_flow_item_acceptable
2819 		(item, (const uint8_t *)mask,
2820 		 (const uint8_t *)flow_mask,
2821 		 sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2822 		 error);
2823 	if (ret < 0)
2824 		return ret;
2825 	return 0;
2826 }
2827 
2828 /**
2829  * Validate VXLAN item.
2830  *
2831  * @param[in] dev
2832  *   Pointer to the Ethernet device structure.
2833  * @param[in] udp_dport
2834  *   UDP destination port
2835  * @param[in] item
2836  *   Item specification.
2837  * @param[in] item_flags
2838  *   Bit-fields that holds the items detected until now.
2839  * @param root
2840  *   Whether the flow rule is created on the root table.
2841  * @param[out] error
2842  *   Pointer to error structure.
2843  *
2844  * @return
2845  *   0 on success, a negative errno value otherwise and rte_errno is set.
2846  */
2847 int
2848 mlx5_flow_validate_item_vxlan(struct rte_eth_dev *dev,
2849 			      uint16_t udp_dport,
2850 			      const struct rte_flow_item *item,
2851 			      uint64_t item_flags,
2852 			      bool root,
2853 			      struct rte_flow_error *error)
2854 {
2855 	const struct rte_flow_item_vxlan *spec = item->spec;
2856 	const struct rte_flow_item_vxlan *mask = item->mask;
2857 	int ret;
2858 	struct mlx5_priv *priv = dev->data->dev_private;
2859 	union vni {
2860 		uint32_t vlan_id;
2861 		uint8_t vni[4];
2862 	} id = { .vlan_id = 0, };
2863 	const struct rte_flow_item_vxlan nic_mask = {
2864 		.vni = "\xff\xff\xff",
2865 		.rsvd1 = 0xff,
2866 	};
2867 	const struct rte_flow_item_vxlan *valid_mask;
2868 
2869 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2870 		return rte_flow_error_set(error, ENOTSUP,
2871 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2872 					  "multiple tunnel layers not"
2873 					  " supported");
2874 	valid_mask = &rte_flow_item_vxlan_mask;
2875 	/*
2876 	 * Verify only UDPv4 is present as defined in
2877 	 * https://tools.ietf.org/html/rfc7348
2878 	 */
2879 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2880 		return rte_flow_error_set(error, EINVAL,
2881 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2882 					  "no outer UDP layer found");
2883 	if (!mask)
2884 		mask = &rte_flow_item_vxlan_mask;
2885 
2886 	if (priv->sh->steering_format_version !=
2887 	    MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
2888 	    !udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN) {
2889 		/* non-root table */
2890 		if (!root && priv->sh->misc5_cap)
2891 			valid_mask = &nic_mask;
2892 		/* Group zero in NIC domain */
2893 		if (!root && priv->sh->tunnel_header_0_1)
2894 			valid_mask = &nic_mask;
2895 	}
2896 	ret = mlx5_flow_item_acceptable
2897 		(item, (const uint8_t *)mask,
2898 		 (const uint8_t *)valid_mask,
2899 		 sizeof(struct rte_flow_item_vxlan),
2900 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2901 	if (ret < 0)
2902 		return ret;
2903 	if (spec) {
2904 		memcpy(&id.vni[1], spec->vni, 3);
2905 		memcpy(&id.vni[1], mask->vni, 3);
2906 	}
2907 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2908 		return rte_flow_error_set(error, ENOTSUP,
2909 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2910 					  "VXLAN tunnel must be fully defined");
2911 	return 0;
2912 }
2913 
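/*
 * Illustrative pattern skeleton satisfying the outer-layer requirements
 * checked above: ETH/IPV4/UDP must precede the VXLAN item so that
 * MLX5_FLOW_LAYER_OUTER_L4_UDP is set. The function name is an assumption.
 */
static __rte_unused void
example_vxlan_pattern(struct rte_flow_item pattern[5],
		      const struct rte_flow_item_vxlan *vxlan_spec)
{
	pattern[0] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_ETH };
	pattern[1] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_IPV4 };
	pattern[2] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_UDP };
	pattern[3] = (struct rte_flow_item){
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
		.spec = vxlan_spec,
	};
	pattern[4] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_END };
}
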
2914 /**
2915  * Validate VXLAN_GPE item.
2916  *
2917  * @param[in] item
2918  *   Item specification.
2919  * @param[in] item_flags
2920  *   Bit-fields that holds the items detected until now.
2921  * @param[in] dev
2922  *   Pointer to the Ethernet device structure.
2925  * @param[out] error
2926  *   Pointer to error structure.
2927  *
2928  * @return
2929  *   0 on success, a negative errno value otherwise and rte_errno is set.
2930  */
2931 int
2932 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
2933 				  uint64_t item_flags,
2934 				  struct rte_eth_dev *dev,
2935 				  struct rte_flow_error *error)
2936 {
2937 	struct mlx5_priv *priv = dev->data->dev_private;
2938 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
2939 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
2940 	int ret;
2941 	union vni {
2942 		uint32_t vlan_id;
2943 		uint8_t vni[4];
2944 	} id = { .vlan_id = 0, };
2945 
2946 	if (!priv->sh->config.l3_vxlan_en)
2947 		return rte_flow_error_set(error, ENOTSUP,
2948 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2949 					  "L3 VXLAN is not enabled by device"
2950 					  " parameter and/or not configured in"
2951 					  " firmware");
2952 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2953 		return rte_flow_error_set(error, ENOTSUP,
2954 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2955 					  "multiple tunnel layers not"
2956 					  " supported");
2957 	/*
2958 	 * Verify an outer UDP layer is present, as required for the
2959 	 * VXLAN-GPE encapsulation.
2960 	 */
2961 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2962 		return rte_flow_error_set(error, EINVAL,
2963 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2964 					  "no outer UDP layer found");
2965 	if (!mask)
2966 		mask = &rte_flow_item_vxlan_gpe_mask;
2967 	ret = mlx5_flow_item_acceptable
2968 		(item, (const uint8_t *)mask,
2969 		 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2970 		 sizeof(struct rte_flow_item_vxlan_gpe),
2971 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2972 	if (ret < 0)
2973 		return ret;
2974 	if (spec) {
2975 		if (spec->protocol)
2976 			return rte_flow_error_set(error, ENOTSUP,
2977 						  RTE_FLOW_ERROR_TYPE_ITEM,
2978 						  item,
2979 						  "VxLAN-GPE protocol"
2980 						  " not supported");
2981 		memcpy(&id.vni[1], spec->vni, 3);
2982 		memcpy(&id.vni[1], mask->vni, 3);
2983 	}
2984 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2985 		return rte_flow_error_set(error, ENOTSUP,
2986 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2987 					  "VXLAN-GPE tunnel must be fully"
2988 					  " defined");
2989 	return 0;
2990 }
2991 /**
2992  * Validate GRE Key item.
2993  *
2994  * @param[in] item
2995  *   Item specification.
2996  * @param[in] item_flags
2997  *   Bit flags to mark detected items.
2998  * @param[in] gre_item
2999  *   Pointer to gre_item
3000  * @param[out] error
3001  *   Pointer to error structure.
3002  *
3003  * @return
3004  *   0 on success, a negative errno value otherwise and rte_errno is set.
3005  */
3006 int
3007 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
3008 				uint64_t item_flags,
3009 				const struct rte_flow_item *gre_item,
3010 				struct rte_flow_error *error)
3011 {
3012 	const rte_be32_t *mask = item->mask;
3013 	int ret = 0;
3014 	rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
3015 	const struct rte_flow_item_gre *gre_spec;
3016 	const struct rte_flow_item_gre *gre_mask;
3017 
3018 	if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
3019 		return rte_flow_error_set(error, ENOTSUP,
3020 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3021 					  "Multiple GRE key items not supported");
3022 	if (!(item_flags & MLX5_FLOW_LAYER_GRE))
3023 		return rte_flow_error_set(error, ENOTSUP,
3024 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3025 					  "No preceding GRE header");
3026 	if (item_flags & MLX5_FLOW_LAYER_INNER)
3027 		return rte_flow_error_set(error, ENOTSUP,
3028 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3029 					  "GRE key following a wrong item");
3030 	gre_mask = gre_item->mask;
3031 	if (!gre_mask)
3032 		gre_mask = &rte_flow_item_gre_mask;
3033 	gre_spec = gre_item->spec;
3034 	if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
3035 			 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
3036 		return rte_flow_error_set(error, EINVAL,
3037 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3038 					  "Key bit must be on");
3039 
3040 	if (!mask)
3041 		mask = &gre_key_default_mask;
3042 	ret = mlx5_flow_item_acceptable
3043 		(item, (const uint8_t *)mask,
3044 		 (const uint8_t *)&gre_key_default_mask,
3045 		 sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3046 	return ret;
3047 }
3048 
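/*
 * Illustrative GRE spec/mask pair with the key-present bit (0x2000) set so
 * that a following GRE_KEY item passes the "Key bit must be on" check above.
 * The IPv4 payload protocol is an assumption.
 */
static const struct rte_flow_item_gre example_gre_spec __rte_unused = {
	.c_rsvd0_ver = RTE_BE16(0x2000),
	.protocol = RTE_BE16(RTE_ETHER_TYPE_IPV4),
};
static const struct rte_flow_item_gre example_gre_mask __rte_unused = {
	.c_rsvd0_ver = RTE_BE16(0x2000),
	.protocol = RTE_BE16(0xffff),
};
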
3049 /**
3050  * Validate GRE optional item.
3051  *
3052  * @param[in] dev
3053  *   Pointer to the Ethernet device structure.
3054  * @param[in] item
3055  *   Item specification.
3056  * @param[in] item_flags
3057  *   Bit flags to mark detected items.
3058  * @param[in] attr
3059  *   Flow rule attributes.
3060  * @param[in] gre_item
3061  *   Pointer to gre_item
3062  * @param[out] error
3063  *   Pointer to error structure.
3064  *
3065  * @return
3066  *   0 on success, a negative errno value otherwise and rte_errno is set.
3067  */
3068 int
3069 mlx5_flow_validate_item_gre_option(struct rte_eth_dev *dev,
3070 				   const struct rte_flow_item *item,
3071 				   uint64_t item_flags,
3072 				   const struct rte_flow_attr *attr,
3073 				   const struct rte_flow_item *gre_item,
3074 				   struct rte_flow_error *error)
3075 {
3076 	const struct rte_flow_item_gre *gre_spec = gre_item->spec;
3077 	const struct rte_flow_item_gre *gre_mask = gre_item->mask;
3078 	const struct rte_flow_item_gre_opt *spec = item->spec;
3079 	const struct rte_flow_item_gre_opt *mask = item->mask;
3080 	struct mlx5_priv *priv = dev->data->dev_private;
3081 	int ret = 0;
3082 	struct rte_flow_item_gre_opt nic_mask = {
3083 		.checksum_rsvd = {
3084 			.checksum = RTE_BE16(UINT16_MAX),
3085 			.reserved1 = 0x0,
3086 		},
3087 		.key = {
3088 			.key = RTE_BE32(UINT32_MAX),
3089 		},
3090 		.sequence = {
3091 			.sequence = RTE_BE32(UINT32_MAX),
3092 		},
3093 	};
3094 
3095 	if (!(item_flags & MLX5_FLOW_LAYER_GRE))
3096 		return rte_flow_error_set(error, ENOTSUP,
3097 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3098 					  "No preceding GRE header");
3099 	if (item_flags & MLX5_FLOW_LAYER_INNER)
3100 		return rte_flow_error_set(error, ENOTSUP,
3101 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3102 					  "GRE option following a wrong item");
3103 	if (!spec || !mask)
3104 		return rte_flow_error_set(error, EINVAL,
3105 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3106 					  "At least one field gre_option(checksum/key/sequence) must be specified");
3107 	if (!gre_mask)
3108 		gre_mask = &rte_flow_item_gre_mask;
3109 	if (mask->checksum_rsvd.checksum)
3110 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x8000)) &&
3111 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x8000)))
3112 			return rte_flow_error_set(error, EINVAL,
3113 						  RTE_FLOW_ERROR_TYPE_ITEM,
3114 						  item,
3115 						  "Checksum bit must be on");
3116 	if (mask->key.key)
3117 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
3118 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
3119 			return rte_flow_error_set(error, EINVAL,
3120 						  RTE_FLOW_ERROR_TYPE_ITEM,
3121 						  item, "Key bit must be on");
3122 	if (mask->sequence.sequence)
3123 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x1000)) &&
3124 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x1000)))
3125 			return rte_flow_error_set(error, EINVAL,
3126 						  RTE_FLOW_ERROR_TYPE_ITEM,
3127 						  item,
3128 						  "Sequence bit must be on");
3129 	if (mask->checksum_rsvd.checksum || mask->sequence.sequence) {
3130 		if (priv->sh->steering_format_version ==
3131 		    MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
3132 		    ((attr->group || (attr->transfer && priv->fdb_def_rule)) &&
3133 		     !priv->sh->misc5_cap) ||
3134 		    (!(priv->sh->tunnel_header_0_1 &&
3135 		       priv->sh->tunnel_header_2_3) &&
3136 		    !attr->group && (!attr->transfer || !priv->fdb_def_rule)))
3137 			return rte_flow_error_set(error, EINVAL,
3138 						  RTE_FLOW_ERROR_TYPE_ITEM,
3139 						  item,
3140 						  "Checksum/Sequence not supported");
3141 	}
3142 	ret = mlx5_flow_item_acceptable
3143 		(item, (const uint8_t *)mask,
3144 		 (const uint8_t *)&nic_mask,
3145 		 sizeof(struct rte_flow_item_gre_opt),
3146 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3147 	return ret;
3148 }
3149 
3150 /**
3151  * Validate GRE item.
3152  *
3153  * @param[in] item
3154  *   Item specification.
3155  * @param[in] item_flags
3156  *   Bit flags to mark detected items.
3157  * @param[in] target_protocol
3158  *   The next protocol in the previous item.
3159  * @param[out] error
3160  *   Pointer to error structure.
3161  *
3162  * @return
3163  *   0 on success, a negative errno value otherwise and rte_errno is set.
3164  */
3165 int
3166 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
3167 			    uint64_t item_flags,
3168 			    uint8_t target_protocol,
3169 			    struct rte_flow_error *error)
3170 {
3171 	const struct rte_flow_item_gre *spec __rte_unused = item->spec;
3172 	const struct rte_flow_item_gre *mask = item->mask;
3173 	int ret;
3174 	const struct rte_flow_item_gre nic_mask = {
3175 		.c_rsvd0_ver = RTE_BE16(0xB000),
3176 		.protocol = RTE_BE16(UINT16_MAX),
3177 	};
3178 
3179 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3180 		return rte_flow_error_set(error, EINVAL,
3181 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3182 					  "protocol filtering not compatible"
3183 					  " with this GRE layer");
3184 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3185 		return rte_flow_error_set(error, ENOTSUP,
3186 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3187 					  "multiple tunnel layers not"
3188 					  " supported");
3189 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3190 		return rte_flow_error_set(error, ENOTSUP,
3191 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3192 					  "L3 Layer is missing");
3193 	if (!mask)
3194 		mask = &rte_flow_item_gre_mask;
3195 	ret = mlx5_flow_item_acceptable
3196 		(item, (const uint8_t *)mask,
3197 		 (const uint8_t *)&nic_mask,
3198 		 sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
3199 		 error);
3200 	if (ret < 0)
3201 		return ret;
3202 #ifndef HAVE_MLX5DV_DR
3203 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
3204 	if (spec && (spec->protocol & mask->protocol))
3205 		return rte_flow_error_set(error, ENOTSUP,
3206 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3207 					  "without MPLS support the"
3208 					  " specification cannot be used for"
3209 					  " filtering");
3210 #endif
3211 #endif
3212 	return 0;
3213 }
3214 
3215 /**
3216  * Validate Geneve item.
3217  *
3218  * @param[in] item
3219  *   Item specification.
3220  * @param[in] item_flags
3221  *   Bit-fields that hold the items detected until now.
3222  * @param[in] dev
3223  *   Pointer to the Ethernet device structure.
3224  * @param[out] error
3225  *   Pointer to error structure.
3226  *
3227  * @return
3228  *   0 on success, a negative errno value otherwise and rte_errno is set.
3229  */
3230 
3231 int
3232 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
3233 			       uint64_t item_flags,
3234 			       struct rte_eth_dev *dev,
3235 			       struct rte_flow_error *error)
3236 {
3237 	struct mlx5_priv *priv = dev->data->dev_private;
3238 	const struct rte_flow_item_geneve *spec = item->spec;
3239 	const struct rte_flow_item_geneve *mask = item->mask;
3240 	int ret;
3241 	uint16_t gbhdr;
3242 	uint8_t opt_len = priv->sh->cdev->config.hca_attr.geneve_max_opt_len ?
3243 			  MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
3244 	const struct rte_flow_item_geneve nic_mask = {
3245 		.ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
3246 		.vni = "\xff\xff\xff",
3247 		.protocol = RTE_BE16(UINT16_MAX),
3248 	};
3249 
3250 	if (!priv->sh->cdev->config.hca_attr.tunnel_stateless_geneve_rx)
3251 		return rte_flow_error_set(error, ENOTSUP,
3252 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3253 					  "L3 Geneve is not enabled by device"
3254 					  " parameter and/or not configured in"
3255 					  " firmware");
3256 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3257 		return rte_flow_error_set(error, ENOTSUP,
3258 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3259 					  "multiple tunnel layers not"
3260 					  " supported");
3261 	/*
3262 	 * Verify an outer UDP layer is present, as Geneve is carried
3263 	 * over UDP (RFC 8926).
3264 	 */
3265 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
3266 		return rte_flow_error_set(error, EINVAL,
3267 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3268 					  "no outer UDP layer found");
3269 	if (!mask)
3270 		mask = &rte_flow_item_geneve_mask;
3271 	ret = mlx5_flow_item_acceptable
3272 				  (item, (const uint8_t *)mask,
3273 				   (const uint8_t *)&nic_mask,
3274 				   sizeof(struct rte_flow_item_geneve),
3275 				   MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3276 	if (ret)
3277 		return ret;
3278 	if (spec) {
3279 		gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
3280 		if (MLX5_GENEVE_VER_VAL(gbhdr) ||
3281 		     MLX5_GENEVE_CRITO_VAL(gbhdr) ||
3282 		     MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
3283 			return rte_flow_error_set(error, ENOTSUP,
3284 						  RTE_FLOW_ERROR_TYPE_ITEM,
3285 						  item,
3286 						  "Geneve protocol unsupported"
3287 						  " fields are being used");
3288 		if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
3289 			return rte_flow_error_set
3290 					(error, ENOTSUP,
3291 					 RTE_FLOW_ERROR_TYPE_ITEM,
3292 					 item,
3293 					 "Unsupported Geneve options length");
3294 	}
3295 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
3296 		return rte_flow_error_set
3297 				    (error, ENOTSUP,
3298 				     RTE_FLOW_ERROR_TYPE_ITEM, item,
3299 				     "Geneve tunnel must be fully defined");
3300 	return 0;
3301 }
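/*
 * Illustrative usage sketch (not part of the driver): a pattern accepted by
 * mlx5_flow_validate_item_geneve() carries the full outer stack and keeps the
 * reserved/critical bits of ver_opt_len_o_c_rsvd0 at zero, e.g.:
 *
 *	struct rte_flow_item_geneve geneve_spec = {
 *		.vni = { 0x00, 0x00, 0x64 },	// VNI 100, example value
 *	};
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_GENEVE,
 *		  .spec = &geneve_spec,
 *		  .mask = &rte_flow_item_geneve_mask },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *
 * Omitting the outer UDP item, or matching a second tunnel layer, makes the
 * checks above fail with EINVAL/ENOTSUP respectively.
 */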
3302 
3303 /**
3304  * Validate Geneve TLV option item.
3305  *
3306  * @param[in] item
3307  *   Item specification.
3308  * @param[in] last_item
3309  *   Previous validated item in the pattern items.
3310  * @param[in] geneve_item
3311  *   Previous GENEVE item specification.
3312  * @param[in] dev
3313  *   Pointer to the rte_eth_dev structure.
3314  * @param[out] error
3315  *   Pointer to error structure.
3316  *
3317  * @return
3318  *   0 on success, a negative errno value otherwise and rte_errno is set.
3319  */
3320 int
3321 mlx5_flow_validate_item_geneve_opt(const struct rte_flow_item *item,
3322 				   uint64_t last_item,
3323 				   const struct rte_flow_item *geneve_item,
3324 				   struct rte_eth_dev *dev,
3325 				   struct rte_flow_error *error)
3326 {
3327 	struct mlx5_priv *priv = dev->data->dev_private;
3328 	struct mlx5_dev_ctx_shared *sh = priv->sh;
3329 	struct mlx5_geneve_tlv_option_resource *geneve_opt_resource;
3330 	struct mlx5_hca_attr *hca_attr = &sh->cdev->config.hca_attr;
3331 	uint8_t data_max_supported =
3332 			hca_attr->max_geneve_tlv_option_data_len * 4;
3333 	const struct rte_flow_item_geneve *geneve_spec;
3334 	const struct rte_flow_item_geneve *geneve_mask;
3335 	const struct rte_flow_item_geneve_opt *spec = item->spec;
3336 	const struct rte_flow_item_geneve_opt *mask = item->mask;
3337 	unsigned int i;
3338 	unsigned int data_len;
3339 	uint8_t tlv_option_len;
3340 	uint16_t optlen_m, optlen_v;
3341 	const struct rte_flow_item_geneve_opt full_mask = {
3342 		.option_class = RTE_BE16(0xffff),
3343 		.option_type = 0xff,
3344 		.option_len = 0x1f,
3345 	};
3346 
3347 	if (!mask)
3348 		mask = &rte_flow_item_geneve_opt_mask;
3349 	if (!spec)
3350 		return rte_flow_error_set
3351 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3352 			"Geneve TLV opt class/type/length must be specified");
3353 	if ((uint32_t)spec->option_len > MLX5_GENEVE_OPTLEN_MASK)
3354 		return rte_flow_error_set
3355 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3356 			"Geneve TLV opt length exceeds the limit (31)");
3357 	/* Check if class type and length masks are full. */
3358 	if (full_mask.option_class != mask->option_class ||
3359 	    full_mask.option_type != mask->option_type ||
3360 	    full_mask.option_len != (mask->option_len & full_mask.option_len))
3361 		return rte_flow_error_set
3362 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3363 			"Geneve TLV opt class/type/length masks must be full");
3364 	/* Check if length is supported */
3365 	if ((uint32_t)spec->option_len >
3366 			hca_attr->max_geneve_tlv_option_data_len)
3367 		return rte_flow_error_set
3368 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3369 			"Geneve TLV opt length not supported");
3370 	if (hca_attr->max_geneve_tlv_options > 1)
3371 		DRV_LOG(DEBUG,
3372 			"max_geneve_tlv_options supports more than 1 option");
3373 	/* Check GENEVE item preceding. */
3374 	if (!geneve_item || !(last_item & MLX5_FLOW_LAYER_GENEVE))
3375 		return rte_flow_error_set
3376 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3377 			"Geneve opt item must be preceded with Geneve item");
3378 	geneve_spec = geneve_item->spec;
3379 	geneve_mask = geneve_item->mask ? geneve_item->mask :
3380 					  &rte_flow_item_geneve_mask;
3381 	/* Check if GENEVE TLV option size doesn't exceed option length */
3382 	if (geneve_spec && (geneve_mask->ver_opt_len_o_c_rsvd0 ||
3383 			    geneve_spec->ver_opt_len_o_c_rsvd0)) {
3384 		tlv_option_len = spec->option_len & mask->option_len;
3385 		optlen_v = rte_be_to_cpu_16(geneve_spec->ver_opt_len_o_c_rsvd0);
3386 		optlen_v = MLX5_GENEVE_OPTLEN_VAL(optlen_v);
3387 		optlen_m = rte_be_to_cpu_16(geneve_mask->ver_opt_len_o_c_rsvd0);
3388 		optlen_m = MLX5_GENEVE_OPTLEN_VAL(optlen_m);
3389 		if ((optlen_v & optlen_m) <= tlv_option_len)
3390 			return rte_flow_error_set
3391 				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3392 				 "GENEVE TLV option length exceeds optlen");
3393 	}
3394 	/* Check if length is 0 or data is 0. */
3395 	if (spec->data == NULL || spec->option_len == 0)
3396 		return rte_flow_error_set
3397 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3398 			"Geneve TLV opt with zero data/length not supported");
3399 	/* Check not all data & mask are 0. */
3400 	data_len = spec->option_len * 4;
3401 	if (mask->data == NULL) {
3402 		for (i = 0; i < data_len; i++)
3403 			if (spec->data[i])
3404 				break;
3405 		if (i == data_len)
3406 			return rte_flow_error_set(error, ENOTSUP,
3407 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3408 				"Can't match on Geneve option data 0");
3409 	} else {
3410 		for (i = 0; i < data_len; i++)
3411 			if (spec->data[i] & mask->data[i])
3412 				break;
3413 		if (i == data_len)
3414 			return rte_flow_error_set(error, ENOTSUP,
3415 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3416 				"Can't match on Geneve option data and mask 0");
3417 		/* Check data mask supported. */
3418 		for (i = data_max_supported; i < data_len ; i++)
3419 			if (mask->data[i])
3420 				return rte_flow_error_set(error, ENOTSUP,
3421 					RTE_FLOW_ERROR_TYPE_ITEM, item,
3422 					"Data mask is of unsupported size");
3423 	}
3424 	/* Check GENEVE option is supported in NIC. */
3425 	if (!hca_attr->geneve_tlv_opt)
3426 		return rte_flow_error_set
3427 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3428 			"Geneve TLV opt not supported");
3429 	/* Check if we already have geneve option with different type/class. */
3430 	rte_spinlock_lock(&sh->geneve_tlv_opt_sl);
3431 	geneve_opt_resource = sh->geneve_tlv_option_resource;
3432 	if (geneve_opt_resource != NULL)
3433 		if (geneve_opt_resource->option_class != spec->option_class ||
3434 		    geneve_opt_resource->option_type != spec->option_type ||
3435 		    geneve_opt_resource->length != spec->option_len) {
3436 			rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3437 			return rte_flow_error_set(error, ENOTSUP,
3438 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3439 				"Only one Geneve TLV option supported");
3440 		}
3441 	rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3442 	return 0;
3443 }
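/*
 * Illustrative usage sketch (not part of the driver): a GENEVE_OPT item has
 * to follow a GENEVE item and must fully mask class/type/length, e.g.:
 *
 *	uint32_t opt_data[1] = { RTE_BE32(0xdeadbeef) };	// example payload
 *	struct rte_flow_item_geneve_opt opt_spec = {
 *		.option_class = RTE_BE16(0x0001),	// example class
 *		.option_type = 0x01,			// example type
 *		.option_len = 1,			// in 4-byte words
 *		.data = opt_data,
 *	};
 *	struct rte_flow_item_geneve_opt opt_mask = {
 *		.option_class = RTE_BE16(0xffff),
 *		.option_type = 0xff,
 *		.option_len = 0x1f,
 *	};
 *	// ... eth / ipv4 / udp / geneve / geneve_opt / end, with
 *	// .spec = &opt_spec and .mask = &opt_mask on the GENEVE_OPT item.
 *
 * Only one TLV option (a single class/type/length combination) can be in use
 * at a time, as enforced by the geneve_tlv_option_resource check above.
 */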
3444 
3445 /**
3446  * Validate MPLS item.
3447  *
3448  * @param[in] dev
3449  *   Pointer to the rte_eth_dev structure.
3450  * @param[in] item
3451  *   Item specification.
3452  * @param[in] item_flags
3453  *   Bit-fields that hold the items detected until now.
3454  * @param[in] prev_layer
3455  *   The protocol layer indicated in previous item.
3456  * @param[out] error
3457  *   Pointer to error structure.
3458  *
3459  * @return
3460  *   0 on success, a negative errno value otherwise and rte_errno is set.
3461  */
3462 int
3463 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
3464 			     const struct rte_flow_item *item __rte_unused,
3465 			     uint64_t item_flags __rte_unused,
3466 			     uint64_t prev_layer __rte_unused,
3467 			     struct rte_flow_error *error)
3468 {
3469 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
3470 	const struct rte_flow_item_mpls *mask = item->mask;
3471 	struct mlx5_priv *priv = dev->data->dev_private;
3472 	int ret;
3473 
3474 	if (!priv->sh->dev_cap.mpls_en)
3475 		return rte_flow_error_set(error, ENOTSUP,
3476 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3477 					  "MPLS not supported or"
3478 					  " disabled in firmware"
3479 					  " configuration.");
3480 	/* MPLS over UDP, GRE is allowed */
3481 	if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L4_UDP |
3482 			    MLX5_FLOW_LAYER_GRE |
3483 			    MLX5_FLOW_LAYER_GRE_KEY)))
3484 		return rte_flow_error_set(error, EINVAL,
3485 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3486 					  "protocol filtering not compatible"
3487 					  " with MPLS layer");
3488 	/* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
3489 	if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
3490 	    !(item_flags & MLX5_FLOW_LAYER_GRE))
3491 		return rte_flow_error_set(error, ENOTSUP,
3492 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3493 					  "multiple tunnel layers not"
3494 					  " supported");
3495 	if (!mask)
3496 		mask = &rte_flow_item_mpls_mask;
3497 	ret = mlx5_flow_item_acceptable
3498 		(item, (const uint8_t *)mask,
3499 		 (const uint8_t *)&rte_flow_item_mpls_mask,
3500 		 sizeof(struct rte_flow_item_mpls),
3501 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3502 	if (ret < 0)
3503 		return ret;
3504 	return 0;
3505 #else
3506 	return rte_flow_error_set(error, ENOTSUP,
3507 				  RTE_FLOW_ERROR_TYPE_ITEM, item,
3508 				  "MPLS is not supported by Verbs, please"
3509 				  " update.");
3510 #endif
3511 }
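/*
 * Illustrative usage sketch (not part of the driver): MPLS is accepted only
 * on top of UDP or GRE, e.g. an MPLS-over-GRE pattern:
 *
 *	struct rte_flow_item_mpls mpls_spec = {
 *		.label_tc_s = { 0x00, 0x06, 0x40 },	// label 100, example value
 *	};
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_GRE },
 *		{ .type = RTE_FLOW_ITEM_TYPE_MPLS, .spec = &mpls_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *
 * Placing MPLS directly after an L3 item (no UDP/GRE in between) fails the
 * prev_layer check above with EINVAL.
 */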
3512 
3513 /**
3514  * Validate NVGRE item.
3515  *
3516  * @param[in] item
3517  *   Item specification.
3518  * @param[in] item_flags
3519  *   Bit flags to mark detected items.
3520  * @param[in] target_protocol
3521  *   The next protocol in the previous item.
3522  * @param[out] error
3523  *   Pointer to error structure.
3524  *
3525  * @return
3526  *   0 on success, a negative errno value otherwise and rte_errno is set.
3527  */
3528 int
3529 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
3530 			      uint64_t item_flags,
3531 			      uint8_t target_protocol,
3532 			      struct rte_flow_error *error)
3533 {
3534 	const struct rte_flow_item_nvgre *mask = item->mask;
3535 	int ret;
3536 
3537 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3538 		return rte_flow_error_set(error, EINVAL,
3539 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3540 					  "protocol filtering not compatible"
3541 					  " with this GRE layer");
3542 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3543 		return rte_flow_error_set(error, ENOTSUP,
3544 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3545 					  "multiple tunnel layers not"
3546 					  " supported");
3547 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3548 		return rte_flow_error_set(error, ENOTSUP,
3549 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3550 					  "L3 Layer is missing");
3551 	if (!mask)
3552 		mask = &rte_flow_item_nvgre_mask;
3553 	ret = mlx5_flow_item_acceptable
3554 		(item, (const uint8_t *)mask,
3555 		 (const uint8_t *)&rte_flow_item_nvgre_mask,
3556 		 sizeof(struct rte_flow_item_nvgre),
3557 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3558 	if (ret < 0)
3559 		return ret;
3560 	return 0;
3561 }
3562 
3563 /**
3564  * Validate eCPRI item.
3565  *
3566  * @param[in] item
3567  *   Item specification.
3568  * @param[in] item_flags
3569  *   Bit-fields that hold the items detected until now.
3570  * @param[in] last_item
3571  *   Previous validated item in the pattern items.
3572  * @param[in] ether_type
3573  *   Type in the ethernet layer header (including dot1q).
3574  * @param[in] acc_mask
3575  *   Acceptable mask. If NULL, the default internal mask
3576  *   will be used to check whether item fields are supported.
3577  * @param[out] error
3578  *   Pointer to error structure.
3579  *
3580  * @return
3581  *   0 on success, a negative errno value otherwise and rte_errno is set.
3582  */
3583 int
3584 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
3585 			      uint64_t item_flags,
3586 			      uint64_t last_item,
3587 			      uint16_t ether_type,
3588 			      const struct rte_flow_item_ecpri *acc_mask,
3589 			      struct rte_flow_error *error)
3590 {
3591 	const struct rte_flow_item_ecpri *mask = item->mask;
3592 	const struct rte_flow_item_ecpri nic_mask = {
3593 		.hdr = {
3594 			.common = {
3595 				.u32 =
3596 				RTE_BE32(((const struct rte_ecpri_common_hdr) {
3597 					.type = 0xFF,
3598 					}).u32),
3599 			},
3600 			.dummy[0] = 0xFFFFFFFF,
3601 		},
3602 	};
3603 	const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
3604 					MLX5_FLOW_LAYER_OUTER_VLAN);
3605 	struct rte_flow_item_ecpri mask_lo;
3606 
3607 	if (!(last_item & outer_l2_vlan) &&
3608 	    last_item != MLX5_FLOW_LAYER_OUTER_L4_UDP)
3609 		return rte_flow_error_set(error, EINVAL,
3610 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3611 					  "eCPRI can only follow L2/VLAN layer or UDP layer");
3612 	if ((last_item & outer_l2_vlan) && ether_type &&
3613 	    ether_type != RTE_ETHER_TYPE_ECPRI)
3614 		return rte_flow_error_set(error, EINVAL,
3615 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3616 					  "eCPRI cannot follow L2/VLAN layer which ether type is not 0xAEFE");
3617 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3618 		return rte_flow_error_set(error, EINVAL,
3619 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3620 					  "eCPRI with tunnel is not supported right now");
3621 	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
3622 		return rte_flow_error_set(error, ENOTSUP,
3623 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3624 					  "multiple L3 layers not supported");
3625 	else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
3626 		return rte_flow_error_set(error, EINVAL,
3627 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3628 					  "eCPRI cannot coexist with a TCP layer");
3629 	/* In specification, eCPRI could be over UDP layer. */
3630 	else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
3631 		return rte_flow_error_set(error, EINVAL,
3632 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3633 					  "eCPRI over UDP layer is not yet supported right now");
3634 	/* Mask for type field in common header could be zero. */
3635 	if (!mask)
3636 		mask = &rte_flow_item_ecpri_mask;
3637 	mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
3638 	/* Input mask is in big-endian format. */
3639 	if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
3640 		return rte_flow_error_set(error, EINVAL,
3641 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3642 					  "partial mask is not supported for protocol");
3643 	else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
3644 		return rte_flow_error_set(error, EINVAL,
3645 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3646 					  "message header mask must be after a type mask");
3647 	return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
3648 					 acc_mask ? (const uint8_t *)acc_mask
3649 						  : (const uint8_t *)&nic_mask,
3650 					 sizeof(struct rte_flow_item_ecpri),
3651 					 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3652 }
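/*
 * Illustrative usage sketch (not part of the driver): eCPRI currently has to
 * follow the outer L2/VLAN layer, and the Ethernet type, when matched, must
 * be 0xAEFE, e.g.:
 *
 *	struct rte_flow_item_eth l2_spec = {
 *		.hdr.ether_type = RTE_BE16(RTE_ETHER_TYPE_ECPRI),
 *	};
 *	struct rte_flow_item_eth l2_mask = {
 *		.hdr.ether_type = RTE_BE16(0xffff),
 *	};
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH,
 *		  .spec = &l2_spec, .mask = &l2_mask },
 *		{ .type = RTE_FLOW_ITEM_TYPE_ECPRI },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *
 * To match on eCPRI header fields, build spec/mask the same way the nic_mask
 * above is built (the type mask must be either 0 or full). Patterns carrying
 * an L3/L4 or tunnel layer before eCPRI are rejected by the checks above.
 */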
3653 
3654 static int
3655 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
3656 		   const struct rte_flow_attr *attr __rte_unused,
3657 		   const struct rte_flow_item items[] __rte_unused,
3658 		   const struct rte_flow_action actions[] __rte_unused,
3659 		   bool external __rte_unused,
3660 		   int hairpin __rte_unused,
3661 		   struct rte_flow_error *error)
3662 {
3663 	return rte_flow_error_set(error, ENOTSUP,
3664 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3665 }
3666 
3667 static struct mlx5_flow *
3668 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
3669 		  const struct rte_flow_attr *attr __rte_unused,
3670 		  const struct rte_flow_item items[] __rte_unused,
3671 		  const struct rte_flow_action actions[] __rte_unused,
3672 		  struct rte_flow_error *error)
3673 {
3674 	rte_flow_error_set(error, ENOTSUP,
3675 			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3676 	return NULL;
3677 }
3678 
3679 static int
3680 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
3681 		    struct mlx5_flow *dev_flow __rte_unused,
3682 		    const struct rte_flow_attr *attr __rte_unused,
3683 		    const struct rte_flow_item items[] __rte_unused,
3684 		    const struct rte_flow_action actions[] __rte_unused,
3685 		    struct rte_flow_error *error)
3686 {
3687 	return rte_flow_error_set(error, ENOTSUP,
3688 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3689 }
3690 
3691 static int
3692 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
3693 		struct rte_flow *flow __rte_unused,
3694 		struct rte_flow_error *error)
3695 {
3696 	return rte_flow_error_set(error, ENOTSUP,
3697 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3698 }
3699 
3700 static void
3701 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
3702 		 struct rte_flow *flow __rte_unused)
3703 {
3704 }
3705 
3706 static void
3707 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
3708 		  struct rte_flow *flow __rte_unused)
3709 {
3710 }
3711 
3712 static int
3713 flow_null_query(struct rte_eth_dev *dev __rte_unused,
3714 		struct rte_flow *flow __rte_unused,
3715 		const struct rte_flow_action *actions __rte_unused,
3716 		void *data __rte_unused,
3717 		struct rte_flow_error *error)
3718 {
3719 	return rte_flow_error_set(error, ENOTSUP,
3720 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3721 }
3722 
3723 static int
3724 flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused,
3725 		      uint32_t domains __rte_unused,
3726 		      uint32_t flags __rte_unused)
3727 {
3728 	return 0;
3729 }
3730 
3731 /* Void driver to protect from null pointer reference. */
3732 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
3733 	.validate = flow_null_validate,
3734 	.prepare = flow_null_prepare,
3735 	.translate = flow_null_translate,
3736 	.apply = flow_null_apply,
3737 	.remove = flow_null_remove,
3738 	.destroy = flow_null_destroy,
3739 	.query = flow_null_query,
3740 	.sync_domain = flow_null_sync_domain,
3741 };
3742 
3743 /**
3744  * Select flow driver type according to flow attributes and device
3745  * configuration.
3746  *
3747  * @param[in] dev
3748  *   Pointer to the dev structure.
3749  * @param[in] attr
3750  *   Pointer to the flow attributes.
3751  *
3752  * @return
3753  *   flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
3754  */
3755 static enum mlx5_flow_drv_type
3756 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
3757 {
3758 	struct mlx5_priv *priv = dev->data->dev_private;
3759 	/* The OS can determine first a specific flow type (DV, VERBS) */
3760 	enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
3761 
3762 	if (type != MLX5_FLOW_TYPE_MAX)
3763 		return type;
3764 	/*
3765 	 * Currently when dv_flow_en == 2, only HW steering engine is
3766 	 * supported. New engines can also be chosen here if ready.
3767 	 */
3768 	if (priv->sh->config.dv_flow_en == 2)
3769 		return MLX5_FLOW_TYPE_HW;
3770 	if (!attr)
3771 		return MLX5_FLOW_TYPE_MIN;
3772 	/* If no OS specific type - continue with DV/VERBS selection */
3773 	if (attr->transfer && priv->sh->config.dv_esw_en)
3774 		type = MLX5_FLOW_TYPE_DV;
3775 	if (!attr->transfer)
3776 		type = priv->sh->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
3777 						     MLX5_FLOW_TYPE_VERBS;
3778 	return type;
3779 }
3780 
3781 #define flow_get_drv_ops(type) flow_drv_ops[type]
3782 
3783 /**
3784  * Flow driver validation API. This abstracts calling driver specific functions.
3785  * The type of flow driver is determined according to flow attributes.
3786  *
3787  * @param[in] dev
3788  *   Pointer to the dev structure.
3789  * @param[in] attr
3790  *   Pointer to the flow attributes.
3791  * @param[in] items
3792  *   Pointer to the list of items.
3793  * @param[in] actions
3794  *   Pointer to the list of actions.
3795  * @param[in] external
3796  *   This flow rule is created by request external to PMD.
3797  * @param[in] hairpin
3798  *   Number of hairpin TX actions, 0 means classic flow.
3799  * @param[out] error
3800  *   Pointer to the error structure.
3801  *
3802  * @return
3803  *   0 on success, a negative errno value otherwise and rte_errno is set.
3804  */
3805 static inline int
3806 flow_drv_validate(struct rte_eth_dev *dev,
3807 		  const struct rte_flow_attr *attr,
3808 		  const struct rte_flow_item items[],
3809 		  const struct rte_flow_action actions[],
3810 		  bool external, int hairpin, struct rte_flow_error *error)
3811 {
3812 	const struct mlx5_flow_driver_ops *fops;
3813 	enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
3814 
3815 	fops = flow_get_drv_ops(type);
3816 	return fops->validate(dev, attr, items, actions, external,
3817 			      hairpin, error);
3818 }
3819 
3820 /**
3821  * Flow driver preparation API. This abstracts calling driver specific
3822  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3823  * calculates the size of memory required for device flow, allocates the memory,
3824  * initializes the device flow and returns the pointer.
3825  *
3826  * @note
3827  *   This function initializes device flow structure such as dv or verbs in
3828  *   struct mlx5_flow. However, it is caller's responsibility to initialize the
3829  *   rest. For example, adding returning device flow to flow->dev_flow list and
3830  *   setting backward reference to the flow should be done out of this function.
3831  *   layers field is not filled either.
3832  *
3833  * @param[in] dev
3834  *   Pointer to the dev structure.
3835  * @param[in] attr
3836  *   Pointer to the flow attributes.
3837  * @param[in] items
3838  *   Pointer to the list of items.
3839  * @param[in] actions
3840  *   Pointer to the list of actions.
3841  * @param[in] flow_idx
3842  *   Memory pool index of this flow.
3843  * @param[out] error
3844  *   Pointer to the error structure.
3845  *
3846  * @return
3847  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
3848  */
3849 static inline struct mlx5_flow *
3850 flow_drv_prepare(struct rte_eth_dev *dev,
3851 		 const struct rte_flow *flow,
3852 		 const struct rte_flow_attr *attr,
3853 		 const struct rte_flow_item items[],
3854 		 const struct rte_flow_action actions[],
3855 		 uint32_t flow_idx,
3856 		 struct rte_flow_error *error)
3857 {
3858 	const struct mlx5_flow_driver_ops *fops;
3859 	enum mlx5_flow_drv_type type = flow->drv_type;
3860 	struct mlx5_flow *mlx5_flow = NULL;
3861 
3862 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3863 	fops = flow_get_drv_ops(type);
3864 	mlx5_flow = fops->prepare(dev, attr, items, actions, error);
3865 	if (mlx5_flow)
3866 		mlx5_flow->flow_idx = flow_idx;
3867 	return mlx5_flow;
3868 }
3869 
3870 /**
3871  * Flow driver translation API. This abstracts calling driver specific
3872  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3873  * translates a generic flow into a driver flow. flow_drv_prepare() must
3874  * precede.
3875  *
3876  * @note
3877  *   dev_flow->layers could be filled as a result of parsing during translation
3878  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
3879  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
3880  *   flow->actions could be overwritten even though all the expanded dev_flows
3881  *   have the same actions.
3882  *
3883  * @param[in] dev
3884  *   Pointer to the rte dev structure.
3885  * @param[in, out] dev_flow
3886  *   Pointer to the mlx5 flow.
3887  * @param[in] attr
3888  *   Pointer to the flow attributes.
3889  * @param[in] items
3890  *   Pointer to the list of items.
3891  * @param[in] actions
3892  *   Pointer to the list of actions.
3893  * @param[out] error
3894  *   Pointer to the error structure.
3895  *
3896  * @return
3897  *   0 on success, a negative errno value otherwise and rte_errno is set.
3898  */
3899 static inline int
3900 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
3901 		   const struct rte_flow_attr *attr,
3902 		   const struct rte_flow_item items[],
3903 		   const struct rte_flow_action actions[],
3904 		   struct rte_flow_error *error)
3905 {
3906 	const struct mlx5_flow_driver_ops *fops;
3907 	enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
3908 
3909 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3910 	fops = flow_get_drv_ops(type);
3911 	return fops->translate(dev, dev_flow, attr, items, actions, error);
3912 }
3913 
3914 /**
3915  * Flow driver apply API. This abstracts calling driver specific functions.
3916  * Parent flow (rte_flow) should have driver type (drv_type). It applies
3917  * translated driver flows on to device. flow_drv_translate() must precede.
3918  *
3919  * @param[in] dev
3920  *   Pointer to Ethernet device structure.
3921  * @param[in, out] flow
3922  *   Pointer to flow structure.
3923  * @param[out] error
3924  *   Pointer to error structure.
3925  *
3926  * @return
3927  *   0 on success, a negative errno value otherwise and rte_errno is set.
3928  */
3929 static inline int
3930 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3931 	       struct rte_flow_error *error)
3932 {
3933 	const struct mlx5_flow_driver_ops *fops;
3934 	enum mlx5_flow_drv_type type = flow->drv_type;
3935 
3936 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3937 	fops = flow_get_drv_ops(type);
3938 	return fops->apply(dev, flow, error);
3939 }
3940 
3941 /**
3942  * Flow driver destroy API. This abstracts calling driver specific functions.
3943  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3944  * on device and releases resources of the flow.
3945  *
3946  * @param[in] dev
3947  *   Pointer to Ethernet device.
3948  * @param[in, out] flow
3949  *   Pointer to flow structure.
3950  */
3951 static inline void
3952 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3953 {
3954 	const struct mlx5_flow_driver_ops *fops;
3955 	enum mlx5_flow_drv_type type = flow->drv_type;
3956 
3957 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3958 	fops = flow_get_drv_ops(type);
3959 	fops->destroy(dev, flow);
3960 }
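/*
 * Call-ordering sketch for the driver abstraction above (illustrative only;
 * the real sequence is implemented by flow_list_create()/flow_list_destroy()
 * further below):
 *
 *	flow->drv_type = flow_get_drv_type(dev, attr);
 *	if (flow_drv_validate(dev, attr, items, actions, external, hairpin,
 *			      error) < 0)
 *		return 0;	// nothing allocated yet
 *	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, flow_idx,
 *				    error);
 *	flow_drv_translate(dev, dev_flow, attr, items, actions, error);
 *	flow_drv_apply(dev, flow, error);
 *	...
 *	flow_drv_destroy(dev, flow);	// undoes apply, frees driver resources
 */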
3961 
3962 /**
3963  * Flow driver find RSS policy tbl API. This abstracts calling driver
3964  * specific functions. Parent flow (rte_flow) should have driver
3965  * type (drv_type). It will find the RSS policy table that has the rss_desc.
3966  *
3967  * @param[in] dev
3968  *   Pointer to Ethernet device.
3969  * @param[in, out] flow
3970  *   Pointer to flow structure.
3971  * @param[in] policy
3972  *   Pointer to meter policy table.
3973  * @param[in] rss_desc
3974  *   Pointer to rss_desc
3975  */
3976 static struct mlx5_flow_meter_sub_policy *
3977 flow_drv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
3978 		struct rte_flow *flow,
3979 		struct mlx5_flow_meter_policy *policy,
3980 		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS])
3981 {
3982 	const struct mlx5_flow_driver_ops *fops;
3983 	enum mlx5_flow_drv_type type = flow->drv_type;
3984 
3985 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3986 	fops = flow_get_drv_ops(type);
3987 	return fops->meter_sub_policy_rss_prepare(dev, policy, rss_desc);
3988 }
3989 
3990 /**
3991  * Flow driver color tag rule API. This abstracts calling driver
3992  * specific functions. Parent flow (rte_flow) should have driver
3993  * type (drv_type). It will create the color tag rules in hierarchy meter.
3994  *
3995  * @param[in] dev
3996  *   Pointer to Ethernet device.
3997  * @param[in, out] flow
3998  *   Pointer to flow structure.
3999  * @param[in] fm
4000  *   Pointer to flow meter structure.
4001  * @param[in] src_port
4002  *   The src port this extra rule should use.
4003  * @param[in] item
4004  *   The src port id match item.
4005  * @param[out] error
4006  *   Pointer to error structure.
4007  */
4008 static int
4009 flow_drv_mtr_hierarchy_rule_create(struct rte_eth_dev *dev,
4010 		struct rte_flow *flow,
4011 		struct mlx5_flow_meter_info *fm,
4012 		int32_t src_port,
4013 		const struct rte_flow_item *item,
4014 		struct rte_flow_error *error)
4015 {
4016 	const struct mlx5_flow_driver_ops *fops;
4017 	enum mlx5_flow_drv_type type = flow->drv_type;
4018 
4019 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
4020 	fops = flow_get_drv_ops(type);
4021 	return fops->meter_hierarchy_rule_create(dev, fm,
4022 						src_port, item, error);
4023 }
4024 
4025 /**
4026  * Get RSS action from the action list.
4027  *
4028  * @param[in] dev
4029  *   Pointer to Ethernet device.
4030  * @param[in] actions
4031  *   Pointer to the list of actions.
4032  * @param[in] flow
4033  *   Parent flow structure pointer.
4034  *
4035  * @return
4036  *   Pointer to the RSS action if it exists, NULL otherwise.
4037  */
4038 static const struct rte_flow_action_rss*
4039 flow_get_rss_action(struct rte_eth_dev *dev,
4040 		    const struct rte_flow_action actions[])
4041 {
4042 	struct mlx5_priv *priv = dev->data->dev_private;
4043 	const struct rte_flow_action_rss *rss = NULL;
4044 	struct mlx5_meter_policy_action_container *acg;
4045 	struct mlx5_meter_policy_action_container *acy;
4046 
4047 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4048 		switch (actions->type) {
4049 		case RTE_FLOW_ACTION_TYPE_RSS:
4050 			rss = actions->conf;
4051 			break;
4052 		case RTE_FLOW_ACTION_TYPE_SAMPLE:
4053 		{
4054 			const struct rte_flow_action_sample *sample =
4055 								actions->conf;
4056 			const struct rte_flow_action *act = sample->actions;
4057 			for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++)
4058 				if (act->type == RTE_FLOW_ACTION_TYPE_RSS)
4059 					rss = act->conf;
4060 			break;
4061 		}
4062 		case RTE_FLOW_ACTION_TYPE_METER:
4063 		{
4064 			uint32_t mtr_idx;
4065 			struct mlx5_flow_meter_info *fm;
4066 			struct mlx5_flow_meter_policy *policy;
4067 			const struct rte_flow_action_meter *mtr = actions->conf;
4068 
4069 			fm = mlx5_flow_meter_find(priv, mtr->mtr_id, &mtr_idx);
4070 			if (fm && !fm->def_policy) {
4071 				policy = mlx5_flow_meter_policy_find(dev,
4072 						fm->policy_id, NULL);
4073 				MLX5_ASSERT(policy);
4074 				if (policy->is_hierarchy) {
4075 					policy =
4076 				mlx5_flow_meter_hierarchy_get_final_policy(dev,
4077 									policy);
4078 					if (!policy)
4079 						return NULL;
4080 				}
4081 				if (policy->is_rss) {
4082 					acg =
4083 					&policy->act_cnt[RTE_COLOR_GREEN];
4084 					acy =
4085 					&policy->act_cnt[RTE_COLOR_YELLOW];
4086 					if (acg->fate_action ==
4087 					    MLX5_FLOW_FATE_SHARED_RSS)
4088 						rss = acg->rss->conf;
4089 					else if (acy->fate_action ==
4090 						 MLX5_FLOW_FATE_SHARED_RSS)
4091 						rss = acy->rss->conf;
4092 				}
4093 			}
4094 			break;
4095 		}
4096 		default:
4097 			break;
4098 		}
4099 	}
4100 	return rss;
4101 }
4102 
4103 /**
4104  * Get ASO age action by index.
4105  *
4106  * @param[in] dev
4107  *   Pointer to the Ethernet device structure.
4108  * @param[in] age_idx
4109  *   Index to the ASO age action.
4110  *
4111  * @return
4112  *   The specified ASO age action.
4113  */
4114 struct mlx5_aso_age_action*
4115 flow_aso_age_get_by_idx(struct rte_eth_dev *dev, uint32_t age_idx)
4116 {
4117 	uint16_t pool_idx = age_idx & UINT16_MAX;
4118 	uint16_t offset = (age_idx >> 16) & UINT16_MAX;
4119 	struct mlx5_priv *priv = dev->data->dev_private;
4120 	struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
4121 	struct mlx5_aso_age_pool *pool;
4122 
4123 	rte_rwlock_read_lock(&mng->resize_rwl);
4124 	pool = mng->pools[pool_idx];
4125 	rte_rwlock_read_unlock(&mng->resize_rwl);
4126 	return &pool->actions[offset - 1];
4127 }
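/*
 * Index layout worked example: the lower 16 bits of age_idx select the pool
 * and the upper 16 bits hold a 1-based offset inside it, so e.g.
 * age_idx = 0x00030001 resolves to pools[1]->actions[2].
 */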
4128 
4129 /* Maps an indirect action to the translated direct action in some actions array. */
4130 struct mlx5_translated_action_handle {
4131 	struct rte_flow_action_handle *action; /**< Indirect action handle. */
4132 	int index; /**< Index in related array of rte_flow_action. */
4133 };
4134 
4135 /**
4136  * Translates actions of type RTE_FLOW_ACTION_TYPE_INDIRECT to related
4137  * direct action if translation is possible.
4138  * This functionality is used to run the same execution path for both direct
4139  * and indirect actions on flow create. All necessary preparations for
4140  * indirect action handling should be performed on the *handle* actions list
4141  * returned from this call.
4142  *
4143  * @param[in] dev
4144  *   Pointer to Ethernet device.
4145  * @param[in] actions
4146  *   List of actions to translate.
4147  * @param[out] handle
4148  *   List to store translated indirect action object handles.
4149  * @param[in, out] indir_n
4150  *   Size of the *handle* array. On return it is updated with the number of
4151  *   indirect actions retrieved from the *actions* list.
4152  * @param[out] translated_actions
4153  *   List of actions where all indirect actions were translated to direct
4154  *   if possible. NULL if no translation took place.
4155  * @param[out] error
4156  *   Pointer to the error structure.
4157  *
4158  * @return
4159  *   0 on success, a negative errno value otherwise and rte_errno is set.
4160  */
4161 static int
4162 flow_action_handles_translate(struct rte_eth_dev *dev,
4163 			      const struct rte_flow_action actions[],
4164 			      struct mlx5_translated_action_handle *handle,
4165 			      int *indir_n,
4166 			      struct rte_flow_action **translated_actions,
4167 			      struct rte_flow_error *error)
4168 {
4169 	struct mlx5_priv *priv = dev->data->dev_private;
4170 	struct rte_flow_action *translated = NULL;
4171 	size_t actions_size;
4172 	int n;
4173 	int copied_n = 0;
4174 	struct mlx5_translated_action_handle *handle_end = NULL;
4175 
4176 	for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) {
4177 		if (actions[n].type != RTE_FLOW_ACTION_TYPE_INDIRECT)
4178 			continue;
4179 		if (copied_n == *indir_n) {
4180 			return rte_flow_error_set
4181 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM,
4182 				 NULL, "too many shared actions");
4183 		}
4184 		rte_memcpy(&handle[copied_n].action, &actions[n].conf,
4185 			   sizeof(actions[n].conf));
4186 		handle[copied_n].index = n;
4187 		copied_n++;
4188 	}
4189 	n++;
4190 	*indir_n = copied_n;
4191 	if (!copied_n)
4192 		return 0;
4193 	actions_size = sizeof(struct rte_flow_action) * n;
4194 	translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY);
4195 	if (!translated) {
4196 		rte_errno = ENOMEM;
4197 		return -ENOMEM;
4198 	}
4199 	memcpy(translated, actions, actions_size);
4200 	for (handle_end = handle + copied_n; handle < handle_end; handle++) {
4201 		struct mlx5_shared_action_rss *shared_rss;
4202 		uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
4203 		uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
4204 		uint32_t idx = act_idx &
4205 			       ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
4206 
4207 		switch (type) {
4208 		case MLX5_INDIRECT_ACTION_TYPE_RSS:
4209 			shared_rss = mlx5_ipool_get
4210 			  (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx);
4211 			translated[handle->index].type =
4212 				RTE_FLOW_ACTION_TYPE_RSS;
4213 			translated[handle->index].conf =
4214 				&shared_rss->origin;
4215 			break;
4216 		case MLX5_INDIRECT_ACTION_TYPE_COUNT:
4217 			translated[handle->index].type =
4218 						(enum rte_flow_action_type)
4219 						MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
4220 			translated[handle->index].conf = (void *)(uintptr_t)idx;
4221 			break;
4222 		case MLX5_INDIRECT_ACTION_TYPE_METER_MARK:
4223 			translated[handle->index].type =
4224 						(enum rte_flow_action_type)
4225 						MLX5_RTE_FLOW_ACTION_TYPE_METER_MARK;
4226 			translated[handle->index].conf = (void *)(uintptr_t)idx;
4227 			break;
4228 		case MLX5_INDIRECT_ACTION_TYPE_AGE:
4229 			if (priv->sh->flow_hit_aso_en) {
4230 				translated[handle->index].type =
4231 					(enum rte_flow_action_type)
4232 					MLX5_RTE_FLOW_ACTION_TYPE_AGE;
4233 				translated[handle->index].conf =
4234 							 (void *)(uintptr_t)idx;
4235 				break;
4236 			}
4237 			/* Fall-through */
4238 		case MLX5_INDIRECT_ACTION_TYPE_CT:
4239 			if (priv->sh->ct_aso_en) {
4240 				translated[handle->index].type =
4241 					RTE_FLOW_ACTION_TYPE_CONNTRACK;
4242 				translated[handle->index].conf =
4243 							 (void *)(uintptr_t)idx;
4244 				break;
4245 			}
4246 			/* Fall-through */
4247 		default:
4248 			mlx5_free(translated);
4249 			return rte_flow_error_set
4250 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
4251 				 NULL, "invalid indirect action type");
4252 		}
4253 	}
4254 	*translated_actions = translated;
4255 	return 0;
4256 }
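/*
 * Application-side sketch of what gets translated here (illustrative only;
 * port_id, err and rss_conf are assumed application variables, rss_conf
 * being a filled struct rte_flow_action_rss):
 *
 *	struct rte_flow_indir_action_conf iconf = { .ingress = 1 };
 *	struct rte_flow_action rss_action = {
 *		.type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss_conf,
 *	};
 *	struct rte_flow_action_handle *handle =
 *		rte_flow_action_handle_create(port_id, &iconf, &rss_action, &err);
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_INDIRECT, .conf = handle },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *
 * The INDIRECT entry above is what this helper rewrites into a direct
 * RTE_FLOW_ACTION_TYPE_RSS (or the PMD-internal equivalents for COUNT, AGE,
 * CT and METER_MARK) before the common flow creation path runs.
 */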
4257 
4258 /**
4259  * Get Shared RSS action from the action list.
4260  *
4261  * @param[in] dev
4262  *   Pointer to Ethernet device.
4263  * @param[in] shared
4264  *   Pointer to the list of actions.
4265  * @param[in] shared_n
4266  *   Actions list length.
4267  *
4268  * @return
4269  *   The MLX5 RSS action ID if exists, otherwise return 0.
4270  */
4271 static uint32_t
4272 flow_get_shared_rss_action(struct rte_eth_dev *dev,
4273 			   struct mlx5_translated_action_handle *handle,
4274 			   int shared_n)
4275 {
4276 	struct mlx5_translated_action_handle *handle_end;
4277 	struct mlx5_priv *priv = dev->data->dev_private;
4278 	struct mlx5_shared_action_rss *shared_rss;
4279 
4280 
4281 	for (handle_end = handle + shared_n; handle < handle_end; handle++) {
4282 		uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
4283 		uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
4284 		uint32_t idx = act_idx &
4285 			       ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
4286 		switch (type) {
4287 		case MLX5_INDIRECT_ACTION_TYPE_RSS:
4288 			shared_rss = mlx5_ipool_get
4289 				(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
4290 									   idx);
4291 			__atomic_add_fetch(&shared_rss->refcnt, 1,
4292 					   __ATOMIC_RELAXED);
4293 			return idx;
4294 		default:
4295 			break;
4296 		}
4297 	}
4298 	return 0;
4299 }
4300 
4301 static unsigned int
4302 find_graph_root(uint32_t rss_level)
4303 {
4304 	return rss_level < 2 ? MLX5_EXPANSION_ROOT :
4305 			       MLX5_EXPANSION_ROOT_OUTER;
4306 }
4307 
4308 /**
4309  *  Get layer flags from the prefix flow.
4310  *
4311  *  Some flows may be split into several subflows: the prefix subflow gets the
4312  *  match items and the suffix subflow gets the actions.
4313  *  Some actions need the user-defined match item flags to get the details for
4314  *  the action.
4315  *  This function helps the suffix flow to get the item layer flags from the
4316  *  prefix subflow.
4317  *
4318  * @param[in] dev_flow
4319  *   Pointer the created prefix subflow.
4320  *
4321  * @return
4322  *   The layers obtained from the prefix subflow.
4323  */
4324 static inline uint64_t
4325 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
4326 {
4327 	uint64_t layers = 0;
4328 
4329 	/*
4330 	 * The layers bits could be cached in a local variable, but usually the
4331 	 * compiler does that optimization for us.
4332 	 * If there is no decap action, use the layers directly.
4333 	 */
4334 	if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
4335 		return dev_flow->handle->layers;
4336 	/* Convert L3 layers with decap action. */
4337 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
4338 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
4339 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
4340 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
4341 	/* Convert L4 layers with decap action.  */
4342 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
4343 		layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
4344 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
4345 		layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
4346 	return layers;
4347 }
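/*
 * Worked example for the conversion above: a prefix subflow matching
 * eth / ipv4 / udp / vxlan / eth / ipv4 / tcp with a VXLAN_DECAP action
 * reports inner IPv4/TCP layers; after decap the suffix subflow sees them as
 * outer ones, hence INNER_L3_IPV4 -> OUTER_L3_IPV4 and INNER_L4_TCP ->
 * OUTER_L4_TCP.
 */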
4348 
4349 /**
4350  * Get metadata split action information.
4351  *
4352  * @param[in] actions
4353  *   Pointer to the list of actions.
4354  * @param[out] qrss
4355  *   Pointer to the returned QUEUE/RSS action pointer; left untouched when
4356  *   no QUEUE/RSS action is found in the list.
4359  * @param[out] encap_idx
4360  *   Pointer to the index of the encap action if exists, otherwise the last
4361  *   action index.
4362  *
4363  * @return
4364  *   Total number of actions.
4365  */
4366 static int
4367 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
4368 				       const struct rte_flow_action **qrss,
4369 				       int *encap_idx)
4370 {
4371 	const struct rte_flow_action_raw_encap *raw_encap;
4372 	int actions_n = 0;
4373 	int raw_decap_idx = -1;
4374 
4375 	*encap_idx = -1;
4376 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4377 		switch (actions->type) {
4378 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4379 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4380 			*encap_idx = actions_n;
4381 			break;
4382 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4383 			raw_decap_idx = actions_n;
4384 			break;
4385 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4386 			raw_encap = actions->conf;
4387 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4388 				*encap_idx = raw_decap_idx != -1 ?
4389 						      raw_decap_idx : actions_n;
4390 			break;
4391 		case RTE_FLOW_ACTION_TYPE_QUEUE:
4392 		case RTE_FLOW_ACTION_TYPE_RSS:
4393 			*qrss = actions;
4394 			break;
4395 		default:
4396 			break;
4397 		}
4398 		actions_n++;
4399 	}
4400 	if (*encap_idx == -1)
4401 		*encap_idx = actions_n;
4402 	/* Count RTE_FLOW_ACTION_TYPE_END. */
4403 	return actions_n + 1;
4404 }
4405 
4406 /**
4407  * Check if the action will change packet.
4408  *
4409  * @param dev
4410  *   Pointer to Ethernet device.
4411  * @param[in] type
4412  *   action type.
4413  *
4414  * @return
4415  *   true if action will change packet, false otherwise.
4416  */
4417 static bool flow_check_modify_action_type(struct rte_eth_dev *dev,
4418 					  enum rte_flow_action_type type)
4419 {
4420 	struct mlx5_priv *priv = dev->data->dev_private;
4421 
4422 	switch (type) {
4423 	case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
4424 	case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
4425 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
4426 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
4427 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
4428 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
4429 	case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
4430 	case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
4431 	case RTE_FLOW_ACTION_TYPE_DEC_TTL:
4432 	case RTE_FLOW_ACTION_TYPE_SET_TTL:
4433 	case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
4434 	case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
4435 	case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
4436 	case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
4437 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
4438 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
4439 	case RTE_FLOW_ACTION_TYPE_SET_META:
4440 	case RTE_FLOW_ACTION_TYPE_SET_TAG:
4441 	case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
4442 	case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4443 	case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4444 	case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4445 	case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4446 	case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4447 	case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4448 	case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4449 	case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4450 	case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4451 	case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
4452 		return true;
4453 	case RTE_FLOW_ACTION_TYPE_FLAG:
4454 	case RTE_FLOW_ACTION_TYPE_MARK:
4455 		if (priv->sh->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
4456 		    priv->sh->config.dv_xmeta_en != MLX5_XMETA_MODE_META32_HWS)
4457 			return true;
4458 		else
4459 			return false;
4460 	default:
4461 		return false;
4462 	}
4463 }
4464 
4465 /**
4466  * Check meter action from the action list.
4467  *
4468  * @param dev
4469  *   Pointer to Ethernet device.
4470  * @param[in] actions
4471  *   Pointer to the list of actions.
4472  * @param[out] has_mtr
4473  *   Pointer to the meter exist flag.
4474  * @param[out] has_modify
4475  *   Pointer to the flag showing whether there is a packet-modifying action.
4476  * @param[out] meter_id
4477  *   Pointer to the meter id.
4478  *
4479  * @return
4480  *   Total number of actions.
4481  */
4482 static int
4483 flow_check_meter_action(struct rte_eth_dev *dev,
4484 			const struct rte_flow_action actions[],
4485 			bool *has_mtr, bool *has_modify, uint32_t *meter_id)
4486 {
4487 	const struct rte_flow_action_meter *mtr = NULL;
4488 	int actions_n = 0;
4489 
4490 	MLX5_ASSERT(has_mtr);
4491 	*has_mtr = false;
4492 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4493 		switch (actions->type) {
4494 		case RTE_FLOW_ACTION_TYPE_METER:
4495 			mtr = actions->conf;
4496 			*meter_id = mtr->mtr_id;
4497 			*has_mtr = true;
4498 			break;
4499 		default:
4500 			break;
4501 		}
4502 		if (!*has_mtr)
4503 			*has_modify |= flow_check_modify_action_type(dev,
4504 								actions->type);
4505 		actions_n++;
4506 	}
4507 	/* Count RTE_FLOW_ACTION_TYPE_END. */
4508 	return actions_n + 1;
4509 }
4510 
4511 /**
4512  * Check if the flow should be split due to hairpin.
4513  * The reason for the split is that in current HW we can't
4514  * support encap and push-vlan on Rx, so if a flow contains
4515  * these actions we move it to Tx.
4516  *
4517  * @param dev
4518  *   Pointer to Ethernet device.
4519  * @param[in] attr
4520  *   Flow rule attributes.
4521  * @param[in] actions
4522  *   Associated actions (list terminated by the END action).
4523  *
4524  * @return
4525  *   > 0 the number of actions and the flow should be split,
4526  *   0 when no split required.
4527  */
4528 static int
4529 flow_check_hairpin_split(struct rte_eth_dev *dev,
4530 			 const struct rte_flow_attr *attr,
4531 			 const struct rte_flow_action actions[])
4532 {
4533 	int queue_action = 0;
4534 	int action_n = 0;
4535 	int split = 0;
4536 	const struct rte_flow_action_queue *queue;
4537 	const struct rte_flow_action_rss *rss;
4538 	const struct rte_flow_action_raw_encap *raw_encap;
4539 	const struct rte_eth_hairpin_conf *conf;
4540 
4541 	if (!attr->ingress)
4542 		return 0;
4543 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4544 		switch (actions->type) {
4545 		case RTE_FLOW_ACTION_TYPE_QUEUE:
4546 			queue = actions->conf;
4547 			if (queue == NULL)
4548 				return 0;
4549 			conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
4550 			if (conf == NULL || conf->tx_explicit != 0)
4551 				return 0;
4552 			queue_action = 1;
4553 			action_n++;
4554 			break;
4555 		case RTE_FLOW_ACTION_TYPE_RSS:
4556 			rss = actions->conf;
4557 			if (rss == NULL || rss->queue_num == 0)
4558 				return 0;
4559 			conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
4560 			if (conf == NULL || conf->tx_explicit != 0)
4561 				return 0;
4562 			queue_action = 1;
4563 			action_n++;
4564 			break;
4565 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4566 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4567 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4568 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4569 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4570 			split++;
4571 			action_n++;
4572 			break;
4573 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4574 			raw_encap = actions->conf;
4575 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4576 				split++;
4577 			action_n++;
4578 			break;
4579 		default:
4580 			action_n++;
4581 			break;
4582 		}
4583 	}
4584 	if (split && queue_action)
4585 		return action_n;
4586 	return 0;
4587 }
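/*
 * Example of a rule taking this split path (illustrative only): an ingress
 * flow with actions
 *	raw_encap (size > MLX5_ENCAPSULATION_DECISION_SIZE) / queue <hairpin Rxq>
 * bound in implicit Tx mode returns action_n here, so the encap part can be
 * moved to the Tx side, while the same actions targeting a regular Rx queue
 * (or an explicit-Tx hairpin queue) return 0 and stay as a single Rx rule.
 */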
4588 
4589 /* Declare flow create/destroy prototype in advance. */
4590 static uint32_t
4591 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4592 		 const struct rte_flow_attr *attr,
4593 		 const struct rte_flow_item items[],
4594 		 const struct rte_flow_action actions[],
4595 		 bool external, struct rte_flow_error *error);
4596 
4597 static void
4598 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4599 		  uint32_t flow_idx);
4600 
4601 int
4602 flow_dv_mreg_match_cb(void *tool_ctx __rte_unused,
4603 		      struct mlx5_list_entry *entry, void *cb_ctx)
4604 {
4605 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4606 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4607 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4608 
4609 	return mcp_res->mark_id != *(uint32_t *)(ctx->data);
4610 }
4611 
4612 struct mlx5_list_entry *
4613 flow_dv_mreg_create_cb(void *tool_ctx, void *cb_ctx)
4614 {
4615 	struct rte_eth_dev *dev = tool_ctx;
4616 	struct mlx5_priv *priv = dev->data->dev_private;
4617 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4618 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4619 	struct rte_flow_error *error = ctx->error;
4620 	uint32_t idx = 0;
4621 	int ret;
4622 	uint32_t mark_id = *(uint32_t *)(ctx->data);
4623 	struct rte_flow_attr attr = {
4624 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4625 		.ingress = 1,
4626 	};
4627 	struct mlx5_rte_flow_item_tag tag_spec = {
4628 		.data = mark_id,
4629 	};
4630 	struct rte_flow_item items[] = {
4631 		[1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
4632 	};
4633 	struct rte_flow_action_mark ftag = {
4634 		.id = mark_id,
4635 	};
4636 	struct mlx5_flow_action_copy_mreg cp_mreg = {
4637 		.dst = REG_B,
4638 		.src = REG_NON,
4639 	};
4640 	struct rte_flow_action_jump jump = {
4641 		.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4642 	};
4643 	struct rte_flow_action actions[] = {
4644 		[3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
4645 	};
4646 
4647 	/* Fill the register fields in the flow. */
4648 	ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
4649 	if (ret < 0)
4650 		return NULL;
4651 	tag_spec.id = ret;
4652 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4653 	if (ret < 0)
4654 		return NULL;
4655 	cp_mreg.src = ret;
4656 	/* Provide the full width of FLAG specific value. */
4657 	if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
4658 		tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
4659 	/* Build a new flow. */
4660 	if (mark_id != MLX5_DEFAULT_COPY_ID) {
4661 		items[0] = (struct rte_flow_item){
4662 			.type = (enum rte_flow_item_type)
4663 				MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4664 			.spec = &tag_spec,
4665 		};
4666 		items[1] = (struct rte_flow_item){
4667 			.type = RTE_FLOW_ITEM_TYPE_END,
4668 		};
4669 		actions[0] = (struct rte_flow_action){
4670 			.type = (enum rte_flow_action_type)
4671 				MLX5_RTE_FLOW_ACTION_TYPE_MARK,
4672 			.conf = &ftag,
4673 		};
4674 		actions[1] = (struct rte_flow_action){
4675 			.type = (enum rte_flow_action_type)
4676 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4677 			.conf = &cp_mreg,
4678 		};
4679 		actions[2] = (struct rte_flow_action){
4680 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
4681 			.conf = &jump,
4682 		};
4683 		actions[3] = (struct rte_flow_action){
4684 			.type = RTE_FLOW_ACTION_TYPE_END,
4685 		};
4686 	} else {
4687 		/* Default rule, wildcard match. */
4688 		attr.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR;
4689 		items[0] = (struct rte_flow_item){
4690 			.type = RTE_FLOW_ITEM_TYPE_END,
4691 		};
4692 		actions[0] = (struct rte_flow_action){
4693 			.type = (enum rte_flow_action_type)
4694 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4695 			.conf = &cp_mreg,
4696 		};
4697 		actions[1] = (struct rte_flow_action){
4698 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
4699 			.conf = &jump,
4700 		};
4701 		actions[2] = (struct rte_flow_action){
4702 			.type = RTE_FLOW_ACTION_TYPE_END,
4703 		};
4704 	}
4705 	/* Build a new entry. */
4706 	mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4707 	if (!mcp_res) {
4708 		rte_errno = ENOMEM;
4709 		return NULL;
4710 	}
4711 	mcp_res->idx = idx;
4712 	mcp_res->mark_id = mark_id;
4713 	/*
4714 	 * The copy Flows are not included in any list. These
4715 	 * ones are referenced from other Flows and cannot
4716 	 * be applied, removed or deleted in arbitrary order
4717 	 * by list traversing.
4718 	 */
4719 	mcp_res->rix_flow = flow_list_create(dev, MLX5_FLOW_TYPE_MCP,
4720 					&attr, items, actions, false, error);
4721 	if (!mcp_res->rix_flow) {
4722 		mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], idx);
4723 		return NULL;
4724 	}
4725 	return &mcp_res->hlist_ent;
4726 }
4727 
4728 struct mlx5_list_entry *
4729 flow_dv_mreg_clone_cb(void *tool_ctx, struct mlx5_list_entry *oentry,
4730 		      void *cb_ctx __rte_unused)
4731 {
4732 	struct rte_eth_dev *dev = tool_ctx;
4733 	struct mlx5_priv *priv = dev->data->dev_private;
4734 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4735 	uint32_t idx = 0;
4736 
4737 	mcp_res = mlx5_ipool_malloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4738 	if (!mcp_res) {
4739 		rte_errno = ENOMEM;
4740 		return NULL;
4741 	}
4742 	memcpy(mcp_res, oentry, sizeof(*mcp_res));
4743 	mcp_res->idx = idx;
4744 	return &mcp_res->hlist_ent;
4745 }
4746 
4747 void
4748 flow_dv_mreg_clone_free_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4749 {
4750 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4751 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4752 	struct rte_eth_dev *dev = tool_ctx;
4753 	struct mlx5_priv *priv = dev->data->dev_private;
4754 
4755 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4756 }
4757 
4758 /**
4759  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4760  *
4761  * As mark_id is unique, if there's already a registered flow for the mark_id,
4762  * return by increasing the reference counter of the resource. Otherwise, create
4763  * the resource (mcp_res) and flow.
4764  *
4765  * Flow looks like,
4766  *   - If ingress port is ANY and reg_c[1] is mark_id,
4767  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4768  *
4769  * For default flow (zero mark_id), flow is like,
4770  *   - If ingress port is ANY,
4771  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
4772  *
4773  * @param dev
4774  *   Pointer to Ethernet device.
4775  * @param mark_id
4776  *   ID of MARK action, zero means default flow for META.
4777  * @param[out] error
4778  *   Perform verbose error reporting if not NULL.
4779  *
4780  * @return
4781  *   Associated resource on success, NULL otherwise and rte_errno is set.
4782  */
4783 static struct mlx5_flow_mreg_copy_resource *
4784 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
4785 			  struct rte_flow_error *error)
4786 {
4787 	struct mlx5_priv *priv = dev->data->dev_private;
4788 	struct mlx5_list_entry *entry;
4789 	struct mlx5_flow_cb_ctx ctx = {
4790 		.dev = dev,
4791 		.error = error,
4792 		.data = &mark_id,
4793 	};
4794 
4795 	/* Check if already registered. */
4796 	MLX5_ASSERT(priv->mreg_cp_tbl);
4797 	entry = mlx5_hlist_register(priv->mreg_cp_tbl, mark_id, &ctx);
4798 	if (!entry)
4799 		return NULL;
4800 	return container_of(entry, struct mlx5_flow_mreg_copy_resource,
4801 			    hlist_ent);
4802 }
4803 
4804 void
4805 flow_dv_mreg_remove_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4806 {
4807 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4808 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4809 	struct rte_eth_dev *dev = tool_ctx;
4810 	struct mlx5_priv *priv = dev->data->dev_private;
4811 
4812 	MLX5_ASSERT(mcp_res->rix_flow);
4813 	flow_list_destroy(dev, MLX5_FLOW_TYPE_MCP, mcp_res->rix_flow);
4814 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4815 }
4816 
4817 /**
4818  * Release flow in RX_CP_TBL.
4819  *
4820  * @param dev
4821  *   Pointer to Ethernet device.
4822  * @param flow
4823  *   Parent flow for which copying is provided.
4824  */
4825 static void
4826 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
4827 			  struct rte_flow *flow)
4828 {
4829 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4830 	struct mlx5_priv *priv = dev->data->dev_private;
4831 
4832 	if (!flow->rix_mreg_copy)
4833 		return;
4834 	mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
4835 				 flow->rix_mreg_copy);
4836 	if (!mcp_res || !priv->mreg_cp_tbl)
4837 		return;
4838 	MLX5_ASSERT(mcp_res->rix_flow);
4839 	mlx5_hlist_unregister(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
4840 	flow->rix_mreg_copy = 0;
4841 }
4842 
4843 /**
4844  * Remove the default copy action from RX_CP_TBL.
4845  *
4846  * This function is called from mlx5_dev_start(). Thread safety is not
4847  * guaranteed.
4848  *
4849  * @param dev
4850  *   Pointer to Ethernet device.
4851  */
4852 static void
4853 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
4854 {
4855 	struct mlx5_list_entry *entry;
4856 	struct mlx5_priv *priv = dev->data->dev_private;
4857 	struct mlx5_flow_cb_ctx ctx;
4858 	uint32_t mark_id;
4859 
4860 	/* Check if default flow is registered. */
4861 	if (!priv->mreg_cp_tbl)
4862 		return;
4863 	mark_id = MLX5_DEFAULT_COPY_ID;
4864 	ctx.data = &mark_id;
4865 	entry = mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx);
4866 	if (!entry)
4867 		return;
4868 	mlx5_hlist_unregister(priv->mreg_cp_tbl, entry);
4869 }
4870 
4871 /**
4872  * Add the default copy action in RX_CP_TBL.
4873  *
4874  * This function is called from mlx5_dev_start(). Thread safety is not
4875  * guaranteed.
4876  *
4877  * @param dev
4878  *   Pointer to Ethernet device.
4879  * @param[out] error
4880  *   Perform verbose error reporting if not NULL.
4881  *
4882  * @return
4883  *   0 for success, negative value otherwise and rte_errno is set.
4884  */
4885 static int
4886 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
4887 				  struct rte_flow_error *error)
4888 {
4889 	struct mlx5_priv *priv = dev->data->dev_private;
4890 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4891 	struct mlx5_flow_cb_ctx ctx;
4892 	uint32_t mark_id;
4893 
4894 	/* Check whether extensive metadata feature is engaged. */
4895 	if (!priv->sh->config.dv_flow_en ||
4896 	    priv->sh->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4897 	    !mlx5_flow_ext_mreg_supported(dev) ||
4898 	    !priv->sh->dv_regc0_mask)
4899 		return 0;
4900 	/*
4901 	 * Adding the default mreg copy flow may be called multiple times,
4902 	 * while it is removed only once in stop. Avoid registering it twice.
4903 	 */
4904 	mark_id = MLX5_DEFAULT_COPY_ID;
4905 	ctx.data = &mark_id;
4906 	if (mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx))
4907 		return 0;
4908 	mcp_res = flow_mreg_add_copy_action(dev, mark_id, error);
4909 	if (!mcp_res)
4910 		return -rte_errno;
4911 	return 0;
4912 }
4913 
4914 /**
4915  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4916  *
4917  * All the flows having a Q/RSS action should be split by
4918  * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL
4919  * performs the following,
4920  *   - CQE->flow_tag := reg_c[1] (MARK)
4921  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4922  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1],
4923  * but there should be a flow for each MARK ID set by the MARK action.
4924  *
4925  * For the aforementioned reason, if there's a MARK action in flow's action
4926  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
4927  * the MARK ID to CQE's flow_tag like,
4928  *   - If reg_c[1] is mark_id,
4929  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4930  *
4931  * For SET_META action which stores value in reg_c[0], as the destination is
4932  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
4933  * MARK ID means the default flow. The default flow looks like,
4934  *   - For all flows, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4935  *
4936  * @param dev
4937  *   Pointer to Ethernet device.
4938  * @param flow
4939  *   Pointer to flow structure.
4940  * @param[in] actions
4941  *   Pointer to the list of actions.
4942  * @param[out] error
4943  *   Perform verbose error reporting if not NULL.
4944  *
4945  * @return
4946  *   0 on success, negative value otherwise and rte_errno is set.
4947  */
4948 static int
4949 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
4950 			    struct rte_flow *flow,
4951 			    const struct rte_flow_action *actions,
4952 			    struct rte_flow_error *error)
4953 {
4954 	struct mlx5_priv *priv = dev->data->dev_private;
4955 	struct mlx5_sh_config *config = &priv->sh->config;
4956 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4957 	const struct rte_flow_action_mark *mark;
4958 
4959 	/* Check whether extensive metadata feature is engaged. */
4960 	if (!config->dv_flow_en ||
4961 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4962 	    !mlx5_flow_ext_mreg_supported(dev) ||
4963 	    !priv->sh->dv_regc0_mask)
4964 		return 0;
4965 	/* Find MARK action. */
4966 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4967 		switch (actions->type) {
4968 		case RTE_FLOW_ACTION_TYPE_FLAG:
4969 			mcp_res = flow_mreg_add_copy_action
4970 				(dev, MLX5_FLOW_MARK_DEFAULT, error);
4971 			if (!mcp_res)
4972 				return -rte_errno;
4973 			flow->rix_mreg_copy = mcp_res->idx;
4974 			return 0;
4975 		case RTE_FLOW_ACTION_TYPE_MARK:
4976 			mark = (const struct rte_flow_action_mark *)
4977 				actions->conf;
4978 			mcp_res =
4979 				flow_mreg_add_copy_action(dev, mark->id, error);
4980 			if (!mcp_res)
4981 				return -rte_errno;
4982 			flow->rix_mreg_copy = mcp_res->idx;
4983 			return 0;
4984 		default:
4985 			break;
4986 		}
4987 	}
4988 	return 0;
4989 }
4990 
4991 #define MLX5_MAX_SPLIT_ACTIONS 24
4992 #define MLX5_MAX_SPLIT_ITEMS 24
4993 
4994 /**
4995  * Split the hairpin flow.
4996  * Since HW can't support encap and push-vlan on Rx, we move these
4997  * actions to Tx.
4998  * If the count action is after the encap then we also
4999  * move the count action. In this case the count will also measure
5000  * the outer bytes.
5001  *
5002  * @param dev
5003  *   Pointer to Ethernet device.
5004  * @param[in] actions
5005  *   Associated actions (list terminated by the END action).
5006  * @param[out] actions_rx
5007  *   Rx flow actions.
5008  * @param[out] actions_tx
5009  *   Tx flow actions.
5010  * @param[out] pattern_tx
5011  *   The pattern items for the Tx flow.
5012  * @param[out] flow_id
5013  *   The flow ID connected to this flow.
5014  *
5015  * @return
5016  *   0 on success.
5017  */
5018 static int
5019 flow_hairpin_split(struct rte_eth_dev *dev,
5020 		   const struct rte_flow_action actions[],
5021 		   struct rte_flow_action actions_rx[],
5022 		   struct rte_flow_action actions_tx[],
5023 		   struct rte_flow_item pattern_tx[],
5024 		   uint32_t flow_id)
5025 {
5026 	const struct rte_flow_action_raw_encap *raw_encap;
5027 	const struct rte_flow_action_raw_decap *raw_decap;
5028 	struct mlx5_rte_flow_action_set_tag *set_tag;
5029 	struct rte_flow_action *tag_action;
5030 	struct mlx5_rte_flow_item_tag *tag_item;
5031 	struct rte_flow_item *item;
5032 	char *addr;
5033 	int encap = 0;
5034 
5035 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5036 		switch (actions->type) {
5037 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
5038 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
5039 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5040 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5041 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
5042 			rte_memcpy(actions_tx, actions,
5043 			       sizeof(struct rte_flow_action));
5044 			actions_tx++;
5045 			break;
5046 		case RTE_FLOW_ACTION_TYPE_COUNT:
5047 			if (encap) {
5048 				rte_memcpy(actions_tx, actions,
5049 					   sizeof(struct rte_flow_action));
5050 				actions_tx++;
5051 			} else {
5052 				rte_memcpy(actions_rx, actions,
5053 					   sizeof(struct rte_flow_action));
5054 				actions_rx++;
5055 			}
5056 			break;
5057 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5058 			raw_encap = actions->conf;
5059 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) {
5060 				memcpy(actions_tx, actions,
5061 				       sizeof(struct rte_flow_action));
5062 				actions_tx++;
5063 				encap = 1;
5064 			} else {
5065 				rte_memcpy(actions_rx, actions,
5066 					   sizeof(struct rte_flow_action));
5067 				actions_rx++;
5068 			}
5069 			break;
5070 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5071 			raw_decap = actions->conf;
5072 			if (raw_decap->size < MLX5_ENCAPSULATION_DECISION_SIZE) {
5073 				memcpy(actions_tx, actions,
5074 				       sizeof(struct rte_flow_action));
5075 				actions_tx++;
5076 			} else {
5077 				rte_memcpy(actions_rx, actions,
5078 					   sizeof(struct rte_flow_action));
5079 				actions_rx++;
5080 			}
5081 			break;
5082 		default:
5083 			rte_memcpy(actions_rx, actions,
5084 				   sizeof(struct rte_flow_action));
5085 			actions_rx++;
5086 			break;
5087 		}
5088 	}
5089 	/* Add set meta action and end action for the Rx flow. */
5090 	tag_action = actions_rx;
5091 	tag_action->type = (enum rte_flow_action_type)
5092 			   MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5093 	actions_rx++;
5094 	rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
5095 	actions_rx++;
5096 	set_tag = (void *)actions_rx;
5097 	*set_tag = (struct mlx5_rte_flow_action_set_tag) {
5098 		.id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL),
5099 		.data = flow_id,
5100 	};
5101 	MLX5_ASSERT(set_tag->id > REG_NON);
5102 	tag_action->conf = set_tag;
5103 	/* Create Tx item list. */
5104 	rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
5105 	addr = (void *)&pattern_tx[2];
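	/*
	 * The tag spec and mask below are stored in the spare space right
	 * after the two Tx pattern item slots; pattern_tx[] is assumed to be
	 * allocated by the caller with room for them.
	 */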
5106 	item = pattern_tx;
5107 	item->type = (enum rte_flow_item_type)
5108 		     MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5109 	tag_item = (void *)addr;
5110 	tag_item->data = flow_id;
5111 	tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
5112 	MLX5_ASSERT(tag_item->id > REG_NON);
5113 	item->spec = tag_item;
5114 	addr += sizeof(struct mlx5_rte_flow_item_tag);
5115 	tag_item = (void *)addr;
5116 	tag_item->data = UINT32_MAX;
5117 	tag_item->id = UINT16_MAX;
5118 	item->mask = tag_item;
5119 	item->last = NULL;
5120 	item++;
5121 	item->type = RTE_FLOW_ITEM_TYPE_END;
5122 	return 0;
5123 }
5124 
5125 /**
5126  * The last stage of splitting chain, just creates the subflow
5127  * without any modification.
5128  *
5129  * @param[in] dev
5130  *   Pointer to Ethernet device.
5131  * @param[in] flow
5132  *   Parent flow structure pointer.
5133  * @param[in, out] sub_flow
5134  *   Pointer to return the created subflow, may be NULL.
5135  * @param[in] attr
5136  *   Flow rule attributes.
5137  * @param[in] items
5138  *   Pattern specification (list terminated by the END pattern item).
5139  * @param[in] actions
5140  *   Associated actions (list terminated by the END action).
5141  * @param[in] flow_split_info
5142  *   Pointer to flow split info structure.
5143  * @param[out] error
5144  *   Perform verbose error reporting if not NULL.
5145  * @return
5146  *   0 on success, negative value otherwise
5147  */
5148 static int
5149 flow_create_split_inner(struct rte_eth_dev *dev,
5150 			struct rte_flow *flow,
5151 			struct mlx5_flow **sub_flow,
5152 			const struct rte_flow_attr *attr,
5153 			const struct rte_flow_item items[],
5154 			const struct rte_flow_action actions[],
5155 			struct mlx5_flow_split_info *flow_split_info,
5156 			struct rte_flow_error *error)
5157 {
5158 	struct mlx5_flow *dev_flow;
5159 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
5160 
5161 	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
5162 				    flow_split_info->flow_idx, error);
5163 	if (!dev_flow)
5164 		return -rte_errno;
5165 	dev_flow->flow = flow;
5166 	dev_flow->external = flow_split_info->external;
5167 	dev_flow->skip_scale = flow_split_info->skip_scale;
5168 	/* Subflow object was created, we must include it in the list. */
5169 	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
5170 		      dev_flow->handle, next);
5171 	/*
5172 	 * If dev_flow is one of the suffix flows, some actions in the suffix
5173 	 * flow may need the user-defined item layer flags; pass the
5174 	 * metadata rxq mark flag to the suffix flow as well.
5175 	 */
5176 	if (flow_split_info->prefix_layers)
5177 		dev_flow->handle->layers = flow_split_info->prefix_layers;
5178 	if (flow_split_info->prefix_mark) {
5179 		MLX5_ASSERT(wks);
5180 		wks->mark = 1;
5181 	}
5182 	if (sub_flow)
5183 		*sub_flow = dev_flow;
5184 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5185 	dev_flow->dv.table_id = flow_split_info->table_id;
5186 #endif
5187 	return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
5188 }
5189 
5190 /**
5191  * Get the sub policy of a meter.
5192  *
5193  * @param[in] dev
5194  *   Pointer to Ethernet device.
5195  * @param[in] flow
5196  *   Parent flow structure pointer.
5197  * @param wks
5198  *   Pointer to thread flow work space.
5199  * @param[in] attr
5200  *   Flow rule attributes.
5201  * @param[in] items
5202  *   Pattern specification (list terminated by the END pattern item).
5203  * @param[out] error
5204  *   Perform verbose error reporting if not NULL.
5205  *
5206  * @return
5207  *   Pointer to the meter sub policy, NULL otherwise and rte_errno is set.
5208  */
5209 static struct mlx5_flow_meter_sub_policy *
5210 get_meter_sub_policy(struct rte_eth_dev *dev,
5211 		     struct rte_flow *flow,
5212 		     struct mlx5_flow_workspace *wks,
5213 		     const struct rte_flow_attr *attr,
5214 		     const struct rte_flow_item items[],
5215 		     struct rte_flow_error *error)
5216 {
5217 	struct mlx5_flow_meter_policy *policy;
5218 	struct mlx5_flow_meter_policy *final_policy;
5219 	struct mlx5_flow_meter_sub_policy *sub_policy = NULL;
5220 
5221 	policy = wks->policy;
5222 	final_policy = policy->is_hierarchy ? wks->final_policy : policy;
5223 	if (final_policy->is_rss || final_policy->is_queue) {
5224 		struct mlx5_flow_rss_desc rss_desc_v[MLX5_MTR_RTE_COLORS];
5225 		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS] = {0};
5226 		uint32_t i;
5227 
5228 		/*
5229 		 * This is a tmp dev_flow,
5230 		 * no need to register any matcher for it in translate.
5231 		 */
5232 		wks->skip_matcher_reg = 1;
5233 		for (i = 0; i < MLX5_MTR_RTE_COLORS; i++) {
5234 			struct mlx5_flow dev_flow = {0};
5235 			struct mlx5_flow_handle dev_handle = { {0} };
5236 			uint8_t fate = final_policy->act_cnt[i].fate_action;
5237 
5238 			if (fate == MLX5_FLOW_FATE_SHARED_RSS) {
5239 				const struct rte_flow_action_rss *rss_act =
5240 					final_policy->act_cnt[i].rss->conf;
5241 				struct rte_flow_action rss_actions[2] = {
5242 					[0] = {
5243 					.type = RTE_FLOW_ACTION_TYPE_RSS,
5244 					.conf = rss_act,
5245 					},
5246 					[1] = {
5247 					.type = RTE_FLOW_ACTION_TYPE_END,
5248 					.conf = NULL,
5249 					}
5250 				};
5251 
5252 				dev_flow.handle = &dev_handle;
5253 				dev_flow.ingress = attr->ingress;
5254 				dev_flow.flow = flow;
5255 				dev_flow.external = 0;
5256 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5257 				dev_flow.dv.transfer = attr->transfer;
5258 #endif
5259 				/**
5260 				 * Translate RSS action to get rss hash fields.
5261 				 */
5262 				if (flow_drv_translate(dev, &dev_flow, attr,
5263 						items, rss_actions, error))
5264 					goto exit;
5265 				rss_desc_v[i] = wks->rss_desc;
5266 				rss_desc_v[i].key_len = MLX5_RSS_HASH_KEY_LEN;
5267 				rss_desc_v[i].hash_fields =
5268 						dev_flow.hash_fields;
5269 				rss_desc_v[i].queue_num =
5270 						rss_desc_v[i].hash_fields ?
5271 						rss_desc_v[i].queue_num : 1;
5272 				rss_desc_v[i].tunnel =
5273 						!!(dev_flow.handle->layers &
5274 						   MLX5_FLOW_LAYER_TUNNEL);
5275 				/* Use the RSS queues in the containers. */
5276 				rss_desc_v[i].queue =
5277 					(uint16_t *)(uintptr_t)rss_act->queue;
5278 				rss_desc[i] = &rss_desc_v[i];
5279 			} else if (fate == MLX5_FLOW_FATE_QUEUE) {
5280 				/* This is queue action. */
5281 				rss_desc_v[i] = wks->rss_desc;
5282 				rss_desc_v[i].key_len = 0;
5283 				rss_desc_v[i].hash_fields = 0;
5284 				rss_desc_v[i].queue =
5285 					&final_policy->act_cnt[i].queue;
5286 				rss_desc_v[i].queue_num = 1;
5287 				rss_desc[i] = &rss_desc_v[i];
5288 			} else {
5289 				rss_desc[i] = NULL;
5290 			}
5291 		}
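		/*
		 * At this point rss_desc[] holds one descriptor per meter
		 * color (or NULL when the color has no queue/RSS fate); the
		 * driver callback below uses it to prepare a matching
		 * sub-policy.
		 */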
5292 		sub_policy = flow_drv_meter_sub_policy_rss_prepare(dev,
5293 						flow, policy, rss_desc);
5294 	} else {
5295 		enum mlx5_meter_domain mtr_domain =
5296 			attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5297 				(attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5298 						MLX5_MTR_DOMAIN_INGRESS);
5299 		sub_policy = policy->sub_policys[mtr_domain][0];
5300 	}
5301 	if (!sub_policy)
5302 		rte_flow_error_set(error, EINVAL,
5303 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5304 				   "Failed to get meter sub-policy.");
5305 exit:
5306 	return sub_policy;
5307 }
5308 
5309 /**
5310  * Split the meter flow.
5311  *
5312  * As the meter flow will be split into three sub flows, the actions
5313  * other than the meter action only make sense if the meter accepts
5314  * the packet. If it needs to be dropped, no additional
5315  * actions should be taken.
5316  *
5317  * One kind of special action which decapsulates the L3 tunnel
5318  * header will be put in the prefix sub flow, so as not to take the
5319  * L3 tunnel header into account.
5320  *
5321  * @param[in] dev
5322  *   Pointer to Ethernet device.
5323  * @param[in] flow
5324  *   Parent flow structure pointer.
5325  * @param wks
5326  *   Pointer to thread flow work space.
5327  * @param[in] attr
5328  *   Flow rule attributes.
5329  * @param[in] items
5330  *   Pattern specification (list terminated by the END pattern item).
5331  * @param[out] sfx_items
5332  *   Suffix flow match items (list terminated by the END pattern item).
5333  * @param[in] actions
5334  *   Associated actions (list terminated by the END action).
5335  * @param[out] actions_sfx
5336  *   Suffix flow actions.
5337  * @param[out] actions_pre
5338  *   Prefix flow actions.
5339  * @param[out] mtr_flow_id
5340  *   Pointer to meter flow id.
5341  * @param[out] error
5342  *   Perform verbose error reporting if not NULL.
5343  *
5344  * @return
5345  *   0 on success, a negative errno value otherwise and rte_errno is set.
5346  */
5347 static int
5348 flow_meter_split_prep(struct rte_eth_dev *dev,
5349 		      struct rte_flow *flow,
5350 		      struct mlx5_flow_workspace *wks,
5351 		      const struct rte_flow_attr *attr,
5352 		      const struct rte_flow_item items[],
5353 		      struct rte_flow_item sfx_items[],
5354 		      const struct rte_flow_action actions[],
5355 		      struct rte_flow_action actions_sfx[],
5356 		      struct rte_flow_action actions_pre[],
5357 		      uint32_t *mtr_flow_id,
5358 		      struct rte_flow_error *error)
5359 {
5360 	struct mlx5_priv *priv = dev->data->dev_private;
5361 	struct mlx5_flow_meter_info *fm = wks->fm;
5362 	struct rte_flow_action *tag_action = NULL;
5363 	struct rte_flow_item *tag_item;
5364 	struct mlx5_rte_flow_action_set_tag *set_tag;
5365 	const struct rte_flow_action_raw_encap *raw_encap;
5366 	const struct rte_flow_action_raw_decap *raw_decap;
5367 	struct mlx5_rte_flow_item_tag *tag_item_spec;
5368 	struct mlx5_rte_flow_item_tag *tag_item_mask;
5369 	uint32_t tag_id = 0;
5370 	struct rte_flow_item *vlan_item_dst = NULL;
5371 	const struct rte_flow_item *vlan_item_src = NULL;
5372 	const struct rte_flow_item *orig_items = items;
5373 	struct rte_flow_action *hw_mtr_action;
5374 	struct rte_flow_action *action_pre_head = NULL;
5375 	uint16_t flow_src_port = priv->representor_id;
5376 	bool mtr_first;
5377 	uint8_t mtr_id_offset = priv->mtr_reg_share ? MLX5_MTR_COLOR_BITS : 0;
5378 	uint8_t mtr_reg_bits = priv->mtr_reg_share ?
5379 				MLX5_MTR_IDLE_BITS_IN_COLOR_REG : MLX5_REG_BITS;
5380 	uint32_t flow_id = 0;
5381 	uint32_t flow_id_reversed = 0;
5382 	uint8_t flow_id_bits = 0;
5383 	bool after_meter = false;
5384 	int shift;
5385 
5386 	/* Prepare the suffix subflow items. */
5387 	tag_item = sfx_items++;
5388 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
5389 		int item_type = items->type;
5390 
5391 		switch (item_type) {
5392 		case RTE_FLOW_ITEM_TYPE_PORT_ID:
5393 		case RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT:
5394 			if (mlx5_flow_get_item_vport_id(dev, items, &flow_src_port, NULL, error))
5395 				return -rte_errno;
5396 			if (!fm->def_policy && wks->policy->is_hierarchy &&
5397 			    flow_src_port != priv->representor_id) {
5398 				if (flow_drv_mtr_hierarchy_rule_create(dev,
5399 								flow, fm,
5400 								flow_src_port,
5401 								items,
5402 								error))
5403 					return -rte_errno;
5404 			}
5405 			memcpy(sfx_items, items, sizeof(*sfx_items));
5406 			sfx_items++;
5407 			break;
5408 		case RTE_FLOW_ITEM_TYPE_VLAN:
5409 			/* Determine whether to copy the VLAN item below. */
5410 			vlan_item_src = items;
5411 			vlan_item_dst = sfx_items++;
5412 			vlan_item_dst->type = RTE_FLOW_ITEM_TYPE_VOID;
5413 			break;
5414 		default:
5415 			break;
5416 		}
5417 	}
5418 	sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
5419 	sfx_items++;
5420 	mtr_first = priv->sh->meter_aso_en &&
5421 		(attr->egress || (attr->transfer && flow_src_port != UINT16_MAX));
5422 	/* For ASO meter, meter must be before tag in TX direction. */
5423 	if (mtr_first) {
5424 		action_pre_head = actions_pre++;
5425 		/* Leave space for tag action. */
5426 		tag_action = actions_pre++;
5427 	}
5428 	/* Prepare the actions for prefix and suffix flow. */
5429 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5430 		struct rte_flow_action *action_cur = NULL;
5431 
5432 		switch (actions->type) {
5433 		case RTE_FLOW_ACTION_TYPE_METER:
5434 			if (mtr_first) {
5435 				action_cur = action_pre_head;
5436 			} else {
5437 				/* Leave space for tag action. */
5438 				tag_action = actions_pre++;
5439 				action_cur = actions_pre++;
5440 			}
5441 			after_meter = true;
5442 			break;
5443 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5444 		case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5445 			action_cur = actions_pre++;
5446 			break;
5447 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5448 			raw_encap = actions->conf;
5449 			if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
5450 				action_cur = actions_pre++;
5451 			break;
5452 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5453 			raw_decap = actions->conf;
5454 			if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
5455 				action_cur = actions_pre++;
5456 			break;
5457 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5458 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5459 			if (vlan_item_dst && vlan_item_src) {
5460 				memcpy(vlan_item_dst, vlan_item_src,
5461 					sizeof(*vlan_item_dst));
5462 				/*
5463 				 * Convert to internal match item, it is used
5464 				 * for vlan push and set vid.
5465 				 */
5466 				vlan_item_dst->type = (enum rte_flow_item_type)
5467 						MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
5468 			}
5469 			break;
5470 		case RTE_FLOW_ACTION_TYPE_COUNT:
5471 			if (fm->def_policy)
5472 				action_cur = after_meter ?
5473 						actions_sfx++ : actions_pre++;
5474 			break;
5475 		default:
5476 			break;
5477 		}
5478 		if (!action_cur)
5479 			action_cur = (fm->def_policy) ?
5480 					actions_sfx++ : actions_pre++;
5481 		memcpy(action_cur, actions, sizeof(struct rte_flow_action));
5482 	}
5483 	/* Add end action to the actions. */
5484 	actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
5485 	if (priv->sh->meter_aso_en) {
5486 		/**
5487 		 * For ASO meter, an extra jump action must be added explicitly
5488 		 * to jump from the meter table to the policer table.
5489 		 */
5490 		struct mlx5_flow_meter_sub_policy *sub_policy;
5491 		struct mlx5_flow_tbl_data_entry *tbl_data;
5492 
5493 		if (!fm->def_policy) {
5494 			sub_policy = get_meter_sub_policy(dev, flow, wks,
5495 							  attr, orig_items,
5496 							  error);
5497 			if (!sub_policy)
5498 				return -rte_errno;
5499 		} else {
5500 			enum mlx5_meter_domain mtr_domain =
5501 			attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5502 				(attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5503 						MLX5_MTR_DOMAIN_INGRESS);
5504 
5505 			sub_policy =
5506 			&priv->sh->mtrmng->def_policy[mtr_domain]->sub_policy;
5507 		}
5508 		tbl_data = container_of(sub_policy->tbl_rsc,
5509 					struct mlx5_flow_tbl_data_entry, tbl);
5510 		hw_mtr_action = actions_pre++;
5511 		hw_mtr_action->type = (enum rte_flow_action_type)
5512 				      MLX5_RTE_FLOW_ACTION_TYPE_JUMP;
5513 		hw_mtr_action->conf = tbl_data->jump.action;
5514 	}
5515 	actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
5516 	actions_pre++;
5517 	if (!tag_action)
5518 		return rte_flow_error_set(error, ENOMEM,
5519 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5520 					  NULL, "No tag action space.");
5521 	if (!mtr_flow_id) {
5522 		tag_action->type = RTE_FLOW_ACTION_TYPE_VOID;
5523 		goto exit;
5524 	}
5525 	/* Only default-policy Meter creates mtr flow id. */
5526 	if (fm->def_policy) {
5527 		mlx5_ipool_malloc(fm->flow_ipool, &tag_id);
5528 		if (!tag_id)
5529 			return rte_flow_error_set(error, ENOMEM,
5530 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5531 					"Failed to allocate meter flow id.");
5532 		flow_id = tag_id - 1;
5533 		flow_id_bits = (!flow_id) ? 1 :
5534 				(MLX5_REG_BITS - __builtin_clz(flow_id));
5535 		if ((flow_id_bits + priv->sh->mtrmng->max_mtr_bits) >
5536 		    mtr_reg_bits) {
5537 			mlx5_ipool_free(fm->flow_ipool, tag_id);
5538 			return rte_flow_error_set(error, EINVAL,
5539 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5540 					"Meter flow id exceeds max limit.");
5541 		}
5542 		if (flow_id_bits > priv->sh->mtrmng->max_mtr_flow_bits)
5543 			priv->sh->mtrmng->max_mtr_flow_bits = flow_id_bits;
5544 	}
5545 	/* Build tag actions and items for meter_id/meter flow_id. */
5546 	set_tag = (struct mlx5_rte_flow_action_set_tag *)actions_pre;
5547 	tag_item_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
5548 	tag_item_mask = tag_item_spec + 1;
5549 	/* Both flow_id and meter_id share the same register. */
5550 	*set_tag = (struct mlx5_rte_flow_action_set_tag) {
5551 		.id = (enum modify_reg)mlx5_flow_get_reg_id(dev, MLX5_MTR_ID,
5552 							    0, error),
5553 		.offset = mtr_id_offset,
5554 		.length = mtr_reg_bits,
5555 		.data = flow->meter,
5556 	};
5557 	/*
5558 	 * The color register bits used by flow_id grow from
5559 	 * MSB to LSB, so the flow_id value must be bit-reversed in reg_c.
5560 	 */
5561 	for (shift = 0; shift < flow_id_bits; shift++)
5562 		flow_id_reversed = (flow_id_reversed << 1) |
5563 				((flow_id >> shift) & 0x1);
5564 	set_tag->data |=
5565 		flow_id_reversed << (mtr_reg_bits - flow_id_bits);
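	/*
	 * For example, with flow_id = 0x6 (0b110) and flow_id_bits = 3, the
	 * reversed value is 0b011, which is then shifted left by
	 * (mtr_reg_bits - 3) so that it occupies the most significant bits
	 * of the meter register field, next to the meter id.
	 */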
5566 	tag_item_spec->id = set_tag->id;
5567 	tag_item_spec->data = set_tag->data << mtr_id_offset;
5568 	tag_item_mask->data = UINT32_MAX << mtr_id_offset;
5569 	tag_action->type = (enum rte_flow_action_type)
5570 				MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5571 	tag_action->conf = set_tag;
5572 	tag_item->type = (enum rte_flow_item_type)
5573 				MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5574 	tag_item->spec = tag_item_spec;
5575 	tag_item->last = NULL;
5576 	tag_item->mask = tag_item_mask;
5577 exit:
5578 	if (mtr_flow_id)
5579 		*mtr_flow_id = tag_id;
5580 	return 0;
5581 }
5582 
5583 /**
5584  * Split action list having QUEUE/RSS for metadata register copy.
5585  *
5586  * Once Q/RSS action is detected in user's action list, the flow action
5587  * should be split in order to copy metadata registers, which will happen in
5588  * RX_CP_TBL like,
5589  *   - CQE->flow_tag := reg_c[1] (MARK)
5590  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
5591  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
5592  * This is because the last action of each flow must be a terminal action
5593  * (QUEUE, RSS or DROP).
5594  *
5595  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
5596  * stored and kept in the mlx5_flow structure per each sub_flow.
5597  *
5598  * The Q/RSS action is replaced with,
5599  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
5600  * And the following JUMP action is added at the end,
5601  *   - JUMP, to RX_CP_TBL.
5602  *
5603  * A flow to perform remained Q/RSS action will be created in RX_ACT_TBL by
5604  * flow_create_split_metadata() routine. The flow will look like,
5605  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
5606  *
5607  * @param dev
5608  *   Pointer to Ethernet device.
5609  * @param[out] split_actions
5610  *   Pointer to store split actions to jump to CP_TBL.
5611  * @param[in] actions
5612  *   Pointer to the list of original flow actions.
5613  * @param[in] qrss
5614  *   Pointer to the Q/RSS action.
5615  * @param[in] actions_n
5616  *   Number of original actions.
5617  * @param[in] mtr_sfx
5618  *   Check if it is in meter suffix table.
5619  * @param[out] error
5620  *   Perform verbose error reporting if not NULL.
5621  *
5622  * @return
5623  *   non-zero unique flow_id on success, otherwise 0 and
5624  *   error/rte_error are set.
5625  */
5626 static uint32_t
5627 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
5628 			  struct rte_flow_action *split_actions,
5629 			  const struct rte_flow_action *actions,
5630 			  const struct rte_flow_action *qrss,
5631 			  int actions_n, int mtr_sfx,
5632 			  struct rte_flow_error *error)
5633 {
5634 	struct mlx5_priv *priv = dev->data->dev_private;
5635 	struct mlx5_rte_flow_action_set_tag *set_tag;
5636 	struct rte_flow_action_jump *jump;
5637 	const int qrss_idx = qrss - actions;
5638 	uint32_t flow_id = 0;
5639 	int ret = 0;
5640 
5641 	/*
5642 	 * Given actions will be split
5643 	 * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
5644 	 * - Add jump to mreg CP_TBL.
5645 	 * As a result, there will be one more action.
5646 	 */
5647 	memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
5648 	/* Count MLX5_RTE_FLOW_ACTION_TYPE_TAG. */
5649 	++actions_n;
5650 	set_tag = (void *)(split_actions + actions_n);
5651 	/*
5652 	 * If we are not the meter suffix flow, add the tag action,
5653 	 * since the meter suffix flow already has the tag added.
5654 	 */
5655 	if (!mtr_sfx) {
5656 		/*
5657 		 * Allocate the new subflow ID. This one is unique within
5658 		 * device and not shared with representors. Otherwise,
5659 		 * we would have to resolve multi-thread access synch
5660 		 * issue. Each flow on the shared device is appended
5661 		 * with source vport identifier, so the resulting
5662 		 * flows will be unique in the shared (by master and
5663 		 * representors) domain even if they have coinciding
5664 		 * IDs.
5665 		 */
5666 		mlx5_ipool_malloc(priv->sh->ipool
5667 				  [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &flow_id);
5668 		if (!flow_id)
5669 			return rte_flow_error_set(error, ENOMEM,
5670 						  RTE_FLOW_ERROR_TYPE_ACTION,
5671 						  NULL, "can't allocate id "
5672 						  "for split Q/RSS subflow");
5673 		/* Internal SET_TAG action to set flow ID. */
5674 		*set_tag = (struct mlx5_rte_flow_action_set_tag){
5675 			.data = flow_id,
5676 		};
5677 		ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
5678 		if (ret < 0)
5679 			return ret;
5680 		set_tag->id = ret;
5681 		/* Construct new actions array. */
5682 		/* Replace QUEUE/RSS action. */
5683 		split_actions[qrss_idx] = (struct rte_flow_action){
5684 			.type = (enum rte_flow_action_type)
5685 				MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5686 			.conf = set_tag,
5687 		};
5688 	} else {
5689 		/*
5690 		 * If we are the meter suffix flow, the tag already exists.
5691 		 * Set the QUEUE/RSS action to void.
5692 		 */
5693 		split_actions[qrss_idx].type = RTE_FLOW_ACTION_TYPE_VOID;
5694 	}
5695 	/* JUMP action to jump to mreg copy table (CP_TBL). */
5696 	jump = (void *)(set_tag + 1);
5697 	*jump = (struct rte_flow_action_jump){
5698 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
5699 	};
5700 	split_actions[actions_n - 2] = (struct rte_flow_action){
5701 		.type = RTE_FLOW_ACTION_TYPE_JUMP,
5702 		.conf = jump,
5703 	};
5704 	split_actions[actions_n - 1] = (struct rte_flow_action){
5705 		.type = RTE_FLOW_ACTION_TYPE_END,
5706 	};
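	/*
	 * The resulting array is, roughly:
	 *   [ ... original actions ..., SET_TAG (or VOID), ..., JUMP, END ]
	 * with the SET_TAG and JUMP configuration structures carved from the
	 * spare space the caller allocated right after the action array.
	 */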
5707 	return flow_id;
5708 }
5709 
5710 /**
5711  * Extend the given action list for Tx metadata copy.
5712  *
5713  * Copy the given action list to the ext_actions and add flow metadata register
5714  * copy action in order to copy reg_a set by WQE to reg_c[0].
5715  *
5716  * @param[out] ext_actions
5717  *   Pointer to the extended action list.
5718  * @param[in] actions
5719  *   Pointer to the list of actions.
5720  * @param[in] actions_n
5721  *   Number of actions in the list.
5722  * @param[out] error
5723  *   Perform verbose error reporting if not NULL.
5724  * @param[in] encap_idx
5725  *   The encap action index.
5726  *
5727  * @return
5728  *   0 on success, negative value otherwise
5729  */
5730 static int
5731 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
5732 		       struct rte_flow_action *ext_actions,
5733 		       const struct rte_flow_action *actions,
5734 		       int actions_n, struct rte_flow_error *error,
5735 		       int encap_idx)
5736 {
5737 	struct mlx5_flow_action_copy_mreg *cp_mreg =
5738 		(struct mlx5_flow_action_copy_mreg *)
5739 			(ext_actions + actions_n + 1);
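	/*
	 * cp_mreg is placed in the spare space right after the extended
	 * action array (actions_n entries plus the terminating END slot)
	 * allocated by the caller.
	 */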
5740 	int ret;
5741 
5742 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
5743 	if (ret < 0)
5744 		return ret;
5745 	cp_mreg->dst = ret;
5746 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
5747 	if (ret < 0)
5748 		return ret;
5749 	cp_mreg->src = ret;
5750 	if (encap_idx != 0)
5751 		memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
5752 	if (encap_idx == actions_n - 1) {
5753 		ext_actions[actions_n - 1] = (struct rte_flow_action){
5754 			.type = (enum rte_flow_action_type)
5755 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5756 			.conf = cp_mreg,
5757 		};
5758 		ext_actions[actions_n] = (struct rte_flow_action){
5759 			.type = RTE_FLOW_ACTION_TYPE_END,
5760 		};
5761 	} else {
5762 		ext_actions[encap_idx] = (struct rte_flow_action){
5763 			.type = (enum rte_flow_action_type)
5764 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5765 			.conf = cp_mreg,
5766 		};
5767 		memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
5768 				sizeof(*ext_actions) * (actions_n - encap_idx));
5769 	}
5770 	return 0;
5771 }
5772 
5773 /**
5774  * Check the match action from the action list.
5775  *
5776  * @param[in] actions
5777  *   Pointer to the list of actions.
5778  * @param[in] attr
5779  *   Flow rule attributes.
5780  * @param[in] action
5781  *   The action to check for in the list.
5782  * @param[out] match_action_pos
5783  *   Pointer to the position of the matched action if it exists, otherwise -1.
5784  * @param[out] qrss_action_pos
5785  *   Pointer to the position of the Queue/RSS action if it exists, otherwise -1.
5786  * @param[out] modify_after_mirror
5787  *   Pointer to the flag of modify action after FDB mirroring.
5788  *
5789  * @return
5790  *   > 0 the total number of actions.
5791  *   0 if the match action is not found in the action list.
5792  */
5793 static int
5794 flow_check_match_action(const struct rte_flow_action actions[],
5795 			const struct rte_flow_attr *attr,
5796 			enum rte_flow_action_type action,
5797 			int *match_action_pos, int *qrss_action_pos,
5798 			int *modify_after_mirror)
5799 {
5800 	const struct rte_flow_action_sample *sample;
5801 	const struct rte_flow_action_raw_decap *decap;
5802 	int actions_n = 0;
5803 	uint32_t ratio = 0;
5804 	int sub_type = 0;
5805 	int flag = 0;
5806 	int fdb_mirror = 0;
5807 
5808 	*match_action_pos = -1;
5809 	*qrss_action_pos = -1;
5810 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5811 		if (actions->type == action) {
5812 			flag = 1;
5813 			*match_action_pos = actions_n;
5814 		}
5815 		switch (actions->type) {
5816 		case RTE_FLOW_ACTION_TYPE_QUEUE:
5817 		case RTE_FLOW_ACTION_TYPE_RSS:
5818 			*qrss_action_pos = actions_n;
5819 			break;
5820 		case RTE_FLOW_ACTION_TYPE_SAMPLE:
5821 			sample = actions->conf;
5822 			ratio = sample->ratio;
5823 			sub_type = ((const struct rte_flow_action *)
5824 					(sample->actions))->type;
5825 			if (ratio == 1 && attr->transfer)
5826 				fdb_mirror = 1;
5827 			break;
5828 		case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
5829 		case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
5830 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
5831 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
5832 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
5833 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
5834 		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
5835 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
5836 		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
5837 		case RTE_FLOW_ACTION_TYPE_SET_TTL:
5838 		case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
5839 		case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
5840 		case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
5841 		case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
5842 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
5843 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
5844 		case RTE_FLOW_ACTION_TYPE_FLAG:
5845 		case RTE_FLOW_ACTION_TYPE_MARK:
5846 		case RTE_FLOW_ACTION_TYPE_SET_META:
5847 		case RTE_FLOW_ACTION_TYPE_SET_TAG:
5848 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
5849 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5850 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5851 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
5852 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5853 		case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5854 		case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
5855 		case RTE_FLOW_ACTION_TYPE_METER:
5856 			if (fdb_mirror)
5857 				*modify_after_mirror = 1;
5858 			break;
5859 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5860 			decap = actions->conf;
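			/* Skip any VOID actions between RAW_DECAP and the
			 * action that follows it.
			 */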
5861 			while ((++actions)->type == RTE_FLOW_ACTION_TYPE_VOID)
5862 				;
5863 			actions_n++;
5864 			if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
5865 				const struct rte_flow_action_raw_encap *encap =
5866 								actions->conf;
5867 				if (decap->size <=
5868 					MLX5_ENCAPSULATION_DECISION_SIZE &&
5869 				    encap->size >
5870 					MLX5_ENCAPSULATION_DECISION_SIZE)
5871 					/* L3 encap. */
5872 					break;
5873 			}
5874 			if (fdb_mirror)
5875 				*modify_after_mirror = 1;
5876 			break;
5877 		default:
5878 			break;
5879 		}
5880 		actions_n++;
5881 	}
5882 	if (flag && fdb_mirror && !*modify_after_mirror) {
5883 		/* FDB mirroring is implemented with the destination array
5884 		 * instead of the FLOW_SAMPLER object.
5885 		 */
5886 		if (sub_type != RTE_FLOW_ACTION_TYPE_END)
5887 			flag = 0;
5888 	}
5889 	/* Count RTE_FLOW_ACTION_TYPE_END. */
5890 	return flag ? actions_n + 1 : 0;
5891 }
5892 
5893 #define SAMPLE_SUFFIX_ITEM 3
5894 
5895 /**
5896  * Split the sample flow.
5897  *
5898  * The sample flow is split into two sub flows: the prefix flow keeps
5899  * the sample action, while the other actions move to a new suffix flow.
5900  *
5901  * A unique tag id is also added via a tag action in the prefix flow;
5902  * the same tag id is used as a match in the suffix flow.
5903  *
5904  * @param dev
5905  *   Pointer to Ethernet device.
5906  * @param[in] add_tag
5907  *   Add extra tag action flag.
5908  * @param[out] sfx_items
5909  *   Suffix flow match items (list terminated by the END pattern item).
5910  * @param[in] actions
5911  *   Associated actions (list terminated by the END action).
5912  * @param[out] actions_sfx
5913  *   Suffix flow actions.
5914  * @param[out] actions_pre
5915  *   Prefix flow actions.
5916  * @param[in] actions_n
5917  *  The total number of actions.
5918  * @param[in] sample_action_pos
5919  *   The sample action position.
5920  * @param[in] qrss_action_pos
5921  *   The Queue/RSS action position.
5922  * @param[in] jump_table
5923  *   Add extra jump action flag.
5924  * @param[out] error
5925  *   Perform verbose error reporting if not NULL.
5926  *
5927  * @return
5928  *   0 or a unique tag id on success, a negative errno value
5929  *   otherwise and rte_errno is set.
5930  */
5931 static int
5932 flow_sample_split_prep(struct rte_eth_dev *dev,
5933 		       int add_tag,
5934 		       const struct rte_flow_item items[],
5935 		       struct rte_flow_item sfx_items[],
5936 		       const struct rte_flow_action actions[],
5937 		       struct rte_flow_action actions_sfx[],
5938 		       struct rte_flow_action actions_pre[],
5939 		       int actions_n,
5940 		       int sample_action_pos,
5941 		       int qrss_action_pos,
5942 		       int jump_table,
5943 		       struct rte_flow_error *error)
5944 {
5945 	struct mlx5_priv *priv = dev->data->dev_private;
5946 	struct mlx5_rte_flow_action_set_tag *set_tag;
5947 	struct mlx5_rte_flow_item_tag *tag_spec;
5948 	struct mlx5_rte_flow_item_tag *tag_mask;
5949 	struct rte_flow_action_jump *jump_action;
5950 	uint32_t tag_id = 0;
5951 	int append_index = 0;
5952 	int set_tag_idx = -1;
5953 	int index;
5954 	int ret;
5955 
5956 	if (sample_action_pos < 0)
5957 		return rte_flow_error_set(error, EINVAL,
5958 					  RTE_FLOW_ERROR_TYPE_ACTION,
5959 					  NULL, "invalid position of sample "
5960 					  "action in list");
5961 	/* Prepare the actions for prefix and suffix flow. */
5962 	if (add_tag) {
5963 		/* Update the newly added tag action index so that it precedes
5964 		 * the PUSH_VLAN or ENCAP action.
5965 		 */
5966 		const struct rte_flow_action_raw_encap *raw_encap;
5967 		const struct rte_flow_action *action = actions;
5968 		int encap_idx;
5969 		int action_idx = 0;
5970 		int raw_decap_idx = -1;
5971 		int push_vlan_idx = -1;
5972 		for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
5973 			switch (action->type) {
5974 			case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5975 				raw_decap_idx = action_idx;
5976 				break;
5977 			case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5978 				raw_encap = action->conf;
5979 				if (raw_encap->size >
5980 					MLX5_ENCAPSULATION_DECISION_SIZE) {
5981 					encap_idx = raw_decap_idx != -1 ?
5982 						    raw_decap_idx : action_idx;
5983 					if (encap_idx < sample_action_pos &&
5984 					    push_vlan_idx == -1)
5985 						set_tag_idx = encap_idx;
5986 				}
5987 				break;
5988 			case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
5989 			case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
5990 				encap_idx = action_idx;
5991 				if (encap_idx < sample_action_pos &&
5992 				    push_vlan_idx == -1)
5993 					set_tag_idx = encap_idx;
5994 				break;
5995 			case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5996 			case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5997 				push_vlan_idx = action_idx;
5998 				if (push_vlan_idx < sample_action_pos)
5999 					set_tag_idx = action_idx;
6000 				break;
6001 			default:
6002 				break;
6003 			}
6004 			action_idx++;
6005 		}
6006 	}
6007 	/* Prepare the actions for prefix and suffix flow. */
6008 	if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
6009 		index = qrss_action_pos;
6010 		/* Put the actions preceding the Queue/RSS action into the prefix flow. */
6011 		if (index != 0)
6012 			memcpy(actions_pre, actions,
6013 			       sizeof(struct rte_flow_action) * index);
6014 		/* Put others preceding the sample action into prefix flow. */
6015 		if (sample_action_pos > index + 1)
6016 			memcpy(actions_pre + index, actions + index + 1,
6017 			       sizeof(struct rte_flow_action) *
6018 			       (sample_action_pos - index - 1));
6019 		index = sample_action_pos - 1;
6020 		/* Put Queue/RSS action into Suffix flow. */
6021 		memcpy(actions_sfx, actions + qrss_action_pos,
6022 		       sizeof(struct rte_flow_action));
6023 		actions_sfx++;
6024 	} else if (add_tag && set_tag_idx >= 0) {
6025 		if (set_tag_idx > 0)
6026 			memcpy(actions_pre, actions,
6027 			       sizeof(struct rte_flow_action) * set_tag_idx);
6028 		memcpy(actions_pre + set_tag_idx + 1, actions + set_tag_idx,
6029 		       sizeof(struct rte_flow_action) *
6030 		       (sample_action_pos - set_tag_idx));
6031 		index = sample_action_pos;
6032 	} else {
6033 		index = sample_action_pos;
6034 		if (index != 0)
6035 			memcpy(actions_pre, actions,
6036 			       sizeof(struct rte_flow_action) * index);
6037 	}
6038 	/* For CX5, add an extra tag action for NIC-RX and E-Switch ingress.
6039 	 * For CX6DX and above, metadata registers Cx preserve their value,
6040 	 * so add an extra tag action for NIC-RX and the E-Switch domain.
6041 	 */
6042 	if (add_tag) {
6043 		/* Prepare the prefix tag action. */
6044 		append_index++;
6045 		set_tag = (void *)(actions_pre + actions_n + append_index);
6046 		ret = mlx5_flow_get_reg_id(dev, MLX5_SAMPLE_ID, 0, error);
6047 		/* A trusted VF/SF on CX5 does not support meter, so the reserved
6048 		 * metadata regC is REG_NON; fall back to using application tag
6049 		 * index 0.
6050 		 */
6051 		if (unlikely(ret == REG_NON))
6052 			ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
6053 		if (ret < 0)
6054 			return ret;
6055 		mlx5_ipool_malloc(priv->sh->ipool
6056 				  [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id);
6057 		*set_tag = (struct mlx5_rte_flow_action_set_tag) {
6058 			.id = ret,
6059 			.data = tag_id,
6060 		};
6061 		/* Prepare the suffix subflow items. */
6062 		for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
6063 			if (items->type == RTE_FLOW_ITEM_TYPE_PORT_ID) {
6064 				memcpy(sfx_items, items, sizeof(*sfx_items));
6065 				sfx_items++;
6066 			}
6067 		}
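		/*
		 * The tag spec/mask pair is assumed to fit in the spare area
		 * after the SAMPLE_SUFFIX_ITEM item slots reserved by the
		 * caller for the suffix pattern.
		 */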
6068 		tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
6069 		tag_spec->data = tag_id;
6070 		tag_spec->id = set_tag->id;
6071 		tag_mask = tag_spec + 1;
6072 		tag_mask->data = UINT32_MAX;
6073 		sfx_items[0] = (struct rte_flow_item){
6074 			.type = (enum rte_flow_item_type)
6075 				MLX5_RTE_FLOW_ITEM_TYPE_TAG,
6076 			.spec = tag_spec,
6077 			.last = NULL,
6078 			.mask = tag_mask,
6079 		};
6080 		sfx_items[1] = (struct rte_flow_item){
6081 			.type = (enum rte_flow_item_type)
6082 				RTE_FLOW_ITEM_TYPE_END,
6083 		};
6084 		/* Prepare the tag action in prefix subflow. */
6085 		set_tag_idx = (set_tag_idx == -1) ? index : set_tag_idx;
6086 		actions_pre[set_tag_idx] =
6087 			(struct rte_flow_action){
6088 			.type = (enum rte_flow_action_type)
6089 				MLX5_RTE_FLOW_ACTION_TYPE_TAG,
6090 			.conf = set_tag,
6091 		};
6092 		/* Update the next sample position due to the added tag action. */
6093 		index += 1;
6094 	}
6095 	/* Copy the sample action into prefix flow. */
6096 	memcpy(actions_pre + index, actions + sample_action_pos,
6097 	       sizeof(struct rte_flow_action));
6098 	index += 1;
6099 	/* For the modify action after the sample action in E-Switch mirroring,
6100 	 * add an extra jump action in the prefix subflow to jump into the next
6101 	 * table, then do the modify action in the new table.
6102 	 */
6103 	if (jump_table) {
6104 		/* Prepare the prefix jump action. */
6105 		append_index++;
6106 		jump_action = (void *)(actions_pre + actions_n + append_index);
6107 		jump_action->group = jump_table;
6108 		actions_pre[index++] =
6109 			(struct rte_flow_action){
6110 			.type = (enum rte_flow_action_type)
6111 				RTE_FLOW_ACTION_TYPE_JUMP,
6112 			.conf = jump_action,
6113 		};
6114 	}
6115 	actions_pre[index] = (struct rte_flow_action){
6116 		.type = (enum rte_flow_action_type)
6117 			RTE_FLOW_ACTION_TYPE_END,
6118 	};
6119 	/* Put the actions after sample into Suffix flow. */
6120 	memcpy(actions_sfx, actions + sample_action_pos + 1,
6121 	       sizeof(struct rte_flow_action) *
6122 	       (actions_n - sample_action_pos - 1));
6123 	return tag_id;
6124 }
6125 
6126 /**
6127  * The splitting for metadata feature.
6128  *
6129  * - Q/RSS action on NIC Rx should be split in order to pass by
6130  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
6131  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
6132  *
6133  * - All the actions on NIC Tx should have a mreg copy action to
6134  *   copy reg_a from WQE to reg_c[0].
6135  *
6136  * @param dev
6137  *   Pointer to Ethernet device.
6138  * @param[in] flow
6139  *   Parent flow structure pointer.
6140  * @param[in] attr
6141  *   Flow rule attributes.
6142  * @param[in] items
6143  *   Pattern specification (list terminated by the END pattern item).
6144  * @param[in] actions
6145  *   Associated actions (list terminated by the END action).
6146  * @param[in] flow_split_info
6147  *   Pointer to flow split info structure.
6148  * @param[out] error
6149  *   Perform verbose error reporting if not NULL.
6150  * @return
6151  *   0 on success, negative value otherwise
6152  */
6153 static int
6154 flow_create_split_metadata(struct rte_eth_dev *dev,
6155 			   struct rte_flow *flow,
6156 			   const struct rte_flow_attr *attr,
6157 			   const struct rte_flow_item items[],
6158 			   const struct rte_flow_action actions[],
6159 			   struct mlx5_flow_split_info *flow_split_info,
6160 			   struct rte_flow_error *error)
6161 {
6162 	struct mlx5_priv *priv = dev->data->dev_private;
6163 	struct mlx5_sh_config *config = &priv->sh->config;
6164 	const struct rte_flow_action *qrss = NULL;
6165 	struct rte_flow_action *ext_actions = NULL;
6166 	struct mlx5_flow *dev_flow = NULL;
6167 	uint32_t qrss_id = 0;
6168 	int mtr_sfx = 0;
6169 	size_t act_size;
6170 	int actions_n;
6171 	int encap_idx;
6172 	int ret;
6173 
6174 	/* Check whether extensive metadata feature is engaged. */
6175 	if (!config->dv_flow_en ||
6176 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
6177 	    !mlx5_flow_ext_mreg_supported(dev))
6178 		return flow_create_split_inner(dev, flow, NULL, attr, items,
6179 					       actions, flow_split_info, error);
6180 	actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
6181 							   &encap_idx);
6182 	if (qrss) {
6183 		/* Exclude hairpin flows from splitting. */
6184 		if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
6185 			const struct rte_flow_action_queue *queue;
6186 
6187 			queue = qrss->conf;
6188 			if (mlx5_rxq_is_hairpin(dev, queue->index))
6189 				qrss = NULL;
6190 		} else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
6191 			const struct rte_flow_action_rss *rss;
6192 
6193 			rss = qrss->conf;
6194 			if (mlx5_rxq_is_hairpin(dev, rss->queue[0]))
6195 				qrss = NULL;
6196 		}
6197 	}
6198 	if (qrss) {
6199 		/* Check if it is in meter suffix table. */
6200 		mtr_sfx = attr->group ==
6201 			  ((attr->transfer && priv->fdb_def_rule) ?
6202 			  (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6203 			  MLX5_FLOW_TABLE_LEVEL_METER);
6204 		/*
6205 		 * Q/RSS action on NIC Rx should be split in order to pass by
6206 		 * the mreg copy table (RX_CP_TBL) and then it jumps to the
6207 		 * action table (RX_ACT_TBL) which has the split Q/RSS action.
6208 		 */
6209 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
6210 			   sizeof(struct rte_flow_action_set_tag) +
6211 			   sizeof(struct rte_flow_action_jump);
6212 		ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
6213 					  SOCKET_ID_ANY);
6214 		if (!ext_actions)
6215 			return rte_flow_error_set(error, ENOMEM,
6216 						  RTE_FLOW_ERROR_TYPE_ACTION,
6217 						  NULL, "no memory to split "
6218 						  "metadata flow");
6219 		/*
6220 		 * Create the new actions list with removed Q/RSS action
6221 		 * and appended set tag and jump to register copy table
6222 		 * (RX_CP_TBL). We should preallocate unique tag ID here
6223 		 * in advance, because it is needed for set tag action.
6224 		 */
6225 		qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
6226 						    qrss, actions_n,
6227 						    mtr_sfx, error);
6228 		if (!mtr_sfx && !qrss_id) {
6229 			ret = -rte_errno;
6230 			goto exit;
6231 		}
6232 	} else if (attr->egress && !attr->transfer) {
6233 		/*
6234 		 * All the actions on NIC Tx should have a metadata register
6235 		 * copy action to copy reg_a from WQE to reg_c[meta]
6236 		 */
6237 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
6238 			   sizeof(struct mlx5_flow_action_copy_mreg);
6239 		ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
6240 					  SOCKET_ID_ANY);
6241 		if (!ext_actions)
6242 			return rte_flow_error_set(error, ENOMEM,
6243 						  RTE_FLOW_ERROR_TYPE_ACTION,
6244 						  NULL, "no memory to split "
6245 						  "metadata flow");
6246 		/* Create the action list appended with copy register. */
6247 		ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
6248 					     actions_n, error, encap_idx);
6249 		if (ret < 0)
6250 			goto exit;
6251 	}
6252 	/* Add the unmodified original or prefix subflow. */
6253 	ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6254 				      items, ext_actions ? ext_actions :
6255 				      actions, flow_split_info, error);
6256 	if (ret < 0)
6257 		goto exit;
6258 	MLX5_ASSERT(dev_flow);
6259 	if (qrss) {
6260 		const struct rte_flow_attr q_attr = {
6261 			.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
6262 			.ingress = 1,
6263 		};
6264 		/* Internal PMD action to set register. */
6265 		struct mlx5_rte_flow_item_tag q_tag_spec = {
6266 			.data = qrss_id,
6267 			.id = REG_NON,
6268 		};
6269 		struct rte_flow_item q_items[] = {
6270 			{
6271 				.type = (enum rte_flow_item_type)
6272 					MLX5_RTE_FLOW_ITEM_TYPE_TAG,
6273 				.spec = &q_tag_spec,
6274 				.last = NULL,
6275 				.mask = NULL,
6276 			},
6277 			{
6278 				.type = RTE_FLOW_ITEM_TYPE_END,
6279 			},
6280 		};
6281 		struct rte_flow_action q_actions[] = {
6282 			{
6283 				.type = qrss->type,
6284 				.conf = qrss->conf,
6285 			},
6286 			{
6287 				.type = RTE_FLOW_ACTION_TYPE_END,
6288 			},
6289 		};
6290 		uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
6291 
6292 		/*
6293 		 * Configure the tag item only if there is no meter subflow.
6294 		 * Since tag is already marked in the meter suffix subflow
6295 		 * we can just use the meter suffix items as is.
6296 		 */
6297 		if (qrss_id) {
6298 			/* Not meter subflow. */
6299 			MLX5_ASSERT(!mtr_sfx);
6300 			/*
6301 			 * Put the unique id into the prefix flow because it is
6302 			 * destroyed after the suffix flow, and the id is freed
6303 			 * only once there are no actual flows with this id, at
6304 			 * which point identifier reallocation becomes possible
6305 			 * (for example, for other flows in other threads).
6306 			 */
6307 			dev_flow->handle->split_flow_id = qrss_id;
6308 			ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
6309 						   error);
6310 			if (ret < 0)
6311 				goto exit;
6312 			q_tag_spec.id = ret;
6313 		}
6314 		dev_flow = NULL;
6315 		/* Add suffix subflow to execute Q/RSS. */
6316 		flow_split_info->prefix_layers = layers;
6317 		flow_split_info->prefix_mark = 0;
6318 		flow_split_info->table_id = 0;
6319 		ret = flow_create_split_inner(dev, flow, &dev_flow,
6320 					      &q_attr, mtr_sfx ? items :
6321 					      q_items, q_actions,
6322 					      flow_split_info, error);
6323 		if (ret < 0)
6324 			goto exit;
6325 		/* The qrss ID must be freed only on failure, so clear it on success. */
6326 		qrss_id = 0;
6327 		MLX5_ASSERT(dev_flow);
6328 	}
6329 
6330 exit:
6331 	/*
6332 	 * We do not destroy the partially created sub_flows in case of error.
6333 	 * These ones are included into parent flow list and will be destroyed
6334 	 * by flow_drv_destroy.
6335 	 */
6336 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
6337 			qrss_id);
6338 	mlx5_free(ext_actions);
6339 	return ret;
6340 }
6341 
6342 /**
6343  * Create meter internal drop flow with the original pattern.
6344  *
6345  * @param dev
6346  *   Pointer to Ethernet device.
6347  * @param[in] flow
6348  *   Parent flow structure pointer.
6349  * @param[in] attr
6350  *   Flow rule attributes.
6351  * @param[in] items
6352  *   Pattern specification (list terminated by the END pattern item).
6353  * @param[in] flow_split_info
6354  *   Pointer to flow split info structure.
6355  * @param[in] fm
6356  *   Pointer to flow meter structure.
6357  * @param[out] error
6358  *   Perform verbose error reporting if not NULL.
6359  * @return
6360  *   0 on success, negative value otherwise
6361  */
6362 static uint32_t
6363 flow_meter_create_drop_flow_with_org_pattern(struct rte_eth_dev *dev,
6364 			struct rte_flow *flow,
6365 			const struct rte_flow_attr *attr,
6366 			const struct rte_flow_item items[],
6367 			struct mlx5_flow_split_info *flow_split_info,
6368 			struct mlx5_flow_meter_info *fm,
6369 			struct rte_flow_error *error)
6370 {
6371 	struct mlx5_flow *dev_flow = NULL;
6372 	struct rte_flow_attr drop_attr = *attr;
6373 	struct rte_flow_action drop_actions[3];
6374 	struct mlx5_flow_split_info drop_split_info = *flow_split_info;
6375 
6376 	MLX5_ASSERT(fm->drop_cnt);
6377 	drop_actions[0].type =
6378 		(enum rte_flow_action_type)MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
6379 	drop_actions[0].conf = (void *)(uintptr_t)fm->drop_cnt;
6380 	drop_actions[1].type = RTE_FLOW_ACTION_TYPE_DROP;
6381 	drop_actions[1].conf = NULL;
6382 	drop_actions[2].type = RTE_FLOW_ACTION_TYPE_END;
6383 	drop_actions[2].conf = NULL;
6384 	drop_split_info.external = false;
6385 	drop_split_info.skip_scale |= 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6386 	drop_split_info.table_id = MLX5_MTR_TABLE_ID_DROP;
6387 	drop_attr.group = MLX5_FLOW_TABLE_LEVEL_METER;
6388 	return flow_create_split_inner(dev, flow, &dev_flow,
6389 				&drop_attr, items, drop_actions,
6390 				&drop_split_info, error);
6391 }
6392 
6393 /**
6394  * The splitting for meter feature.
6395  *
6396  * - The meter flow will be split into two flows, the prefix and the
6397  *   suffix flow. The packets make sense only if they pass the prefix
6398  *   meter action.
6399  *
6400  * - Reg_C_5 is used for the packet to match between the prefix and
6401  *   suffix flows.
6402  *
6403  * @param dev
6404  *   Pointer to Ethernet device.
6405  * @param[in] flow
6406  *   Parent flow structure pointer.
6407  * @param[in] attr
6408  *   Flow rule attributes.
6409  * @param[in] items
6410  *   Pattern specification (list terminated by the END pattern item).
6411  * @param[in] actions
6412  *   Associated actions (list terminated by the END action).
6413  * @param[in] flow_split_info
6414  *   Pointer to flow split info structure.
6415  * @param[out] error
6416  *   Perform verbose error reporting if not NULL.
6417  * @return
6418  *   0 on success, negative value otherwise
6419  */
6420 static int
6421 flow_create_split_meter(struct rte_eth_dev *dev,
6422 			struct rte_flow *flow,
6423 			const struct rte_flow_attr *attr,
6424 			const struct rte_flow_item items[],
6425 			const struct rte_flow_action actions[],
6426 			struct mlx5_flow_split_info *flow_split_info,
6427 			struct rte_flow_error *error)
6428 {
6429 	struct mlx5_priv *priv = dev->data->dev_private;
6430 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6431 	struct rte_flow_action *sfx_actions = NULL;
6432 	struct rte_flow_action *pre_actions = NULL;
6433 	struct rte_flow_item *sfx_items = NULL;
6434 	struct mlx5_flow *dev_flow = NULL;
6435 	struct rte_flow_attr sfx_attr = *attr;
6436 	struct mlx5_flow_meter_info *fm = NULL;
6437 	uint8_t skip_scale_restore;
6438 	bool has_mtr = false;
6439 	bool has_modify = false;
6440 	bool set_mtr_reg = true;
6441 	bool is_mtr_hierarchy = false;
6442 	uint32_t meter_id = 0;
6443 	uint32_t mtr_idx = 0;
6444 	uint32_t mtr_flow_id = 0;
6445 	size_t act_size;
6446 	size_t item_size;
6447 	int actions_n = 0;
6448 	int ret = 0;
6449 
6450 	if (priv->mtr_en)
6451 		actions_n = flow_check_meter_action(dev, actions, &has_mtr,
6452 						    &has_modify, &meter_id);
6453 	if (has_mtr) {
6454 		if (flow->meter) {
6455 			fm = flow_dv_meter_find_by_idx(priv, flow->meter);
6456 			if (!fm)
6457 				return rte_flow_error_set(error, EINVAL,
6458 						RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6459 						NULL, "Meter not found.");
6460 		} else {
6461 			fm = mlx5_flow_meter_find(priv, meter_id, &mtr_idx);
6462 			if (!fm)
6463 				return rte_flow_error_set(error, EINVAL,
6464 						RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6465 						NULL, "Meter not found.");
6466 			ret = mlx5_flow_meter_attach(priv, fm,
6467 						     &sfx_attr, error);
6468 			if (ret)
6469 				return -rte_errno;
6470 			flow->meter = mtr_idx;
6471 		}
6472 		MLX5_ASSERT(wks);
6473 		wks->fm = fm;
6474 		if (!fm->def_policy) {
6475 			wks->policy = mlx5_flow_meter_policy_find(dev,
6476 								  fm->policy_id,
6477 								  NULL);
6478 			MLX5_ASSERT(wks->policy);
6479 			if (wks->policy->mark)
6480 				wks->mark = 1;
6481 			if (wks->policy->is_hierarchy) {
6482 				wks->final_policy =
6483 				mlx5_flow_meter_hierarchy_get_final_policy(dev,
6484 								wks->policy);
6485 				if (!wks->final_policy)
6486 					return rte_flow_error_set(error,
6487 					EINVAL,
6488 					RTE_FLOW_ERROR_TYPE_ACTION, NULL,
6489 				"Failed to find terminal policy of hierarchy.");
6490 				is_mtr_hierarchy = true;
6491 			}
6492 		}
6493 		/*
6494 		 * If it is not a default-policy meter, and
6495 		 * 1. it is not a meter hierarchy and there is no action in the flow
6496 		 *    that modifies the packet (modify/encap/decap etc.), OR
6497 		 * 2. no drop count is needed for this meter,
6498 		 * then there is no need to use regC to save the meter id anymore.
6499 		 */
6500 		if (!fm->def_policy && ((!has_modify && !is_mtr_hierarchy) || !fm->drop_cnt))
6501 			set_mtr_reg = false;
6502 		/* Prefix actions: meter, decap, encap, tag, jump, end, cnt. */
6503 #define METER_PREFIX_ACTION 7
6504 		act_size = (sizeof(struct rte_flow_action) *
6505 			    (actions_n + METER_PREFIX_ACTION)) +
6506 			   sizeof(struct mlx5_rte_flow_action_set_tag);
6507 		/* Suffix items: tag, vlan, port id, end. */
6508 #define METER_SUFFIX_ITEM 4
6509 		item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
6510 			    sizeof(struct mlx5_rte_flow_item_tag) * 2;
6511 		sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
6512 					  0, SOCKET_ID_ANY);
6513 		if (!sfx_actions)
6514 			return rte_flow_error_set(error, ENOMEM,
6515 						  RTE_FLOW_ERROR_TYPE_ACTION,
6516 						  NULL, "no memory to split "
6517 						  "meter flow");
6518 		sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
6519 			     act_size);
6520 		/* There's no suffix flow for meter of non-default policy. */
6521 		if (!fm->def_policy)
6522 			pre_actions = sfx_actions + 1;
6523 		else
6524 			pre_actions = sfx_actions + actions_n;
6525 		ret = flow_meter_split_prep(dev, flow, wks, &sfx_attr,
6526 					    items, sfx_items, actions,
6527 					    sfx_actions, pre_actions,
6528 					    (set_mtr_reg ? &mtr_flow_id : NULL),
6529 					    error);
6530 		if (ret) {
6531 			ret = -rte_errno;
6532 			goto exit;
6533 		}
6534 		/* Add the prefix subflow. */
6535 		skip_scale_restore = flow_split_info->skip_scale;
6536 		flow_split_info->skip_scale |=
6537 			1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6538 		ret = flow_create_split_inner(dev, flow, &dev_flow,
6539 					      attr, items, pre_actions,
6540 					      flow_split_info, error);
6541 		flow_split_info->skip_scale = skip_scale_restore;
6542 		if (ret) {
6543 			if (mtr_flow_id)
6544 				mlx5_ipool_free(fm->flow_ipool, mtr_flow_id);
6545 			ret = -rte_errno;
6546 			goto exit;
6547 		}
6548 		if (mtr_flow_id) {
6549 			dev_flow->handle->split_flow_id = mtr_flow_id;
6550 			dev_flow->handle->is_meter_flow_id = 1;
6551 		}
6552 		if (!fm->def_policy) {
6553 			if (!set_mtr_reg && fm->drop_cnt)
6554 				ret =
6555 			flow_meter_create_drop_flow_with_org_pattern(dev, flow,
6556 							&sfx_attr, items,
6557 							flow_split_info,
6558 							fm, error);
6559 			goto exit;
6560 		}
6561 		/* Setting the sfx group attr. */
6562 		sfx_attr.group = sfx_attr.transfer ?
6563 				(MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6564 				 MLX5_FLOW_TABLE_LEVEL_METER;
6565 		flow_split_info->prefix_layers =
6566 				flow_get_prefix_layer_flags(dev_flow);
6567 		flow_split_info->prefix_mark |= wks->mark;
6568 		flow_split_info->table_id = MLX5_MTR_TABLE_ID_SUFFIX;
6569 	}
6570 	/* Add the suffix subflow. */
6571 	ret = flow_create_split_metadata(dev, flow,
6572 					 &sfx_attr, sfx_items ?
6573 					 sfx_items : items,
6574 					 sfx_actions ? sfx_actions : actions,
6575 					 flow_split_info, error);
6576 exit:
6577 	if (sfx_actions)
6578 		mlx5_free(sfx_actions);
6579 	return ret;
6580 }
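
/*
 * Illustrative sketch only (documentation aid, not driver logic): assuming a
 * rule such as
 *
 *   pattern eth / ipv4 / end
 *   actions meter mtr_id 1 / queue index 2 / end
 *
 * the split performed above conceptually produces:
 *
 *   prefix flow: eth / ipv4
 *                -> meter, set_tag (meter flow id in REG_C_5), jump
 *   suffix flow: tag (REG_C_5 == meter flow id)
 *                -> queue index 2
 *
 * The actual prefix/suffix action and item lists are built by
 * flow_meter_split_prep(); see METER_PREFIX_ACTION and METER_SUFFIX_ITEM
 * above for the reserved slots.
 */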
6581 
6582 /**
6583  * The splitting for sample feature.
6584  *
6585  * Once a Sample action is detected in the action list, the flow actions are
6586  * split into a prefix sub flow and a suffix sub flow.
6587  *
6588  * The original items remain in the prefix sub flow. All actions preceding the
6589  * sample action, and the sample action itself, are copied to the prefix
6590  * sub flow; the actions following the sample action are copied to the
6591  * suffix sub flow. The Queue action is always located in the suffix sub flow.
6592  *
6593  * In order to make the packet from the prefix sub flow match the suffix sub
6594  * flow, an extra tag action is added to the prefix sub flow, and the suffix sub
6595  * flow uses a tag item with the unique flow id.
6596  *
6597  * @param dev
6598  *   Pointer to Ethernet device.
6599  * @param[in] flow
6600  *   Parent flow structure pointer.
6601  * @param[in] attr
6602  *   Flow rule attributes.
6603  * @param[in] items
6604  *   Pattern specification (list terminated by the END pattern item).
6605  * @param[in] actions
6606  *   Associated actions (list terminated by the END action).
6607  * @param[in] flow_split_info
6608  *   Pointer to flow split info structure.
6609  * @param[out] error
6610  *   Perform verbose error reporting if not NULL.
6611  * @return
6612  *   0 on success, negative value otherwise
6613  */
6614 static int
6615 flow_create_split_sample(struct rte_eth_dev *dev,
6616 			 struct rte_flow *flow,
6617 			 const struct rte_flow_attr *attr,
6618 			 const struct rte_flow_item items[],
6619 			 const struct rte_flow_action actions[],
6620 			 struct mlx5_flow_split_info *flow_split_info,
6621 			 struct rte_flow_error *error)
6622 {
6623 	struct mlx5_priv *priv = dev->data->dev_private;
6624 	struct rte_flow_action *sfx_actions = NULL;
6625 	struct rte_flow_action *pre_actions = NULL;
6626 	struct rte_flow_item *sfx_items = NULL;
6627 	struct mlx5_flow *dev_flow = NULL;
6628 	struct rte_flow_attr sfx_attr = *attr;
6629 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6630 	struct mlx5_flow_dv_sample_resource *sample_res;
6631 	struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
6632 	struct mlx5_flow_tbl_resource *sfx_tbl;
6633 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6634 #endif
6635 	size_t act_size;
6636 	size_t item_size;
6637 	uint32_t fdb_tx = 0;
6638 	int32_t tag_id = 0;
6639 	int actions_n = 0;
6640 	int sample_action_pos;
6641 	int qrss_action_pos;
6642 	int add_tag = 0;
6643 	int modify_after_mirror = 0;
6644 	uint16_t jump_table = 0;
6645 	const uint32_t next_ft_step = 1;
6646 	int ret = 0;
6647 
6648 	if (priv->sampler_en)
6649 		actions_n = flow_check_match_action(actions, attr,
6650 					RTE_FLOW_ACTION_TYPE_SAMPLE,
6651 					&sample_action_pos, &qrss_action_pos,
6652 					&modify_after_mirror);
6653 	if (actions_n) {
6654 		/* The prefix actions must include sample, tag, end. */
6655 		act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
6656 			   + sizeof(struct mlx5_rte_flow_action_set_tag);
6657 		item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
6658 			    sizeof(struct mlx5_rte_flow_item_tag) * 2;
6659 		sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
6660 					  item_size), 0, SOCKET_ID_ANY);
6661 		if (!sfx_actions)
6662 			return rte_flow_error_set(error, ENOMEM,
6663 						  RTE_FLOW_ERROR_TYPE_ACTION,
6664 						  NULL, "no memory to split "
6665 						  "sample flow");
6666 		/* The representor_id is UINT16_MAX for uplink. */
6667 		fdb_tx = (attr->transfer && priv->representor_id != UINT16_MAX);
6668 		/*
6669 		 * When reg_c_preserve is set, metadata registers Cx preserve
6670 		 * their value even through packet duplication.
6671 		 */
6672 		add_tag = (!fdb_tx ||
6673 			   priv->sh->cdev->config.hca_attr.reg_c_preserve);
6674 		if (add_tag)
6675 			sfx_items = (struct rte_flow_item *)((char *)sfx_actions
6676 					+ act_size);
6677 		if (modify_after_mirror)
6678 			jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
6679 				     next_ft_step;
6680 		pre_actions = sfx_actions + actions_n;
6681 		tag_id = flow_sample_split_prep(dev, add_tag, items, sfx_items,
6682 						actions, sfx_actions,
6683 						pre_actions, actions_n,
6684 						sample_action_pos,
6685 						qrss_action_pos, jump_table,
6686 						error);
6687 		if (tag_id < 0 || (add_tag && !tag_id)) {
6688 			ret = -rte_errno;
6689 			goto exit;
6690 		}
6691 		if (modify_after_mirror)
6692 			flow_split_info->skip_scale =
6693 					1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6694 		/* Add the prefix subflow. */
6695 		ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6696 					      items, pre_actions,
6697 					      flow_split_info, error);
6698 		if (ret) {
6699 			ret = -rte_errno;
6700 			goto exit;
6701 		}
6702 		dev_flow->handle->split_flow_id = tag_id;
6703 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6704 		if (!modify_after_mirror) {
6705 			/* Set the sfx group attr. */
6706 			sample_res = (struct mlx5_flow_dv_sample_resource *)
6707 						dev_flow->dv.sample_res;
6708 			sfx_tbl = (struct mlx5_flow_tbl_resource *)
6709 						sample_res->normal_path_tbl;
6710 			sfx_tbl_data = container_of(sfx_tbl,
6711 						struct mlx5_flow_tbl_data_entry,
6712 						tbl);
6713 			sfx_attr.group = sfx_attr.transfer ?
6714 			(sfx_tbl_data->level - 1) : sfx_tbl_data->level;
6715 		} else {
6716 			MLX5_ASSERT(attr->transfer);
6717 			sfx_attr.group = jump_table;
6718 		}
6719 		flow_split_info->prefix_layers =
6720 				flow_get_prefix_layer_flags(dev_flow);
6721 		MLX5_ASSERT(wks);
6722 		flow_split_info->prefix_mark |= wks->mark;
6723 		/* The suffix group level has already been scaled with the factor; set
6724 		 * MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1 to avoid scaling it
6725 		 * again in translation.
6726 		 */
6727 		flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6728 #endif
6729 	}
6730 	/* Add the suffix subflow. */
6731 	ret = flow_create_split_meter(dev, flow, &sfx_attr,
6732 				      sfx_items ? sfx_items : items,
6733 				      sfx_actions ? sfx_actions : actions,
6734 				      flow_split_info, error);
6735 exit:
6736 	if (sfx_actions)
6737 		mlx5_free(sfx_actions);
6738 	return ret;
6739 }
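
/*
 * Illustrative sketch only (documentation aid, not driver logic): assuming a
 * rule such as
 *
 *   pattern eth / end
 *   actions sample ratio 1 / queue index 0 / end
 *
 * the split performed above conceptually produces:
 *
 *   prefix flow: eth -> sample, set_tag (unique flow id)
 *   suffix flow: tag (unique flow id) -> queue index 0
 *
 * flow_sample_split_prep() builds the real action/item lists and returns the
 * unique flow id carried by the tag.
 */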
6740 
6741 /**
6742  * Split the flow to subflow set. The splitters might be linked
6743  * in the chain, like this:
6744  * flow_create_split_outer() calls:
6745  *   flow_create_split_meter() calls:
6746  *     flow_create_split_metadata(meter_subflow_0) calls:
6747  *       flow_create_split_inner(metadata_subflow_0)
6748  *       flow_create_split_inner(metadata_subflow_1)
6749  *       flow_create_split_inner(metadata_subflow_2)
6750  *     flow_create_split_metadata(meter_subflow_1) calls:
6751  *       flow_create_split_inner(metadata_subflow_0)
6752  *       flow_create_split_inner(metadata_subflow_1)
6753  *       flow_create_split_inner(metadata_subflow_2)
6754  *
6755  * This provides a flexible way to add new levels of flow splitting.
6756  * All of the successfully created subflows are included in the
6757  * parent flow dev_flow list.
6758  *
6759  * @param dev
6760  *   Pointer to Ethernet device.
6761  * @param[in] flow
6762  *   Parent flow structure pointer.
6763  * @param[in] attr
6764  *   Flow rule attributes.
6765  * @param[in] items
6766  *   Pattern specification (list terminated by the END pattern item).
6767  * @param[in] actions
6768  *   Associated actions (list terminated by the END action).
6769  * @param[in] flow_split_info
6770  *   Pointer to flow split info structure.
6771  * @param[out] error
6772  *   Perform verbose error reporting if not NULL.
6773  * @return
6774  *   0 on success, negative value otherwise
6775  */
6776 static int
6777 flow_create_split_outer(struct rte_eth_dev *dev,
6778 			struct rte_flow *flow,
6779 			const struct rte_flow_attr *attr,
6780 			const struct rte_flow_item items[],
6781 			const struct rte_flow_action actions[],
6782 			struct mlx5_flow_split_info *flow_split_info,
6783 			struct rte_flow_error *error)
6784 {
6785 	int ret;
6786 
6787 	ret = flow_create_split_sample(dev, flow, attr, items,
6788 				       actions, flow_split_info, error);
6789 	MLX5_ASSERT(ret <= 0);
6790 	return ret;
6791 }
6792 
6793 static inline struct mlx5_flow_tunnel *
6794 flow_tunnel_from_rule(const struct mlx5_flow *flow)
6795 {
6796 	struct mlx5_flow_tunnel *tunnel;
6797 
6798 #pragma GCC diagnostic push
6799 #pragma GCC diagnostic ignored "-Wcast-qual"
6800 	tunnel = (typeof(tunnel))flow->tunnel;
6801 #pragma GCC diagnostic pop
6802 
6803 	return tunnel;
6804 }
6805 
6806 /**
6807  * Adjust flow RSS workspace if needed.
6808  *
6809  * @param wks
6810  *   Pointer to thread flow work space.
6811  * @param rss_desc
6812  *   Pointer to RSS descriptor.
6813  * @param[in] nrssq_num
6814  *   New RSS queue number.
6815  *
6816  * @return
6817  *   0 on success, -1 otherwise and rte_errno is set.
6818  */
6819 static int
6820 flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
6821 			  struct mlx5_flow_rss_desc *rss_desc,
6822 			  uint32_t nrssq_num)
6823 {
6824 	if (likely(nrssq_num <= wks->rssq_num))
6825 		return 0;
6826 	rss_desc->queue = realloc(rss_desc->queue,
6827 			  sizeof(*rss_desc->queue) * RTE_ALIGN(nrssq_num, 2));
6828 	if (!rss_desc->queue) {
6829 		rte_errno = ENOMEM;
6830 		return -1;
6831 	}
6832 	wks->rssq_num = RTE_ALIGN(nrssq_num, 2);
6833 	return 0;
6834 }
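
/*
 * Usage sketch (as in flow_list_create() below): the queue array is grown
 * before the RSS descriptor is filled in; on failure rte_errno is already
 * set to ENOMEM by this helper.
 *
 *   if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
 *           return 0;
 */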
6835 
6836 /**
6837  * Create a flow and store it in the indexed pool of the given type.
6838  *
6839  * @param dev
6840  *   Pointer to Ethernet device.
6841  * @param type
6842  *   Flow type to be created. The flow is stored in the indexed pool of
6843  *   this type; it is the caller's responsibility to track the created
6844  *   flow by the returned index.
6846  * @param[in] attr
6847  *   Flow rule attributes.
6848  * @param[in] items
6849  *   Pattern specification (list terminated by the END pattern item).
6850  * @param[in] actions
6851  *   Associated actions (list terminated by the END action).
6852  * @param[in] external
6853  *   This flow rule is created by a request external to the PMD.
6854  * @param[out] error
6855  *   Perform verbose error reporting if not NULL.
6856  *
6857  * @return
6858  *   A flow index on success, 0 otherwise and rte_errno is set.
6859  */
6860 static uint32_t
6861 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6862 		 const struct rte_flow_attr *attr,
6863 		 const struct rte_flow_item items[],
6864 		 const struct rte_flow_action original_actions[],
6865 		 bool external, struct rte_flow_error *error)
6866 {
6867 	struct mlx5_priv *priv = dev->data->dev_private;
6868 	struct rte_flow *flow = NULL;
6869 	struct mlx5_flow *dev_flow;
6870 	const struct rte_flow_action_rss *rss = NULL;
6871 	struct mlx5_translated_action_handle
6872 		indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
6873 	int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
6874 	union {
6875 		struct mlx5_flow_expand_rss buf;
6876 		uint8_t buffer[4096];
6877 	} expand_buffer;
6878 	union {
6879 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6880 		uint8_t buffer[2048];
6881 	} actions_rx;
6882 	union {
6883 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6884 		uint8_t buffer[2048];
6885 	} actions_hairpin_tx;
6886 	union {
6887 		struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
6888 		uint8_t buffer[2048];
6889 	} items_tx;
6890 	struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
6891 	struct mlx5_flow_rss_desc *rss_desc;
6892 	const struct rte_flow_action *p_actions_rx;
6893 	uint32_t i;
6894 	uint32_t idx = 0;
6895 	int hairpin_flow;
6896 	struct rte_flow_attr attr_tx = { .priority = 0 };
6897 	const struct rte_flow_action *actions;
6898 	struct rte_flow_action *translated_actions = NULL;
6899 	struct mlx5_flow_tunnel *tunnel;
6900 	struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
6901 	struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
6902 	struct mlx5_flow_split_info flow_split_info = {
6903 		.external = !!external,
6904 		.skip_scale = 0,
6905 		.flow_idx = 0,
6906 		.prefix_mark = 0,
6907 		.prefix_layers = 0,
6908 		.table_id = 0
6909 	};
6910 	int ret;
6911 
6912 	MLX5_ASSERT(wks);
6913 	rss_desc = &wks->rss_desc;
6914 	ret = flow_action_handles_translate(dev, original_actions,
6915 					    indir_actions,
6916 					    &indir_actions_n,
6917 					    &translated_actions, error);
6918 	if (ret < 0) {
6919 		MLX5_ASSERT(translated_actions == NULL);
6920 		return 0;
6921 	}
6922 	actions = translated_actions ? translated_actions : original_actions;
6923 	p_actions_rx = actions;
6924 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
6925 	ret = flow_drv_validate(dev, attr, items, p_actions_rx,
6926 				external, hairpin_flow, error);
6927 	if (ret < 0)
6928 		goto error_before_hairpin_split;
6929 	flow = mlx5_ipool_zmalloc(priv->flows[type], &idx);
6930 	if (!flow) {
6931 		rte_errno = ENOMEM;
6932 		goto error_before_hairpin_split;
6933 	}
6934 	if (hairpin_flow > 0) {
6935 		if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
6936 			rte_errno = EINVAL;
6937 			goto error_before_hairpin_split;
6938 		}
6939 		flow_hairpin_split(dev, actions, actions_rx.actions,
6940 				   actions_hairpin_tx.actions, items_tx.items,
6941 				   idx);
6942 		p_actions_rx = actions_rx.actions;
6943 	}
6944 	flow_split_info.flow_idx = idx;
6945 	flow->drv_type = flow_get_drv_type(dev, attr);
6946 	MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
6947 		    flow->drv_type < MLX5_FLOW_TYPE_MAX);
6948 	memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
6949 	/* RSS Action only works on NIC RX domain */
6950 	if (attr->ingress && !attr->transfer)
6951 		rss = flow_get_rss_action(dev, p_actions_rx);
6952 	if (rss) {
6953 		if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
6954 			return 0;
6955 		/*
6956 		 * The following information is required by
6957 		 * mlx5_flow_hashfields_adjust() in advance.
6958 		 */
6959 		rss_desc->level = rss->level;
6960 		/* RSS type 0 indicates default RSS type (RTE_ETH_RSS_IP). */
6961 		rss_desc->types = !rss->types ? RTE_ETH_RSS_IP : rss->types;
6962 	}
6963 	flow->dev_handles = 0;
6964 	if (rss && rss->types) {
6965 		unsigned int graph_root;
6966 
6967 		graph_root = find_graph_root(rss->level);
6968 		ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
6969 					   items, rss->types,
6970 					   mlx5_support_expansion, graph_root);
6971 		MLX5_ASSERT(ret > 0 &&
6972 		       (unsigned int)ret < sizeof(expand_buffer.buffer));
6973 		if (rte_log_can_log(mlx5_logtype, RTE_LOG_DEBUG)) {
6974 			for (i = 0; i < buf->entries; ++i)
6975 				mlx5_dbg__print_pattern(buf->entry[i].pattern);
6976 		}
6977 	} else {
6978 		buf->entries = 1;
6979 		buf->entry[0].pattern = (void *)(uintptr_t)items;
6980 	}
6981 	rss_desc->shared_rss = flow_get_shared_rss_action(dev, indir_actions,
6982 						      indir_actions_n);
6983 	for (i = 0; i < buf->entries; ++i) {
6984 		/* Initialize flow split data. */
6985 		flow_split_info.prefix_layers = 0;
6986 		flow_split_info.prefix_mark = 0;
6987 		flow_split_info.skip_scale = 0;
6988 		/*
6989 		 * The splitter may create multiple dev_flows,
6990 		 * depending on configuration. In the simplest
6991 		 * case it just creates unmodified original flow.
6992 		 */
6993 		ret = flow_create_split_outer(dev, flow, attr,
6994 					      buf->entry[i].pattern,
6995 					      p_actions_rx, &flow_split_info,
6996 					      error);
6997 		if (ret < 0)
6998 			goto error;
6999 		if (is_flow_tunnel_steer_rule(wks->flows[0].tof_type)) {
7000 			ret = flow_tunnel_add_default_miss(dev, flow, attr,
7001 							   p_actions_rx,
7002 							   idx,
7003 							   wks->flows[0].tunnel,
7004 							   &default_miss_ctx,
7005 							   error);
7006 			if (ret < 0) {
7007 				mlx5_free(default_miss_ctx.queue);
7008 				goto error;
7009 			}
7010 		}
7011 	}
7012 	/* Create the tx flow. */
7013 	if (hairpin_flow) {
7014 		attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
7015 		attr_tx.ingress = 0;
7016 		attr_tx.egress = 1;
7017 		dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
7018 					 actions_hairpin_tx.actions,
7019 					 idx, error);
7020 		if (!dev_flow)
7021 			goto error;
7022 		dev_flow->flow = flow;
7023 		dev_flow->external = 0;
7024 		SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
7025 			      dev_flow->handle, next);
7026 		ret = flow_drv_translate(dev, dev_flow, &attr_tx,
7027 					 items_tx.items,
7028 					 actions_hairpin_tx.actions, error);
7029 		if (ret < 0)
7030 			goto error;
7031 	}
7032 	/*
7033 	 * Update the metadata register copy table. If the extensive
7034 	 * metadata feature is enabled and registers are supported,
7035 	 * we might create an extra rte_flow for each unique
7036 	 * MARK/FLAG action ID.
7037 	 *
7038 	 * The table is updated for ingress flows only, because
7039 	 * egress flows belong to a different device and the
7040 	 * copy table should be updated in the peer NIC Rx domain.
7041 	 */
7042 	if (attr->ingress &&
7043 	    (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
7044 		ret = flow_mreg_update_copy_table(dev, flow, actions, error);
7045 		if (ret)
7046 			goto error;
7047 	}
7048 	/*
7049 	 * If the flow is external (from the application), OR the device is
7050 	 * started, OR it is an mreg discover flow, then apply it immediately.
7051 	 */
7052 	if (external || dev->data->dev_started ||
7053 	    (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
7054 	     attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
7055 		ret = flow_drv_apply(dev, flow, error);
7056 		if (ret < 0)
7057 			goto error;
7058 	}
7059 	flow->type = type;
7060 	flow_rxq_flags_set(dev, flow);
7061 	rte_free(translated_actions);
7062 	tunnel = flow_tunnel_from_rule(wks->flows);
7063 	if (tunnel) {
7064 		flow->tunnel = 1;
7065 		flow->tunnel_id = tunnel->tunnel_id;
7066 		__atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
7067 		mlx5_free(default_miss_ctx.queue);
7068 	}
7069 	mlx5_flow_pop_thread_workspace();
7070 	return idx;
7071 error:
7072 	MLX5_ASSERT(flow);
7073 	ret = rte_errno; /* Save rte_errno before cleanup. */
7074 	flow_mreg_del_copy_action(dev, flow);
7075 	flow_drv_destroy(dev, flow);
7076 	if (rss_desc->shared_rss)
7077 		__atomic_sub_fetch(&((struct mlx5_shared_action_rss *)
7078 			mlx5_ipool_get
7079 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
7080 			rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
7081 	mlx5_ipool_free(priv->flows[type], idx);
7082 	rte_errno = ret; /* Restore rte_errno. */
7085 error_before_hairpin_split:
7086 	mlx5_flow_pop_thread_workspace();
7087 	rte_free(translated_actions);
7088 	return 0;
7089 }
7090 
7091 /**
7092  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
7093  * incoming packets to table 1.
7094  *
7095  * Other flow rules, requested for group n, will be created in
7096  * e-switch table n+1.
7097  * Jump action to e-switch group n will be created to group n+1.
7098  *
7099  * Used when working in switchdev mode, to utilise advantages of table 1
7100  * and above.
7101  *
7102  * @param dev
7103  *   Pointer to Ethernet device.
7104  *
7105  * @return
7106  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
7107  */
7108 struct rte_flow *
7109 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
7110 {
7111 	const struct rte_flow_attr attr = {
7112 		.group = 0,
7113 		.priority = 0,
7114 		.ingress = 1,
7115 		.egress = 0,
7116 		.transfer = 1,
7117 	};
7118 	const struct rte_flow_item pattern = {
7119 		.type = RTE_FLOW_ITEM_TYPE_END,
7120 	};
7121 	struct rte_flow_action_jump jump = {
7122 		.group = 1,
7123 	};
7124 	const struct rte_flow_action actions[] = {
7125 		{
7126 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
7127 			.conf = &jump,
7128 		},
7129 		{
7130 			.type = RTE_FLOW_ACTION_TYPE_END,
7131 		},
7132 	};
7133 	struct rte_flow_error error;
7134 
7135 	return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7136 						   &attr, &pattern,
7137 						   actions, false, &error);
7138 }
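
/*
 * Worked example of the group shift described above: a transfer rule
 * requested by the application for group 2 is programmed in e-switch
 * table 3, and a JUMP action targeting group 2 is translated into a jump
 * to e-switch table 3 as well.
 */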
7139 
7140 /**
7141  * Create a dedicated flow rule on e-switch table 1, which matches the ESW
7142  * manager and SQ number and directs all packets to the peer vport.
7143  *
7144  * @param dev
7145  *   Pointer to Ethernet device.
7146  * @param txq
7147  *   Txq index.
7148  *
7149  * @return
7150  *   Flow ID on success, 0 otherwise and rte_errno is set.
7151  */
7152 uint32_t
7153 mlx5_flow_create_devx_sq_miss_flow(struct rte_eth_dev *dev, uint32_t txq)
7154 {
7155 	struct rte_flow_attr attr = {
7156 		.group = 0,
7157 		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7158 		.ingress = 1,
7159 		.egress = 0,
7160 		.transfer = 1,
7161 	};
7162 	struct rte_flow_item_port_id port_spec = {
7163 		.id = MLX5_PORT_ESW_MGR,
7164 	};
7165 	struct mlx5_rte_flow_item_sq txq_spec = {
7166 		.queue = txq,
7167 	};
7168 	struct rte_flow_item pattern[] = {
7169 		{
7170 			.type = RTE_FLOW_ITEM_TYPE_PORT_ID,
7171 			.spec = &port_spec,
7172 		},
7173 		{
7174 			.type = (enum rte_flow_item_type)
7175 				MLX5_RTE_FLOW_ITEM_TYPE_SQ,
7176 			.spec = &txq_spec,
7177 		},
7178 		{
7179 			.type = RTE_FLOW_ITEM_TYPE_END,
7180 		},
7181 	};
7182 	struct rte_flow_action_jump jump = {
7183 		.group = 1,
7184 	};
7185 	struct rte_flow_action_port_id port = {
7186 		.id = dev->data->port_id,
7187 	};
7188 	struct rte_flow_action actions[] = {
7189 		{
7190 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
7191 			.conf = &jump,
7192 		},
7193 		{
7194 			.type = RTE_FLOW_ACTION_TYPE_END,
7195 		},
7196 	};
7197 	struct rte_flow_error error;
7198 
7199 	/*
7200 	 * Creates group 0, highest priority jump flow.
7201 	 * Matches txq to bypass kernel packets.
7202 	 */
7203 	if (flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern, actions,
7204 			     false, &error) == 0)
7205 		return 0;
7206 	/* Create group 1, lowest priority redirect flow for txq. */
7207 	attr.group = 1;
7208 	actions[0].conf = &port;
7209 	actions[0].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
7210 	return flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern,
7211 				actions, false, &error);
7212 }
7213 
7214 /**
7215  * Validate a flow supported by the NIC.
7216  *
7217  * @see rte_flow_validate()
7218  * @see rte_flow_ops
7219  */
7220 int
7221 mlx5_flow_validate(struct rte_eth_dev *dev,
7222 		   const struct rte_flow_attr *attr,
7223 		   const struct rte_flow_item items[],
7224 		   const struct rte_flow_action original_actions[],
7225 		   struct rte_flow_error *error)
7226 {
7227 	int hairpin_flow;
7228 	struct mlx5_translated_action_handle
7229 		indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
7230 	int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
7231 	const struct rte_flow_action *actions;
7232 	struct rte_flow_action *translated_actions = NULL;
7233 	int ret = flow_action_handles_translate(dev, original_actions,
7234 						indir_actions,
7235 						&indir_actions_n,
7236 						&translated_actions, error);
7237 
7238 	if (ret)
7239 		return ret;
7240 	actions = translated_actions ? translated_actions : original_actions;
7241 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
7242 	ret = flow_drv_validate(dev, attr, items, actions,
7243 				true, hairpin_flow, error);
7244 	rte_free(translated_actions);
7245 	return ret;
7246 }
7247 
7248 /**
7249  * Create a flow.
7250  *
7251  * @see rte_flow_create()
7252  * @see rte_flow_ops
7253  */
7254 struct rte_flow *
7255 mlx5_flow_create(struct rte_eth_dev *dev,
7256 		 const struct rte_flow_attr *attr,
7257 		 const struct rte_flow_item items[],
7258 		 const struct rte_flow_action actions[],
7259 		 struct rte_flow_error *error)
7260 {
7261 	struct mlx5_priv *priv = dev->data->dev_private;
7262 
7263 	if (priv->sh->config.dv_flow_en == 2) {
7264 		rte_flow_error_set(error, ENOTSUP,
7265 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7266 			  NULL,
7267 			  "Flow non-Q creation not supported");
7268 		return NULL;
7269 	}
7270 	/*
7271 	 * If the device is not started yet, it is not allowed to create a
7272 	 * flow from the application. PMD default flows and traffic control flows
7273 	 * are not affected.
7274 	 */
7275 	if (unlikely(!dev->data->dev_started)) {
7276 		DRV_LOG(DEBUG, "port %u is not started when "
7277 			"inserting a flow", dev->data->port_id);
7278 		rte_flow_error_set(error, ENODEV,
7279 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7280 				   NULL,
7281 				   "port not started");
7282 		return NULL;
7283 	}
7284 
7285 	return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_GEN,
7286 						   attr, items, actions,
7287 						   true, error);
7288 }
7289 
7290 /**
7291  * Destroy a flow in a list.
7292  *
7293  * @param dev
7294  *   Pointer to Ethernet device.
7295  * @param[in] flow_idx
7296  *   Index of flow to destroy.
7297  */
7298 static void
7299 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7300 		  uint32_t flow_idx)
7301 {
7302 	struct mlx5_priv *priv = dev->data->dev_private;
7303 	struct rte_flow *flow = mlx5_ipool_get(priv->flows[type], flow_idx);
7304 
7305 	if (!flow)
7306 		return;
7307 	MLX5_ASSERT(flow->type == type);
7308 	/*
7309 	 * Update RX queue flags only if port is started, otherwise it is
7310 	 * already clean.
7311 	 */
7312 	if (dev->data->dev_started)
7313 		flow_rxq_flags_trim(dev, flow);
7314 	flow_drv_destroy(dev, flow);
7315 	if (flow->tunnel) {
7316 		struct mlx5_flow_tunnel *tunnel;
7317 
7318 		tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
7319 		RTE_VERIFY(tunnel);
7320 		if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
7321 			mlx5_flow_tunnel_free(dev, tunnel);
7322 	}
7323 	flow_mreg_del_copy_action(dev, flow);
7324 	mlx5_ipool_free(priv->flows[type], flow_idx);
7325 }
7326 
7327 /**
7328  * Destroy all flows.
7329  *
7330  * @param dev
7331  *   Pointer to Ethernet device.
7332  * @param type
7333  *   Flow type to be flushed.
7334  * @param active
7335  *   If flushing is called actively.
7336  */
7337 void
7338 mlx5_flow_list_flush(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7339 		     bool active)
7340 {
7341 	struct mlx5_priv *priv = dev->data->dev_private;
7342 	uint32_t num_flushed = 0, fidx = 1;
7343 	struct rte_flow *flow;
7344 
7345 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
7346 	if (priv->sh->config.dv_flow_en == 2 &&
7347 	    type == MLX5_FLOW_TYPE_GEN) {
7348 		flow_hw_q_flow_flush(dev, NULL);
7349 		return;
7350 	}
7351 #endif
7352 
7353 	MLX5_IPOOL_FOREACH(priv->flows[type], fidx, flow) {
7354 		flow_list_destroy(dev, type, fidx);
7355 		num_flushed++;
7356 	}
7357 	if (active) {
7358 		DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
7359 			dev->data->port_id, num_flushed);
7360 	}
7361 }
7362 
7363 /**
7364  * Stop all default actions for flows.
7365  *
7366  * @param dev
7367  *   Pointer to Ethernet device.
7368  */
7369 void
7370 mlx5_flow_stop_default(struct rte_eth_dev *dev)
7371 {
7372 	flow_mreg_del_default_copy_action(dev);
7373 	flow_rxq_flags_clear(dev);
7374 }
7375 
7376 /**
7377  * Start all default actions for flows.
7378  *
7379  * @param dev
7380  *   Pointer to Ethernet device.
7381  * @return
7382  *   0 on success, a negative errno value otherwise and rte_errno is set.
7383  */
7384 int
7385 mlx5_flow_start_default(struct rte_eth_dev *dev)
7386 {
7387 	struct rte_flow_error error;
7388 
7389 	/* Make sure default copy action (reg_c[0] -> reg_b) is created. */
7390 	return flow_mreg_add_default_copy_action(dev, &error);
7391 }
7392 
7393 /**
7394  * Release key of thread specific flow workspace data.
7395  */
7396 void
7397 flow_release_workspace(void *data)
7398 {
7399 	struct mlx5_flow_workspace *wks = data;
7400 	struct mlx5_flow_workspace *next;
7401 
7402 	while (wks) {
7403 		next = wks->next;
7404 		free(wks->rss_desc.queue);
7405 		free(wks);
7406 		wks = next;
7407 	}
7408 }
7409 
7410 /**
7411  * Get thread specific current flow workspace.
7412  *
7413  * @return pointer to thread specific flow workspace data, NULL on error.
7414  */
7415 struct mlx5_flow_workspace*
7416 mlx5_flow_get_thread_workspace(void)
7417 {
7418 	struct mlx5_flow_workspace *data;
7419 
7420 	data = mlx5_flow_os_get_specific_workspace();
7421 	MLX5_ASSERT(data && data->inuse);
7422 	if (!data || !data->inuse)
7423 		DRV_LOG(ERR, "flow workspace not initialized.");
7424 	return data;
7425 }
7426 
7427 /**
7428  * Allocate and init new flow workspace.
7429  *
7430  * @return pointer to flow workspace data, NULL on error.
7431  */
7432 static struct mlx5_flow_workspace*
7433 flow_alloc_thread_workspace(void)
7434 {
7435 	struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
7436 
7437 	if (!data) {
7438 		DRV_LOG(ERR, "Failed to allocate flow workspace "
7439 			"memory.");
7440 		return NULL;
7441 	}
7442 	data->rss_desc.queue = calloc(1,
7443 			sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
7444 	if (!data->rss_desc.queue)
7445 		goto err;
7446 	data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
7447 	return data;
7448 err:
7449 	free(data->rss_desc.queue);
7450 	free(data);
7451 	return NULL;
7452 }
7453 
7454 /**
7455  * Get new thread specific flow workspace.
7456  *
7457  * If current workspace inuse, create new one and set as current.
7458  *
7459  * @return pointer to thread specific flow workspace data, NULL on error.
7460  */
7461 struct mlx5_flow_workspace*
7462 mlx5_flow_push_thread_workspace(void)
7463 {
7464 	struct mlx5_flow_workspace *curr;
7465 	struct mlx5_flow_workspace *data;
7466 
7467 	curr = mlx5_flow_os_get_specific_workspace();
7468 	if (!curr) {
7469 		data = flow_alloc_thread_workspace();
7470 		if (!data)
7471 			return NULL;
7472 	} else if (!curr->inuse) {
7473 		data = curr;
7474 	} else if (curr->next) {
7475 		data = curr->next;
7476 	} else {
7477 		data = flow_alloc_thread_workspace();
7478 		if (!data)
7479 			return NULL;
7480 		curr->next = data;
7481 		data->prev = curr;
7482 	}
7483 	data->inuse = 1;
7484 	data->flow_idx = 0;
7485 	/* Set as current workspace */
7486 	if (mlx5_flow_os_set_specific_workspace(data))
7487 		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
7488 	return data;
7489 }
7490 
7491 /**
7492  * Close current thread specific flow workspace.
7493  *
7494  * If a previous workspace is available, set it as current.
7497  */
7498 void
7499 mlx5_flow_pop_thread_workspace(void)
7500 {
7501 	struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();
7502 
7503 	if (!data)
7504 		return;
7505 	if (!data->inuse) {
7506 		DRV_LOG(ERR, "Failed to close unused flow workspace.");
7507 		return;
7508 	}
7509 	data->inuse = 0;
7510 	if (!data->prev)
7511 		return;
7512 	if (mlx5_flow_os_set_specific_workspace(data->prev))
7513 		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
7514 }
7515 
7516 /**
7517  * Verify the flow list is empty.
7518  *
7519  * @param dev
7520  *  Pointer to Ethernet device.
7521  *
7522  * @return the number of flows not released.
7523  */
7524 int
7525 mlx5_flow_verify(struct rte_eth_dev *dev __rte_unused)
7526 {
7527 	struct mlx5_priv *priv = dev->data->dev_private;
7528 	struct rte_flow *flow;
7529 	uint32_t idx = 0;
7530 	int ret = 0, i;
7531 
7532 	for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
7533 		MLX5_IPOOL_FOREACH(priv->flows[i], idx, flow) {
7534 			DRV_LOG(DEBUG, "port %u flow %p still referenced",
7535 				dev->data->port_id, (void *)flow);
7536 			ret++;
7537 		}
7538 	}
7539 	return ret;
7540 }
7541 
7542 /**
7543  * Enable default hairpin egress flow.
7544  *
7545  * @param dev
7546  *   Pointer to Ethernet device.
7547  * @param queue
7548  *   The queue index.
7549  *
7550  * @return
7551  *   0 on success, a negative errno value otherwise and rte_errno is set.
7552  */
7553 int
7554 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
7555 			    uint32_t queue)
7556 {
7557 	const struct rte_flow_attr attr = {
7558 		.egress = 1,
7559 		.priority = 0,
7560 	};
7561 	struct mlx5_rte_flow_item_sq queue_spec = {
7562 		.queue = queue,
7563 	};
7564 	struct mlx5_rte_flow_item_sq queue_mask = {
7565 		.queue = UINT32_MAX,
7566 	};
7567 	struct rte_flow_item items[] = {
7568 		{
7569 			.type = (enum rte_flow_item_type)
7570 				MLX5_RTE_FLOW_ITEM_TYPE_SQ,
7571 			.spec = &queue_spec,
7572 			.last = NULL,
7573 			.mask = &queue_mask,
7574 		},
7575 		{
7576 			.type = RTE_FLOW_ITEM_TYPE_END,
7577 		},
7578 	};
7579 	struct rte_flow_action_jump jump = {
7580 		.group = MLX5_HAIRPIN_TX_TABLE,
7581 	};
7582 	struct rte_flow_action actions[2];
7583 	uint32_t flow_idx;
7584 	struct rte_flow_error error;
7585 
7586 	actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
7587 	actions[0].conf = &jump;
7588 	actions[1].type = RTE_FLOW_ACTION_TYPE_END;
7589 	flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7590 				    &attr, items, actions, false, &error);
7591 	if (!flow_idx) {
7592 		DRV_LOG(DEBUG,
7593 			"Failed to create ctrl flow: rte_errno(%d),"
7594 			" type(%d), message(%s)",
7595 			rte_errno, error.type,
7596 			error.message ? error.message : " (no stated reason)");
7597 		return -rte_errno;
7598 	}
7599 	return 0;
7600 }
7601 
7602 /**
7603  * Enable a control flow configured from the control plane.
7604  *
7605  * @param dev
7606  *   Pointer to Ethernet device.
7607  * @param eth_spec
7608  *   An Ethernet flow spec to apply.
7609  * @param eth_mask
7610  *   An Ethernet flow mask to apply.
7611  * @param vlan_spec
7612  *   A VLAN flow spec to apply.
7613  * @param vlan_mask
7614  *   A VLAN flow mask to apply.
7615  *
7616  * @return
7617  *   0 on success, a negative errno value otherwise and rte_errno is set.
7618  */
7619 int
7620 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
7621 		    struct rte_flow_item_eth *eth_spec,
7622 		    struct rte_flow_item_eth *eth_mask,
7623 		    struct rte_flow_item_vlan *vlan_spec,
7624 		    struct rte_flow_item_vlan *vlan_mask)
7625 {
7626 	struct mlx5_priv *priv = dev->data->dev_private;
7627 	const struct rte_flow_attr attr = {
7628 		.ingress = 1,
7629 		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7630 	};
7631 	struct rte_flow_item items[] = {
7632 		{
7633 			.type = RTE_FLOW_ITEM_TYPE_ETH,
7634 			.spec = eth_spec,
7635 			.last = NULL,
7636 			.mask = eth_mask,
7637 		},
7638 		{
7639 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
7640 					      RTE_FLOW_ITEM_TYPE_END,
7641 			.spec = vlan_spec,
7642 			.last = NULL,
7643 			.mask = vlan_mask,
7644 		},
7645 		{
7646 			.type = RTE_FLOW_ITEM_TYPE_END,
7647 		},
7648 	};
7649 	uint16_t queue[priv->reta_idx_n];
7650 	struct rte_flow_action_rss action_rss = {
7651 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
7652 		.level = 0,
7653 		.types = priv->rss_conf.rss_hf,
7654 		.key_len = priv->rss_conf.rss_key_len,
7655 		.queue_num = priv->reta_idx_n,
7656 		.key = priv->rss_conf.rss_key,
7657 		.queue = queue,
7658 	};
7659 	struct rte_flow_action actions[] = {
7660 		{
7661 			.type = RTE_FLOW_ACTION_TYPE_RSS,
7662 			.conf = &action_rss,
7663 		},
7664 		{
7665 			.type = RTE_FLOW_ACTION_TYPE_END,
7666 		},
7667 	};
7668 	uint32_t flow_idx;
7669 	struct rte_flow_error error;
7670 	unsigned int i;
7671 
7672 	if (!priv->reta_idx_n || !priv->rxqs_n) {
7673 		return 0;
7674 	}
7675 	if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
7676 		action_rss.types = 0;
7677 	for (i = 0; i != priv->reta_idx_n; ++i)
7678 		queue[i] = (*priv->reta_idx)[i];
7679 	flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7680 				    &attr, items, actions, false, &error);
7681 	if (!flow_idx)
7682 		return -rte_errno;
7683 	return 0;
7684 }
7685 
7686 /**
7687  * Enable a control flow configured from the control plane.
7688  *
7689  * @param dev
7690  *   Pointer to Ethernet device.
7691  * @param eth_spec
7692  *   An Ethernet flow spec to apply.
7693  * @param eth_mask
7694  *   An Ethernet flow mask to apply.
7695  *
7696  * @return
7697  *   0 on success, a negative errno value otherwise and rte_errno is set.
7698  */
7699 int
7700 mlx5_ctrl_flow(struct rte_eth_dev *dev,
7701 	       struct rte_flow_item_eth *eth_spec,
7702 	       struct rte_flow_item_eth *eth_mask)
7703 {
7704 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
7705 }
7706 
7707 /**
7708  * Create default miss flow rule matching lacp traffic
7709  *
7710  * @param dev
7711  *   Pointer to Ethernet device.
7714  *
7715  * @return
7716  *   0 on success, a negative errno value otherwise and rte_errno is set.
7717  */
7718 int
7719 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
7720 {
7721 	/*
7722 	 * The LACP matching is done using only the ether type, since using
7723 	 * a multicast dst MAC causes the kernel to give low priority to this flow.
7724 	 */
7725 	static const struct rte_flow_item_eth lacp_spec = {
7726 		.type = RTE_BE16(0x8809),
7727 	};
7728 	static const struct rte_flow_item_eth lacp_mask = {
7729 		.type = 0xffff,
7730 	};
7731 	const struct rte_flow_attr attr = {
7732 		.ingress = 1,
7733 	};
7734 	struct rte_flow_item items[] = {
7735 		{
7736 			.type = RTE_FLOW_ITEM_TYPE_ETH,
7737 			.spec = &lacp_spec,
7738 			.mask = &lacp_mask,
7739 		},
7740 		{
7741 			.type = RTE_FLOW_ITEM_TYPE_END,
7742 		},
7743 	};
7744 	struct rte_flow_action actions[] = {
7745 		{
7746 			.type = (enum rte_flow_action_type)
7747 				MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
7748 		},
7749 		{
7750 			.type = RTE_FLOW_ACTION_TYPE_END,
7751 		},
7752 	};
7753 	struct rte_flow_error error;
7754 	uint32_t flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7755 					&attr, items, actions,
7756 					false, &error);
7757 
7758 	if (!flow_idx)
7759 		return -rte_errno;
7760 	return 0;
7761 }
7762 
7763 /**
7764  * Destroy a flow.
7765  *
7766  * @see rte_flow_destroy()
7767  * @see rte_flow_ops
7768  */
7769 int
7770 mlx5_flow_destroy(struct rte_eth_dev *dev,
7771 		  struct rte_flow *flow,
7772 		  struct rte_flow_error *error __rte_unused)
7773 {
7774 	struct mlx5_priv *priv = dev->data->dev_private;
7775 
7776 	if (priv->sh->config.dv_flow_en == 2)
7777 		return rte_flow_error_set(error, ENOTSUP,
7778 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7779 			  NULL,
7780 			  "Flow non-Q destruction not supported");
7781 	flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN,
7782 				(uintptr_t)(void *)flow);
7783 	return 0;
7784 }
7785 
7786 /**
7787  * Destroy all flows.
7788  *
7789  * @see rte_flow_flush()
7790  * @see rte_flow_ops
7791  */
7792 int
7793 mlx5_flow_flush(struct rte_eth_dev *dev,
7794 		struct rte_flow_error *error __rte_unused)
7795 {
7796 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, false);
7797 	return 0;
7798 }
7799 
7800 /**
7801  * Isolated mode.
7802  *
7803  * @see rte_flow_isolate()
7804  * @see rte_flow_ops
7805  */
7806 int
7807 mlx5_flow_isolate(struct rte_eth_dev *dev,
7808 		  int enable,
7809 		  struct rte_flow_error *error)
7810 {
7811 	struct mlx5_priv *priv = dev->data->dev_private;
7812 
7813 	if (dev->data->dev_started) {
7814 		rte_flow_error_set(error, EBUSY,
7815 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7816 				   NULL,
7817 				   "port must be stopped first");
7818 		return -rte_errno;
7819 	}
7820 	priv->isolated = !!enable;
7821 	if (enable)
7822 		dev->dev_ops = &mlx5_dev_ops_isolate;
7823 	else
7824 		dev->dev_ops = &mlx5_dev_ops;
7825 
7826 	dev->rx_descriptor_status = mlx5_rx_descriptor_status;
7827 	dev->tx_descriptor_status = mlx5_tx_descriptor_status;
7828 
7829 	return 0;
7830 }
7831 
7832 /**
7833  * Query a flow.
7834  *
7835  * @see rte_flow_query()
7836  * @see rte_flow_ops
7837  */
7838 static int
7839 flow_drv_query(struct rte_eth_dev *dev,
7840 	       struct rte_flow *eflow,
7841 	       const struct rte_flow_action *actions,
7842 	       void *data,
7843 	       struct rte_flow_error *error)
7844 {
7845 	struct mlx5_priv *priv = dev->data->dev_private;
7846 	const struct mlx5_flow_driver_ops *fops;
7847 	struct rte_flow *flow = NULL;
7848 	enum mlx5_flow_drv_type ftype = MLX5_FLOW_TYPE_MIN;
7849 
7850 	if (priv->sh->config.dv_flow_en == 2) {
7851 #ifdef HAVE_MLX5_HWS_SUPPORT
7852 		flow = eflow;
7853 		ftype = MLX5_FLOW_TYPE_HW;
7854 #endif
7855 	} else {
7856 		flow = (struct rte_flow *)mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
7857 				(uintptr_t)(void *)eflow);
7858 	}
7859 	if (!flow) {
7860 		return rte_flow_error_set(error, ENOENT,
7861 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7862 			  NULL,
7863 			  "invalid flow handle");
7864 	}
7865 	if (ftype == MLX5_FLOW_TYPE_MIN)
7866 		ftype = flow->drv_type;
7867 	MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
7868 	fops = flow_get_drv_ops(ftype);
7869 
7870 	return fops->query(dev, flow, actions, data, error);
7871 }
7872 
7873 /**
7874  * Query a flow.
7875  *
7876  * @see rte_flow_query()
7877  * @see rte_flow_ops
7878  */
7879 int
7880 mlx5_flow_query(struct rte_eth_dev *dev,
7881 		struct rte_flow *flow,
7882 		const struct rte_flow_action *actions,
7883 		void *data,
7884 		struct rte_flow_error *error)
7885 {
7886 	int ret;
7887 
7888 	ret = flow_drv_query(dev, flow, actions, data,
7889 			     error);
7890 	if (ret < 0)
7891 		return ret;
7892 	return 0;
7893 }
7894 
7895 /**
7896  * Get rte_flow callbacks.
7897  *
7898  * @param dev
7899  *   Pointer to Ethernet device structure.
7900  * @param ops
7901  *   Pointer to operation-specific structure.
7902  *
7903  * @return 0
7904  */
7905 int
7906 mlx5_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
7907 		  const struct rte_flow_ops **ops)
7908 {
7909 	*ops = &mlx5_flow_ops;
7910 	return 0;
7911 }
7912 
7913 /**
7914  * Validate meter policy actions.
7915  * Dispatcher for action type specific validation.
7916  *
7917  * @param[in] dev
7918  *   Pointer to the Ethernet device structure.
7919  * @param[in] action
7920  *   The meter policy action object to validate.
7921  * @param[in] attr
7922  *   Attributes of flow to determine steering domain.
7923  * @param[out] is_rss
7924  *   Is RSS or not.
7925  * @param[out] domain_bitmap
7926  *   Domain bitmap.
7927  * @param[out] policy_mode
7928  *   Meter policy mode.
7929  * @param[out] error
7930  *   Perform verbose error reporting if not NULL. Initialized in case of
7931  *   error only.
7932  *
7933  * @return
7934  *   0 on success, otherwise negative errno value.
7935  */
7936 int
7937 mlx5_flow_validate_mtr_acts(struct rte_eth_dev *dev,
7938 			const struct rte_flow_action *actions[RTE_COLORS],
7939 			struct rte_flow_attr *attr,
7940 			bool *is_rss,
7941 			uint8_t *domain_bitmap,
7942 			uint8_t *policy_mode,
7943 			struct rte_mtr_error *error)
7944 {
7945 	const struct mlx5_flow_driver_ops *fops;
7946 
7947 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7948 	return fops->validate_mtr_acts(dev, actions, attr, is_rss,
7949 				       domain_bitmap, policy_mode, error);
7950 }
7951 
7952 /**
7953  * Destroy the meter table set.
7954  *
7955  * @param[in] dev
7956  *   Pointer to Ethernet device.
7957  * @param[in] mtr_policy
7958  *   Meter policy struct.
7959  */
7960 void
7961 mlx5_flow_destroy_mtr_acts(struct rte_eth_dev *dev,
7962 		      struct mlx5_flow_meter_policy *mtr_policy)
7963 {
7964 	const struct mlx5_flow_driver_ops *fops;
7965 
7966 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7967 	fops->destroy_mtr_acts(dev, mtr_policy);
7968 }
7969 
7970 /**
7971  * Create policy action, lock free,
7972  * (mutex should be acquired by caller).
7973  * Dispatcher for action type specific call.
7974  *
7975  * @param[in] dev
7976  *   Pointer to the Ethernet device structure.
7977  * @param[in] mtr_policy
7978  *   Meter policy struct.
7979  * @param[in] action
7980  *   Action specification used to create meter actions.
7981  * @param[in] attr
7982  *   Flow rule attributes.
7983  * @param[out] error
7984  *   Perform verbose error reporting if not NULL. Initialized in case of
7985  *   error only.
7986  *
7987  * @return
7988  *   0 on success, otherwise negative errno value.
7989  */
7990 int
7991 mlx5_flow_create_mtr_acts(struct rte_eth_dev *dev,
7992 		      struct mlx5_flow_meter_policy *mtr_policy,
7993 		      const struct rte_flow_action *actions[RTE_COLORS],
7994 		      struct rte_flow_attr *attr,
7995 		      struct rte_mtr_error *error)
7996 {
7997 	const struct mlx5_flow_driver_ops *fops;
7998 
7999 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8000 	return fops->create_mtr_acts(dev, mtr_policy, actions, attr, error);
8001 }
8002 
8003 /**
8004  * Create policy rules, lock free,
8005  * (mutex should be acquired by caller).
8006  * Dispatcher for action type specific call.
8007  *
8008  * @param[in] dev
8009  *   Pointer to the Ethernet device structure.
8010  * @param[in] mtr_policy
8011  *   Meter policy struct.
8012  *
8013  * @return
8014  *   0 on success, -1 otherwise.
8015  */
8016 int
8017 mlx5_flow_create_policy_rules(struct rte_eth_dev *dev,
8018 			     struct mlx5_flow_meter_policy *mtr_policy)
8019 {
8020 	const struct mlx5_flow_driver_ops *fops;
8021 
8022 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8023 	return fops->create_policy_rules(dev, mtr_policy);
8024 }
8025 
8026 /**
8027  * Destroy policy rules, lock free,
8028  * (mutex should be acquired by caller).
8029  * Dispatcher for action type specific call.
8030  *
8031  * @param[in] dev
8032  *   Pointer to the Ethernet device structure.
8033  * @param[in] mtr_policy
8034  *   Meter policy struct.
8035  */
8036 void
8037 mlx5_flow_destroy_policy_rules(struct rte_eth_dev *dev,
8038 			     struct mlx5_flow_meter_policy *mtr_policy)
8039 {
8040 	const struct mlx5_flow_driver_ops *fops;
8041 
8042 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8043 	fops->destroy_policy_rules(dev, mtr_policy);
8044 }
8045 
8046 /**
8047  * Destroy the default policy table set.
8048  *
8049  * @param[in] dev
8050  *   Pointer to Ethernet device.
8051  */
8052 void
8053 mlx5_flow_destroy_def_policy(struct rte_eth_dev *dev)
8054 {
8055 	const struct mlx5_flow_driver_ops *fops;
8056 
8057 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8058 	fops->destroy_def_policy(dev);
8059 }
8060 
8061 /**
8062  * Create the default policy table set.
8063  *
8064  * @param[in] dev
8065  *   Pointer to Ethernet device.
8066  *
8067  * @return
8068  *   0 on success, -1 otherwise.
8069  */
8070 int
8071 mlx5_flow_create_def_policy(struct rte_eth_dev *dev)
8072 {
8073 	const struct mlx5_flow_driver_ops *fops;
8074 
8075 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8076 	return fops->create_def_policy(dev);
8077 }
8078 
8079 /**
8080  * Create the needed meter and suffix tables.
8081  *
8082  * @param[in] dev
8083  *   Pointer to Ethernet device.
8084  *
8085  * @return
8086  *   0 on success, -1 otherwise.
8087  */
8088 int
8089 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
8090 			struct mlx5_flow_meter_info *fm,
8091 			uint32_t mtr_idx,
8092 			uint8_t domain_bitmap)
8093 {
8094 	const struct mlx5_flow_driver_ops *fops;
8095 
8096 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8097 	return fops->create_mtr_tbls(dev, fm, mtr_idx, domain_bitmap);
8098 }
8099 
8100 /**
8101  * Destroy the meter table set.
8102  *
8103  * @param[in] dev
8104  *   Pointer to Ethernet device.
8105  * @param[in] fm
8106  *   Pointer to the flow meter info.
8107  */
8108 void
8109 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
8110 			   struct mlx5_flow_meter_info *fm)
8111 {
8112 	const struct mlx5_flow_driver_ops *fops;
8113 
8114 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8115 	fops->destroy_mtr_tbls(dev, fm);
8116 }
8117 
8118 /**
8119  * Destroy the global meter drop table.
8120  *
8121  * @param[in] dev
8122  *   Pointer to Ethernet device.
8123  */
8124 void
8125 mlx5_flow_destroy_mtr_drop_tbls(struct rte_eth_dev *dev)
8126 {
8127 	const struct mlx5_flow_driver_ops *fops;
8128 
8129 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8130 	fops->destroy_mtr_drop_tbls(dev);
8131 }
8132 
8133 /**
8134  * Destroy the sub policy table with RX queue.
8135  *
8136  * @param[in] dev
8137  *   Pointer to Ethernet device.
8138  * @param[in] mtr_policy
8139  *   Pointer to meter policy table.
8140  */
8141 void
8142 mlx5_flow_destroy_sub_policy_with_rxq(struct rte_eth_dev *dev,
8143 		struct mlx5_flow_meter_policy *mtr_policy)
8144 {
8145 	const struct mlx5_flow_driver_ops *fops;
8146 
8147 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8148 	fops->destroy_sub_policy_with_rxq(dev, mtr_policy);
8149 }
8150 
8151 /**
8152  * Allocate the needed aso flow meter id.
8153  *
8154  * @param[in] dev
8155  *   Pointer to Ethernet device.
8156  *
8157  * @return
8158  *   Index to the ASO flow meter on success, 0 otherwise.
8159  */
8160 uint32_t
8161 mlx5_flow_mtr_alloc(struct rte_eth_dev *dev)
8162 {
8163 	const struct mlx5_flow_driver_ops *fops;
8164 
8165 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8166 	return fops->create_meter(dev);
8167 }
8168 
8169 /**
8170  * Free the aso flow meter id.
8171  *
8172  * @param[in] dev
8173  *   Pointer to Ethernet device.
8174  * @param[in] mtr_idx
8175  *   Index to the ASO flow meter to be freed.
8179  */
8180 void
8181 mlx5_flow_mtr_free(struct rte_eth_dev *dev, uint32_t mtr_idx)
8182 {
8183 	const struct mlx5_flow_driver_ops *fops;
8184 
8185 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8186 	fops->free_meter(dev, mtr_idx);
8187 }
8188 
8189 /**
8190  * Allocate a counter.
8191  *
8192  * @param[in] dev
8193  *   Pointer to Ethernet device structure.
8194  *
8195  * @return
8196  *   Index to allocated counter on success, 0 otherwise.
8197  */
8198 uint32_t
8199 mlx5_counter_alloc(struct rte_eth_dev *dev)
8200 {
8201 	const struct mlx5_flow_driver_ops *fops;
8202 	struct rte_flow_attr attr = { .transfer = 0 };
8203 
8204 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8205 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8206 		return fops->counter_alloc(dev);
8207 	}
8208 	DRV_LOG(ERR,
8209 		"port %u counter allocate is not supported.",
8210 		 dev->data->port_id);
8211 	return 0;
8212 }
8213 
8214 /**
8215  * Free a counter.
8216  *
8217  * @param[in] dev
8218  *   Pointer to Ethernet device structure.
8219  * @param[in] cnt
8220  *   Index to the counter to be freed.
8221  */
8222 void
8223 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
8224 {
8225 	const struct mlx5_flow_driver_ops *fops;
8226 	struct rte_flow_attr attr = { .transfer = 0 };
8227 
8228 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8229 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8230 		fops->counter_free(dev, cnt);
8231 		return;
8232 	}
8233 	DRV_LOG(ERR,
8234 		"port %u counter free is not supported.",
8235 		 dev->data->port_id);
8236 }
8237 
8238 /**
8239  * Query counter statistics.
8240  *
8241  * @param[in] dev
8242  *   Pointer to Ethernet device structure.
8243  * @param[in] cnt
8244  *   Index to counter to query.
8245  * @param[in] clear
8246  *   Set to clear counter statistics.
8247  * @param[out] pkts
8248  *   The counter hits packets number to save.
8249  * @param[out] bytes
8250  *   The counter hits bytes number to save.
8251  *
8252  * @return
8253  *   0 on success, a negative errno value otherwise.
8254  */
8255 int
8256 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
8257 		   bool clear, uint64_t *pkts, uint64_t *bytes, void **action)
8258 {
8259 	const struct mlx5_flow_driver_ops *fops;
8260 	struct rte_flow_attr attr = { .transfer = 0 };
8261 
8262 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8263 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8264 		return fops->counter_query(dev, cnt, clear, pkts,
8265 					bytes, action);
8266 	}
8267 	DRV_LOG(ERR,
8268 		"port %u counter query is not supported.",
8269 		 dev->data->port_id);
8270 	return -ENOTSUP;
8271 }
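
/*
 * Illustrative sketch (not part of the driver): mlx5_counter_query() is
 * called with a counter index, for example by the flow dump helpers further
 * below, roughly as follows (variable names are hypothetical):
 *
 *	uint64_t hits, bytes;
 *	void *action = NULL;
 *
 *	if (!mlx5_counter_query(dev, counter_idx, false, &hits, &bytes,
 *				&action) && action) {
 *		// use hits/bytes; "action" identifies the underlying counter
 *	}
 */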
8272 
8273 /**
8274  * Get information about HWS pre-configurable resources.
8275  *
8276  * @param[in] dev
8277  *   Pointer to the rte_eth_dev structure.
8278  * @param[out] port_info
8279  *   Pointer to port information.
8280  * @param[out] queue_info
8281  *   Pointer to queue information.
8282  * @param[out] error
8283  *   Pointer to error structure.
8284  *
8285  * @return
8286  *   0 on success, a negative errno value otherwise and rte_errno is set.
8287  */
8288 static int
8289 mlx5_flow_info_get(struct rte_eth_dev *dev,
8290 		   struct rte_flow_port_info *port_info,
8291 		   struct rte_flow_queue_info *queue_info,
8292 		   struct rte_flow_error *error)
8293 {
8294 	const struct mlx5_flow_driver_ops *fops;
8295 	struct rte_flow_attr attr = {0};
8296 
8297 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
8298 		return rte_flow_error_set(error, ENOTSUP,
8299 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8300 				NULL,
8301 				"info get with incorrect steering mode");
8302 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8303 	return fops->info_get(dev, port_info, queue_info, error);
8304 }
8305 
8306 /**
8307  * Configure port HWS resources.
8308  *
8309  * @param[in] dev
8310  *   Pointer to the rte_eth_dev structure.
8311  * @param[in] port_attr
8312  *   Port configuration attributes.
8313  * @param[in] nb_queue
8314  *   Number of queues.
8315  * @param[in] queue_attr
8316  *   Array that holds attributes for each flow queue.
8317  * @param[out] error
8318  *   Pointer to error structure.
8319  *
8320  * @return
8321  *   0 on success, a negative errno value otherwise and rte_errno is set.
8322  */
8323 static int
8324 mlx5_flow_port_configure(struct rte_eth_dev *dev,
8325 			 const struct rte_flow_port_attr *port_attr,
8326 			 uint16_t nb_queue,
8327 			 const struct rte_flow_queue_attr *queue_attr[],
8328 			 struct rte_flow_error *error)
8329 {
8330 	const struct mlx5_flow_driver_ops *fops;
8331 	struct rte_flow_attr attr = {0};
8332 
8333 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
8334 		return rte_flow_error_set(error, ENOTSUP,
8335 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8336 				NULL,
8337 				"port configure with incorrect steering mode");
8338 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8339 	return fops->configure(dev, port_attr, nb_queue, queue_attr, error);
8340 }
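
/*
 * Illustrative sketch (not part of the driver): the two callbacks above back
 * the generic rte_flow_info_get()/rte_flow_configure() calls an application
 * issues before using the template API. Hypothetical names, minimal error
 * handling:
 *
 *	struct rte_flow_port_info port_info;
 *	struct rte_flow_queue_info queue_info;
 *	struct rte_flow_port_attr port_attr = { .nb_counters = 1024 };
 *	struct rte_flow_queue_attr queue_attr = { .size = 64 };
 *	const struct rte_flow_queue_attr *attr_list[] = { &queue_attr };
 *	struct rte_flow_error error;
 *
 *	rte_flow_info_get(port_id, &port_info, &queue_info, &error);
 *	rte_flow_configure(port_id, &port_attr, 1, attr_list, &error);
 */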
8341 
8342 /**
8343  * Validate item template.
8344  *
8345  * @param[in] dev
8346  *   Pointer to the rte_eth_dev structure.
8347  * @param[in] attr
8348  *   Pointer to the item template attributes.
8349  * @param[in] items
8350  *   The template item pattern.
8351  * @param[out] error
8352  *   Pointer to error structure.
8353  *
8354  * @return
8355  *   0 on success, a negative errno value otherwise and rte_errno is set.
8356  */
8357 int
8358 mlx5_flow_pattern_validate(struct rte_eth_dev *dev,
8359 		const struct rte_flow_pattern_template_attr *attr,
8360 		const struct rte_flow_item items[],
8361 		struct rte_flow_error *error)
8362 {
8363 	const struct mlx5_flow_driver_ops *fops;
8364 	struct rte_flow_attr fattr = {0};
8365 
8366 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
8367 		rte_flow_error_set(error, ENOTSUP,
8368 			RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
8369 			"pattern validate with incorrect steering mode");
8370 		return -ENOTSUP;
8371 	}
8372 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8373 	return fops->pattern_validate(dev, attr, items, error);
8374 }
8375 
8376 /**
8377  * Create flow item template.
8378  *
8379  * @param[in] dev
8380  *   Pointer to the rte_eth_dev structure.
8381  * @param[in] attr
8382  *   Pointer to the item template attributes.
8383  * @param[in] items
8384  *   The template item pattern.
8385  * @param[out] error
8386  *   Pointer to error structure.
8387  *
8388  * @return
8389  *   0 on success, a negative errno value otherwise and rte_errno is set.
8390  */
8391 static struct rte_flow_pattern_template *
8392 mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
8393 		const struct rte_flow_pattern_template_attr *attr,
8394 		const struct rte_flow_item items[],
8395 		struct rte_flow_error *error)
8396 {
8397 	const struct mlx5_flow_driver_ops *fops;
8398 	struct rte_flow_attr fattr = {0};
8399 
8400 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
8401 		rte_flow_error_set(error, ENOTSUP,
8402 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8403 				NULL,
8404 				"pattern create with incorrect steering mode");
8405 		return NULL;
8406 	}
8407 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8408 	return fops->pattern_template_create(dev, attr, items, error);
8409 }
8410 
8411 /**
8412  * Destroy flow item template.
8413  *
8414  * @param[in] dev
8415  *   Pointer to the rte_eth_dev structure.
8416  * @param[in] template
8417  *   Pointer to the item template to be destroyed.
8418  * @param[out] error
8419  *   Pointer to error structure.
8420  *
8421  * @return
8422  *   0 on success, a negative errno value otherwise and rte_errno is set.
8423  */
8424 static int
8425 mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
8426 				   struct rte_flow_pattern_template *template,
8427 				   struct rte_flow_error *error)
8428 {
8429 	const struct mlx5_flow_driver_ops *fops;
8430 	struct rte_flow_attr attr = {0};
8431 
8432 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
8433 		return rte_flow_error_set(error, ENOTSUP,
8434 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8435 				NULL,
8436 				"pattern destroy with incorrect steering mode");
8437 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8438 	return fops->pattern_template_destroy(dev, template, error);
8439 }
8440 
8441 /**
8442  * Validate flow actions template.
8443  *
8444  * @param[in] dev
8445  *   Pointer to the rte_eth_dev structure.
8446  * @param[in] attr
8447  *   Pointer to the action template attributes.
8448  * @param[in] actions
8449  *   Associated actions (list terminated by the END action).
8450  * @param[in] masks
8451  *   List of actions that marks which of the action's members are constant.
8452  * @param[out] error
8453  *   Pointer to error structure.
8454  *
8455  * @return
8456  *   0 on success, a negative errno value otherwise and rte_errno is set.
8457  */
8458 int
8459 mlx5_flow_actions_validate(struct rte_eth_dev *dev,
8460 			const struct rte_flow_actions_template_attr *attr,
8461 			const struct rte_flow_action actions[],
8462 			const struct rte_flow_action masks[],
8463 			struct rte_flow_error *error)
8464 {
8465 	const struct mlx5_flow_driver_ops *fops;
8466 	struct rte_flow_attr fattr = {0};
8467 
8468 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
8469 		rte_flow_error_set(error, ENOTSUP,
8470 			RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
8471 			"actions validate with incorrect steering mode");
8472 		return -ENOTSUP;
8473 	}
8474 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8475 	return fops->actions_validate(dev, attr, actions, masks, error);
8476 }
8477 
8478 /**
8479  * Create flow actions template.
8480  *
8481  * @param[in] dev
8482  *   Pointer to the rte_eth_dev structure.
8483  * @param[in] attr
8484  *   Pointer to the action template attributes.
8485  * @param[in] actions
8486  *   Associated actions (list terminated by the END action).
8487  * @param[in] masks
8488  *   List of actions that marks which of the action's members are constant.
8489  * @param[out] error
8490  *   Pointer to error structure.
8491  *
8492  * @return
8493  *   0 on success, a negative errno value otherwise and rte_errno is set.
8494  */
8495 static struct rte_flow_actions_template *
8496 mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
8497 			const struct rte_flow_actions_template_attr *attr,
8498 			const struct rte_flow_action actions[],
8499 			const struct rte_flow_action masks[],
8500 			struct rte_flow_error *error)
8501 {
8502 	const struct mlx5_flow_driver_ops *fops;
8503 	struct rte_flow_attr fattr = {0};
8504 
8505 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
8506 		rte_flow_error_set(error, ENOTSUP,
8507 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8508 				NULL,
8509 				"action create with incorrect steering mode");
8510 		return NULL;
8511 	}
8512 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8513 	return fops->actions_template_create(dev, attr, actions, masks, error);
8514 }
8515 
8516 /**
8517  * Destroy flow action template.
8518  *
8519  * @param[in] dev
8520  *   Pointer to the rte_eth_dev structure.
8521  * @param[in] template
8522  *   Pointer to the action template to be destroyed.
8523  * @param[out] error
8524  *   Pointer to error structure.
8525  *
8526  * @return
8527  *   0 on success, a negative errno value otherwise and rte_errno is set.
8528  */
8529 static int
8530 mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
8531 				   struct rte_flow_actions_template *template,
8532 				   struct rte_flow_error *error)
8533 {
8534 	const struct mlx5_flow_driver_ops *fops;
8535 	struct rte_flow_attr attr = {0};
8536 
8537 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
8538 		return rte_flow_error_set(error, ENOTSUP,
8539 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8540 				NULL,
8541 				"action destroy with incorrect steering mode");
8542 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8543 	return fops->actions_template_destroy(dev, template, error);
8544 }
8545 
8546 /**
8547  * Create flow table.
8548  *
8549  * @param[in] dev
8550  *   Pointer to the rte_eth_dev structure.
8551  * @param[in] attr
8552  *   Pointer to the table attributes.
8553  * @param[in] item_templates
8554  *   Item template array to be bound to the table.
8555  * @param[in] nb_item_templates
8556  *   Number of item templates.
8557  * @param[in] action_templates
8558  *   Action template array to be bound to the table.
8559  * @param[in] nb_action_templates
8560  *   Number of action templates.
8561  * @param[out] error
8562  *   Pointer to error structure.
8563  *
8564  * @return
8565  *    Table on success, NULL otherwise and rte_errno is set.
8566  */
8567 static struct rte_flow_template_table *
8568 mlx5_flow_table_create(struct rte_eth_dev *dev,
8569 		       const struct rte_flow_template_table_attr *attr,
8570 		       struct rte_flow_pattern_template *item_templates[],
8571 		       uint8_t nb_item_templates,
8572 		       struct rte_flow_actions_template *action_templates[],
8573 		       uint8_t nb_action_templates,
8574 		       struct rte_flow_error *error)
8575 {
8576 	const struct mlx5_flow_driver_ops *fops;
8577 	struct rte_flow_attr fattr = {0};
8578 
8579 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
8580 		rte_flow_error_set(error, ENOTSUP,
8581 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8582 				NULL,
8583 				"table create with incorrect steering mode");
8584 		return NULL;
8585 	}
8586 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8587 	return fops->template_table_create(dev,
8588 					   attr,
8589 					   item_templates,
8590 					   nb_item_templates,
8591 					   action_templates,
8592 					   nb_action_templates,
8593 					   error);
8594 }
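
/*
 * Illustrative sketch (not part of the driver): the pattern/actions template
 * and table callbacks above implement the rte_flow template API. A minimal
 * application-side sequence could look like this (hypothetical names, error
 * handling omitted):
 *
 *	struct rte_flow_pattern_template *pt =
 *		rte_flow_pattern_template_create(port_id, &pt_attr,
 *						 pattern, &error);
 *	struct rte_flow_actions_template *at =
 *		rte_flow_actions_template_create(port_id, &at_attr,
 *						 actions, masks, &error);
 *	struct rte_flow_template_table *tbl =
 *		rte_flow_template_table_create(port_id, &tbl_attr,
 *					       &pt, 1, &at, 1, &error);
 */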
8595 
8596 /**
8597  * PMD destroy flow table.
8598  *
8599  * @param[in] dev
8600  *   Pointer to the rte_eth_dev structure.
8601  * @param[in] table
8602  *   Pointer to the table to be destroyed.
8603  * @param[out] error
8604  *   Pointer to error structure.
8605  *
8606  * @return
8607  *   0 on success, a negative errno value otherwise and rte_errno is set.
8608  */
8609 static int
8610 mlx5_flow_table_destroy(struct rte_eth_dev *dev,
8611 			struct rte_flow_template_table *table,
8612 			struct rte_flow_error *error)
8613 {
8614 	const struct mlx5_flow_driver_ops *fops;
8615 	struct rte_flow_attr attr = {0};
8616 
8617 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
8618 		return rte_flow_error_set(error, ENOTSUP,
8619 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8620 				NULL,
8621 				"table destroy with incorrect steering mode");
8622 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8623 	return fops->template_table_destroy(dev, table, error);
8624 }
8625 
8626 /**
8627  * Enqueue flow creation.
8628  *
8629  * @param[in] dev
8630  *   Pointer to the rte_eth_dev structure.
8631  * @param[in] queue_id
8632  *   The queue to create the flow.
8633  * @param[in] attr
8634  *   Pointer to the flow operation attributes.
 * @param[in] table
 *   Pointer to the template table the flow is created on.
8635  * @param[in] items
8636  *   Items with flow spec values.
8637  * @param[in] pattern_template_index
8638  *   Index of the pattern template to be used from the table.
8639  * @param[in] actions
8640  *   Actions with flow spec values.
8641  * @param[in] action_template_index
8642  *   Index of the actions template to be used from the table.
8643  * @param[in] user_data
8644  *   Pointer to the user_data.
8645  * @param[out] error
8646  *   Pointer to error structure.
8647  *
8648  * @return
8649  *    Flow pointer on success, NULL otherwise and rte_errno is set.
8650  */
8651 static struct rte_flow *
8652 mlx5_flow_async_flow_create(struct rte_eth_dev *dev,
8653 			    uint32_t queue_id,
8654 			    const struct rte_flow_op_attr *attr,
8655 			    struct rte_flow_template_table *table,
8656 			    const struct rte_flow_item items[],
8657 			    uint8_t pattern_template_index,
8658 			    const struct rte_flow_action actions[],
8659 			    uint8_t action_template_index,
8660 			    void *user_data,
8661 			    struct rte_flow_error *error)
8662 {
8663 	const struct mlx5_flow_driver_ops *fops;
8664 	struct rte_flow_attr fattr = {0};
8665 
8666 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
8667 		rte_flow_error_set(error, ENOTSUP,
8668 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8669 				NULL,
8670 				"flow_q create with incorrect steering mode");
8671 		return NULL;
8672 	}
8673 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8674 	return fops->async_flow_create(dev, queue_id, attr, table,
8675 				       items, pattern_template_index,
8676 				       actions, action_template_index,
8677 				       user_data, error);
8678 }
8679 
8680 /**
8681  * Enqueue flow destruction.
8682  *
8683  * @param[in] dev
8684  *   Pointer to the rte_eth_dev structure.
8685  * @param[in] queue
8686  *   The queue to destroy the flow.
8687  * @param[in] attr
8688  *   Pointer to the flow operation attributes.
8689  * @param[in] flow
8690  *   Pointer to the flow to be destroyed.
8691  * @param[in] user_data
8692  *   Pointer to the user_data.
8693  * @param[out] error
8694  *   Pointer to error structure.
8695  *
8696  * @return
8697  *    0 on success, negative value otherwise and rte_errno is set.
8698  */
8699 static int
8700 mlx5_flow_async_flow_destroy(struct rte_eth_dev *dev,
8701 			     uint32_t queue,
8702 			     const struct rte_flow_op_attr *attr,
8703 			     struct rte_flow *flow,
8704 			     void *user_data,
8705 			     struct rte_flow_error *error)
8706 {
8707 	const struct mlx5_flow_driver_ops *fops;
8708 	struct rte_flow_attr fattr = {0};
8709 
8710 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW)
8711 		return rte_flow_error_set(error, ENOTSUP,
8712 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8713 				NULL,
8714 				"flow_q destroy with incorrect steering mode");
8715 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8716 	return fops->async_flow_destroy(dev, queue, attr, flow,
8717 					user_data, error);
8718 }
8719 
8720 /**
8721  * Pull the enqueued flows.
8722  *
8723  * @param[in] dev
8724  *   Pointer to the rte_eth_dev structure.
8725  * @param[in] queue
8726  *   The queue to pull the result.
8727  * @param[in, out] res
8728  *   Array to save the results.
8729  * @param[in] n_res
8730  *   Number of result entries available in the array.
8731  * @param[out] error
8732  *   Pointer to error structure.
8733  *
8734  * @return
8735  *    Result number on success, negative value otherwise and rte_errno is set.
8736  */
8737 static int
8738 mlx5_flow_pull(struct rte_eth_dev *dev,
8739 	       uint32_t queue,
8740 	       struct rte_flow_op_result res[],
8741 	       uint16_t n_res,
8742 	       struct rte_flow_error *error)
8743 {
8744 	const struct mlx5_flow_driver_ops *fops;
8745 	struct rte_flow_attr attr = {0};
8746 
8747 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
8748 		return rte_flow_error_set(error, ENOTSUP,
8749 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8750 				NULL,
8751 				"flow_q pull with incorrect steering mode");
8752 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8753 	return fops->pull(dev, queue, res, n_res, error);
8754 }
8755 
8756 /**
8757  * Push the enqueued flows.
8758  *
8759  * @param[in] dev
8760  *   Pointer to the rte_eth_dev structure.
8761  * @param[in] queue
8762  *   The queue to push the flows.
8763  * @param[out] error
8764  *   Pointer to error structure.
8765  *
8766  * @return
8767  *    0 on success, negative value otherwise and rte_errno is set.
8768  */
8769 static int
8770 mlx5_flow_push(struct rte_eth_dev *dev,
8771 	       uint32_t queue,
8772 	       struct rte_flow_error *error)
8773 {
8774 	const struct mlx5_flow_driver_ops *fops;
8775 	struct rte_flow_attr attr = {0};
8776 
8777 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
8778 		return rte_flow_error_set(error, ENOTSUP,
8779 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8780 				NULL,
8781 				"flow_q push with incorrect steering mode");
8782 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8783 	return fops->push(dev, queue, error);
8784 }
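
/*
 * Illustrative sketch (not part of the driver): the async create/destroy,
 * push and pull callbacks above map to the queue-based rte_flow calls.
 * A typical enqueue/poll sequence could look like this (hypothetical names,
 * BURST is an application-defined constant):
 *
 *	struct rte_flow_op_attr op_attr = { .postpone = 1 };
 *	struct rte_flow_op_result res[BURST];
 *	struct rte_flow *flow;
 *	int n;
 *
 *	flow = rte_flow_async_create(port_id, queue, &op_attr, tbl,
 *				     pattern, 0, actions, 0, user_data,
 *				     &error);
 *	rte_flow_push(port_id, queue, &error);
 *	do {
 *		n = rte_flow_pull(port_id, queue, res, BURST, &error);
 *	} while (n == 0);
 */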
8785 
8786 /**
8787  * Create shared action.
8788  *
8789  * @param[in] dev
8790  *   Pointer to the rte_eth_dev structure.
8791  * @param[in] queue
8792  *   Which queue to be used.
8793  * @param[in] attr
8794  *   Operation attribute.
8795  * @param[in] conf
8796  *   Indirect action configuration.
8797  * @param[in] action
8798  *   rte_flow action detail.
8799  * @param[in] user_data
8800  *   Pointer to the user_data.
8801  * @param[out] error
8802  *   Pointer to error structure.
8803  *
8804  * @return
8805  *   Action handle on success, NULL otherwise and rte_errno is set.
8806  */
8807 static struct rte_flow_action_handle *
8808 mlx5_flow_async_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
8809 				 const struct rte_flow_op_attr *attr,
8810 				 const struct rte_flow_indir_action_conf *conf,
8811 				 const struct rte_flow_action *action,
8812 				 void *user_data,
8813 				 struct rte_flow_error *error)
8814 {
8815 	const struct mlx5_flow_driver_ops *fops =
8816 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8817 
8818 	return fops->async_action_create(dev, queue, attr, conf, action,
8819 					 user_data, error);
8820 }
8821 
8822 /**
8823  * Update shared action.
8824  *
8825  * @param[in] dev
8826  *   Pointer to the rte_eth_dev structure.
8827  * @param[in] queue
8828  *   Which queue to be used.
8829  * @param[in] attr
8830  *   Operation attribute.
8831  * @param[in] handle
8832  *   Action handle to be updated.
8833  * @param[in] update
8834  *   Update value.
8835  * @param[in] user_data
8836  *   Pointer to the user_data.
8837  * @param[out] error
8838  *   Pointer to error structure.
8839  *
8840  * @return
8841  *   0 on success, negative value otherwise and rte_errno is set.
8842  */
8843 static int
8844 mlx5_flow_async_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
8845 				     const struct rte_flow_op_attr *attr,
8846 				     struct rte_flow_action_handle *handle,
8847 				     const void *update,
8848 				     void *user_data,
8849 				     struct rte_flow_error *error)
8850 {
8851 	const struct mlx5_flow_driver_ops *fops =
8852 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8853 
8854 	return fops->async_action_update(dev, queue, attr, handle,
8855 					 update, user_data, error);
8856 }
8857 
8858 /**
8859  * Destroy shared action.
8860  *
8861  * @param[in] dev
8862  *   Pointer to the rte_eth_dev structure.
8863  * @param[in] queue
8864  *   Which queue to be used.
8865  * @param[in] attr
8866  *   Operation attribute.
8867  * @param[in] handle
8868  *   Action handle to be destroyed.
8869  * @param[in] user_data
8870  *   Pointer to the user_data.
8871  * @param[out] error
8872  *   Pointer to error structure.
8873  *
8874  * @return
8875  *   0 on success, negative value otherwise and rte_errno is set.
8876  */
8877 static int
8878 mlx5_flow_async_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
8879 				      const struct rte_flow_op_attr *attr,
8880 				      struct rte_flow_action_handle *handle,
8881 				      void *user_data,
8882 				      struct rte_flow_error *error)
8883 {
8884 	const struct mlx5_flow_driver_ops *fops =
8885 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8886 
8887 	return fops->async_action_destroy(dev, queue, attr, handle,
8888 					  user_data, error);
8889 }
8890 
8891 /**
8892  * Allocate new memory for the counter values, wrapped by all the needed
8893  * management structures.
8894  *
8895  * @param[in] sh
8896  *   Pointer to mlx5_dev_ctx_shared object.
8897  *
8898  * @return
8899  *   0 on success, a negative errno value otherwise.
8900  */
8901 static int
8902 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
8903 {
8904 	struct mlx5_counter_stats_mem_mng *mem_mng;
8905 	volatile struct flow_counter_stats *raw_data;
8906 	int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
8907 	int size = (sizeof(struct flow_counter_stats) *
8908 			MLX5_COUNTERS_PER_POOL +
8909 			sizeof(struct mlx5_counter_stats_raw)) * raws_n +
8910 			sizeof(struct mlx5_counter_stats_mem_mng);
8911 	size_t pgsize = rte_mem_page_size();
8912 	uint8_t *mem;
8913 	int ret;
8914 	int i;
8915 
8916 	if (pgsize == (size_t)-1) {
8917 		DRV_LOG(ERR, "Failed to get mem page size");
8918 		rte_errno = ENOMEM;
8919 		return -ENOMEM;
8920 	}
8921 	mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
8922 	if (!mem) {
8923 		rte_errno = ENOMEM;
8924 		return -ENOMEM;
8925 	}
8926 	mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
8927 	size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
8928 	ret = mlx5_os_wrapped_mkey_create(sh->cdev->ctx, sh->cdev->pd,
8929 					  sh->cdev->pdn, mem, size,
8930 					  &mem_mng->wm);
8931 	if (ret) {
8932 		rte_errno = errno;
8933 		mlx5_free(mem);
8934 		return -rte_errno;
8935 	}
8936 	mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
8937 	raw_data = (volatile struct flow_counter_stats *)mem;
8938 	for (i = 0; i < raws_n; ++i) {
8939 		mem_mng->raws[i].mem_mng = mem_mng;
8940 		mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
8941 	}
8942 	for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
8943 		LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
8944 				 mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
8945 				 next);
8946 	LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
8947 	sh->cmng.mem_mng = mem_mng;
8948 	return 0;
8949 }
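
/*
 * Descriptive note on the single allocation made above:
 *
 *	+--------------------------------------------+
 *	| raws_n * MLX5_COUNTERS_PER_POOL entries of |  <- "raw_data", covered
 *	|   struct flow_counter_stats                |     by the wrapped mkey
 *	+--------------------------------------------+
 *	| raws_n * struct mlx5_counter_stats_raw     |  <- "mem_mng->raws"
 *	+--------------------------------------------+
 *	| struct mlx5_counter_stats_mem_mng          |  <- "mem_mng"
 *	+--------------------------------------------+
 *
 * The last MLX5_MAX_PENDING_QUERIES raws are put on the free_stat_raws list
 * and are used as scratch buffers for the asynchronous batch queries.
 */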
8950 
8951 /**
8952  * Set the statistic memory to the new counter pool.
8953  *
8954  * @param[in] sh
8955  *   Pointer to mlx5_dev_ctx_shared object.
8956  * @param[in] pool
8957  *   Pointer to the pool to set the statistic memory.
8958  *
8959  * @return
8960  *   0 on success, a negative errno value otherwise.
8961  */
8962 static int
8963 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
8964 			       struct mlx5_flow_counter_pool *pool)
8965 {
8966 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
8967 	/* Resize the statistic memory once it is used up. */
8968 	if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
8969 	    mlx5_flow_create_counter_stat_mem_mng(sh)) {
8970 		DRV_LOG(ERR, "Cannot resize counter stat mem.");
8971 		return -1;
8972 	}
8973 	rte_spinlock_lock(&pool->sl);
8974 	pool->raw = cmng->mem_mng->raws + pool->index %
8975 		    MLX5_CNT_CONTAINER_RESIZE;
8976 	rte_spinlock_unlock(&pool->sl);
8977 	pool->raw_hw = NULL;
8978 	return 0;
8979 }
8980 
8981 #define MLX5_POOL_QUERY_FREQ_US 1000000
8982 
8983 /**
8984  * Set the periodic procedure for triggering asynchronous batch queries for all
8985  * the counter pools.
8986  *
8987  * @param[in] sh
8988  *   Pointer to mlx5_dev_ctx_shared object.
8989  */
8990 void
8991 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
8992 {
8993 	uint32_t pools_n, us;
8994 
8995 	pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED);
8996 	us = MLX5_POOL_QUERY_FREQ_US / pools_n;
8997 	DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
8998 	if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
8999 		sh->cmng.query_thread_on = 0;
9000 		DRV_LOG(ERR, "Cannot reinitialize query alarm");
9001 	} else {
9002 		sh->cmng.query_thread_on = 1;
9003 	}
9004 }
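
/*
 * Example of the scheduling arithmetic above (assuming 4 valid pools):
 * us = MLX5_POOL_QUERY_FREQ_US / 4 = 250000, i.e. one pool is queried every
 * 250 ms so that a full pass over all pools takes about one second.
 */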
9005 
9006 /**
9007  * The periodic procedure for triggering asynchronous batch queries for all the
9008  * counter pools. This function is expected to be called from the host thread.
9009  *
9010  * @param[in] arg
9011  *   The parameter for the alarm process.
9012  */
9013 void
9014 mlx5_flow_query_alarm(void *arg)
9015 {
9016 	struct mlx5_dev_ctx_shared *sh = arg;
9017 	int ret;
9018 	uint16_t pool_index = sh->cmng.pool_index;
9019 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
9020 	struct mlx5_flow_counter_pool *pool;
9021 	uint16_t n_valid;
9022 
9023 	if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
9024 		goto set_alarm;
9025 	rte_spinlock_lock(&cmng->pool_update_sl);
9026 	pool = cmng->pools[pool_index];
9027 	n_valid = cmng->n_valid;
9028 	rte_spinlock_unlock(&cmng->pool_update_sl);
9029 	/* Set the statistic memory to the newly created pool. */
9030 	if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool)))
9031 		goto set_alarm;
9032 	if (pool->raw_hw)
9033 		/* There is a pool query in progress. */
9034 		goto set_alarm;
9035 	pool->raw_hw =
9036 		LIST_FIRST(&sh->cmng.free_stat_raws);
9037 	if (!pool->raw_hw)
9038 		/* No free counter statistics raw memory. */
9039 		goto set_alarm;
9040 	/*
9041 	 * Identify the counters released between query trigger and query
9042 	 * handle more efficiently. A counter released in this gap period
9043 	 * should wait for a new round of query, as the newly arrived packets
9044 	 * will not be taken into account.
9045 	 */
9046 	pool->query_gen++;
9047 	ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
9048 					       MLX5_COUNTERS_PER_POOL,
9049 					       NULL, NULL,
9050 					       pool->raw_hw->mem_mng->wm.lkey,
9051 					       (void *)(uintptr_t)
9052 					       pool->raw_hw->data,
9053 					       sh->devx_comp,
9054 					       (uint64_t)(uintptr_t)pool);
9055 	if (ret) {
9056 		DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
9057 			" %d", pool->min_dcs->id);
9058 		pool->raw_hw = NULL;
9059 		goto set_alarm;
9060 	}
9061 	LIST_REMOVE(pool->raw_hw, next);
9062 	sh->cmng.pending_queries++;
9063 	pool_index++;
9064 	if (pool_index >= n_valid)
9065 		pool_index = 0;
9066 set_alarm:
9067 	sh->cmng.pool_index = pool_index;
9068 	mlx5_set_query_alarm(sh);
9069 }
9070 
9071 /**
9072  * Check and callback event for new aged flow in the counter pool
9073  *
9074  * @param[in] sh
9075  *   Pointer to mlx5_dev_ctx_shared object.
9076  * @param[in] pool
9077  *   Pointer to Current counter pool.
9078  */
9079 static void
9080 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
9081 		   struct mlx5_flow_counter_pool *pool)
9082 {
9083 	struct mlx5_priv *priv;
9084 	struct mlx5_flow_counter *cnt;
9085 	struct mlx5_age_info *age_info;
9086 	struct mlx5_age_param *age_param;
9087 	struct mlx5_counter_stats_raw *cur = pool->raw_hw;
9088 	struct mlx5_counter_stats_raw *prev = pool->raw;
9089 	const uint64_t curr_time = MLX5_CURR_TIME_SEC;
9090 	const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
9091 	uint16_t expected = AGE_CANDIDATE;
9092 	uint32_t i;
9093 
9094 	pool->time_of_last_age_check = curr_time;
9095 	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
9096 		cnt = MLX5_POOL_GET_CNT(pool, i);
9097 		age_param = MLX5_CNT_TO_AGE(cnt);
9098 		if (__atomic_load_n(&age_param->state,
9099 				    __ATOMIC_RELAXED) != AGE_CANDIDATE)
9100 			continue;
9101 		if (cur->data[i].hits != prev->data[i].hits) {
9102 			__atomic_store_n(&age_param->sec_since_last_hit, 0,
9103 					 __ATOMIC_RELAXED);
9104 			continue;
9105 		}
9106 		if (__atomic_add_fetch(&age_param->sec_since_last_hit,
9107 				       time_delta,
9108 				       __ATOMIC_RELAXED) <= age_param->timeout)
9109 			continue;
9110 		/*
9111 		 * Hold the lock first; otherwise, if the release
9112 		 * happens between setting the AGE_TMOUT state and
9113 		 * the tailq operation, the release procedure may
9114 		 * delete a non-existent tailq node.
9115 		 */
9116 		priv = rte_eth_devices[age_param->port_id].data->dev_private;
9117 		age_info = GET_PORT_AGE_INFO(priv);
9118 		rte_spinlock_lock(&age_info->aged_sl);
9119 		if (__atomic_compare_exchange_n(&age_param->state, &expected,
9120 						AGE_TMOUT, false,
9121 						__ATOMIC_RELAXED,
9122 						__ATOMIC_RELAXED)) {
9123 			TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
9124 			MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
9125 		}
9126 		rte_spinlock_unlock(&age_info->aged_sl);
9127 	}
9128 	mlx5_age_event_prepare(sh);
9129 }
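
/*
 * Example of the aging arithmetic above (hypothetical numbers): with an
 * age_param->timeout of 10 and query passes roughly one second apart,
 * sec_since_last_hit is increased by time_delta (about 1) on every pass
 * without new hits, and the counter is moved to the aged list once the sum
 * exceeds 10; a single new hit resets sec_since_last_hit back to 0.
 */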
9130 
9131 /**
9132  * Handler for the HW response with the ready values from an asynchronous
9133  * batch query. This function is expected to be called from the host thread.
9134  *
9135  * @param[in] sh
9136  *   The pointer to the shared device context.
9137  * @param[in] async_id
9138  *   The Devx async ID.
9139  * @param[in] status
9140  *   The status of the completion.
9141  */
9142 void
9143 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
9144 				  uint64_t async_id, int status)
9145 {
9146 	struct mlx5_flow_counter_pool *pool =
9147 		(struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
9148 	struct mlx5_counter_stats_raw *raw_to_free;
9149 	uint8_t query_gen = pool->query_gen ^ 1;
9150 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
9151 	enum mlx5_counter_type cnt_type =
9152 		pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
9153 				MLX5_COUNTER_TYPE_ORIGIN;
9154 
9155 	if (unlikely(status)) {
9156 		raw_to_free = pool->raw_hw;
9157 	} else {
9158 		raw_to_free = pool->raw;
9159 		if (pool->is_aged)
9160 			mlx5_flow_aging_check(sh, pool);
9161 		rte_spinlock_lock(&pool->sl);
9162 		pool->raw = pool->raw_hw;
9163 		rte_spinlock_unlock(&pool->sl);
9164 		/* Be sure the new raw counters data is updated in memory. */
9165 		rte_io_wmb();
9166 		if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
9167 			rte_spinlock_lock(&cmng->csl[cnt_type]);
9168 			TAILQ_CONCAT(&cmng->counters[cnt_type],
9169 				     &pool->counters[query_gen], next);
9170 			rte_spinlock_unlock(&cmng->csl[cnt_type]);
9171 		}
9172 	}
9173 	LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
9174 	pool->raw_hw = NULL;
9175 	sh->cmng.pending_queries--;
9176 }
9177 
9178 static int
9179 flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
9180 		    const struct flow_grp_info *grp_info,
9181 		    struct rte_flow_error *error)
9182 {
9183 	if (grp_info->transfer && grp_info->external &&
9184 	    grp_info->fdb_def_rule) {
9185 		if (group == UINT32_MAX)
9186 			return rte_flow_error_set
9187 						(error, EINVAL,
9188 						 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
9189 						 NULL,
9190 						 "group index not supported");
9191 		*table = group + 1;
9192 	} else {
9193 		*table = group;
9194 	}
9195 	DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
9196 	return 0;
9197 }
9198 
9199 /**
9200  * Translate the rte_flow group index to HW table value.
9201  *
9202  * If tunnel offload is disabled, all group ids are converted to flow
9203  * table ids using the standard method.
9204  * If tunnel offload is enabled, group id can be converted using the
9205  * standard or tunnel conversion method. Group conversion method
9206  * selection depends on flags in `grp_info` parameter:
9207  * - Internal (grp_info.external == 0) groups conversion uses the
9208  *   standard method.
9209  * - Group ids in JUMP action are converted with the tunnel conversion.
9210  * - Group id in rule attribute conversion depends on a rule type and
9211  *   group id value:
9212  *   ** non zero group attributes converted with the tunnel method
9213  *   ** zero group attribute in non-tunnel rule is converted using the
9214  *      standard method - there's only one root table
9215  *   ** zero group attribute in steer tunnel rule is converted with the
9216  *      standard method - single root table
9217  *   ** zero group attribute in match tunnel rule is a special OvS
9218  *      case: that value is used for portability reasons. That group
9219  *      id is converted with the tunnel conversion method.
9220  *
9221  * @param[in] dev
9222  *   Port device
9223  * @param[in] tunnel
9224  *   PMD tunnel offload object
9225  * @param[in] group
9226  *   rte_flow group index value.
9227  * @param[out] table
9228  *   HW table value.
9229  * @param[in] grp_info
9230  *   flags used for conversion
9231  * @param[out] error
9232  *   Pointer to error structure.
9233  *
9234  * @return
9235  *   0 on success, a negative errno value otherwise and rte_errno is set.
9236  */
9237 int
9238 mlx5_flow_group_to_table(struct rte_eth_dev *dev,
9239 			 const struct mlx5_flow_tunnel *tunnel,
9240 			 uint32_t group, uint32_t *table,
9241 			 const struct flow_grp_info *grp_info,
9242 			 struct rte_flow_error *error)
9243 {
9244 	int ret;
9245 	bool standard_translation;
9246 
9247 	if (!grp_info->skip_scale && grp_info->external &&
9248 	    group < MLX5_MAX_TABLES_EXTERNAL)
9249 		group *= MLX5_FLOW_TABLE_FACTOR;
9250 	if (is_tunnel_offload_active(dev)) {
9251 		standard_translation = !grp_info->external ||
9252 					grp_info->std_tbl_fix;
9253 	} else {
9254 		standard_translation = true;
9255 	}
9256 	DRV_LOG(DEBUG,
9257 		"port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
9258 		dev->data->port_id, group, grp_info->transfer,
9259 		grp_info->external, grp_info->fdb_def_rule,
9260 		standard_translation ? "STANDARD" : "TUNNEL");
9261 	if (standard_translation)
9262 		ret = flow_group_to_table(dev->data->port_id, group, table,
9263 					  grp_info, error);
9264 	else
9265 		ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
9266 						      table, error);
9267 
9268 	return ret;
9269 }
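
/*
 * Worked example of the standard translation path above (hypothetical
 * values): an external transfer rule with fdb_def_rule enabled and group 1
 * (assuming group 1 is below MLX5_MAX_TABLES_EXTERNAL and skip_scale is not
 * set) is first scaled to group = 1 * MLX5_FLOW_TABLE_FACTOR and then
 * shifted by one in flow_group_to_table(), so the resulting HW table is
 * MLX5_FLOW_TABLE_FACTOR + 1.
 */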
9270 
9271 /**
9272  * Discover availability of metadata reg_c's.
9273  *
9274  * Iteratively use test flows to check availability.
9275  *
9276  * @param[in] dev
9277  *   Pointer to the Ethernet device structure.
9278  *
9279  * @return
9280  *   0 on success, a negative errno value otherwise and rte_errno is set.
9281  */
9282 int
9283 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
9284 {
9285 	struct mlx5_priv *priv = dev->data->dev_private;
9286 	enum modify_reg idx;
9287 	int n = 0;
9288 
9289 	/* reg_c[0] and reg_c[1] are reserved. */
9290 	priv->sh->flow_mreg_c[n++] = REG_C_0;
9291 	priv->sh->flow_mreg_c[n++] = REG_C_1;
9292 	/* Discover availability of other reg_c's. */
9293 	for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
9294 		struct rte_flow_attr attr = {
9295 			.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
9296 			.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
9297 			.ingress = 1,
9298 		};
9299 		struct rte_flow_item items[] = {
9300 			[0] = {
9301 				.type = RTE_FLOW_ITEM_TYPE_END,
9302 			},
9303 		};
9304 		struct rte_flow_action actions[] = {
9305 			[0] = {
9306 				.type = (enum rte_flow_action_type)
9307 					MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
9308 				.conf = &(struct mlx5_flow_action_copy_mreg){
9309 					.src = REG_C_1,
9310 					.dst = idx,
9311 				},
9312 			},
9313 			[1] = {
9314 				.type = RTE_FLOW_ACTION_TYPE_JUMP,
9315 				.conf = &(struct rte_flow_action_jump){
9316 					.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
9317 				},
9318 			},
9319 			[2] = {
9320 				.type = RTE_FLOW_ACTION_TYPE_END,
9321 			},
9322 		};
9323 		uint32_t flow_idx;
9324 		struct rte_flow *flow;
9325 		struct rte_flow_error error;
9326 
9327 		if (!priv->sh->config.dv_flow_en)
9328 			break;
9329 		/* Create internal flow, validation skips copy action. */
9330 		flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_GEN, &attr,
9331 					items, actions, false, &error);
9332 		flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
9333 				      flow_idx);
9334 		if (!flow)
9335 			continue;
9336 		priv->sh->flow_mreg_c[n++] = idx;
9337 		flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN, flow_idx);
9338 	}
9339 	for (; n < MLX5_MREG_C_NUM; ++n)
9340 		priv->sh->flow_mreg_c[n] = REG_NON;
9341 	priv->sh->metadata_regc_check_flag = 1;
9342 	return 0;
9343 }
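
/*
 * Example of a possible outcome (illustrative only): if the test flows
 * succeed for REG_C_2..REG_C_4 but fail for REG_C_5..REG_C_7, flow_mreg_c[]
 * starts as { REG_C_0, REG_C_1, REG_C_2, REG_C_3, REG_C_4 } with the
 * remaining entries set to REG_NON.
 */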
9344 
9345 int
9346 save_dump_file(const uint8_t *data, uint32_t size,
9347 	uint32_t type, uint64_t id, void *arg, FILE *file)
9348 {
9349 	char line[BUF_SIZE];
9350 	uint32_t out = 0;
9351 	uint32_t k;
9352 	uint32_t actions_num;
9353 	struct rte_flow_query_count *count;
9354 
9355 	memset(line, 0, BUF_SIZE);
9356 	switch (type) {
9357 	case DR_DUMP_REC_TYPE_PMD_MODIFY_HDR:
9358 		actions_num = *(uint32_t *)(arg);
9359 		out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",%d,",
9360 				type, id, actions_num);
9361 		break;
9362 	case DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT:
9363 		out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",",
9364 				type, id);
9365 		break;
9366 	case DR_DUMP_REC_TYPE_PMD_COUNTER:
9367 		count = (struct rte_flow_query_count *)arg;
9368 		fprintf(file,
9369 			"%d,0x%" PRIx64 ",%" PRIu64 ",%" PRIu64 "\n",
9370 			type, id, count->hits, count->bytes);
9371 		return 0;
9372 	default:
9373 		return -1;
9374 	}
9375 
9376 	for (k = 0; k < size; k++) {
9377 		/* Make sure we do not overrun the line buffer length. */
9378 		if (out >= BUF_SIZE - 4) {
9379 			line[out] = '\0';
9380 			break;
9381 		}
9382 		out += snprintf(line + out, BUF_SIZE - out, "%02x",
9383 				(data[k]) & 0xff);
9384 	}
9385 	fprintf(file, "%s\n", line);
9386 	return 0;
9387 }
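
/*
 * The text records emitted above have the following shapes (illustrative):
 *
 *	modify_hdr:    "<type>,0x<id>,<actions_num>,<hex action data>"
 *	pkt_reformat:  "<type>,0x<id>,<hex reformat data>"
 *	counter:       "<type>,0x<id>,<hits>,<bytes>"
 */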
9388 
9389 int
9390 mlx5_flow_query_counter(struct rte_eth_dev *dev, struct rte_flow *flow,
9391 	struct rte_flow_query_count *count, struct rte_flow_error *error)
9392 {
9393 	struct rte_flow_action action[2];
9394 	enum mlx5_flow_drv_type ftype;
9395 	const struct mlx5_flow_driver_ops *fops;
9396 
9397 	if (!flow) {
9398 		return rte_flow_error_set(error, ENOENT,
9399 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9400 				NULL,
9401 				"invalid flow handle");
9402 	}
9403 	action[0].type = RTE_FLOW_ACTION_TYPE_COUNT;
9404 	action[1].type = RTE_FLOW_ACTION_TYPE_END;
9405 	if (flow->counter) {
9406 		memset(count, 0, sizeof(struct rte_flow_query_count));
9407 		ftype = (enum mlx5_flow_drv_type)(flow->drv_type);
9408 		MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN &&
9409 						ftype < MLX5_FLOW_TYPE_MAX);
9410 		fops = flow_get_drv_ops(ftype);
9411 		return fops->query(dev, flow, action, count, error);
9412 	}
9413 	return -1;
9414 }
9415 
9416 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9417 /**
9418  * Dump flow ipool data to file
9419  *
9420  * @param[in] dev
9421  *   The pointer to Ethernet device.
9422  * @param[in] file
9423  *   A pointer to a file for output.
9424  * @param[out] error
9425  *   Perform verbose error reporting if not NULL. PMDs initialize this
9426  *   structure in case of error only.
9427  * @return
9428  *   0 on success, a negative value otherwise.
9429  */
9430 int
9431 mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev,
9432 	struct rte_flow *flow, FILE *file,
9433 	struct rte_flow_error *error)
9434 {
9435 	struct mlx5_priv *priv = dev->data->dev_private;
9436 	struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
9437 	struct mlx5_flow_dv_encap_decap_resource *encap_decap;
9438 	uint32_t handle_idx;
9439 	struct mlx5_flow_handle *dh;
9440 	struct rte_flow_query_count count;
9441 	uint32_t actions_num;
9442 	const uint8_t *data;
9443 	size_t size;
9444 	uint64_t id;
9445 	uint32_t type;
9446 	void *action = NULL;
9447 
9448 	if (!flow) {
9449 		return rte_flow_error_set(error, ENOENT,
9450 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9451 				NULL,
9452 				"invalid flow handle");
9453 	}
9454 	handle_idx = flow->dev_handles;
9455 	/* query counter */
9456 	if (flow->counter &&
9457 	(!mlx5_counter_query(dev, flow->counter, false,
9458 	&count.hits, &count.bytes, &action)) && action) {
9459 		id = (uint64_t)(uintptr_t)action;
9460 		type = DR_DUMP_REC_TYPE_PMD_COUNTER;
9461 		save_dump_file(NULL, 0, type,
9462 			id, (void *)&count, file);
9463 	}
9464 
9465 	while (handle_idx) {
9466 		dh = mlx5_ipool_get(priv->sh->ipool
9467 				[MLX5_IPOOL_MLX5_FLOW], handle_idx);
9468 		if (!dh)
9469 			break;
9470 		handle_idx = dh->next.next;
9471 
9472 		/* Get modify_hdr and encap_decap buf from ipools. */
9473 		encap_decap = NULL;
9474 		modify_hdr = dh->dvh.modify_hdr;
9475 
9476 		if (dh->dvh.rix_encap_decap) {
9477 			encap_decap = mlx5_ipool_get(priv->sh->ipool
9478 						[MLX5_IPOOL_DECAP_ENCAP],
9479 						dh->dvh.rix_encap_decap);
9480 		}
9481 		if (modify_hdr) {
9482 			data = (const uint8_t *)modify_hdr->actions;
9483 			size = (size_t)(modify_hdr->actions_num) * 8;
9484 			id = (uint64_t)(uintptr_t)modify_hdr->action;
9485 			actions_num = modify_hdr->actions_num;
9486 			type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
9487 			save_dump_file(data, size, type, id,
9488 						(void *)(&actions_num), file);
9489 		}
9490 		if (encap_decap) {
9491 			data = encap_decap->buf;
9492 			size = encap_decap->size;
9493 			id = (uint64_t)(uintptr_t)encap_decap->action;
9494 			type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
9495 			save_dump_file(data, size, type,
9496 						id, NULL, file);
9497 		}
9498 	}
9499 	return 0;
9500 }
9501 
9502 /**
9503  * Dump all flows' encap_decap/modify_hdr/counter data to file
9504  *
9505  * @param[in] dev
9506  *   The pointer to Ethernet device.
9507  * @param[in] file
9508  *   A pointer to a file for output.
9509  * @param[out] error
9510  *   Perform verbose error reporting if not NULL. PMDs initialize this
9511  *   structure in case of error only.
9512  * @return
9513  *   0 on success, a negative value otherwise.
9514  */
9515 static int
9516 mlx5_flow_dev_dump_sh_all(struct rte_eth_dev *dev,
9517 	FILE *file, struct rte_flow_error *error __rte_unused)
9518 {
9519 	struct mlx5_priv *priv = dev->data->dev_private;
9520 	struct mlx5_dev_ctx_shared *sh = priv->sh;
9521 	struct mlx5_hlist *h;
9522 	struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
9523 	struct mlx5_flow_dv_encap_decap_resource *encap_decap;
9524 	struct rte_flow_query_count count;
9525 	uint32_t actions_num;
9526 	const uint8_t *data;
9527 	size_t size;
9528 	uint64_t id;
9529 	uint32_t type;
9530 	uint32_t i;
9531 	uint32_t j;
9532 	struct mlx5_list_inconst *l_inconst;
9533 	struct mlx5_list_entry *e;
9534 	int lcore_index;
9535 	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
9536 	uint32_t max;
9537 	void *action;
9538 
9539 	/* The encap_decap hlist is lcore-shared; use the global core cache. */
9540 	i = MLX5_LIST_GLOBAL;
9541 	h = sh->encaps_decaps;
9542 	if (h) {
9543 		for (j = 0; j <= h->mask; j++) {
9544 			l_inconst = &h->buckets[j].l;
9545 			if (!l_inconst || !l_inconst->cache[i])
9546 				continue;
9547 
9548 			e = LIST_FIRST(&l_inconst->cache[i]->h);
9549 			while (e) {
9550 				encap_decap =
9551 				(struct mlx5_flow_dv_encap_decap_resource *)e;
9552 				data = encap_decap->buf;
9553 				size = encap_decap->size;
9554 				id = (uint64_t)(uintptr_t)encap_decap->action;
9555 				type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
9556 				save_dump_file(data, size, type,
9557 					id, NULL, file);
9558 				e = LIST_NEXT(e, next);
9559 			}
9560 		}
9561 	}
9562 
9563 	/* get modify_hdr */
9564 	h = sh->modify_cmds;
9565 	if (h) {
9566 		lcore_index = rte_lcore_index(rte_lcore_id());
9567 		if (unlikely(lcore_index == -1)) {
9568 			lcore_index = MLX5_LIST_NLCORE;
9569 			rte_spinlock_lock(&h->l_const.lcore_lock);
9570 		}
9571 		i = lcore_index;
9572 
9573 		for (j = 0; j <= h->mask; j++) {
9574 			l_inconst = &h->buckets[j].l;
9575 			if (!l_inconst || !l_inconst->cache[i])
9576 				continue;
9577 
9578 			e = LIST_FIRST(&l_inconst->cache[i]->h);
9579 			while (e) {
9580 				modify_hdr =
9581 				(struct mlx5_flow_dv_modify_hdr_resource *)e;
9582 				data = (const uint8_t *)modify_hdr->actions;
9583 				size = (size_t)(modify_hdr->actions_num) * 8;
9584 				actions_num = modify_hdr->actions_num;
9585 				id = (uint64_t)(uintptr_t)modify_hdr->action;
9586 				type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
9587 				save_dump_file(data, size, type, id,
9588 						(void *)(&actions_num), file);
9589 				e = LIST_NEXT(e, next);
9590 			}
9591 		}
9592 
9593 		if (unlikely(lcore_index == MLX5_LIST_NLCORE))
9594 			rte_spinlock_unlock(&h->l_const.lcore_lock);
9595 	}
9596 
9597 	/* get counter */
9598 	MLX5_ASSERT(cmng->n_valid <= cmng->n);
9599 	max = MLX5_COUNTERS_PER_POOL * cmng->n_valid;
9600 	for (j = 1; j <= max; j++) {
9601 		action = NULL;
9602 		if ((!mlx5_counter_query(dev, j, false, &count.hits,
9603 		&count.bytes, &action)) && action) {
9604 			id = (uint64_t)(uintptr_t)action;
9605 			type = DR_DUMP_REC_TYPE_PMD_COUNTER;
9606 			save_dump_file(NULL, 0, type,
9607 					id, (void *)&count, file);
9608 		}
9609 	}
9610 	return 0;
9611 }
9612 #endif
9613 
9614 /**
9615  * Dump flow raw hw data to file
9616  *
9617  * @param[in] dev
9618  *    The pointer to Ethernet device.
9619  * @param[in] file
9620  *   A pointer to a file for output.
9621  * @param[out] error
9622  *   Perform verbose error reporting if not NULL. PMDs initialize this
9623  *   structure in case of error only.
9624  * @return
9625  *   0 on success, a negative value otherwise.
9626  */
9627 int
9628 mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow_idx,
9629 		   FILE *file,
9630 		   struct rte_flow_error *error __rte_unused)
9631 {
9632 	struct mlx5_priv *priv = dev->data->dev_private;
9633 	struct mlx5_dev_ctx_shared *sh = priv->sh;
9634 	uint32_t handle_idx;
9635 	int ret;
9636 	struct mlx5_flow_handle *dh;
9637 	struct rte_flow *flow;
9638 
9639 	if (!sh->config.dv_flow_en) {
9640 		if (fputs("device dv flow disabled\n", file) <= 0)
9641 			return -errno;
9642 		return -ENOTSUP;
9643 	}
9644 
9645 	/* dump all */
9646 	if (!flow_idx) {
9647 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9648 		if (mlx5_flow_dev_dump_sh_all(dev, file, error))
9649 			return -EINVAL;
9650 #endif
9651 		return mlx5_devx_cmd_flow_dump(sh->fdb_domain,
9652 					sh->rx_domain,
9653 					sh->tx_domain, file);
9654 	}
9655 	/* dump one */
9656 	flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
9657 			(uintptr_t)(void *)flow_idx);
9658 	if (!flow)
9659 		return -EINVAL;
9660 
9661 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9662 	mlx5_flow_dev_dump_ipool(dev, flow, file, error);
9663 #endif
9664 	handle_idx = flow->dev_handles;
9665 	while (handle_idx) {
9666 		dh = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
9667 				handle_idx);
9668 		if (!dh)
9669 			return -ENOENT;
9670 		if (dh->drv_flow) {
9671 			ret = mlx5_devx_cmd_flow_single_dump(dh->drv_flow,
9672 					file);
9673 			if (ret)
9674 				return -ENOENT;
9675 		}
9676 		handle_idx = dh->next.next;
9677 	}
9678 	return 0;
9679 }
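
/*
 * Illustrative sketch (not part of the driver): this callback is reached
 * through the generic rte_flow_dev_dump() API, e.g. (hypothetical names):
 *
 *	FILE *f = fopen("/tmp/mlx5_flows.txt", "w");
 *	struct rte_flow_error error;
 *
 *	rte_flow_dev_dump(port_id, NULL, f, &error);	 (dump all flows)
 *	rte_flow_dev_dump(port_id, flow, f, &error);	 (dump one flow)
 *	fclose(f);
 */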
9680 
9681 /**
9682  * Get aged-out flows.
9683  *
9684  * @param[in] dev
9685  *   Pointer to the Ethernet device structure.
9686  * @param[in] contexts
9687  *   The address of an array of pointers to the aged-out flows contexts.
9688  * @param[in] nb_contexts
9689  *   The length of the context array.
9690  * @param[out] error
9691  *   Perform verbose error reporting if not NULL. Initialized in case of
9692  *   error only.
9693  *
9694  * @return
9695  *   The number of contexts retrieved on success, a negative errno value
9696  *   otherwise. If nb_contexts is 0, return the total number of aged
9697  *   contexts. If nb_contexts is not 0, return the number of aged flows
9698  *   reported in the context array.
9699  */
9700 int
9701 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
9702 			uint32_t nb_contexts, struct rte_flow_error *error)
9703 {
9704 	const struct mlx5_flow_driver_ops *fops;
9705 	struct rte_flow_attr attr = { .transfer = 0 };
9706 
9707 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
9708 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
9709 		return fops->get_aged_flows(dev, contexts, nb_contexts,
9710 						    error);
9711 	}
9712 	DRV_LOG(ERR,
9713 		"port %u get aged flows is not supported.",
9714 		 dev->data->port_id);
9715 	return -ENOTSUP;
9716 }
9717 
9718 /* Wrapper for driver action_validate op callback */
9719 static int
9720 flow_drv_action_validate(struct rte_eth_dev *dev,
9721 			 const struct rte_flow_indir_action_conf *conf,
9722 			 const struct rte_flow_action *action,
9723 			 const struct mlx5_flow_driver_ops *fops,
9724 			 struct rte_flow_error *error)
9725 {
9726 	static const char err_msg[] = "indirect action validation unsupported";
9727 
9728 	if (!fops->action_validate) {
9729 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9730 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9731 				   NULL, err_msg);
9732 		return -rte_errno;
9733 	}
9734 	return fops->action_validate(dev, conf, action, error);
9735 }
9736 
9737 /**
9738  * Destroys the shared action by handle.
9739  *
9740  * @param dev
9741  *   Pointer to Ethernet device structure.
9742  * @param[in] handle
9743  *   Handle for the indirect action object to be destroyed.
9744  * @param[out] error
9745  *   Perform verbose error reporting if not NULL. PMDs initialize this
9746  *   structure in case of error only.
9747  *
9748  * @return
9749  *   0 on success, a negative errno value otherwise and rte_errno is set.
9750  *
9751  * @note: wrapper for driver action_destroy op callback.
9752  */
9753 static int
9754 mlx5_action_handle_destroy(struct rte_eth_dev *dev,
9755 			   struct rte_flow_action_handle *handle,
9756 			   struct rte_flow_error *error)
9757 {
9758 	static const char err_msg[] = "indirect action destruction unsupported";
9759 	struct rte_flow_attr attr = { .transfer = 0 };
9760 	const struct mlx5_flow_driver_ops *fops =
9761 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9762 
9763 	if (!fops->action_destroy) {
9764 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9765 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9766 				   NULL, err_msg);
9767 		return -rte_errno;
9768 	}
9769 	return fops->action_destroy(dev, handle, error);
9770 }
9771 
9772 /* Wrapper for driver action_update op callback */
9773 static int
9774 flow_drv_action_update(struct rte_eth_dev *dev,
9775 		       struct rte_flow_action_handle *handle,
9776 		       const void *update,
9777 		       const struct mlx5_flow_driver_ops *fops,
9778 		       struct rte_flow_error *error)
9779 {
9780 	static const char err_msg[] = "indirect action update unsupported";
9781 
9782 	if (!fops->action_update) {
9783 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9784 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9785 				   NULL, err_msg);
9786 		return -rte_errno;
9787 	}
9788 	return fops->action_update(dev, handle, update, error);
9789 }
9790 
9791 /* Wrapper for driver action_query op callback */
9792 static int
9793 flow_drv_action_query(struct rte_eth_dev *dev,
9794 		      const struct rte_flow_action_handle *handle,
9795 		      void *data,
9796 		      const struct mlx5_flow_driver_ops *fops,
9797 		      struct rte_flow_error *error)
9798 {
9799 	static const char err_msg[] = "indirect action query unsupported";
9800 
9801 	if (!fops->action_query) {
9802 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9803 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9804 				   NULL, err_msg);
9805 		return -rte_errno;
9806 	}
9807 	return fops->action_query(dev, handle, data, error);
9808 }
9809 
9810 /**
9811  * Create indirect action for reuse in multiple flow rules.
9812  *
9813  * @param dev
9814  *   Pointer to Ethernet device structure.
9815  * @param conf
9816  *   Pointer to indirect action object configuration.
9817  * @param[in] action
9818  *   Action configuration for indirect action object creation.
9819  * @param[out] error
9820  *   Perform verbose error reporting if not NULL. PMDs initialize this
9821  *   structure in case of error only.
9822  * @return
9823  *   A valid handle in case of success, NULL otherwise and rte_errno is set.
9824  */
9825 static struct rte_flow_action_handle *
9826 mlx5_action_handle_create(struct rte_eth_dev *dev,
9827 			  const struct rte_flow_indir_action_conf *conf,
9828 			  const struct rte_flow_action *action,
9829 			  struct rte_flow_error *error)
9830 {
9831 	static const char err_msg[] = "indirect action creation unsupported";
9832 	struct rte_flow_attr attr = { .transfer = 0 };
9833 	const struct mlx5_flow_driver_ops *fops =
9834 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9835 
9836 	if (flow_drv_action_validate(dev, conf, action, fops, error))
9837 		return NULL;
9838 	if (!fops->action_create) {
9839 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9840 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9841 				   NULL, err_msg);
9842 		return NULL;
9843 	}
9844 	return fops->action_create(dev, conf, action, error);
9845 }
9846 
9847 /**
9848  * Updates in place the indirect action configuration pointed to by *handle*
9849  * with the configuration provided as the *update* argument.
9850  * The update of the indirect action configuration affects all flow rules
9851  * reusing the action via the handle.
9852  *
9853  * @param dev
9854  *   Pointer to Ethernet device structure.
9855  * @param[in] handle
9856  *   Handle for the indirect action to be updated.
9857  * @param[in] update
9858  *   Action specification used to modify the action pointed by handle.
9859  *   *update* could be of same type with the action pointed by the *handle*
9860  *   handle argument, or some other structures like a wrapper, depending on
9861  *   the indirect action type.
9862  * @param[out] error
9863  *   Perform verbose error reporting if not NULL. PMDs initialize this
9864  *   structure in case of error only.
9865  *
9866  * @return
9867  *   0 on success, a negative errno value otherwise and rte_errno is set.
9868  */
9869 static int
9870 mlx5_action_handle_update(struct rte_eth_dev *dev,
9871 		struct rte_flow_action_handle *handle,
9872 		const void *update,
9873 		struct rte_flow_error *error)
9874 {
9875 	struct rte_flow_attr attr = { .transfer = 0 };
9876 	const struct mlx5_flow_driver_ops *fops =
9877 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9878 	int ret;
9879 
9880 	ret = flow_drv_action_validate(dev, NULL,
9881 			(const struct rte_flow_action *)update, fops, error);
9882 	if (ret)
9883 		return ret;
9884 	return flow_drv_action_update(dev, handle, update, fops,
9885 				      error);
9886 }
9887 
9888 /**
9889  * Query the indirect action by handle.
9890  *
9891  * This function allows retrieving action-specific data such as counters.
9892  * Data is gathered by special action which may be present/referenced in
9893  * more than one flow rule definition.
9894  *
9895  * see @RTE_FLOW_ACTION_TYPE_COUNT
9896  *
9897  * @param dev
9898  *   Pointer to Ethernet device structure.
9899  * @param[in] handle
9900  *   Handle for the indirect action to query.
9901  * @param[in, out] data
9902  *   Pointer to storage for the associated query data type.
9903  * @param[out] error
9904  *   Perform verbose error reporting if not NULL. PMDs initialize this
9905  *   structure in case of error only.
9906  *
9907  * @return
9908  *   0 on success, a negative errno value otherwise and rte_errno is set.
9909  */
9910 static int
9911 mlx5_action_handle_query(struct rte_eth_dev *dev,
9912 			 const struct rte_flow_action_handle *handle,
9913 			 void *data,
9914 			 struct rte_flow_error *error)
9915 {
9916 	struct rte_flow_attr attr = { .transfer = 0 };
9917 	const struct mlx5_flow_driver_ops *fops =
9918 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9919 
9920 	return flow_drv_action_query(dev, handle, data, fops, error);
9921 }
9922 
9923 /**
9924  * Destroy all indirect actions (shared RSS).
9925  *
9926  * @param dev
9927  *   Pointer to Ethernet device.
9928  *
9929  * @return
9930  *   0 on success, a negative errno value otherwise and rte_errno is set.
9931  */
9932 int
9933 mlx5_action_handle_flush(struct rte_eth_dev *dev)
9934 {
9935 	struct rte_flow_error error;
9936 	struct mlx5_priv *priv = dev->data->dev_private;
9937 	struct mlx5_shared_action_rss *shared_rss;
9938 	int ret = 0;
9939 	uint32_t idx;
9940 
9941 	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
9942 		      priv->rss_shared_actions, idx, shared_rss, next) {
9943 		ret |= mlx5_action_handle_destroy(dev,
9944 		       (struct rte_flow_action_handle *)(uintptr_t)idx, &error);
9945 	}
9946 	return ret;
9947 }
9948 
9949 /**
9950  * Validate existing indirect actions against current device configuration
9951  * and attach them to device resources.
9952  *
9953  * @param dev
9954  *   Pointer to Ethernet device.
9955  *
9956  * @return
9957  *   0 on success, a negative errno value otherwise and rte_errno is set.
9958  */
9959 int
9960 mlx5_action_handle_attach(struct rte_eth_dev *dev)
9961 {
9962 	struct mlx5_priv *priv = dev->data->dev_private;
9963 	int ret = 0;
9964 	struct mlx5_ind_table_obj *ind_tbl, *ind_tbl_last;
9965 
9966 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9967 		const char *message;
9968 		uint32_t queue_idx;
9969 
9970 		ret = mlx5_validate_rss_queues(dev, ind_tbl->queues,
9971 					       ind_tbl->queues_n,
9972 					       &message, &queue_idx);
9973 		if (ret != 0) {
9974 			DRV_LOG(ERR, "Port %u cannot use queue %u in RSS: %s",
9975 				dev->data->port_id, ind_tbl->queues[queue_idx],
9976 				message);
9977 			break;
9978 		}
9979 	}
9980 	if (ret != 0)
9981 		return ret;
9982 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9983 		ret = mlx5_ind_table_obj_attach(dev, ind_tbl);
9984 		if (ret != 0) {
9985 			DRV_LOG(ERR, "Port %u could not attach "
9986 				"indirection table obj %p",
9987 				dev->data->port_id, (void *)ind_tbl);
9988 			goto error;
9989 		}
9990 	}
9991 
9992 	return 0;
9993 error:
9994 	ind_tbl_last = ind_tbl;
9995 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9996 		if (ind_tbl == ind_tbl_last)
9997 			break;
9998 		if (mlx5_ind_table_obj_detach(dev, ind_tbl) != 0)
9999 			DRV_LOG(CRIT, "Port %u could not detach "
10000 				"indirection table obj %p on rollback",
10001 				dev->data->port_id, (void *)ind_tbl);
10002 	}
10003 	return ret;
10004 }
10005 
10006 /**
10007  * Detach indirect actions of the device from its resources.
10008  *
10009  * @param dev
10010  *   Pointer to Ethernet device.
10011  *
10012  * @return
10013  *   0 on success, a negative errno value otherwise and rte_errno is set.
10014  */
10015 int
10016 mlx5_action_handle_detach(struct rte_eth_dev *dev)
10017 {
10018 	struct mlx5_priv *priv = dev->data->dev_private;
10019 	int ret = 0;
10020 	struct mlx5_ind_table_obj *ind_tbl, *ind_tbl_last;
10021 
10022 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
10023 		ret = mlx5_ind_table_obj_detach(dev, ind_tbl);
10024 		if (ret != 0) {
10025 			DRV_LOG(ERR, "Port %u could not detach "
10026 				"indirection table obj %p",
10027 				dev->data->port_id, (void *)ind_tbl);
10028 			goto error;
10029 		}
10030 	}
10031 	return 0;
10032 error:
10033 	ind_tbl_last = ind_tbl;
10034 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
10035 		if (ind_tbl == ind_tbl_last)
10036 			break;
10037 		if (mlx5_ind_table_obj_attach(dev, ind_tbl) != 0)
10038 			DRV_LOG(CRIT, "Port %u could not attach "
10039 				"indirection table obj %p on rollback",
10040 				dev->data->port_id, (void *)ind_tbl);
10041 	}
10042 	return ret;
10043 }
10044 
10045 #ifndef HAVE_MLX5DV_DR
10046 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
10047 #else
10048 #define MLX5_DOMAIN_SYNC_FLOW \
10049 	(MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
10050 #endif
10051 
10052 int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
10053 {
10054 	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
10055 	const struct mlx5_flow_driver_ops *fops;
10056 	int ret;
10057 	struct rte_flow_attr attr = { .transfer = 0 };
10058 
10059 	fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10060 	ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
10061 	if (ret > 0)
10062 		ret = -ret;
10063 	return ret;
10064 }
10065 
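/*
 * Illustrative usage of the entry point above, not part of the driver: the
 * domain bit macros come from rte_pmd_mlx5.h and the port_id is an
 * assumption of the sketch. A zero return indicates the rules in the
 * requested domains were synchronized to the hardware.
 *
 *   int ret = rte_pmd_mlx5_sync_flow(port_id,
 *                                    MLX5_DOMAIN_BIT_NIC_RX |
 *                                    MLX5_DOMAIN_BIT_NIC_TX);
 */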
10066 const struct mlx5_flow_tunnel *
10067 mlx5_get_tof(const struct rte_flow_item *item,
10068 	     const struct rte_flow_action *action,
10069 	     enum mlx5_tof_rule_type *rule_type)
10070 {
10071 	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
10072 		if (item->type == (typeof(item->type))
10073 				  MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL) {
10074 			*rule_type = MLX5_TUNNEL_OFFLOAD_MATCH_RULE;
10075 			return flow_items_to_tunnel(item);
10076 		}
10077 	}
10078 	for (; action->conf != RTE_FLOW_ACTION_TYPE_END; action++) {
10079 		if (action->type == (typeof(action->type))
10080 				    MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET) {
10081 			*rule_type = MLX5_TUNNEL_OFFLOAD_SET_RULE;
10082 			return flow_actions_to_tunnel(action);
10083 		}
10084 	}
10085 	return NULL;
10086 }
10087 
10088 /**
10089  * Tunnel offload functionality is defined for the DV environment only.
10090  */
10091 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
10092 __extension__
10093 union tunnel_offload_mark {
10094 	uint32_t val;
10095 	struct {
10096 		uint32_t app_reserve:8;
10097 		uint32_t table_id:15;
10098 		uint32_t transfer:1;
10099 		uint32_t _unused_:8;
10100 	};
10101 };
10102 
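/*
 * Worked example of the mark layout above, assuming the usual little-endian
 * bit-field allocation (low bits first): app_reserve occupies bits 0-7,
 * table_id bits 8-22 and transfer bit 23. Encoding table_id 5 for a
 * non-transfer rule therefore yields val = 5 << 8 = 0x500; setting the
 * transfer bit adds 1 << 23, giving 0x800500.
 */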
10103 static bool
10104 mlx5_access_tunnel_offload_db
10105 	(struct rte_eth_dev *dev,
10106 	 bool (*match)(struct rte_eth_dev *,
10107 		       struct mlx5_flow_tunnel *, const void *),
10108 	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
10109 	 void (*miss)(struct rte_eth_dev *, void *),
10110 	 void *ctx, bool lock_op);
10111 
10112 static int
10113 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
10114 			     struct rte_flow *flow,
10115 			     const struct rte_flow_attr *attr,
10116 			     const struct rte_flow_action *app_actions,
10117 			     uint32_t flow_idx,
10118 			     const struct mlx5_flow_tunnel *tunnel,
10119 			     struct tunnel_default_miss_ctx *ctx,
10120 			     struct rte_flow_error *error)
10121 {
10122 	struct mlx5_priv *priv = dev->data->dev_private;
10123 	struct mlx5_flow *dev_flow;
10124 	struct rte_flow_attr miss_attr = *attr;
10125 	const struct rte_flow_item miss_items[2] = {
10126 		{
10127 			.type = RTE_FLOW_ITEM_TYPE_ETH,
10128 			.spec = NULL,
10129 			.last = NULL,
10130 			.mask = NULL
10131 		},
10132 		{
10133 			.type = RTE_FLOW_ITEM_TYPE_END,
10134 			.spec = NULL,
10135 			.last = NULL,
10136 			.mask = NULL
10137 		}
10138 	};
10139 	union tunnel_offload_mark mark_id;
10140 	struct rte_flow_action_mark miss_mark;
10141 	struct rte_flow_action miss_actions[3] = {
10142 		[0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
10143 		[2] = { .type = RTE_FLOW_ACTION_TYPE_END,  .conf = NULL }
10144 	};
10145 	const struct rte_flow_action_jump *jump_data;
10146 	uint32_t i, flow_table = 0; /* prevent compilation warning */
10147 	struct flow_grp_info grp_info = {
10148 		.external = 1,
10149 		.transfer = attr->transfer,
10150 		.fdb_def_rule = !!priv->fdb_def_rule,
10151 		.std_tbl_fix = 0,
10152 	};
10153 	int ret;
10154 
10155 	if (!attr->transfer) {
10156 		uint32_t q_size;
10157 
10158 		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
10159 		q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
10160 		ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
10161 					 0, SOCKET_ID_ANY);
10162 		if (!ctx->queue)
10163 			return rte_flow_error_set
10164 				(error, ENOMEM,
10165 				RTE_FLOW_ERROR_TYPE_ACTION_CONF,
10166 				NULL, "invalid default miss RSS");
10167 		ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT;
10168 		ctx->action_rss.level = 0;
10169 		ctx->action_rss.types = priv->rss_conf.rss_hf;
10170 		ctx->action_rss.key_len = priv->rss_conf.rss_key_len;
10171 		ctx->action_rss.queue_num = priv->reta_idx_n;
10172 		ctx->action_rss.key = priv->rss_conf.rss_key;
10173 		ctx->action_rss.queue = ctx->queue;
10174 		if (!priv->reta_idx_n || !priv->rxqs_n)
10175 			return rte_flow_error_set
10176 				(error, EINVAL,
10177 				RTE_FLOW_ERROR_TYPE_ACTION_CONF,
10178 				NULL, "invalid port configuration");
10179 		if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
10180 			ctx->action_rss.types = 0;
10181 		for (i = 0; i != priv->reta_idx_n; ++i)
10182 			ctx->queue[i] = (*priv->reta_idx)[i];
10183 	} else {
10184 		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
10185 		ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
10186 	}
10187 	miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
10188 	for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++);
10189 	jump_data = app_actions->conf;
10190 	miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
10191 	miss_attr.group = jump_data->group;
10192 	ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
10193 				       &flow_table, &grp_info, error);
10194 	if (ret)
10195 		return rte_flow_error_set(error, EINVAL,
10196 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
10197 					  NULL, "invalid tunnel id");
10198 	mark_id.app_reserve = 0;
10199 	mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
10200 	mark_id.transfer = !!attr->transfer;
10201 	mark_id._unused_ = 0;
10202 	miss_mark.id = mark_id.val;
10203 	dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
10204 				    miss_items, miss_actions, flow_idx, error);
10205 	if (!dev_flow)
10206 		return -rte_errno;
10207 	dev_flow->flow = flow;
10208 	dev_flow->external = true;
10209 	dev_flow->tunnel = tunnel;
10210 	dev_flow->tof_type = MLX5_TUNNEL_OFFLOAD_MISS_RULE;
10211 	/* Subflow object was created, we must include one in the list. */
10212 	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
10213 		      dev_flow->handle, next);
10214 	DRV_LOG(DEBUG,
10215 		"port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
10216 		dev->data->port_id, tunnel->app_tunnel.type,
10217 		tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
10218 	ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
10219 				  miss_actions, error);
10220 	if (!ret)
10221 		ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
10222 						  error);
10223 
10224 	return ret;
10225 }
10226 
10227 static const struct mlx5_flow_tbl_data_entry  *
10228 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
10229 {
10230 	struct mlx5_priv *priv = dev->data->dev_private;
10231 	struct mlx5_dev_ctx_shared *sh = priv->sh;
10232 	struct mlx5_list_entry *he;
10233 	union tunnel_offload_mark mbits = { .val = mark };
10234 	union mlx5_flow_tbl_key table_key = {
10235 		{
10236 			.level = tunnel_id_to_flow_tbl(mbits.table_id),
10237 			.id = 0,
10238 			.reserved = 0,
10239 			.dummy = 0,
10240 			.is_fdb = !!mbits.transfer,
10241 			.is_egress = 0,
10242 		}
10243 	};
10244 	struct mlx5_flow_cb_ctx ctx = {
10245 		.data = &table_key.v64,
10246 	};
10247 
10248 	he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, &ctx);
10249 	return he ?
10250 	       container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL;
10251 }
10252 
10253 static void
10254 mlx5_flow_tunnel_grp2tbl_remove_cb(void *tool_ctx,
10255 				   struct mlx5_list_entry *entry)
10256 {
10257 	struct mlx5_dev_ctx_shared *sh = tool_ctx;
10258 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10259 
10260 	mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10261 			tunnel_flow_tbl_to_id(tte->flow_table));
10262 	mlx5_free(tte);
10263 }
10264 
10265 static int
10266 mlx5_flow_tunnel_grp2tbl_match_cb(void *tool_ctx __rte_unused,
10267 				  struct mlx5_list_entry *entry, void *cb_ctx)
10268 {
10269 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
10270 	union tunnel_tbl_key tbl = {
10271 		.val = *(uint64_t *)(ctx->data),
10272 	};
10273 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10274 
10275 	return tbl.tunnel_id != tte->tunnel_id || tbl.group != tte->group;
10276 }
10277 
10278 static struct mlx5_list_entry *
10279 mlx5_flow_tunnel_grp2tbl_create_cb(void *tool_ctx, void *cb_ctx)
10280 {
10281 	struct mlx5_dev_ctx_shared *sh = tool_ctx;
10282 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
10283 	struct tunnel_tbl_entry *tte;
10284 	union tunnel_tbl_key tbl = {
10285 		.val = *(uint64_t *)(ctx->data),
10286 	};
10287 
10288 	tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
10289 			  sizeof(*tte), 0,
10290 			  SOCKET_ID_ANY);
10291 	if (!tte)
10292 		goto err;
10293 	mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10294 			  &tte->flow_table);
10295 	if (tte->flow_table >= MLX5_MAX_TABLES) {
10296 		DRV_LOG(ERR, "Tunnel TBL ID %d exceed max limit.",
10297 			tte->flow_table);
10298 		mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10299 				tte->flow_table);
10300 		goto err;
10301 	} else if (!tte->flow_table) {
10302 		goto err;
10303 	}
10304 	tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table);
10305 	tte->tunnel_id = tbl.tunnel_id;
10306 	tte->group = tbl.group;
10307 	return &tte->hash;
10308 err:
10309 	if (tte)
10310 		mlx5_free(tte);
10311 	return NULL;
10312 }
10313 
10314 static struct mlx5_list_entry *
10315 mlx5_flow_tunnel_grp2tbl_clone_cb(void *tool_ctx __rte_unused,
10316 				  struct mlx5_list_entry *oentry,
10317 				  void *cb_ctx __rte_unused)
10318 {
10319 	struct tunnel_tbl_entry *tte = mlx5_malloc(MLX5_MEM_SYS, sizeof(*tte),
10320 						   0, SOCKET_ID_ANY);
10321 
10322 	if (!tte)
10323 		return NULL;
10324 	memcpy(tte, oentry, sizeof(*tte));
10325 	return &tte->hash;
10326 }
10327 
10328 static void
10329 mlx5_flow_tunnel_grp2tbl_clone_free_cb(void *tool_ctx __rte_unused,
10330 				       struct mlx5_list_entry *entry)
10331 {
10332 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10333 
10334 	mlx5_free(tte);
10335 }
10336 
10337 static uint32_t
10338 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
10339 				const struct mlx5_flow_tunnel *tunnel,
10340 				uint32_t group, uint32_t *table,
10341 				struct rte_flow_error *error)
10342 {
10343 	struct mlx5_list_entry *he;
10344 	struct tunnel_tbl_entry *tte;
10345 	union tunnel_tbl_key key = {
10346 		.tunnel_id = tunnel ? tunnel->tunnel_id : 0,
10347 		.group = group
10348 	};
10349 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10350 	struct mlx5_hlist *group_hash;
10351 	struct mlx5_flow_cb_ctx ctx = {
10352 		.data = &key.val,
10353 	};
10354 
10355 	group_hash = tunnel ? tunnel->groups : thub->groups;
10356 	he = mlx5_hlist_register(group_hash, key.val, &ctx);
10357 	if (!he)
10358 		return rte_flow_error_set(error, EINVAL,
10359 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
10360 					  NULL,
10361 					  "tunnel group index not supported");
10362 	tte = container_of(he, typeof(*tte), hash);
10363 	*table = tte->flow_table;
10364 	DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
10365 		dev->data->port_id, key.tunnel_id, group, *table);
10366 	return 0;
10367 }
10368 
10369 static void
10370 mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
10371 		      struct mlx5_flow_tunnel *tunnel)
10372 {
10373 	struct mlx5_priv *priv = dev->data->dev_private;
10374 	struct mlx5_indexed_pool *ipool;
10375 
10376 	DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
10377 		dev->data->port_id, tunnel->tunnel_id);
10378 	LIST_REMOVE(tunnel, chain);
10379 	mlx5_hlist_destroy(tunnel->groups);
10380 	ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
10381 	mlx5_ipool_free(ipool, tunnel->tunnel_id);
10382 }
10383 
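/**
 * Walk the per-port tunnel offload DB under the hub spinlock and call
 * match() for each registered tunnel. On the first match hit() is invoked,
 * otherwise miss() is invoked; lock_op selects whether those callbacks run
 * with the spinlock still held (true) or after it has been released (false).
 *
 * @return
 *   true if a tunnel matched, false otherwise.
 */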
10384 static bool
10385 mlx5_access_tunnel_offload_db
10386 	(struct rte_eth_dev *dev,
10387 	 bool (*match)(struct rte_eth_dev *,
10388 		       struct mlx5_flow_tunnel *, const void *),
10389 	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
10390 	 void (*miss)(struct rte_eth_dev *, void *),
10391 	 void *ctx, bool lock_op)
10392 {
10393 	bool verdict = false;
10394 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10395 	struct mlx5_flow_tunnel *tunnel;
10396 
10397 	rte_spinlock_lock(&thub->sl);
10398 	LIST_FOREACH(tunnel, &thub->tunnels, chain) {
10399 		verdict = match(dev, tunnel, (const void *)ctx);
10400 		if (verdict)
10401 			break;
10402 	}
10403 	if (!lock_op)
10404 		rte_spinlock_unlock(&thub->sl);
10405 	if (verdict && hit)
10406 		hit(dev, tunnel, ctx);
10407 	if (!verdict && miss)
10408 		miss(dev, ctx);
10409 	if (lock_op)
10410 		rte_spinlock_unlock(&thub->sl);
10411 
10412 	return verdict;
10413 }
10414 
10415 struct tunnel_db_find_tunnel_id_ctx {
10416 	uint32_t tunnel_id;
10417 	struct mlx5_flow_tunnel *tunnel;
10418 };
10419 
10420 static bool
10421 find_tunnel_id_match(struct rte_eth_dev *dev,
10422 		     struct mlx5_flow_tunnel *tunnel, const void *x)
10423 {
10424 	const struct tunnel_db_find_tunnel_id_ctx *ctx = x;
10425 
10426 	RTE_SET_USED(dev);
10427 	return tunnel->tunnel_id == ctx->tunnel_id;
10428 }
10429 
10430 static void
10431 find_tunnel_id_hit(struct rte_eth_dev *dev,
10432 		   struct mlx5_flow_tunnel *tunnel, void *x)
10433 {
10434 	struct tunnel_db_find_tunnel_id_ctx *ctx = x;
10435 	RTE_SET_USED(dev);
10436 	ctx->tunnel = tunnel;
10437 }
10438 
10439 static struct mlx5_flow_tunnel *
10440 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
10441 {
10442 	struct tunnel_db_find_tunnel_id_ctx ctx = {
10443 		.tunnel_id = id,
10444 	};
10445 
10446 	mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match,
10447 				      find_tunnel_id_hit, NULL, &ctx, true);
10448 
10449 	return ctx.tunnel;
10450 }
10451 
10452 static struct mlx5_flow_tunnel *
10453 mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
10454 			  const struct rte_flow_tunnel *app_tunnel)
10455 {
10456 	struct mlx5_priv *priv = dev->data->dev_private;
10457 	struct mlx5_indexed_pool *ipool;
10458 	struct mlx5_flow_tunnel *tunnel;
10459 	uint32_t id;
10460 
10461 	ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
10462 	tunnel = mlx5_ipool_zmalloc(ipool, &id);
10463 	if (!tunnel)
10464 		return NULL;
10465 	if (id >= MLX5_MAX_TUNNELS) {
10466 		mlx5_ipool_free(ipool, id);
10467 		DRV_LOG(ERR, "Tunnel ID %d exceed max limit.", id);
10468 		return NULL;
10469 	}
10470 	tunnel->groups = mlx5_hlist_create("tunnel groups", 64, false, true,
10471 					   priv->sh,
10472 					   mlx5_flow_tunnel_grp2tbl_create_cb,
10473 					   mlx5_flow_tunnel_grp2tbl_match_cb,
10474 					   mlx5_flow_tunnel_grp2tbl_remove_cb,
10475 					   mlx5_flow_tunnel_grp2tbl_clone_cb,
10476 					mlx5_flow_tunnel_grp2tbl_clone_free_cb);
10477 	if (!tunnel->groups) {
10478 		mlx5_ipool_free(ipool, id);
10479 		return NULL;
10480 	}
10481 	/* initiate new PMD tunnel */
10482 	memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
10483 	tunnel->tunnel_id = id;
10484 	tunnel->action.type = (typeof(tunnel->action.type))
10485 			      MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
10486 	tunnel->action.conf = tunnel;
10487 	tunnel->item.type = (typeof(tunnel->item.type))
10488 			    MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
10489 	tunnel->item.spec = tunnel;
10490 	tunnel->item.last = NULL;
10491 	tunnel->item.mask = NULL;
10492 
10493 	DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
10494 		dev->data->port_id, tunnel->tunnel_id);
10495 
10496 	return tunnel;
10497 }
10498 
10499 struct tunnel_db_get_tunnel_ctx {
10500 	const struct rte_flow_tunnel *app_tunnel;
10501 	struct mlx5_flow_tunnel *tunnel;
10502 };
10503 
10504 static bool get_tunnel_match(struct rte_eth_dev *dev,
10505 			     struct mlx5_flow_tunnel *tunnel, const void *x)
10506 {
10507 	const struct tunnel_db_get_tunnel_ctx *ctx = x;
10508 
10509 	RTE_SET_USED(dev);
10510 	return !memcmp(ctx->app_tunnel, &tunnel->app_tunnel,
10511 		       sizeof(*ctx->app_tunnel));
10512 }
10513 
10514 static void get_tunnel_hit(struct rte_eth_dev *dev,
10515 			   struct mlx5_flow_tunnel *tunnel, void *x)
10516 {
10517 	/* called under tunnel spinlock protection */
10518 	struct tunnel_db_get_tunnel_ctx *ctx = x;
10519 
10520 	RTE_SET_USED(dev);
10521 	tunnel->refctn++;
10522 	ctx->tunnel = tunnel;
10523 }
10524 
10525 static void get_tunnel_miss(struct rte_eth_dev *dev, void *x)
10526 {
10527 	/* called under tunnel spinlock protection */
10528 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10529 	struct tunnel_db_get_tunnel_ctx *ctx = x;
10530 
10531 	rte_spinlock_unlock(&thub->sl);
10532 	ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel);
10533 	rte_spinlock_lock(&thub->sl);
10534 	if (ctx->tunnel) {
10535 		ctx->tunnel->refctn = 1;
10536 		LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain);
10537 	}
10538 }
10539 
10540 
10541 static int
10542 mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
10543 		     const struct rte_flow_tunnel *app_tunnel,
10544 		     struct mlx5_flow_tunnel **tunnel)
10545 {
10546 	struct tunnel_db_get_tunnel_ctx ctx = {
10547 		.app_tunnel = app_tunnel,
10548 	};
10549 
10550 	mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit,
10551 				      get_tunnel_miss, &ctx, true);
10552 	*tunnel = ctx.tunnel;
10553 	return ctx.tunnel ? 0 : -ENOMEM;
10554 }
10555 
10556 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
10557 {
10558 	struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
10559 
10560 	if (!thub)
10561 		return;
10562 	if (!LIST_EMPTY(&thub->tunnels))
10563 		DRV_LOG(WARNING, "port %u tunnels present", port_id);
10564 	mlx5_hlist_destroy(thub->groups);
10565 	mlx5_free(thub);
10566 }
10567 
10568 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
10569 {
10570 	int err;
10571 	struct mlx5_flow_tunnel_hub *thub;
10572 
10573 	thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
10574 			   0, SOCKET_ID_ANY);
10575 	if (!thub)
10576 		return -ENOMEM;
10577 	LIST_INIT(&thub->tunnels);
10578 	rte_spinlock_init(&thub->sl);
10579 	thub->groups = mlx5_hlist_create("flow groups", 64,
10580 					 false, true, sh,
10581 					 mlx5_flow_tunnel_grp2tbl_create_cb,
10582 					 mlx5_flow_tunnel_grp2tbl_match_cb,
10583 					 mlx5_flow_tunnel_grp2tbl_remove_cb,
10584 					 mlx5_flow_tunnel_grp2tbl_clone_cb,
10585 					mlx5_flow_tunnel_grp2tbl_clone_free_cb);
10586 	if (!thub->groups) {
10587 		err = -rte_errno;
10588 		goto err;
10589 	}
10590 	sh->tunnel_hub = thub;
10591 
10592 	return 0;
10593 
10594 err:
10595 	if (thub->groups)
10596 		mlx5_hlist_destroy(thub->groups);
10597 	if (thub)
10598 		mlx5_free(thub);
10599 	return err;
10600 }
10601 
10602 static inline int
10603 mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
10604 			  struct rte_flow_tunnel *tunnel,
10605 			  struct rte_flow_error *error)
10606 {
10607 	struct mlx5_priv *priv = dev->data->dev_private;
10608 
10609 	if (!priv->sh->config.dv_flow_en)
10610 		return rte_flow_error_set(error, ENOTSUP,
10611 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10612 					  "flow DV interface is off");
10613 	if (!is_tunnel_offload_active(dev))
10614 		return rte_flow_error_set(error, ENOTSUP,
10615 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10616 					  "tunnel offload was not activated");
10617 	if (!tunnel)
10618 		return rte_flow_error_set(error, EINVAL,
10619 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10620 					  "no application tunnel");
10621 	switch (tunnel->type) {
10622 	default:
10623 		return rte_flow_error_set(error, EINVAL,
10624 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10625 					  "unsupported tunnel type");
10626 	case RTE_FLOW_ITEM_TYPE_VXLAN:
10627 	case RTE_FLOW_ITEM_TYPE_GRE:
10628 	case RTE_FLOW_ITEM_TYPE_NVGRE:
10629 	case RTE_FLOW_ITEM_TYPE_GENEVE:
10630 		break;
10631 	}
10632 	return 0;
10633 }
10634 
10635 static int
10636 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
10637 		    struct rte_flow_tunnel *app_tunnel,
10638 		    struct rte_flow_action **actions,
10639 		    uint32_t *num_of_actions,
10640 		    struct rte_flow_error *error)
10641 {
10642 	struct mlx5_flow_tunnel *tunnel;
10643 	int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
10644 
10645 	if (ret)
10646 		return ret;
10647 	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
10648 	if (ret < 0) {
10649 		return rte_flow_error_set(error, ret,
10650 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10651 					  "failed to initialize pmd tunnel");
10652 	}
10653 	*actions = &tunnel->action;
10654 	*num_of_actions = 1;
10655 	return 0;
10656 }
10657 
10658 static int
10659 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
10660 		       struct rte_flow_tunnel *app_tunnel,
10661 		       struct rte_flow_item **items,
10662 		       uint32_t *num_of_items,
10663 		       struct rte_flow_error *error)
10664 {
10665 	struct mlx5_flow_tunnel *tunnel;
10666 	int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
10667 
10668 	if (ret)
10669 		return ret;
10670 	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
10671 	if (ret < 0) {
10672 		return rte_flow_error_set(error, ret,
10673 					  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
10674 					  "failed to initialize pmd tunnel");
10675 	}
10676 	*items = &tunnel->item;
10677 	*num_of_items = 1;
10678 	return 0;
10679 }
10680 
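/*
 * Illustrative application-side usage of the two helpers above, reached
 * through the generic rte_flow tunnel offload API. The port_id and the
 * surrounding rule creation are assumptions of the sketch: pmd_actions are
 * prepended to the actions of the tunnel "set" rule, and pmd_items to the
 * pattern of the tunnel "match" rule.
 *
 *   struct rte_flow_tunnel tunnel = { .type = RTE_FLOW_ITEM_TYPE_VXLAN };
 *   struct rte_flow_action *pmd_actions;
 *   struct rte_flow_item *pmd_items;
 *   uint32_t n_actions, n_items;
 *   struct rte_flow_error error;
 *
 *   rte_flow_tunnel_decap_set(port_id, &tunnel, &pmd_actions,
 *                             &n_actions, &error);
 *   rte_flow_tunnel_match(port_id, &tunnel, &pmd_items, &n_items, &error);
 */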
10681 struct tunnel_db_element_release_ctx {
10682 	struct rte_flow_item *items;
10683 	struct rte_flow_action *actions;
10684 	uint32_t num_elements;
10685 	struct rte_flow_error *error;
10686 	int ret;
10687 };
10688 
10689 static bool
10690 tunnel_element_release_match(struct rte_eth_dev *dev,
10691 			     struct mlx5_flow_tunnel *tunnel, const void *x)
10692 {
10693 	const struct tunnel_db_element_release_ctx *ctx = x;
10694 
10695 	RTE_SET_USED(dev);
10696 	if (ctx->num_elements != 1)
10697 		return false;
10698 	else if (ctx->items)
10699 		return ctx->items == &tunnel->item;
10700 	else if (ctx->actions)
10701 		return ctx->actions == &tunnel->action;
10702 
10703 	return false;
10704 }
10705 
10706 static void
10707 tunnel_element_release_hit(struct rte_eth_dev *dev,
10708 			   struct mlx5_flow_tunnel *tunnel, void *x)
10709 {
10710 	struct tunnel_db_element_release_ctx *ctx = x;
10711 	ctx->ret = 0;
10712 	if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
10713 		mlx5_flow_tunnel_free(dev, tunnel);
10714 }
10715 
10716 static void
10717 tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
10718 {
10719 	struct tunnel_db_element_release_ctx *ctx = x;
10720 	RTE_SET_USED(dev);
10721 	ctx->ret = rte_flow_error_set(ctx->error, EINVAL,
10722 				      RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
10723 				      "invalid argument");
10724 }
10725 
10726 static int
10727 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
10728 		       struct rte_flow_item *pmd_items,
10729 		       uint32_t num_items, struct rte_flow_error *err)
10730 {
10731 	struct tunnel_db_element_release_ctx ctx = {
10732 		.items = pmd_items,
10733 		.actions = NULL,
10734 		.num_elements = num_items,
10735 		.error = err,
10736 	};
10737 
10738 	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
10739 				      tunnel_element_release_hit,
10740 				      tunnel_element_release_miss, &ctx, false);
10741 
10742 	return ctx.ret;
10743 }
10744 
10745 static int
10746 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
10747 			 struct rte_flow_action *pmd_actions,
10748 			 uint32_t num_actions, struct rte_flow_error *err)
10749 {
10750 	struct tunnel_db_element_release_ctx ctx = {
10751 		.items = NULL,
10752 		.actions = pmd_actions,
10753 		.num_elements = num_actions,
10754 		.error = err,
10755 	};
10756 
10757 	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
10758 				      tunnel_element_release_hit,
10759 				      tunnel_element_release_miss, &ctx, false);
10760 
10761 	return ctx.ret;
10762 }
10763 
10764 static int
10765 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
10766 				  struct rte_mbuf *m,
10767 				  struct rte_flow_restore_info *info,
10768 				  struct rte_flow_error *err)
10769 {
10770 	uint64_t ol_flags = m->ol_flags;
10771 	const struct mlx5_flow_tbl_data_entry *tble;
10772 	const uint64_t mask = RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
10773 
10774 	if (!is_tunnel_offload_active(dev)) {
10775 		info->flags = 0;
10776 		return 0;
10777 	}
10778 
10779 	if ((ol_flags & mask) != mask)
10780 		goto err;
10781 	tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
10782 	if (!tble) {
10783 		DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
10784 			dev->data->port_id, m->hash.fdir.hi);
10785 		goto err;
10786 	}
10787 	MLX5_ASSERT(tble->tunnel);
10788 	memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel));
10789 	info->group_id = tble->group_id;
10790 	info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
10791 		      RTE_FLOW_RESTORE_INFO_GROUP_ID |
10792 		      RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
10793 
10794 	return 0;
10795 
10796 err:
10797 	return rte_flow_error_set(err, EINVAL,
10798 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10799 				  "failed to get restore info");
10800 }
10801 
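/*
 * Illustrative RX-path usage, not part of the driver: an application that
 * receives a packet which missed in the tunnel offload tables can recover
 * the tunnel context through the generic API, which lands in the handler
 * above; on success with RTE_FLOW_RESTORE_INFO_TUNNEL set, info.tunnel
 * describes the outer tunnel header. The port_id, mbuf and the
 * handle_tunnel() consumer are assumptions of the sketch.
 *
 *   struct rte_flow_restore_info info;
 *   struct rte_flow_error error;
 *
 *   if (rte_flow_get_restore_info(port_id, mbuf, &info, &error) == 0 &&
 *       (info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL))
 *           handle_tunnel(&info.tunnel);
 */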
10802 #else /* HAVE_IBV_FLOW_DV_SUPPORT */
10803 static int
10804 mlx5_flow_tunnel_decap_set(__rte_unused struct rte_eth_dev *dev,
10805 			   __rte_unused struct rte_flow_tunnel *app_tunnel,
10806 			   __rte_unused struct rte_flow_action **actions,
10807 			   __rte_unused uint32_t *num_of_actions,
10808 			   __rte_unused struct rte_flow_error *error)
10809 {
10810 	return -ENOTSUP;
10811 }
10812 
10813 static int
10814 mlx5_flow_tunnel_match(__rte_unused struct rte_eth_dev *dev,
10815 		       __rte_unused struct rte_flow_tunnel *app_tunnel,
10816 		       __rte_unused struct rte_flow_item **items,
10817 		       __rte_unused uint32_t *num_of_items,
10818 		       __rte_unused struct rte_flow_error *error)
10819 {
10820 	return -ENOTSUP;
10821 }
10822 
10823 static int
10824 mlx5_flow_tunnel_item_release(__rte_unused struct rte_eth_dev *dev,
10825 			      __rte_unused struct rte_flow_item *pmd_items,
10826 			      __rte_unused uint32_t num_items,
10827 			      __rte_unused struct rte_flow_error *err)
10828 {
10829 	return -ENOTSUP;
10830 }
10831 
10832 static int
10833 mlx5_flow_tunnel_action_release(__rte_unused struct rte_eth_dev *dev,
10834 				__rte_unused struct rte_flow_action *pmd_action,
10835 				__rte_unused uint32_t num_actions,
10836 				__rte_unused struct rte_flow_error *err)
10837 {
10838 	return -ENOTSUP;
10839 }
10840 
10841 static int
10842 mlx5_flow_tunnel_get_restore_info(__rte_unused struct rte_eth_dev *dev,
10843 				  __rte_unused struct rte_mbuf *m,
10844 				  __rte_unused struct rte_flow_restore_info *i,
10845 				  __rte_unused struct rte_flow_error *err)
10846 {
10847 	return -ENOTSUP;
10848 }
10849 
10850 static int
10851 flow_tunnel_add_default_miss(__rte_unused struct rte_eth_dev *dev,
10852 			     __rte_unused struct rte_flow *flow,
10853 			     __rte_unused const struct rte_flow_attr *attr,
10854 			     __rte_unused const struct rte_flow_action *actions,
10855 			     __rte_unused uint32_t flow_idx,
10856 			     __rte_unused const struct mlx5_flow_tunnel *tunnel,
10857 			     __rte_unused struct tunnel_default_miss_ctx *ctx,
10858 			     __rte_unused struct rte_flow_error *error)
10859 {
10860 	return -ENOTSUP;
10861 }
10862 
10863 static struct mlx5_flow_tunnel *
10864 mlx5_find_tunnel_id(__rte_unused struct rte_eth_dev *dev,
10865 		    __rte_unused uint32_t id)
10866 {
10867 	return NULL;
10868 }
10869 
10870 static void
10871 mlx5_flow_tunnel_free(__rte_unused struct rte_eth_dev *dev,
10872 		      __rte_unused struct mlx5_flow_tunnel *tunnel)
10873 {
10874 }
10875 
10876 static uint32_t
10877 tunnel_flow_group_to_flow_table(__rte_unused struct rte_eth_dev *dev,
10878 				__rte_unused const struct mlx5_flow_tunnel *t,
10879 				__rte_unused uint32_t group,
10880 				__rte_unused uint32_t *table,
10881 				struct rte_flow_error *error)
10882 {
10883 	return rte_flow_error_set(error, ENOTSUP,
10884 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10885 				  "tunnel offload requires DV support");
10886 }
10887 
10888 void
10889 mlx5_release_tunnel_hub(__rte_unused struct mlx5_dev_ctx_shared *sh,
10890 			__rte_unused  uint16_t port_id)
10891 {
10892 }
10893 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
10894 
10895 /* Flex flow item API */
10896 static struct rte_flow_item_flex_handle *
10897 mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
10898 			   const struct rte_flow_item_flex_conf *conf,
10899 			   struct rte_flow_error *error)
10900 {
10901 	static const char err_msg[] = "flex item creation unsupported";
10902 	struct mlx5_priv *priv = dev->data->dev_private;
10903 	struct rte_flow_attr attr = { .transfer = 0 };
10904 	const struct mlx5_flow_driver_ops *fops =
10905 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10906 
10907 	if (!priv->pci_dev) {
10908 		rte_flow_error_set(error, ENOTSUP,
10909 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10910 				   "create flex item on PF only");
10911 		return NULL;
10912 	}
10913 	switch (priv->pci_dev->id.device_id) {
10914 	case PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF:
10915 	case PCI_DEVICE_ID_MELLANOX_CONNECTX7BF:
10916 		break;
10917 	default:
10918 		rte_flow_error_set(error, ENOTSUP,
10919 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10920 				   "flex item available on BlueField ports only");
10921 		return NULL;
10922 	}
10923 	if (!fops->item_create) {
10924 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10925 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10926 				   NULL, err_msg);
10927 		return NULL;
10928 	}
10929 	return fops->item_create(dev, conf, error);
10930 }
10931 
10932 static int
10933 mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
10934 			    const struct rte_flow_item_flex_handle *handle,
10935 			    struct rte_flow_error *error)
10936 {
10937 	static const char err_msg[] = "flex item release unsupported";
10938 	struct rte_flow_attr attr = { .transfer = 0 };
10939 	const struct mlx5_flow_driver_ops *fops =
10940 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10941 
10942 	if (!fops->item_release) {
10943 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10944 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10945 				   NULL, err_msg);
10946 		return -rte_errno;
10947 	}
10948 	return fops->item_release(dev, handle, error);
10949 }
10950 
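/*
 * Illustrative application-side usage, not part of the driver: flex items
 * are created and released through the generic rte_flow API entry points,
 * which dispatch to the two handlers above. The parser configuration is
 * protocol specific and left abstract here; port_id is an assumption.
 *
 *   const struct rte_flow_item_flex_conf *conf;    built by the application
 *   struct rte_flow_item_flex_handle *handle;
 *   struct rte_flow_error error;
 *
 *   handle = rte_flow_flex_item_create(port_id, conf, &error);
 *   ...
 *   rte_flow_flex_item_release(port_id, handle, &error);
 */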
10951 static void
10952 mlx5_dbg__print_pattern(const struct rte_flow_item *item)
10953 {
10954 	int ret;
10955 	struct rte_flow_error error;
10956 
10957 	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
10958 		char *item_name;
10959 		ret = rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME_PTR, &item_name,
10960 				    sizeof(item_name),
10961 				    (void *)(uintptr_t)item->type, &error);
10962 		if (ret > 0)
10963 			printf("%s ", item_name);
10964 		else
10965 			printf("%d\n", (int)item->type);
10966 	}
10967 	printf("END\n");
10968 }
10969 
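/**
 * Check whether a UDP item can only match the standard VXLAN port.
 *
 * @return
 *   Non-zero if the UDP destination port is unspecified or equals
 *   MLX5_UDP_PORT_VXLAN (4789), zero otherwise.
 */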
10970 static int
10971 mlx5_flow_is_std_vxlan_port(const struct rte_flow_item *udp_item)
10972 {
10973 	const struct rte_flow_item_udp *spec = udp_item->spec;
10974 	const struct rte_flow_item_udp *mask = udp_item->mask;
10975 	uint16_t udp_dport = 0;
10976 
10977 	if (spec != NULL) {
10978 		if (!mask)
10979 			mask = &rte_flow_item_udp_mask;
10980 		udp_dport = rte_be_to_cpu_16(spec->hdr.dst_port &
10981 				mask->hdr.dst_port);
10982 	}
10983 	return (!udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN);
10984 }
10985 
10986 static const struct mlx5_flow_expand_node *
10987 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
10988 		unsigned int item_idx,
10989 		const struct mlx5_flow_expand_node graph[],
10990 		const struct mlx5_flow_expand_node *node)
10991 {
10992 	const struct rte_flow_item *item = pattern + item_idx, *prev_item;
10993 
10994 	if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN &&
10995 			node != NULL &&
10996 			node->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
10997 		/*
10998 		 * The expansion node is VXLAN and it is also the last
10999 		 * expandable item in the pattern, so need to continue
11000 		 * expansion of the inner tunnel.
11001 		 */
11002 		MLX5_ASSERT(item_idx > 0);
11003 		prev_item = pattern + item_idx - 1;
11004 		MLX5_ASSERT(prev_item->type == RTE_FLOW_ITEM_TYPE_UDP);
11005 		if (mlx5_flow_is_std_vxlan_port(prev_item))
11006 			return &graph[MLX5_EXPANSION_STD_VXLAN];
11007 		return &graph[MLX5_EXPANSION_L3_VXLAN];
11008 	}
11009 	return node;
11010 }
11011 
11012 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
11013 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
11014 	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
11015 };
11016 
11017 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
11018 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
11019 	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
11020 	{ 9, 10, 11 }, { 12, 13, 14 },
11021 };
11022 
11023 /**
11024  * Discover the number of available flow priorities.
11025  *
11026  * @param dev
11027  *   Ethernet device.
11028  *
11029  * @return
11030  *   On success, number of available flow priorities.
11031  *   On failure, a negative errno-style code and rte_errno is set.
11032  */
11033 int
11034 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
11035 {
11036 	static const uint16_t vprio[] = {8, 16};
11037 	const struct mlx5_priv *priv = dev->data->dev_private;
11038 	const struct mlx5_flow_driver_ops *fops;
11039 	enum mlx5_flow_drv_type type;
11040 	int ret;
11041 
11042 	type = mlx5_flow_os_get_type();
11043 	if (type == MLX5_FLOW_TYPE_MAX) {
11044 		type = MLX5_FLOW_TYPE_VERBS;
11045 		if (priv->sh->cdev->config.devx && priv->sh->config.dv_flow_en)
11046 			type = MLX5_FLOW_TYPE_DV;
11047 	}
11048 	fops = flow_get_drv_ops(type);
11049 	if (fops->discover_priorities == NULL) {
11050 		DRV_LOG(ERR, "Priority discovery not supported");
11051 		rte_errno = ENOTSUP;
11052 		return -rte_errno;
11053 	}
11054 	ret = fops->discover_priorities(dev, vprio, RTE_DIM(vprio));
11055 	if (ret < 0)
11056 		return ret;
11057 	switch (ret) {
11058 	case 8:
11059 		ret = RTE_DIM(priority_map_3);
11060 		break;
11061 	case 16:
11062 		ret = RTE_DIM(priority_map_5);
11063 		break;
11064 	default:
11065 		rte_errno = ENOTSUP;
11066 		DRV_LOG(ERR,
11067 			"port %u maximum priority: %d expected 8/16",
11068 			dev->data->port_id, ret);
11069 		return -rte_errno;
11070 	}
11071 	DRV_LOG(INFO, "port %u supported flow priorities:"
11072 		" 0-%d for ingress or egress root table,"
11073 		" 0-%d for non-root table or transfer root table.",
11074 		dev->data->port_id, ret - 2,
11075 		MLX5_NON_ROOT_FLOW_MAX_PRIO - 1);
11076 	return ret;
11077 }
11078 
11079 /**
11080  * Adjust flow priority based on the highest layer and the request priority.
11081  *
11082  * @param[in] dev
11083  *   Pointer to the Ethernet device structure.
11084  * @param[in] priority
11085  *   The rule base priority.
11086  * @param[in] subpriority
11087  *   The priority based on the items.
11088  *
11089  * @return
11090  *   The new priority.
11091  */
11092 uint32_t
11093 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
11094 			  uint32_t subpriority)
11095 {
11096 	uint32_t res = 0;
11097 	struct mlx5_priv *priv = dev->data->dev_private;
11098 
11099 	switch (priv->sh->flow_max_priority) {
11100 	case RTE_DIM(priority_map_3):
11101 		res = priority_map_3[priority][subpriority];
11102 		break;
11103 	case RTE_DIM(priority_map_5):
11104 		res = priority_map_5[priority][subpriority];
11105 		break;
11106 	}
11107 	return  res;
11108 }
11109 
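/*
 * Worked example for mlx5_flow_adjust_priority() above: on a device with
 * 16 Verbs priorities (priority_map_5), a rule with base priority 1 and
 * item sub-priority 2 is mapped to priority_map_5[1][2] == 5.
 */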
11110 /**
11111  * Get the priority for sending traffic to kernel table.
11112  *
11113  * @param[in] dev
11114  *   Pointer to the Ethernet device structure.
11115  *
11116  * @return
11117  *   On success: the value of priority for sending traffic to kernel table
11118  *   On failure: -1
11119  */
11120 uint32_t
11121 mlx5_get_send_to_kernel_priority(struct rte_eth_dev *dev)
11122 {
11123 	struct mlx5_priv *priv = dev->data->dev_private;
11124 	uint32_t res;
11125 
11126 	switch (priv->sh->flow_max_priority) {
11127 	case RTE_DIM(priority_map_5):
11128 		res = 15;
11129 		break;
11130 	case RTE_DIM(priority_map_3):
11131 		res = 7;
11132 		break;
11133 	default:
11134 		DRV_LOG(ERR,
11135 			"port %u maximum priority: %d expected 8/16",
11136 			dev->data->port_id, priv->sh->flow_max_priority);
11137 		res = (uint32_t)-1;
11138 	}
11139 	return res;
11140 }
11141 
11142 /**
11143  * Get the E-Switch Manager vport id.
11144  *
11145  * @param[in] dev
11146  *   Pointer to the Ethernet device structure.
11147  *
11148  * @return
11149  *   The vport id.
11150  */
11151 int16_t mlx5_flow_get_esw_manager_vport_id(struct rte_eth_dev *dev)
11152 {
11153 	struct mlx5_priv *priv = dev->data->dev_private;
11154 	struct mlx5_common_device *cdev = priv->sh->cdev;
11155 
11156 	/* New FW exposes E-Switch Manager vport ID, can use it directly. */
11157 	if (cdev->config.hca_attr.esw_mgr_vport_id_valid)
11158 		return (int16_t)cdev->config.hca_attr.esw_mgr_vport_id;
11159 
11160 	if (priv->pci_dev == NULL)
11161 		return 0;
11162 	switch (priv->pci_dev->id.device_id) {
11163 	case PCI_DEVICE_ID_MELLANOX_CONNECTX5BF:
11164 	case PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF:
11165 	case PCI_DEVICE_ID_MELLANOX_CONNECTX7BF:
11166 	/*
11167 	 * In old FW which doesn't expose the E-Switch Manager vport ID in the capability,
11168 	 * only the BF embedded CPUs control the E-Switch Manager port. Hence,
11169 	 * ECPF vport ID is selected and not the host port (0) in any BF case.
11170 	 */
11171 		return (int16_t)MLX5_ECPF_VPORT_ID;
11172 	default:
11173 		return MLX5_PF_VPORT_ID;
11174 	}
11175 }
11176 
11177 /**
11178  * Parse item to get the vport id.
11179  *
11180  * @param[in] dev
11181  *   Pointer to the Ethernet device structure.
11182  * @param[in] item
11183  *   The src port id match item.
11184  * @param[out] vport_id
11185  *   Pointer to put the vport id.
11186  * @param[out] all_ports
11187  *   Indicate if the item matches all ports.
11188  * @param[out] error
11189  *   Pointer to error structure.
11190  *
11191  * @return
11192  *   0 on success, a negative errno value otherwise and rte_errno is set.
11193  */
11194 int mlx5_flow_get_item_vport_id(struct rte_eth_dev *dev,
11195 				const struct rte_flow_item *item,
11196 				uint16_t *vport_id,
11197 				bool *all_ports,
11198 				struct rte_flow_error *error)
11199 {
11200 	struct mlx5_priv *port_priv;
11201 	const struct rte_flow_item_port_id *pid_v;
11202 	uint32_t esw_mgr_port;
11203 
11204 	if (item->type != RTE_FLOW_ITEM_TYPE_PORT_ID &&
11205 	    item->type != RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT)
11206 		return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
11207 					  NULL, "Incorrect item type.");
11208 	pid_v = item->spec;
11209 	if (!pid_v) {
11210 		if (all_ports)
11211 			*all_ports = (item->type == RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT);
11212 		return 0;
11213 	}
11214 	if (all_ports)
11215 		*all_ports = false;
11216 	esw_mgr_port = (item->type == RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT) ?
11217 				MLX5_REPRESENTED_PORT_ESW_MGR : MLX5_PORT_ESW_MGR;
11218 	if (pid_v->id == esw_mgr_port) {
11219 		*vport_id = mlx5_flow_get_esw_manager_vport_id(dev);
11220 	} else {
11221 		port_priv = mlx5_port_to_eswitch_info(pid_v->id, false);
11222 		if (!port_priv)
11223 			return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
11224 						  NULL, "Failed to get port info.");
11225 		*vport_id = port_priv->representor_id;
11226 	}
11227 
11228 	return 0;
11229 }
11230 
11231 int
11232 mlx5_flow_pick_transfer_proxy(struct rte_eth_dev *dev,
11233 			      uint16_t *proxy_port_id,
11234 			      struct rte_flow_error *error)
11235 {
11236 	const struct mlx5_priv *priv = dev->data->dev_private;
11237 	uint16_t port_id;
11238 
11239 	if (!priv->sh->config.dv_esw_en)
11240 		return rte_flow_error_set(error, EINVAL,
11241 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
11242 					  NULL,
11243 					  "unable to provide a proxy port"
11244 					  " without E-Switch configured");
11245 	if (!priv->master && !priv->representor)
11246 		return rte_flow_error_set(error, EINVAL,
11247 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
11248 					  NULL,
11249 					  "unable to provide a proxy port"
11250 					  " for port which is not a master"
11251 					  " or a representor port");
11252 	if (priv->master) {
11253 		*proxy_port_id = dev->data->port_id;
11254 		return 0;
11255 	}
11256 	MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
11257 		const struct rte_eth_dev *port_dev = &rte_eth_devices[port_id];
11258 		const struct mlx5_priv *port_priv = port_dev->data->dev_private;
11259 
11260 		if (port_priv->master &&
11261 		    port_priv->domain_id == priv->domain_id) {
11262 			*proxy_port_id = port_id;
11263 			return 0;
11264 		}
11265 	}
11266 	return rte_flow_error_set(error, EINVAL,
11267 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
11268 				  NULL, "unable to find a proxy port");
11269 }
11270