xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision 5bd0e3e671c4caec6f70426f164ea383fbf2b7d8)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <stdalign.h>
7 #include <stdint.h>
8 #include <string.h>
9 #include <stdbool.h>
10 #include <sys/queue.h>
11 
12 #include <rte_common.h>
13 #include <rte_ether.h>
14 #include <ethdev_driver.h>
15 #include <rte_eal_paging.h>
16 #include <rte_flow.h>
17 #include <rte_cycles.h>
18 #include <rte_flow_driver.h>
19 #include <rte_malloc.h>
20 #include <rte_ip.h>
21 
22 #include <mlx5_glue.h>
23 #include <mlx5_devx_cmds.h>
24 #include <mlx5_prm.h>
25 #include <mlx5_malloc.h>
26 
27 #include "mlx5_defs.h"
28 #include "mlx5.h"
29 #include "mlx5_flow.h"
30 #include "mlx5_flow_os.h"
31 #include "mlx5_rx.h"
32 #include "mlx5_tx.h"
33 #include "mlx5_common_os.h"
34 #include "rte_pmd_mlx5.h"
35 
36 /*
37  * Shared array for quick translation between port_id and vport mask/values
38  * used for HWS rules.
39  */
40 struct flow_hw_port_info mlx5_flow_hw_port_infos[RTE_MAX_ETHPORTS];
41 
42 struct tunnel_default_miss_ctx {
43 	uint16_t *queue;
44 	__extension__
45 	union {
46 		struct rte_flow_action_rss action_rss;
47 		struct rte_flow_action_queue miss_queue;
48 		struct rte_flow_action_jump miss_jump;
49 		uint8_t raw[0];
50 	};
51 };
52 
53 static int
54 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
55 			     struct rte_flow *flow,
56 			     const struct rte_flow_attr *attr,
57 			     const struct rte_flow_action *app_actions,
58 			     uint32_t flow_idx,
59 			     const struct mlx5_flow_tunnel *tunnel,
60 			     struct tunnel_default_miss_ctx *ctx,
61 			     struct rte_flow_error *error);
62 static struct mlx5_flow_tunnel *
63 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id);
64 static void
65 mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel);
66 static uint32_t
67 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
68 				const struct mlx5_flow_tunnel *tunnel,
69 				uint32_t group, uint32_t *table,
70 				struct rte_flow_error *error);
71 
72 /** Device flow drivers. */
73 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
74 
75 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
76 
77 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
78 	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
79 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
80 	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
81 	[MLX5_FLOW_TYPE_HW] = &mlx5_flow_hw_drv_ops,
82 #endif
83 	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
84 	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
85 };
86 
87 /** Helper macro to build input graph for mlx5_flow_expand_rss(). */
88 #define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
89 	(const int []){ \
90 		__VA_ARGS__, 0, \
91 	}
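
/*
 * Illustrative sketch (not part of the driver): the macro above builds a
 * zero-terminated compound literal of node indexes, so that
 *
 *   MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6)
 *
 * expands to
 *
 *   (const int []){ MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6, 0, }
 *
 * with the trailing 0 acting as the terminator checked when walking a
 * node's next list.
 */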
92 
93 /** Node object of input graph for mlx5_flow_expand_rss(). */
94 struct mlx5_flow_expand_node {
95 	const int *const next;
96 	/**<
97 	 * List of next node indexes. A zero value is interpreted as a terminator.
98 	 */
99 	const enum rte_flow_item_type type;
100 	/**< Pattern item type of current node. */
101 	uint64_t rss_types;
102 	/**<
103 	 * RSS types bit-field associated with this node
104 	 * (see RTE_ETH_RSS_* definitions).
105 	 */
106 	uint64_t node_flags;
107 	/**<
108 	 * Bit-fields that define how the node is used in the expansion
109 	 * (see MLX5_EXPANSION_NODE_* definitions).
110 	 */
111 };
112 
113 /* Optional expansion node. The expansion algorithm will not go deeper. */
114 #define MLX5_EXPANSION_NODE_OPTIONAL (UINT64_C(1) << 0)
115 
116 /* The node is not added implicitly as expansion to the flow pattern.
117  * If the node type does not match the flow pattern item type, the
118  * expansion algorithm will go deeper to its next items.
119  * In the current implementation, the list of next node indexes can
120  * have up to one node with this flag set and it has to be the last
121  * node index (before the list terminator).
122  */
123 #define MLX5_EXPANSION_NODE_EXPLICIT (UINT64_C(1) << 1)
124 
125 /** Object returned by mlx5_flow_expand_rss(). */
126 struct mlx5_flow_expand_rss {
127 	uint32_t entries;
128 	/**< Number of entries in @p entry[]. */
129 	struct {
130 		struct rte_flow_item *pattern; /**< Expanded pattern array. */
131 		uint32_t priority; /**< Priority offset for each expansion. */
132 	} entry[];
133 };
134 
135 static void
136 mlx5_dbg__print_pattern(const struct rte_flow_item *item);
137 
138 static const struct mlx5_flow_expand_node *
139 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
140 		unsigned int item_idx,
141 		const struct mlx5_flow_expand_node graph[],
142 		const struct mlx5_flow_expand_node *node);
143 
144 static bool
145 mlx5_flow_is_rss_expandable_item(const struct rte_flow_item *item)
146 {
147 	switch (item->type) {
148 	case RTE_FLOW_ITEM_TYPE_ETH:
149 	case RTE_FLOW_ITEM_TYPE_VLAN:
150 	case RTE_FLOW_ITEM_TYPE_IPV4:
151 	case RTE_FLOW_ITEM_TYPE_IPV6:
152 	case RTE_FLOW_ITEM_TYPE_UDP:
153 	case RTE_FLOW_ITEM_TYPE_TCP:
154 	case RTE_FLOW_ITEM_TYPE_ESP:
155 	case RTE_FLOW_ITEM_TYPE_ICMP:
156 	case RTE_FLOW_ITEM_TYPE_ICMP6:
157 	case RTE_FLOW_ITEM_TYPE_VXLAN:
158 	case RTE_FLOW_ITEM_TYPE_NVGRE:
159 	case RTE_FLOW_ITEM_TYPE_GRE:
160 	case RTE_FLOW_ITEM_TYPE_GENEVE:
161 	case RTE_FLOW_ITEM_TYPE_MPLS:
162 	case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
163 	case RTE_FLOW_ITEM_TYPE_GRE_KEY:
164 	case RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT:
165 	case RTE_FLOW_ITEM_TYPE_GTP:
166 		return true;
167 	default:
168 		break;
169 	}
170 	return false;
171 }
172 
173 /**
174  * Network Service Header (NSH) and its next protocol values
175  * are described in RFC-8393.
176  */
177 static enum rte_flow_item_type
178 mlx5_nsh_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
179 {
180 	enum rte_flow_item_type type;
181 
182 	switch (proto_mask & proto_spec) {
183 	case 0:
184 		type = RTE_FLOW_ITEM_TYPE_VOID;
185 		break;
186 	case RTE_VXLAN_GPE_TYPE_IPV4:
187 		type = RTE_FLOW_ITEM_TYPE_IPV4;
188 		break;
189 	case RTE_VXLAN_GPE_TYPE_IPV6:
190 		type = RTE_FLOW_ITEM_TYPE_IPV6;
191 		break;
192 	case RTE_VXLAN_GPE_TYPE_ETH:
193 		type = RTE_FLOW_ITEM_TYPE_ETH;
194 		break;
195 	default:
196 		type = RTE_FLOW_ITEM_TYPE_END;
197 	}
198 	return type;
199 }
200 
201 static enum rte_flow_item_type
202 mlx5_inet_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
203 {
204 	enum rte_flow_item_type type;
205 
206 	switch (proto_mask & proto_spec) {
207 	case 0:
208 		type = RTE_FLOW_ITEM_TYPE_VOID;
209 		break;
210 	case IPPROTO_UDP:
211 		type = RTE_FLOW_ITEM_TYPE_UDP;
212 		break;
213 	case IPPROTO_TCP:
214 		type = RTE_FLOW_ITEM_TYPE_TCP;
215 		break;
216 	case IPPROTO_IPIP:
217 		type = RTE_FLOW_ITEM_TYPE_IPV4;
218 		break;
219 	case IPPROTO_IPV6:
220 		type = RTE_FLOW_ITEM_TYPE_IPV6;
221 		break;
222 	case IPPROTO_ESP:
223 		type = RTE_FLOW_ITEM_TYPE_ESP;
224 		break;
225 	default:
226 		type = RTE_FLOW_ITEM_TYPE_END;
227 	}
228 	return type;
229 }
230 
231 static enum rte_flow_item_type
232 mlx5_ethertype_to_item_type(rte_be16_t type_spec,
233 			    rte_be16_t type_mask, bool is_tunnel)
234 {
235 	enum rte_flow_item_type type;
236 
237 	switch (rte_be_to_cpu_16(type_spec & type_mask)) {
238 	case 0:
239 		type = RTE_FLOW_ITEM_TYPE_VOID;
240 		break;
241 	case RTE_ETHER_TYPE_TEB:
242 		type = is_tunnel ?
243 		       RTE_FLOW_ITEM_TYPE_ETH : RTE_FLOW_ITEM_TYPE_END;
244 		break;
245 	case RTE_ETHER_TYPE_VLAN:
246 		type = !is_tunnel ?
247 		       RTE_FLOW_ITEM_TYPE_VLAN : RTE_FLOW_ITEM_TYPE_END;
248 		break;
249 	case RTE_ETHER_TYPE_IPV4:
250 		type = RTE_FLOW_ITEM_TYPE_IPV4;
251 		break;
252 	case RTE_ETHER_TYPE_IPV6:
253 		type = RTE_FLOW_ITEM_TYPE_IPV6;
254 		break;
255 	default:
256 		type = RTE_FLOW_ITEM_TYPE_END;
257 	}
258 	return type;
259 }
260 
261 static enum rte_flow_item_type
262 mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
263 {
264 #define MLX5_XSET_ITEM_MASK_SPEC(type, fld)                              \
265 	do {                                                             \
266 		const void *m = item->mask;                              \
267 		const void *s = item->spec;                              \
268 		mask = m ?                                               \
269 			((const struct rte_flow_item_##type *)m)->fld :  \
270 			rte_flow_item_##type##_mask.fld;                 \
271 		spec = ((const struct rte_flow_item_##type *)s)->fld;    \
272 	} while (0)
273 
274 	enum rte_flow_item_type ret;
275 	uint16_t spec, mask;
276 
277 	if (item == NULL || item->spec == NULL)
278 		return RTE_FLOW_ITEM_TYPE_VOID;
279 	switch (item->type) {
280 	case RTE_FLOW_ITEM_TYPE_ETH:
281 		MLX5_XSET_ITEM_MASK_SPEC(eth, type);
282 		if (!mask)
283 			return RTE_FLOW_ITEM_TYPE_VOID;
284 		ret = mlx5_ethertype_to_item_type(spec, mask, false);
285 		break;
286 	case RTE_FLOW_ITEM_TYPE_VLAN:
287 		MLX5_XSET_ITEM_MASK_SPEC(vlan, inner_type);
288 		if (!mask)
289 			return RTE_FLOW_ITEM_TYPE_VOID;
290 		ret = mlx5_ethertype_to_item_type(spec, mask, false);
291 		break;
292 	case RTE_FLOW_ITEM_TYPE_IPV4:
293 		MLX5_XSET_ITEM_MASK_SPEC(ipv4, hdr.next_proto_id);
294 		if (!mask)
295 			return RTE_FLOW_ITEM_TYPE_VOID;
296 		ret = mlx5_inet_proto_to_item_type(spec, mask);
297 		break;
298 	case RTE_FLOW_ITEM_TYPE_IPV6:
299 		MLX5_XSET_ITEM_MASK_SPEC(ipv6, hdr.proto);
300 		if (!mask)
301 			return RTE_FLOW_ITEM_TYPE_VOID;
302 		ret = mlx5_inet_proto_to_item_type(spec, mask);
303 		break;
304 	case RTE_FLOW_ITEM_TYPE_GENEVE:
305 		MLX5_XSET_ITEM_MASK_SPEC(geneve, protocol);
306 		ret = mlx5_ethertype_to_item_type(spec, mask, true);
307 		break;
308 	case RTE_FLOW_ITEM_TYPE_GRE:
309 		MLX5_XSET_ITEM_MASK_SPEC(gre, protocol);
310 		ret = mlx5_ethertype_to_item_type(spec, mask, true);
311 		break;
312 	case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
313 		MLX5_XSET_ITEM_MASK_SPEC(vxlan_gpe, protocol);
314 		ret = mlx5_nsh_proto_to_item_type(spec, mask);
315 		break;
316 	default:
317 		ret = RTE_FLOW_ITEM_TYPE_VOID;
318 		break;
319 	}
320 	return ret;
321 #undef MLX5_XSET_ITEM_MASK_SPEC
322 }
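
/*
 * Illustrative sketch (not part of the driver): completing an ETH item
 * whose spec and mask pin the EtherType to IPv4. The inferred type of the
 * missing next item is RTE_FLOW_ITEM_TYPE_IPV4:
 *
 *   struct rte_flow_item_eth spec = { .type = RTE_BE16(RTE_ETHER_TYPE_IPV4) };
 *   struct rte_flow_item_eth mask = { .type = RTE_BE16(0xffff) };
 *   struct rte_flow_item item = {
 *           .type = RTE_FLOW_ITEM_TYPE_ETH,
 *           .spec = &spec,
 *           .mask = &mask,
 *   };
 *
 *   mlx5_flow_expand_rss_item_complete(&item);
 *
 * Note that with a NULL mask the default rte_flow_item_eth_mask applies,
 * and its zero EtherType mask makes the helper return
 * RTE_FLOW_ITEM_TYPE_VOID instead.
 */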
323 
324 static const int *
325 mlx5_flow_expand_rss_skip_explicit(const struct mlx5_flow_expand_node graph[],
326 		const int *next_node)
327 {
328 	const struct mlx5_flow_expand_node *node = NULL;
329 	const int *next = next_node;
330 
331 	while (next && *next) {
332 		/*
333 		 * Skip the nodes with the MLX5_EXPANSION_NODE_EXPLICIT
334 		 * flag set, because they were not found in the flow pattern.
335 		 */
336 		node = &graph[*next];
337 		if (!(node->node_flags & MLX5_EXPANSION_NODE_EXPLICIT))
338 			break;
339 		next = node->next;
340 	}
341 	return next;
342 }
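
/*
 * Illustrative example (not part of the driver): in the expansion graph
 * defined below, the next list of MLX5_EXPANSION_ETH holds only
 * MLX5_EXPANSION_VLAN, which is flagged MLX5_EXPANSION_NODE_EXPLICIT.
 * This helper therefore skips it and returns the VLAN node's own next
 * list (IPv4/IPv6): VLAN is expanded only when it appears explicitly in
 * the user pattern.
 */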
343 
344 #define MLX5_RSS_EXP_ELT_N 16
345 
346 /**
347  * Expand RSS flows into several possible flows according to the RSS hash
348  * fields requested and the driver capabilities.
349  *
350  * @param[out] buf
351  *   Buffer to store the resulting expansion.
352  * @param[in] size
353  *   Buffer size in bytes. If 0, @p buf can be NULL.
354  * @param[in] pattern
355  *   User flow pattern.
356  * @param[in] types
357  *   RSS types to expand (see RTE_ETH_RSS_* definitions).
358  * @param[in] graph
359  *   Input graph to expand @p pattern according to @p types.
360  * @param[in] graph_root_index
361  *   Index of root node in @p graph, typically 0.
362  *
363  * @return
364  *   A positive value representing the size of @p buf in bytes regardless of
365  *   @p size on success, a negative errno value otherwise and rte_errno is
366  *   set, the following errors are defined:
367  *
368  *   -E2BIG: the depth of @p graph is too big.
369  *   -EINVAL: @p size is not large enough for the expanded pattern.
370  */
371 static int
372 mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
373 		     const struct rte_flow_item *pattern, uint64_t types,
374 		     const struct mlx5_flow_expand_node graph[],
375 		     int graph_root_index)
376 {
377 	const struct rte_flow_item *item;
378 	const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
379 	const int *next_node;
380 	const int *stack[MLX5_RSS_EXP_ELT_N];
381 	int stack_pos = 0;
382 	struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N];
383 	unsigned int i, item_idx, last_expand_item_idx = 0;
384 	size_t lsize;
385 	size_t user_pattern_size = 0;
386 	void *addr = NULL;
387 	const struct mlx5_flow_expand_node *next = NULL;
388 	struct rte_flow_item missed_item;
389 	int missed = 0;
390 	int elt = 0;
391 	const struct rte_flow_item *last_expand_item = NULL;
392 
393 	memset(&missed_item, 0, sizeof(missed_item));
394 	lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
395 		MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]);
396 	if (lsize > size)
397 		return -EINVAL;
398 	buf->entry[0].priority = 0;
399 	buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N];
400 	buf->entries = 0;
401 	addr = buf->entry[0].pattern;
402 	for (item = pattern, item_idx = 0;
403 			item->type != RTE_FLOW_ITEM_TYPE_END;
404 			item++, item_idx++) {
405 		if (!mlx5_flow_is_rss_expandable_item(item)) {
406 			user_pattern_size += sizeof(*item);
407 			continue;
408 		}
409 		last_expand_item = item;
410 		last_expand_item_idx = item_idx;
411 		i = 0;
412 		while (node->next && node->next[i]) {
413 			next = &graph[node->next[i]];
414 			if (next->type == item->type)
415 				break;
416 			if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
417 				node = next;
418 				i = 0;
419 			} else {
420 				++i;
421 			}
422 		}
423 		if (next)
424 			node = next;
425 		user_pattern_size += sizeof(*item);
426 	}
427 	user_pattern_size += sizeof(*item); /* Handle END item. */
428 	lsize += user_pattern_size;
429 	if (lsize > size)
430 		return -EINVAL;
431 	/* Copy the user pattern in the first entry of the buffer. */
432 	rte_memcpy(addr, pattern, user_pattern_size);
433 	addr = (void *)(((uintptr_t)addr) + user_pattern_size);
434 	buf->entries = 1;
435 	/* Start expanding. */
436 	memset(flow_items, 0, sizeof(flow_items));
437 	user_pattern_size -= sizeof(*item);
438 	/*
439 	 * Check if the last valid item has its spec set; if so, the pattern
440 	 * needs to be completed before it can be used for expansion.
441 	 */
442 	missed_item.type = mlx5_flow_expand_rss_item_complete(last_expand_item);
443 	if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
444 		/* Item type END indicates expansion is not required. */
445 		return lsize;
446 	}
447 	if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
448 		next = NULL;
449 		missed = 1;
450 		i = 0;
451 		while (node->next && node->next[i]) {
452 			next = &graph[node->next[i]];
453 			if (next->type == missed_item.type) {
454 				flow_items[0].type = missed_item.type;
455 				flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
456 				break;
457 			}
458 			if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
459 				node = next;
460 				i = 0;
461 			} else {
462 				++i;
463 			}
464 			next = NULL;
465 		}
466 	}
467 	if (next && missed) {
468 		elt = 2; /* missed item + item end. */
469 		node = next;
470 		lsize += elt * sizeof(*item) + user_pattern_size;
471 		if (lsize > size)
472 			return -EINVAL;
473 		if (node->rss_types & types) {
474 			buf->entry[buf->entries].priority = 1;
475 			buf->entry[buf->entries].pattern = addr;
476 			buf->entries++;
477 			rte_memcpy(addr, buf->entry[0].pattern,
478 				   user_pattern_size);
479 			addr = (void *)(((uintptr_t)addr) + user_pattern_size);
480 			rte_memcpy(addr, flow_items, elt * sizeof(*item));
481 			addr = (void *)(((uintptr_t)addr) +
482 					elt * sizeof(*item));
483 		}
484 	} else if (last_expand_item != NULL) {
485 		node = mlx5_flow_expand_rss_adjust_node(pattern,
486 				last_expand_item_idx, graph, node);
487 	}
488 	memset(flow_items, 0, sizeof(flow_items));
489 	next_node = mlx5_flow_expand_rss_skip_explicit(graph,
490 			node->next);
491 	stack[stack_pos] = next_node;
492 	node = next_node ? &graph[*next_node] : NULL;
493 	while (node) {
494 		flow_items[stack_pos].type = node->type;
495 		if (node->rss_types & types) {
496 			size_t n;
497 			/*
498 			 * Compute the number of items to copy from the
499 			 * expansion and copy them.
500 			 * When stack_pos is 0, there is one element in the
501 			 * stack, plus the additional END item.
502 			 */
503 			elt = stack_pos + 2;
504 			flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
505 			lsize += elt * sizeof(*item) + user_pattern_size;
506 			if (lsize > size)
507 				return -EINVAL;
508 			n = elt * sizeof(*item);
509 			buf->entry[buf->entries].priority =
510 				stack_pos + 1 + missed;
511 			buf->entry[buf->entries].pattern = addr;
512 			buf->entries++;
513 			rte_memcpy(addr, buf->entry[0].pattern,
514 				   user_pattern_size);
515 			addr = (void *)(((uintptr_t)addr) +
516 					user_pattern_size);
517 			rte_memcpy(addr, &missed_item,
518 				   missed * sizeof(*item));
519 			addr = (void *)(((uintptr_t)addr) +
520 				missed * sizeof(*item));
521 			rte_memcpy(addr, flow_items, n);
522 			addr = (void *)(((uintptr_t)addr) + n);
523 		}
524 		/* Go deeper. */
525 		if (!(node->node_flags & MLX5_EXPANSION_NODE_OPTIONAL) &&
526 				node->next) {
527 			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
528 					node->next);
529 			if (stack_pos++ == MLX5_RSS_EXP_ELT_N) {
530 				rte_errno = E2BIG;
531 				return -rte_errno;
532 			}
533 			stack[stack_pos] = next_node;
534 		} else if (*(next_node + 1)) {
535 			/* Follow up with the next possibility. */
536 			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
537 					++next_node);
538 		} else if (!stack_pos) {
539 			/*
540 			 * Completing the traverse over the different paths.
541 			 * The next_node is advanced to the terminator.
542 			 */
543 			++next_node;
544 		} else {
545 			/* Move to the next path. */
546 			while (stack_pos) {
547 				next_node = stack[--stack_pos];
548 				next_node++;
549 				if (*next_node)
550 					break;
551 			}
552 			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
553 					next_node);
554 			stack[stack_pos] = next_node;
555 		}
556 		node = next_node && *next_node ? &graph[*next_node] : NULL;
557 	}
558 	return lsize;
559 }
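
/*
 * Illustrative sketch (not part of the driver): a caller reserves one
 * buffer, expands the pattern for the requested RSS types, then walks the
 * produced entries (items and use_entry() are hypothetical names):
 *
 *   union {
 *           struct mlx5_flow_expand_rss buf;
 *           uint8_t buffer[4096];
 *   } expand_buffer;
 *   struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
 *   int ret;
 *   uint32_t i;
 *
 *   ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer), items,
 *                              RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP,
 *                              mlx5_support_expansion, MLX5_EXPANSION_ROOT);
 *   if (ret >= 0)
 *           for (i = 0; i != buf->entries; ++i)
 *                   use_entry(buf->entry[i].pattern,
 *                             buf->entry[i].priority);
 */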
560 
561 enum mlx5_expansion {
562 	MLX5_EXPANSION_ROOT,
563 	MLX5_EXPANSION_ROOT_OUTER,
564 	MLX5_EXPANSION_OUTER_ETH,
565 	MLX5_EXPANSION_OUTER_VLAN,
566 	MLX5_EXPANSION_OUTER_IPV4,
567 	MLX5_EXPANSION_OUTER_IPV4_UDP,
568 	MLX5_EXPANSION_OUTER_IPV4_TCP,
569 	MLX5_EXPANSION_OUTER_IPV4_ESP,
570 	MLX5_EXPANSION_OUTER_IPV4_ICMP,
571 	MLX5_EXPANSION_OUTER_IPV6,
572 	MLX5_EXPANSION_OUTER_IPV6_UDP,
573 	MLX5_EXPANSION_OUTER_IPV6_TCP,
574 	MLX5_EXPANSION_OUTER_IPV6_ESP,
575 	MLX5_EXPANSION_OUTER_IPV6_ICMP6,
576 	MLX5_EXPANSION_VXLAN,
577 	MLX5_EXPANSION_STD_VXLAN,
578 	MLX5_EXPANSION_L3_VXLAN,
579 	MLX5_EXPANSION_VXLAN_GPE,
580 	MLX5_EXPANSION_GRE,
581 	MLX5_EXPANSION_NVGRE,
582 	MLX5_EXPANSION_GRE_KEY,
583 	MLX5_EXPANSION_MPLS,
584 	MLX5_EXPANSION_ETH,
585 	MLX5_EXPANSION_VLAN,
586 	MLX5_EXPANSION_IPV4,
587 	MLX5_EXPANSION_IPV4_UDP,
588 	MLX5_EXPANSION_IPV4_TCP,
589 	MLX5_EXPANSION_IPV4_ESP,
590 	MLX5_EXPANSION_IPV4_ICMP,
591 	MLX5_EXPANSION_IPV6,
592 	MLX5_EXPANSION_IPV6_UDP,
593 	MLX5_EXPANSION_IPV6_TCP,
594 	MLX5_EXPANSION_IPV6_ESP,
595 	MLX5_EXPANSION_IPV6_ICMP6,
596 	MLX5_EXPANSION_IPV6_FRAG_EXT,
597 	MLX5_EXPANSION_GTP,
598 	MLX5_EXPANSION_GENEVE,
599 };
600 
601 /** Supported expansion of items. */
602 static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
603 	[MLX5_EXPANSION_ROOT] = {
604 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
605 						  MLX5_EXPANSION_IPV4,
606 						  MLX5_EXPANSION_IPV6),
607 		.type = RTE_FLOW_ITEM_TYPE_END,
608 	},
609 	[MLX5_EXPANSION_ROOT_OUTER] = {
610 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
611 						  MLX5_EXPANSION_OUTER_IPV4,
612 						  MLX5_EXPANSION_OUTER_IPV6),
613 		.type = RTE_FLOW_ITEM_TYPE_END,
614 	},
615 	[MLX5_EXPANSION_OUTER_ETH] = {
616 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
617 		.type = RTE_FLOW_ITEM_TYPE_ETH,
618 		.rss_types = 0,
619 	},
620 	[MLX5_EXPANSION_OUTER_VLAN] = {
621 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
622 						  MLX5_EXPANSION_OUTER_IPV6),
623 		.type = RTE_FLOW_ITEM_TYPE_VLAN,
624 		.node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
625 	},
626 	[MLX5_EXPANSION_OUTER_IPV4] = {
627 		.next = MLX5_FLOW_EXPAND_RSS_NEXT
628 			(MLX5_EXPANSION_OUTER_IPV4_UDP,
629 			 MLX5_EXPANSION_OUTER_IPV4_TCP,
630 			 MLX5_EXPANSION_OUTER_IPV4_ESP,
631 			 MLX5_EXPANSION_OUTER_IPV4_ICMP,
632 			 MLX5_EXPANSION_GRE,
633 			 MLX5_EXPANSION_NVGRE,
634 			 MLX5_EXPANSION_IPV4,
635 			 MLX5_EXPANSION_IPV6),
636 		.type = RTE_FLOW_ITEM_TYPE_IPV4,
637 		.rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
638 			RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
639 	},
640 	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
641 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
642 						  MLX5_EXPANSION_VXLAN_GPE,
643 						  MLX5_EXPANSION_MPLS,
644 						  MLX5_EXPANSION_GENEVE,
645 						  MLX5_EXPANSION_GTP),
646 		.type = RTE_FLOW_ITEM_TYPE_UDP,
647 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
648 	},
649 	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
650 		.type = RTE_FLOW_ITEM_TYPE_TCP,
651 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
652 	},
653 	[MLX5_EXPANSION_OUTER_IPV4_ESP] = {
654 		.type = RTE_FLOW_ITEM_TYPE_ESP,
655 		.rss_types = RTE_ETH_RSS_ESP,
656 	},
657 	[MLX5_EXPANSION_OUTER_IPV4_ICMP] = {
658 		.type = RTE_FLOW_ITEM_TYPE_ICMP,
659 	},
660 	[MLX5_EXPANSION_OUTER_IPV6] = {
661 		.next = MLX5_FLOW_EXPAND_RSS_NEXT
662 			(MLX5_EXPANSION_OUTER_IPV6_UDP,
663 			 MLX5_EXPANSION_OUTER_IPV6_TCP,
664 			 MLX5_EXPANSION_OUTER_IPV6_ESP,
665 			 MLX5_EXPANSION_OUTER_IPV6_ICMP6,
666 			 MLX5_EXPANSION_IPV4,
667 			 MLX5_EXPANSION_IPV6,
668 			 MLX5_EXPANSION_GRE,
669 			 MLX5_EXPANSION_NVGRE),
670 		.type = RTE_FLOW_ITEM_TYPE_IPV6,
671 		.rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
672 			RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
673 	},
674 	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
675 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
676 						  MLX5_EXPANSION_VXLAN_GPE,
677 						  MLX5_EXPANSION_MPLS,
678 						  MLX5_EXPANSION_GENEVE,
679 						  MLX5_EXPANSION_GTP),
680 		.type = RTE_FLOW_ITEM_TYPE_UDP,
681 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
682 	},
683 	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
684 		.type = RTE_FLOW_ITEM_TYPE_TCP,
685 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
686 	},
687 	[MLX5_EXPANSION_OUTER_IPV6_ESP] = {
688 		.type = RTE_FLOW_ITEM_TYPE_ESP,
689 		.rss_types = RTE_ETH_RSS_ESP,
690 	},
691 	[MLX5_EXPANSION_OUTER_IPV6_ICMP6] = {
692 		.type = RTE_FLOW_ITEM_TYPE_ICMP6,
693 	},
694 	[MLX5_EXPANSION_VXLAN] = {
695 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
696 						  MLX5_EXPANSION_IPV4,
697 						  MLX5_EXPANSION_IPV6),
698 		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
699 	},
700 	[MLX5_EXPANSION_STD_VXLAN] = {
701 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
702 		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
703 	},
704 	[MLX5_EXPANSION_L3_VXLAN] = {
705 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
706 						  MLX5_EXPANSION_IPV6),
707 		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
708 	},
709 	[MLX5_EXPANSION_VXLAN_GPE] = {
710 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
711 						  MLX5_EXPANSION_IPV4,
712 						  MLX5_EXPANSION_IPV6),
713 		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
714 	},
715 	[MLX5_EXPANSION_GRE] = {
716 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
717 						  MLX5_EXPANSION_IPV4,
718 						  MLX5_EXPANSION_IPV6,
719 						  MLX5_EXPANSION_GRE_KEY,
720 						  MLX5_EXPANSION_MPLS),
721 		.type = RTE_FLOW_ITEM_TYPE_GRE,
722 	},
723 	[MLX5_EXPANSION_GRE_KEY] = {
724 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
725 						  MLX5_EXPANSION_IPV6,
726 						  MLX5_EXPANSION_MPLS),
727 		.type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
728 		.node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
729 	},
730 	[MLX5_EXPANSION_NVGRE] = {
731 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
732 		.type = RTE_FLOW_ITEM_TYPE_NVGRE,
733 	},
734 	[MLX5_EXPANSION_MPLS] = {
735 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
736 						  MLX5_EXPANSION_IPV6,
737 						  MLX5_EXPANSION_ETH),
738 		.type = RTE_FLOW_ITEM_TYPE_MPLS,
739 		.node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
740 	},
741 	[MLX5_EXPANSION_ETH] = {
742 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
743 		.type = RTE_FLOW_ITEM_TYPE_ETH,
744 	},
745 	[MLX5_EXPANSION_VLAN] = {
746 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
747 						  MLX5_EXPANSION_IPV6),
748 		.type = RTE_FLOW_ITEM_TYPE_VLAN,
749 		.node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
750 	},
751 	[MLX5_EXPANSION_IPV4] = {
752 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
753 						  MLX5_EXPANSION_IPV4_TCP,
754 						  MLX5_EXPANSION_IPV4_ESP,
755 						  MLX5_EXPANSION_IPV4_ICMP),
756 		.type = RTE_FLOW_ITEM_TYPE_IPV4,
757 		.rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
758 			RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
759 	},
760 	[MLX5_EXPANSION_IPV4_UDP] = {
761 		.type = RTE_FLOW_ITEM_TYPE_UDP,
762 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
763 	},
764 	[MLX5_EXPANSION_IPV4_TCP] = {
765 		.type = RTE_FLOW_ITEM_TYPE_TCP,
766 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
767 	},
768 	[MLX5_EXPANSION_IPV4_ESP] = {
769 		.type = RTE_FLOW_ITEM_TYPE_ESP,
770 		.rss_types = RTE_ETH_RSS_ESP,
771 	},
772 	[MLX5_EXPANSION_IPV4_ICMP] = {
773 		.type = RTE_FLOW_ITEM_TYPE_ICMP,
774 	},
775 	[MLX5_EXPANSION_IPV6] = {
776 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
777 						  MLX5_EXPANSION_IPV6_TCP,
778 						  MLX5_EXPANSION_IPV6_ESP,
779 						  MLX5_EXPANSION_IPV6_ICMP6,
780 						  MLX5_EXPANSION_IPV6_FRAG_EXT),
781 		.type = RTE_FLOW_ITEM_TYPE_IPV6,
782 		.rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
783 			RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
784 	},
785 	[MLX5_EXPANSION_IPV6_UDP] = {
786 		.type = RTE_FLOW_ITEM_TYPE_UDP,
787 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
788 	},
789 	[MLX5_EXPANSION_IPV6_TCP] = {
790 		.type = RTE_FLOW_ITEM_TYPE_TCP,
791 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
792 	},
793 	[MLX5_EXPANSION_IPV6_ESP] = {
794 		.type = RTE_FLOW_ITEM_TYPE_ESP,
795 		.rss_types = RTE_ETH_RSS_ESP,
796 	},
797 	[MLX5_EXPANSION_IPV6_FRAG_EXT] = {
798 		.type = RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
799 	},
800 	[MLX5_EXPANSION_IPV6_ICMP6] = {
801 		.type = RTE_FLOW_ITEM_TYPE_ICMP6,
802 	},
803 	[MLX5_EXPANSION_GTP] = {
804 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
805 						  MLX5_EXPANSION_IPV6),
806 		.type = RTE_FLOW_ITEM_TYPE_GTP,
807 	},
808 	[MLX5_EXPANSION_GENEVE] = {
809 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
810 						  MLX5_EXPANSION_IPV4,
811 						  MLX5_EXPANSION_IPV6),
812 		.type = RTE_FLOW_ITEM_TYPE_GENEVE,
813 	},
814 };
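
/*
 * Illustrative example (not part of the driver): with the graph above and
 * types = RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV4_TCP,
 * the user pattern (items given without specs)
 *
 *   eth / ipv4 / end
 *
 * expands to the following entries (priority offset in parentheses):
 *
 *   eth / ipv4 / end         (0, the original pattern)
 *   eth / ipv4 / udp / end   (1)
 *   eth / ipv4 / tcp / end   (1)
 *
 * The ESP and ICMP next nodes are skipped because their rss_types do not
 * intersect the requested types.
 */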
815 
816 static struct rte_flow_action_handle *
817 mlx5_action_handle_create(struct rte_eth_dev *dev,
818 			  const struct rte_flow_indir_action_conf *conf,
819 			  const struct rte_flow_action *action,
820 			  struct rte_flow_error *error);
821 static int mlx5_action_handle_destroy
822 				(struct rte_eth_dev *dev,
823 				 struct rte_flow_action_handle *handle,
824 				 struct rte_flow_error *error);
825 static int mlx5_action_handle_update
826 				(struct rte_eth_dev *dev,
827 				 struct rte_flow_action_handle *handle,
828 				 const void *update,
829 				 struct rte_flow_error *error);
830 static int mlx5_action_handle_query
831 				(struct rte_eth_dev *dev,
832 				 const struct rte_flow_action_handle *handle,
833 				 void *data,
834 				 struct rte_flow_error *error);
835 static int
836 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
837 		    struct rte_flow_tunnel *app_tunnel,
838 		    struct rte_flow_action **actions,
839 		    uint32_t *num_of_actions,
840 		    struct rte_flow_error *error);
841 static int
842 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
843 		       struct rte_flow_tunnel *app_tunnel,
844 		       struct rte_flow_item **items,
845 		       uint32_t *num_of_items,
846 		       struct rte_flow_error *error);
847 static int
848 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
849 			      struct rte_flow_item *pmd_items,
850 			      uint32_t num_items, struct rte_flow_error *err);
851 static int
852 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
853 				struct rte_flow_action *pmd_actions,
854 				uint32_t num_actions,
855 				struct rte_flow_error *err);
856 static int
857 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
858 				  struct rte_mbuf *m,
859 				  struct rte_flow_restore_info *info,
860 				  struct rte_flow_error *err);
861 static struct rte_flow_item_flex_handle *
862 mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
863 			   const struct rte_flow_item_flex_conf *conf,
864 			   struct rte_flow_error *error);
865 static int
866 mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
867 			    const struct rte_flow_item_flex_handle *handle,
868 			    struct rte_flow_error *error);
869 static int
870 mlx5_flow_info_get(struct rte_eth_dev *dev,
871 		   struct rte_flow_port_info *port_info,
872 		   struct rte_flow_queue_info *queue_info,
873 		   struct rte_flow_error *error);
874 static int
875 mlx5_flow_port_configure(struct rte_eth_dev *dev,
876 			 const struct rte_flow_port_attr *port_attr,
877 			 uint16_t nb_queue,
878 			 const struct rte_flow_queue_attr *queue_attr[],
879 			 struct rte_flow_error *err);
880 
881 static struct rte_flow_pattern_template *
882 mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
883 		const struct rte_flow_pattern_template_attr *attr,
884 		const struct rte_flow_item items[],
885 		struct rte_flow_error *error);
886 
887 static int
888 mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
889 				   struct rte_flow_pattern_template *template,
890 				   struct rte_flow_error *error);
891 static struct rte_flow_actions_template *
892 mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
893 			const struct rte_flow_actions_template_attr *attr,
894 			const struct rte_flow_action actions[],
895 			const struct rte_flow_action masks[],
896 			struct rte_flow_error *error);
897 static int
898 mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
899 				   struct rte_flow_actions_template *template,
900 				   struct rte_flow_error *error);
901 
902 static struct rte_flow_template_table *
903 mlx5_flow_table_create(struct rte_eth_dev *dev,
904 		       const struct rte_flow_template_table_attr *attr,
905 		       struct rte_flow_pattern_template *item_templates[],
906 		       uint8_t nb_item_templates,
907 		       struct rte_flow_actions_template *action_templates[],
908 		       uint8_t nb_action_templates,
909 		       struct rte_flow_error *error);
910 static int
911 mlx5_flow_table_destroy(struct rte_eth_dev *dev,
912 			struct rte_flow_template_table *table,
913 			struct rte_flow_error *error);
914 static struct rte_flow *
915 mlx5_flow_async_flow_create(struct rte_eth_dev *dev,
916 			    uint32_t queue,
917 			    const struct rte_flow_op_attr *attr,
918 			    struct rte_flow_template_table *table,
919 			    const struct rte_flow_item items[],
920 			    uint8_t pattern_template_index,
921 			    const struct rte_flow_action actions[],
922 			    uint8_t action_template_index,
923 			    void *user_data,
924 			    struct rte_flow_error *error);
925 static int
926 mlx5_flow_async_flow_destroy(struct rte_eth_dev *dev,
927 			     uint32_t queue,
928 			     const struct rte_flow_op_attr *attr,
929 			     struct rte_flow *flow,
930 			     void *user_data,
931 			     struct rte_flow_error *error);
932 static int
933 mlx5_flow_pull(struct rte_eth_dev *dev,
934 	       uint32_t queue,
935 	       struct rte_flow_op_result res[],
936 	       uint16_t n_res,
937 	       struct rte_flow_error *error);
938 static int
939 mlx5_flow_push(struct rte_eth_dev *dev,
940 	       uint32_t queue,
941 	       struct rte_flow_error *error);
942 
943 static struct rte_flow_action_handle *
944 mlx5_flow_async_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
945 				 const struct rte_flow_op_attr *attr,
946 				 const struct rte_flow_indir_action_conf *conf,
947 				 const struct rte_flow_action *action,
948 				 void *user_data,
949 				 struct rte_flow_error *error);
950 
951 static int
952 mlx5_flow_async_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
953 				 const struct rte_flow_op_attr *attr,
954 				 struct rte_flow_action_handle *handle,
955 				 const void *update,
956 				 void *user_data,
957 				 struct rte_flow_error *error);
958 
959 static int
960 mlx5_flow_async_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
961 				  const struct rte_flow_op_attr *attr,
962 				  struct rte_flow_action_handle *handle,
963 				  void *user_data,
964 				  struct rte_flow_error *error);
965 
966 static const struct rte_flow_ops mlx5_flow_ops = {
967 	.validate = mlx5_flow_validate,
968 	.create = mlx5_flow_create,
969 	.destroy = mlx5_flow_destroy,
970 	.flush = mlx5_flow_flush,
971 	.isolate = mlx5_flow_isolate,
972 	.query = mlx5_flow_query,
973 	.dev_dump = mlx5_flow_dev_dump,
974 	.get_aged_flows = mlx5_flow_get_aged_flows,
975 	.action_handle_create = mlx5_action_handle_create,
976 	.action_handle_destroy = mlx5_action_handle_destroy,
977 	.action_handle_update = mlx5_action_handle_update,
978 	.action_handle_query = mlx5_action_handle_query,
979 	.tunnel_decap_set = mlx5_flow_tunnel_decap_set,
980 	.tunnel_match = mlx5_flow_tunnel_match,
981 	.tunnel_action_decap_release = mlx5_flow_tunnel_action_release,
982 	.tunnel_item_release = mlx5_flow_tunnel_item_release,
983 	.get_restore_info = mlx5_flow_tunnel_get_restore_info,
984 	.flex_item_create = mlx5_flow_flex_item_create,
985 	.flex_item_release = mlx5_flow_flex_item_release,
986 	.info_get = mlx5_flow_info_get,
987 	.configure = mlx5_flow_port_configure,
988 	.pattern_template_create = mlx5_flow_pattern_template_create,
989 	.pattern_template_destroy = mlx5_flow_pattern_template_destroy,
990 	.actions_template_create = mlx5_flow_actions_template_create,
991 	.actions_template_destroy = mlx5_flow_actions_template_destroy,
992 	.template_table_create = mlx5_flow_table_create,
993 	.template_table_destroy = mlx5_flow_table_destroy,
994 	.async_create = mlx5_flow_async_flow_create,
995 	.async_destroy = mlx5_flow_async_flow_destroy,
996 	.pull = mlx5_flow_pull,
997 	.push = mlx5_flow_push,
998 	.async_action_handle_create = mlx5_flow_async_action_handle_create,
999 	.async_action_handle_update = mlx5_flow_async_action_handle_update,
1000 	.async_action_handle_destroy = mlx5_flow_async_action_handle_destroy,
1001 };
1002 
1003 /* Tunnel information. */
1004 struct mlx5_flow_tunnel_info {
1005 	uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
1006 	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
1007 };
1008 
1009 static struct mlx5_flow_tunnel_info tunnels_info[] = {
1010 	{
1011 		.tunnel = MLX5_FLOW_LAYER_VXLAN,
1012 		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
1013 	},
1014 	{
1015 		.tunnel = MLX5_FLOW_LAYER_GENEVE,
1016 		.ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
1017 	},
1018 	{
1019 		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
1020 		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
1021 	},
1022 	{
1023 		.tunnel = MLX5_FLOW_LAYER_GRE,
1024 		.ptype = RTE_PTYPE_TUNNEL_GRE,
1025 	},
1026 	{
1027 		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
1028 		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
1029 	},
1030 	{
1031 		.tunnel = MLX5_FLOW_LAYER_MPLS,
1032 		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
1033 	},
1034 	{
1035 		.tunnel = MLX5_FLOW_LAYER_NVGRE,
1036 		.ptype = RTE_PTYPE_TUNNEL_NVGRE,
1037 	},
1038 	{
1039 		.tunnel = MLX5_FLOW_LAYER_IPIP,
1040 		.ptype = RTE_PTYPE_TUNNEL_IP,
1041 	},
1042 	{
1043 		.tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
1044 		.ptype = RTE_PTYPE_TUNNEL_IP,
1045 	},
1046 	{
1047 		.tunnel = MLX5_FLOW_LAYER_GTP,
1048 		.ptype = RTE_PTYPE_TUNNEL_GTPU,
1049 	},
1050 };
1051 
1054 /**
1055  * Translate tag ID to register.
1056  *
1057  * @param[in] dev
1058  *   Pointer to the Ethernet device structure.
1059  * @param[in] feature
1060  *   The feature that requests the register.
1061  * @param[in] id
1062  *   The requested register ID.
1063  * @param[out] error
1064  *   Error description in case of failure.
1065  *
1066  * @return
1067  *   The requested register on success, a negative errno
1068  *   value otherwise and rte_errno is set.
1069  */
1070 int
1071 mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
1072 		     enum mlx5_feature_name feature,
1073 		     uint32_t id,
1074 		     struct rte_flow_error *error)
1075 {
1076 	struct mlx5_priv *priv = dev->data->dev_private;
1077 	struct mlx5_sh_config *config = &priv->sh->config;
1078 	enum modify_reg start_reg;
1079 	bool skip_mtr_reg = false;
1080 
1081 	switch (feature) {
1082 	case MLX5_HAIRPIN_RX:
1083 		return REG_B;
1084 	case MLX5_HAIRPIN_TX:
1085 		return REG_A;
1086 	case MLX5_METADATA_RX:
1087 		switch (config->dv_xmeta_en) {
1088 		case MLX5_XMETA_MODE_LEGACY:
1089 			return REG_B;
1090 		case MLX5_XMETA_MODE_META16:
1091 			return REG_C_0;
1092 		case MLX5_XMETA_MODE_META32:
1093 			return REG_C_1;
1094 		}
1095 		break;
1096 	case MLX5_METADATA_TX:
1097 		return REG_A;
1098 	case MLX5_METADATA_FDB:
1099 		switch (config->dv_xmeta_en) {
1100 		case MLX5_XMETA_MODE_LEGACY:
1101 			return REG_NON;
1102 		case MLX5_XMETA_MODE_META16:
1103 			return REG_C_0;
1104 		case MLX5_XMETA_MODE_META32:
1105 			return REG_C_1;
1106 		}
1107 		break;
1108 	case MLX5_FLOW_MARK:
1109 		switch (config->dv_xmeta_en) {
1110 		case MLX5_XMETA_MODE_LEGACY:
1111 			return REG_NON;
1112 		case MLX5_XMETA_MODE_META16:
1113 			return REG_C_1;
1114 		case MLX5_XMETA_MODE_META32:
1115 			return REG_C_0;
1116 		}
1117 		break;
1118 	case MLX5_MTR_ID:
1119 		/*
1120 		 * If meter color and meter id share one register, flow match
1121 		 * should use the meter color register for match.
1122 		 */
1123 		if (priv->mtr_reg_share)
1124 			return priv->mtr_color_reg;
1125 		else
1126 			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
1127 			       REG_C_3;
1128 	case MLX5_MTR_COLOR:
1129 	case MLX5_ASO_FLOW_HIT:
1130 	case MLX5_ASO_CONNTRACK:
1131 	case MLX5_SAMPLE_ID:
1132 		/* All features use the same REG_C. */
1133 		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
1134 		return priv->mtr_color_reg;
1135 	case MLX5_COPY_MARK:
1136 		/*
1137 	 * The metadata COPY_MARK register is used in the meter suffix
1138 	 * sub-flow when a meter is present. It is safe to share the register.
1139 		 */
1140 		return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
1141 	case MLX5_APP_TAG:
1142 		/*
1143 	 * If meter is enabled, it engages a register for color
1144 	 * match and flow match. If meter color match is not using
1145 	 * REG_C_2, the REG_C_x used by meter color match needs to
1146 	 * be skipped.
1147 	 * If meter is disabled, all available registers can be used.
1148 		 */
1149 		start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
1150 			    (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
1151 		skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
1152 		if (id > (uint32_t)(REG_C_7 - start_reg))
1153 			return rte_flow_error_set(error, EINVAL,
1154 						  RTE_FLOW_ERROR_TYPE_ITEM,
1155 						  NULL, "invalid tag id");
1156 		if (priv->sh->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
1157 			return rte_flow_error_set(error, ENOTSUP,
1158 						  RTE_FLOW_ERROR_TYPE_ITEM,
1159 						  NULL, "unsupported tag id");
1160 		/*
1161 		 * This case means meter is using a REG_C_x greater than 2.
1162 		 * Take care not to conflict with meter color REG_C_x.
1163 		 * If the available index REG_C_y >= REG_C_x, skip the
1164 		 * color register.
1165 		 */
1166 		if (skip_mtr_reg && priv->sh->flow_mreg_c
1167 		    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
1168 			if (id >= (uint32_t)(REG_C_7 - start_reg))
1169 				return rte_flow_error_set(error, EINVAL,
1170 						       RTE_FLOW_ERROR_TYPE_ITEM,
1171 							NULL, "invalid tag id");
1172 			if (priv->sh->flow_mreg_c
1173 			    [id + 1 + start_reg - REG_C_0] != REG_NON)
1174 				return priv->sh->flow_mreg_c
1175 					       [id + 1 + start_reg - REG_C_0];
1176 			return rte_flow_error_set(error, ENOTSUP,
1177 						  RTE_FLOW_ERROR_TYPE_ITEM,
1178 						  NULL, "unsupported tag id");
1179 		}
1180 		return priv->sh->flow_mreg_c[id + start_reg - REG_C_0];
1181 	}
1182 	MLX5_ASSERT(false);
1183 	return rte_flow_error_set(error, EINVAL,
1184 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1185 				  NULL, "invalid feature name");
1186 }
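
/*
 * Illustrative sketch (not part of the driver): resolving the register
 * backing application TAG index 0, e.g. before translating a TAG item
 * (dev and error assumed to be in scope):
 *
 *   int reg = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
 *
 *   if (reg < 0)
 *           return reg;
 *
 * On success reg holds one of the available REG_C_x registers, chosen
 * around the meter color register as described above.
 */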
1187 
1188 /**
1189  * Check extensive flow metadata register support.
1190  *
1191  * @param dev
1192  *   Pointer to rte_eth_dev structure.
1193  *
1194  * @return
1195  *   True if device supports extensive flow metadata register, otherwise false.
1196  */
1197 bool
1198 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
1199 {
1200 	struct mlx5_priv *priv = dev->data->dev_private;
1201 
1202 	/*
1203 	 * Having an available reg_c can be regarded as supporting
1204 	 * extensive flow metadata registers, which means:
1205 	 * - metadata register copy action by modify header.
1206 	 * - 16 modify header actions are supported.
1207 	 * - reg_c's are preserved across different domains (FDB and NIC) on
1208 	 *   packet loopback by flow lookup miss.
1209 	 */
1210 	return priv->sh->flow_mreg_c[2] != REG_NON;
1211 }
1212 
1213 /**
1214  * Get the lowest priority.
1215  *
1216  * @param[in] dev
1217  *   Pointer to the Ethernet device structure.
1218  * @param[in] attributes
1219  *   Pointer to device flow rule attributes.
1220  *
1221  * @return
1222  *   The value of lowest priority of flow.
1223  */
1224 uint32_t
1225 mlx5_get_lowest_priority(struct rte_eth_dev *dev,
1226 			  const struct rte_flow_attr *attr)
1227 {
1228 	struct mlx5_priv *priv = dev->data->dev_private;
1229 
1230 	if (!attr->group && !attr->transfer)
1231 		return priv->sh->flow_max_priority - 2;
1232 	return MLX5_NON_ROOT_FLOW_MAX_PRIO - 1;
1233 }
1234 
1235 /**
1236  * Calculate matcher priority of the flow.
1237  *
1238  * @param[in] dev
1239  *   Pointer to the Ethernet device structure.
1240  * @param[in] attr
1241  *   Pointer to device flow rule attributes.
1242  * @param[in] subpriority
1243  *   The priority based on the items.
1244  * @param[in] external
1245  *   Flow is user flow.
1246  * @return
1247  *   The matcher priority of the flow.
1248  */
1249 uint16_t
1250 mlx5_get_matcher_priority(struct rte_eth_dev *dev,
1251 			  const struct rte_flow_attr *attr,
1252 			  uint32_t subpriority, bool external)
1253 {
1254 	uint16_t priority = (uint16_t)attr->priority;
1255 	struct mlx5_priv *priv = dev->data->dev_private;
1256 
1257 	if (!attr->group && !attr->transfer) {
1258 		if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1259 			priority = priv->sh->flow_max_priority - 1;
1260 		return mlx5_os_flow_adjust_priority(dev, priority, subpriority);
1261 	} else if (!external && attr->transfer && attr->group == 0 &&
1262 		   attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR) {
1263 		return (priv->sh->flow_max_priority - 1) * 3;
1264 	}
1265 	if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1266 		priority = MLX5_NON_ROOT_FLOW_MAX_PRIO;
1267 	return priority * 3 + subpriority;
1268 }
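
/*
 * Illustrative example (not part of the driver): for a non-root table
 * (attr->group != 0) with attr->priority = 2 and subpriority = 1, the
 * function above returns 2 * 3 + 1 = 7. Each user priority thus spans
 * three matcher levels, leaving room for the item-based subpriority.
 */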
1269 
1270 /**
1271  * Verify the @p item specifications (spec, last, mask) are compatible with the
1272  * NIC capabilities.
1273  *
1274  * @param[in] item
1275  *   Item specification.
1276  * @param[in] mask
1277  *   @p item->mask or flow default bit-masks.
1278  * @param[in] nic_mask
1279  *   Bit-masks covering supported fields by the NIC to compare with user mask.
1280  * @param[in] size
1281  *   Bit-masks size in bytes.
1282  * @param[in] range_accepted
1283  *   True if range of values is accepted for specific fields, false otherwise.
1284  * @param[out] error
1285  *   Pointer to error structure.
1286  *
1287  * @return
1288  *   0 on success, a negative errno value otherwise and rte_errno is set.
1289  */
1290 int
1291 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
1292 			  const uint8_t *mask,
1293 			  const uint8_t *nic_mask,
1294 			  unsigned int size,
1295 			  bool range_accepted,
1296 			  struct rte_flow_error *error)
1297 {
1298 	unsigned int i;
1299 
1300 	MLX5_ASSERT(nic_mask);
1301 	for (i = 0; i < size; ++i)
1302 		if ((nic_mask[i] | mask[i]) != nic_mask[i])
1303 			return rte_flow_error_set(error, ENOTSUP,
1304 						  RTE_FLOW_ERROR_TYPE_ITEM,
1305 						  item,
1306 						  "mask enables non supported"
1307 						  " bits");
1308 	if (!item->spec && (item->mask || item->last))
1309 		return rte_flow_error_set(error, EINVAL,
1310 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1311 					  "mask/last without a spec is not"
1312 					  " supported");
1313 	if (item->spec && item->last && !range_accepted) {
1314 		uint8_t spec[size];
1315 		uint8_t last[size];
1316 		unsigned int i;
1317 		int ret;
1318 
1319 		for (i = 0; i < size; ++i) {
1320 			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
1321 			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
1322 		}
1323 		ret = memcmp(spec, last, size);
1324 		if (ret != 0)
1325 			return rte_flow_error_set(error, EINVAL,
1326 						  RTE_FLOW_ERROR_TYPE_ITEM,
1327 						  item,
1328 						  "range is not valid");
1329 	}
1330 	return 0;
1331 }
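
/*
 * Illustrative sketch (not part of the driver): validating a VLAN item
 * against the subset of fields the NIC can match on; any bit set in the
 * user mask but cleared in nic_mask is rejected with ENOTSUP:
 *
 *   const struct rte_flow_item_vlan nic_mask = {
 *           .tci = RTE_BE16(UINT16_MAX),
 *           .inner_type = RTE_BE16(UINT16_MAX),
 *   };
 *   const struct rte_flow_item_vlan *mask =
 *           item->mask ? item->mask : &rte_flow_item_vlan_mask;
 *
 *   int ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
 *                                       (const uint8_t *)&nic_mask,
 *                                       sizeof(struct rte_flow_item_vlan),
 *                                       false, error);
 */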
1332 
1333 /**
1334  * Adjust the hash fields according to the @p flow information.
1335  *
1336  * @param[in] rss_desc
1337  *   Pointer to the RSS flow descriptor.
1338  * @param[in] tunnel
1339  *   1 when the hash field is for a tunnel item.
1340  * @param[in] layer_types
1341  *   RTE_ETH_RSS_* types.
1342  * @param[in] hash_fields
1343  *   Item hash fields.
1344  *
1345  * @return
1346  *   The hash fields that should be used.
1347  */
1348 uint64_t
1349 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
1350 			    int tunnel __rte_unused, uint64_t layer_types,
1351 			    uint64_t hash_fields)
1352 {
1353 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1354 	int rss_request_inner = rss_desc->level >= 2;
1355 
1356 	/* Check RSS hash level for tunnel. */
1357 	if (tunnel && rss_request_inner)
1358 		hash_fields |= IBV_RX_HASH_INNER;
1359 	else if (tunnel || rss_request_inner)
1360 		return 0;
1361 #endif
1362 	/* Check if requested layer matches RSS hash fields. */
1363 	if (!(rss_desc->types & layer_types))
1364 		return 0;
1365 	return hash_fields;
1366 }
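
/*
 * Illustrative sketch (not part of the driver): requesting inner UDP
 * hashing on a tunnel sub-flow (rss_desc assumed in scope, with
 * level >= 2 for inner RSS):
 *
 *   uint64_t fields = mlx5_flow_hashfields_adjust
 *           (rss_desc, 1, RTE_ETH_RSS_UDP,
 *            IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP);
 *
 * With HAVE_IBV_DEVICE_TUNNEL_SUPPORT this returns the UDP port hash bits
 * ORed with IBV_RX_HASH_INNER; it returns 0 when rss_desc->types does not
 * cover the given layer_types.
 */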
1367 
1368 /**
1369  * Look up and set the ptype in the Rx data part. Only a single ptype can be
1370  * used; if several tunnel rules are used on this queue, the tunnel ptype is
1371  * cleared.
1372  *
1373  * @param rxq_ctrl
1374  *   Rx queue to update.
1375  */
1376 static void
1377 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
1378 {
1379 	unsigned int i;
1380 	uint32_t tunnel_ptype = 0;
1381 
1382 	/* Look up for the ptype to use. */
1383 	for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
1384 		if (!rxq_ctrl->flow_tunnels_n[i])
1385 			continue;
1386 		if (!tunnel_ptype) {
1387 			tunnel_ptype = tunnels_info[i].ptype;
1388 		} else {
1389 			tunnel_ptype = 0;
1390 			break;
1391 		}
1392 	}
1393 	rxq_ctrl->rxq.tunnel = tunnel_ptype;
1394 }
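
/*
 * Illustrative example (not part of the driver): if a queue is referenced
 * only by VXLAN rules, exactly one flow_tunnels_n[] slot is non-zero and
 * rxq.tunnel becomes RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP. Once a
 * GRE rule is added to the same queue, two slots are non-zero and
 * rxq.tunnel is cleared, since only one tunnel ptype can be reported per
 * queue.
 */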
1395 
1396 /**
1397  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
1398  * flow.
1399  *
1400  * @param[in] dev
1401  *   Pointer to the Ethernet device structure.
1402  * @param[in] dev_handle
1403  *   Pointer to device flow handle structure.
1404  */
1405 void
1406 flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
1407 		       struct mlx5_flow_handle *dev_handle)
1408 {
1409 	struct mlx5_priv *priv = dev->data->dev_private;
1410 	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1411 	struct mlx5_ind_table_obj *ind_tbl = NULL;
1412 	unsigned int i;
1413 
1414 	if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1415 		struct mlx5_hrxq *hrxq;
1416 
1417 		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1418 			      dev_handle->rix_hrxq);
1419 		if (hrxq)
1420 			ind_tbl = hrxq->ind_table;
1421 	} else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1422 		struct mlx5_shared_action_rss *shared_rss;
1423 
1424 		shared_rss = mlx5_ipool_get
1425 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1426 			 dev_handle->rix_srss);
1427 		if (shared_rss)
1428 			ind_tbl = shared_rss->ind_tbl;
1429 	}
1430 	if (!ind_tbl)
1431 		return;
1432 	for (i = 0; i != ind_tbl->queues_n; ++i) {
1433 		int idx = ind_tbl->queues[i];
1434 		struct mlx5_rxq_ctrl *rxq_ctrl;
1435 
1436 		if (mlx5_is_external_rxq(dev, idx))
1437 			continue;
1438 		rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
1439 		MLX5_ASSERT(rxq_ctrl != NULL);
1440 		if (rxq_ctrl == NULL)
1441 			continue;
1442 		/*
1443 		 * To support metadata register copy on Tx loopback,
1444 		 * this must always be enabled (metadata may arrive
1445 		 * from another port, not only from local flows).
1446 		 */
1447 		if (tunnel) {
1448 			unsigned int j;
1449 
1450 			/* Increase the counter matching the flow. */
1451 			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1452 				if ((tunnels_info[j].tunnel &
1453 				     dev_handle->layers) ==
1454 				    tunnels_info[j].tunnel) {
1455 					rxq_ctrl->flow_tunnels_n[j]++;
1456 					break;
1457 				}
1458 			}
1459 			flow_rxq_tunnel_ptype_update(rxq_ctrl);
1460 		}
1461 	}
1462 }
1463 
1464 static void
1465 flow_rxq_mark_flag_set(struct rte_eth_dev *dev)
1466 {
1467 	struct mlx5_priv *priv = dev->data->dev_private;
1468 	struct mlx5_rxq_ctrl *rxq_ctrl;
1469 
1470 	if (priv->mark_enabled)
1471 		return;
1472 	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
1473 		rxq_ctrl->rxq.mark = 1;
1474 	}
1475 	priv->mark_enabled = 1;
1476 }
1477 
1478 /**
1479  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
1480  *
1481  * @param[in] dev
1482  *   Pointer to the Ethernet device structure.
1483  * @param[in] flow
1484  *   Pointer to flow structure.
1485  */
1486 static void
1487 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1488 {
1489 	struct mlx5_priv *priv = dev->data->dev_private;
1490 	uint32_t handle_idx;
1491 	struct mlx5_flow_handle *dev_handle;
1492 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
1493 
1494 	MLX5_ASSERT(wks);
1495 	if (wks->mark)
1496 		flow_rxq_mark_flag_set(dev);
1497 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1498 		       handle_idx, dev_handle, next)
1499 		flow_drv_rxq_flags_set(dev, dev_handle);
1500 }
1501 
1502 /**
1503  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1504  * device flow if no other flow uses it with the same kind of request.
1505  *
1506  * @param dev
1507  *   Pointer to Ethernet device.
1508  * @param[in] dev_handle
1509  *   Pointer to the device flow handle structure.
1510  */
1511 static void
1512 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
1513 			struct mlx5_flow_handle *dev_handle)
1514 {
1515 	struct mlx5_priv *priv = dev->data->dev_private;
1516 	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1517 	struct mlx5_ind_table_obj *ind_tbl = NULL;
1518 	unsigned int i;
1519 
1520 	if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1521 		struct mlx5_hrxq *hrxq;
1522 
1523 		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1524 			      dev_handle->rix_hrxq);
1525 		if (hrxq)
1526 			ind_tbl = hrxq->ind_table;
1527 	} else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1528 		struct mlx5_shared_action_rss *shared_rss;
1529 
1530 		shared_rss = mlx5_ipool_get
1531 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1532 			 dev_handle->rix_srss);
1533 		if (shared_rss)
1534 			ind_tbl = shared_rss->ind_tbl;
1535 	}
1536 	if (!ind_tbl)
1537 		return;
1538 	MLX5_ASSERT(dev->data->dev_started);
1539 	for (i = 0; i != ind_tbl->queues_n; ++i) {
1540 		int idx = ind_tbl->queues[i];
1541 		struct mlx5_rxq_ctrl *rxq_ctrl;
1542 
1543 		if (mlx5_is_external_rxq(dev, idx))
1544 			continue;
1545 		rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
1546 		MLX5_ASSERT(rxq_ctrl != NULL);
1547 		if (rxq_ctrl == NULL)
1548 			continue;
1549 		if (tunnel) {
1550 			unsigned int j;
1551 
1552 			/* Decrease the counter matching the flow. */
1553 			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1554 				if ((tunnels_info[j].tunnel &
1555 				     dev_handle->layers) ==
1556 				    tunnels_info[j].tunnel) {
1557 					rxq_ctrl->flow_tunnels_n[j]--;
1558 					break;
1559 				}
1560 			}
1561 			flow_rxq_tunnel_ptype_update(rxq_ctrl);
1562 		}
1563 	}
1564 }
1565 
1566 /**
1567  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1568  * @p flow if no other flow uses it with the same kind of request.
1569  *
1570  * @param dev
1571  *   Pointer to Ethernet device.
1572  * @param[in] flow
1573  *   Pointer to the flow.
1574  */
1575 static void
1576 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1577 {
1578 	struct mlx5_priv *priv = dev->data->dev_private;
1579 	uint32_t handle_idx;
1580 	struct mlx5_flow_handle *dev_handle;
1581 
1582 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1583 		       handle_idx, dev_handle, next)
1584 		flow_drv_rxq_flags_trim(dev, dev_handle);
1585 }
1586 
1587 /**
1588  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
1589  *
1590  * @param dev
1591  *   Pointer to Ethernet device.
1592  */
1593 static void
1594 flow_rxq_flags_clear(struct rte_eth_dev *dev)
1595 {
1596 	struct mlx5_priv *priv = dev->data->dev_private;
1597 	unsigned int i;
1598 
1599 	for (i = 0; i != priv->rxqs_n; ++i) {
1600 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1601 		unsigned int j;
1602 
1603 		if (rxq == NULL || rxq->ctrl == NULL)
1604 			continue;
1605 		rxq->ctrl->rxq.mark = 0;
1606 		for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
1607 			rxq->ctrl->flow_tunnels_n[j] = 0;
1608 		rxq->ctrl->rxq.tunnel = 0;
1609 	}
1610 	priv->mark_enabled = 0;
1611 }
1612 
1613 /**
1614  * Set the Rx queue dynamic metadata (mask and offset) for a flow
1615  *
1616  * @param[in] dev
1617  *   Pointer to the Ethernet device structure.
1618  */
1619 void
1620 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
1621 {
1622 	struct mlx5_priv *priv = dev->data->dev_private;
1623 	unsigned int i;
1624 
1625 	for (i = 0; i != priv->rxqs_n; ++i) {
1626 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1627 		struct mlx5_rxq_data *data;
1628 
1629 		if (rxq == NULL || rxq->ctrl == NULL)
1630 			continue;
1631 		data = &rxq->ctrl->rxq;
1632 		if (!rte_flow_dynf_metadata_avail()) {
1633 			data->dynf_meta = 0;
1634 			data->flow_meta_mask = 0;
1635 			data->flow_meta_offset = -1;
1636 			data->flow_meta_port_mask = 0;
1637 		} else {
1638 			data->dynf_meta = 1;
1639 			data->flow_meta_mask = rte_flow_dynf_metadata_mask;
1640 			data->flow_meta_offset = rte_flow_dynf_metadata_offs;
1641 			data->flow_meta_port_mask = priv->sh->dv_meta_mask;
1642 		}
1643 	}
1644 }
1645 
1646 /*
1647  * Return a pointer to the desired action in the list of actions.
1648  *
1649  * @param[in] actions
1650  *   The list of actions to search the action in.
1651  * @param[in] action
1652  *   The action to find.
1653  *
1654  * @return
1655  *   Pointer to the action in the list, if found. NULL otherwise.
1656  */
1657 const struct rte_flow_action *
1658 mlx5_flow_find_action(const struct rte_flow_action *actions,
1659 		      enum rte_flow_action_type action)
1660 {
1661 	if (actions == NULL)
1662 		return NULL;
1663 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
1664 		if (actions->type == action)
1665 			return actions;
1666 	return NULL;
1667 }
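
/*
 * Illustrative sketch (not part of the driver): checking whether an
 * action list carries an RSS action and reading its configuration:
 *
 *   const struct rte_flow_action *rss_act =
 *           mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);
 *
 *   if (rss_act != NULL) {
 *           const struct rte_flow_action_rss *rss = rss_act->conf;
 *
 *           ... inspect rss->types, rss->queue_num, etc. ...
 *   }
 */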
1668 
1669 /*
1670  * Validate the flag action.
1671  *
1672  * @param[in] action_flags
1673  *   Bit-fields that holds the actions detected until now.
1674  * @param[in] attr
1675  *   Attributes of flow that includes this action.
1676  * @param[out] error
1677  *   Pointer to error structure.
1678  *
1679  * @return
1680  *   0 on success, a negative errno value otherwise and rte_errno is set.
1681  */
1682 int
1683 mlx5_flow_validate_action_flag(uint64_t action_flags,
1684 			       const struct rte_flow_attr *attr,
1685 			       struct rte_flow_error *error)
1686 {
1687 	if (action_flags & MLX5_FLOW_ACTION_MARK)
1688 		return rte_flow_error_set(error, EINVAL,
1689 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1690 					  "can't mark and flag in same flow");
1691 	if (action_flags & MLX5_FLOW_ACTION_FLAG)
1692 		return rte_flow_error_set(error, EINVAL,
1693 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1694 					  "can't have 2 flag"
1695 					  " actions in same flow");
1696 	if (attr->egress)
1697 		return rte_flow_error_set(error, ENOTSUP,
1698 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1699 					  "flag action not supported for "
1700 					  "egress");
1701 	return 0;
1702 }
1703 
1704 /**
1705  * Validate the mark action.
1706  *
1707  * @param[in] action
1708  *   Pointer to the mark action.
1709  * @param[in] action_flags
1710  *   Bit-fields that hold the actions detected until now.
1711  * @param[in] attr
1712  *   Attributes of flow that includes this action.
1713  * @param[out] error
1714  *   Pointer to error structure.
1715  *
1716  * @return
1717  *   0 on success, a negative errno value otherwise and rte_errno is set.
1718  */
1719 int
1720 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
1721 			       uint64_t action_flags,
1722 			       const struct rte_flow_attr *attr,
1723 			       struct rte_flow_error *error)
1724 {
1725 	const struct rte_flow_action_mark *mark = action->conf;
1726 
1727 	if (!mark)
1728 		return rte_flow_error_set(error, EINVAL,
1729 					  RTE_FLOW_ERROR_TYPE_ACTION,
1730 					  action,
1731 					  "configuration cannot be null");
1732 	if (mark->id >= MLX5_FLOW_MARK_MAX)
1733 		return rte_flow_error_set(error, EINVAL,
1734 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1735 					  &mark->id,
1736 					  "mark id must be in 0 <= id < "
1737 					  RTE_STR(MLX5_FLOW_MARK_MAX));
1738 	if (action_flags & MLX5_FLOW_ACTION_FLAG)
1739 		return rte_flow_error_set(error, EINVAL,
1740 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1741 					  "can't flag and mark in same flow");
1742 	if (action_flags & MLX5_FLOW_ACTION_MARK)
1743 		return rte_flow_error_set(error, EINVAL,
1744 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1745 					  "can't have 2 mark actions in same"
1746 					  " flow");
1747 	if (attr->egress)
1748 		return rte_flow_error_set(error, ENOTSUP,
1749 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1750 					  "mark action not supported for "
1751 					  "egress");
1752 	return 0;
1753 }
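
/*
 * Usage sketch (illustrative): a mark action that passes the checks
 * above - non-NULL configuration, id below MLX5_FLOW_MARK_MAX, ingress
 * attribute and no flag/mark action seen so far (action_flags == 0).
 *
 *	const struct rte_flow_action_mark mark = { .id = 0xbeef };
 *	const struct rte_flow_action act = {
 *		.type = RTE_FLOW_ACTION_TYPE_MARK,
 *		.conf = &mark,
 *	};
 *	const struct rte_flow_attr attr = { .ingress = 1 };
 *	int rc = mlx5_flow_validate_action_mark(&act, 0, &attr, &error);
 */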
1754 
1755 /**
1756  * Validate the drop action.
1757  *
1758  * @param[in] action_flags
1759  *   Bit-fields that hold the actions detected until now.
1760  * @param[in] attr
1761  *   Attributes of flow that includes this action.
1762  * @param[out] error
1763  *   Pointer to error structure.
1764  *
1765  * @return
1766  *   0 on success, a negative errno value otherwise and rte_errno is set.
1767  */
1768 int
1769 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1770 			       const struct rte_flow_attr *attr,
1771 			       struct rte_flow_error *error)
1772 {
1773 	if (attr->egress)
1774 		return rte_flow_error_set(error, ENOTSUP,
1775 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1776 					  "drop action not supported for "
1777 					  "egress");
1778 	return 0;
1779 }
1780 
1781 /**
1782  * Validate the queue action.
1783  *
1784  * @param[in] action
1785  *   Pointer to the queue action.
1786  * @param[in] action_flags
1787  *   Bit-fields that hold the actions detected until now.
1788  * @param[in] dev
1789  *   Pointer to the Ethernet device structure.
1790  * @param[in] attr
1791  *   Attributes of flow that includes this action.
1792  * @param[out] error
1793  *   Pointer to error structure.
1794  *
1795  * @return
1796  *   0 on success, a negative errno value otherwise and rte_errno is set.
1797  */
1798 int
1799 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1800 				uint64_t action_flags,
1801 				struct rte_eth_dev *dev,
1802 				const struct rte_flow_attr *attr,
1803 				struct rte_flow_error *error)
1804 {
1805 	struct mlx5_priv *priv = dev->data->dev_private;
1806 	const struct rte_flow_action_queue *queue = action->conf;
1807 
1808 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1809 		return rte_flow_error_set(error, EINVAL,
1810 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1811 					  "can't have 2 fate actions in"
1812 					  " same flow");
1813 	if (attr->egress)
1814 		return rte_flow_error_set(error, ENOTSUP,
1815 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1816 					  "queue action not supported for egress.");
1817 	if (mlx5_is_external_rxq(dev, queue->index))
1818 		return 0;
1819 	if (!priv->rxqs_n)
1820 		return rte_flow_error_set(error, EINVAL,
1821 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1822 					  NULL, "No Rx queues configured");
1823 	if (queue->index >= priv->rxqs_n)
1824 		return rte_flow_error_set(error, EINVAL,
1825 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1826 					  &queue->index,
1827 					  "queue index out of range");
1828 	if (mlx5_rxq_get(dev, queue->index) == NULL)
1829 		return rte_flow_error_set(error, EINVAL,
1830 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1831 					  &queue->index,
1832 					  "queue is not configured");
1833 	return 0;
1834 }
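
/*
 * Usage sketch (illustrative): a queue action accepted by the check
 * above - the index must reference a configured Rx queue (below
 * priv->rxqs_n) unless it denotes an external Rx queue.
 *
 *	const struct rte_flow_action_queue queue = { .index = 0 };
 *	const struct rte_flow_action act = {
 *		.type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *		.conf = &queue,
 *	};
 *	const struct rte_flow_attr attr = { .ingress = 1 };
 *	int rc = mlx5_flow_validate_action_queue(&act, 0, dev, &attr, &error);
 */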
1835 
1836 /**
1837  * Validate queue numbers for device RSS.
1838  *
1839  * @param[in] dev
1840  *   Configured device.
1841  * @param[in] queues
1842  *   Array of queue numbers.
1843  * @param[in] queues_n
1844  *   Size of the @p queues array.
1845  * @param[out] error
1846  *   On error, filled with a textual error description.
1847  * @param[out] queue_idx
1848  *   On error, filled with an offending queue index in @p queues array.
1849  *
1850  * @return
1851  *   0 on success, a negative errno code on error.
1852  */
1853 static int
1854 mlx5_validate_rss_queues(struct rte_eth_dev *dev,
1855 			 const uint16_t *queues, uint32_t queues_n,
1856 			 const char **error, uint32_t *queue_idx)
1857 {
1858 	const struct mlx5_priv *priv = dev->data->dev_private;
1859 	bool is_hairpin = false;
1860 	bool is_ext_rss = false;
1861 	uint32_t i;
1862 
1863 	for (i = 0; i != queues_n; ++i) {
1864 		struct mlx5_rxq_ctrl *rxq_ctrl;
1865 
1866 		if (mlx5_is_external_rxq(dev, queues[i])) {
1867 			is_ext_rss = true;
1868 			continue;
1869 		}
1870 		if (is_ext_rss) {
1871 			*error = "Combining external and regular RSS queues is not supported";
1872 			*queue_idx = i;
1873 			return -ENOTSUP;
1874 		}
1875 		if (queues[i] >= priv->rxqs_n) {
1876 			*error = "queue index out of range";
1877 			*queue_idx = i;
1878 			return -EINVAL;
1879 		}
1880 		rxq_ctrl = mlx5_rxq_ctrl_get(dev, queues[i]);
1881 		if (rxq_ctrl == NULL) {
1882 			*error = "queue is not configured";
1883 			*queue_idx = i;
1884 			return -EINVAL;
1885 		}
1886 		if (i == 0 && rxq_ctrl->is_hairpin)
1887 			is_hairpin = true;
1888 		if (is_hairpin != rxq_ctrl->is_hairpin) {
1889 			*error = "combining hairpin and regular RSS queues is not supported";
1890 			*queue_idx = i;
1891 			return -ENOTSUP;
1892 		}
1893 	}
1894 	return 0;
1895 }
1896 
1897 /**
1898  * Validate the RSS action.
1899  *
1900  * @param[in] dev
1901  *   Pointer to the Ethernet device structure.
1902  * @param[in] action
1903  *   Pointer to the RSS action.
1904  * @param[out] error
1905  *   Pointer to error structure.
1906  *
1907  * @return
1908  *   0 on success, a negative errno value otherwise and rte_errno is set.
1909  */
1910 int
1911 mlx5_validate_action_rss(struct rte_eth_dev *dev,
1912 			 const struct rte_flow_action *action,
1913 			 struct rte_flow_error *error)
1914 {
1915 	struct mlx5_priv *priv = dev->data->dev_private;
1916 	const struct rte_flow_action_rss *rss = action->conf;
1917 	int ret;
1918 	const char *message;
1919 	uint32_t queue_idx;
1920 
1921 	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1922 	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1923 		return rte_flow_error_set(error, ENOTSUP,
1924 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1925 					  &rss->func,
1926 					  "RSS hash function not supported");
1927 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1928 	if (rss->level > 2)
1929 #else
1930 	if (rss->level > 1)
1931 #endif
1932 		return rte_flow_error_set(error, ENOTSUP,
1933 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1934 					  &rss->level,
1935 					  "tunnel RSS is not supported");
1936 	/* allow RSS key_len 0 in case of NULL (default) RSS key. */
1937 	if (rss->key_len == 0 && rss->key != NULL)
1938 		return rte_flow_error_set(error, ENOTSUP,
1939 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1940 					  &rss->key_len,
1941 					  "RSS hash key length 0");
1942 	if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1943 		return rte_flow_error_set(error, ENOTSUP,
1944 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1945 					  &rss->key_len,
1946 					  "RSS hash key too small");
1947 	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1948 		return rte_flow_error_set(error, ENOTSUP,
1949 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1950 					  &rss->key_len,
1951 					  "RSS hash key too large");
1952 	if (rss->queue_num > priv->sh->dev_cap.ind_table_max_size)
1953 		return rte_flow_error_set(error, ENOTSUP,
1954 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1955 					  &rss->queue_num,
1956 					  "number of queues too large");
1957 	if (rss->types & MLX5_RSS_HF_MASK)
1958 		return rte_flow_error_set(error, ENOTSUP,
1959 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1960 					  &rss->types,
1961 					  "some RSS protocols are not"
1962 					  " supported");
1963 	if ((rss->types & (RTE_ETH_RSS_L3_SRC_ONLY | RTE_ETH_RSS_L3_DST_ONLY)) &&
1964 	    !(rss->types & RTE_ETH_RSS_IP))
1965 		return rte_flow_error_set(error, EINVAL,
1966 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1967 					  "L3 partial RSS requested but L3 RSS"
1968 					  " type not specified");
1969 	if ((rss->types & (RTE_ETH_RSS_L4_SRC_ONLY | RTE_ETH_RSS_L4_DST_ONLY)) &&
1970 	    !(rss->types & (RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP)))
1971 		return rte_flow_error_set(error, EINVAL,
1972 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1973 					  "L4 partial RSS requested but L4 RSS"
1974 					  " type not specified");
1975 	if (!priv->rxqs_n && priv->ext_rxqs == NULL)
1976 		return rte_flow_error_set(error, EINVAL,
1977 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1978 					  NULL, "No Rx queues configured");
1979 	if (!rss->queue_num)
1980 		return rte_flow_error_set(error, EINVAL,
1981 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1982 					  NULL, "No queues configured");
1983 	ret = mlx5_validate_rss_queues(dev, rss->queue, rss->queue_num,
1984 				       &message, &queue_idx);
1985 	if (ret != 0) {
1986 		return rte_flow_error_set(error, -ret,
1987 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1988 					  &rss->queue[queue_idx], message);
1989 	}
1990 	return 0;
1991 }
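
/*
 * Usage sketch (illustrative): an RSS action satisfying the checks
 * above - Toeplitz hash, outer level, default key (NULL/0), supported
 * type bits and regular (non-hairpin, non-external) queues.
 *
 *	static const uint16_t queues[] = { 0, 1, 2, 3 };
 *	const struct rte_flow_action_rss rss_conf = {
 *		.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *		.level = 1,
 *		.types = RTE_ETH_RSS_IP | RTE_ETH_RSS_TCP,
 *		.key = NULL,
 *		.key_len = 0,
 *		.queue = queues,
 *		.queue_num = RTE_DIM(queues),
 *	};
 */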
1992 
1993 /**
1994  * Validate the RSS action.
1995  *
1996  * @param[in] action
1997  *   Pointer to the RSS action.
1998  * @param[in] action_flags
1999  *   Bit-fields that hold the actions detected until now.
2000  * @param[in] dev
2001  *   Pointer to the Ethernet device structure.
2002  * @param[in] attr
2003  *   Attributes of flow that includes this action.
2004  * @param[in] item_flags
2005  *   Items that were detected.
2006  * @param[out] error
2007  *   Pointer to error structure.
2008  *
2009  * @return
2010  *   0 on success, a negative errno value otherwise and rte_errno is set.
2011  */
2012 int
2013 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
2014 			      uint64_t action_flags,
2015 			      struct rte_eth_dev *dev,
2016 			      const struct rte_flow_attr *attr,
2017 			      uint64_t item_flags,
2018 			      struct rte_flow_error *error)
2019 {
2020 	const struct rte_flow_action_rss *rss = action->conf;
2021 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2022 	int ret;
2023 
2024 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2025 		return rte_flow_error_set(error, EINVAL,
2026 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2027 					  "can't have 2 fate actions"
2028 					  " in same flow");
2029 	ret = mlx5_validate_action_rss(dev, action, error);
2030 	if (ret)
2031 		return ret;
2032 	if (attr->egress)
2033 		return rte_flow_error_set(error, ENOTSUP,
2034 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2035 					  "rss action not supported for "
2036 					  "egress");
2037 	if (rss->level > 1 && !tunnel)
2038 		return rte_flow_error_set(error, EINVAL,
2039 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2040 					  "inner RSS is not supported for "
2041 					  "non-tunnel flows");
2042 	if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
2043 	    !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
2044 		return rte_flow_error_set(error, EINVAL,
2045 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2046 					  "RSS on eCPRI is not supported yet");
2047 	}
2048 	if ((item_flags & MLX5_FLOW_LAYER_MPLS) &&
2049 	    !(item_flags &
2050 	      (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3)) &&
2051 	    rss->level > 1)
2052 		return rte_flow_error_set(error, EINVAL,
2053 					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
2054 					  "inner RSS on MPLS requires inner L2/L3 items after MPLS in the pattern");
2055 	return 0;
2056 }
2057 
2058 /**
2059  * Validate the default miss action.
2060  *
2061  * @param[in] action_flags
2062  *   Bit-fields that hold the actions detected until now.
2063  * @param[out] error
2064  *   Pointer to error structure.
2065  *
2066  * @return
2067  *   0 on success, a negative errno value otherwise and rte_errno is set.
2068  */
2069 int
2070 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
2071 				const struct rte_flow_attr *attr,
2072 				struct rte_flow_error *error)
2073 {
2074 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2075 		return rte_flow_error_set(error, EINVAL,
2076 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2077 					  "can't have 2 fate actions in"
2078 					  " same flow");
2079 	if (attr->egress)
2080 		return rte_flow_error_set(error, ENOTSUP,
2081 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2082 					  "default miss action not supported "
2083 					  "for egress");
2084 	if (attr->group)
2085 		return rte_flow_error_set(error, ENOTSUP,
2086 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
2087 					  "only group 0 is supported");
2088 	if (attr->transfer)
2089 		return rte_flow_error_set(error, ENOTSUP,
2090 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
2091 					  NULL, "transfer is not supported");
2092 	return 0;
2093 }
2094 
2095 /**
2096  * Validate the count action.
2097  *
2098  * @param[in] dev
2099  *   Pointer to the Ethernet device structure.
2100  * @param[in] attr
2101  *   Attributes of flow that includes this action.
2102  * @param[out] error
2103  *   Pointer to error structure.
2104  *
2105  * @return
2106  *   0 on success, a negative errno value otherwise and rte_errno is set.
2107  */
2108 int
2109 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
2110 				const struct rte_flow_attr *attr,
2111 				struct rte_flow_error *error)
2112 {
2113 	if (attr->egress)
2114 		return rte_flow_error_set(error, ENOTSUP,
2115 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2116 					  "count action not supported for "
2117 					  "egress");
2118 	return 0;
2119 }
2120 
2121 /**
2122  * Validate the ASO CT action.
2123  *
2124  * @param[in] dev
2125  *   Pointer to the Ethernet device structure.
2126  * @param[in] conntrack
2127  *   Pointer to the CT action profile.
2128  * @param[out] error
2129  *   Pointer to error structure.
2130  *
2131  * @return
2132  *   0 on success, a negative errno value otherwise and rte_errno is set.
2133  */
2134 int
2135 mlx5_validate_action_ct(struct rte_eth_dev *dev,
2136 			const struct rte_flow_action_conntrack *conntrack,
2137 			struct rte_flow_error *error)
2138 {
2139 	RTE_SET_USED(dev);
2140 
2141 	if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
2142 		return rte_flow_error_set(error, EINVAL,
2143 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2144 					  "Invalid CT state");
2145 	if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
2146 		return rte_flow_error_set(error, EINVAL,
2147 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2148 					  "Invalid last TCP packet flag");
2149 	return 0;
2150 }
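
/*
 * Usage sketch (illustrative): the two profile fields checked above,
 * set to values inside the accepted ranges. A real conntrack profile
 * carries many more fields (directions, windows, sequence numbers).
 *
 *	struct rte_flow_action_conntrack profile = {
 *		.state = RTE_FLOW_CONNTRACK_STATE_ESTABLISHED,
 *		.last_index = RTE_FLOW_CONNTRACK_FLAG_SYN,
 *	};
 *	int rc = mlx5_validate_action_ct(dev, &profile, &error);
 */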
2151 
2152 /**
2153  * Verify the @p attributes will be correctly understood by the NIC and store
2154  * them in the @p flow if everything is correct.
2155  *
2156  * @param[in] dev
2157  *   Pointer to the Ethernet device structure.
2158  * @param[in] attributes
2159  *   Pointer to flow attributes
2160  * @param[out] error
2161  *   Pointer to error structure.
2162  *
2163  * @return
2164  *   0 on success, a negative errno value otherwise and rte_errno is set.
2165  */
2166 int
2167 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
2168 			      const struct rte_flow_attr *attributes,
2169 			      struct rte_flow_error *error)
2170 {
2171 	struct mlx5_priv *priv = dev->data->dev_private;
2172 	uint32_t priority_max = priv->sh->flow_max_priority - 1;
2173 
2174 	if (attributes->group)
2175 		return rte_flow_error_set(error, ENOTSUP,
2176 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
2177 					  NULL, "groups are not supported");
2178 	if (attributes->priority != MLX5_FLOW_LOWEST_PRIO_INDICATOR &&
2179 	    attributes->priority >= priority_max)
2180 		return rte_flow_error_set(error, ENOTSUP,
2181 					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
2182 					  NULL, "priority out of range");
2183 	if (attributes->egress)
2184 		return rte_flow_error_set(error, ENOTSUP,
2185 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2186 					  "egress is not supported");
2187 	if (attributes->transfer && !priv->sh->config.dv_esw_en)
2188 		return rte_flow_error_set(error, ENOTSUP,
2189 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
2190 					  NULL, "transfer is not supported");
2191 	if (!attributes->ingress)
2192 		return rte_flow_error_set(error, EINVAL,
2193 					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
2194 					  NULL,
2195 					  "ingress attribute is mandatory");
2196 	return 0;
2197 }
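
/*
 * Usage sketch (illustrative): attributes accepted by this check -
 * ingress only, group 0, priority below priv->sh->flow_max_priority.
 *
 *	const struct rte_flow_attr attr = {
 *		.group = 0,
 *		.priority = 0,
 *		.ingress = 1,
 *	};
 *	int rc = mlx5_flow_validate_attributes(dev, &attr, &error);
 */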
2198 
2199 /**
2200  * Validate ICMP6 item.
2201  *
2202  * @param[in] item
2203  *   Item specification.
2204  * @param[in] item_flags
2205  *   Bit-fields that hold the items detected until now.
2206  * @param[in] target_protocol
2207  *   The next protocol in the previous item.
2208  * @param[out] error
2209  *   Pointer to error structure.
2210  *
2211  * @return
2212  *   0 on success, a negative errno value otherwise and rte_errno is set.
2213  */
2214 int
2215 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
2216 			       uint64_t item_flags,
2217 			       uint8_t target_protocol,
2218 			       struct rte_flow_error *error)
2219 {
2220 	const struct rte_flow_item_icmp6 *mask = item->mask;
2221 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2222 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
2223 				      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2224 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2225 				      MLX5_FLOW_LAYER_OUTER_L4;
2226 	int ret;
2227 
2228 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
2229 		return rte_flow_error_set(error, EINVAL,
2230 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2231 					  "protocol filtering not compatible"
2232 					  " with ICMP6 layer");
2233 	if (!(item_flags & l3m))
2234 		return rte_flow_error_set(error, EINVAL,
2235 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2236 					  "IPv6 is mandatory to filter on"
2237 					  " ICMP6");
2238 	if (item_flags & l4m)
2239 		return rte_flow_error_set(error, EINVAL,
2240 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2241 					  "multiple L4 layers not supported");
2242 	if (!mask)
2243 		mask = &rte_flow_item_icmp6_mask;
2244 	ret = mlx5_flow_item_acceptable
2245 		(item, (const uint8_t *)mask,
2246 		 (const uint8_t *)&rte_flow_item_icmp6_mask,
2247 		 sizeof(struct rte_flow_item_icmp6),
2248 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2249 	if (ret < 0)
2250 		return ret;
2251 	return 0;
2252 }
2253 
2254 /**
2255  * Validate ICMP item.
2256  *
2257  * @param[in] item
2258  *   Item specification.
2259  * @param[in] item_flags
2260  *   Bit-fields that hold the items detected until now.
2261  * @param[out] error
2262  *   Pointer to error structure.
2263  *
2264  * @return
2265  *   0 on success, a negative errno value otherwise and rte_errno is set.
2266  */
2267 int
2268 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
2269 			     uint64_t item_flags,
2270 			     uint8_t target_protocol,
2271 			     struct rte_flow_error *error)
2272 {
2273 	const struct rte_flow_item_icmp *mask = item->mask;
2274 	const struct rte_flow_item_icmp nic_mask = {
2275 		.hdr.icmp_type = 0xff,
2276 		.hdr.icmp_code = 0xff,
2277 		.hdr.icmp_ident = RTE_BE16(0xffff),
2278 		.hdr.icmp_seq_nb = RTE_BE16(0xffff),
2279 	};
2280 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2281 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
2282 				      MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2283 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2284 				      MLX5_FLOW_LAYER_OUTER_L4;
2285 	int ret;
2286 
2287 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
2288 		return rte_flow_error_set(error, EINVAL,
2289 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2290 					  "protocol filtering not compatible"
2291 					  " with ICMP layer");
2292 	if (!(item_flags & l3m))
2293 		return rte_flow_error_set(error, EINVAL,
2294 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2295 					  "IPv4 is mandatory to filter"
2296 					  " on ICMP");
2297 	if (item_flags & l4m)
2298 		return rte_flow_error_set(error, EINVAL,
2299 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2300 					  "multiple L4 layers not supported");
2301 	if (!mask)
2302 		mask = &nic_mask;
2303 	ret = mlx5_flow_item_acceptable
2304 		(item, (const uint8_t *)mask,
2305 		 (const uint8_t *)&nic_mask,
2306 		 sizeof(struct rte_flow_item_icmp),
2307 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2308 	if (ret < 0)
2309 		return ret;
2310 	return 0;
2311 }
2312 
2313 /**
2314  * Validate Ethernet item.
2315  *
2316  * @param[in] item
2317  *   Item specification.
2318  * @param[in] item_flags
2319  *   Bit-fields that hold the items detected until now.
2320  * @param[out] error
2321  *   Pointer to error structure.
2322  *
2323  * @return
2324  *   0 on success, a negative errno value otherwise and rte_errno is set.
2325  */
2326 int
2327 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
2328 			    uint64_t item_flags, bool ext_vlan_sup,
2329 			    struct rte_flow_error *error)
2330 {
2331 	const struct rte_flow_item_eth *mask = item->mask;
2332 	const struct rte_flow_item_eth nic_mask = {
2333 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2334 		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2335 		.type = RTE_BE16(0xffff),
2336 		.has_vlan = ext_vlan_sup ? 1 : 0,
2337 	};
2338 	int ret;
2339 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2340 	const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2	:
2341 				       MLX5_FLOW_LAYER_OUTER_L2;
2342 
2343 	if (item_flags & ethm)
2344 		return rte_flow_error_set(error, ENOTSUP,
2345 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2346 					  "multiple L2 layers not supported");
2347 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
2348 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
2349 		return rte_flow_error_set(error, EINVAL,
2350 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2351 					  "L2 layer should not follow "
2352 					  "L3 layers");
2353 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
2354 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
2355 		return rte_flow_error_set(error, EINVAL,
2356 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2357 					  "L2 layer should not follow VLAN");
2358 	if (item_flags & MLX5_FLOW_LAYER_GTP)
2359 		return rte_flow_error_set(error, EINVAL,
2360 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2361 					  "L2 layer should not follow GTP");
2362 	if (!mask)
2363 		mask = &rte_flow_item_eth_mask;
2364 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2365 					(const uint8_t *)&nic_mask,
2366 					sizeof(struct rte_flow_item_eth),
2367 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2368 	return ret;
2369 }
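
/*
 * Usage sketch (illustrative): an Ethernet item matching one unicast
 * destination MAC with a full per-byte mask, as accepted above.
 *
 *	const struct rte_flow_item_eth eth_spec = {
 *		.dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *		.type = RTE_BE16(RTE_ETHER_TYPE_IPV4),
 *	};
 *	const struct rte_flow_item_eth eth_mask = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *		.type = RTE_BE16(0xffff),
 *	};
 *	const struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_ETH,
 *		.spec = &eth_spec,
 *		.mask = &eth_mask,
 *	};
 */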
2370 
2371 /**
2372  * Validate VLAN item.
2373  *
2374  * @param[in] item
2375  *   Item specification.
2376  * @param[in] item_flags
2377  *   Bit-fields that hold the items detected until now.
2378  * @param[in] dev
2379  *   Ethernet device flow is being created on.
2380  * @param[out] error
2381  *   Pointer to error structure.
2382  *
2383  * @return
2384  *   0 on success, a negative errno value otherwise and rte_errno is set.
2385  */
2386 int
2387 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
2388 			     uint64_t item_flags,
2389 			     struct rte_eth_dev *dev,
2390 			     struct rte_flow_error *error)
2391 {
2392 	const struct rte_flow_item_vlan *spec = item->spec;
2393 	const struct rte_flow_item_vlan *mask = item->mask;
2394 	const struct rte_flow_item_vlan nic_mask = {
2395 		.tci = RTE_BE16(UINT16_MAX),
2396 		.inner_type = RTE_BE16(UINT16_MAX),
2397 	};
2398 	uint16_t vlan_tag = 0;
2399 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2400 	int ret;
2401 	const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
2402 					MLX5_FLOW_LAYER_INNER_L4) :
2403 				       (MLX5_FLOW_LAYER_OUTER_L3 |
2404 					MLX5_FLOW_LAYER_OUTER_L4);
2405 	const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
2406 					MLX5_FLOW_LAYER_OUTER_VLAN;
2407 
2408 	if (item_flags & vlanm)
2409 		return rte_flow_error_set(error, EINVAL,
2410 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2411 					  "multiple VLAN layers not supported");
2412 	else if ((item_flags & l34m) != 0)
2413 		return rte_flow_error_set(error, EINVAL,
2414 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2415 					  "VLAN cannot follow L3/L4 layer");
2416 	if (!mask)
2417 		mask = &rte_flow_item_vlan_mask;
2418 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2419 					(const uint8_t *)&nic_mask,
2420 					sizeof(struct rte_flow_item_vlan),
2421 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2422 	if (ret)
2423 		return ret;
2424 	if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
2425 		struct mlx5_priv *priv = dev->data->dev_private;
2426 
2427 		if (priv->vmwa_context) {
2428 			/*
2429 			 * A non-NULL context means we run in a virtual machine
2430 			 * with SR-IOV enabled, so we must create a VLAN
2431 			 * interface to make the hypervisor set up the E-Switch
2432 			 * vport context correctly. We avoid creating multiple
2433 			 * VLAN interfaces, so we cannot support a VLAN tag mask.
2434 			 */
2435 			return rte_flow_error_set(error, EINVAL,
2436 						  RTE_FLOW_ERROR_TYPE_ITEM,
2437 						  item,
2438 						  "VLAN tag mask is not"
2439 						  " supported in virtual"
2440 						  " environment");
2441 		}
2442 	}
2443 	if (spec) {
2444 		vlan_tag = spec->tci;
2445 		vlan_tag &= mask->tci;
2446 	}
2447 	/*
2448 	 * From verbs perspective an empty VLAN is equivalent
2449 	 * to a packet without VLAN layer.
2450 	 */
2451 	if (!vlan_tag)
2452 		return rte_flow_error_set(error, EINVAL,
2453 					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2454 					  item->spec,
2455 					  "VLAN cannot be empty");
2456 	return 0;
2457 }
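
/*
 * Usage sketch (illustrative): a VLAN item with a non-empty TCI, which
 * the check above requires (an all-zero TCI is rejected as equivalent
 * to a packet without a VLAN layer).
 *
 *	const struct rte_flow_item_vlan vlan_spec = {
 *		.tci = RTE_BE16(100),
 *	};
 *	const struct rte_flow_item_vlan vlan_mask = {
 *		.tci = RTE_BE16(0x0fff),
 *	};
 *	const struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_VLAN,
 *		.spec = &vlan_spec,
 *		.mask = &vlan_mask,
 *	};
 */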
2458 
2459 /**
2460  * Validate IPV4 item.
2461  *
2462  * @param[in] item
2463  *   Item specification.
2464  * @param[in] item_flags
2465  *   Bit-fields that hold the items detected until now.
2466  * @param[in] last_item
2467  *   Previous validated item in the pattern items.
2468  * @param[in] ether_type
2469  *   Type in the ethernet layer header (including dot1q).
2470  * @param[in] acc_mask
2471  *   Acceptable mask; if NULL, the default internal mask
2472  *   will be used to check whether item fields are supported.
2473  * @param[in] range_accepted
2474  *   True if range of values is accepted for specific fields, false otherwise.
2475  * @param[out] error
2476  *   Pointer to error structure.
2477  *
2478  * @return
2479  *   0 on success, a negative errno value otherwise and rte_errno is set.
2480  */
2481 int
2482 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2483 			     uint64_t item_flags,
2484 			     uint64_t last_item,
2485 			     uint16_t ether_type,
2486 			     const struct rte_flow_item_ipv4 *acc_mask,
2487 			     bool range_accepted,
2488 			     struct rte_flow_error *error)
2489 {
2490 	const struct rte_flow_item_ipv4 *mask = item->mask;
2491 	const struct rte_flow_item_ipv4 *spec = item->spec;
2492 	const struct rte_flow_item_ipv4 nic_mask = {
2493 		.hdr = {
2494 			.src_addr = RTE_BE32(0xffffffff),
2495 			.dst_addr = RTE_BE32(0xffffffff),
2496 			.type_of_service = 0xff,
2497 			.next_proto_id = 0xff,
2498 		},
2499 	};
2500 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2501 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2502 				      MLX5_FLOW_LAYER_OUTER_L3;
2503 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2504 				      MLX5_FLOW_LAYER_OUTER_L4;
2505 	int ret;
2506 	uint8_t next_proto = 0xFF;
2507 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2508 				  MLX5_FLOW_LAYER_OUTER_VLAN |
2509 				  MLX5_FLOW_LAYER_INNER_VLAN);
2510 
2511 	if ((last_item & l2_vlan) && ether_type &&
2512 	    ether_type != RTE_ETHER_TYPE_IPV4)
2513 		return rte_flow_error_set(error, EINVAL,
2514 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2515 					  "IPv4 cannot follow L2/VLAN layer "
2516 					  "whose ether type is not IPv4");
2517 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2518 		if (mask && spec)
2519 			next_proto = mask->hdr.next_proto_id &
2520 				     spec->hdr.next_proto_id;
2521 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2522 			return rte_flow_error_set(error, EINVAL,
2523 						  RTE_FLOW_ERROR_TYPE_ITEM,
2524 						  item,
2525 						  "multiple tunnel "
2526 						  "not supported");
2527 	}
2528 	if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
2529 		return rte_flow_error_set(error, EINVAL,
2530 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2531 					  "wrong tunnel type - IPv6 specified "
2532 					  "but IPv4 item provided");
2533 	if (item_flags & l3m)
2534 		return rte_flow_error_set(error, ENOTSUP,
2535 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2536 					  "multiple L3 layers not supported");
2537 	else if (item_flags & l4m)
2538 		return rte_flow_error_set(error, EINVAL,
2539 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2540 					  "L3 cannot follow an L4 layer.");
2541 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2542 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2543 		return rte_flow_error_set(error, EINVAL,
2544 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2545 					  "L3 cannot follow an NVGRE layer.");
2546 	if (!mask)
2547 		mask = &rte_flow_item_ipv4_mask;
2548 	else if (mask->hdr.next_proto_id != 0 &&
2549 		 mask->hdr.next_proto_id != 0xff)
2550 		return rte_flow_error_set(error, EINVAL,
2551 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2552 					  "partial mask is not supported"
2553 					  " for protocol");
2554 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2555 					acc_mask ? (const uint8_t *)acc_mask
2556 						 : (const uint8_t *)&nic_mask,
2557 					sizeof(struct rte_flow_item_ipv4),
2558 					range_accepted, error);
2559 	if (ret < 0)
2560 		return ret;
2561 	return 0;
2562 }
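
/*
 * Usage sketch (illustrative): an IPv4 item with a full next-protocol
 * mask, since partial protocol masks are rejected above.
 *
 *	const struct rte_flow_item_ipv4 ipv4_spec = {
 *		.hdr = {
 *			.dst_addr = RTE_BE32(RTE_IPV4(192, 168, 0, 1)),
 *			.next_proto_id = IPPROTO_UDP,
 *		},
 *	};
 *	const struct rte_flow_item_ipv4 ipv4_mask = {
 *		.hdr = {
 *			.dst_addr = RTE_BE32(0xffffffff),
 *			.next_proto_id = 0xff,
 *		},
 *	};
 */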
2563 
2564 /**
2565  * Validate IPV6 item.
2566  *
2567  * @param[in] item
2568  *   Item specification.
2569  * @param[in] item_flags
2570  *   Bit-fields that hold the items detected until now.
2571  * @param[in] last_item
2572  *   Previous validated item in the pattern items.
2573  * @param[in] ether_type
2574  *   Type in the ethernet layer header (including dot1q).
2575  * @param[in] acc_mask
2576  *   Acceptable mask; if NULL, the default internal mask
2577  *   will be used to check whether item fields are supported.
2578  * @param[out] error
2579  *   Pointer to error structure.
2580  *
2581  * @return
2582  *   0 on success, a negative errno value otherwise and rte_errno is set.
2583  */
2584 int
2585 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2586 			     uint64_t item_flags,
2587 			     uint64_t last_item,
2588 			     uint16_t ether_type,
2589 			     const struct rte_flow_item_ipv6 *acc_mask,
2590 			     struct rte_flow_error *error)
2591 {
2592 	const struct rte_flow_item_ipv6 *mask = item->mask;
2593 	const struct rte_flow_item_ipv6 *spec = item->spec;
2594 	const struct rte_flow_item_ipv6 nic_mask = {
2595 		.hdr = {
2596 			.src_addr =
2597 				"\xff\xff\xff\xff\xff\xff\xff\xff"
2598 				"\xff\xff\xff\xff\xff\xff\xff\xff",
2599 			.dst_addr =
2600 				"\xff\xff\xff\xff\xff\xff\xff\xff"
2601 				"\xff\xff\xff\xff\xff\xff\xff\xff",
2602 			.vtc_flow = RTE_BE32(0xffffffff),
2603 			.proto = 0xff,
2604 		},
2605 	};
2606 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2607 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2608 				      MLX5_FLOW_LAYER_OUTER_L3;
2609 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2610 				      MLX5_FLOW_LAYER_OUTER_L4;
2611 	int ret;
2612 	uint8_t next_proto = 0xFF;
2613 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2614 				  MLX5_FLOW_LAYER_OUTER_VLAN |
2615 				  MLX5_FLOW_LAYER_INNER_VLAN);
2616 
2617 	if ((last_item & l2_vlan) && ether_type &&
2618 	    ether_type != RTE_ETHER_TYPE_IPV6)
2619 		return rte_flow_error_set(error, EINVAL,
2620 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2621 					  "IPv6 cannot follow L2/VLAN layer "
2622 					  "whose ether type is not IPv6");
2623 	if (mask && mask->hdr.proto == UINT8_MAX && spec)
2624 		next_proto = spec->hdr.proto;
2625 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2626 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2627 			return rte_flow_error_set(error, EINVAL,
2628 						  RTE_FLOW_ERROR_TYPE_ITEM,
2629 						  item,
2630 						  "multiple tunnel "
2631 						  "not supported");
2632 	}
2633 	if (next_proto == IPPROTO_HOPOPTS  ||
2634 	    next_proto == IPPROTO_ROUTING  ||
2635 	    next_proto == IPPROTO_FRAGMENT ||
2636 	    next_proto == IPPROTO_ESP	   ||
2637 	    next_proto == IPPROTO_AH	   ||
2638 	    next_proto == IPPROTO_DSTOPTS)
2639 		return rte_flow_error_set(error, EINVAL,
2640 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2641 					  "IPv6 proto (next header) should "
2642 					  "not be set as extension header");
2643 	if (item_flags & MLX5_FLOW_LAYER_IPIP)
2644 		return rte_flow_error_set(error, EINVAL,
2645 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2646 					  "wrong tunnel type - IPv4 specified "
2647 					  "but IPv6 item provided");
2648 	if (item_flags & l3m)
2649 		return rte_flow_error_set(error, ENOTSUP,
2650 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2651 					  "multiple L3 layers not supported");
2652 	else if (item_flags & l4m)
2653 		return rte_flow_error_set(error, EINVAL,
2654 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2655 					  "L3 cannot follow an L4 layer.");
2656 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2657 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2658 		return rte_flow_error_set(error, EINVAL,
2659 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2660 					  "L3 cannot follow an NVGRE layer.");
2661 	if (!mask)
2662 		mask = &rte_flow_item_ipv6_mask;
2663 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2664 					acc_mask ? (const uint8_t *)acc_mask
2665 						 : (const uint8_t *)&nic_mask,
2666 					sizeof(struct rte_flow_item_ipv6),
2667 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2668 	if (ret < 0)
2669 		return ret;
2670 	return 0;
2671 }
2672 
2673 /**
2674  * Validate UDP item.
2675  *
2676  * @param[in] item
2677  *   Item specification.
2678  * @param[in] item_flags
2679  *   Bit-fields that hold the items detected until now.
2680  * @param[in] target_protocol
2681  *   The next protocol in the previous item.
2684  * @param[out] error
2685  *   Pointer to error structure.
2686  *
2687  * @return
2688  *   0 on success, a negative errno value otherwise and rte_errno is set.
2689  */
2690 int
2691 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2692 			    uint64_t item_flags,
2693 			    uint8_t target_protocol,
2694 			    struct rte_flow_error *error)
2695 {
2696 	const struct rte_flow_item_udp *mask = item->mask;
2697 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2698 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2699 				      MLX5_FLOW_LAYER_OUTER_L3;
2700 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2701 				      MLX5_FLOW_LAYER_OUTER_L4;
2702 	int ret;
2703 
2704 	if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
2705 		return rte_flow_error_set(error, EINVAL,
2706 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2707 					  "protocol filtering not compatible"
2708 					  " with UDP layer");
2709 	if (!(item_flags & l3m))
2710 		return rte_flow_error_set(error, EINVAL,
2711 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2712 					  "L3 is mandatory to filter on L4");
2713 	if (item_flags & l4m)
2714 		return rte_flow_error_set(error, EINVAL,
2715 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2716 					  "multiple L4 layers not supported");
2717 	if (!mask)
2718 		mask = &rte_flow_item_udp_mask;
2719 	ret = mlx5_flow_item_acceptable
2720 		(item, (const uint8_t *)mask,
2721 		 (const uint8_t *)&rte_flow_item_udp_mask,
2722 		 sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2723 		 error);
2724 	if (ret < 0)
2725 		return ret;
2726 	return 0;
2727 }
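
/*
 * Usage sketch (illustrative): L4 items are only accepted after an L3
 * item, so a minimal UDP pattern is ETH / IPV4 / UDP / END.
 *
 *	const struct rte_flow_item_udp udp_spec = {
 *		.hdr.dst_port = RTE_BE16(4789),
 *	};
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &udp_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */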
2728 
2729 /**
2730  * Validate TCP item.
2731  *
2732  * @param[in] item
2733  *   Item specification.
2734  * @param[in] item_flags
2735  *   Bit-fields that hold the items detected until now.
2736  * @param[in] target_protocol
2737  *   The next protocol in the previous item.
2738  * @param[out] error
2739  *   Pointer to error structure.
2740  *
2741  * @return
2742  *   0 on success, a negative errno value otherwise and rte_errno is set.
2743  */
2744 int
2745 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2746 			    uint64_t item_flags,
2747 			    uint8_t target_protocol,
2748 			    const struct rte_flow_item_tcp *flow_mask,
2749 			    struct rte_flow_error *error)
2750 {
2751 	const struct rte_flow_item_tcp *mask = item->mask;
2752 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2753 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2754 				      MLX5_FLOW_LAYER_OUTER_L3;
2755 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2756 				      MLX5_FLOW_LAYER_OUTER_L4;
2757 	int ret;
2758 
2759 	MLX5_ASSERT(flow_mask);
2760 	if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
2761 		return rte_flow_error_set(error, EINVAL,
2762 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2763 					  "protocol filtering not compatible"
2764 					  " with TCP layer");
2765 	if (!(item_flags & l3m))
2766 		return rte_flow_error_set(error, EINVAL,
2767 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2768 					  "L3 is mandatory to filter on L4");
2769 	if (item_flags & l4m)
2770 		return rte_flow_error_set(error, EINVAL,
2771 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2772 					  "multiple L4 layers not supported");
2773 	if (!mask)
2774 		mask = &rte_flow_item_tcp_mask;
2775 	ret = mlx5_flow_item_acceptable
2776 		(item, (const uint8_t *)mask,
2777 		 (const uint8_t *)flow_mask,
2778 		 sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2779 		 error);
2780 	if (ret < 0)
2781 		return ret;
2782 	return 0;
2783 }
2784 
2785 /**
2786  * Validate VXLAN item.
2787  *
2788  * @param[in] dev
2789  *   Pointer to the Ethernet device structure.
2790  * @param[in] udp_dport
2791  *   UDP destination port.
2792  * @param[in] item
2793  *   Item specification.
2794  * @param[in] item_flags
2795  *   Bit-fields that hold the items detected until now.
2796  * @param[in] attr
2797  *   Flow rule attributes.
2798  * @param[out] error
2799  *   Pointer to error structure.
2800  *
2801  * @return
2802  *   0 on success, a negative errno value otherwise and rte_errno is set.
2803  */
2804 int
2805 mlx5_flow_validate_item_vxlan(struct rte_eth_dev *dev,
2806 			      uint16_t udp_dport,
2807 			      const struct rte_flow_item *item,
2808 			      uint64_t item_flags,
2809 			      const struct rte_flow_attr *attr,
2810 			      struct rte_flow_error *error)
2811 {
2812 	const struct rte_flow_item_vxlan *spec = item->spec;
2813 	const struct rte_flow_item_vxlan *mask = item->mask;
2814 	int ret;
2815 	struct mlx5_priv *priv = dev->data->dev_private;
2816 	union vni {
2817 		uint32_t vlan_id;
2818 		uint8_t vni[4];
2819 	} id = { .vlan_id = 0, };
2820 	const struct rte_flow_item_vxlan nic_mask = {
2821 		.vni = "\xff\xff\xff",
2822 		.rsvd1 = 0xff,
2823 	};
2824 	const struct rte_flow_item_vxlan *valid_mask;
2825 
2826 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2827 		return rte_flow_error_set(error, ENOTSUP,
2828 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2829 					  "multiple tunnel layers not"
2830 					  " supported");
2831 	valid_mask = &rte_flow_item_vxlan_mask;
2832 	/*
2833 	 * Verify only UDPv4 is present as defined in
2834 	 * https://tools.ietf.org/html/rfc7348
2835 	 */
2836 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2837 		return rte_flow_error_set(error, EINVAL,
2838 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2839 					  "no outer UDP layer found");
2840 	if (!mask)
2841 		mask = &rte_flow_item_vxlan_mask;
2842 
2843 	if (priv->sh->steering_format_version !=
2844 	    MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
2845 	    !udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN) {
2846 		/* FDB domain & NIC domain non-zero group */
2847 		if ((attr->transfer || attr->group) && priv->sh->misc5_cap)
2848 			valid_mask = &nic_mask;
2849 		/* Group zero in NIC domain */
2850 		if (!attr->group && !attr->transfer &&
2851 		    priv->sh->tunnel_header_0_1)
2852 			valid_mask = &nic_mask;
2853 	}
2854 	ret = mlx5_flow_item_acceptable
2855 		(item, (const uint8_t *)mask,
2856 		 (const uint8_t *)valid_mask,
2857 		 sizeof(struct rte_flow_item_vxlan),
2858 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2859 	if (ret < 0)
2860 		return ret;
2861 	if (spec) { /* Combine the VNI spec with its mask. */
2862 		for (ret = 0; ret < 3; ret++)
2863 			id.vni[ret + 1] = spec->vni[ret] & mask->vni[ret];
2864 	}
2865 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2866 		return rte_flow_error_set(error, ENOTSUP,
2867 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2868 					  "VXLAN tunnel must be fully defined");
2869 	return 0;
2870 }
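
/*
 * Usage sketch (illustrative): the check above demands a fully defined
 * outer stack, e.g. ETH / IPV4 / UDP / VXLAN, with the default VXLAN
 * UDP destination port 4789.
 *
 *	const struct rte_flow_item_vxlan vxlan_spec = {
 *		.vni = "\x00\x12\x34",
 *	};
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &vxlan_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */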
2871 
2872 /**
2873  * Validate VXLAN_GPE item.
2874  *
2875  * @param[in] item
2876  *   Item specification.
2877  * @param[in] item_flags
2878  *   Bit-fields that hold the items detected until now.
2879  * @param[in] dev
2880  *   Pointer to the Ethernet device structure.
2881  * @param[out] error
2884  *   Pointer to error structure.
2885  *
2886  * @return
2887  *   0 on success, a negative errno value otherwise and rte_errno is set.
2888  */
2889 int
2890 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
2891 				  uint64_t item_flags,
2892 				  struct rte_eth_dev *dev,
2893 				  struct rte_flow_error *error)
2894 {
2895 	struct mlx5_priv *priv = dev->data->dev_private;
2896 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
2897 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
2898 	int ret;
2899 	union vni {
2900 		uint32_t vlan_id;
2901 		uint8_t vni[4];
2902 	} id = { .vlan_id = 0, };
2903 
2904 	if (!priv->sh->config.l3_vxlan_en)
2905 		return rte_flow_error_set(error, ENOTSUP,
2906 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2907 					  "L3 VXLAN is not enabled by device"
2908 					  " parameter and/or not configured in"
2909 					  " firmware");
2910 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2911 		return rte_flow_error_set(error, ENOTSUP,
2912 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2913 					  "multiple tunnel layers not"
2914 					  " supported");
2915 	/*
2916 	 * Verify only UDPv4 is present as defined in
2917 	 * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe
2918 	 */
2919 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2920 		return rte_flow_error_set(error, EINVAL,
2921 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2922 					  "no outer UDP layer found");
2923 	if (!mask)
2924 		mask = &rte_flow_item_vxlan_gpe_mask;
2925 	ret = mlx5_flow_item_acceptable
2926 		(item, (const uint8_t *)mask,
2927 		 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2928 		 sizeof(struct rte_flow_item_vxlan_gpe),
2929 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2930 	if (ret < 0)
2931 		return ret;
2932 	if (spec) {
2933 		if (spec->protocol)
2934 			return rte_flow_error_set(error, ENOTSUP,
2935 						  RTE_FLOW_ERROR_TYPE_ITEM,
2936 						  item,
2937 						  "VXLAN-GPE protocol"
2938 						  " not supported");
2939 		for (ret = 0; ret < 3; ret++) /* combine spec and mask */
2940 			id.vni[ret + 1] = spec->vni[ret] & mask->vni[ret];
2941 	}
2942 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2943 		return rte_flow_error_set(error, ENOTSUP,
2944 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2945 					  "VXLAN-GPE tunnel must be fully"
2946 					  " defined");
2947 	return 0;
2948 }
2949 /**
2950  * Validate GRE Key item.
2951  *
2952  * @param[in] item
2953  *   Item specification.
2954  * @param[in] item_flags
2955  *   Bit flags to mark detected items.
2956  * @param[in] gre_item
2957  *   Pointer to gre_item
2958  * @param[out] error
2959  *   Pointer to error structure.
2960  *
2961  * @return
2962  *   0 on success, a negative errno value otherwise and rte_errno is set.
2963  */
2964 int
2965 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2966 				uint64_t item_flags,
2967 				const struct rte_flow_item *gre_item,
2968 				struct rte_flow_error *error)
2969 {
2970 	const rte_be32_t *mask = item->mask;
2971 	int ret = 0;
2972 	rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2973 	const struct rte_flow_item_gre *gre_spec;
2974 	const struct rte_flow_item_gre *gre_mask;
2975 
2976 	if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2977 		return rte_flow_error_set(error, ENOTSUP,
2978 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2979 					  "Multiple GRE keys not supported");
2980 	if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2981 		return rte_flow_error_set(error, ENOTSUP,
2982 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2983 					  "No preceding GRE header");
2984 	if (item_flags & MLX5_FLOW_LAYER_INNER)
2985 		return rte_flow_error_set(error, ENOTSUP,
2986 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2987 					  "GRE key following a wrong item");
2988 	gre_mask = gre_item->mask;
2989 	if (!gre_mask)
2990 		gre_mask = &rte_flow_item_gre_mask;
2991 	gre_spec = gre_item->spec;
2992 	if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2993 			 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2994 		return rte_flow_error_set(error, EINVAL,
2995 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2996 					  "Key bit must be on");
2997 
2998 	if (!mask)
2999 		mask = &gre_key_default_mask;
3000 	ret = mlx5_flow_item_acceptable
3001 		(item, (const uint8_t *)mask,
3002 		 (const uint8_t *)&gre_key_default_mask,
3003 		 sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3004 	return ret;
3005 }
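
/*
 * Usage sketch (illustrative): a GRE item with the key-present bit
 * (0x2000 in c_rsvd0_ver) set, followed by the 32-bit GRE_KEY item,
 * as the checks above require.
 *
 *	const struct rte_flow_item_gre gre_spec = {
 *		.c_rsvd0_ver = RTE_BE16(0x2000),
 *		.protocol = RTE_BE16(RTE_ETHER_TYPE_IPV4),
 *	};
 *	const rte_be32_t gre_key = RTE_BE32(0xdeadbeef);
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_GRE, .spec = &gre_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_GRE_KEY, .spec = &gre_key },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */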
3006 
3007 /**
3008  * Validate GRE optional item.
3009  *
3010  * @param[in] dev
3011  *   Pointer to the Ethernet device structure.
3012  * @param[in] item
3013  *   Item specification.
3014  * @param[in] item_flags
3015  *   Bit flags to mark detected items.
3016  * @param[in] attr
3017  *   Flow rule attributes.
3018  * @param[in] gre_item
3019  *   Pointer to gre_item
3020  * @param[out] error
3021  *   Pointer to error structure.
3022  *
3023  * @return
3024  *   0 on success, a negative errno value otherwise and rte_errno is set.
3025  */
3026 int
3027 mlx5_flow_validate_item_gre_option(struct rte_eth_dev *dev,
3028 				   const struct rte_flow_item *item,
3029 				   uint64_t item_flags,
3030 				   const struct rte_flow_attr *attr,
3031 				   const struct rte_flow_item *gre_item,
3032 				   struct rte_flow_error *error)
3033 {
3034 	const struct rte_flow_item_gre *gre_spec = gre_item->spec;
3035 	const struct rte_flow_item_gre *gre_mask = gre_item->mask;
3036 	const struct rte_flow_item_gre_opt *spec = item->spec;
3037 	const struct rte_flow_item_gre_opt *mask = item->mask;
3038 	struct mlx5_priv *priv = dev->data->dev_private;
3039 	int ret = 0;
3040 	struct rte_flow_item_gre_opt nic_mask = {
3041 		.checksum_rsvd = {
3042 			.checksum = RTE_BE16(UINT16_MAX),
3043 			.reserved1 = 0x0,
3044 		},
3045 		.key = {
3046 			.key = RTE_BE32(UINT32_MAX),
3047 		},
3048 		.sequence = {
3049 			.sequence = RTE_BE32(UINT32_MAX),
3050 		},
3051 	};
3052 
3053 	if (!(item_flags & MLX5_FLOW_LAYER_GRE))
3054 		return rte_flow_error_set(error, ENOTSUP,
3055 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3056 					  "No preceding GRE header");
3057 	if (item_flags & MLX5_FLOW_LAYER_INNER)
3058 		return rte_flow_error_set(error, ENOTSUP,
3059 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3060 					  "GRE option following a wrong item");
3061 	if (!spec || !mask)
3062 		return rte_flow_error_set(error, EINVAL,
3063 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3064 					  "At least one gre_option field (checksum/key/sequence) must be specified");
3065 	if (!gre_mask)
3066 		gre_mask = &rte_flow_item_gre_mask;
3067 	if (mask->checksum_rsvd.checksum)
3068 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x8000)) &&
3069 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x8000)))
3070 			return rte_flow_error_set(error, EINVAL,
3071 						  RTE_FLOW_ERROR_TYPE_ITEM,
3072 						  item,
3073 						  "Checksum bit must be on");
3074 	if (mask->key.key)
3075 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
3076 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
3077 			return rte_flow_error_set(error, EINVAL,
3078 						  RTE_FLOW_ERROR_TYPE_ITEM,
3079 						  item, "Key bit must be on");
3080 	if (mask->sequence.sequence)
3081 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x1000)) &&
3082 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x1000)))
3083 			return rte_flow_error_set(error, EINVAL,
3084 						  RTE_FLOW_ERROR_TYPE_ITEM,
3085 						  item,
3086 						  "Sequence bit must be on");
3087 	if (mask->checksum_rsvd.checksum || mask->sequence.sequence) {
3088 		if (priv->sh->steering_format_version ==
3089 		    MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
3090 		    ((attr->group || attr->transfer) &&
3091 		     !priv->sh->misc5_cap) ||
3092 		    (!(priv->sh->tunnel_header_0_1 &&
3093 		       priv->sh->tunnel_header_2_3) &&
3094 		    !attr->group && !attr->transfer))
3095 			return rte_flow_error_set(error, EINVAL,
3096 						  RTE_FLOW_ERROR_TYPE_ITEM,
3097 						  item,
3098 						  "Checksum/Sequence not supported");
3099 	}
3100 	ret = mlx5_flow_item_acceptable
3101 		(item, (const uint8_t *)mask,
3102 		 (const uint8_t *)&nic_mask,
3103 		 sizeof(struct rte_flow_item_gre_opt),
3104 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3105 	return ret;
3106 }
3107 
3108 /**
3109  * Validate GRE item.
3110  *
3111  * @param[in] item
3112  *   Item specification.
3113  * @param[in] item_flags
3114  *   Bit flags to mark detected items.
3115  * @param[in] target_protocol
3116  *   The next protocol in the previous item.
3117  * @param[out] error
3118  *   Pointer to error structure.
3119  *
3120  * @return
3121  *   0 on success, a negative errno value otherwise and rte_errno is set.
3122  */
3123 int
3124 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
3125 			    uint64_t item_flags,
3126 			    uint8_t target_protocol,
3127 			    struct rte_flow_error *error)
3128 {
3129 	const struct rte_flow_item_gre *spec __rte_unused = item->spec;
3130 	const struct rte_flow_item_gre *mask = item->mask;
3131 	int ret;
3132 	const struct rte_flow_item_gre nic_mask = {
3133 		.c_rsvd0_ver = RTE_BE16(0xB000),
3134 		.protocol = RTE_BE16(UINT16_MAX),
3135 	};
3136 
3137 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3138 		return rte_flow_error_set(error, EINVAL,
3139 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3140 					  "protocol filtering not compatible"
3141 					  " with this GRE layer");
3142 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3143 		return rte_flow_error_set(error, ENOTSUP,
3144 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3145 					  "multiple tunnel layers not"
3146 					  " supported");
3147 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3148 		return rte_flow_error_set(error, ENOTSUP,
3149 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3150 					  "L3 layer is missing");
3151 	if (!mask)
3152 		mask = &rte_flow_item_gre_mask;
3153 	ret = mlx5_flow_item_acceptable
3154 		(item, (const uint8_t *)mask,
3155 		 (const uint8_t *)&nic_mask,
3156 		 sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
3157 		 error);
3158 	if (ret < 0)
3159 		return ret;
3160 #ifndef HAVE_MLX5DV_DR
3161 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
3162 	if (spec && (spec->protocol & mask->protocol))
3163 		return rte_flow_error_set(error, ENOTSUP,
3164 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3165 					  "without MPLS support the"
3166 					  " specification cannot be used for"
3167 					  " filtering");
3168 #endif
3169 #endif
3170 	return 0;
3171 }
3172 
3173 /**
3174  * Validate Geneve item.
3175  *
3176  * @param[in] item
3177  *   Item specification.
3178  * @param[in] item_flags
3179  *   Bit-fields that hold the items detected until now.
3180  * @param[in] dev
3181  *   Pointer to the Ethernet device structure.
3182  * @param[out] error
3183  *   Pointer to error structure.
3184  * @return
3185  *   0 on success, a negative errno value otherwise and rte_errno is set.
3186  */
3189 int
3190 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
3191 			       uint64_t item_flags,
3192 			       struct rte_eth_dev *dev,
3193 			       struct rte_flow_error *error)
3194 {
3195 	struct mlx5_priv *priv = dev->data->dev_private;
3196 	const struct rte_flow_item_geneve *spec = item->spec;
3197 	const struct rte_flow_item_geneve *mask = item->mask;
3198 	int ret;
3199 	uint16_t gbhdr;
3200 	uint8_t opt_len = priv->sh->cdev->config.hca_attr.geneve_max_opt_len ?
3201 			  MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
3202 	const struct rte_flow_item_geneve nic_mask = {
3203 		.ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
3204 		.vni = "\xff\xff\xff",
3205 		.protocol = RTE_BE16(UINT16_MAX),
3206 	};
3207 
3208 	if (!priv->sh->cdev->config.hca_attr.tunnel_stateless_geneve_rx)
3209 		return rte_flow_error_set(error, ENOTSUP,
3210 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3211 					  "Geneve is not enabled by device"
3212 					  " parameter and/or not configured in"
3213 					  " firmware");
3214 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3215 		return rte_flow_error_set(error, ENOTSUP,
3216 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3217 					  "multiple tunnel layers not"
3218 					  " supported");
3219 	/*
3220 	 * Verify only UDPv4 is present as defined in
3221 	 * https://tools.ietf.org/html/rfc8926
3222 	 */
3223 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
3224 		return rte_flow_error_set(error, EINVAL,
3225 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3226 					  "no outer UDP layer found");
3227 	if (!mask)
3228 		mask = &rte_flow_item_geneve_mask;
3229 	ret = mlx5_flow_item_acceptable
3230 				  (item, (const uint8_t *)mask,
3231 				   (const uint8_t *)&nic_mask,
3232 				   sizeof(struct rte_flow_item_geneve),
3233 				   MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3234 	if (ret)
3235 		return ret;
3236 	if (spec) {
3237 		gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
3238 		if (MLX5_GENEVE_VER_VAL(gbhdr) ||
3239 		     MLX5_GENEVE_CRITO_VAL(gbhdr) ||
3240 		     MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
3241 			return rte_flow_error_set(error, ENOTSUP,
3242 						  RTE_FLOW_ERROR_TYPE_ITEM,
3243 						  item,
3244 						  "Geneve protocol unsupported"
3245 						  " fields are being used");
3246 		if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
3247 			return rte_flow_error_set
3248 					(error, ENOTSUP,
3249 					 RTE_FLOW_ERROR_TYPE_ITEM,
3250 					 item,
3251 					 "Unsupported Geneve options length");
3252 	}
3253 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
3254 		return rte_flow_error_set
3255 				    (error, ENOTSUP,
3256 				     RTE_FLOW_ERROR_TYPE_ITEM, item,
3257 				     "Geneve tunnel must be fully defined");
3258 	return 0;
3259 }
3260 
3261 /**
3262  * Validate Geneve TLV option item.
3263  *
3264  * @param[in] item
3265  *   Item specification.
3266  * @param[in] last_item
3267  *   Previous validated item in the pattern items.
3268  * @param[in] geneve_item
3269  *   Previous GENEVE item specification.
3270  * @param[in] dev
3271  *   Pointer to the rte_eth_dev structure.
3272  * @param[out] error
3273  *   Pointer to error structure.
3274  *
3275  * @return
3276  *   0 on success, a negative errno value otherwise and rte_errno is set.
3277  */
3278 int
3279 mlx5_flow_validate_item_geneve_opt(const struct rte_flow_item *item,
3280 				   uint64_t last_item,
3281 				   const struct rte_flow_item *geneve_item,
3282 				   struct rte_eth_dev *dev,
3283 				   struct rte_flow_error *error)
3284 {
3285 	struct mlx5_priv *priv = dev->data->dev_private;
3286 	struct mlx5_dev_ctx_shared *sh = priv->sh;
3287 	struct mlx5_geneve_tlv_option_resource *geneve_opt_resource;
3288 	struct mlx5_hca_attr *hca_attr = &sh->cdev->config.hca_attr;
3289 	uint8_t data_max_supported =
3290 			hca_attr->max_geneve_tlv_option_data_len * 4;
3291 	const struct rte_flow_item_geneve *geneve_spec;
3292 	const struct rte_flow_item_geneve *geneve_mask;
3293 	const struct rte_flow_item_geneve_opt *spec = item->spec;
3294 	const struct rte_flow_item_geneve_opt *mask = item->mask;
3295 	unsigned int i;
3296 	unsigned int data_len;
3297 	uint8_t tlv_option_len;
3298 	uint16_t optlen_m, optlen_v;
3299 	const struct rte_flow_item_geneve_opt full_mask = {
3300 		.option_class = RTE_BE16(0xffff),
3301 		.option_type = 0xff,
3302 		.option_len = 0x1f,
3303 	};
3304 
3305 	if (!mask)
3306 		mask = &rte_flow_item_geneve_opt_mask;
3307 	if (!spec)
3308 		return rte_flow_error_set
3309 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3310 			"Geneve TLV opt class/type/length must be specified");
3311 	if ((uint32_t)spec->option_len > MLX5_GENEVE_OPTLEN_MASK)
3312 		return rte_flow_error_set
3313 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3314 			"Geneve TLV opt length exceeds the limit (31)");
3315 	/* Check if the class, type and length masks are full. */
3316 	if (full_mask.option_class != mask->option_class ||
3317 	    full_mask.option_type != mask->option_type ||
3318 	    full_mask.option_len != (mask->option_len & full_mask.option_len))
3319 		return rte_flow_error_set
3320 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3321 			"Geneve TLV opt class/type/length masks must be full");
3322 	/* Check if length is supported */
3323 	if ((uint32_t)spec->option_len >
3324 			hca_attr->max_geneve_tlv_option_data_len)
3325 		return rte_flow_error_set
3326 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3327 			"Geneve TLV opt length not supported");
3328 	if (hca_attr->max_geneve_tlv_options > 1)
3329 		DRV_LOG(DEBUG,
3330 			"device supports more than 1 GENEVE TLV option");
3331 	/* Check GENEVE item preceding. */
3332 	if (!geneve_item || !(last_item & MLX5_FLOW_LAYER_GENEVE))
3333 		return rte_flow_error_set
3334 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3335 			"Geneve opt item must be preceded by a Geneve item");
3336 	geneve_spec = geneve_item->spec;
3337 	geneve_mask = geneve_item->mask ? geneve_item->mask :
3338 					  &rte_flow_item_geneve_mask;
3339 	/* Check if GENEVE TLV option size doesn't exceed option length */
3340 	if (geneve_spec && (geneve_mask->ver_opt_len_o_c_rsvd0 ||
3341 			    geneve_spec->ver_opt_len_o_c_rsvd0)) {
3342 		tlv_option_len = spec->option_len & mask->option_len;
3343 		optlen_v = rte_be_to_cpu_16(geneve_spec->ver_opt_len_o_c_rsvd0);
3344 		optlen_v = MLX5_GENEVE_OPTLEN_VAL(optlen_v);
3345 		optlen_m = rte_be_to_cpu_16(geneve_mask->ver_opt_len_o_c_rsvd0);
3346 		optlen_m = MLX5_GENEVE_OPTLEN_VAL(optlen_m);
3347 		if ((optlen_v & optlen_m) <= tlv_option_len)
3348 			return rte_flow_error_set
3349 				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3350 				 "GENEVE TLV option length exceeds optlen");
3351 	}
3352 	/* Check if length is 0 or data is 0. */
3353 	if (spec->data == NULL || spec->option_len == 0)
3354 		return rte_flow_error_set
3355 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3356 			"Geneve TLV opt with zero data/length not supported");
3357 	/* Check that not all data & mask are 0. */
3358 	data_len = spec->option_len * 4;
3359 	if (mask->data == NULL) {
3360 		for (i = 0; i < data_len; i++)
3361 			if (spec->data[i])
3362 				break;
3363 		if (i == data_len)
3364 			return rte_flow_error_set(error, ENOTSUP,
3365 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3366 				"Can't match on Geneve option data 0");
3367 	} else {
3368 		for (i = 0; i < data_len; i++)
3369 			if (spec->data[i] & mask->data[i])
3370 				break;
3371 		if (i == data_len)
3372 			return rte_flow_error_set(error, ENOTSUP,
3373 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3374 				"Can't match on Geneve option data and mask 0");
3375 		/* Check data mask supported. */
3376 		for (i = data_max_supported; i < data_len ; i++)
3377 			if (mask->data[i])
3378 				return rte_flow_error_set(error, ENOTSUP,
3379 					RTE_FLOW_ERROR_TYPE_ITEM, item,
3380 					"Data mask is of unsupported size");
3381 	}
3382 	/* Check GENEVE option is supported in NIC. */
3383 	if (!hca_attr->geneve_tlv_opt)
3384 		return rte_flow_error_set
3385 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3386 			"Geneve TLV opt not supported");
3387 	/* Check if we already have geneve option with different type/class. */
3388 	rte_spinlock_lock(&sh->geneve_tlv_opt_sl);
3389 	geneve_opt_resource = sh->geneve_tlv_option_resource;
3390 	if (geneve_opt_resource != NULL)
3391 		if (geneve_opt_resource->option_class != spec->option_class ||
3392 		    geneve_opt_resource->option_type != spec->option_type ||
3393 		    geneve_opt_resource->length != spec->option_len) {
3394 			rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3395 			return rte_flow_error_set(error, ENOTSUP,
3396 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3397 				"Only one Geneve TLV option supported");
3398 		}
3399 	rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3400 	return 0;
3401 }
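
/*
 * Illustrative usage, not driver code: a GENEVE TLV option item that passes
 * the validation above must specify class, type and length with full masks
 * and a non-zero data match. The class/type/data values are placeholders.
 *
 *   rte_be32_t opt_data[1] = { RTE_BE32(0xdeadbeef) };
 *   rte_be32_t opt_mask_data[1] = { RTE_BE32(0xffffffff) };
 *   struct rte_flow_item_geneve_opt opt_spec = {
 *       .option_class = RTE_BE16(0x0102),
 *       .option_type = 0x03,
 *       .option_len = 1,        /* In 4-byte words. */
 *       .data = opt_data,
 *   };
 *   struct rte_flow_item_geneve_opt opt_mask = {
 *       .option_class = RTE_BE16(0xffff),
 *       .option_type = 0xff,
 *       .option_len = 0x1f,
 *       .data = opt_mask_data,
 *   };
 */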
3402 
3403 /**
3404  * Validate MPLS item.
3405  *
3406  * @param[in] dev
3407  *   Pointer to the rte_eth_dev structure.
3408  * @param[in] item
3409  *   Item specification.
3410  * @param[in] item_flags
3411  *   Bit-fields that hold the items detected until now.
3412  * @param[in] prev_layer
3413  *   The protocol layer indicated in previous item.
3414  * @param[out] error
3415  *   Pointer to error structure.
3416  *
3417  * @return
3418  *   0 on success, a negative errno value otherwise and rte_errno is set.
3419  */
3420 int
3421 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
3422 			     const struct rte_flow_item *item __rte_unused,
3423 			     uint64_t item_flags __rte_unused,
3424 			     uint64_t prev_layer __rte_unused,
3425 			     struct rte_flow_error *error)
3426 {
3427 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
3428 	const struct rte_flow_item_mpls *mask = item->mask;
3429 	struct mlx5_priv *priv = dev->data->dev_private;
3430 	int ret;
3431 
3432 	if (!priv->sh->dev_cap.mpls_en)
3433 		return rte_flow_error_set(error, ENOTSUP,
3434 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3435 					  "MPLS not supported or"
3436 					  " disabled in firmware"
3437 					  " configuration.");
3438 	/* MPLS over UDP or GRE is allowed. */
3439 	if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L4_UDP |
3440 			    MLX5_FLOW_LAYER_GRE |
3441 			    MLX5_FLOW_LAYER_GRE_KEY)))
3442 		return rte_flow_error_set(error, EINVAL,
3443 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3444 					  "protocol filtering not compatible"
3445 					  " with MPLS layer");
3446 	/* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
3447 	if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
3448 	    !(item_flags & MLX5_FLOW_LAYER_GRE))
3449 		return rte_flow_error_set(error, ENOTSUP,
3450 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3451 					  "multiple tunnel layers not"
3452 					  " supported");
3453 	if (!mask)
3454 		mask = &rte_flow_item_mpls_mask;
3455 	ret = mlx5_flow_item_acceptable
3456 		(item, (const uint8_t *)mask,
3457 		 (const uint8_t *)&rte_flow_item_mpls_mask,
3458 		 sizeof(struct rte_flow_item_mpls),
3459 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3460 	if (ret < 0)
3461 		return ret;
3462 	return 0;
3463 #else
3464 	return rte_flow_error_set(error, ENOTSUP,
3465 				  RTE_FLOW_ERROR_TYPE_ITEM, item,
3466 				  "MPLS is not supported by Verbs, please"
3467 				  " update the rdma-core library.");
3468 #endif
3469 }
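
/*
 * Illustrative usage, not driver code: MPLS is accepted here only on top of
 * an outer UDP layer or a GRE tunnel, e.g. MPLS-over-GRE:
 *
 *   struct rte_flow_item pattern[] = {
 *       { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *       { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *       { .type = RTE_FLOW_ITEM_TYPE_GRE },
 *       { .type = RTE_FLOW_ITEM_TYPE_MPLS },
 *       { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 */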
3470 
3471 /**
3472  * Validate NVGRE item.
3473  *
3474  * @param[in] item
3475  *   Item specification.
3476  * @param[in] item_flags
3477  *   Bit flags to mark detected items.
3478  * @param[in] target_protocol
3479  *   The next protocol in the previous item.
3480  * @param[out] error
3481  *   Pointer to error structure.
3482  *
3483  * @return
3484  *   0 on success, a negative errno value otherwise and rte_errno is set.
3485  */
3486 int
3487 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
3488 			      uint64_t item_flags,
3489 			      uint8_t target_protocol,
3490 			      struct rte_flow_error *error)
3491 {
3492 	const struct rte_flow_item_nvgre *mask = item->mask;
3493 	int ret;
3494 
3495 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3496 		return rte_flow_error_set(error, EINVAL,
3497 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3498 					  "protocol filtering not compatible"
3499 					  " with this GRE layer");
3500 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3501 		return rte_flow_error_set(error, ENOTSUP,
3502 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3503 					  "multiple tunnel layers not"
3504 					  " supported");
3505 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3506 		return rte_flow_error_set(error, ENOTSUP,
3507 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3508 					  "L3 Layer is missing");
3509 	if (!mask)
3510 		mask = &rte_flow_item_nvgre_mask;
3511 	ret = mlx5_flow_item_acceptable
3512 		(item, (const uint8_t *)mask,
3513 		 (const uint8_t *)&rte_flow_item_nvgre_mask,
3514 		 sizeof(struct rte_flow_item_nvgre),
3515 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3516 	if (ret < 0)
3517 		return ret;
3518 	return 0;
3519 }
3520 
3521 /**
3522  * Validate eCPRI item.
3523  *
3524  * @param[in] item
3525  *   Item specification.
3526  * @param[in] item_flags
3527  *   Bit-fields that hold the items detected until now.
3528  * @param[in] last_item
3529  *   Previous validated item in the pattern items.
3530  * @param[in] ether_type
3531  *   Type in the ethernet layer header (including dot1q).
3532  * @param[in] acc_mask
3533  *   Acceptable mask, if NULL the default internal mask
3534  *   will be used to check whether item fields are supported.
3535  * @param[out] error
3536  *   Pointer to error structure.
3537  *
3538  * @return
3539  *   0 on success, a negative errno value otherwise and rte_errno is set.
3540  */
3541 int
3542 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
3543 			      uint64_t item_flags,
3544 			      uint64_t last_item,
3545 			      uint16_t ether_type,
3546 			      const struct rte_flow_item_ecpri *acc_mask,
3547 			      struct rte_flow_error *error)
3548 {
3549 	const struct rte_flow_item_ecpri *mask = item->mask;
3550 	const struct rte_flow_item_ecpri nic_mask = {
3551 		.hdr = {
3552 			.common = {
3553 				.u32 =
3554 				RTE_BE32(((const struct rte_ecpri_common_hdr) {
3555 					.type = 0xFF,
3556 					}).u32),
3557 			},
3558 			.dummy[0] = 0xFFFFFFFF,
3559 		},
3560 	};
3561 	const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
3562 					MLX5_FLOW_LAYER_OUTER_VLAN);
3563 	struct rte_flow_item_ecpri mask_lo;
3564 
3565 	if (!(last_item & outer_l2_vlan) &&
3566 	    last_item != MLX5_FLOW_LAYER_OUTER_L4_UDP)
3567 		return rte_flow_error_set(error, EINVAL,
3568 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3569 					  "eCPRI can only follow L2/VLAN layer or UDP layer");
3570 	if ((last_item & outer_l2_vlan) && ether_type &&
3571 	    ether_type != RTE_ETHER_TYPE_ECPRI)
3572 		return rte_flow_error_set(error, EINVAL,
3573 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3574 					  "eCPRI cannot follow an L2/VLAN layer whose ether type is not 0xAEFE");
3575 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3576 		return rte_flow_error_set(error, EINVAL,
3577 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3578 					  "eCPRI with tunnel is not supported right now");
3579 	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
3580 		return rte_flow_error_set(error, ENOTSUP,
3581 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3582 					  "multiple L3 layers not supported");
3583 	else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
3584 		return rte_flow_error_set(error, EINVAL,
3585 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3586 					  "eCPRI cannot coexist with a TCP layer");
3587 	/* Per the specification, eCPRI can also be carried over a UDP layer. */
3588 	else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
3589 		return rte_flow_error_set(error, EINVAL,
3590 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3591 					  "eCPRI over UDP layer is not yet supported");
3592 	/* Mask for type field in common header could be zero. */
3593 	if (!mask)
3594 		mask = &rte_flow_item_ecpri_mask;
3595 	mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
3596 	/* Input mask is in big-endian format. */
3597 	if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
3598 		return rte_flow_error_set(error, EINVAL,
3599 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3600 					  "partial mask is not supported for protocol");
3601 	else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
3602 		return rte_flow_error_set(error, EINVAL,
3603 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3604 					  "message header mask must be after a type mask");
3605 	return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
3606 					 acc_mask ? (const uint8_t *)acc_mask
3607 						  : (const uint8_t *)&nic_mask,
3608 					 sizeof(struct rte_flow_item_ecpri),
3609 					 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3610 }
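
/*
 * Illustrative usage, not driver code: an eCPRI item directly following an
 * Ethernet layer whose ether type is 0xAEFE. The spec mirrors the nic_mask
 * construction above; the message type is a placeholder.
 *
 *   struct rte_flow_item_ecpri ecpri_spec = {
 *       .hdr = {
 *           .common = {
 *               .u32 = RTE_BE32(((const struct rte_ecpri_common_hdr) {
 *                       .type = RTE_ECPRI_MSG_TYPE_IQ_DATA,
 *                   }).u32),
 *           },
 *       },
 *   };
 *   struct rte_flow_item_eth eth_spec = {
 *       .hdr.ether_type = RTE_BE16(RTE_ETHER_TYPE_ECPRI),
 *   };
 *   struct rte_flow_item pattern[] = {
 *       { .type = RTE_FLOW_ITEM_TYPE_ETH, .spec = &eth_spec },
 *       { .type = RTE_FLOW_ITEM_TYPE_ECPRI, .spec = &ecpri_spec },
 *       { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 */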
3611 
3612 static int
3613 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
3614 		   const struct rte_flow_attr *attr __rte_unused,
3615 		   const struct rte_flow_item items[] __rte_unused,
3616 		   const struct rte_flow_action actions[] __rte_unused,
3617 		   bool external __rte_unused,
3618 		   int hairpin __rte_unused,
3619 		   struct rte_flow_error *error)
3620 {
3621 	return rte_flow_error_set(error, ENOTSUP,
3622 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3623 }
3624 
3625 static struct mlx5_flow *
3626 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
3627 		  const struct rte_flow_attr *attr __rte_unused,
3628 		  const struct rte_flow_item items[] __rte_unused,
3629 		  const struct rte_flow_action actions[] __rte_unused,
3630 		  struct rte_flow_error *error)
3631 {
3632 	rte_flow_error_set(error, ENOTSUP,
3633 			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3634 	return NULL;
3635 }
3636 
3637 static int
3638 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
3639 		    struct mlx5_flow *dev_flow __rte_unused,
3640 		    const struct rte_flow_attr *attr __rte_unused,
3641 		    const struct rte_flow_item items[] __rte_unused,
3642 		    const struct rte_flow_action actions[] __rte_unused,
3643 		    struct rte_flow_error *error)
3644 {
3645 	return rte_flow_error_set(error, ENOTSUP,
3646 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3647 }
3648 
3649 static int
3650 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
3651 		struct rte_flow *flow __rte_unused,
3652 		struct rte_flow_error *error)
3653 {
3654 	return rte_flow_error_set(error, ENOTSUP,
3655 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3656 }
3657 
3658 static void
3659 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
3660 		 struct rte_flow *flow __rte_unused)
3661 {
3662 }
3663 
3664 static void
3665 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
3666 		  struct rte_flow *flow __rte_unused)
3667 {
3668 }
3669 
3670 static int
3671 flow_null_query(struct rte_eth_dev *dev __rte_unused,
3672 		struct rte_flow *flow __rte_unused,
3673 		const struct rte_flow_action *actions __rte_unused,
3674 		void *data __rte_unused,
3675 		struct rte_flow_error *error)
3676 {
3677 	return rte_flow_error_set(error, ENOTSUP,
3678 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3679 }
3680 
3681 static int
3682 flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused,
3683 		      uint32_t domains __rte_unused,
3684 		      uint32_t flags __rte_unused)
3685 {
3686 	return 0;
3687 }
3688 
3689 /* Void driver to protect from null pointer reference. */
3690 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
3691 	.validate = flow_null_validate,
3692 	.prepare = flow_null_prepare,
3693 	.translate = flow_null_translate,
3694 	.apply = flow_null_apply,
3695 	.remove = flow_null_remove,
3696 	.destroy = flow_null_destroy,
3697 	.query = flow_null_query,
3698 	.sync_domain = flow_null_sync_domain,
3699 };
3700 
3701 /**
3702  * Select flow driver type according to flow attributes and device
3703  * configuration.
3704  *
3705  * @param[in] dev
3706  *   Pointer to the dev structure.
3707  * @param[in] attr
3708  *   Pointer to the flow attributes.
3709  *
3710  * @return
3711  *   flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
3712  */
3713 static enum mlx5_flow_drv_type
3714 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
3715 {
3716 	struct mlx5_priv *priv = dev->data->dev_private;
3717 	/* The OS may determine a specific flow type (DV, VERBS) first. */
3718 	enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
3719 
3720 	if (type != MLX5_FLOW_TYPE_MAX)
3721 		return type;
3722 	/*
3723 	 * Currently when dv_flow_en == 2, only HW steering engine is
3724 	 * supported. New engines can also be chosen here if ready.
3725 	 */
3726 	if (priv->sh->config.dv_flow_en == 2)
3727 		return MLX5_FLOW_TYPE_HW;
3728 	/* If no OS specific type - continue with DV/VERBS selection */
3729 	if (attr->transfer && priv->sh->config.dv_esw_en)
3730 		type = MLX5_FLOW_TYPE_DV;
3731 	if (!attr->transfer)
3732 		type = priv->sh->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
3733 						     MLX5_FLOW_TYPE_VERBS;
3734 	return type;
3735 }
3736 
3737 #define flow_get_drv_ops(type) flow_drv_ops[type]
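
/*
 * Illustrative sketch, not driver code: the wrappers below pair this macro
 * with flow_get_drv_type() to route a generic call to the selected engine:
 *
 *   const struct mlx5_flow_driver_ops *fops =
 *           flow_get_drv_ops(flow_get_drv_type(dev, attr));
 *   ret = fops->validate(dev, attr, items, actions, external, hairpin, error);
 */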
3738 
3739 /**
3740  * Flow driver validation API. This abstracts calling driver specific functions.
3741  * The type of flow driver is determined according to flow attributes.
3742  *
3743  * @param[in] dev
3744  *   Pointer to the dev structure.
3745  * @param[in] attr
3746  *   Pointer to the flow attributes.
3747  * @param[in] items
3748  *   Pointer to the list of items.
3749  * @param[in] actions
3750  *   Pointer to the list of actions.
3751  * @param[in] external
3752  *   This flow rule is created by a request external to the PMD.
3753  * @param[in] hairpin
3754  *   Number of hairpin TX actions, 0 means classic flow.
3755  * @param[out] error
3756  *   Pointer to the error structure.
3757  *
3758  * @return
3759  *   0 on success, a negative errno value otherwise and rte_errno is set.
3760  */
3761 static inline int
3762 flow_drv_validate(struct rte_eth_dev *dev,
3763 		  const struct rte_flow_attr *attr,
3764 		  const struct rte_flow_item items[],
3765 		  const struct rte_flow_action actions[],
3766 		  bool external, int hairpin, struct rte_flow_error *error)
3767 {
3768 	const struct mlx5_flow_driver_ops *fops;
3769 	enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
3770 
3771 	fops = flow_get_drv_ops(type);
3772 	return fops->validate(dev, attr, items, actions, external,
3773 			      hairpin, error);
3774 }
3775 
3776 /**
3777  * Flow driver preparation API. This abstracts calling driver specific
3778  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3779  * calculates the size of memory required for device flow, allocates the memory,
3780  * initializes the device flow and returns the pointer.
3781  *
3782  * @note
3783  *   This function initializes device flow structure such as dv or verbs in
3784  *   struct mlx5_flow. However, it is the caller's responsibility to initialize
3785  *   the rest. For example, adding the returned device flow to the
3786  *   flow->dev_flow list and setting the backward reference to the flow should
3787  *   be done outside of this function. The layers field is not filled either.
3788  *
3789  * @param[in] dev
3790  *   Pointer to the dev structure.
3791  * @param[in] attr
3792  *   Pointer to the flow attributes.
3793  * @param[in] items
3794  *   Pointer to the list of items.
3795  * @param[in] actions
3796  *   Pointer to the list of actions.
3797  * @param[in] flow_idx
3798  *   Memory pool index of this flow.
3799  * @param[out] error
3800  *   Pointer to the error structure.
3801  *
3802  * @return
3803  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
3804  */
3805 static inline struct mlx5_flow *
3806 flow_drv_prepare(struct rte_eth_dev *dev,
3807 		 const struct rte_flow *flow,
3808 		 const struct rte_flow_attr *attr,
3809 		 const struct rte_flow_item items[],
3810 		 const struct rte_flow_action actions[],
3811 		 uint32_t flow_idx,
3812 		 struct rte_flow_error *error)
3813 {
3814 	const struct mlx5_flow_driver_ops *fops;
3815 	enum mlx5_flow_drv_type type = flow->drv_type;
3816 	struct mlx5_flow *mlx5_flow = NULL;
3817 
3818 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3819 	fops = flow_get_drv_ops(type);
3820 	mlx5_flow = fops->prepare(dev, attr, items, actions, error);
3821 	if (mlx5_flow)
3822 		mlx5_flow->flow_idx = flow_idx;
3823 	return mlx5_flow;
3824 }
3825 
3826 /**
3827  * Flow driver translation API. This abstracts calling driver specific
3828  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3829  * translates a generic flow into a driver flow. flow_drv_prepare() must
3830  * precede.
3831  *
3832  * @note
3833  *   dev_flow->layers could be filled as a result of parsing during translation
3834  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
3835  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
3836  *   flow->actions could be overwritten even though all the expanded dev_flows
3837  *   have the same actions.
3838  *
3839  * @param[in] dev
3840  *   Pointer to the rte dev structure.
3841  * @param[in, out] dev_flow
3842  *   Pointer to the mlx5 flow.
3843  * @param[in] attr
3844  *   Pointer to the flow attributes.
3845  * @param[in] items
3846  *   Pointer to the list of items.
3847  * @param[in] actions
3848  *   Pointer to the list of actions.
3849  * @param[out] error
3850  *   Pointer to the error structure.
3851  *
3852  * @return
3853  *   0 on success, a negative errno value otherwise and rte_errno is set.
3854  */
3855 static inline int
3856 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
3857 		   const struct rte_flow_attr *attr,
3858 		   const struct rte_flow_item items[],
3859 		   const struct rte_flow_action actions[],
3860 		   struct rte_flow_error *error)
3861 {
3862 	const struct mlx5_flow_driver_ops *fops;
3863 	enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
3864 
3865 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3866 	fops = flow_get_drv_ops(type);
3867 	return fops->translate(dev, dev_flow, attr, items, actions, error);
3868 }
3869 
3870 /**
3871  * Flow driver apply API. This abstracts calling driver specific functions.
3872  * Parent flow (rte_flow) should have driver type (drv_type). It applies
3873  * translated driver flows on to device. flow_drv_translate() must precede.
3874  *
3875  * @param[in] dev
3876  *   Pointer to Ethernet device structure.
3877  * @param[in, out] flow
3878  *   Pointer to flow structure.
3879  * @param[out] error
3880  *   Pointer to error structure.
3881  *
3882  * @return
3883  *   0 on success, a negative errno value otherwise and rte_errno is set.
3884  */
3885 static inline int
3886 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3887 	       struct rte_flow_error *error)
3888 {
3889 	const struct mlx5_flow_driver_ops *fops;
3890 	enum mlx5_flow_drv_type type = flow->drv_type;
3891 
3892 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3893 	fops = flow_get_drv_ops(type);
3894 	return fops->apply(dev, flow, error);
3895 }
3896 
3897 /**
3898  * Flow driver destroy API. This abstracts calling driver specific functions.
3899  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3900  * on device and releases resources of the flow.
3901  *
3902  * @param[in] dev
3903  *   Pointer to Ethernet device.
3904  * @param[in, out] flow
3905  *   Pointer to flow structure.
3906  */
3907 static inline void
3908 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3909 {
3910 	const struct mlx5_flow_driver_ops *fops;
3911 	enum mlx5_flow_drv_type type = flow->drv_type;
3912 
3913 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3914 	fops = flow_get_drv_ops(type);
3915 	fops->destroy(dev, flow);
3916 }
3917 
3918 /**
3919  * Flow driver find RSS policy tbl API. This abstracts calling driver
3920  * specific functions. Parent flow (rte_flow) should have driver
3921  * type (drv_type). It will find the RSS policy table that has the rss_desc.
3922  *
3923  * @param[in] dev
3924  *   Pointer to Ethernet device.
3925  * @param[in, out] flow
3926  *   Pointer to flow structure.
3927  * @param[in] policy
3928  *   Pointer to meter policy table.
3929  * @param[in] rss_desc
3930  *   Pointer to rss_desc
3931  */
3932 static struct mlx5_flow_meter_sub_policy *
3933 flow_drv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
3934 		struct rte_flow *flow,
3935 		struct mlx5_flow_meter_policy *policy,
3936 		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS])
3937 {
3938 	const struct mlx5_flow_driver_ops *fops;
3939 	enum mlx5_flow_drv_type type = flow->drv_type;
3940 
3941 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3942 	fops = flow_get_drv_ops(type);
3943 	return fops->meter_sub_policy_rss_prepare(dev, policy, rss_desc);
3944 }
3945 
3946 /**
3947  * Flow driver color tag rule API. This abstracts calling driver
3948  * specific functions. Parent flow (rte_flow) should have driver
3949  * type (drv_type). It will create the color tag rules in hierarchy meter.
3950  *
3951  * @param[in] dev
3952  *   Pointer to Ethernet device.
3953  * @param[in, out] flow
3954  *   Pointer to flow structure.
3955  * @param[in] fm
3956  *   Pointer to flow meter structure.
3957  * @param[in] src_port
3958  *   The src port this extra rule should use.
3959  * @param[in] item
3960  *   The src port id match item.
3961  * @param[out] error
3962  *   Pointer to error structure.
3963  */
3964 static int
3965 flow_drv_mtr_hierarchy_rule_create(struct rte_eth_dev *dev,
3966 		struct rte_flow *flow,
3967 		struct mlx5_flow_meter_info *fm,
3968 		int32_t src_port,
3969 		const struct rte_flow_item *item,
3970 		struct rte_flow_error *error)
3971 {
3972 	const struct mlx5_flow_driver_ops *fops;
3973 	enum mlx5_flow_drv_type type = flow->drv_type;
3974 
3975 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3976 	fops = flow_get_drv_ops(type);
3977 	return fops->meter_hierarchy_rule_create(dev, fm,
3978 						src_port, item, error);
3979 }
3980 
3981 /**
3982  * Get RSS action from the action list.
3983  *
3984  * @param[in] dev
3985  *   Pointer to Ethernet device.
3986  * @param[in] actions
3987  *   Pointer to the list of actions.
3988  * @param[in] flow
3989  *   Parent flow structure pointer.
3990  *
3991  * @return
3992  *   Pointer to the RSS action if it exists, NULL otherwise.
3993  */
3994 static const struct rte_flow_action_rss*
3995 flow_get_rss_action(struct rte_eth_dev *dev,
3996 		    const struct rte_flow_action actions[])
3997 {
3998 	struct mlx5_priv *priv = dev->data->dev_private;
3999 	const struct rte_flow_action_rss *rss = NULL;
4000 	struct mlx5_meter_policy_action_container *acg;
4001 	struct mlx5_meter_policy_action_container *acy;
4002 
4003 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4004 		switch (actions->type) {
4005 		case RTE_FLOW_ACTION_TYPE_RSS:
4006 			rss = actions->conf;
4007 			break;
4008 		case RTE_FLOW_ACTION_TYPE_SAMPLE:
4009 		{
4010 			const struct rte_flow_action_sample *sample =
4011 								actions->conf;
4012 			const struct rte_flow_action *act = sample->actions;
4013 			for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++)
4014 				if (act->type == RTE_FLOW_ACTION_TYPE_RSS)
4015 					rss = act->conf;
4016 			break;
4017 		}
4018 		case RTE_FLOW_ACTION_TYPE_METER:
4019 		{
4020 			uint32_t mtr_idx;
4021 			struct mlx5_flow_meter_info *fm;
4022 			struct mlx5_flow_meter_policy *policy;
4023 			const struct rte_flow_action_meter *mtr = actions->conf;
4024 
4025 			fm = mlx5_flow_meter_find(priv, mtr->mtr_id, &mtr_idx);
4026 			if (fm && !fm->def_policy) {
4027 				policy = mlx5_flow_meter_policy_find(dev,
4028 						fm->policy_id, NULL);
4029 				MLX5_ASSERT(policy);
4030 				if (policy->is_hierarchy) {
4031 					policy =
4032 				mlx5_flow_meter_hierarchy_get_final_policy(dev,
4033 									policy);
4034 					if (!policy)
4035 						return NULL;
4036 				}
4037 				if (policy->is_rss) {
4038 					acg =
4039 					&policy->act_cnt[RTE_COLOR_GREEN];
4040 					acy =
4041 					&policy->act_cnt[RTE_COLOR_YELLOW];
4042 					if (acg->fate_action ==
4043 					    MLX5_FLOW_FATE_SHARED_RSS)
4044 						rss = acg->rss->conf;
4045 					else if (acy->fate_action ==
4046 						 MLX5_FLOW_FATE_SHARED_RSS)
4047 						rss = acy->rss->conf;
4048 				}
4049 			}
4050 			break;
4051 		}
4052 		default:
4053 			break;
4054 		}
4055 	}
4056 	return rss;
4057 }
4058 
4059 /**
4060  * Get ASO age action by index.
4061  *
4062  * @param[in] dev
4063  *   Pointer to the Ethernet device structure.
4064  * @param[in] age_idx
4065  *   Index to the ASO age action.
4066  *
4067  * @return
4068  *   The specified ASO age action.
4069  */
4070 struct mlx5_aso_age_action*
4071 flow_aso_age_get_by_idx(struct rte_eth_dev *dev, uint32_t age_idx)
4072 {
4073 	uint16_t pool_idx = age_idx & UINT16_MAX;
4074 	uint16_t offset = (age_idx >> 16) & UINT16_MAX;
4075 	struct mlx5_priv *priv = dev->data->dev_private;
4076 	struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
4077 	struct mlx5_aso_age_pool *pool;
4078 
4079 	rte_rwlock_read_lock(&mng->resize_rwl);
4080 	pool = mng->pools[pool_idx];
4081 	rte_rwlock_read_unlock(&mng->resize_rwl);
4082 	return &pool->actions[offset - 1];
4083 }
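
/*
 * Illustrative sketch, not driver code: the inverse of the decoding above.
 * An ASO age index packs the pool index in the lower 16 bits and the
 * 1-based action offset in the upper 16 bits, so for a 0-based position
 * pos inside pool->actions:
 *
 *   uint32_t age_idx = ((uint32_t)(pos + 1) << 16) | pool_idx;
 */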
4084 
4085 /* Maps an indirect action to its translated direct action in an array. */
4086 struct mlx5_translated_action_handle {
4087 	struct rte_flow_action_handle *action; /**< Indirect action handle. */
4088 	int index; /**< Index in related array of rte_flow_action. */
4089 };
4090 
4091 /**
4092  * Translates actions of type RTE_FLOW_ACTION_TYPE_INDIRECT to related
4093  * direct action if translation is possible.
4094  * This functionality is used to run the same execution path for both direct
4095  * and indirect actions on flow create. All necessary preparations for
4096  * indirect action handling should be performed on the *handle* actions list
4097  * returned from this call.
4098  *
4099  * @param[in] dev
4100  *   Pointer to Ethernet device.
4101  * @param[in] actions
4102  *   List of actions to translate.
4103  * @param[out] handle
4104  *   List to store translated indirect action object handles.
4105  * @param[in, out] indir_n
4106  *   Size of the *handle* array. On return, it is updated with the number of
4107  *   indirect actions retrieved from the *actions* list.
4108  * @param[out] translated_actions
4109  *   List of actions where all indirect actions were translated to direct
4110  *   if possible. NULL if no translation took place.
4111  * @param[out] error
4112  *   Pointer to the error structure.
4113  *
4114  * @return
4115  *   0 on success, a negative errno value otherwise and rte_errno is set.
4116  */
4117 static int
4118 flow_action_handles_translate(struct rte_eth_dev *dev,
4119 			      const struct rte_flow_action actions[],
4120 			      struct mlx5_translated_action_handle *handle,
4121 			      int *indir_n,
4122 			      struct rte_flow_action **translated_actions,
4123 			      struct rte_flow_error *error)
4124 {
4125 	struct mlx5_priv *priv = dev->data->dev_private;
4126 	struct rte_flow_action *translated = NULL;
4127 	size_t actions_size;
4128 	int n;
4129 	int copied_n = 0;
4130 	struct mlx5_translated_action_handle *handle_end = NULL;
4131 
4132 	for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) {
4133 		if (actions[n].type != RTE_FLOW_ACTION_TYPE_INDIRECT)
4134 			continue;
4135 		if (copied_n == *indir_n) {
4136 			return rte_flow_error_set
4137 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM,
4138 				 NULL, "too many shared actions");
4139 		}
4140 		rte_memcpy(&handle[copied_n].action, &actions[n].conf,
4141 			   sizeof(actions[n].conf));
4142 		handle[copied_n].index = n;
4143 		copied_n++;
4144 	}
4145 	n++;
4146 	*indir_n = copied_n;
4147 	if (!copied_n)
4148 		return 0;
4149 	actions_size = sizeof(struct rte_flow_action) * n;
4150 	translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY);
4151 	if (!translated) {
4152 		rte_errno = ENOMEM;
4153 		return -ENOMEM;
4154 	}
4155 	memcpy(translated, actions, actions_size);
4156 	for (handle_end = handle + copied_n; handle < handle_end; handle++) {
4157 		struct mlx5_shared_action_rss *shared_rss;
4158 		uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
4159 		uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
4160 		uint32_t idx = act_idx &
4161 			       ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
4162 
4163 		switch (type) {
4164 		case MLX5_INDIRECT_ACTION_TYPE_RSS:
4165 			shared_rss = mlx5_ipool_get
4166 			  (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx);
4167 			translated[handle->index].type =
4168 				RTE_FLOW_ACTION_TYPE_RSS;
4169 			translated[handle->index].conf =
4170 				&shared_rss->origin;
4171 			break;
4172 		case MLX5_INDIRECT_ACTION_TYPE_COUNT:
4173 			translated[handle->index].type =
4174 						(enum rte_flow_action_type)
4175 						MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
4176 			translated[handle->index].conf = (void *)(uintptr_t)idx;
4177 			break;
4178 		case MLX5_INDIRECT_ACTION_TYPE_AGE:
4179 			if (priv->sh->flow_hit_aso_en) {
4180 				translated[handle->index].type =
4181 					(enum rte_flow_action_type)
4182 					MLX5_RTE_FLOW_ACTION_TYPE_AGE;
4183 				translated[handle->index].conf =
4184 							 (void *)(uintptr_t)idx;
4185 				break;
4186 			}
4187 			/* Fall-through */
4188 		case MLX5_INDIRECT_ACTION_TYPE_CT:
4189 			if (priv->sh->ct_aso_en) {
4190 				translated[handle->index].type =
4191 					RTE_FLOW_ACTION_TYPE_CONNTRACK;
4192 				translated[handle->index].conf =
4193 							 (void *)(uintptr_t)idx;
4194 				break;
4195 			}
4196 			/* Fall-through */
4197 		default:
4198 			mlx5_free(translated);
4199 			return rte_flow_error_set
4200 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
4201 				 NULL, "invalid indirect action type");
4202 		}
4203 	}
4204 	*translated_actions = translated;
4205 	return 0;
4206 }
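
/*
 * Illustrative usage, not driver code: an application passes an indirect
 * action by storing its handle in the conf pointer, which the function
 * above translates in place to the underlying direct action:
 *
 *   struct rte_flow_action actions[] = {
 *       { .type = RTE_FLOW_ACTION_TYPE_INDIRECT, .conf = handle },
 *       { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 */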
4207 
4208 /**
4209  * Get Shared RSS action from the action list.
4210  *
4211  * @param[in] dev
4212  *   Pointer to Ethernet device.
4213  * @param[in] shared
4214  *   Pointer to the list of actions.
4215  * @param[in] shared_n
4216  *   Actions list length.
4217  *
4218  * @return
4219  *   The MLX5 RSS action ID if it exists, 0 otherwise.
4220  */
4221 static uint32_t
4222 flow_get_shared_rss_action(struct rte_eth_dev *dev,
4223 			   struct mlx5_translated_action_handle *handle,
4224 			   int shared_n)
4225 {
4226 	struct mlx5_translated_action_handle *handle_end;
4227 	struct mlx5_priv *priv = dev->data->dev_private;
4228 	struct mlx5_shared_action_rss *shared_rss;
4229 
4231 	for (handle_end = handle + shared_n; handle < handle_end; handle++) {
4232 		uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
4233 		uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
4234 		uint32_t idx = act_idx &
4235 			       ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
4236 		switch (type) {
4237 		case MLX5_INDIRECT_ACTION_TYPE_RSS:
4238 			shared_rss = mlx5_ipool_get
4239 				(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
4240 									   idx);
4241 			__atomic_add_fetch(&shared_rss->refcnt, 1,
4242 					   __ATOMIC_RELAXED);
4243 			return idx;
4244 		default:
4245 			break;
4246 		}
4247 	}
4248 	return 0;
4249 }
4250 
4251 static unsigned int
4252 find_graph_root(uint32_t rss_level)
4253 {
4254 	return rss_level < 2 ? MLX5_EXPANSION_ROOT :
4255 			       MLX5_EXPANSION_ROOT_OUTER;
4256 }
4257 
4258 /**
4259  *  Get layer flags from the prefix flow.
4260  *
4261  *  Some flows may be split into several subflows; the prefix subflow gets
4262  *  the match items and the suffix subflow gets the actions.
4263  *  Some actions need the user-defined match item flags to get the details
4264  *  for the action.
4265  *  This function helps the suffix flow to get the item layer flags from the
4266  *  prefix subflow.
4267  *
4268  * @param[in] dev_flow
4269  *   Pointer to the created prefix subflow.
4270  *
4271  * @return
4272  *   The layers retrieved from the prefix subflow.
4273  */
4274 static inline uint64_t
4275 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
4276 {
4277 	uint64_t layers = 0;
4278 
4279 	/*
4280 	 * The layer bits could be cached in a local variable, but usually
4281 	 * the compiler will do that optimization for us.
4282 	 * If there is no decap action, use the layers directly.
4283 	 */
4284 	if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
4285 		return dev_flow->handle->layers;
4286 	/* Convert L3 layers with decap action. */
4287 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
4288 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
4289 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
4290 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
4291 	/* Convert L4 layers with decap action.  */
4292 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
4293 		layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
4294 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
4295 		layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
4296 	return layers;
4297 }
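
/*
 * Worked example: a prefix subflow carrying a decap action that matched
 * inner IPv4/UDP reports MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
 * MLX5_FLOW_LAYER_OUTER_L4_UDP to the suffix subflow, since the inner
 * headers become the outer ones once decapsulation is applied.
 */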
4298 
4299 /**
4300  * Get metadata split action information.
4301  *
4302  * @param[in] actions
4303  *   Pointer to the list of actions.
4304  * @param[out] qrss
4305  *   Pointer to the return pointer.
4306  * @param[out] qrss_type
4307  *   Pointer to the action type to return. RTE_FLOW_ACTION_TYPE_END is returned
4308  *   if no QUEUE/RSS is found.
4309  * @param[out] encap_idx
4310  *   Pointer to the index of the encap action if it exists, otherwise the last
4311  *   action index.
4312  *
4313  * @return
4314  *   Total number of actions.
4315  */
4316 static int
4317 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
4318 				       const struct rte_flow_action **qrss,
4319 				       int *encap_idx)
4320 {
4321 	const struct rte_flow_action_raw_encap *raw_encap;
4322 	int actions_n = 0;
4323 	int raw_decap_idx = -1;
4324 
4325 	*encap_idx = -1;
4326 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4327 		switch (actions->type) {
4328 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4329 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4330 			*encap_idx = actions_n;
4331 			break;
4332 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4333 			raw_decap_idx = actions_n;
4334 			break;
4335 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4336 			raw_encap = actions->conf;
4337 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4338 				*encap_idx = raw_decap_idx != -1 ?
4339 						      raw_decap_idx : actions_n;
4340 			break;
4341 		case RTE_FLOW_ACTION_TYPE_QUEUE:
4342 		case RTE_FLOW_ACTION_TYPE_RSS:
4343 			*qrss = actions;
4344 			break;
4345 		default:
4346 			break;
4347 		}
4348 		actions_n++;
4349 	}
4350 	if (*encap_idx == -1)
4351 		*encap_idx = actions_n;
4352 	/* Count RTE_FLOW_ACTION_TYPE_END. */
4353 	return actions_n + 1;
4354 }
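
/*
 * Worked example: for the action list RAW_DECAP / RAW_ENCAP (with size
 * above MLX5_ENCAPSULATION_DECISION_SIZE) / QUEUE / END, the function
 * sets *encap_idx to 0 (the RAW_DECAP position), points *qrss at the
 * QUEUE action and returns 4 (three actions plus the END one).
 */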
4355 
4356 /**
4357  * Check if the action will change the packet.
4358  *
4359  * @param dev
4360  *   Pointer to Ethernet device.
4361  * @param[in] type
4362  *   Action type.
4363  *
4364  * @return
4365  *   true if the action will change the packet, false otherwise.
4366  */
4367 static bool flow_check_modify_action_type(struct rte_eth_dev *dev,
4368 					  enum rte_flow_action_type type)
4369 {
4370 	struct mlx5_priv *priv = dev->data->dev_private;
4371 
4372 	switch (type) {
4373 	case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
4374 	case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
4375 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
4376 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
4377 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
4378 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
4379 	case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
4380 	case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
4381 	case RTE_FLOW_ACTION_TYPE_DEC_TTL:
4382 	case RTE_FLOW_ACTION_TYPE_SET_TTL:
4383 	case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
4384 	case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
4385 	case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
4386 	case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
4387 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
4388 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
4389 	case RTE_FLOW_ACTION_TYPE_SET_META:
4390 	case RTE_FLOW_ACTION_TYPE_SET_TAG:
4391 	case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
4392 	case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4393 	case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4394 	case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4395 	case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4396 	case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4397 	case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4398 	case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4399 	case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4400 	case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4401 	case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
4402 		return true;
4403 	case RTE_FLOW_ACTION_TYPE_FLAG:
4404 	case RTE_FLOW_ACTION_TYPE_MARK:
4405 		if (priv->sh->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY)
4406 			return true;
4407 		else
4408 			return false;
4409 	default:
4410 		return false;
4411 	}
4412 }
4413 
4414 /**
4415  * Check meter action from the action list.
4416  *
4417  * @param dev
4418  *   Pointer to Ethernet device.
4419  * @param[in] actions
4420  *   Pointer to the list of actions.
4421  * @param[out] has_mtr
4422  *   Pointer to the meter exist flag.
4423  * @param[out] has_modify
4424  *   Pointer to the flag showing whether there is a packet-modifying action.
4425  * @param[out] meter_id
4426  *   Pointer to the meter id.
4427  *
4428  * @return
4429  *   Total number of actions.
4430  */
4431 static int
4432 flow_check_meter_action(struct rte_eth_dev *dev,
4433 			const struct rte_flow_action actions[],
4434 			bool *has_mtr, bool *has_modify, uint32_t *meter_id)
4435 {
4436 	const struct rte_flow_action_meter *mtr = NULL;
4437 	int actions_n = 0;
4438 
4439 	MLX5_ASSERT(has_mtr);
4440 	*has_mtr = false;
4441 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4442 		switch (actions->type) {
4443 		case RTE_FLOW_ACTION_TYPE_METER:
4444 			mtr = actions->conf;
4445 			*meter_id = mtr->mtr_id;
4446 			*has_mtr = true;
4447 			break;
4448 		default:
4449 			break;
4450 		}
4451 		if (!*has_mtr)
4452 			*has_modify |= flow_check_modify_action_type(dev,
4453 								actions->type);
4454 		actions_n++;
4455 	}
4456 	/* Count RTE_FLOW_ACTION_TYPE_END. */
4457 	return actions_n + 1;
4458 }
4459 
4460 /**
4461  * Check if the flow should be split due to hairpin.
4462  * The reason for the split is that in current HW we can't
4463  * support encap and push-vlan on Rx, so if a flow contains
4464  * these actions we move it to Tx.
4465  *
4466  * @param dev
4467  *   Pointer to Ethernet device.
4468  * @param[in] attr
4469  *   Flow rule attributes.
4470  * @param[in] actions
4471  *   Associated actions (list terminated by the END action).
4472  *
4473  * @return
4474  *   > 0 the number of actions and the flow should be split,
4475  *   0 when no split required.
4476  */
4477 static int
4478 flow_check_hairpin_split(struct rte_eth_dev *dev,
4479 			 const struct rte_flow_attr *attr,
4480 			 const struct rte_flow_action actions[])
4481 {
4482 	int queue_action = 0;
4483 	int action_n = 0;
4484 	int split = 0;
4485 	const struct rte_flow_action_queue *queue;
4486 	const struct rte_flow_action_rss *rss;
4487 	const struct rte_flow_action_raw_encap *raw_encap;
4488 	const struct rte_eth_hairpin_conf *conf;
4489 
4490 	if (!attr->ingress)
4491 		return 0;
4492 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4493 		switch (actions->type) {
4494 		case RTE_FLOW_ACTION_TYPE_QUEUE:
4495 			queue = actions->conf;
4496 			if (queue == NULL)
4497 				return 0;
4498 			conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
4499 			if (conf == NULL || conf->tx_explicit != 0)
4500 				return 0;
4501 			queue_action = 1;
4502 			action_n++;
4503 			break;
4504 		case RTE_FLOW_ACTION_TYPE_RSS:
4505 			rss = actions->conf;
4506 			if (rss == NULL || rss->queue_num == 0)
4507 				return 0;
4508 			conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
4509 			if (conf == NULL || conf->tx_explicit != 0)
4510 				return 0;
4511 			queue_action = 1;
4512 			action_n++;
4513 			break;
4514 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4515 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4516 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4517 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4518 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4519 			split++;
4520 			action_n++;
4521 			break;
4522 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4523 			raw_encap = actions->conf;
4524 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4525 				split++;
4526 			action_n++;
4527 			break;
4528 		default:
4529 			action_n++;
4530 			break;
4531 		}
4532 	}
4533 	if (split && queue_action)
4534 		return action_n;
4535 	return 0;
4536 }
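
/*
 * Worked example: an ingress flow with the actions VXLAN_ENCAP /
 * QUEUE (hairpin queue with implicit Tx rule) / END returns 2, so the
 * caller splits the flow and moves the encap part to the Tx side. With
 * tx_explicit set on the hairpin queue, 0 is returned and no split is
 * done.
 */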
4537 
4538 /* Declare flow create/destroy prototype in advance. */
4539 static uint32_t
4540 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4541 		 const struct rte_flow_attr *attr,
4542 		 const struct rte_flow_item items[],
4543 		 const struct rte_flow_action actions[],
4544 		 bool external, struct rte_flow_error *error);
4545 
4546 static void
4547 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4548 		  uint32_t flow_idx);
4549 
4550 int
4551 flow_dv_mreg_match_cb(void *tool_ctx __rte_unused,
4552 		      struct mlx5_list_entry *entry, void *cb_ctx)
4553 {
4554 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4555 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4556 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4557 
4558 	return mcp_res->mark_id != *(uint32_t *)(ctx->data);
4559 }
4560 
4561 struct mlx5_list_entry *
4562 flow_dv_mreg_create_cb(void *tool_ctx, void *cb_ctx)
4563 {
4564 	struct rte_eth_dev *dev = tool_ctx;
4565 	struct mlx5_priv *priv = dev->data->dev_private;
4566 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4567 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4568 	struct rte_flow_error *error = ctx->error;
4569 	uint32_t idx = 0;
4570 	int ret;
4571 	uint32_t mark_id = *(uint32_t *)(ctx->data);
4572 	struct rte_flow_attr attr = {
4573 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4574 		.ingress = 1,
4575 	};
4576 	struct mlx5_rte_flow_item_tag tag_spec = {
4577 		.data = mark_id,
4578 	};
4579 	struct rte_flow_item items[] = {
4580 		[1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
4581 	};
4582 	struct rte_flow_action_mark ftag = {
4583 		.id = mark_id,
4584 	};
4585 	struct mlx5_flow_action_copy_mreg cp_mreg = {
4586 		.dst = REG_B,
4587 		.src = REG_NON,
4588 	};
4589 	struct rte_flow_action_jump jump = {
4590 		.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4591 	};
4592 	struct rte_flow_action actions[] = {
4593 		[3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
4594 	};
4595 
4596 	/* Fill the register fields in the flow. */
4597 	ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
4598 	if (ret < 0)
4599 		return NULL;
4600 	tag_spec.id = ret;
4601 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4602 	if (ret < 0)
4603 		return NULL;
4604 	cp_mreg.src = ret;
4605 	/* Provide the full width of FLAG specific value. */
4606 	if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
4607 		tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
4608 	/* Build a new flow. */
4609 	if (mark_id != MLX5_DEFAULT_COPY_ID) {
4610 		items[0] = (struct rte_flow_item){
4611 			.type = (enum rte_flow_item_type)
4612 				MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4613 			.spec = &tag_spec,
4614 		};
4615 		items[1] = (struct rte_flow_item){
4616 			.type = RTE_FLOW_ITEM_TYPE_END,
4617 		};
4618 		actions[0] = (struct rte_flow_action){
4619 			.type = (enum rte_flow_action_type)
4620 				MLX5_RTE_FLOW_ACTION_TYPE_MARK,
4621 			.conf = &ftag,
4622 		};
4623 		actions[1] = (struct rte_flow_action){
4624 			.type = (enum rte_flow_action_type)
4625 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4626 			.conf = &cp_mreg,
4627 		};
4628 		actions[2] = (struct rte_flow_action){
4629 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
4630 			.conf = &jump,
4631 		};
4632 		actions[3] = (struct rte_flow_action){
4633 			.type = RTE_FLOW_ACTION_TYPE_END,
4634 		};
4635 	} else {
4636 		/* Default rule, wildcard match. */
4637 		attr.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR;
4638 		items[0] = (struct rte_flow_item){
4639 			.type = RTE_FLOW_ITEM_TYPE_END,
4640 		};
4641 		actions[0] = (struct rte_flow_action){
4642 			.type = (enum rte_flow_action_type)
4643 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4644 			.conf = &cp_mreg,
4645 		};
4646 		actions[1] = (struct rte_flow_action){
4647 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
4648 			.conf = &jump,
4649 		};
4650 		actions[2] = (struct rte_flow_action){
4651 			.type = RTE_FLOW_ACTION_TYPE_END,
4652 		};
4653 	}
4654 	/* Build a new entry. */
4655 	mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4656 	if (!mcp_res) {
4657 		rte_errno = ENOMEM;
4658 		return NULL;
4659 	}
4660 	mcp_res->idx = idx;
4661 	mcp_res->mark_id = mark_id;
4662 	/*
4663 	 * The copy flows are not included in any list. These
4664 	 * ones are referenced from other flows and cannot
4665 	 * be applied, removed or deleted in arbitrary order
4666 	 * by list traversal.
4667 	 */
4668 	mcp_res->rix_flow = flow_list_create(dev, MLX5_FLOW_TYPE_MCP,
4669 					&attr, items, actions, false, error);
4670 	if (!mcp_res->rix_flow) {
4671 		mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], idx);
4672 		return NULL;
4673 	}
4674 	return &mcp_res->hlist_ent;
4675 }
4676 
4677 struct mlx5_list_entry *
4678 flow_dv_mreg_clone_cb(void *tool_ctx, struct mlx5_list_entry *oentry,
4679 		      void *cb_ctx __rte_unused)
4680 {
4681 	struct rte_eth_dev *dev = tool_ctx;
4682 	struct mlx5_priv *priv = dev->data->dev_private;
4683 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4684 	uint32_t idx = 0;
4685 
4686 	mcp_res = mlx5_ipool_malloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4687 	if (!mcp_res) {
4688 		rte_errno = ENOMEM;
4689 		return NULL;
4690 	}
4691 	memcpy(mcp_res, oentry, sizeof(*mcp_res));
4692 	mcp_res->idx = idx;
4693 	return &mcp_res->hlist_ent;
4694 }
4695 
4696 void
4697 flow_dv_mreg_clone_free_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4698 {
4699 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4700 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4701 	struct rte_eth_dev *dev = tool_ctx;
4702 	struct mlx5_priv *priv = dev->data->dev_private;
4703 
4704 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4705 }
4706 
4707 /**
4708  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4709  *
4710  * As mark_id is unique, if there's already a registered flow for the mark_id,
4711  * return by increasing the reference counter of the resource. Otherwise, create
4712  * the resource (mcp_res) and flow.
4713  *
4714  * Flow looks like,
4715  *   - If ingress port is ANY and reg_c[1] is mark_id,
4716  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4717  *
4718  * For default flow (zero mark_id), flow is like,
4719  *   - If ingress port is ANY,
4720  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
4721  *
4722  * @param dev
4723  *   Pointer to Ethernet device.
4724  * @param mark_id
4725  *   ID of MARK action, zero means default flow for META.
4726  * @param[out] error
4727  *   Perform verbose error reporting if not NULL.
4728  *
4729  * @return
4730  *   Associated resource on success, NULL otherwise and rte_errno is set.
4731  */
4732 static struct mlx5_flow_mreg_copy_resource *
4733 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
4734 			  struct rte_flow_error *error)
4735 {
4736 	struct mlx5_priv *priv = dev->data->dev_private;
4737 	struct mlx5_list_entry *entry;
4738 	struct mlx5_flow_cb_ctx ctx = {
4739 		.dev = dev,
4740 		.error = error,
4741 		.data = &mark_id,
4742 	};
4743 
4744 	/* Check if already registered. */
4745 	MLX5_ASSERT(priv->mreg_cp_tbl);
4746 	entry = mlx5_hlist_register(priv->mreg_cp_tbl, mark_id, &ctx);
4747 	if (!entry)
4748 		return NULL;
4749 	return container_of(entry, struct mlx5_flow_mreg_copy_resource,
4750 			    hlist_ent);
4751 }
4752 
4753 void
4754 flow_dv_mreg_remove_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4755 {
4756 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4757 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4758 	struct rte_eth_dev *dev = tool_ctx;
4759 	struct mlx5_priv *priv = dev->data->dev_private;
4760 
4761 	MLX5_ASSERT(mcp_res->rix_flow);
4762 	flow_list_destroy(dev, MLX5_FLOW_TYPE_MCP, mcp_res->rix_flow);
4763 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4764 }
4765 
4766 /**
4767  * Release flow in RX_CP_TBL.
4768  *
4769  * @param dev
4770  *   Pointer to Ethernet device.
4771  * @param flow
4772  *   Parent flow for which copying is provided.
4773  */
4774 static void
4775 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
4776 			  struct rte_flow *flow)
4777 {
4778 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4779 	struct mlx5_priv *priv = dev->data->dev_private;
4780 
4781 	if (!flow->rix_mreg_copy)
4782 		return;
4783 	mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
4784 				 flow->rix_mreg_copy);
4785 	if (!mcp_res || !priv->mreg_cp_tbl)
4786 		return;
4787 	MLX5_ASSERT(mcp_res->rix_flow);
4788 	mlx5_hlist_unregister(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
4789 	flow->rix_mreg_copy = 0;
4790 }
4791 
4792 /**
4793  * Remove the default copy action from RX_CP_TBL.
4794  *
4795  * This function is called in mlx5_dev_start(). Thread safety
4796  * is not guaranteed.
4797  *
4798  * @param dev
4799  *   Pointer to Ethernet device.
4800  */
4801 static void
4802 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
4803 {
4804 	struct mlx5_list_entry *entry;
4805 	struct mlx5_priv *priv = dev->data->dev_private;
4806 	struct mlx5_flow_cb_ctx ctx;
4807 	uint32_t mark_id;
4808 
4809 	/* Check if default flow is registered. */
4810 	if (!priv->mreg_cp_tbl)
4811 		return;
4812 	mark_id = MLX5_DEFAULT_COPY_ID;
4813 	ctx.data = &mark_id;
4814 	entry = mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx);
4815 	if (!entry)
4816 		return;
4817 	mlx5_hlist_unregister(priv->mreg_cp_tbl, entry);
4818 }
4819 
4820 /**
4821  * Add the default copy action in RX_CP_TBL.
4822  *
4823  * This function is called in mlx5_dev_start(). Thread safety
4824  * is not guaranteed.
4825  *
4826  * @param dev
4827  *   Pointer to Ethernet device.
4828  * @param[out] error
4829  *   Perform verbose error reporting if not NULL.
4830  *
4831  * @return
4832  *   0 for success, negative value otherwise and rte_errno is set.
4833  */
4834 static int
4835 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
4836 				  struct rte_flow_error *error)
4837 {
4838 	struct mlx5_priv *priv = dev->data->dev_private;
4839 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4840 	struct mlx5_flow_cb_ctx ctx;
4841 	uint32_t mark_id;
4842 
4843 	/* Check whether extensive metadata feature is engaged. */
4844 	if (!priv->sh->config.dv_flow_en ||
4845 	    priv->sh->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4846 	    !mlx5_flow_ext_mreg_supported(dev) ||
4847 	    !priv->sh->dv_regc0_mask)
4848 		return 0;
4849 	/*
4850 	 * Adding the default mreg copy flow may be called multiple times,
4851 	 * but it is removed only once at stop. Avoid registering it twice.
4852 	 */
4853 	mark_id = MLX5_DEFAULT_COPY_ID;
4854 	ctx.data = &mark_id;
4855 	if (mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx))
4856 		return 0;
4857 	mcp_res = flow_mreg_add_copy_action(dev, mark_id, error);
4858 	if (!mcp_res)
4859 		return -rte_errno;
4860 	return 0;
4861 }
4862 
4863 /**
4864  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4865  *
4866  * All the flows having a Q/RSS action should be split by
4867  * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL
4868  * performs the following,
4869  *   - CQE->flow_tag := reg_c[1] (MARK)
4870  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4871  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
4872  * but there should be a flow per each MARK ID set by MARK action.
4873  *
4874  * For the aforementioned reason, if there's a MARK action in flow's action
4875  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
4876  * the MARK ID to CQE's flow_tag like,
4877  *   - If reg_c[1] is mark_id,
4878  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4879  *
4880  * For SET_META action which stores value in reg_c[0], as the destination is
4881  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
4882  * MARK ID means the default flow. The default flow looks like,
4883  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4884  *
4885  * @param dev
4886  *   Pointer to Ethernet device.
4887  * @param flow
4888  *   Pointer to flow structure.
4889  * @param[in] actions
4890  *   Pointer to the list of actions.
4891  * @param[out] error
4892  *   Perform verbose error reporting if not NULL.
4893  *
4894  * @return
4895  *   0 on success, negative value otherwise and rte_errno is set.
4896  */
4897 static int
4898 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
4899 			    struct rte_flow *flow,
4900 			    const struct rte_flow_action *actions,
4901 			    struct rte_flow_error *error)
4902 {
4903 	struct mlx5_priv *priv = dev->data->dev_private;
4904 	struct mlx5_sh_config *config = &priv->sh->config;
4905 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4906 	const struct rte_flow_action_mark *mark;
4907 
4908 	/* Check whether extensive metadata feature is engaged. */
4909 	if (!config->dv_flow_en ||
4910 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4911 	    !mlx5_flow_ext_mreg_supported(dev) ||
4912 	    !priv->sh->dv_regc0_mask)
4913 		return 0;
4914 	/* Find MARK action. */
4915 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4916 		switch (actions->type) {
4917 		case RTE_FLOW_ACTION_TYPE_FLAG:
4918 			mcp_res = flow_mreg_add_copy_action
4919 				(dev, MLX5_FLOW_MARK_DEFAULT, error);
4920 			if (!mcp_res)
4921 				return -rte_errno;
4922 			flow->rix_mreg_copy = mcp_res->idx;
4923 			return 0;
4924 		case RTE_FLOW_ACTION_TYPE_MARK:
4925 			mark = (const struct rte_flow_action_mark *)
4926 				actions->conf;
4927 			mcp_res =
4928 				flow_mreg_add_copy_action(dev, mark->id, error);
4929 			if (!mcp_res)
4930 				return -rte_errno;
4931 			flow->rix_mreg_copy = mcp_res->idx;
4932 			return 0;
4933 		default:
4934 			break;
4935 		}
4936 	}
4937 	return 0;
4938 }
4939 
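/*
 * Editor's note: a minimal, hypothetical application-side action list that
 * would make flow_mreg_update_copy_table() register a MARK-id copy flow in
 * RX_CP_TBL. The names and the mark id 42 below are illustrative only and
 * are not part of the driver.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* illustrative sketch, not compiled */
static const struct rte_flow_action_mark example_mark = { .id = 42 };
static const struct rte_flow_action_queue example_queue = { .index = 0 };
static const struct rte_flow_action example_mark_actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &example_mark },
	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &example_queue },
	{ .type = RTE_FLOW_ACTION_TYPE_END, .conf = NULL },
};
#endif
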
4940 #define MLX5_MAX_SPLIT_ACTIONS 24
4941 #define MLX5_MAX_SPLIT_ITEMS 24
4942 
4943 /**
4944  * Split the hairpin flow.
4945  * Since HW can't support encap and push-vlan on Rx, we move these
4946  * actions to Tx.
4947  * If the count action is after the encap then we also
4948  * move the count action. In this case the count will also measure
4949  * the outer bytes.
4950  *
4951  * @param dev
4952  *   Pointer to Ethernet device.
4953  * @param[in] actions
4954  *   Associated actions (list terminated by the END action).
4955  * @param[out] actions_rx
4956  *   Rx flow actions.
4957  * @param[out] actions_tx
4958  *   Tx flow actions.
4959  * @param[out] pattern_tx
4960  *   The pattern items for the Tx flow.
4961  * @param[out] flow_id
4962  *   The flow ID connected to this flow.
4963  *
4964  * @return
4965  *   0 on success.
4966  */
4967 static int
4968 flow_hairpin_split(struct rte_eth_dev *dev,
4969 		   const struct rte_flow_action actions[],
4970 		   struct rte_flow_action actions_rx[],
4971 		   struct rte_flow_action actions_tx[],
4972 		   struct rte_flow_item pattern_tx[],
4973 		   uint32_t flow_id)
4974 {
4975 	const struct rte_flow_action_raw_encap *raw_encap;
4976 	const struct rte_flow_action_raw_decap *raw_decap;
4977 	struct mlx5_rte_flow_action_set_tag *set_tag;
4978 	struct rte_flow_action *tag_action;
4979 	struct mlx5_rte_flow_item_tag *tag_item;
4980 	struct rte_flow_item *item;
4981 	char *addr;
4982 	int encap = 0;
4983 
4984 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4985 		switch (actions->type) {
4986 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4987 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4988 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4989 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4990 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4991 			rte_memcpy(actions_tx, actions,
4992 			       sizeof(struct rte_flow_action));
4993 			actions_tx++;
4994 			break;
4995 		case RTE_FLOW_ACTION_TYPE_COUNT:
4996 			if (encap) {
4997 				rte_memcpy(actions_tx, actions,
4998 					   sizeof(struct rte_flow_action));
4999 				actions_tx++;
5000 			} else {
5001 				rte_memcpy(actions_rx, actions,
5002 					   sizeof(struct rte_flow_action));
5003 				actions_rx++;
5004 			}
5005 			break;
5006 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5007 			raw_encap = actions->conf;
5008 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) {
5009 				memcpy(actions_tx, actions,
5010 				       sizeof(struct rte_flow_action));
5011 				actions_tx++;
5012 				encap = 1;
5013 			} else {
5014 				rte_memcpy(actions_rx, actions,
5015 					   sizeof(struct rte_flow_action));
5016 				actions_rx++;
5017 			}
5018 			break;
5019 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5020 			raw_decap = actions->conf;
5021 			if (raw_decap->size < MLX5_ENCAPSULATION_DECISION_SIZE) {
5022 				memcpy(actions_tx, actions,
5023 				       sizeof(struct rte_flow_action));
5024 				actions_tx++;
5025 			} else {
5026 				rte_memcpy(actions_rx, actions,
5027 					   sizeof(struct rte_flow_action));
5028 				actions_rx++;
5029 			}
5030 			break;
5031 		default:
5032 			rte_memcpy(actions_rx, actions,
5033 				   sizeof(struct rte_flow_action));
5034 			actions_rx++;
5035 			break;
5036 		}
5037 	}
5038 	/* Add set meta action and end action for the Rx flow. */
5039 	tag_action = actions_rx;
5040 	tag_action->type = (enum rte_flow_action_type)
5041 			   MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5042 	actions_rx++;
5043 	rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
5044 	actions_rx++;
5045 	set_tag = (void *)actions_rx;
5046 	*set_tag = (struct mlx5_rte_flow_action_set_tag) {
5047 		.id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL),
5048 		.data = flow_id,
5049 	};
5050 	MLX5_ASSERT(set_tag->id > REG_NON);
5051 	tag_action->conf = set_tag;
5052 	/* Create Tx item list. */
5053 	rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
5054 	addr = (void *)&pattern_tx[2];
5055 	item = pattern_tx;
5056 	item->type = (enum rte_flow_item_type)
5057 		     MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5058 	tag_item = (void *)addr;
5059 	tag_item->data = flow_id;
5060 	tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
5061 	MLX5_ASSERT(tag_item->id > REG_NON);
5062 	item->spec = tag_item;
5063 	addr += sizeof(struct mlx5_rte_flow_item_tag);
5064 	tag_item = (void *)addr;
5065 	tag_item->data = UINT32_MAX;
5066 	tag_item->id = UINT16_MAX;
5067 	item->mask = tag_item;
5068 	item->last = NULL;
5069 	item++;
5070 	item->type = RTE_FLOW_ITEM_TYPE_END;
5071 	return 0;
5072 }
5073 
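/*
 * Editor's note: the Tx-side pattern produced by flow_hairpin_split()
 * reduces to a single internal TAG item matching the hairpin flow id,
 * conceptually (illustrative values):
 *
 *   pattern_tx[0] = { .type = MLX5_RTE_FLOW_ITEM_TYPE_TAG,
 *                     .spec = { .id = <MLX5_HAIRPIN_TX register>,
 *                               .data = flow_id },
 *                     .mask = { .id = UINT16_MAX, .data = UINT32_MAX } };
 *   pattern_tx[1] = { .type = RTE_FLOW_ITEM_TYPE_END };
 */
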
5074 /**
5075  * The last stage of splitting chain, just creates the subflow
5076  * without any modification.
5077  *
5078  * @param[in] dev
5079  *   Pointer to Ethernet device.
5080  * @param[in] flow
5081  *   Parent flow structure pointer.
5082  * @param[in, out] sub_flow
5083  *   Pointer to return the created subflow, may be NULL.
5084  * @param[in] attr
5085  *   Flow rule attributes.
5086  * @param[in] items
5087  *   Pattern specification (list terminated by the END pattern item).
5088  * @param[in] actions
5089  *   Associated actions (list terminated by the END action).
5090  * @param[in] flow_split_info
5091  *   Pointer to flow split info structure.
5092  * @param[out] error
5093  *   Perform verbose error reporting if not NULL.
5094  * @return
5095  *   0 on success, negative value otherwise
5096  */
5097 static int
5098 flow_create_split_inner(struct rte_eth_dev *dev,
5099 			struct rte_flow *flow,
5100 			struct mlx5_flow **sub_flow,
5101 			const struct rte_flow_attr *attr,
5102 			const struct rte_flow_item items[],
5103 			const struct rte_flow_action actions[],
5104 			struct mlx5_flow_split_info *flow_split_info,
5105 			struct rte_flow_error *error)
5106 {
5107 	struct mlx5_flow *dev_flow;
5108 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
5109 
5110 	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
5111 				    flow_split_info->flow_idx, error);
5112 	if (!dev_flow)
5113 		return -rte_errno;
5114 	dev_flow->flow = flow;
5115 	dev_flow->external = flow_split_info->external;
5116 	dev_flow->skip_scale = flow_split_info->skip_scale;
5117 	/* Subflow object was created, we must include it in the list. */
5118 	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
5119 		      dev_flow->handle, next);
5120 	/*
5121 	 * If dev_flow is one of the suffix flows, some actions in the suffix
5122 	 * flow may need the user-defined item layer flags; pass the
5123 	 * metadata rxq mark flag to the suffix flow as well.
5124 	 */
5125 	if (flow_split_info->prefix_layers)
5126 		dev_flow->handle->layers = flow_split_info->prefix_layers;
5127 	if (flow_split_info->prefix_mark) {
5128 		MLX5_ASSERT(wks);
5129 		wks->mark = 1;
5130 	}
5131 	if (sub_flow)
5132 		*sub_flow = dev_flow;
5133 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5134 	dev_flow->dv.table_id = flow_split_info->table_id;
5135 #endif
5136 	return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
5137 }
5138 
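/*
 * Editor's note: propagating flow_split_info->prefix_layers above lets a
 * suffix subflow, which typically matches only on an internal TAG item,
 * still translate actions that depend on the packet layers detected in
 * the prefix subflow (e.g. L3/L4 dependent modify-header actions).
 */
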
5139 /**
5140  * Get the sub policy of a meter.
5141  *
5142  * @param[in] dev
5143  *   Pointer to Ethernet device.
5144  * @param[in] flow
5145  *   Parent flow structure pointer.
5146  * @param wks
5147  *   Pointer to thread flow work space.
5148  * @param[in] attr
5149  *   Flow rule attributes.
5150  * @param[in] items
5151  *   Pattern specification (list terminated by the END pattern item).
5152  * @param[out] error
5153  *   Perform verbose error reporting if not NULL.
5154  *
5155  * @return
5156  *   Pointer to the meter sub policy, NULL otherwise and rte_errno is set.
5157  */
5158 static struct mlx5_flow_meter_sub_policy *
5159 get_meter_sub_policy(struct rte_eth_dev *dev,
5160 		     struct rte_flow *flow,
5161 		     struct mlx5_flow_workspace *wks,
5162 		     const struct rte_flow_attr *attr,
5163 		     const struct rte_flow_item items[],
5164 		     struct rte_flow_error *error)
5165 {
5166 	struct mlx5_flow_meter_policy *policy;
5167 	struct mlx5_flow_meter_policy *final_policy;
5168 	struct mlx5_flow_meter_sub_policy *sub_policy = NULL;
5169 
5170 	policy = wks->policy;
5171 	final_policy = policy->is_hierarchy ? wks->final_policy : policy;
5172 	if (final_policy->is_rss || final_policy->is_queue) {
5173 		struct mlx5_flow_rss_desc rss_desc_v[MLX5_MTR_RTE_COLORS];
5174 		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS] = {0};
5175 		uint32_t i;
5176 
5177 		/*
5178 		 * This is a tmp dev_flow,
5179 		 * no need to register any matcher for it in translate.
5180 		 */
5181 		wks->skip_matcher_reg = 1;
5182 		for (i = 0; i < MLX5_MTR_RTE_COLORS; i++) {
5183 			struct mlx5_flow dev_flow = {0};
5184 			struct mlx5_flow_handle dev_handle = { {0} };
5185 			uint8_t fate = final_policy->act_cnt[i].fate_action;
5186 
5187 			if (fate == MLX5_FLOW_FATE_SHARED_RSS) {
5188 				const struct rte_flow_action_rss *rss_act =
5189 					final_policy->act_cnt[i].rss->conf;
5190 				struct rte_flow_action rss_actions[2] = {
5191 					[0] = {
5192 					.type = RTE_FLOW_ACTION_TYPE_RSS,
5193 					.conf = rss_act,
5194 					},
5195 					[1] = {
5196 					.type = RTE_FLOW_ACTION_TYPE_END,
5197 					.conf = NULL,
5198 					}
5199 				};
5200 
5201 				dev_flow.handle = &dev_handle;
5202 				dev_flow.ingress = attr->ingress;
5203 				dev_flow.flow = flow;
5204 				dev_flow.external = 0;
5205 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5206 				dev_flow.dv.transfer = attr->transfer;
5207 #endif
5208 				/*
5209 				 * Translate RSS action to get rss hash fields.
5210 				 */
5211 				if (flow_drv_translate(dev, &dev_flow, attr,
5212 						items, rss_actions, error))
5213 					goto exit;
5214 				rss_desc_v[i] = wks->rss_desc;
5215 				rss_desc_v[i].key_len = MLX5_RSS_HASH_KEY_LEN;
5216 				rss_desc_v[i].hash_fields =
5217 						dev_flow.hash_fields;
5218 				rss_desc_v[i].queue_num =
5219 						rss_desc_v[i].hash_fields ?
5220 						rss_desc_v[i].queue_num : 1;
5221 				rss_desc_v[i].tunnel =
5222 						!!(dev_flow.handle->layers &
5223 						   MLX5_FLOW_LAYER_TUNNEL);
5224 				/* Use the RSS queues in the containers. */
5225 				rss_desc_v[i].queue =
5226 					(uint16_t *)(uintptr_t)rss_act->queue;
5227 				rss_desc[i] = &rss_desc_v[i];
5228 			} else if (fate == MLX5_FLOW_FATE_QUEUE) {
5229 				/* This is queue action. */
5230 				rss_desc_v[i] = wks->rss_desc;
5231 				rss_desc_v[i].key_len = 0;
5232 				rss_desc_v[i].hash_fields = 0;
5233 				rss_desc_v[i].queue =
5234 					&final_policy->act_cnt[i].queue;
5235 				rss_desc_v[i].queue_num = 1;
5236 				rss_desc[i] = &rss_desc_v[i];
5237 			} else {
5238 				rss_desc[i] = NULL;
5239 			}
5240 		}
5241 		sub_policy = flow_drv_meter_sub_policy_rss_prepare(dev,
5242 						flow, policy, rss_desc);
5243 	} else {
5244 		enum mlx5_meter_domain mtr_domain =
5245 			attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5246 				(attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5247 						MLX5_MTR_DOMAIN_INGRESS);
5248 		sub_policy = policy->sub_policys[mtr_domain][0];
5249 	}
5250 	if (!sub_policy)
5251 		rte_flow_error_set(error, EINVAL,
5252 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5253 				   "Failed to get meter sub-policy.");
5254 exit:
5255 	return sub_policy;
5256 }
5257 
5258 /**
5259  * Split the meter flow.
5260  *
5261  * As the meter flow is split into three sub-flows, the actions
5262  * other than the meter action only make sense if the meter accepts
5263  * the packet. If the packet is to be dropped, no additional
5264  * actions should be taken.
5265  *
5266  * One kind of special action which decapsulates the L3 tunnel
5267  * header will be put in the prefix sub-flow, so as not to take the
5268  * L3 tunnel header into account.
5269  *
5270  * @param[in] dev
5271  *   Pointer to Ethernet device.
5272  * @param[in] flow
5273  *   Parent flow structure pointer.
5274  * @param wks
5275  *   Pointer to thread flow work space.
5276  * @param[in] attr
5277  *   Flow rule attributes.
5278  * @param[in] items
5279  *   Pattern specification (list terminated by the END pattern item).
5280  * @param[out] sfx_items
5281  *   Suffix flow match items (list terminated by the END pattern item).
5282  * @param[in] actions
5283  *   Associated actions (list terminated by the END action).
5284  * @param[out] actions_sfx
5285  *   Suffix flow actions.
5286  * @param[out] actions_pre
5287  *   Prefix flow actions.
5288  * @param[out] mtr_flow_id
5289  *   Pointer to meter flow id.
5290  * @param[out] error
5291  *   Perform verbose error reporting if not NULL.
5292  *
5293  * @return
5294  *   0 on success, a negative errno value otherwise and rte_errno is set.
5295  */
5296 static int
5297 flow_meter_split_prep(struct rte_eth_dev *dev,
5298 		      struct rte_flow *flow,
5299 		      struct mlx5_flow_workspace *wks,
5300 		      const struct rte_flow_attr *attr,
5301 		      const struct rte_flow_item items[],
5302 		      struct rte_flow_item sfx_items[],
5303 		      const struct rte_flow_action actions[],
5304 		      struct rte_flow_action actions_sfx[],
5305 		      struct rte_flow_action actions_pre[],
5306 		      uint32_t *mtr_flow_id,
5307 		      struct rte_flow_error *error)
5308 {
5309 	struct mlx5_priv *priv = dev->data->dev_private;
5310 	struct mlx5_flow_meter_info *fm = wks->fm;
5311 	struct rte_flow_action *tag_action = NULL;
5312 	struct rte_flow_item *tag_item;
5313 	struct mlx5_rte_flow_action_set_tag *set_tag;
5314 	const struct rte_flow_action_raw_encap *raw_encap;
5315 	const struct rte_flow_action_raw_decap *raw_decap;
5316 	struct mlx5_rte_flow_item_tag *tag_item_spec;
5317 	struct mlx5_rte_flow_item_tag *tag_item_mask;
5318 	uint32_t tag_id = 0;
5319 	struct rte_flow_item *vlan_item_dst = NULL;
5320 	const struct rte_flow_item *vlan_item_src = NULL;
5321 	const struct rte_flow_item *orig_items = items;
5322 	struct rte_flow_action *hw_mtr_action;
5323 	struct rte_flow_action *action_pre_head = NULL;
5324 	uint16_t flow_src_port = priv->representor_id;
5325 	bool mtr_first;
5326 	uint8_t mtr_id_offset = priv->mtr_reg_share ? MLX5_MTR_COLOR_BITS : 0;
5327 	uint8_t mtr_reg_bits = priv->mtr_reg_share ?
5328 				MLX5_MTR_IDLE_BITS_IN_COLOR_REG : MLX5_REG_BITS;
5329 	uint32_t flow_id = 0;
5330 	uint32_t flow_id_reversed = 0;
5331 	uint8_t flow_id_bits = 0;
5332 	bool after_meter = false;
5333 	int shift;
5334 
5335 	/* Prepare the suffix subflow items. */
5336 	tag_item = sfx_items++;
5337 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
5338 		int item_type = items->type;
5339 
5340 		switch (item_type) {
5341 		case RTE_FLOW_ITEM_TYPE_PORT_ID:
5342 		case RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT:
5343 			if (mlx5_flow_get_item_vport_id(dev, items, &flow_src_port, NULL, error))
5344 				return -rte_errno;
5345 			if (!fm->def_policy && wks->policy->is_hierarchy &&
5346 			    flow_src_port != priv->representor_id) {
5347 				if (flow_drv_mtr_hierarchy_rule_create(dev,
5348 								flow, fm,
5349 								flow_src_port,
5350 								items,
5351 								error))
5352 					return -rte_errno;
5353 			}
5354 			memcpy(sfx_items, items, sizeof(*sfx_items));
5355 			sfx_items++;
5356 			break;
5357 		case RTE_FLOW_ITEM_TYPE_VLAN:
5358 			/* Determine below whether to copy the VLAN item. */
5359 			vlan_item_src = items;
5360 			vlan_item_dst = sfx_items++;
5361 			vlan_item_dst->type = RTE_FLOW_ITEM_TYPE_VOID;
5362 			break;
5363 		default:
5364 			break;
5365 		}
5366 	}
5367 	sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
5368 	sfx_items++;
5369 	mtr_first = priv->sh->meter_aso_en &&
5370 		(attr->egress || (attr->transfer && flow_src_port != UINT16_MAX));
5371 	/* For ASO meter, meter must be before tag in TX direction. */
5372 	if (mtr_first) {
5373 		action_pre_head = actions_pre++;
5374 		/* Leave space for tag action. */
5375 		tag_action = actions_pre++;
5376 	}
5377 	/* Prepare the actions for prefix and suffix flow. */
5378 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5379 		struct rte_flow_action *action_cur = NULL;
5380 
5381 		switch (actions->type) {
5382 		case RTE_FLOW_ACTION_TYPE_METER:
5383 			if (mtr_first) {
5384 				action_cur = action_pre_head;
5385 			} else {
5386 				/* Leave space for tag action. */
5387 				tag_action = actions_pre++;
5388 				action_cur = actions_pre++;
5389 			}
5390 			after_meter = true;
5391 			break;
5392 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5393 		case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5394 			action_cur = actions_pre++;
5395 			break;
5396 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5397 			raw_encap = actions->conf;
5398 			if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
5399 				action_cur = actions_pre++;
5400 			break;
5401 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5402 			raw_decap = actions->conf;
5403 			if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
5404 				action_cur = actions_pre++;
5405 			break;
5406 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5407 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5408 			if (vlan_item_dst && vlan_item_src) {
5409 				memcpy(vlan_item_dst, vlan_item_src,
5410 					sizeof(*vlan_item_dst));
5411 				/*
5412 				 * Convert to an internal match item; it is
5413 				 * used for VLAN push and set VID.
5414 				 */
5415 				vlan_item_dst->type = (enum rte_flow_item_type)
5416 						MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
5417 			}
5418 			break;
5419 		case RTE_FLOW_ACTION_TYPE_COUNT:
5420 			if (fm->def_policy)
5421 				action_cur = after_meter ?
5422 						actions_sfx++ : actions_pre++;
5423 			break;
5424 		default:
5425 			break;
5426 		}
5427 		if (!action_cur)
5428 			action_cur = (fm->def_policy) ?
5429 					actions_sfx++ : actions_pre++;
5430 		memcpy(action_cur, actions, sizeof(struct rte_flow_action));
5431 	}
5432 	/* Add end action to the actions. */
5433 	actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
5434 	if (priv->sh->meter_aso_en) {
5435 		/*
5436 		 * For an ASO meter, an extra jump action must be added
5437 		 * explicitly to jump from the meter to the policer table.
5438 		 */
5439 		struct mlx5_flow_meter_sub_policy *sub_policy;
5440 		struct mlx5_flow_tbl_data_entry *tbl_data;
5441 
5442 		if (!fm->def_policy) {
5443 			sub_policy = get_meter_sub_policy(dev, flow, wks,
5444 							  attr, orig_items,
5445 							  error);
5446 			if (!sub_policy)
5447 				return -rte_errno;
5448 		} else {
5449 			enum mlx5_meter_domain mtr_domain =
5450 			attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5451 				(attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5452 						MLX5_MTR_DOMAIN_INGRESS);
5453 
5454 			sub_policy =
5455 			&priv->sh->mtrmng->def_policy[mtr_domain]->sub_policy;
5456 		}
5457 		tbl_data = container_of(sub_policy->tbl_rsc,
5458 					struct mlx5_flow_tbl_data_entry, tbl);
5459 		hw_mtr_action = actions_pre++;
5460 		hw_mtr_action->type = (enum rte_flow_action_type)
5461 				      MLX5_RTE_FLOW_ACTION_TYPE_JUMP;
5462 		hw_mtr_action->conf = tbl_data->jump.action;
5463 	}
5464 	actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
5465 	actions_pre++;
5466 	if (!tag_action)
5467 		return rte_flow_error_set(error, ENOMEM,
5468 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5469 					  NULL, "No tag action space.");
5470 	if (!mtr_flow_id) {
5471 		tag_action->type = RTE_FLOW_ACTION_TYPE_VOID;
5472 		goto exit;
5473 	}
5474 	/* Only default-policy Meter creates mtr flow id. */
5475 	if (fm->def_policy) {
5476 		mlx5_ipool_malloc(fm->flow_ipool, &tag_id);
5477 		if (!tag_id)
5478 			return rte_flow_error_set(error, ENOMEM,
5479 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5480 					"Failed to allocate meter flow id.");
5481 		flow_id = tag_id - 1;
5482 		flow_id_bits = (!flow_id) ? 1 :
5483 				(MLX5_REG_BITS - __builtin_clz(flow_id));
5484 		if ((flow_id_bits + priv->sh->mtrmng->max_mtr_bits) >
5485 		    mtr_reg_bits) {
5486 			mlx5_ipool_free(fm->flow_ipool, tag_id);
5487 			return rte_flow_error_set(error, EINVAL,
5488 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5489 					"Meter flow id exceeds max limit.");
5490 		}
5491 		if (flow_id_bits > priv->sh->mtrmng->max_mtr_flow_bits)
5492 			priv->sh->mtrmng->max_mtr_flow_bits = flow_id_bits;
5493 	}
5494 	/* Build tag actions and items for meter_id/meter flow_id. */
5495 	set_tag = (struct mlx5_rte_flow_action_set_tag *)actions_pre;
5496 	tag_item_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
5497 	tag_item_mask = tag_item_spec + 1;
5498 	/* Both flow_id and meter_id share the same register. */
5499 	*set_tag = (struct mlx5_rte_flow_action_set_tag) {
5500 		.id = (enum modify_reg)mlx5_flow_get_reg_id(dev, MLX5_MTR_ID,
5501 							    0, error),
5502 		.offset = mtr_id_offset,
5503 		.length = mtr_reg_bits,
5504 		.data = flow->meter,
5505 	};
5506 	/*
5507 	 * The color register bits used by flow_id grow from
5508 	 * msb to lsb, so the flow_id value must be bit-reversed in reg_c.
5509 	 */
5510 	for (shift = 0; shift < flow_id_bits; shift++)
5511 		flow_id_reversed = (flow_id_reversed << 1) |
5512 				((flow_id >> shift) & 0x1);
5513 	set_tag->data |=
5514 		flow_id_reversed << (mtr_reg_bits - flow_id_bits);
5515 	tag_item_spec->id = set_tag->id;
5516 	tag_item_spec->data = set_tag->data << mtr_id_offset;
5517 	tag_item_mask->data = UINT32_MAX << mtr_id_offset;
5518 	tag_action->type = (enum rte_flow_action_type)
5519 				MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5520 	tag_action->conf = set_tag;
5521 	tag_item->type = (enum rte_flow_item_type)
5522 				MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5523 	tag_item->spec = tag_item_spec;
5524 	tag_item->last = NULL;
5525 	tag_item->mask = tag_item_mask;
5526 exit:
5527 	if (mtr_flow_id)
5528 		*mtr_flow_id = tag_id;
5529 	return 0;
5530 }
5531 
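/*
 * Editor's sketch: the flow_id bit reversal done in flow_meter_split_prep(),
 * factored out as a standalone helper for clarity. For example, flow_id 0x3
 * (0b011) with flow_id_bits 3 yields 0b110. The helper name is hypothetical
 * and the function is not used by the driver.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* illustrative sketch, not compiled */
static inline uint32_t
example_reverse_flow_id(uint32_t flow_id, uint8_t flow_id_bits)
{
	uint32_t reversed = 0;
	int shift;

	for (shift = 0; shift < flow_id_bits; shift++)
		reversed = (reversed << 1) | ((flow_id >> shift) & 0x1);
	return reversed;
}
#endif
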
5532 /**
5533  * Split action list having QUEUE/RSS for metadata register copy.
5534  *
5535  * Once Q/RSS action is detected in user's action list, the flow action
5536  * should be split in order to copy metadata registers, which will happen in
5537  * RX_CP_TBL like,
5538  *   - CQE->flow_tag := reg_c[1] (MARK)
5539  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
5540  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
5541  * This is because the last action of each flow must be a terminal action
5542  * (QUEUE, RSS or DROP).
5543  *
5544  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
5545  * stored and kept in the mlx5_flow structure per each sub_flow.
5546  *
5547  * The Q/RSS action is replaced with,
5548  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
5549  * And the following JUMP action is added at the end,
5550  *   - JUMP, to RX_CP_TBL.
5551  *
5552  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL by
5553  * flow_create_split_metadata() routine. The flow will look like,
5554  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
5555  *
5556  * @param dev
5557  *   Pointer to Ethernet device.
5558  * @param[out] split_actions
5559  *   Pointer to store split actions to jump to CP_TBL.
5560  * @param[in] actions
5561  *   Pointer to the list of original flow actions.
5562  * @param[in] qrss
5563  *   Pointer to the Q/RSS action.
5564  * @param[in] actions_n
5565  *   Number of original actions.
5566  * @param[in] mtr_sfx
5567  *   Check if it is in meter suffix table.
5568  * @param[out] error
5569  *   Perform verbose error reporting if not NULL.
5570  *
5571  * @return
5572  *   non-zero unique flow_id on success, otherwise 0 and
5573  *   error/rte_errno are set.
5574  */
5575 static uint32_t
5576 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
5577 			  struct rte_flow_action *split_actions,
5578 			  const struct rte_flow_action *actions,
5579 			  const struct rte_flow_action *qrss,
5580 			  int actions_n, int mtr_sfx,
5581 			  struct rte_flow_error *error)
5582 {
5583 	struct mlx5_priv *priv = dev->data->dev_private;
5584 	struct mlx5_rte_flow_action_set_tag *set_tag;
5585 	struct rte_flow_action_jump *jump;
5586 	const int qrss_idx = qrss - actions;
5587 	uint32_t flow_id = 0;
5588 	int ret = 0;
5589 
5590 	/*
5591 	 * Given actions will be split
5592 	 * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
5593 	 * - Add jump to mreg CP_TBL.
5594 	 * As a result, there will be one more action.
5595 	 */
5596 	memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
5597 	/* Count MLX5_RTE_FLOW_ACTION_TYPE_TAG. */
5598 	++actions_n;
5599 	set_tag = (void *)(split_actions + actions_n);
5600 	/*
5601 	 * If we are not the meter suffix flow, add the tag action,
5602 	 * since the meter suffix flow already has the tag added.
5603 	 */
5604 	if (!mtr_sfx) {
5605 		/*
5606 		 * Allocate the new subflow ID. This one is unique within
5607 		 * device and not shared with representors. Otherwise,
5608 		 * we would have to resolve multi-thread access sync
5609 		 * issues. Each flow on the shared device is appended
5610 		 * with source vport identifier, so the resulting
5611 		 * flows will be unique in the shared (by master and
5612 		 * representors) domain even if they have coinciding
5613 		 * IDs.
5614 		 */
5615 		mlx5_ipool_malloc(priv->sh->ipool
5616 				  [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &flow_id);
5617 		if (!flow_id)
5618 			return rte_flow_error_set(error, ENOMEM,
5619 						  RTE_FLOW_ERROR_TYPE_ACTION,
5620 						  NULL, "can't allocate id "
5621 						  "for split Q/RSS subflow");
5622 		/* Internal SET_TAG action to set flow ID. */
5623 		*set_tag = (struct mlx5_rte_flow_action_set_tag){
5624 			.data = flow_id,
5625 		};
5626 		ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
5627 		if (ret < 0)
5628 			return ret;
5629 		set_tag->id = ret;
5630 		/* Construct new actions array. */
5631 		/* Replace QUEUE/RSS action. */
5632 		split_actions[qrss_idx] = (struct rte_flow_action){
5633 			.type = (enum rte_flow_action_type)
5634 				MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5635 			.conf = set_tag,
5636 		};
5637 	} else {
5638 		/*
5639 		 * If we are the meter suffix flow, the tag already exists.
5640 		 * Set the QUEUE/RSS action to void.
5641 		 */
5642 		split_actions[qrss_idx].type = RTE_FLOW_ACTION_TYPE_VOID;
5643 	}
5644 	/* JUMP action to jump to mreg copy table (CP_TBL). */
5645 	jump = (void *)(set_tag + 1);
5646 	*jump = (struct rte_flow_action_jump){
5647 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
5648 	};
5649 	split_actions[actions_n - 2] = (struct rte_flow_action){
5650 		.type = RTE_FLOW_ACTION_TYPE_JUMP,
5651 		.conf = jump,
5652 	};
5653 	split_actions[actions_n - 1] = (struct rte_flow_action){
5654 		.type = RTE_FLOW_ACTION_TYPE_END,
5655 	};
5656 	return flow_id;
5657 }
5658 
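/*
 * Editor's note: for a non-meter-suffix flow an original action list such as
 *   { RSS, END }
 * is rewritten by flow_mreg_split_qrss_prep() into
 *   { TAG(reg := flow_id), JUMP(MLX5_FLOW_MREG_CP_TABLE_GROUP), END }
 * with the SET_TAG and JUMP configurations stored in the same buffer,
 * right after the action array itself.
 */
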
5659 /**
5660  * Extend the given action list for Tx metadata copy.
5661  *
5662  * Copy the given action list to the ext_actions and add flow metadata register
5663  * copy action in order to copy reg_a set by WQE to reg_c[0].
5664  *
5665  * @param[out] ext_actions
5666  *   Pointer to the extended action list.
5667  * @param[in] actions
5668  *   Pointer to the list of actions.
5669  * @param[in] actions_n
5670  *   Number of actions in the list.
5671  * @param[out] error
5672  *   Perform verbose error reporting if not NULL.
5673  * @param[in] encap_idx
5674  *   The encap action index.
5675  *
5676  * @return
5677  *   0 on success, negative value otherwise
5678  */
5679 static int
5680 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
5681 		       struct rte_flow_action *ext_actions,
5682 		       const struct rte_flow_action *actions,
5683 		       int actions_n, struct rte_flow_error *error,
5684 		       int encap_idx)
5685 {
5686 	struct mlx5_flow_action_copy_mreg *cp_mreg =
5687 		(struct mlx5_flow_action_copy_mreg *)
5688 			(ext_actions + actions_n + 1);
5689 	int ret;
5690 
5691 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
5692 	if (ret < 0)
5693 		return ret;
5694 	cp_mreg->dst = ret;
5695 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
5696 	if (ret < 0)
5697 		return ret;
5698 	cp_mreg->src = ret;
5699 	if (encap_idx != 0)
5700 		memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
5701 	if (encap_idx == actions_n - 1) {
5702 		ext_actions[actions_n - 1] = (struct rte_flow_action){
5703 			.type = (enum rte_flow_action_type)
5704 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5705 			.conf = cp_mreg,
5706 		};
5707 		ext_actions[actions_n] = (struct rte_flow_action){
5708 			.type = RTE_FLOW_ACTION_TYPE_END,
5709 		};
5710 	} else {
5711 		ext_actions[encap_idx] = (struct rte_flow_action){
5712 			.type = (enum rte_flow_action_type)
5713 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5714 			.conf = cp_mreg,
5715 		};
5716 		memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
5717 				sizeof(*ext_actions) * (actions_n - encap_idx));
5718 	}
5719 	return 0;
5720 }
5721 
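/*
 * Editor's note: flow_mreg_tx_copy_prep() inserts the internal COPY_MREG
 * action right before the encap action at encap_idx, e.g.
 *   { A, ENCAP, END }  ->  { A, COPY_MREG, ENCAP, END }
 * and, when no encap is present (encap_idx == actions_n - 1), appends it
 * just before the END action instead.
 */
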
5722 /**
5723  * Check for the match action in the action list.
5724  *
5725  * @param[in] actions
5726  *   Pointer to the list of actions.
5727  * @param[in] attr
5728  *   Flow rule attributes.
5729  * @param[in] action
5730  *   The action to be checked for existence.
5731  * @param[out] match_action_pos
5732  *   Pointer to the position of the matched action if it exists, otherwise -1.
5733  * @param[out] qrss_action_pos
5734  *   Pointer to the position of the Queue/RSS action if it exists, otherwise -1.
5735  * @param[out] modify_after_mirror
5736  *   Pointer to the flag of modify action after FDB mirroring.
5737  *
5738  * @return
5739  *   > 0 the total number of actions.
5740  *   0 if the match action is not found in the action list.
5741  */
5742 static int
5743 flow_check_match_action(const struct rte_flow_action actions[],
5744 			const struct rte_flow_attr *attr,
5745 			enum rte_flow_action_type action,
5746 			int *match_action_pos, int *qrss_action_pos,
5747 			int *modify_after_mirror)
5748 {
5749 	const struct rte_flow_action_sample *sample;
5750 	const struct rte_flow_action_raw_decap *decap;
5751 	int actions_n = 0;
5752 	uint32_t ratio = 0;
5753 	int sub_type = 0;
5754 	int flag = 0;
5755 	int fdb_mirror = 0;
5756 
5757 	*match_action_pos = -1;
5758 	*qrss_action_pos = -1;
5759 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5760 		if (actions->type == action) {
5761 			flag = 1;
5762 			*match_action_pos = actions_n;
5763 		}
5764 		switch (actions->type) {
5765 		case RTE_FLOW_ACTION_TYPE_QUEUE:
5766 		case RTE_FLOW_ACTION_TYPE_RSS:
5767 			*qrss_action_pos = actions_n;
5768 			break;
5769 		case RTE_FLOW_ACTION_TYPE_SAMPLE:
5770 			sample = actions->conf;
5771 			ratio = sample->ratio;
5772 			sub_type = ((const struct rte_flow_action *)
5773 					(sample->actions))->type;
5774 			if (ratio == 1 && attr->transfer)
5775 				fdb_mirror = 1;
5776 			break;
5777 		case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
5778 		case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
5779 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
5780 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
5781 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
5782 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
5783 		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
5784 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
5785 		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
5786 		case RTE_FLOW_ACTION_TYPE_SET_TTL:
5787 		case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
5788 		case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
5789 		case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
5790 		case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
5791 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
5792 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
5793 		case RTE_FLOW_ACTION_TYPE_FLAG:
5794 		case RTE_FLOW_ACTION_TYPE_MARK:
5795 		case RTE_FLOW_ACTION_TYPE_SET_META:
5796 		case RTE_FLOW_ACTION_TYPE_SET_TAG:
5797 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
5798 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5799 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5800 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
5801 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5802 		case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5803 		case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
5804 		case RTE_FLOW_ACTION_TYPE_METER:
5805 			if (fdb_mirror)
5806 				*modify_after_mirror = 1;
5807 			break;
5808 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5809 			decap = actions->conf;
5810 			while ((++actions)->type == RTE_FLOW_ACTION_TYPE_VOID)
5811 				;
5812 			actions_n++;
5813 			if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
5814 				const struct rte_flow_action_raw_encap *encap =
5815 								actions->conf;
5816 				if (decap->size <=
5817 					MLX5_ENCAPSULATION_DECISION_SIZE &&
5818 				    encap->size >
5819 					MLX5_ENCAPSULATION_DECISION_SIZE)
5820 					/* L3 encap. */
5821 					break;
5822 			}
5823 			if (fdb_mirror)
5824 				*modify_after_mirror = 1;
5825 			break;
5826 		default:
5827 			break;
5828 		}
5829 		actions_n++;
5830 	}
5831 	if (flag && fdb_mirror && !*modify_after_mirror) {
5832 		/* FDB mirroring is implemented with the destination array
5833 		 * instead of the FLOW_SAMPLER object.
5834 		 */
5835 		if (sub_type != RTE_FLOW_ACTION_TYPE_END)
5836 			flag = 0;
5837 	}
5838 	/* Count RTE_FLOW_ACTION_TYPE_END. */
5839 	return flag ? actions_n + 1 : 0;
5840 }
5841 
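/*
 * Editor's sketch: hypothetical usage when probing for a SAMPLE action;
 * both output positions are -1 when the corresponding action is absent.
 *
 *   int match_pos, qrss_pos, modify_after_mirror = 0;
 *   int n = flow_check_match_action(actions, attr,
 *                                   RTE_FLOW_ACTION_TYPE_SAMPLE,
 *                                   &match_pos, &qrss_pos,
 *                                   &modify_after_mirror);
 *   if (n > 0)
 *           ... n actions in total, SAMPLE found at match_pos ...
 */
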
5842 #define SAMPLE_SUFFIX_ITEM 3
5843 
5844 /**
5845  * Split the sample flow.
5846  *
5847  * As the sample flow is split into two sub-flows, the sample flow keeps
5848  * the sample action while the other actions move to a new suffix flow.
5849  *
5850  * Also add a unique tag id with a tag action to the sample flow;
5851  * the same tag id will be matched in the suffix flow.
5852  *
5853  * @param dev
5854  *   Pointer to Ethernet device.
5855  * @param[in] add_tag
5856  *   Add extra tag action flag.
5857  * @param[out] sfx_items
5858  *   Suffix flow match items (list terminated by the END pattern item).
5859  * @param[in] actions
5860  *   Associated actions (list terminated by the END action).
5861  * @param[out] actions_sfx
5862  *   Suffix flow actions.
5863  * @param[out] actions_pre
5864  *   Prefix flow actions.
5865  * @param[in] actions_n
5866  *  The total number of actions.
5867  * @param[in] sample_action_pos
5868  *   The sample action position.
5869  * @param[in] qrss_action_pos
5870  *   The Queue/RSS action position.
5871  * @param[in] jump_table
5872  *   Add extra jump action flag.
5873  * @param[out] error
5874  *   Perform verbose error reporting if not NULL.
5875  *
5876  * @return
5877  *   0 or a unique non-zero flow tag id on success, a negative errno value
5878  *   otherwise and rte_errno is set.
5879  */
5880 static int
5881 flow_sample_split_prep(struct rte_eth_dev *dev,
5882 		       int add_tag,
5883 		       const struct rte_flow_item items[],
5884 		       struct rte_flow_item sfx_items[],
5885 		       const struct rte_flow_action actions[],
5886 		       struct rte_flow_action actions_sfx[],
5887 		       struct rte_flow_action actions_pre[],
5888 		       int actions_n,
5889 		       int sample_action_pos,
5890 		       int qrss_action_pos,
5891 		       int jump_table,
5892 		       struct rte_flow_error *error)
5893 {
5894 	struct mlx5_priv *priv = dev->data->dev_private;
5895 	struct mlx5_rte_flow_action_set_tag *set_tag;
5896 	struct mlx5_rte_flow_item_tag *tag_spec;
5897 	struct mlx5_rte_flow_item_tag *tag_mask;
5898 	struct rte_flow_action_jump *jump_action;
5899 	uint32_t tag_id = 0;
5900 	int append_index = 0;
5901 	int set_tag_idx = -1;
5902 	int index;
5903 	int ret;
5904 
5905 	if (sample_action_pos < 0)
5906 		return rte_flow_error_set(error, EINVAL,
5907 					  RTE_FLOW_ERROR_TYPE_ACTION,
5908 					  NULL, "invalid position of sample "
5909 					  "action in list");
5910 	/* Prepare the actions for prefix and suffix flow. */
5911 	if (add_tag) {
5912 		/* Compute the index of the newly added tag action,
5913 		 * preceding the PUSH_VLAN or ENCAP action.
5914 		 */
5915 		const struct rte_flow_action_raw_encap *raw_encap;
5916 		const struct rte_flow_action *action = actions;
5917 		int encap_idx;
5918 		int action_idx = 0;
5919 		int raw_decap_idx = -1;
5920 		int push_vlan_idx = -1;
5921 		for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
5922 			switch (action->type) {
5923 			case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5924 				raw_decap_idx = action_idx;
5925 				break;
5926 			case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5927 				raw_encap = action->conf;
5928 				if (raw_encap->size >
5929 					MLX5_ENCAPSULATION_DECISION_SIZE) {
5930 					encap_idx = raw_decap_idx != -1 ?
5931 						    raw_decap_idx : action_idx;
5932 					if (encap_idx < sample_action_pos &&
5933 					    push_vlan_idx == -1)
5934 						set_tag_idx = encap_idx;
5935 				}
5936 				break;
5937 			case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
5938 			case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
5939 				encap_idx = action_idx;
5940 				if (encap_idx < sample_action_pos &&
5941 				    push_vlan_idx == -1)
5942 					set_tag_idx = encap_idx;
5943 				break;
5944 			case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5945 			case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5946 				push_vlan_idx = action_idx;
5947 				if (push_vlan_idx < sample_action_pos)
5948 					set_tag_idx = action_idx;
5949 				break;
5950 			default:
5951 				break;
5952 			}
5953 			action_idx++;
5954 		}
5955 	}
5956 	/* Prepare the actions for prefix and suffix flow. */
5957 	if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
5958 		index = qrss_action_pos;
5959 		/* Put the actions preceding the Queue/RSS action into the prefix flow. */
5960 		if (index != 0)
5961 			memcpy(actions_pre, actions,
5962 			       sizeof(struct rte_flow_action) * index);
5963 		/* Put the others preceding the sample action into the prefix flow. */
5964 		if (sample_action_pos > index + 1)
5965 			memcpy(actions_pre + index, actions + index + 1,
5966 			       sizeof(struct rte_flow_action) *
5967 			       (sample_action_pos - index - 1));
5968 		index = sample_action_pos - 1;
5969 		/* Put Queue/RSS action into Suffix flow. */
5970 		memcpy(actions_sfx, actions + qrss_action_pos,
5971 		       sizeof(struct rte_flow_action));
5972 		actions_sfx++;
5973 	} else if (add_tag && set_tag_idx >= 0) {
5974 		if (set_tag_idx > 0)
5975 			memcpy(actions_pre, actions,
5976 			       sizeof(struct rte_flow_action) * set_tag_idx);
5977 		memcpy(actions_pre + set_tag_idx + 1, actions + set_tag_idx,
5978 		       sizeof(struct rte_flow_action) *
5979 		       (sample_action_pos - set_tag_idx));
5980 		index = sample_action_pos;
5981 	} else {
5982 		index = sample_action_pos;
5983 		if (index != 0)
5984 			memcpy(actions_pre, actions,
5985 			       sizeof(struct rte_flow_action) * index);
5986 	}
5987 	/* For CX5, add an extra tag action for NIC-RX and E-Switch ingress.
5988 	 * For CX6DX and above, metadata registers Cx preserve their value;
5989 	 * add an extra tag action for NIC-RX and the E-Switch domain.
5990 	 */
5991 	if (add_tag) {
5992 		/* Prepare the prefix tag action. */
5993 		append_index++;
5994 		set_tag = (void *)(actions_pre + actions_n + append_index);
5995 		ret = mlx5_flow_get_reg_id(dev, MLX5_SAMPLE_ID, 0, error);
5996 		/* A trusted VF/SF on CX5 does not support meter, so the
5997 		 * reserved metadata regC is REG_NON; fall back to using
5998 		 * application tag index 0.
5999 		 */
6000 		if (unlikely(ret == REG_NON))
6001 			ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
6002 		if (ret < 0)
6003 			return ret;
6004 		mlx5_ipool_malloc(priv->sh->ipool
6005 				  [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id);
6006 		*set_tag = (struct mlx5_rte_flow_action_set_tag) {
6007 			.id = ret,
6008 			.data = tag_id,
6009 		};
6010 		/* Prepare the suffix subflow items. */
6011 		for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
6012 			if (items->type == RTE_FLOW_ITEM_TYPE_PORT_ID) {
6013 				memcpy(sfx_items, items, sizeof(*sfx_items));
6014 				sfx_items++;
6015 			}
6016 		}
6017 		tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
6018 		tag_spec->data = tag_id;
6019 		tag_spec->id = set_tag->id;
6020 		tag_mask = tag_spec + 1;
6021 		tag_mask->data = UINT32_MAX;
6022 		sfx_items[0] = (struct rte_flow_item){
6023 			.type = (enum rte_flow_item_type)
6024 				MLX5_RTE_FLOW_ITEM_TYPE_TAG,
6025 			.spec = tag_spec,
6026 			.last = NULL,
6027 			.mask = tag_mask,
6028 		};
6029 		sfx_items[1] = (struct rte_flow_item){
6030 			.type = (enum rte_flow_item_type)
6031 				RTE_FLOW_ITEM_TYPE_END,
6032 		};
6033 		/* Prepare the tag action in prefix subflow. */
6034 		set_tag_idx = (set_tag_idx == -1) ? index : set_tag_idx;
6035 		actions_pre[set_tag_idx] =
6036 			(struct rte_flow_action){
6037 			.type = (enum rte_flow_action_type)
6038 				MLX5_RTE_FLOW_ACTION_TYPE_TAG,
6039 			.conf = set_tag,
6040 		};
6041 		/* Update the next sample position due to the added tag action. */
6042 		index += 1;
6043 	}
6044 	/* Copy the sample action into prefix flow. */
6045 	memcpy(actions_pre + index, actions + sample_action_pos,
6046 	       sizeof(struct rte_flow_action));
6047 	index += 1;
6048 	/* For a modify action after the sample action in E-Switch mirroring,
6049 	 * add an extra jump action in the prefix subflow to jump into the next
6050 	 * table, then do the modify action in the new table.
6051 	 */
6052 	if (jump_table) {
6053 		/* Prepare the prefix jump action. */
6054 		append_index++;
6055 		jump_action = (void *)(actions_pre + actions_n + append_index);
6056 		jump_action->group = jump_table;
6057 		actions_pre[index++] =
6058 			(struct rte_flow_action){
6059 			.type = (enum rte_flow_action_type)
6060 				RTE_FLOW_ACTION_TYPE_JUMP,
6061 			.conf = jump_action,
6062 		};
6063 	}
6064 	actions_pre[index] = (struct rte_flow_action){
6065 		.type = (enum rte_flow_action_type)
6066 			RTE_FLOW_ACTION_TYPE_END,
6067 	};
6068 	/* Put the actions after the sample action into the suffix flow. */
6069 	memcpy(actions_sfx, actions + sample_action_pos + 1,
6070 	       sizeof(struct rte_flow_action) *
6071 	       (actions_n - sample_action_pos - 1));
6072 	return tag_id;
6073 }
6074 
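/*
 * Editor's note: conceptually, flow_sample_split_prep() turns
 *   { A, SAMPLE, B, END }
 * into a prefix flow
 *   { A, [TAG(tag_id)], SAMPLE, [JUMP(jump_table)], END }
 * and a suffix flow that matches the internal TAG item on tag_id and runs
 *   { B, END },
 * where the bracketed actions are added only when add_tag/jump_table
 * request them.
 */
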
6075 /**
6076  * The splitting for metadata feature.
6077  *
6078  * - Q/RSS action on NIC Rx should be split in order to pass by
6079  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
6080  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
6081  *
6082  * - All the actions on NIC Tx should have a mreg copy action to
6083  *   copy reg_a from WQE to reg_c[0].
6084  *
6085  * @param dev
6086  *   Pointer to Ethernet device.
6087  * @param[in] flow
6088  *   Parent flow structure pointer.
6089  * @param[in] attr
6090  *   Flow rule attributes.
6091  * @param[in] items
6092  *   Pattern specification (list terminated by the END pattern item).
6093  * @param[in] actions
6094  *   Associated actions (list terminated by the END action).
6095  * @param[in] flow_split_info
6096  *   Pointer to flow split info structure.
6097  * @param[out] error
6098  *   Perform verbose error reporting if not NULL.
6099  * @return
6100  *   0 on success, negative value otherwise
6101  */
6102 static int
6103 flow_create_split_metadata(struct rte_eth_dev *dev,
6104 			   struct rte_flow *flow,
6105 			   const struct rte_flow_attr *attr,
6106 			   const struct rte_flow_item items[],
6107 			   const struct rte_flow_action actions[],
6108 			   struct mlx5_flow_split_info *flow_split_info,
6109 			   struct rte_flow_error *error)
6110 {
6111 	struct mlx5_priv *priv = dev->data->dev_private;
6112 	struct mlx5_sh_config *config = &priv->sh->config;
6113 	const struct rte_flow_action *qrss = NULL;
6114 	struct rte_flow_action *ext_actions = NULL;
6115 	struct mlx5_flow *dev_flow = NULL;
6116 	uint32_t qrss_id = 0;
6117 	int mtr_sfx = 0;
6118 	size_t act_size;
6119 	int actions_n;
6120 	int encap_idx;
6121 	int ret;
6122 
6123 	/* Check whether extensive metadata feature is engaged. */
6124 	if (!config->dv_flow_en ||
6125 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
6126 	    !mlx5_flow_ext_mreg_supported(dev))
6127 		return flow_create_split_inner(dev, flow, NULL, attr, items,
6128 					       actions, flow_split_info, error);
6129 	actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
6130 							   &encap_idx);
6131 	if (qrss) {
6132 		/* Exclude hairpin flows from splitting. */
6133 		if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
6134 			const struct rte_flow_action_queue *queue;
6135 
6136 			queue = qrss->conf;
6137 			if (mlx5_rxq_is_hairpin(dev, queue->index))
6138 				qrss = NULL;
6139 		} else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
6140 			const struct rte_flow_action_rss *rss;
6141 
6142 			rss = qrss->conf;
6143 			if (mlx5_rxq_is_hairpin(dev, rss->queue[0]))
6144 				qrss = NULL;
6145 		}
6146 	}
6147 	if (qrss) {
6148 		/* Check if it is in meter suffix table. */
6149 		mtr_sfx = attr->group == (attr->transfer ?
6150 			  (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6151 			  MLX5_FLOW_TABLE_LEVEL_METER);
6152 		/*
6153 		 * Q/RSS action on NIC Rx should be split in order to pass by
6154 		 * the mreg copy table (RX_CP_TBL) and then it jumps to the
6155 		 * action table (RX_ACT_TBL) which has the split Q/RSS action.
6156 		 */
6157 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
6158 			   sizeof(struct rte_flow_action_set_tag) +
6159 			   sizeof(struct rte_flow_action_jump);
6160 		ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
6161 					  SOCKET_ID_ANY);
6162 		if (!ext_actions)
6163 			return rte_flow_error_set(error, ENOMEM,
6164 						  RTE_FLOW_ERROR_TYPE_ACTION,
6165 						  NULL, "no memory to split "
6166 						  "metadata flow");
6167 		/*
6168 		 * Create the new actions list with removed Q/RSS action
6169 		 * and appended set tag and jump to register copy table
6170 		 * (RX_CP_TBL). We should preallocate unique tag ID here
6171 		 * in advance, because it is needed for set tag action.
6172 		 */
6173 		qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
6174 						    qrss, actions_n,
6175 						    mtr_sfx, error);
6176 		if (!mtr_sfx && !qrss_id) {
6177 			ret = -rte_errno;
6178 			goto exit;
6179 		}
6180 	} else if (attr->egress && !attr->transfer) {
6181 		/*
6182 		 * All the actions on NIC Tx should have a metadata register
6183 		 * copy action to copy reg_a from WQE to reg_c[meta].
6184 		 */
6185 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
6186 			   sizeof(struct mlx5_flow_action_copy_mreg);
6187 		ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
6188 					  SOCKET_ID_ANY);
6189 		if (!ext_actions)
6190 			return rte_flow_error_set(error, ENOMEM,
6191 						  RTE_FLOW_ERROR_TYPE_ACTION,
6192 						  NULL, "no memory to split "
6193 						  "metadata flow");
6194 		/* Create the action list appended with copy register. */
6195 		ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
6196 					     actions_n, error, encap_idx);
6197 		if (ret < 0)
6198 			goto exit;
6199 	}
6200 	/* Add the unmodified original or prefix subflow. */
6201 	ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6202 				      items, ext_actions ? ext_actions :
6203 				      actions, flow_split_info, error);
6204 	if (ret < 0)
6205 		goto exit;
6206 	MLX5_ASSERT(dev_flow);
6207 	if (qrss) {
6208 		const struct rte_flow_attr q_attr = {
6209 			.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
6210 			.ingress = 1,
6211 		};
6212 		/* Internal PMD action to set register. */
6213 		struct mlx5_rte_flow_item_tag q_tag_spec = {
6214 			.data = qrss_id,
6215 			.id = REG_NON,
6216 		};
6217 		struct rte_flow_item q_items[] = {
6218 			{
6219 				.type = (enum rte_flow_item_type)
6220 					MLX5_RTE_FLOW_ITEM_TYPE_TAG,
6221 				.spec = &q_tag_spec,
6222 				.last = NULL,
6223 				.mask = NULL,
6224 			},
6225 			{
6226 				.type = RTE_FLOW_ITEM_TYPE_END,
6227 			},
6228 		};
6229 		struct rte_flow_action q_actions[] = {
6230 			{
6231 				.type = qrss->type,
6232 				.conf = qrss->conf,
6233 			},
6234 			{
6235 				.type = RTE_FLOW_ACTION_TYPE_END,
6236 			},
6237 		};
6238 		uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
6239 
6240 		/*
6241 		 * Configure the tag item only if there is no meter subflow.
6242 		 * Since the tag is already set in the meter suffix subflow,
6243 		 * we can just use the meter suffix items as is.
6244 		 */
6245 		if (qrss_id) {
6246 			/* Not meter subflow. */
6247 			MLX5_ASSERT(!mtr_sfx);
6248 			/*
6249 			 * Put the unique id in the prefix flow because it is
6250 			 * destroyed after the suffix flow; the id is freed
6251 			 * once no actual flows use it and identifier
6252 			 * reallocation becomes possible (for example, for
6253 			 * other flows in other threads).
6254 			 */
6255 			dev_flow->handle->split_flow_id = qrss_id;
6256 			ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
6257 						   error);
6258 			if (ret < 0)
6259 				goto exit;
6260 			q_tag_spec.id = ret;
6261 		}
6262 		dev_flow = NULL;
6263 		/* Add suffix subflow to execute Q/RSS. */
6264 		flow_split_info->prefix_layers = layers;
6265 		flow_split_info->prefix_mark = 0;
6266 		flow_split_info->table_id = 0;
6267 		ret = flow_create_split_inner(dev, flow, &dev_flow,
6268 					      &q_attr, mtr_sfx ? items :
6269 					      q_items, q_actions,
6270 					      flow_split_info, error);
6271 		if (ret < 0)
6272 			goto exit;
6273 		/* Reset qrss_id: it is freed at exit only on failure. */
6274 		qrss_id = 0;
6275 		MLX5_ASSERT(dev_flow);
6276 	}
6277 
6278 exit:
6279 	/*
6280 	 * We do not destroy the partially created sub_flows in case of error.
6281 	 * These ones are included into parent flow list and will be destroyed
6282 	 * by flow_drv_destroy.
6283 	 */
6284 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
6285 			qrss_id);
6286 	mlx5_free(ext_actions);
6287 	return ret;
6288 }
6289 
6290 /**
6291  * Create meter internal drop flow with the original pattern.
6292  *
6293  * @param dev
6294  *   Pointer to Ethernet device.
6295  * @param[in] flow
6296  *   Parent flow structure pointer.
6297  * @param[in] attr
6298  *   Flow rule attributes.
6299  * @param[in] items
6300  *   Pattern specification (list terminated by the END pattern item).
6301  * @param[in] flow_split_info
6302  *   Pointer to flow split info structure.
6303  * @param[in] fm
6304  *   Pointer to flow meter structure.
6305  * @param[out] error
6306  *   Perform verbose error reporting if not NULL.
6307  * @return
6308  *   0 on success, negative value otherwise
6309  */
6310 static uint32_t
6311 flow_meter_create_drop_flow_with_org_pattern(struct rte_eth_dev *dev,
6312 			struct rte_flow *flow,
6313 			const struct rte_flow_attr *attr,
6314 			const struct rte_flow_item items[],
6315 			struct mlx5_flow_split_info *flow_split_info,
6316 			struct mlx5_flow_meter_info *fm,
6317 			struct rte_flow_error *error)
6318 {
6319 	struct mlx5_flow *dev_flow = NULL;
6320 	struct rte_flow_attr drop_attr = *attr;
6321 	struct rte_flow_action drop_actions[3];
6322 	struct mlx5_flow_split_info drop_split_info = *flow_split_info;
6323 
6324 	MLX5_ASSERT(fm->drop_cnt);
6325 	drop_actions[0].type =
6326 		(enum rte_flow_action_type)MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
6327 	drop_actions[0].conf = (void *)(uintptr_t)fm->drop_cnt;
6328 	drop_actions[1].type = RTE_FLOW_ACTION_TYPE_DROP;
6329 	drop_actions[1].conf = NULL;
6330 	drop_actions[2].type = RTE_FLOW_ACTION_TYPE_END;
6331 	drop_actions[2].conf = NULL;
6332 	drop_split_info.external = false;
6333 	drop_split_info.skip_scale |= 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6334 	drop_split_info.table_id = MLX5_MTR_TABLE_ID_DROP;
6335 	drop_attr.group = MLX5_FLOW_TABLE_LEVEL_METER;
6336 	return flow_create_split_inner(dev, flow, &dev_flow,
6337 				&drop_attr, items, drop_actions,
6338 				&drop_split_info, error);
6339 }
6340 
6341 /**
6342  * The splitting for meter feature.
6343  *
6344  * - The meter flow will be split to two flows as prefix and
6345  *   suffix flow. The packets make sense only it pass the prefix
6346  *   meter action.
6347  *
6348  * - Reg_C_5 is used for the packet to match betweend prefix and
6349  *   suffix flow.
6350  *
6351  * @param dev
6352  *   Pointer to Ethernet device.
6353  * @param[in] flow
6354  *   Parent flow structure pointer.
6355  * @param[in] attr
6356  *   Flow rule attributes.
6357  * @param[in] items
6358  *   Pattern specification (list terminated by the END pattern item).
6359  * @param[in] actions
6360  *   Associated actions (list terminated by the END action).
6361  * @param[in] flow_split_info
6362  *   Pointer to flow split info structure.
6363  * @param[out] error
6364  *   Perform verbose error reporting if not NULL.
6365  * @return
6366  *   0 on success, negative value otherwise
6367  */
6368 static int
6369 flow_create_split_meter(struct rte_eth_dev *dev,
6370 			struct rte_flow *flow,
6371 			const struct rte_flow_attr *attr,
6372 			const struct rte_flow_item items[],
6373 			const struct rte_flow_action actions[],
6374 			struct mlx5_flow_split_info *flow_split_info,
6375 			struct rte_flow_error *error)
6376 {
6377 	struct mlx5_priv *priv = dev->data->dev_private;
6378 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6379 	struct rte_flow_action *sfx_actions = NULL;
6380 	struct rte_flow_action *pre_actions = NULL;
6381 	struct rte_flow_item *sfx_items = NULL;
6382 	struct mlx5_flow *dev_flow = NULL;
6383 	struct rte_flow_attr sfx_attr = *attr;
6384 	struct mlx5_flow_meter_info *fm = NULL;
6385 	uint8_t skip_scale_restore;
6386 	bool has_mtr = false;
6387 	bool has_modify = false;
6388 	bool set_mtr_reg = true;
6389 	bool is_mtr_hierarchy = false;
6390 	uint32_t meter_id = 0;
6391 	uint32_t mtr_idx = 0;
6392 	uint32_t mtr_flow_id = 0;
6393 	size_t act_size;
6394 	size_t item_size;
6395 	int actions_n = 0;
6396 	int ret = 0;
6397 
6398 	if (priv->mtr_en)
6399 		actions_n = flow_check_meter_action(dev, actions, &has_mtr,
6400 						    &has_modify, &meter_id);
6401 	if (has_mtr) {
6402 		if (flow->meter) {
6403 			fm = flow_dv_meter_find_by_idx(priv, flow->meter);
6404 			if (!fm)
6405 				return rte_flow_error_set(error, EINVAL,
6406 						RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6407 						NULL, "Meter not found.");
6408 		} else {
6409 			fm = mlx5_flow_meter_find(priv, meter_id, &mtr_idx);
6410 			if (!fm)
6411 				return rte_flow_error_set(error, EINVAL,
6412 						RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6413 						NULL, "Meter not found.");
6414 			ret = mlx5_flow_meter_attach(priv, fm,
6415 						     &sfx_attr, error);
6416 			if (ret)
6417 				return -rte_errno;
6418 			flow->meter = mtr_idx;
6419 		}
6420 		MLX5_ASSERT(wks);
6421 		wks->fm = fm;
6422 		if (!fm->def_policy) {
6423 			wks->policy = mlx5_flow_meter_policy_find(dev,
6424 								  fm->policy_id,
6425 								  NULL);
6426 			MLX5_ASSERT(wks->policy);
6427 			if (wks->policy->mark)
6428 				wks->mark = 1;
6429 			if (wks->policy->is_hierarchy) {
6430 				wks->final_policy =
6431 				mlx5_flow_meter_hierarchy_get_final_policy(dev,
6432 								wks->policy);
6433 				if (!wks->final_policy)
6434 					return rte_flow_error_set(error,
6435 					EINVAL,
6436 					RTE_FLOW_ERROR_TYPE_ACTION, NULL,
6437 				"Failed to find terminal policy of hierarchy.");
6438 				is_mtr_hierarchy = true;
6439 			}
6440 		}
6441 		/*
6442 		 * If this is not a default-policy meter, and either
6443 		 * 1. it is not a meter hierarchy and no action in the flow
6444 		 *    modifies the packet (modify/encap/decap etc.), OR
6445 		 * 2. no drop counter is needed for this meter,
6446 		 * then there is no need to use regC to save the meter id.
6447 		 */
6448 		if (!fm->def_policy && ((!has_modify && !is_mtr_hierarchy) || !fm->drop_cnt))
6449 			set_mtr_reg = false;
6450 		/* Prefix actions: meter, decap, encap, tag, jump, end, cnt. */
6451 #define METER_PREFIX_ACTION 7
6452 		act_size = (sizeof(struct rte_flow_action) *
6453 			    (actions_n + METER_PREFIX_ACTION)) +
6454 			   sizeof(struct mlx5_rte_flow_action_set_tag);
6455 		/* Suffix items: tag, vlan, port id, end. */
6456 #define METER_SUFFIX_ITEM 4
6457 		item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
6458 			    sizeof(struct mlx5_rte_flow_item_tag) * 2;
6459 		sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
6460 					  0, SOCKET_ID_ANY);
6461 		if (!sfx_actions)
6462 			return rte_flow_error_set(error, ENOMEM,
6463 						  RTE_FLOW_ERROR_TYPE_ACTION,
6464 						  NULL, "no memory to split "
6465 						  "meter flow");
6466 		sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
6467 			     act_size);
6468 		/* There's no suffix flow for meter of non-default policy. */
6469 		if (!fm->def_policy)
6470 			pre_actions = sfx_actions + 1;
6471 		else
6472 			pre_actions = sfx_actions + actions_n;
6473 		ret = flow_meter_split_prep(dev, flow, wks, &sfx_attr,
6474 					    items, sfx_items, actions,
6475 					    sfx_actions, pre_actions,
6476 					    (set_mtr_reg ? &mtr_flow_id : NULL),
6477 					    error);
6478 		if (ret) {
6479 			ret = -rte_errno;
6480 			goto exit;
6481 		}
6482 		/* Add the prefix subflow. */
6483 		skip_scale_restore = flow_split_info->skip_scale;
6484 		flow_split_info->skip_scale |=
6485 			1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6486 		ret = flow_create_split_inner(dev, flow, &dev_flow,
6487 					      attr, items, pre_actions,
6488 					      flow_split_info, error);
6489 		flow_split_info->skip_scale = skip_scale_restore;
6490 		if (ret) {
6491 			if (mtr_flow_id)
6492 				mlx5_ipool_free(fm->flow_ipool, mtr_flow_id);
6493 			ret = -rte_errno;
6494 			goto exit;
6495 		}
6496 		if (mtr_flow_id) {
6497 			dev_flow->handle->split_flow_id = mtr_flow_id;
6498 			dev_flow->handle->is_meter_flow_id = 1;
6499 		}
6500 		if (!fm->def_policy) {
6501 			if (!set_mtr_reg && fm->drop_cnt)
6502 				ret =
6503 			flow_meter_create_drop_flow_with_org_pattern(dev, flow,
6504 							&sfx_attr, items,
6505 							flow_split_info,
6506 							fm, error);
6507 			goto exit;
6508 		}
6509 		/* Set the suffix group attr. */
6510 		sfx_attr.group = sfx_attr.transfer ?
6511 				(MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6512 				 MLX5_FLOW_TABLE_LEVEL_METER;
6513 		flow_split_info->prefix_layers =
6514 				flow_get_prefix_layer_flags(dev_flow);
6515 		flow_split_info->prefix_mark |= wks->mark;
6516 		flow_split_info->table_id = MLX5_MTR_TABLE_ID_SUFFIX;
6517 	}
6518 	/* Add the suffix subflow (the original flow if no meter split). */
6519 	ret = flow_create_split_metadata(dev, flow,
6520 					 &sfx_attr, sfx_items ?
6521 					 sfx_items : items,
6522 					 sfx_actions ? sfx_actions : actions,
6523 					 flow_split_info, error);
6524 exit:
6525 	if (sfx_actions)
6526 		mlx5_free(sfx_actions);
6527 	return ret;
6528 }
6529 
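/*
 * Editor's note: a hedged, self-contained sketch (not driver code) of the
 * action/item layout produced by the meter split above, using only public
 * rte_flow types. The meter id, the tag index standing in for Reg_C_5, and
 * the flow id normally taken from fm->flow_ipool are illustrative
 * assumptions.
 */
static __rte_unused void
doc_sketch_meter_split_layout(void)
{
	const uint32_t flow_id = 0x2a; /* hypothetical id from the ipool */
	/* Prefix subflow: meter the packet and mark it for the suffix. */
	struct rte_flow_action_meter meter = { .mtr_id = 1 };
	struct rte_flow_action_jump jump = {
		.group = MLX5_FLOW_TABLE_LEVEL_METER,
	};
	const struct rte_flow_action prefix_actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_METER, .conf = &meter },
		/* The PMD inserts a SET_TAG action writing flow_id here. */
		{ .type = RTE_FLOW_ACTION_TYPE_JUMP, .conf = &jump },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	/* Suffix subflow: match the tag, then run the original fate action. */
	const struct rte_flow_item_tag tag = { .data = flow_id, .index = 5 };
	const struct rte_flow_item suffix_items[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_TAG, .spec = &tag },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};

	(void)prefix_actions;
	(void)suffix_items;
}
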
6530 /**
6531  * The splitting for sample feature.
6532  *
6533  * Once a Sample action is detected in the action list, the flow actions are
6534  * split into a prefix sub flow and a suffix sub flow.
6535  *
6536  * The original items remain in the prefix sub flow. All actions preceding
6537  * the sample action, and the sample action itself, are copied to the prefix
6538  * sub flow; the actions following the sample action are copied to the
6539  * suffix sub flow. The Queue action is always located in the suffix sub flow.
6540  *
6541  * In order to make packets from the prefix sub flow match the suffix sub
6542  * flow, an extra tag action is added to the prefix sub flow, and the suffix
6543  * sub flow uses a tag item with the unique flow id.
6544  *
6545  * @param dev
6546  *   Pointer to Ethernet device.
6547  * @param[in] flow
6548  *   Parent flow structure pointer.
6549  * @param[in] attr
6550  *   Flow rule attributes.
6551  * @param[in] items
6552  *   Pattern specification (list terminated by the END pattern item).
6553  * @param[in] actions
6554  *   Associated actions (list terminated by the END action).
6555  * @param[in] flow_split_info
6556  *   Pointer to flow split info structure.
6557  * @param[out] error
6558  *   Perform verbose error reporting if not NULL.
6559  * @return
6560  *   0 on success, negative value otherwise
6561  */
6562 static int
6563 flow_create_split_sample(struct rte_eth_dev *dev,
6564 			 struct rte_flow *flow,
6565 			 const struct rte_flow_attr *attr,
6566 			 const struct rte_flow_item items[],
6567 			 const struct rte_flow_action actions[],
6568 			 struct mlx5_flow_split_info *flow_split_info,
6569 			 struct rte_flow_error *error)
6570 {
6571 	struct mlx5_priv *priv = dev->data->dev_private;
6572 	struct rte_flow_action *sfx_actions = NULL;
6573 	struct rte_flow_action *pre_actions = NULL;
6574 	struct rte_flow_item *sfx_items = NULL;
6575 	struct mlx5_flow *dev_flow = NULL;
6576 	struct rte_flow_attr sfx_attr = *attr;
6577 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6578 	struct mlx5_flow_dv_sample_resource *sample_res;
6579 	struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
6580 	struct mlx5_flow_tbl_resource *sfx_tbl;
6581 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6582 #endif
6583 	size_t act_size;
6584 	size_t item_size;
6585 	uint32_t fdb_tx = 0;
6586 	int32_t tag_id = 0;
6587 	int actions_n = 0;
6588 	int sample_action_pos;
6589 	int qrss_action_pos;
6590 	int add_tag = 0;
6591 	int modify_after_mirror = 0;
6592 	uint16_t jump_table = 0;
6593 	const uint32_t next_ft_step = 1;
6594 	int ret = 0;
6595 
6596 	if (priv->sampler_en)
6597 		actions_n = flow_check_match_action(actions, attr,
6598 					RTE_FLOW_ACTION_TYPE_SAMPLE,
6599 					&sample_action_pos, &qrss_action_pos,
6600 					&modify_after_mirror);
6601 	if (actions_n) {
6602 		/* The prefix actions must include sample, tag, end. */
6603 		act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
6604 			   + sizeof(struct mlx5_rte_flow_action_set_tag);
6605 		item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
6606 			    sizeof(struct mlx5_rte_flow_item_tag) * 2;
6607 		sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
6608 					  item_size), 0, SOCKET_ID_ANY);
6609 		if (!sfx_actions)
6610 			return rte_flow_error_set(error, ENOMEM,
6611 						  RTE_FLOW_ERROR_TYPE_ACTION,
6612 						  NULL, "no memory to split "
6613 						  "sample flow");
6614 		/* The representor_id is UINT16_MAX for uplink. */
6615 		fdb_tx = (attr->transfer && priv->representor_id != UINT16_MAX);
6616 		/*
6617 		 * When reg_c_preserve is set, metadata registers Cx preserve
6618 		 * their value even through packet duplication.
6619 		 */
6620 		add_tag = (!fdb_tx ||
6621 			   priv->sh->cdev->config.hca_attr.reg_c_preserve);
6622 		if (add_tag)
6623 			sfx_items = (struct rte_flow_item *)((char *)sfx_actions
6624 					+ act_size);
6625 		if (modify_after_mirror)
6626 			jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
6627 				     next_ft_step;
6628 		pre_actions = sfx_actions + actions_n;
6629 		tag_id = flow_sample_split_prep(dev, add_tag, items, sfx_items,
6630 						actions, sfx_actions,
6631 						pre_actions, actions_n,
6632 						sample_action_pos,
6633 						qrss_action_pos, jump_table,
6634 						error);
6635 		if (tag_id < 0 || (add_tag && !tag_id)) {
6636 			ret = -rte_errno;
6637 			goto exit;
6638 		}
6639 		if (modify_after_mirror)
6640 			flow_split_info->skip_scale =
6641 					1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6642 		/* Add the prefix subflow. */
6643 		ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6644 					      items, pre_actions,
6645 					      flow_split_info, error);
6646 		if (ret) {
6647 			ret = -rte_errno;
6648 			goto exit;
6649 		}
6650 		dev_flow->handle->split_flow_id = tag_id;
6651 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6652 		if (!modify_after_mirror) {
6653 			/* Set the sfx group attr. */
6654 			sample_res = (struct mlx5_flow_dv_sample_resource *)
6655 						dev_flow->dv.sample_res;
6656 			sfx_tbl = (struct mlx5_flow_tbl_resource *)
6657 						sample_res->normal_path_tbl;
6658 			sfx_tbl_data = container_of(sfx_tbl,
6659 						struct mlx5_flow_tbl_data_entry,
6660 						tbl);
6661 			sfx_attr.group = sfx_attr.transfer ?
6662 			(sfx_tbl_data->level - 1) : sfx_tbl_data->level;
6663 		} else {
6664 			MLX5_ASSERT(attr->transfer);
6665 			sfx_attr.group = jump_table;
6666 		}
6667 		flow_split_info->prefix_layers =
6668 				flow_get_prefix_layer_flags(dev_flow);
6669 		MLX5_ASSERT(wks);
6670 		flow_split_info->prefix_mark |= wks->mark;
6671 		/* The suffix group level has already been scaled with the
6672 		 * factor; set MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1 to
6673 		 * avoid scaling again in translation.
6674 		 */
6675 		flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6676 #endif
6677 	}
6678 	/* Add the suffix subflow. */
6679 	ret = flow_create_split_meter(dev, flow, &sfx_attr,
6680 				      sfx_items ? sfx_items : items,
6681 				      sfx_actions ? sfx_actions : actions,
6682 				      flow_split_info, error);
6683 exit:
6684 	if (sfx_actions)
6685 		mlx5_free(sfx_actions);
6686 	return ret;
6687 }
6688 
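/*
 * Editor's note: a hedged usage sketch (not driver code) showing a rule that
 * triggers the sample split above. Queue indexes and the 1:2 ratio are
 * illustrative; because QUEUE follows SAMPLE, it ends up in the suffix
 * subflow, while SAMPLE itself stays in the prefix subflow.
 */
static __rte_unused struct rte_flow *
doc_sketch_create_sampled_flow(uint16_t port_id, struct rte_flow_error *err)
{
	struct rte_flow_action_queue mirror_queue = { .index = 1 };
	const struct rte_flow_action sample_actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &mirror_queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	const struct rte_flow_action_sample sample = {
		.ratio = 2, /* mirror every second packet */
		.actions = sample_actions,
	};
	struct rte_flow_action_queue queue = { .index = 0 };
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_SAMPLE, .conf = &sample },
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	const struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	const struct rte_flow_attr attr = { .ingress = 1 };

	return rte_flow_create(port_id, &attr, pattern, actions, err);
}
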
6689 /**
6690  * Split the flow to subflow set. The splitters might be linked
6691  * in the chain, like this:
6692  * flow_create_split_outer() calls:
6693  *   flow_create_split_meter() calls:
6694  *     flow_create_split_metadata(meter_subflow_0) calls:
6695  *       flow_create_split_inner(metadata_subflow_0)
6696  *       flow_create_split_inner(metadata_subflow_1)
6697  *       flow_create_split_inner(metadata_subflow_2)
6698  *     flow_create_split_metadata(meter_subflow_1) calls:
6699  *       flow_create_split_inner(metadata_subflow_0)
6700  *       flow_create_split_inner(metadata_subflow_1)
6701  *       flow_create_split_inner(metadata_subflow_2)
6702  *
6703  * This provides a flexible way to add new levels of flow splitting.
6704  * All successfully created subflows are included in the parent
6705  * flow's dev_flow list.
6706  *
6707  * @param dev
6708  *   Pointer to Ethernet device.
6709  * @param[in] flow
6710  *   Parent flow structure pointer.
6711  * @param[in] attr
6712  *   Flow rule attributes.
6713  * @param[in] items
6714  *   Pattern specification (list terminated by the END pattern item).
6715  * @param[in] actions
6716  *   Associated actions (list terminated by the END action).
6717  * @param[in] flow_split_info
6718  *   Pointer to flow split info structure.
6719  * @param[out] error
6720  *   Perform verbose error reporting if not NULL.
6721  * @return
6722  *   0 on success, negative value otherwise
6723  */
6724 static int
6725 flow_create_split_outer(struct rte_eth_dev *dev,
6726 			struct rte_flow *flow,
6727 			const struct rte_flow_attr *attr,
6728 			const struct rte_flow_item items[],
6729 			const struct rte_flow_action actions[],
6730 			struct mlx5_flow_split_info *flow_split_info,
6731 			struct rte_flow_error *error)
6732 {
6733 	int ret;
6734 
6735 	ret = flow_create_split_sample(dev, flow, attr, items,
6736 				       actions, flow_split_info, error);
6737 	MLX5_ASSERT(ret <= 0);
6738 	return ret;
6739 }
6740 
6741 static inline struct mlx5_flow_tunnel *
6742 flow_tunnel_from_rule(const struct mlx5_flow *flow)
6743 {
6744 	struct mlx5_flow_tunnel *tunnel;
6745 
6746 #pragma GCC diagnostic push
6747 #pragma GCC diagnostic ignored "-Wcast-qual"
6748 	tunnel = (typeof(tunnel))flow->tunnel;
6749 #pragma GCC diagnostic pop
6750 
6751 	return tunnel;
6752 }
6753 
6754 /**
6755  * Adjust flow RSS workspace if needed.
6756  *
6757  * @param wks
6758  *   Pointer to thread flow work space.
6759  * @param rss_desc
6760  *   Pointer to RSS descriptor.
6761  * @param[in] nrssq_num
6762  *   New RSS queue number.
6763  *
6764  * @return
6765  *   0 on success, -1 otherwise and rte_errno is set.
6766  */
6767 static int
6768 flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
6769 			  struct mlx5_flow_rss_desc *rss_desc,
6770 			  uint32_t nrssq_num)
6771 {
6772 	if (likely(nrssq_num <= wks->rssq_num))
6773 		return 0;
6774 	rss_desc->queue = realloc(rss_desc->queue,
6775 			  sizeof(*rss_desc->queue) * RTE_ALIGN(nrssq_num, 2));
6776 	if (!rss_desc->queue) {
6777 		rte_errno = ENOMEM;
6778 		return -1;
6779 	}
6780 	wks->rssq_num = RTE_ALIGN(nrssq_num, 2);
6781 	return 0;
6782 }
6783 
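/*
 * Editor's note: hedged illustration (not driver code) of the growth policy
 * above; the queue array only grows, and new sizes are rounded up with
 * RTE_ALIGN(n, 2), so back-to-back one-queue increases do not realloc every
 * time.
 */
static __rte_unused void
doc_sketch_rssq_growth(struct mlx5_flow_workspace *wks)
{
	/* Requesting room for an odd count reserves the next even count. */
	if (flow_rss_workspace_adjust(wks, &wks->rss_desc, 5) == 0)
		MLX5_ASSERT(wks->rssq_num >= 5);
}
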
6784 /**
6785  * Create a flow and add it to the flow list of the given type.
6786  *
6787  * @param dev
6788  *   Pointer to Ethernet device.
6789  * @param type
6790  *   Flow type (MLX5_FLOW_TYPE_GEN, MLX5_FLOW_TYPE_CTL, ...). The flow
6791  *   is stored in the ipool of this type and it is the caller's
6792  *   responsibility to track the created flow by the returned index.
6794  * @param[in] attr
6795  *   Flow rule attributes.
6796  * @param[in] items
6797  *   Pattern specification (list terminated by the END pattern item).
6798  * @param[in] actions
6799  *   Associated actions (list terminated by the END action).
6800  * @param[in] external
6801  *   This flow rule is created by a request external to the PMD.
6802  * @param[out] error
6803  *   Perform verbose error reporting if not NULL.
6804  *
6805  * @return
6806  *   A flow index on success, 0 otherwise and rte_errno is set.
6807  */
6808 static uint32_t
6809 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6810 		 const struct rte_flow_attr *attr,
6811 		 const struct rte_flow_item items[],
6812 		 const struct rte_flow_action original_actions[],
6813 		 bool external, struct rte_flow_error *error)
6814 {
6815 	struct mlx5_priv *priv = dev->data->dev_private;
6816 	struct rte_flow *flow = NULL;
6817 	struct mlx5_flow *dev_flow;
6818 	const struct rte_flow_action_rss *rss = NULL;
6819 	struct mlx5_translated_action_handle
6820 		indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
6821 	int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
6822 	union {
6823 		struct mlx5_flow_expand_rss buf;
6824 		uint8_t buffer[4096];
6825 	} expand_buffer;
6826 	union {
6827 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6828 		uint8_t buffer[2048];
6829 	} actions_rx;
6830 	union {
6831 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6832 		uint8_t buffer[2048];
6833 	} actions_hairpin_tx;
6834 	union {
6835 		struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
6836 		uint8_t buffer[2048];
6837 	} items_tx;
6838 	struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
6839 	struct mlx5_flow_rss_desc *rss_desc;
6840 	const struct rte_flow_action *p_actions_rx;
6841 	uint32_t i;
6842 	uint32_t idx = 0;
6843 	int hairpin_flow;
6844 	struct rte_flow_attr attr_tx = { .priority = 0 };
6845 	const struct rte_flow_action *actions;
6846 	struct rte_flow_action *translated_actions = NULL;
6847 	struct mlx5_flow_tunnel *tunnel;
6848 	struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
6849 	struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
6850 	struct mlx5_flow_split_info flow_split_info = {
6851 		.external = !!external,
6852 		.skip_scale = 0,
6853 		.flow_idx = 0,
6854 		.prefix_mark = 0,
6855 		.prefix_layers = 0,
6856 		.table_id = 0
6857 	};
6858 	int ret;
6859 
6860 	MLX5_ASSERT(wks);
6861 	rss_desc = &wks->rss_desc;
6862 	ret = flow_action_handles_translate(dev, original_actions,
6863 					    indir_actions,
6864 					    &indir_actions_n,
6865 					    &translated_actions, error);
6866 	if (ret < 0) {
6867 		MLX5_ASSERT(translated_actions == NULL);
6868 		return 0;
6869 	}
6870 	actions = translated_actions ? translated_actions : original_actions;
6871 	p_actions_rx = actions;
6872 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
6873 	ret = flow_drv_validate(dev, attr, items, p_actions_rx,
6874 				external, hairpin_flow, error);
6875 	if (ret < 0)
6876 		goto error_before_hairpin_split;
6877 	flow = mlx5_ipool_zmalloc(priv->flows[type], &idx);
6878 	if (!flow) {
6879 		rte_errno = ENOMEM;
6880 		goto error_before_hairpin_split;
6881 	}
6882 	if (hairpin_flow > 0) {
6883 		if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
6884 			rte_errno = EINVAL;
6885 			goto error_before_hairpin_split;
6886 		}
6887 		flow_hairpin_split(dev, actions, actions_rx.actions,
6888 				   actions_hairpin_tx.actions, items_tx.items,
6889 				   idx);
6890 		p_actions_rx = actions_rx.actions;
6891 	}
6892 	flow_split_info.flow_idx = idx;
6893 	flow->drv_type = flow_get_drv_type(dev, attr);
6894 	MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
6895 		    flow->drv_type < MLX5_FLOW_TYPE_MAX);
6896 	memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
6897 	/* RSS Action only works on NIC RX domain */
6898 	if (attr->ingress && !attr->transfer)
6899 		rss = flow_get_rss_action(dev, p_actions_rx);
6900 	if (rss) {
6901 		if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
6902 			return 0;
6903 		/*
6904 		 * The following information is required by
6905 		 * mlx5_flow_hashfields_adjust() in advance.
6906 		 */
6907 		rss_desc->level = rss->level;
6908 		/* RSS type 0 indicates default RSS type (RTE_ETH_RSS_IP). */
6909 		rss_desc->types = !rss->types ? RTE_ETH_RSS_IP : rss->types;
6910 	}
6911 	flow->dev_handles = 0;
6912 	if (rss && rss->types) {
6913 		unsigned int graph_root;
6914 
6915 		graph_root = find_graph_root(rss->level);
6916 		ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
6917 					   items, rss->types,
6918 					   mlx5_support_expansion, graph_root);
6919 		MLX5_ASSERT(ret > 0 &&
6920 		       (unsigned int)ret < sizeof(expand_buffer.buffer));
6921 		if (rte_log_can_log(mlx5_logtype, RTE_LOG_DEBUG)) {
6922 			for (i = 0; i < buf->entries; ++i)
6923 				mlx5_dbg__print_pattern(buf->entry[i].pattern);
6924 		}
6925 	} else {
6926 		buf->entries = 1;
6927 		buf->entry[0].pattern = (void *)(uintptr_t)items;
6928 	}
6929 	rss_desc->shared_rss = flow_get_shared_rss_action(dev, indir_actions,
6930 						      indir_actions_n);
6931 	for (i = 0; i < buf->entries; ++i) {
6932 		/* Initialize flow split data. */
6933 		flow_split_info.prefix_layers = 0;
6934 		flow_split_info.prefix_mark = 0;
6935 		flow_split_info.skip_scale = 0;
6936 		/*
6937 		 * The splitter may create multiple dev_flows,
6938 		 * depending on configuration. In the simplest
6939 		 * case it just creates the unmodified original flow.
6940 		 */
6941 		ret = flow_create_split_outer(dev, flow, attr,
6942 					      buf->entry[i].pattern,
6943 					      p_actions_rx, &flow_split_info,
6944 					      error);
6945 		if (ret < 0)
6946 			goto error;
6947 		if (is_flow_tunnel_steer_rule(wks->flows[0].tof_type)) {
6948 			ret = flow_tunnel_add_default_miss(dev, flow, attr,
6949 							   p_actions_rx,
6950 							   idx,
6951 							   wks->flows[0].tunnel,
6952 							   &default_miss_ctx,
6953 							   error);
6954 			if (ret < 0) {
6955 				mlx5_free(default_miss_ctx.queue);
6956 				goto error;
6957 			}
6958 		}
6959 	}
6960 	/* Create the tx flow. */
6961 	if (hairpin_flow) {
6962 		attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
6963 		attr_tx.ingress = 0;
6964 		attr_tx.egress = 1;
6965 		dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
6966 					 actions_hairpin_tx.actions,
6967 					 idx, error);
6968 		if (!dev_flow)
6969 			goto error;
6970 		dev_flow->flow = flow;
6971 		dev_flow->external = 0;
6972 		SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
6973 			      dev_flow->handle, next);
6974 		ret = flow_drv_translate(dev, dev_flow, &attr_tx,
6975 					 items_tx.items,
6976 					 actions_hairpin_tx.actions, error);
6977 		if (ret < 0)
6978 			goto error;
6979 	}
6980 	/*
6981 	 * Update the metadata register copy table. If extensive
6982 	 * metadata feature is enabled and registers are supported
6983 	 * we might create the extra rte_flow for each unique
6984 	 * MARK/FLAG action ID.
6985 	 *
6986 	 * The table is updated for ingress Flows only, because
6987 	 * the egress Flows belong to the different device and
6988 	 * copy table should be updated in peer NIC Rx domain.
6989 	 */
6990 	if (attr->ingress &&
6991 	    (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
6992 		ret = flow_mreg_update_copy_table(dev, flow, actions, error);
6993 		if (ret)
6994 			goto error;
6995 	}
6996 	/*
6997 	 * If the flow is external (from the application), OR the device is
6998 	 * started, OR this is an mreg discover flow, then apply it immediately.
6999 	 */
7000 	if (external || dev->data->dev_started ||
7001 	    (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
7002 	     attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
7003 		ret = flow_drv_apply(dev, flow, error);
7004 		if (ret < 0)
7005 			goto error;
7006 	}
7007 	flow->type = type;
7008 	flow_rxq_flags_set(dev, flow);
7009 	rte_free(translated_actions);
7010 	tunnel = flow_tunnel_from_rule(wks->flows);
7011 	if (tunnel) {
7012 		flow->tunnel = 1;
7013 		flow->tunnel_id = tunnel->tunnel_id;
7014 		__atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
7015 		mlx5_free(default_miss_ctx.queue);
7016 	}
7017 	mlx5_flow_pop_thread_workspace();
7018 	return idx;
7019 error:
7020 	MLX5_ASSERT(flow);
7021 	ret = rte_errno; /* Save rte_errno before cleanup. */
7022 	flow_mreg_del_copy_action(dev, flow);
7023 	flow_drv_destroy(dev, flow);
7024 	if (rss_desc->shared_rss)
7025 		__atomic_sub_fetch(&((struct mlx5_shared_action_rss *)
7026 			mlx5_ipool_get
7027 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
7028 			rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
7029 	mlx5_ipool_free(priv->flows[type], idx);
7030 	rte_errno = ret; /* Restore rte_errno. */
7033 error_before_hairpin_split:
7034 	mlx5_flow_pop_thread_workspace();
7035 	rte_free(translated_actions);
7036 	return 0;
7037 }
7038 
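/*
 * Editor's note: hedged sketch (not driver code) of a rule that exercises
 * the RSS expansion performed above. With L4 hash types requested, the
 * single ETH/IPV4 pattern is expanded by mlx5_flow_expand_rss() into
 * ETH/IPV4, ETH/IPV4/UDP and ETH/IPV4/TCP subflows; queue ids are
 * illustrative.
 */
static __rte_unused struct rte_flow *
doc_sketch_rss_expansion(uint16_t port_id, struct rte_flow_error *err)
{
	uint16_t queues[] = { 0, 1 };
	const struct rte_flow_action_rss rss = {
		.types = RTE_ETH_RSS_IP | RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP,
		.queue_num = RTE_DIM(queues),
		.queue = queues,
	};
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	const struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	const struct rte_flow_attr attr = { .ingress = 1 };

	return rte_flow_create(port_id, &attr, pattern, actions, err);
}
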
7039 /**
7040  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
7041  * incoming packets to table 1.
7042  *
7043  * Other flow rules, requested for group n, will be created in
7044  * e-switch table n+1.
7045  * A jump action to e-switch group n is translated to a jump to table n+1.
7046  *
7047  * Used when working in switchdev mode, to utilise advantages of table 1
7048  * and above.
7049  *
7050  * @param dev
7051  *   Pointer to Ethernet device.
7052  *
7053  * @return
7054  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
7055  */
7056 struct rte_flow *
7057 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
7058 {
7059 	const struct rte_flow_attr attr = {
7060 		.group = 0,
7061 		.priority = 0,
7062 		.ingress = 1,
7063 		.egress = 0,
7064 		.transfer = 1,
7065 	};
7066 	const struct rte_flow_item pattern = {
7067 		.type = RTE_FLOW_ITEM_TYPE_END,
7068 	};
7069 	struct rte_flow_action_jump jump = {
7070 		.group = 1,
7071 	};
7072 	const struct rte_flow_action actions[] = {
7073 		{
7074 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
7075 			.conf = &jump,
7076 		},
7077 		{
7078 			.type = RTE_FLOW_ACTION_TYPE_END,
7079 		},
7080 	};
7081 	struct rte_flow_error error;
7082 
7083 	return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7084 						   &attr, &pattern,
7085 						   actions, false, &error);
7086 }
7087 
7088 /**
7089  * Create a dedicated flow rule on e-switch table 1 that matches the ESW
7090  * manager and SQ number, and directs all packets to the peer vport.
7091  *
7092  * @param dev
7093  *   Pointer to Ethernet device.
7094  * @param txq
7095  *   Txq index.
7096  *
7097  * @return
7098  *   Flow ID on success, 0 otherwise and rte_errno is set.
7099  */
7100 uint32_t
7101 mlx5_flow_create_devx_sq_miss_flow(struct rte_eth_dev *dev, uint32_t txq)
7102 {
7103 	struct rte_flow_attr attr = {
7104 		.group = 0,
7105 		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7106 		.ingress = 1,
7107 		.egress = 0,
7108 		.transfer = 1,
7109 	};
7110 	struct rte_flow_item_port_id port_spec = {
7111 		.id = MLX5_PORT_ESW_MGR,
7112 	};
7113 	struct mlx5_rte_flow_item_sq txq_spec = {
7114 		.queue = txq,
7115 	};
7116 	struct rte_flow_item pattern[] = {
7117 		{
7118 			.type = RTE_FLOW_ITEM_TYPE_PORT_ID,
7119 			.spec = &port_spec,
7120 		},
7121 		{
7122 			.type = (enum rte_flow_item_type)
7123 				MLX5_RTE_FLOW_ITEM_TYPE_SQ,
7124 			.spec = &txq_spec,
7125 		},
7126 		{
7127 			.type = RTE_FLOW_ITEM_TYPE_END,
7128 		},
7129 	};
7130 	struct rte_flow_action_jump jump = {
7131 		.group = 1,
7132 	};
7133 	struct rte_flow_action_port_id port = {
7134 		.id = dev->data->port_id,
7135 	};
7136 	struct rte_flow_action actions[] = {
7137 		{
7138 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
7139 			.conf = &jump,
7140 		},
7141 		{
7142 			.type = RTE_FLOW_ACTION_TYPE_END,
7143 		},
7144 	};
7145 	struct rte_flow_error error;
7146 
7147 	/*
7148 	 * Creates group 0, highest priority jump flow.
7149 	 * Matches txq to bypass kernel packets.
7150 	 */
7151 	if (flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern, actions,
7152 			     false, &error) == 0)
7153 		return 0;
7154 	/* Create group 1, lowest priority redirect flow for txq. */
7155 	attr.group = 1;
7156 	actions[0].conf = &port;
7157 	actions[0].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
7158 	return flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern,
7159 				actions, false, &error);
7160 }
7161 
7162 /**
7163  * Validate a flow supported by the NIC.
7164  *
7165  * @see rte_flow_validate()
7166  * @see rte_flow_ops
7167  */
7168 int
7169 mlx5_flow_validate(struct rte_eth_dev *dev,
7170 		   const struct rte_flow_attr *attr,
7171 		   const struct rte_flow_item items[],
7172 		   const struct rte_flow_action original_actions[],
7173 		   struct rte_flow_error *error)
7174 {
7175 	int hairpin_flow;
7176 	struct mlx5_translated_action_handle
7177 		indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
7178 	int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
7179 	const struct rte_flow_action *actions;
7180 	struct rte_flow_action *translated_actions = NULL;
7181 	int ret = flow_action_handles_translate(dev, original_actions,
7182 						indir_actions,
7183 						&indir_actions_n,
7184 						&translated_actions, error);
7185 
7186 	if (ret)
7187 		return ret;
7188 	actions = translated_actions ? translated_actions : original_actions;
7189 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
7190 	ret = flow_drv_validate(dev, attr, items, actions,
7191 				true, hairpin_flow, error);
7192 	rte_free(translated_actions);
7193 	return ret;
7194 }
7195 
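/*
 * Editor's note: hedged usage sketch (not driver code); validation shares
 * the action translation and driver-validate steps with creation, so a rule
 * accepted here is expected to be accepted by rte_flow_create() with the
 * same arguments.
 */
static __rte_unused struct rte_flow *
doc_sketch_validate_then_create(uint16_t port_id,
				const struct rte_flow_attr *attr,
				const struct rte_flow_item pattern[],
				const struct rte_flow_action actions[],
				struct rte_flow_error *err)
{
	if (rte_flow_validate(port_id, attr, pattern, actions, err))
		return NULL;
	return rte_flow_create(port_id, attr, pattern, actions, err);
}
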
7196 /**
7197  * Create a flow.
7198  *
7199  * @see rte_flow_create()
7200  * @see rte_flow_ops
7201  */
7202 struct rte_flow *
7203 mlx5_flow_create(struct rte_eth_dev *dev,
7204 		 const struct rte_flow_attr *attr,
7205 		 const struct rte_flow_item items[],
7206 		 const struct rte_flow_action actions[],
7207 		 struct rte_flow_error *error)
7208 {
7209 	struct mlx5_priv *priv = dev->data->dev_private;
7210 
7211 	if (priv->sh->config.dv_flow_en == 2) {
7212 		rte_flow_error_set(error, ENOTSUP,
7213 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7214 			  NULL,
7215 			  "Flow non-Q creation not supported");
7216 		return NULL;
7217 	}
7218 	/*
7219 	 * If the device is not started yet, the application is not allowed
7220 	 * to create a flow. PMD default flows and traffic control flows
7221 	 * are not affected.
7222 	 */
7223 	if (unlikely(!dev->data->dev_started)) {
7224 		DRV_LOG(DEBUG, "port %u is not started when "
7225 			"inserting a flow", dev->data->port_id);
7226 		rte_flow_error_set(error, ENODEV,
7227 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7228 				   NULL,
7229 				   "port not started");
7230 		return NULL;
7231 	}
7232 
7233 	return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_GEN,
7234 						   attr, items, actions,
7235 						   true, error);
7236 }
7237 
7238 /**
7239  * Destroy a flow in a list.
7240  *
7241  * @param dev
7242  *   Pointer to Ethernet device.
7243  * @param[in] flow_idx
7244  *   Index of flow to destroy.
7245  */
7246 static void
7247 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7248 		  uint32_t flow_idx)
7249 {
7250 	struct mlx5_priv *priv = dev->data->dev_private;
7251 	struct rte_flow *flow = mlx5_ipool_get(priv->flows[type], flow_idx);
7252 
7253 	if (!flow)
7254 		return;
7255 	MLX5_ASSERT(flow->type == type);
7256 	/*
7257 	 * Update RX queue flags only if port is started, otherwise it is
7258 	 * already clean.
7259 	 */
7260 	if (dev->data->dev_started)
7261 		flow_rxq_flags_trim(dev, flow);
7262 	flow_drv_destroy(dev, flow);
7263 	if (flow->tunnel) {
7264 		struct mlx5_flow_tunnel *tunnel;
7265 
7266 		tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
7267 		RTE_VERIFY(tunnel);
7268 		if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
7269 			mlx5_flow_tunnel_free(dev, tunnel);
7270 	}
7271 	flow_mreg_del_copy_action(dev, flow);
7272 	mlx5_ipool_free(priv->flows[type], flow_idx);
7273 }
7274 
7275 /**
7276  * Destroy all flows.
7277  *
7278  * @param dev
7279  *   Pointer to Ethernet device.
7280  * @param type
7281  *   Flow type to be flushed.
7282  * @param active
7283  *   If flushing is called actively.
7284  */
7285 void
7286 mlx5_flow_list_flush(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7287 		     bool active)
7288 {
7289 	struct mlx5_priv *priv = dev->data->dev_private;
7290 	uint32_t num_flushed = 0, fidx = 1;
7291 	struct rte_flow *flow;
7292 
7293 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
7294 	if (priv->sh->config.dv_flow_en == 2 &&
7295 	    type == MLX5_FLOW_TYPE_GEN) {
7296 		flow_hw_q_flow_flush(dev, NULL);
7297 		return;
7298 	}
7299 #endif
7300 
7301 	MLX5_IPOOL_FOREACH(priv->flows[type], fidx, flow) {
7302 		flow_list_destroy(dev, type, fidx);
7303 		num_flushed++;
7304 	}
7305 	if (active) {
7306 		DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
7307 			dev->data->port_id, num_flushed);
7308 	}
7309 }
7310 
7311 /**
7312  * Stop all default actions for flows.
7313  *
7314  * @param dev
7315  *   Pointer to Ethernet device.
7316  */
7317 void
7318 mlx5_flow_stop_default(struct rte_eth_dev *dev)
7319 {
7320 	flow_mreg_del_default_copy_action(dev);
7321 	flow_rxq_flags_clear(dev);
7322 }
7323 
7324 /**
7325  * Start all default actions for flows.
7326  *
7327  * @param dev
7328  *   Pointer to Ethernet device.
7329  * @return
7330  *   0 on success, a negative errno value otherwise and rte_errno is set.
7331  */
7332 int
7333 mlx5_flow_start_default(struct rte_eth_dev *dev)
7334 {
7335 	struct rte_flow_error error;
7336 
7337 	/* Make sure default copy action (reg_c[0] -> reg_b) is created. */
7338 	return flow_mreg_add_default_copy_action(dev, &error);
7339 }
7340 
7341 /**
7342  * Release the thread-specific flow workspace data.
7343  */
7344 void
7345 flow_release_workspace(void *data)
7346 {
7347 	struct mlx5_flow_workspace *wks = data;
7348 	struct mlx5_flow_workspace *next;
7349 
7350 	while (wks) {
7351 		next = wks->next;
7352 		free(wks->rss_desc.queue);
7353 		free(wks);
7354 		wks = next;
7355 	}
7356 }
7357 
7358 /**
7359  * Get thread specific current flow workspace.
7360  *
7361  * @return pointer to thread specific flow workspace data, NULL on error.
7362  */
7363 struct mlx5_flow_workspace*
7364 mlx5_flow_get_thread_workspace(void)
7365 {
7366 	struct mlx5_flow_workspace *data;
7367 
7368 	data = mlx5_flow_os_get_specific_workspace();
7369 	MLX5_ASSERT(data && data->inuse);
7370 	if (!data || !data->inuse)
7371 		DRV_LOG(ERR, "flow workspace not initialized.");
7372 	return data;
7373 }
7374 
7375 /**
7376  * Allocate and init new flow workspace.
7377  *
7378  * @return pointer to flow workspace data, NULL on error.
7379  */
7380 static struct mlx5_flow_workspace*
7381 flow_alloc_thread_workspace(void)
7382 {
7383 	struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
7384 
7385 	if (!data) {
7386 		DRV_LOG(ERR, "Failed to allocate flow workspace "
7387 			"memory.");
7388 		return NULL;
7389 	}
7390 	data->rss_desc.queue = calloc(1,
7391 			sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
7392 	if (!data->rss_desc.queue)
7393 		goto err;
7394 	data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
7395 	return data;
7396 err:
7397 	free(data->rss_desc.queue);
7398 	free(data);
7399 	return NULL;
7400 }
7401 
7402 /**
7403  * Get new thread specific flow workspace.
7404  *
7405  * If the current workspace is in use, create a new one and set it as current.
7406  *
7407  * @return pointer to thread specific flow workspace data, NULL on error.
7408  */
7409 struct mlx5_flow_workspace*
7410 mlx5_flow_push_thread_workspace(void)
7411 {
7412 	struct mlx5_flow_workspace *curr;
7413 	struct mlx5_flow_workspace *data;
7414 
7415 	curr = mlx5_flow_os_get_specific_workspace();
7416 	if (!curr) {
7417 		data = flow_alloc_thread_workspace();
7418 		if (!data)
7419 			return NULL;
7420 	} else if (!curr->inuse) {
7421 		data = curr;
7422 	} else if (curr->next) {
7423 		data = curr->next;
7424 	} else {
7425 		data = flow_alloc_thread_workspace();
7426 		if (!data)
7427 			return NULL;
7428 		curr->next = data;
7429 		data->prev = curr;
7430 	}
7431 	data->inuse = 1;
7432 	data->flow_idx = 0;
7433 	/* Set as current workspace */
7434 	if (mlx5_flow_os_set_specific_workspace(data))
7435 		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
7436 	return data;
7437 }
7438 
7439 /**
7440  * Close current thread specific flow workspace.
7441  *
7442  * If a previous workspace is available, set it as current.
7445  */
7446 void
7447 mlx5_flow_pop_thread_workspace(void)
7448 {
7449 	struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();
7450 
7451 	if (!data)
7452 		return;
7453 	if (!data->inuse) {
7454 		DRV_LOG(ERR, "Failed to close unused flow workspace.");
7455 		return;
7456 	}
7457 	data->inuse = 0;
7458 	if (!data->prev)
7459 		return;
7460 	if (mlx5_flow_os_set_specific_workspace(data->prev))
7461 		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
7462 }
7463 
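/*
 * Editor's note: hedged sketch (not driver code) of the workspace
 * discipline implied above. Every flow operation brackets its work between
 * push and pop; a nested operation on the same thread reuses the next
 * workspace in the chain instead of allocating a fresh one.
 */
static __rte_unused int
doc_sketch_workspace_usage(void)
{
	struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();

	if (!wks)
		return -ENOMEM;
	/* ... fill wks->rss_desc and build device flows here ... */
	mlx5_flow_pop_thread_workspace();
	return 0;
}
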
7464 /**
7465  * Verify the flow list is empty.
7466  *
7467  * @param dev
7468  *  Pointer to Ethernet device.
7469  *
7470  * @return the number of flows not released.
7471  */
7472 int
7473 mlx5_flow_verify(struct rte_eth_dev *dev)
7474 {
7475 	struct mlx5_priv *priv = dev->data->dev_private;
7476 	struct rte_flow *flow;
7477 	uint32_t idx = 0;
7478 	int ret = 0, i;
7479 
7480 	for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
7481 		MLX5_IPOOL_FOREACH(priv->flows[i], idx, flow) {
7482 			DRV_LOG(DEBUG, "port %u flow %p still referenced",
7483 				dev->data->port_id, (void *)flow);
7484 			ret++;
7485 		}
7486 	}
7487 	return ret;
7488 }
7489 
7490 /**
7491  * Enable default hairpin egress flow.
7492  *
7493  * @param dev
7494  *   Pointer to Ethernet device.
7495  * @param queue
7496  *   The queue index.
7497  *
7498  * @return
7499  *   0 on success, a negative errno value otherwise and rte_errno is set.
7500  */
7501 int
7502 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
7503 			    uint32_t queue)
7504 {
7505 	const struct rte_flow_attr attr = {
7506 		.egress = 1,
7507 		.priority = 0,
7508 	};
7509 	struct mlx5_rte_flow_item_sq queue_spec = {
7510 		.queue = queue,
7511 	};
7512 	struct mlx5_rte_flow_item_sq queue_mask = {
7513 		.queue = UINT32_MAX,
7514 	};
7515 	struct rte_flow_item items[] = {
7516 		{
7517 			.type = (enum rte_flow_item_type)
7518 				MLX5_RTE_FLOW_ITEM_TYPE_SQ,
7519 			.spec = &queue_spec,
7520 			.last = NULL,
7521 			.mask = &queue_mask,
7522 		},
7523 		{
7524 			.type = RTE_FLOW_ITEM_TYPE_END,
7525 		},
7526 	};
7527 	struct rte_flow_action_jump jump = {
7528 		.group = MLX5_HAIRPIN_TX_TABLE,
7529 	};
7530 	struct rte_flow_action actions[2];
7531 	uint32_t flow_idx;
7532 	struct rte_flow_error error;
7533 
7534 	actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
7535 	actions[0].conf = &jump;
7536 	actions[1].type = RTE_FLOW_ACTION_TYPE_END;
7537 	flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7538 				    &attr, items, actions, false, &error);
7539 	if (!flow_idx) {
7540 		DRV_LOG(DEBUG,
7541 			"Failed to create ctrl flow: rte_errno(%d),"
7542 			" type(%d), message(%s)",
7543 			rte_errno, error.type,
7544 			error.message ? error.message : " (no stated reason)");
7545 		return -rte_errno;
7546 	}
7547 	return 0;
7548 }
7549 
7550 /**
7551  * Enable a control flow configured from the control plane.
7552  *
7553  * @param dev
7554  *   Pointer to Ethernet device.
7555  * @param eth_spec
7556  *   An Ethernet flow spec to apply.
7557  * @param eth_mask
7558  *   An Ethernet flow mask to apply.
7559  * @param vlan_spec
7560  *   A VLAN flow spec to apply.
7561  * @param vlan_mask
7562  *   A VLAN flow mask to apply.
7563  *
7564  * @return
7565  *   0 on success, a negative errno value otherwise and rte_errno is set.
7566  */
7567 int
7568 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
7569 		    struct rte_flow_item_eth *eth_spec,
7570 		    struct rte_flow_item_eth *eth_mask,
7571 		    struct rte_flow_item_vlan *vlan_spec,
7572 		    struct rte_flow_item_vlan *vlan_mask)
7573 {
7574 	struct mlx5_priv *priv = dev->data->dev_private;
7575 	const struct rte_flow_attr attr = {
7576 		.ingress = 1,
7577 		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7578 	};
7579 	struct rte_flow_item items[] = {
7580 		{
7581 			.type = RTE_FLOW_ITEM_TYPE_ETH,
7582 			.spec = eth_spec,
7583 			.last = NULL,
7584 			.mask = eth_mask,
7585 		},
7586 		{
7587 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
7588 					      RTE_FLOW_ITEM_TYPE_END,
7589 			.spec = vlan_spec,
7590 			.last = NULL,
7591 			.mask = vlan_mask,
7592 		},
7593 		{
7594 			.type = RTE_FLOW_ITEM_TYPE_END,
7595 		},
7596 	};
7597 	uint16_t queue[priv->reta_idx_n];
7598 	struct rte_flow_action_rss action_rss = {
7599 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
7600 		.level = 0,
7601 		.types = priv->rss_conf.rss_hf,
7602 		.key_len = priv->rss_conf.rss_key_len,
7603 		.queue_num = priv->reta_idx_n,
7604 		.key = priv->rss_conf.rss_key,
7605 		.queue = queue,
7606 	};
7607 	struct rte_flow_action actions[] = {
7608 		{
7609 			.type = RTE_FLOW_ACTION_TYPE_RSS,
7610 			.conf = &action_rss,
7611 		},
7612 		{
7613 			.type = RTE_FLOW_ACTION_TYPE_END,
7614 		},
7615 	};
7616 	uint32_t flow_idx;
7617 	struct rte_flow_error error;
7618 	unsigned int i;
7619 
7620 	if (!priv->reta_idx_n || !priv->rxqs_n)
7621 		return 0;
7623 	if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
7624 		action_rss.types = 0;
7625 	for (i = 0; i != priv->reta_idx_n; ++i)
7626 		queue[i] = (*priv->reta_idx)[i];
7627 	flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7628 				    &attr, items, actions, false, &error);
7629 	if (!flow_idx)
7630 		return -rte_errno;
7631 	return 0;
7632 }
7633 
7634 /**
7635  * Enable a control flow configured from the control plane.
7636  *
7637  * @param dev
7638  *   Pointer to Ethernet device.
7639  * @param eth_spec
7640  *   An Ethernet flow spec to apply.
7641  * @param eth_mask
7642  *   An Ethernet flow mask to apply.
7643  *
7644  * @return
7645  *   0 on success, a negative errno value otherwise and rte_errno is set.
7646  */
7647 int
7648 mlx5_ctrl_flow(struct rte_eth_dev *dev,
7649 	       struct rte_flow_item_eth *eth_spec,
7650 	       struct rte_flow_item_eth *eth_mask)
7651 {
7652 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
7653 }
7654 
7655 /**
7656  * Create a default miss flow rule matching LACP traffic.
7657  *
7658  * @param dev
7659  *   Pointer to Ethernet device.
7662  *
7663  * @return
7664  *   0 on success, a negative errno value otherwise and rte_errno is set.
7665  */
7666 int
7667 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
7668 {
7669 	/*
7670 	 * The LACP matching is done by only using ether type since using
7671 	 * a multicast dst mac causes the kernel to give low priority to this flow.
7672 	 */
7673 	static const struct rte_flow_item_eth lacp_spec = {
7674 		.type = RTE_BE16(0x8809),
7675 	};
7676 	static const struct rte_flow_item_eth lacp_mask = {
7677 		.type = 0xffff,
7678 	};
7679 	const struct rte_flow_attr attr = {
7680 		.ingress = 1,
7681 	};
7682 	struct rte_flow_item items[] = {
7683 		{
7684 			.type = RTE_FLOW_ITEM_TYPE_ETH,
7685 			.spec = &lacp_spec,
7686 			.mask = &lacp_mask,
7687 		},
7688 		{
7689 			.type = RTE_FLOW_ITEM_TYPE_END,
7690 		},
7691 	};
7692 	struct rte_flow_action actions[] = {
7693 		{
7694 			.type = (enum rte_flow_action_type)
7695 				MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
7696 		},
7697 		{
7698 			.type = RTE_FLOW_ACTION_TYPE_END,
7699 		},
7700 	};
7701 	struct rte_flow_error error;
7702 	uint32_t flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7703 					&attr, items, actions,
7704 					false, &error);
7705 
7706 	if (!flow_idx)
7707 		return -rte_errno;
7708 	return 0;
7709 }
7710 
7711 /**
7712  * Destroy a flow.
7713  *
7714  * @see rte_flow_destroy()
7715  * @see rte_flow_ops
7716  */
7717 int
7718 mlx5_flow_destroy(struct rte_eth_dev *dev,
7719 		  struct rte_flow *flow,
7720 		  struct rte_flow_error *error __rte_unused)
7721 {
7722 	struct mlx5_priv *priv = dev->data->dev_private;
7723 
7724 	if (priv->sh->config.dv_flow_en == 2)
7725 		return rte_flow_error_set(error, ENOTSUP,
7726 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7727 			  NULL,
7728 			  "Flow non-Q destruction not supported");
7729 	flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN,
7730 				(uintptr_t)(void *)flow);
7731 	return 0;
7732 }
7733 
7734 /**
7735  * Destroy all flows.
7736  *
7737  * @see rte_flow_flush()
7738  * @see rte_flow_ops
7739  */
7740 int
7741 mlx5_flow_flush(struct rte_eth_dev *dev,
7742 		struct rte_flow_error *error __rte_unused)
7743 {
7744 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, false);
7745 	return 0;
7746 }
7747 
7748 /**
7749  * Isolated mode.
7750  *
7751  * @see rte_flow_isolate()
7752  * @see rte_flow_ops
7753  */
7754 int
7755 mlx5_flow_isolate(struct rte_eth_dev *dev,
7756 		  int enable,
7757 		  struct rte_flow_error *error)
7758 {
7759 	struct mlx5_priv *priv = dev->data->dev_private;
7760 
7761 	if (dev->data->dev_started) {
7762 		rte_flow_error_set(error, EBUSY,
7763 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7764 				   NULL,
7765 				   "port must be stopped first");
7766 		return -rte_errno;
7767 	}
7768 	priv->isolated = !!enable;
7769 	if (enable)
7770 		dev->dev_ops = &mlx5_dev_ops_isolate;
7771 	else
7772 		dev->dev_ops = &mlx5_dev_ops;
7773 
7774 	dev->rx_descriptor_status = mlx5_rx_descriptor_status;
7775 	dev->tx_descriptor_status = mlx5_tx_descriptor_status;
7776 
7777 	return 0;
7778 }
7779 
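/*
 * Editor's note: hedged usage sketch (not driver code); as enforced above,
 * isolated mode can only be toggled while the port is stopped.
 */
static __rte_unused int
doc_sketch_enable_isolated_mode(uint16_t port_id)
{
	struct rte_flow_error error;
	int ret = rte_eth_dev_stop(port_id);

	if (ret)
		return ret;
	ret = rte_flow_isolate(port_id, 1, &error);
	if (ret)
		return ret;
	return rte_eth_dev_start(port_id);
}
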
7780 /**
7781  * Query a flow.
7782  *
7783  * @see rte_flow_query()
7784  * @see rte_flow_ops
7785  */
7786 static int
7787 flow_drv_query(struct rte_eth_dev *dev,
7788 	       uint32_t flow_idx,
7789 	       const struct rte_flow_action *actions,
7790 	       void *data,
7791 	       struct rte_flow_error *error)
7792 {
7793 	struct mlx5_priv *priv = dev->data->dev_private;
7794 	const struct mlx5_flow_driver_ops *fops;
7795 	struct rte_flow *flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
7796 					       flow_idx);
7797 	enum mlx5_flow_drv_type ftype;
7798 
7799 	if (!flow) {
7800 		return rte_flow_error_set(error, ENOENT,
7801 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7802 			  NULL,
7803 			  "invalid flow handle");
7804 	}
7805 	ftype = flow->drv_type;
7806 	MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
7807 	fops = flow_get_drv_ops(ftype);
7808 
7809 	return fops->query(dev, flow, actions, data, error);
7810 }
7811 
7812 /**
7813  * Query a flow.
7814  *
7815  * @see rte_flow_query()
7816  * @see rte_flow_ops
7817  */
7818 int
7819 mlx5_flow_query(struct rte_eth_dev *dev,
7820 		struct rte_flow *flow,
7821 		const struct rte_flow_action *actions,
7822 		void *data,
7823 		struct rte_flow_error *error)
7824 {
7825 	int ret;
7826 	struct mlx5_priv *priv = dev->data->dev_private;
7827 
7828 	if (priv->sh->config.dv_flow_en == 2)
7829 		return rte_flow_error_set(error, ENOTSUP,
7830 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7831 			  NULL,
7832 			  "Flow non-Q query not supported");
7833 	ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
7834 			     error);
7835 	if (ret < 0)
7836 		return ret;
7837 	return 0;
7838 }
7839 
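/*
 * Editor's note: hedged usage sketch (not driver code) of the query path
 * above, reading and resetting the COUNT action of an existing rule; the
 * rule is assumed to carry a COUNT action.
 */
static __rte_unused int
doc_sketch_query_count(uint16_t port_id, struct rte_flow *flow,
		       uint64_t *hits, uint64_t *bytes)
{
	struct rte_flow_query_count count = { .reset = 1 };
	const struct rte_flow_action action = {
		.type = RTE_FLOW_ACTION_TYPE_COUNT,
	};
	struct rte_flow_error error;

	if (rte_flow_query(port_id, flow, &action, &count, &error))
		return -rte_errno;
	*hits = count.hits;
	*bytes = count.bytes;
	return 0;
}
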
7840 /**
7841  * Get rte_flow callbacks.
7842  *
7843  * @param dev
7844  *   Pointer to Ethernet device structure.
7845  * @param ops
7846  *   Pointer to operation-specific structure.
7847  *
7848  * @return 0
7849  */
7850 int
7851 mlx5_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
7852 		  const struct rte_flow_ops **ops)
7853 {
7854 	*ops = &mlx5_flow_ops;
7855 	return 0;
7856 }
7857 
7858 /**
7859  * Validate meter policy actions.
7860  * Dispatcher for action type specific validation.
7861  *
7862  * @param[in] dev
7863  *   Pointer to the Ethernet device structure.
7864  * @param[in] action
7865  *   The meter policy action object to validate.
7866  * @param[in] attr
7867  *   Attributes of flow to determine steering domain.
7868  * @param[out] is_rss
7869  *   Is RSS or not.
7870  * @param[out] domain_bitmap
7871  *   Domain bitmap.
7872  * @param[out] policy_mode
7873  *   Policy mode.
7874  * @param[out] error
7875  *   Perform verbose error reporting if not NULL. Initialized in case of
7876  *   error only.
7877  *
7878  * @return
7879  *   0 on success, otherwise negative errno value.
7880  */
7881 int
7882 mlx5_flow_validate_mtr_acts(struct rte_eth_dev *dev,
7883 			const struct rte_flow_action *actions[RTE_COLORS],
7884 			struct rte_flow_attr *attr,
7885 			bool *is_rss,
7886 			uint8_t *domain_bitmap,
7887 			uint8_t *policy_mode,
7888 			struct rte_mtr_error *error)
7889 {
7890 	const struct mlx5_flow_driver_ops *fops;
7891 
7892 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7893 	return fops->validate_mtr_acts(dev, actions, attr, is_rss,
7894 				       domain_bitmap, policy_mode, error);
7895 }
7896 
7897 /**
7898  * Destroy the meter policy actions.
7899  *
7900  * @param[in] dev
7901  *   Pointer to Ethernet device.
7902  * @param[in] mtr_policy
7903  *   Meter policy struct.
7904  */
7905 void
7906 mlx5_flow_destroy_mtr_acts(struct rte_eth_dev *dev,
7907 		      struct mlx5_flow_meter_policy *mtr_policy)
7908 {
7909 	const struct mlx5_flow_driver_ops *fops;
7910 
7911 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7912 	fops->destroy_mtr_acts(dev, mtr_policy);
7913 }
7914 
7915 /**
7916  * Create policy action, lock free
7917  * (the mutex should be acquired by the caller).
7918  * Dispatcher for action type specific call.
7919  *
7920  * @param[in] dev
7921  *   Pointer to the Ethernet device structure.
7922  * @param[in] mtr_policy
7923  *   Meter policy struct.
7924  * @param[in] action
7925  *   Action specification used to create meter actions.
7926  * @param[in] attr
7927  *   Flow rule attributes.
7928  * @param[out] error
7929  *   Perform verbose error reporting if not NULL. Initialized in case of
7930  *   error only.
7931  *
7932  * @return
7933  *   0 on success, otherwise negative errno value.
7934  */
7935 int
7936 mlx5_flow_create_mtr_acts(struct rte_eth_dev *dev,
7937 		      struct mlx5_flow_meter_policy *mtr_policy,
7938 		      const struct rte_flow_action *actions[RTE_COLORS],
7939 		      struct rte_flow_attr *attr,
7940 		      struct rte_mtr_error *error)
7941 {
7942 	const struct mlx5_flow_driver_ops *fops;
7943 
7944 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7945 	return fops->create_mtr_acts(dev, mtr_policy, actions, attr, error);
7946 }
7947 
7948 /**
7949  * Create policy rules, lock free
7950  * (the mutex should be acquired by the caller).
7951  * Dispatcher for action type specific call.
7952  *
7953  * @param[in] dev
7954  *   Pointer to the Ethernet device structure.
7955  * @param[in] mtr_policy
7956  *   Meter policy struct.
7957  *
7958  * @return
7959  *   0 on success, -1 otherwise.
7960  */
7961 int
7962 mlx5_flow_create_policy_rules(struct rte_eth_dev *dev,
7963 			     struct mlx5_flow_meter_policy *mtr_policy)
7964 {
7965 	const struct mlx5_flow_driver_ops *fops;
7966 
7967 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7968 	return fops->create_policy_rules(dev, mtr_policy);
7969 }
7970 
7971 /**
7972  * Destroy policy rules, lock free
7973  * (the mutex should be acquired by the caller).
7974  * Dispatcher for action type specific call.
7975  *
7976  * @param[in] dev
7977  *   Pointer to the Ethernet device structure.
7978  * @param[in] mtr_policy
7979  *   Meter policy struct.
7980  */
7981 void
7982 mlx5_flow_destroy_policy_rules(struct rte_eth_dev *dev,
7983 			     struct mlx5_flow_meter_policy *mtr_policy)
7984 {
7985 	const struct mlx5_flow_driver_ops *fops;
7986 
7987 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7988 	fops->destroy_policy_rules(dev, mtr_policy);
7989 }
7990 
7991 /**
7992  * Destroy the default policy table set.
7993  *
7994  * @param[in] dev
7995  *   Pointer to Ethernet device.
7996  */
7997 void
7998 mlx5_flow_destroy_def_policy(struct rte_eth_dev *dev)
7999 {
8000 	const struct mlx5_flow_driver_ops *fops;
8001 
8002 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8003 	fops->destroy_def_policy(dev);
8004 }
8005 
8006 /**
8007  * Create the default policy table set.
8008  *
8009  * @param[in] dev
8010  *   Pointer to Ethernet device.
8011  *
8012  * @return
8013  *   0 on success, -1 otherwise.
8014  */
8015 int
8016 mlx5_flow_create_def_policy(struct rte_eth_dev *dev)
8017 {
8018 	const struct mlx5_flow_driver_ops *fops;
8019 
8020 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8021 	return fops->create_def_policy(dev);
8022 }
8023 
8024 /**
8025  * Create the needed meter and suffix tables.
8026  *
8027  * @param[in] dev
8028  *   Pointer to Ethernet device.
8029  *
8030  * @return
8031  *   0 on success, -1 otherwise.
8032  */
8033 int
8034 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
8035 			struct mlx5_flow_meter_info *fm,
8036 			uint32_t mtr_idx,
8037 			uint8_t domain_bitmap)
8038 {
8039 	const struct mlx5_flow_driver_ops *fops;
8040 
8041 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8042 	return fops->create_mtr_tbls(dev, fm, mtr_idx, domain_bitmap);
8043 }
8044 
8045 /**
8046  * Destroy the meter table set.
8047  *
8048  * @param[in] dev
8049  *   Pointer to Ethernet device.
8050  * @param[in] fm
8051  *   Pointer to the flow meter info whose tables are to be destroyed.
8052  */
8053 void
8054 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
8055 			   struct mlx5_flow_meter_info *fm)
8056 {
8057 	const struct mlx5_flow_driver_ops *fops;
8058 
8059 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8060 	fops->destroy_mtr_tbls(dev, fm);
8061 }
8062 
8063 /**
8064  * Destroy the global meter drop table.
8065  *
8066  * @param[in] dev
8067  *   Pointer to Ethernet device.
8068  */
8069 void
8070 mlx5_flow_destroy_mtr_drop_tbls(struct rte_eth_dev *dev)
8071 {
8072 	const struct mlx5_flow_driver_ops *fops;
8073 
8074 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8075 	fops->destroy_mtr_drop_tbls(dev);
8076 }
8077 
8078 /**
8079  * Destroy the sub policy table with RX queue.
8080  *
8081  * @param[in] dev
8082  *   Pointer to Ethernet device.
8083  * @param[in] mtr_policy
8084  *   Pointer to meter policy table.
8085  */
8086 void
8087 mlx5_flow_destroy_sub_policy_with_rxq(struct rte_eth_dev *dev,
8088 		struct mlx5_flow_meter_policy *mtr_policy)
8089 {
8090 	const struct mlx5_flow_driver_ops *fops;
8091 
8092 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8093 	fops->destroy_sub_policy_with_rxq(dev, mtr_policy);
8094 }
8095 
8096 /**
8097  * Allocate the needed aso flow meter id.
8098  *
8099  * @param[in] dev
8100  *   Pointer to Ethernet device.
8101  *
8102  * @return
8103  *   Index to the ASO flow meter on success, 0 otherwise.
8104  */
8105 uint32_t
8106 mlx5_flow_mtr_alloc(struct rte_eth_dev *dev)
8107 {
8108 	const struct mlx5_flow_driver_ops *fops;
8109 
8110 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8111 	return fops->create_meter(dev);
8112 }
8113 
8114 /**
8115  * Free the ASO flow meter id.
8116  *
8117  * @param[in] dev
8118  *   Pointer to Ethernet device.
8119  * @param[in] mtr_idx
8120  *   Index to the ASO flow meter to be freed.
8124  */
8125 void
8126 mlx5_flow_mtr_free(struct rte_eth_dev *dev, uint32_t mtr_idx)
8127 {
8128 	const struct mlx5_flow_driver_ops *fops;
8129 
8130 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8131 	fops->free_meter(dev, mtr_idx);
8132 }
8133 
8134 /**
8135  * Allocate a counter.
8136  *
8137  * @param[in] dev
8138  *   Pointer to Ethernet device structure.
8139  *
8140  * @return
8141  *   Index to the allocated counter on success, 0 otherwise.
8142  */
8143 uint32_t
8144 mlx5_counter_alloc(struct rte_eth_dev *dev)
8145 {
8146 	const struct mlx5_flow_driver_ops *fops;
8147 	struct rte_flow_attr attr = { .transfer = 0 };
8148 
8149 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8150 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8151 		return fops->counter_alloc(dev);
8152 	}
8153 	DRV_LOG(ERR,
8154 		"port %u counter allocate is not supported.",
8155 		 dev->data->port_id);
8156 	return 0;
8157 }
8158 
8159 /**
8160  * Free a counter.
8161  *
8162  * @param[in] dev
8163  *   Pointer to Ethernet device structure.
8164  * @param[in] cnt
8165  *   Index to counter to be free.
8166  */
8167 void
8168 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
8169 {
8170 	const struct mlx5_flow_driver_ops *fops;
8171 	struct rte_flow_attr attr = { .transfer = 0 };
8172 
8173 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8174 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8175 		fops->counter_free(dev, cnt);
8176 		return;
8177 	}
8178 	DRV_LOG(ERR,
8179 		"port %u counter free is not supported.",
8180 		 dev->data->port_id);
8181 }
8182 
8183 /**
8184  * Query counter statistics.
8185  *
8186  * @param[in] dev
8187  *   Pointer to Ethernet device structure.
8188  * @param[in] cnt
8189  *   Index to counter to query.
8190  * @param[in] clear
8191  *   Set to clear counter statistics.
8192  * @param[out] pkts
8193  *   The counter hits packets number to save.
8194  * @param[out] bytes
8195  *   The counter hits bytes number to save.
8196  *
8197  * @return
8198  *   0 on success, a negative errno value otherwise.
8199  */
8200 int
8201 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
8202 		   bool clear, uint64_t *pkts, uint64_t *bytes, void **action)
8203 {
8204 	const struct mlx5_flow_driver_ops *fops;
8205 	struct rte_flow_attr attr = { .transfer = 0 };
8206 
8207 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8208 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8209 		return fops->counter_query(dev, cnt, clear, pkts,
8210 					bytes, action);
8211 	}
8212 	DRV_LOG(ERR,
8213 		"port %u counter query is not supported.",
8214 		 dev->data->port_id);
8215 	return -ENOTSUP;
8216 }
8217 
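/*
 * Editor's note: hedged sketch (not driver code) of the counter life cycle
 * built from the three helpers above; it only works on the DV driver, as
 * the helpers themselves check.
 */
static __rte_unused int
doc_sketch_counter_lifecycle(struct rte_eth_dev *dev)
{
	uint64_t pkts = 0, bytes = 0;
	void *action = NULL;
	uint32_t cnt = mlx5_counter_alloc(dev);
	int ret;

	if (!cnt)
		return -rte_errno;
	/* ... attach the counter to a flow rule and pass traffic ... */
	ret = mlx5_counter_query(dev, cnt, true, &pkts, &bytes, &action);
	mlx5_counter_free(dev, cnt);
	return ret;
}
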
8218 /**
8219  * Get information about HWS pre-configurable resources.
8220  *
8221  * @param[in] dev
8222  *   Pointer to the rte_eth_dev structure.
8223  * @param[out] port_info
8224  *   Pointer to port information.
8225  * @param[out] queue_info
8226  *   Pointer to queue information.
8227  * @param[out] error
8228  *   Pointer to error structure.
8229  *
8230  * @return
8231  *   0 on success, a negative errno value otherwise and rte_errno is set.
8232  */
8233 static int
8234 mlx5_flow_info_get(struct rte_eth_dev *dev,
8235 		   struct rte_flow_port_info *port_info,
8236 		   struct rte_flow_queue_info *queue_info,
8237 		   struct rte_flow_error *error)
8238 {
8239 	const struct mlx5_flow_driver_ops *fops;
8240 
8241 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8242 		return rte_flow_error_set(error, ENOTSUP,
8243 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8244 				NULL,
8245 				"info get with incorrect steering mode");
8246 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8247 	return fops->info_get(dev, port_info, queue_info, error);
8248 }
8249 
8250 /**
8251  * Configure port HWS resources.
8252  *
8253  * @param[in] dev
8254  *   Pointer to the rte_eth_dev structure.
8255  * @param[in] port_attr
8256  *   Port configuration attributes.
8257  * @param[in] nb_queue
8258  *   Number of queue.
8259  * @param[in] queue_attr
8260  *   Array that holds attributes for each flow queue.
8261  * @param[out] error
8262  *   Pointer to error structure.
8263  *
8264  * @return
8265  *   0 on success, a negative errno value otherwise and rte_errno is set.
8266  */
8267 static int
8268 mlx5_flow_port_configure(struct rte_eth_dev *dev,
8269 			 const struct rte_flow_port_attr *port_attr,
8270 			 uint16_t nb_queue,
8271 			 const struct rte_flow_queue_attr *queue_attr[],
8272 			 struct rte_flow_error *error)
8273 {
8274 	const struct mlx5_flow_driver_ops *fops;
8275 
8276 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8277 		return rte_flow_error_set(error, ENOTSUP,
8278 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8279 				NULL,
8280 				"port configure with incorrect steering mode");
8281 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8282 	return fops->configure(dev, port_attr, nb_queue, queue_attr, error);
8283 }
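
/*
 * Illustrative sketch: an application reaches the callback above through
 * the generic rte_flow API. The port_id variable and all field values
 * below are arbitrary examples.
 *
 *	const struct rte_flow_port_attr port_attr = { .nb_counters = 1 << 10 };
 *	const struct rte_flow_queue_attr queue_attr = { .size = 64 };
 *	const struct rte_flow_queue_attr *queue_attrs[] = { &queue_attr };
 *	struct rte_flow_error error;
 *
 *	if (rte_flow_configure(port_id, &port_attr, 1, queue_attrs, &error))
 *		rte_exit(EXIT_FAILURE, "configure: %s\n", error.message);
 */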
8284 
8285 /**
8286  * Create flow item template.
8287  *
8288  * @param[in] dev
8289  *   Pointer to the rte_eth_dev structure.
8290  * @param[in] attr
8291  *   Pointer to the item template attributes.
8292  * @param[in] items
8293  *   The template item pattern.
8294  * @param[out] error
8295  *   Pointer to error structure.
8296  *
8297  * @return
8298  *   Pattern template pointer on success, NULL otherwise and rte_errno is set.
8299  */
8300 static struct rte_flow_pattern_template *
8301 mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
8302 		const struct rte_flow_pattern_template_attr *attr,
8303 		const struct rte_flow_item items[],
8304 		struct rte_flow_error *error)
8305 {
8306 	const struct mlx5_flow_driver_ops *fops;
8307 
8308 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8309 		rte_flow_error_set(error, ENOTSUP,
8310 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8311 				NULL,
8312 				"pattern create with incorrect steering mode");
8313 		return NULL;
8314 	}
8315 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8316 	return fops->pattern_template_create(dev, attr, items, error);
8317 }
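
/*
 * Illustrative sketch: pattern template creation via the generic API
 * that dispatches to the callback above (HWS mode only). port_id and
 * error are assumed to be set up as in the earlier sketch.
 *
 *	const struct rte_flow_pattern_template_attr pt_attr = { .ingress = 1 };
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_pattern_template *pt =
 *		rte_flow_pattern_template_create(port_id, &pt_attr, pattern,
 *						 &error);
 */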
8318 
8319 /**
8320  * Destroy flow item template.
8321  *
8322  * @param[in] dev
8323  *   Pointer to the rte_eth_dev structure.
8324  * @param[in] template
8325  *   Pointer to the item template to be destroyed.
8326  * @param[out] error
8327  *   Pointer to error structure.
8328  *
8329  * @return
8330  *   0 on success, a negative errno value otherwise and rte_errno is set.
8331  */
8332 static int
8333 mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
8334 				   struct rte_flow_pattern_template *template,
8335 				   struct rte_flow_error *error)
8336 {
8337 	const struct mlx5_flow_driver_ops *fops;
8338 
8339 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8340 		return rte_flow_error_set(error, ENOTSUP,
8341 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8342 				NULL,
8343 				"pattern destroy with incorrect steering mode");
8344 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8345 	return fops->pattern_template_destroy(dev, template, error);
8346 }
8347 
8348 /**
8349  * Create flow actions template.
8350  *
8351  * @param[in] dev
8352  *   Pointer to the rte_eth_dev structure.
8353  * @param[in] attr
8354  *   Pointer to the action template attributes.
8355  * @param[in] actions
8356  *   Associated actions (list terminated by the END action).
8357  * @param[in] masks
8358  *   List of actions that mark which members of each action are constant.
8359  * @param[out] error
8360  *   Pointer to error structure.
8361  *
8362  * @return
8363  *   Actions template pointer on success, NULL otherwise and rte_errno is set.
8364  */
8365 static struct rte_flow_actions_template *
8366 mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
8367 			const struct rte_flow_actions_template_attr *attr,
8368 			const struct rte_flow_action actions[],
8369 			const struct rte_flow_action masks[],
8370 			struct rte_flow_error *error)
8371 {
8372 	const struct mlx5_flow_driver_ops *fops;
8373 
8374 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8375 		rte_flow_error_set(error, ENOTSUP,
8376 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8377 				NULL,
8378 				"action create with incorrect steering mode");
8379 		return NULL;
8380 	}
8381 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8382 	return fops->actions_template_create(dev, attr, actions, masks, error);
8383 }
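
/*
 * Illustrative sketch: the masks array marks which action fields are
 * constant in the template; zeroed fields are provided per flow rule.
 * Variable names are arbitrary examples.
 *
 *	const struct rte_flow_actions_template_attr at_attr = { .ingress = 1 };
 *	const struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *		  .conf = &(struct rte_flow_action_queue){ .index = 0 } },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	const struct rte_flow_action masks[] = {
 *		// Zeroed queue index: the index is given per flow rule.
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *		  .conf = &(struct rte_flow_action_queue){ .index = 0 } },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_actions_template *at =
 *		rte_flow_actions_template_create(port_id, &at_attr, actions,
 *						 masks, &error);
 */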
8384 
8385 /**
8386  * Destroy flow action template.
8387  *
8388  * @param[in] dev
8389  *   Pointer to the rte_eth_dev structure.
8390  * @param[in] template
8391  *   Pointer to the action template to be destroyed.
8392  * @param[out] error
8393  *   Pointer to error structure.
8394  *
8395  * @return
8396  *   0 on success, a negative errno value otherwise and rte_errno is set.
8397  */
8398 static int
8399 mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
8400 				   struct rte_flow_actions_template *template,
8401 				   struct rte_flow_error *error)
8402 {
8403 	const struct mlx5_flow_driver_ops *fops;
8404 
8405 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8406 		return rte_flow_error_set(error, ENOTSUP,
8407 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8408 				NULL,
8409 				"action destroy with incorrect steering mode");
8410 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8411 	return fops->actions_template_destroy(dev, template, error);
8412 }
8413 
8414 /**
8415  * Create flow table.
8416  *
8417  * @param[in] dev
8418  *   Pointer to the rte_eth_dev structure.
8419  * @param[in] attr
8420  *   Pointer to the table attributes.
8421  * @param[in] item_templates
8422  *   Item template array to be bound to the table.
8423  * @param[in] nb_item_templates
8424  *   Number of item templates.
8425  * @param[in] action_templates
8426  *   Action template array to be bound to the table.
8427  * @param[in] nb_action_templates
8428  *   Number of action templates.
8429  * @param[out] error
8430  *   Pointer to error structure.
8431  *
8432  * @return
8433  *    Table on success, NULL otherwise and rte_errno is set.
8434  */
8435 static struct rte_flow_template_table *
8436 mlx5_flow_table_create(struct rte_eth_dev *dev,
8437 		       const struct rte_flow_template_table_attr *attr,
8438 		       struct rte_flow_pattern_template *item_templates[],
8439 		       uint8_t nb_item_templates,
8440 		       struct rte_flow_actions_template *action_templates[],
8441 		       uint8_t nb_action_templates,
8442 		       struct rte_flow_error *error)
8443 {
8444 	const struct mlx5_flow_driver_ops *fops;
8445 
8446 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8447 		rte_flow_error_set(error, ENOTSUP,
8448 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8449 				NULL,
8450 				"table create with incorrect steering mode");
8451 		return NULL;
8452 	}
8453 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8454 	return fops->template_table_create(dev,
8455 					   attr,
8456 					   item_templates,
8457 					   nb_item_templates,
8458 					   action_templates,
8459 					   nb_action_templates,
8460 					   error);
8461 }
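
/*
 * Illustrative sketch: binding the templates from the earlier sketches
 * (pt, at) into a table sized for the expected number of rules. Field
 * values are arbitrary examples.
 *
 *	const struct rte_flow_template_table_attr tbl_attr = {
 *		.flow_attr = { .group = 1, .ingress = 1 },
 *		.nb_flows = 1 << 16,
 *	};
 *	struct rte_flow_template_table *tbl =
 *		rte_flow_template_table_create(port_id, &tbl_attr,
 *					       &pt, 1, &at, 1, &error);
 */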
8462 
8463 /**
8464  * PMD destroy flow table.
8465  *
8466  * @param[in] dev
8467  *   Pointer to the rte_eth_dev structure.
8468  * @param[in] table
8469  *   Pointer to the table to be destroyed.
8470  * @param[out] error
8471  *   Pointer to error structure.
8472  *
8473  * @return
8474  *   0 on success, a negative errno value otherwise and rte_errno is set.
8475  */
8476 static int
8477 mlx5_flow_table_destroy(struct rte_eth_dev *dev,
8478 			struct rte_flow_template_table *table,
8479 			struct rte_flow_error *error)
8480 {
8481 	const struct mlx5_flow_driver_ops *fops;
8482 
8483 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8484 		return rte_flow_error_set(error, ENOTSUP,
8485 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8486 				NULL,
8487 				"table destroy with incorrect steering mode");
8488 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8489 	return fops->template_table_destroy(dev, table, error);
8490 }
8491 
8492 /**
8493  * Enqueue flow creation.
8494  *
8495  * @param[in] dev
8496  *   Pointer to the rte_eth_dev structure.
8497  * @param[in] queue_id
8498  *   The queue to create the flow.
8499  * @param[in] attr
8500  *   Pointer to the flow operation attributes.
8501  * @param[in] table
8502  *   Pointer to the template table to create the flow from.
8503  * @param[in] items
8504  *   Items with flow spec values.
8505  * @param[in] pattern_template_index
8506  *   Index of the pattern template, within the table, that the flow follows.
8507  * @param[in] actions
8508  *   Actions with flow spec values.
8509  * @param[in] action_template_index
8510  *   Index of the actions template, within the table, that the flow follows.
8509  * @param[in] user_data
8510  *   Pointer to the user_data.
8511  * @param[out] error
8512  *   Pointer to error structure.
8513  *
8514  * @return
8515  *    Flow pointer on success, NULL otherwise and rte_errno is set.
8516  */
8517 static struct rte_flow *
8518 mlx5_flow_async_flow_create(struct rte_eth_dev *dev,
8519 			    uint32_t queue_id,
8520 			    const struct rte_flow_op_attr *attr,
8521 			    struct rte_flow_template_table *table,
8522 			    const struct rte_flow_item items[],
8523 			    uint8_t pattern_template_index,
8524 			    const struct rte_flow_action actions[],
8525 			    uint8_t action_template_index,
8526 			    void *user_data,
8527 			    struct rte_flow_error *error)
8528 {
8529 	const struct mlx5_flow_driver_ops *fops;
8530 
8531 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8532 		rte_flow_error_set(error, ENOTSUP,
8533 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8534 				NULL,
8535 				"flow_q create with incorrect steering mode");
8536 		return NULL;
8537 	}
8538 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8539 	return fops->async_flow_create(dev, queue_id, attr, table,
8540 				       items, pattern_template_index,
8541 				       actions, action_template_index,
8542 				       user_data, error);
8543 }
8544 
8545 /**
8546  * Enqueue flow destruction.
8547  *
8548  * @param[in] dev
8549  *   Pointer to the rte_eth_dev structure.
8550  * @param[in] queue
8551  *   The queue to destroy the flow.
8552  * @param[in] attr
8553  *   Pointer to the flow operation attributes.
8554  * @param[in] flow
8555  *   Pointer to the flow to be destroyed.
8556  * @param[in] user_data
8557  *   Pointer to the user_data.
8558  * @param[out] error
8559  *   Pointer to error structure.
8560  *
8561  * @return
8562  *    0 on success, negative value otherwise and rte_errno is set.
8563  */
8564 static int
8565 mlx5_flow_async_flow_destroy(struct rte_eth_dev *dev,
8566 			     uint32_t queue,
8567 			     const struct rte_flow_op_attr *attr,
8568 			     struct rte_flow *flow,
8569 			     void *user_data,
8570 			     struct rte_flow_error *error)
8571 {
8572 	const struct mlx5_flow_driver_ops *fops;
8573 
8574 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8575 		return rte_flow_error_set(error, ENOTSUP,
8576 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8577 				NULL,
8578 				"flow_q destroy with incorrect steering mode");
8579 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8580 	return fops->async_flow_destroy(dev, queue, attr, flow,
8581 					user_data, error);
8582 }
8583 
8584 /**
8585  * Pull the enqueued flows.
8586  *
8587  * @param[in] dev
8588  *   Pointer to the rte_eth_dev structure.
8589  * @param[in] queue
8590  *   The queue to pull the result.
8591  * @param[in, out] res
8592  *   Array to save the results.
8593  * @param[in] n_res
8594  *   Number of result slots available in the array.
8595  * @param[out] error
8596  *   Pointer to error structure.
8597  *
8598  * @return
8599  *    Result number on success, negative value otherwise and rte_errno is set.
8600  */
8601 static int
8602 mlx5_flow_pull(struct rte_eth_dev *dev,
8603 	       uint32_t queue,
8604 	       struct rte_flow_op_result res[],
8605 	       uint16_t n_res,
8606 	       struct rte_flow_error *error)
8607 {
8608 	const struct mlx5_flow_driver_ops *fops;
8609 
8610 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8611 		return rte_flow_error_set(error, ENOTSUP,
8612 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8613 				NULL,
8614 				"flow_q pull with incorrect steering mode");
8615 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8616 	return fops->pull(dev, queue, res, n_res, error);
8617 }
8618 
8619 /**
8620  * Push the enqueued flows.
8621  *
8622  * @param[in] dev
8623  *   Pointer to the rte_eth_dev structure.
8624  * @param[in] queue
8625  *   The queue to push the flows.
8626  * @param[out] error
8627  *   Pointer to error structure.
8628  *
8629  * @return
8630  *    0 on success, negative value otherwise and rte_errno is set.
8631  */
8632 static int
8633 mlx5_flow_push(struct rte_eth_dev *dev,
8634 	       uint32_t queue,
8635 	       struct rte_flow_error *error)
8636 {
8637 	const struct mlx5_flow_driver_ops *fops;
8638 
8639 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8640 		return rte_flow_error_set(error, ENOTSUP,
8641 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8642 				NULL,
8643 				"flow_q push with incorrect steering mode");
8644 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8645 	return fops->push(dev, queue, error);
8646 }
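
/*
 * Illustrative sketch of the asynchronous rule lifecycle served by the
 * callbacks above: enqueue a rule, push the queue, then poll completions.
 * tbl, pattern and actions are assumed to come from the earlier sketches.
 *
 *	const struct rte_flow_op_attr op_attr = { .postpone = 1 };
 *	struct rte_flow_op_result res[32];
 *	struct rte_flow *f;
 *	int n;
 *
 *	f = rte_flow_async_create(port_id, 0, &op_attr, tbl,
 *				  pattern, 0, actions, 0, NULL, &error);
 *	rte_flow_push(port_id, 0, &error);
 *	do {
 *		n = rte_flow_pull(port_id, 0, res, RTE_DIM(res), &error);
 *	} while (n == 0);
 */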
8647 
8648 /**
8649  * Create shared action.
8650  *
8651  * @param[in] dev
8652  *   Pointer to the rte_eth_dev structure.
8653  * @param[in] queue
8654  *   Which queue to be used.
8655  * @param[in] attr
8656  *   Operation attribute.
8657  * @param[in] conf
8658  *   Indirect action configuration.
8659  * @param[in] action
8660  *   rte_flow action detail.
8661  * @param[in] user_data
8662  *   Pointer to the user_data.
8663  * @param[out] error
8664  *   Pointer to error structure.
8665  *
8666  * @return
8667  *   Action handle on success, NULL otherwise and rte_errno is set.
8668  */
8669 static struct rte_flow_action_handle *
8670 mlx5_flow_async_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
8671 				 const struct rte_flow_op_attr *attr,
8672 				 const struct rte_flow_indir_action_conf *conf,
8673 				 const struct rte_flow_action *action,
8674 				 void *user_data,
8675 				 struct rte_flow_error *error)
8676 {
8677 	const struct mlx5_flow_driver_ops *fops =
8678 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8679 
8680 	return fops->async_action_create(dev, queue, attr, conf, action,
8681 					 user_data, error);
8682 }
8683 
8684 /**
8685  * Update shared action.
8686  *
8687  * @param[in] dev
8688  *   Pointer to the rte_eth_dev structure.
8689  * @param[in] queue
8690  *   Which queue to be used.
8691  * @param[in] attr
8692  *   Operation attribute.
8693  * @param[in] handle
8694  *   Action handle to be updated.
8695  * @param[in] update
8696  *   Update value.
8697  * @param[in] user_data
8698  *   Pointer to the user_data.
8699  * @param[out] error
8700  *   Pointer to error structure.
8701  *
8702  * @return
8703  *   0 on success, negative value otherwise and rte_errno is set.
8704  */
8705 static int
8706 mlx5_flow_async_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
8707 				     const struct rte_flow_op_attr *attr,
8708 				     struct rte_flow_action_handle *handle,
8709 				     const void *update,
8710 				     void *user_data,
8711 				     struct rte_flow_error *error)
8712 {
8713 	const struct mlx5_flow_driver_ops *fops =
8714 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8715 
8716 	return fops->async_action_update(dev, queue, attr, handle,
8717 					 update, user_data, error);
8718 }
8719 
8720 /**
8721  * Destroy shared action.
8722  *
8723  * @param[in] dev
8724  *   Pointer to the rte_eth_dev structure.
8725  * @param[in] queue
8726  *   Which queue to be used.
8727  * @param[in] attr
8728  *   Operation attribute.
8729  * @param[in] handle
8730  *   Action handle to be destroyed.
8731  * @param[in] user_data
8732  *   Pointer to the user_data.
8733  * @param[out] error
8734  *   Pointer to error structure.
8735  *
8736  * @return
8737  *   0 on success, negative value otherwise and rte_errno is set.
8738  */
8739 static int
8740 mlx5_flow_async_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
8741 				      const struct rte_flow_op_attr *attr,
8742 				      struct rte_flow_action_handle *handle,
8743 				      void *user_data,
8744 				      struct rte_flow_error *error)
8745 {
8746 	const struct mlx5_flow_driver_ops *fops =
8747 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8748 
8749 	return fops->async_action_destroy(dev, queue, attr, handle,
8750 					  user_data, error);
8751 }
8752 
8753 /**
8754  * Allocate new memory for the counter values, wrapped with all the needed
8755  * management structures.
8756  *
8757  * @param[in] sh
8758  *   Pointer to mlx5_dev_ctx_shared object.
8759  *
8760  * @return
8761  *   0 on success, a negative errno value otherwise.
8762  */
8763 static int
8764 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
8765 {
8766 	struct mlx5_counter_stats_mem_mng *mem_mng;
8767 	volatile struct flow_counter_stats *raw_data;
8768 	int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
8769 	int size = (sizeof(struct flow_counter_stats) *
8770 			MLX5_COUNTERS_PER_POOL +
8771 			sizeof(struct mlx5_counter_stats_raw)) * raws_n +
8772 			sizeof(struct mlx5_counter_stats_mem_mng);
8773 	size_t pgsize = rte_mem_page_size();
8774 	uint8_t *mem;
8775 	int ret;
8776 	int i;
8777 
8778 	if (pgsize == (size_t)-1) {
8779 		DRV_LOG(ERR, "Failed to get mem page size");
8780 		rte_errno = ENOMEM;
8781 		return -ENOMEM;
8782 	}
8783 	mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
8784 	if (!mem) {
8785 		rte_errno = ENOMEM;
8786 		return -ENOMEM;
8787 	}
8788 	mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
8789 	size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
8790 	ret = mlx5_os_wrapped_mkey_create(sh->cdev->ctx, sh->cdev->pd,
8791 					  sh->cdev->pdn, mem, size,
8792 					  &mem_mng->wm);
8793 	if (ret) {
8794 		rte_errno = errno;
8795 		mlx5_free(mem);
8796 		return -rte_errno;
8797 	}
8798 	mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
8799 	raw_data = (volatile struct flow_counter_stats *)mem;
8800 	for (i = 0; i < raws_n; ++i) {
8801 		mem_mng->raws[i].mem_mng = mem_mng;
8802 		mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
8803 	}
8804 	for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
8805 		LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
8806 				 mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
8807 				 next);
8808 	LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
8809 	sh->cmng.mem_mng = mem_mng;
8810 	return 0;
8811 }
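
/*
 * Layout of the single allocation made above (sizes in elements):
 *
 *	mem -+- raws_n * MLX5_COUNTERS_PER_POOL  flow_counter_stats slots
 *	     |                                   (registered via the wrapped
 *	     |                                   mkey for DevX DMA)
 *	     +- raws_n                           struct mlx5_counter_stats_raw
 *	     +- 1                                struct mlx5_counter_stats_mem_mng
 *
 * The management structure is carved from the tail of the same buffer,
 * so releasing `mem` frees everything at once.
 */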
8812 
8813 /**
8814  * Set the statistic memory to the new counter pool.
8815  *
8816  * @param[in] sh
8817  *   Pointer to mlx5_dev_ctx_shared object.
8818  * @param[in] pool
8819  *   Pointer to the pool to set the statistic memory.
8820  *
8821  * @return
8822  *   0 on success, a negative errno value otherwise.
8823  */
8824 static int
8825 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
8826 			       struct mlx5_flow_counter_pool *pool)
8827 {
8828 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
8829 	/* Resize statistic memory once used out. */
8830 	/* Resize the statistics memory once it is used up. */
8831 	    mlx5_flow_create_counter_stat_mem_mng(sh)) {
8832 		DRV_LOG(ERR, "Cannot resize counter stat mem.");
8833 		return -1;
8834 	}
8835 	rte_spinlock_lock(&pool->sl);
8836 	pool->raw = cmng->mem_mng->raws + pool->index %
8837 		    MLX5_CNT_CONTAINER_RESIZE;
8838 	rte_spinlock_unlock(&pool->sl);
8839 	pool->raw_hw = NULL;
8840 	return 0;
8841 }
8842 
8843 #define MLX5_POOL_QUERY_FREQ_US 1000000
8844 
8845 /**
8846  * Set the periodic procedure for triggering asynchronous batch queries for all
8847  * the counter pools.
8848  *
8849  * @param[in] sh
8850  *   Pointer to mlx5_dev_ctx_shared object.
8851  */
8852 void
8853 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
8854 {
8855 	uint32_t pools_n, us;
8856 
8857 	pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED);
8858 	us = MLX5_POOL_QUERY_FREQ_US / pools_n;
8859 	DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
8860 	if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
8861 		sh->cmng.query_thread_on = 0;
8862 		DRV_LOG(ERR, "Cannot reinitialize query alarm");
8863 	} else {
8864 		sh->cmng.query_thread_on = 1;
8865 	}
8866 }
8867 
8868 /**
8869  * The periodic procedure for triggering asynchronous batch queries for all the
8870  * counter pools. This function is expected to be called from the host thread.
8871  *
8872  * @param[in] arg
8873  *   The parameter for the alarm process.
8874  */
8875 void
8876 mlx5_flow_query_alarm(void *arg)
8877 {
8878 	struct mlx5_dev_ctx_shared *sh = arg;
8879 	int ret;
8880 	uint16_t pool_index = sh->cmng.pool_index;
8881 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
8882 	struct mlx5_flow_counter_pool *pool;
8883 	uint16_t n_valid;
8884 
8885 	if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
8886 		goto set_alarm;
8887 	rte_spinlock_lock(&cmng->pool_update_sl);
8888 	pool = cmng->pools[pool_index];
8889 	n_valid = cmng->n_valid;
8890 	rte_spinlock_unlock(&cmng->pool_update_sl);
8891 	/* Set the statistics memory for the newly created pool. */
8892 	if (!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool))
8893 		goto set_alarm;
8894 	if (pool->raw_hw)
8895 		/* There is a pool query in progress. */
8896 		goto set_alarm;
8897 	pool->raw_hw =
8898 		LIST_FIRST(&sh->cmng.free_stat_raws);
8899 	if (!pool->raw_hw)
8900 		/* No free counter statistics raw memory. */
8901 		goto set_alarm;
8902 	/*
8903 	 * Identify the counters released between query trigger and query
8904 	 * handling more efficiently. Counters released in this gap period
8905 	 * should wait for a new round of query, as the newly arrived packets
8906 	 * will not be taken into account.
8907 	 */
8908 	pool->query_gen++;
8909 	ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
8910 					       MLX5_COUNTERS_PER_POOL,
8911 					       NULL, NULL,
8912 					       pool->raw_hw->mem_mng->wm.lkey,
8913 					       (void *)(uintptr_t)
8914 					       pool->raw_hw->data,
8915 					       sh->devx_comp,
8916 					       (uint64_t)(uintptr_t)pool);
8917 	if (ret) {
8918 		DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
8919 			" %d", pool->min_dcs->id);
8920 		pool->raw_hw = NULL;
8921 		goto set_alarm;
8922 	}
8923 	LIST_REMOVE(pool->raw_hw, next);
8924 	sh->cmng.pending_queries++;
8925 	pool_index++;
8926 	if (pool_index >= n_valid)
8927 		pool_index = 0;
8928 set_alarm:
8929 	sh->cmng.pool_index = pool_index;
8930 	mlx5_set_query_alarm(sh);
8931 }
8932 
8933 /**
8934  * Check and callback event for new aged flow in the counter pool
8935  * Check for new aged flows in the counter pool and raise the aging event.
8936  * @param[in] sh
8937  *   Pointer to mlx5_dev_ctx_shared object.
8938  * @param[in] pool
8939  *   Pointer to Current counter pool.
8940  */
8941 static void
8942 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
8943 		   struct mlx5_flow_counter_pool *pool)
8944 {
8945 	struct mlx5_priv *priv;
8946 	struct mlx5_flow_counter *cnt;
8947 	struct mlx5_age_info *age_info;
8948 	struct mlx5_age_param *age_param;
8949 	struct mlx5_counter_stats_raw *cur = pool->raw_hw;
8950 	struct mlx5_counter_stats_raw *prev = pool->raw;
8951 	const uint64_t curr_time = MLX5_CURR_TIME_SEC;
8952 	const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
8953 	uint16_t expected = AGE_CANDIDATE;
8954 	uint32_t i;
8955 
8956 	pool->time_of_last_age_check = curr_time;
8957 	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
8958 		cnt = MLX5_POOL_GET_CNT(pool, i);
8959 		age_param = MLX5_CNT_TO_AGE(cnt);
8960 		if (__atomic_load_n(&age_param->state,
8961 				    __ATOMIC_RELAXED) != AGE_CANDIDATE)
8962 			continue;
8963 		if (cur->data[i].hits != prev->data[i].hits) {
8964 			__atomic_store_n(&age_param->sec_since_last_hit, 0,
8965 					 __ATOMIC_RELAXED);
8966 			continue;
8967 		}
8968 		if (__atomic_add_fetch(&age_param->sec_since_last_hit,
8969 				       time_delta,
8970 				       __ATOMIC_RELAXED) <= age_param->timeout)
8971 			continue;
8972 		/*
8973 		 * Hold the lock first; otherwise, if the release
8974 		 * happens between setting the AGE_TMOUT state and
8975 		 * the tailq operation, the release procedure may
8976 		 * delete a non-existent tailq node.
8977 		 */
8978 		priv = rte_eth_devices[age_param->port_id].data->dev_private;
8979 		age_info = GET_PORT_AGE_INFO(priv);
8980 		rte_spinlock_lock(&age_info->aged_sl);
8981 		if (__atomic_compare_exchange_n(&age_param->state, &expected,
8982 						AGE_TMOUT, false,
8983 						__ATOMIC_RELAXED,
8984 						__ATOMIC_RELAXED)) {
8985 			TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
8986 			MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
8987 		}
8988 		rte_spinlock_unlock(&age_info->aged_sl);
8989 	}
8990 	mlx5_age_event_prepare(sh);
8991 }
8992 
8993 /**
8994  * Handler for the HW response carrying ready values from an asynchronous
8995  * batch query. This function is expected to be called from the host thread.
8996  *
8997  * @param[in] sh
8998  *   The pointer to the shared device context.
8999  * @param[in] async_id
9000  *   The Devx async ID.
9001  * @param[in] status
9002  *   The status of the completion.
9003  */
9004 void
9005 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
9006 				  uint64_t async_id, int status)
9007 {
9008 	struct mlx5_flow_counter_pool *pool =
9009 		(struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
9010 	struct mlx5_counter_stats_raw *raw_to_free;
9011 	uint8_t query_gen = pool->query_gen ^ 1;
9012 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
9013 	enum mlx5_counter_type cnt_type =
9014 		pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
9015 				MLX5_COUNTER_TYPE_ORIGIN;
9016 
9017 	if (unlikely(status)) {
9018 		raw_to_free = pool->raw_hw;
9019 	} else {
9020 		raw_to_free = pool->raw;
9021 		if (pool->is_aged)
9022 			mlx5_flow_aging_check(sh, pool);
9023 		rte_spinlock_lock(&pool->sl);
9024 		pool->raw = pool->raw_hw;
9025 		rte_spinlock_unlock(&pool->sl);
9026 		/* Make sure the new raw counter data is committed to memory. */
9027 		rte_io_wmb();
9028 		if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
9029 			rte_spinlock_lock(&cmng->csl[cnt_type]);
9030 			TAILQ_CONCAT(&cmng->counters[cnt_type],
9031 				     &pool->counters[query_gen], next);
9032 			rte_spinlock_unlock(&cmng->csl[cnt_type]);
9033 		}
9034 	}
9035 	LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
9036 	pool->raw_hw = NULL;
9037 	sh->cmng.pending_queries--;
9038 }
9039 
9040 static int
9041 flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
9042 		    const struct flow_grp_info *grp_info,
9043 		    struct rte_flow_error *error)
9044 {
9045 	if (grp_info->transfer && grp_info->external &&
9046 	    grp_info->fdb_def_rule) {
9047 		if (group == UINT32_MAX)
9048 			return rte_flow_error_set
9049 						(error, EINVAL,
9050 						 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
9051 						 NULL,
9052 						 "group index not supported");
9053 		*table = group + 1;
9054 	} else {
9055 		*table = group;
9056 	}
9057 	DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
9058 	return 0;
9059 }
9060 
9061 /**
9062  * Translate the rte_flow group index to HW table value.
9063  *
9064  * If tunnel offload is disabled, all group ids are converted to flow table
9065  * ids using the standard method.
9066  * If tunnel offload is enabled, group id can be converted using the
9067  * standard or tunnel conversion method. Group conversion method
9068  * selection depends on flags in `grp_info` parameter:
9069  * - Internal (grp_info.external == 0) groups conversion uses the
9070  *   standard method.
9071  * - Group ids in JUMP action converted with the tunnel conversion.
9072  * - Group id in rule attribute conversion depends on a rule type and
9073  *   group id value:
9074  *   ** non zero group attributes converted with the tunnel method
9075  *   ** zero group attribute in non-tunnel rule is converted using the
9076  *      standard method - there's only one root table
9077  *   ** zero group attribute in steer tunnel rule is converted with the
9078  *      standard method - single root table
9079  *   ** zero group attribute in match tunnel rule is a special OvS
9080  *      case: that value is used for portability reasons. That group
9081  *      id is converted with the tunnel conversion method.
9082  *
9083  * @param[in] dev
9084  *   Port device
9085  * @param[in] tunnel
9086  *   PMD tunnel offload object
9087  * @param[in] group
9088  *   rte_flow group index value.
9089  * @param[out] table
9090  *   HW table value.
9091  * @param[in] grp_info
9092  *   flags used for conversion
9093  * @param[out] error
9094  *   Pointer to error structure.
9095  *
9096  * @return
9097  *   0 on success, a negative errno value otherwise and rte_errno is set.
9098  */
9099 int
9100 mlx5_flow_group_to_table(struct rte_eth_dev *dev,
9101 			 const struct mlx5_flow_tunnel *tunnel,
9102 			 uint32_t group, uint32_t *table,
9103 			 const struct flow_grp_info *grp_info,
9104 			 struct rte_flow_error *error)
9105 {
9106 	int ret;
9107 	bool standard_translation;
9108 
9109 	if (!grp_info->skip_scale && grp_info->external &&
9110 	    group < MLX5_MAX_TABLES_EXTERNAL)
9111 		group *= MLX5_FLOW_TABLE_FACTOR;
9112 	if (is_tunnel_offload_active(dev)) {
9113 		standard_translation = !grp_info->external ||
9114 					grp_info->std_tbl_fix;
9115 	} else {
9116 		standard_translation = true;
9117 	}
9118 	DRV_LOG(DEBUG,
9119 		"port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
9120 		dev->data->port_id, group, grp_info->transfer,
9121 		grp_info->external, grp_info->fdb_def_rule,
9122 		standard_translation ? "STANDARD" : "TUNNEL");
9123 	if (standard_translation)
9124 		ret = flow_group_to_table(dev->data->port_id, group, table,
9125 					  grp_info, error);
9126 	else
9127 		ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
9128 						      table, error);
9129 
9130 	return ret;
9131 }
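
/*
 * Illustrative translations performed above, assuming the default
 * MLX5_FLOW_TABLE_FACTOR, skip_scale unset and a group id below
 * MLX5_MAX_TABLES_EXTERNAL:
 * - external non-transfer rule, group 3:
 *	table = 3 * MLX5_FLOW_TABLE_FACTOR
 * - external transfer rule with fdb_def_rule set, group 3:
 *	table = 3 * MLX5_FLOW_TABLE_FACTOR + 1
 *	(the table is shifted by one to reserve table 0 for the FDB
 *	default rule)
 */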
9132 
9133 /**
9134  * Discover availability of metadata reg_c's.
9135  *
9136  * Iteratively use test flows to check availability.
9137  *
9138  * @param[in] dev
9139  *   Pointer to the Ethernet device structure.
9140  *
9141  * @return
9142  *   0 on success, a negative errno value otherwise and rte_errno is set.
9143  */
9144 int
9145 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
9146 {
9147 	struct mlx5_priv *priv = dev->data->dev_private;
9148 	enum modify_reg idx;
9149 	int n = 0;
9150 
9151 	/* reg_c[0] and reg_c[1] are reserved. */
9152 	priv->sh->flow_mreg_c[n++] = REG_C_0;
9153 	priv->sh->flow_mreg_c[n++] = REG_C_1;
9154 	/* Discover availability of other reg_c's. */
9155 	for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
9156 		struct rte_flow_attr attr = {
9157 			.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
9158 			.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
9159 			.ingress = 1,
9160 		};
9161 		struct rte_flow_item items[] = {
9162 			[0] = {
9163 				.type = RTE_FLOW_ITEM_TYPE_END,
9164 			},
9165 		};
9166 		struct rte_flow_action actions[] = {
9167 			[0] = {
9168 				.type = (enum rte_flow_action_type)
9169 					MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
9170 				.conf = &(struct mlx5_flow_action_copy_mreg){
9171 					.src = REG_C_1,
9172 					.dst = idx,
9173 				},
9174 			},
9175 			[1] = {
9176 				.type = RTE_FLOW_ACTION_TYPE_JUMP,
9177 				.conf = &(struct rte_flow_action_jump){
9178 					.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
9179 				},
9180 			},
9181 			[2] = {
9182 				.type = RTE_FLOW_ACTION_TYPE_END,
9183 			},
9184 		};
9185 		uint32_t flow_idx;
9186 		struct rte_flow *flow;
9187 		struct rte_flow_error error;
9188 
9189 		if (!priv->sh->config.dv_flow_en)
9190 			break;
9191 		/* Create internal flow, validation skips copy action. */
9192 		flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_GEN, &attr,
9193 					items, actions, false, &error);
9194 		flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
9195 				      flow_idx);
9196 		if (!flow)
9197 			continue;
9198 		priv->sh->flow_mreg_c[n++] = idx;
9199 		flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN, flow_idx);
9200 	}
9201 	for (; n < MLX5_MREG_C_NUM; ++n)
9202 		priv->sh->flow_mreg_c[n] = REG_NON;
9203 	priv->sh->metadata_regc_check_flag = 1;
9204 	return 0;
9205 }
9206 
9207 int
9208 save_dump_file(const uint8_t *data, uint32_t size,
9209 	uint32_t type, uint64_t id, void *arg, FILE *file)
9210 {
9211 	char line[BUF_SIZE];
9212 	uint32_t out = 0;
9213 	uint32_t k;
9214 	uint32_t actions_num;
9215 	struct rte_flow_query_count *count;
9216 
9217 	memset(line, 0, BUF_SIZE);
9218 	switch (type) {
9219 	case DR_DUMP_REC_TYPE_PMD_MODIFY_HDR:
9220 		actions_num = *(uint32_t *)(arg);
9221 		out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",%d,",
9222 				type, id, actions_num);
9223 		break;
9224 	case DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT:
9225 		out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",",
9226 				type, id);
9227 		break;
9228 	case DR_DUMP_REC_TYPE_PMD_COUNTER:
9229 		count = (struct rte_flow_query_count *)arg;
9230 		fprintf(file,
9231 			"%d,0x%" PRIx64 ",%" PRIu64 ",%" PRIu64 "\n",
9232 			type, id, count->hits, count->bytes);
9233 		return 0;
9234 	default:
9235 		return -1;
9236 	}
9237 
9238 	for (k = 0; k < size; k++) {
9239 		/* Make sure we do not overrun the line buffer length. */
9240 		if (out >= BUF_SIZE - 4) {
9241 			line[out] = '\0';
9242 			break;
9243 		}
9244 		out += snprintf(line + out, BUF_SIZE - out, "%02x",
9245 				(data[k]) & 0xff);
9246 	}
9247 	fprintf(file, "%s\n", line);
9248 	return 0;
9249 }
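
/*
 * One line is emitted per record; the formats produced above are
 * (illustrative, <id> printed as 0x-prefixed hex):
 *
 *	modify_hdr:	<type>,<id>,<actions_num>,<hex action data>
 *	pkt_reformat:	<type>,<id>,<hex reformat buffer>
 *	counter:	<type>,<id>,<hits>,<bytes>
 */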
9250 
9251 int
9252 mlx5_flow_query_counter(struct rte_eth_dev *dev, struct rte_flow *flow,
9253 	struct rte_flow_query_count *count, struct rte_flow_error *error)
9254 {
9255 	struct rte_flow_action action[2];
9256 	enum mlx5_flow_drv_type ftype;
9257 	const struct mlx5_flow_driver_ops *fops;
9258 
9259 	if (!flow) {
9260 		return rte_flow_error_set(error, ENOENT,
9261 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9262 				NULL,
9263 				"invalid flow handle");
9264 	}
9265 	action[0].type = RTE_FLOW_ACTION_TYPE_COUNT;
9266 	action[1].type = RTE_FLOW_ACTION_TYPE_END;
9267 	if (flow->counter) {
9268 		memset(count, 0, sizeof(struct rte_flow_query_count));
9269 		ftype = (enum mlx5_flow_drv_type)(flow->drv_type);
9270 		MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN &&
9271 						ftype < MLX5_FLOW_TYPE_MAX);
9272 		fops = flow_get_drv_ops(ftype);
9273 		return fops->query(dev, flow, action, count, error);
9274 	}
9275 	return -1;
9276 }
9277 
9278 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9279 /**
9280  * Dump flow ipool data to file
9281  *
9282  * @param[in] dev
9283  *   The pointer to Ethernet device.
9284  * @param[in] file
9285  *   A pointer to a file for output.
9286  * @param[out] error
9287  *   Perform verbose error reporting if not NULL. PMDs initialize this
9288  *   structure in case of error only.
9289  * @return
9290  *   0 on success, a negative value otherwise.
9291  */
9292 int
9293 mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev,
9294 	struct rte_flow *flow, FILE *file,
9295 	struct rte_flow_error *error)
9296 {
9297 	struct mlx5_priv *priv = dev->data->dev_private;
9298 	struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
9299 	struct mlx5_flow_dv_encap_decap_resource *encap_decap;
9300 	uint32_t handle_idx;
9301 	struct mlx5_flow_handle *dh;
9302 	struct rte_flow_query_count count;
9303 	uint32_t actions_num;
9304 	const uint8_t *data;
9305 	size_t size;
9306 	uint64_t id;
9307 	uint32_t type;
9308 	void *action = NULL;
9309 
9310 	if (!flow) {
9311 		return rte_flow_error_set(error, ENOENT,
9312 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9313 				NULL,
9314 				"invalid flow handle");
9315 	}
9316 	handle_idx = flow->dev_handles;
9317 	/* query counter */
9318 	if (flow->counter &&
9319 	(!mlx5_counter_query(dev, flow->counter, false,
9320 	&count.hits, &count.bytes, &action)) && action) {
9321 		id = (uint64_t)(uintptr_t)action;
9322 		type = DR_DUMP_REC_TYPE_PMD_COUNTER;
9323 		save_dump_file(NULL, 0, type,
9324 			id, (void *)&count, file);
9325 	}
9326 
9327 	while (handle_idx) {
9328 		dh = mlx5_ipool_get(priv->sh->ipool
9329 				[MLX5_IPOOL_MLX5_FLOW], handle_idx);
9330 		if (!dh)
9331 			/* Stale handle index; stop to avoid an endless loop. */
9332 			break;
9332 		handle_idx = dh->next.next;
9333 
9334 		/* Get modify_hdr and encap_decap buf from ipools. */
9335 		encap_decap = NULL;
9336 		modify_hdr = dh->dvh.modify_hdr;
9337 
9338 		if (dh->dvh.rix_encap_decap) {
9339 			encap_decap = mlx5_ipool_get(priv->sh->ipool
9340 						[MLX5_IPOOL_DECAP_ENCAP],
9341 						dh->dvh.rix_encap_decap);
9342 		}
9343 		if (modify_hdr) {
9344 			data = (const uint8_t *)modify_hdr->actions;
9345 			size = (size_t)(modify_hdr->actions_num) * 8;
9346 			id = (uint64_t)(uintptr_t)modify_hdr->action;
9347 			actions_num = modify_hdr->actions_num;
9348 			type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
9349 			save_dump_file(data, size, type, id,
9350 						(void *)(&actions_num), file);
9351 		}
9352 		if (encap_decap) {
9353 			data = encap_decap->buf;
9354 			size = encap_decap->size;
9355 			id = (uint64_t)(uintptr_t)encap_decap->action;
9356 			type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
9357 			save_dump_file(data, size, type,
9358 						id, NULL, file);
9359 		}
9360 	}
9361 	return 0;
9362 }
9363 
9364 /**
9365  * Dump all flow's encap_decap/modify_hdr/counter data to file
9366  *
9367  * @param[in] dev
9368  *   The pointer to Ethernet device.
9369  * @param[in] file
9370  *   A pointer to a file for output.
9371  * @param[out] error
9372  *   Perform verbose error reporting if not NULL. PMDs initialize this
9373  *   structure in case of error only.
9374  * @return
9375  *   0 on success, a negative value otherwise.
9376  */
9377 static int
9378 mlx5_flow_dev_dump_sh_all(struct rte_eth_dev *dev,
9379 	FILE *file, struct rte_flow_error *error __rte_unused)
9380 {
9381 	struct mlx5_priv *priv = dev->data->dev_private;
9382 	struct mlx5_dev_ctx_shared *sh = priv->sh;
9383 	struct mlx5_hlist *h;
9384 	struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
9385 	struct mlx5_flow_dv_encap_decap_resource *encap_decap;
9386 	struct rte_flow_query_count count;
9387 	uint32_t actions_num;
9388 	const uint8_t *data;
9389 	size_t size;
9390 	uint64_t id;
9391 	uint32_t type;
9392 	uint32_t i;
9393 	uint32_t j;
9394 	struct mlx5_list_inconst *l_inconst;
9395 	struct mlx5_list_entry *e;
9396 	int lcore_index;
9397 	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
9398 	uint32_t max;
9399 	void *action;
9400 
9401 	/* The encap_decap hlist is lcore-shared; get the global core cache. */
9402 	i = MLX5_LIST_GLOBAL;
9403 	h = sh->encaps_decaps;
9404 	if (h) {
9405 		for (j = 0; j <= h->mask; j++) {
9406 			l_inconst = &h->buckets[j].l;
9407 			if (!l_inconst || !l_inconst->cache[i])
9408 				continue;
9409 
9410 			e = LIST_FIRST(&l_inconst->cache[i]->h);
9411 			while (e) {
9412 				encap_decap =
9413 				(struct mlx5_flow_dv_encap_decap_resource *)e;
9414 				data = encap_decap->buf;
9415 				size = encap_decap->size;
9416 				id = (uint64_t)(uintptr_t)encap_decap->action;
9417 				type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
9418 				save_dump_file(data, size, type,
9419 					id, NULL, file);
9420 				e = LIST_NEXT(e, next);
9421 			}
9422 		}
9423 	}
9424 
9425 	/* get modify_hdr */
9426 	h = sh->modify_cmds;
9427 	if (h) {
9428 		lcore_index = rte_lcore_index(rte_lcore_id());
9429 		if (unlikely(lcore_index == -1)) {
9430 			lcore_index = MLX5_LIST_NLCORE;
9431 			rte_spinlock_lock(&h->l_const.lcore_lock);
9432 		}
9433 		i = lcore_index;
9434 
9435 		for (j = 0; j <= h->mask; j++) {
9436 			l_inconst = &h->buckets[j].l;
9437 			if (!l_inconst || !l_inconst->cache[i])
9438 				continue;
9439 
9440 			e = LIST_FIRST(&l_inconst->cache[i]->h);
9441 			while (e) {
9442 				modify_hdr =
9443 				(struct mlx5_flow_dv_modify_hdr_resource *)e;
9444 				data = (const uint8_t *)modify_hdr->actions;
9445 				size = (size_t)(modify_hdr->actions_num) * 8;
9446 				actions_num = modify_hdr->actions_num;
9447 				id = (uint64_t)(uintptr_t)modify_hdr->action;
9448 				type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
9449 				save_dump_file(data, size, type, id,
9450 						(void *)(&actions_num), file);
9451 				e = LIST_NEXT(e, next);
9452 			}
9453 		}
9454 
9455 		if (unlikely(lcore_index == MLX5_LIST_NLCORE))
9456 			rte_spinlock_unlock(&h->l_const.lcore_lock);
9457 	}
9458 
9459 	/* get counter */
9460 	MLX5_ASSERT(cmng->n_valid <= cmng->n);
9461 	max = MLX5_COUNTERS_PER_POOL * cmng->n_valid;
9462 	for (j = 1; j <= max; j++) {
9463 		action = NULL;
9464 		if ((!mlx5_counter_query(dev, j, false, &count.hits,
9465 		&count.bytes, &action)) && action) {
9466 			id = (uint64_t)(uintptr_t)action;
9467 			type = DR_DUMP_REC_TYPE_PMD_COUNTER;
9468 			save_dump_file(NULL, 0, type,
9469 					id, (void *)&count, file);
9470 		}
9471 	}
9472 	return 0;
9473 }
9474 #endif
9475 
9476 /**
9477  * Dump flow raw hw data to file
9478  *
9479  * @param[in] dev
9480  *    The pointer to Ethernet device.
9481  * @param[in] file
9482  *   A pointer to a file for output.
9483  * @param[out] error
9484  *   Perform verbose error reporting if not NULL. PMDs initialize this
9485  *   structure in case of error only.
9486  * @return
9487  *   0 on success, a negative value otherwise.
9488  */
9489 int
9490 mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow_idx,
9491 		   FILE *file,
9492 		   struct rte_flow_error *error __rte_unused)
9493 {
9494 	struct mlx5_priv *priv = dev->data->dev_private;
9495 	struct mlx5_dev_ctx_shared *sh = priv->sh;
9496 	uint32_t handle_idx;
9497 	int ret;
9498 	struct mlx5_flow_handle *dh;
9499 	struct rte_flow *flow;
9500 
9501 	if (!sh->config.dv_flow_en) {
9502 		if (fputs("device dv flow disabled\n", file) <= 0)
9503 			return -errno;
9504 		return -ENOTSUP;
9505 	}
9506 
9507 	/* dump all */
9508 	if (!flow_idx) {
9509 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9510 		if (mlx5_flow_dev_dump_sh_all(dev, file, error))
9511 			return -EINVAL;
9512 #endif
9513 		return mlx5_devx_cmd_flow_dump(sh->fdb_domain,
9514 					sh->rx_domain,
9515 					sh->tx_domain, file);
9516 	}
9517 	/* dump one */
9518 	flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
9519 			(uintptr_t)(void *)flow_idx);
9520 	if (!flow)
9521 		return -EINVAL;
9522 
9523 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9524 	mlx5_flow_dev_dump_ipool(dev, flow, file, error);
9525 #endif
9526 	handle_idx = flow->dev_handles;
9527 	while (handle_idx) {
9528 		dh = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
9529 				handle_idx);
9530 		if (!dh)
9531 			return -ENOENT;
9532 		if (dh->drv_flow) {
9533 			ret = mlx5_devx_cmd_flow_single_dump(dh->drv_flow,
9534 					file);
9535 			if (ret)
9536 				return -ENOENT;
9537 		}
9538 		handle_idx = dh->next.next;
9539 	}
9540 	return 0;
9541 }
9542 
9543 /**
9544  * Get aged-out flows.
9545  *
9546  * @param[in] dev
9547  *   Pointer to the Ethernet device structure.
9548  * @param[in] context
9549  * @param[in] contexts
9550  *   The address of an array of pointers to the aged-out flow contexts.
9551  * @param[in] nb_contexts
9552  *   The length of the context array.
9553  *   Perform verbose error reporting if not NULL. Initialized in case of
9554  *   error only.
9555  *
9556  * @return
9557  *   The number of contexts retrieved on success, otherwise a negative
9558  *   errno value. If nb_contexts is 0, return the total number of aged
9559  *   contexts. If nb_contexts is not 0, return the number of aged flows
9560  *   reported in the context array.
9561  */
9562 int
9563 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
9564 			uint32_t nb_contexts, struct rte_flow_error *error)
9565 {
9566 	const struct mlx5_flow_driver_ops *fops;
9567 	struct rte_flow_attr attr = { .transfer = 0 };
9568 
9569 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
9570 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
9571 		return fops->get_aged_flows(dev, contexts, nb_contexts,
9572 						    error);
9573 	}
9574 	DRV_LOG(ERR,
9575 		"port %u get aged flows is not supported.",
9576 		 dev->data->port_id);
9577 	return -ENOTSUP;
9578 }
9579 
9580 /* Wrapper for driver action_validate op callback */
9581 static int
9582 flow_drv_action_validate(struct rte_eth_dev *dev,
9583 			 const struct rte_flow_indir_action_conf *conf,
9584 			 const struct rte_flow_action *action,
9585 			 const struct mlx5_flow_driver_ops *fops,
9586 			 struct rte_flow_error *error)
9587 {
9588 	static const char err_msg[] = "indirect action validation unsupported";
9589 
9590 	if (!fops->action_validate) {
9591 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9592 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9593 				   NULL, err_msg);
9594 		return -rte_errno;
9595 	}
9596 	return fops->action_validate(dev, conf, action, error);
9597 }
9598 
9599 /**
9600  * Destroys the shared action by handle.
9601  *
9602  * @param dev
9603  *   Pointer to Ethernet device structure.
9604  * @param[in] handle
9605  *   Handle for the indirect action object to be destroyed.
9606  * @param[out] error
9607  *   Perform verbose error reporting if not NULL. PMDs initialize this
9608  *   structure in case of error only.
9609  *
9610  * @return
9611  *   0 on success, a negative errno value otherwise and rte_errno is set.
9612  *
9613  * @note: wrapper for driver action_destroy op callback.
9614  */
9615 static int
9616 mlx5_action_handle_destroy(struct rte_eth_dev *dev,
9617 			   struct rte_flow_action_handle *handle,
9618 			   struct rte_flow_error *error)
9619 {
9620 	static const char err_msg[] = "indirect action destruction unsupported";
9621 	struct rte_flow_attr attr = { .transfer = 0 };
9622 	const struct mlx5_flow_driver_ops *fops =
9623 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9624 
9625 	if (!fops->action_destroy) {
9626 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9627 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9628 				   NULL, err_msg);
9629 		return -rte_errno;
9630 	}
9631 	return fops->action_destroy(dev, handle, error);
9632 }
9633 
9634 /* Wrapper for driver action_update op callback */
9635 static int
9636 flow_drv_action_update(struct rte_eth_dev *dev,
9637 		       struct rte_flow_action_handle *handle,
9638 		       const void *update,
9639 		       const struct mlx5_flow_driver_ops *fops,
9640 		       struct rte_flow_error *error)
9641 {
9642 	static const char err_msg[] = "indirect action update unsupported";
9643 
9644 	if (!fops->action_update) {
9645 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9646 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9647 				   NULL, err_msg);
9648 		return -rte_errno;
9649 	}
9650 	return fops->action_update(dev, handle, update, error);
9651 }
9652 
9653 /* Wrapper for driver action_query op callback */
9654 static int
9655 flow_drv_action_query(struct rte_eth_dev *dev,
9656 		      const struct rte_flow_action_handle *handle,
9657 		      void *data,
9658 		      const struct mlx5_flow_driver_ops *fops,
9659 		      struct rte_flow_error *error)
9660 {
9661 	static const char err_msg[] = "indirect action query unsupported";
9662 
9663 	if (!fops->action_query) {
9664 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9665 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9666 				   NULL, err_msg);
9667 		return -rte_errno;
9668 	}
9669 	return fops->action_query(dev, handle, data, error);
9670 }
9671 
9672 /**
9673  * Create indirect action for reuse in multiple flow rules.
9674  *
9675  * @param dev
9676  *   Pointer to Ethernet device structure.
9677  * @param conf
9678  *   Pointer to indirect action object configuration.
9679  * @param[in] action
9680  *   Action configuration for indirect action object creation.
9681  * @param[out] error
9682  *   Perform verbose error reporting if not NULL. PMDs initialize this
9683  *   structure in case of error only.
9684  * @return
9685  *   A valid handle in case of success, NULL otherwise and rte_errno is set.
9686  */
9687 static struct rte_flow_action_handle *
9688 mlx5_action_handle_create(struct rte_eth_dev *dev,
9689 			  const struct rte_flow_indir_action_conf *conf,
9690 			  const struct rte_flow_action *action,
9691 			  struct rte_flow_error *error)
9692 {
9693 	static const char err_msg[] = "indirect action creation unsupported";
9694 	struct rte_flow_attr attr = { .transfer = 0 };
9695 	const struct mlx5_flow_driver_ops *fops =
9696 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9697 
9698 	if (flow_drv_action_validate(dev, conf, action, fops, error))
9699 		return NULL;
9700 	if (!fops->action_create) {
9701 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9702 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9703 				   NULL, err_msg);
9704 		return NULL;
9705 	}
9706 	return fops->action_create(dev, conf, action, error);
9707 }
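
/*
 * Illustrative sketch: creating and querying an indirect counter action
 * through the generic API served by the wrappers above. port_id and
 * error are assumed; use() is a hypothetical consumer.
 *
 *	const struct rte_flow_indir_action_conf conf = { .ingress = 1 };
 *	const struct rte_flow_action act = {
 *		.type = RTE_FLOW_ACTION_TYPE_COUNT,
 *		.conf = &(struct rte_flow_action_count){ .id = 0 },
 *	};
 *	struct rte_flow_query_count qc = { .reset = 0 };
 *	struct rte_flow_action_handle *h =
 *		rte_flow_action_handle_create(port_id, &conf, &act, &error);
 *
 *	if (h && rte_flow_action_handle_query(port_id, h, &qc, &error) == 0)
 *		use(qc.hits, qc.bytes);
 */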
9708 
9709 /**
9710  * Updates in place the indirect action configuration pointed to by *handle*
9711  * with the configuration provided as the *update* argument.
9712  * The update of the indirect action configuration affects all flow rules
9713  * reusing the action via the handle.
9714  *
9715  * @param dev
9716  *   Pointer to Ethernet device structure.
9717  * @param[in] handle
9718  *   Handle for the indirect action to be updated.
9719  * @param[in] update
9720  *   Action specification used to modify the action pointed by handle.
9721  *   *update* could be of the same type as the action pointed to by the
9722  *   *handle* argument, or some other structure like a wrapper, depending on
9723  *   the indirect action type.
9724  * @param[out] error
9725  *   Perform verbose error reporting if not NULL. PMDs initialize this
9726  *   structure in case of error only.
9727  *
9728  * @return
9729  *   0 on success, a negative errno value otherwise and rte_errno is set.
9730  */
9731 static int
9732 mlx5_action_handle_update(struct rte_eth_dev *dev,
9733 		struct rte_flow_action_handle *handle,
9734 		const void *update,
9735 		struct rte_flow_error *error)
9736 {
9737 	struct rte_flow_attr attr = { .transfer = 0 };
9738 	const struct mlx5_flow_driver_ops *fops =
9739 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9740 	int ret;
9741 
9742 	ret = flow_drv_action_validate(dev, NULL,
9743 			(const struct rte_flow_action *)update, fops, error);
9744 	if (ret)
9745 		return ret;
9746 	return flow_drv_action_update(dev, handle, update, fops,
9747 				      error);
9748 }
9749 
9750 /**
9751  * Query the indirect action by handle.
9752  *
9753  * This function allows retrieving action-specific data such as counters.
9754  * Data is gathered by special action which may be present/referenced in
9755  * Data is gathered by a special action which may be present/referenced in
9756  *
9757  * see @RTE_FLOW_ACTION_TYPE_COUNT
9758  *
9759  * @param dev
9760  *   Pointer to Ethernet device structure.
9761  * @param[in] handle
9762  *   Handle for the indirect action to query.
9763  * @param[in, out] data
9764  *   Pointer to storage for the associated query data type.
9765  * @param[out] error
9766  *   Perform verbose error reporting if not NULL. PMDs initialize this
9767  *   structure in case of error only.
9768  *
9769  * @return
9770  *   0 on success, a negative errno value otherwise and rte_errno is set.
9771  */
9772 static int
9773 mlx5_action_handle_query(struct rte_eth_dev *dev,
9774 			 const struct rte_flow_action_handle *handle,
9775 			 void *data,
9776 			 struct rte_flow_error *error)
9777 {
9778 	struct rte_flow_attr attr = { .transfer = 0 };
9779 	const struct mlx5_flow_driver_ops *fops =
9780 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9781 
9782 	return flow_drv_action_query(dev, handle, data, fops, error);
9783 }
9784 
9785 /**
9786  * Destroy all indirect actions (shared RSS).
9787  *
9788  * @param dev
9789  *   Pointer to Ethernet device.
9790  *
9791  * @return
9792  *   0 on success, a negative errno value otherwise and rte_errno is set.
9793  */
9794 int
9795 mlx5_action_handle_flush(struct rte_eth_dev *dev)
9796 {
9797 	struct rte_flow_error error;
9798 	struct mlx5_priv *priv = dev->data->dev_private;
9799 	struct mlx5_shared_action_rss *shared_rss;
9800 	int ret = 0;
9801 	uint32_t idx;
9802 
9803 	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
9804 		      priv->rss_shared_actions, idx, shared_rss, next) {
9805 		ret |= mlx5_action_handle_destroy(dev,
9806 		       (struct rte_flow_action_handle *)(uintptr_t)idx, &error);
9807 	}
9808 	return ret;
9809 }
9810 
9811 /**
9812  * Validate existing indirect actions against current device configuration
9813  * and attach them to device resources.
9814  *
9815  * @param dev
9816  *   Pointer to Ethernet device.
9817  *
9818  * @return
9819  *   0 on success, a negative errno value otherwise and rte_errno is set.
9820  */
9821 int
9822 mlx5_action_handle_attach(struct rte_eth_dev *dev)
9823 {
9824 	struct mlx5_priv *priv = dev->data->dev_private;
9825 	int ret = 0;
9826 	struct mlx5_ind_table_obj *ind_tbl, *ind_tbl_last;
9827 
9828 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9829 		const char *message;
9830 		uint32_t queue_idx;
9831 
9832 		ret = mlx5_validate_rss_queues(dev, ind_tbl->queues,
9833 					       ind_tbl->queues_n,
9834 					       &message, &queue_idx);
9835 		if (ret != 0) {
9836 			DRV_LOG(ERR, "Port %u cannot use queue %u in RSS: %s",
9837 				dev->data->port_id, ind_tbl->queues[queue_idx],
9838 				message);
9839 			break;
9840 		}
9841 	}
9842 	if (ret != 0)
9843 		return ret;
9844 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9845 		ret = mlx5_ind_table_obj_attach(dev, ind_tbl);
9846 		if (ret != 0) {
9847 			DRV_LOG(ERR, "Port %u could not attach "
9848 				"indirection table obj %p",
9849 				dev->data->port_id, (void *)ind_tbl);
9850 			goto error;
9851 		}
9852 	}
9853 
9854 	return 0;
9855 error:
9856 	ind_tbl_last = ind_tbl;
9857 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9858 		if (ind_tbl == ind_tbl_last)
9859 			break;
9860 		if (mlx5_ind_table_obj_detach(dev, ind_tbl) != 0)
9861 			DRV_LOG(CRIT, "Port %u could not detach "
9862 				"indirection table obj %p on rollback",
9863 				dev->data->port_id, (void *)ind_tbl);
9864 	}
9865 	return ret;
9866 }
9867 
9868 /**
9869  * Detach indirect actions of the device from its resources.
9870  *
9871  * @param dev
9872  *   Pointer to Ethernet device.
9873  *
9874  * @return
9875  *   0 on success, a negative errno value otherwise and rte_errno is set.
9876  */
9877 int
9878 mlx5_action_handle_detach(struct rte_eth_dev *dev)
9879 {
9880 	struct mlx5_priv *priv = dev->data->dev_private;
9881 	int ret = 0;
9882 	struct mlx5_ind_table_obj *ind_tbl, *ind_tbl_last;
9883 
9884 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9885 		ret = mlx5_ind_table_obj_detach(dev, ind_tbl);
9886 		if (ret != 0) {
9887 			DRV_LOG(ERR, "Port %u could not detach "
9888 				"indirection table obj %p",
9889 				dev->data->port_id, (void *)ind_tbl);
9890 			goto error;
9891 		}
9892 	}
9893 	return 0;
9894 error:
9895 	ind_tbl_last = ind_tbl;
9896 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9897 		if (ind_tbl == ind_tbl_last)
9898 			break;
9899 		if (mlx5_ind_table_obj_attach(dev, ind_tbl) != 0)
9900 			DRV_LOG(CRIT, "Port %u could not attach "
9901 				"indirection table obj %p on rollback",
9902 				dev->data->port_id, (void *)ind_tbl);
9903 	}
9904 	return ret;
9905 }
9906 
9907 #ifndef HAVE_MLX5DV_DR
9908 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
9909 #else
9910 #define MLX5_DOMAIN_SYNC_FLOW \
9911 	(MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
9912 #endif
9913 
9914 int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
9915 {
9916 	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
9917 	const struct mlx5_flow_driver_ops *fops;
9918 	int ret;
9919 	struct rte_flow_attr attr = { .transfer = 0 };
9920 
9921 	fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9922 	ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
9923 	if (ret > 0)
9924 		ret = -ret;
9925 	return ret;
9926 }
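
/*
 * Illustrative usage, assuming the domain bit macros declared in
 * rte_pmd_mlx5.h; handle_sync_error() is hypothetical.
 *
 *	if (rte_pmd_mlx5_sync_flow(port_id, MLX5_DOMAIN_BIT_NIC_RX |
 *					    MLX5_DOMAIN_BIT_NIC_TX))
 *		// HW may not reflect the latest SW rules yet.
 *		handle_sync_error();
 */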
9927 
9928 const struct mlx5_flow_tunnel *
9929 mlx5_get_tof(const struct rte_flow_item *item,
9930 	     const struct rte_flow_action *action,
9931 	     enum mlx5_tof_rule_type *rule_type)
9932 {
9933 	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
9934 		if (item->type == (typeof(item->type))
9935 				  MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL) {
9936 			*rule_type = MLX5_TUNNEL_OFFLOAD_MATCH_RULE;
9937 			return flow_items_to_tunnel(item);
9938 		}
9939 	}
9940 	for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
9941 		if (action->type == (typeof(action->type))
9942 				    MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET) {
9943 			*rule_type = MLX5_TUNNEL_OFFLOAD_SET_RULE;
9944 			return flow_actions_to_tunnel(action);
9945 		}
9946 	}
9947 	return NULL;
9948 }
9949 
9950 /**
9951  * Tunnel offload functionality is defined for the DV environment only.
9952  */
9953 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9954 __extension__
9955 union tunnel_offload_mark {
9956 	uint32_t val;
9957 	struct {
9958 		uint32_t app_reserve:8;
9959 		uint32_t table_id:15;
9960 		uint32_t transfer:1;
9961 		uint32_t _unused_:8;
9962 	};
9963 };
9964 
9965 static bool
9966 mlx5_access_tunnel_offload_db
9967 	(struct rte_eth_dev *dev,
9968 	 bool (*match)(struct rte_eth_dev *,
9969 		       struct mlx5_flow_tunnel *, const void *),
9970 	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
9971 	 void (*miss)(struct rte_eth_dev *, void *),
9972 	 void *ctx, bool lock_op);
9973 
9974 static int
9975 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
9976 			     struct rte_flow *flow,
9977 			     const struct rte_flow_attr *attr,
9978 			     const struct rte_flow_action *app_actions,
9979 			     uint32_t flow_idx,
9980 			     const struct mlx5_flow_tunnel *tunnel,
9981 			     struct tunnel_default_miss_ctx *ctx,
9982 			     struct rte_flow_error *error)
9983 {
9984 	struct mlx5_priv *priv = dev->data->dev_private;
9985 	struct mlx5_flow *dev_flow;
9986 	struct rte_flow_attr miss_attr = *attr;
9987 	const struct rte_flow_item miss_items[2] = {
9988 		{
9989 			.type = RTE_FLOW_ITEM_TYPE_ETH,
9990 			.spec = NULL,
9991 			.last = NULL,
9992 			.mask = NULL
9993 		},
9994 		{
9995 			.type = RTE_FLOW_ITEM_TYPE_END,
9996 			.spec = NULL,
9997 			.last = NULL,
9998 			.mask = NULL
9999 		}
10000 	};
10001 	union tunnel_offload_mark mark_id;
10002 	struct rte_flow_action_mark miss_mark;
10003 	struct rte_flow_action miss_actions[3] = {
10004 		[0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
10005 		[2] = { .type = RTE_FLOW_ACTION_TYPE_END, .conf = NULL }
10006 	};
10007 	const struct rte_flow_action_jump *jump_data;
10008 	uint32_t i, flow_table = 0; /* prevent compilation warning */
10009 	struct flow_grp_info grp_info = {
10010 		.external = 1,
10011 		.transfer = attr->transfer,
10012 		.fdb_def_rule = !!priv->fdb_def_rule,
10013 		.std_tbl_fix = 0,
10014 	};
10015 	int ret;
10016 
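	/*
	 * Build the default-miss fate: non-transfer rules spread missed
	 * packets with RSS over all configured Rx queues, while transfer
	 * rules jump to the dedicated FDB miss group.
	 */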
10017 	if (!attr->transfer) {
10018 		uint32_t q_size;
10019 
10020 		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
10021 		q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
10022 		ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
10023 					 0, SOCKET_ID_ANY);
10024 		if (!ctx->queue)
10025 			return rte_flow_error_set
10026 				(error, ENOMEM,
10027 				RTE_FLOW_ERROR_TYPE_ACTION_CONF,
10028 				NULL, "invalid default miss RSS");
10029 		ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT;
10030 		ctx->action_rss.level = 0;
10031 		ctx->action_rss.types = priv->rss_conf.rss_hf;
10032 		ctx->action_rss.key_len = priv->rss_conf.rss_key_len;
10033 		ctx->action_rss.queue_num = priv->reta_idx_n;
10034 		ctx->action_rss.key = priv->rss_conf.rss_key;
10035 		ctx->action_rss.queue = ctx->queue;
10036 		if (!priv->reta_idx_n || !priv->rxqs_n)
10037 			return rte_flow_error_set
10038 				(error, EINVAL,
10039 				RTE_FLOW_ERROR_TYPE_ACTION_CONF,
10040 				NULL, "invalid port configuration");
10041 		if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
10042 			ctx->action_rss.types = 0;
10043 		for (i = 0; i != priv->reta_idx_n; ++i)
10044 			ctx->queue[i] = (*priv->reta_idx)[i];
10045 	} else {
10046 		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
10047 		ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
10048 	}
10049 	miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
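	/* Locate the application JUMP action; tunnel rules are expected to have one. */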
10050 	for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++);
10051 	jump_data = app_actions->conf;
10052 	miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
10053 	miss_attr.group = jump_data->group;
10054 	ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
10055 				       &flow_table, &grp_info, error);
10056 	if (ret)
10057 		return rte_flow_error_set(error, EINVAL,
10058 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
10059 					  NULL, "invalid tunnel id");
10060 	mark_id.app_reserve = 0;
10061 	mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
10062 	mark_id.transfer = !!attr->transfer;
10063 	mark_id._unused_ = 0;
10064 	miss_mark.id = mark_id.val;
10065 	dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
10066 				    miss_items, miss_actions, flow_idx, error);
10067 	if (!dev_flow)
10068 		return -rte_errno;
10069 	dev_flow->flow = flow;
10070 	dev_flow->external = true;
10071 	dev_flow->tunnel = tunnel;
10072 	dev_flow->tof_type = MLX5_TUNNEL_OFFLOAD_MISS_RULE;
10073 	/* Subflow object was created, we must include it in the list. */
10074 	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
10075 		      dev_flow->handle, next);
10076 	DRV_LOG(DEBUG,
10077 		"port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
10078 		dev->data->port_id, tunnel->app_tunnel.type,
10079 		tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
10080 	ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
10081 				  miss_actions, error);
10082 	if (!ret)
10083 		ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
10084 						  error);
10085 
10086 	return ret;
10087 }
10088 
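/**
 * Decode a tunnel offload mark back into its flow table entry.
 *
 * Rebuilds the flow table key from the table id and transfer bit packed
 * into the mark and looks it up in the shared flow table hash list.
 * Returns NULL when the mark does not name a known table.
 */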
10089 static const struct mlx5_flow_tbl_data_entry *
10090 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
10091 {
10092 	struct mlx5_priv *priv = dev->data->dev_private;
10093 	struct mlx5_dev_ctx_shared *sh = priv->sh;
10094 	struct mlx5_list_entry *he;
10095 	union tunnel_offload_mark mbits = { .val = mark };
10096 	union mlx5_flow_tbl_key table_key = {
10097 		{
10098 			.level = tunnel_id_to_flow_tbl(mbits.table_id),
10099 			.id = 0,
10100 			.reserved = 0,
10101 			.dummy = 0,
10102 			.is_fdb = !!mbits.transfer,
10103 			.is_egress = 0,
10104 		}
10105 	};
10106 	struct mlx5_flow_cb_ctx ctx = {
10107 		.data = &table_key.v64,
10108 	};
10109 
10110 	he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, &ctx);
10111 	return he ?
10112 	       container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL;
10113 }
10114 
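/*
 * Hash list callbacks translating a (tunnel id, group) key into a
 * driver flow table id: entries are allocated from the TNL_TBL_ID
 * indexed pool on first use and released through the remove callback
 * together with their table id.
 */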
10115 static void
10116 mlx5_flow_tunnel_grp2tbl_remove_cb(void *tool_ctx,
10117 				   struct mlx5_list_entry *entry)
10118 {
10119 	struct mlx5_dev_ctx_shared *sh = tool_ctx;
10120 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10121 
10122 	mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10123 			tunnel_flow_tbl_to_id(tte->flow_table));
10124 	mlx5_free(tte);
10125 }
10126 
10127 static int
10128 mlx5_flow_tunnel_grp2tbl_match_cb(void *tool_ctx __rte_unused,
10129 				  struct mlx5_list_entry *entry, void *cb_ctx)
10130 {
10131 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
10132 	union tunnel_tbl_key tbl = {
10133 		.val = *(uint64_t *)(ctx->data),
10134 	};
10135 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10136 
10137 	return tbl.tunnel_id != tte->tunnel_id || tbl.group != tte->group;
10138 }
10139 
10140 static struct mlx5_list_entry *
10141 mlx5_flow_tunnel_grp2tbl_create_cb(void *tool_ctx, void *cb_ctx)
10142 {
10143 	struct mlx5_dev_ctx_shared *sh = tool_ctx;
10144 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
10145 	struct tunnel_tbl_entry *tte;
10146 	union tunnel_tbl_key tbl = {
10147 		.val = *(uint64_t *)(ctx->data),
10148 	};
10149 
10150 	tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
10151 			  sizeof(*tte), 0,
10152 			  SOCKET_ID_ANY);
10153 	if (!tte)
10154 		goto err;
10155 	mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10156 			  &tte->flow_table);
10157 	if (tte->flow_table >= MLX5_MAX_TABLES) {
10158 		DRV_LOG(ERR, "Tunnel TBL ID %d exceed max limit.",
10159 			tte->flow_table);
10160 		mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10161 				tte->flow_table);
10162 		goto err;
10163 	} else if (!tte->flow_table) {
10164 		goto err;
10165 	}
10166 	tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table);
10167 	tte->tunnel_id = tbl.tunnel_id;
10168 	tte->group = tbl.group;
10169 	return &tte->hash;
10170 err:
10171 	if (tte)
10172 		mlx5_free(tte);
10173 	return NULL;
10174 }
10175 
10176 static struct mlx5_list_entry *
10177 mlx5_flow_tunnel_grp2tbl_clone_cb(void *tool_ctx __rte_unused,
10178 				  struct mlx5_list_entry *oentry,
10179 				  void *cb_ctx __rte_unused)
10180 {
10181 	struct tunnel_tbl_entry *tte = mlx5_malloc(MLX5_MEM_SYS, sizeof(*tte),
10182 						   0, SOCKET_ID_ANY);
10183 
10184 	if (!tte)
10185 		return NULL;
10186 	memcpy(tte, oentry, sizeof(*tte));
10187 	return &tte->hash;
10188 }
10189 
10190 static void
10191 mlx5_flow_tunnel_grp2tbl_clone_free_cb(void *tool_ctx __rte_unused,
10192 				       struct mlx5_list_entry *entry)
10193 {
10194 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10195 
10196 	mlx5_free(tte);
10197 }
10198 
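/**
 * Translate an application group to a driver flow table.
 *
 * The lookup key combines the tunnel id (0 for the global table set)
 * with the application group; registration allocates the backing table
 * entry on demand through the grp2tbl callbacks above.
 */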
10199 static uint32_t
10200 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
10201 				const struct mlx5_flow_tunnel *tunnel,
10202 				uint32_t group, uint32_t *table,
10203 				struct rte_flow_error *error)
10204 {
10205 	struct mlx5_list_entry *he;
10206 	struct tunnel_tbl_entry *tte;
10207 	union tunnel_tbl_key key = {
10208 		.tunnel_id = tunnel ? tunnel->tunnel_id : 0,
10209 		.group = group
10210 	};
10211 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10212 	struct mlx5_hlist *group_hash;
10213 	struct mlx5_flow_cb_ctx ctx = {
10214 		.data = &key.val,
10215 	};
10216 
10217 	group_hash = tunnel ? tunnel->groups : thub->groups;
10218 	he = mlx5_hlist_register(group_hash, key.val, &ctx);
10219 	if (!he)
10220 		return rte_flow_error_set(error, EINVAL,
10221 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
10222 					  NULL,
10223 					  "tunnel group index not supported");
10224 	tte = container_of(he, typeof(*tte), hash);
10225 	*table = tte->flow_table;
10226 	DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
10227 		dev->data->port_id, key.tunnel_id, group, *table);
10228 	return 0;
10229 }
10230 
10231 static void
10232 mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
10233 		      struct mlx5_flow_tunnel *tunnel)
10234 {
10235 	struct mlx5_priv *priv = dev->data->dev_private;
10236 	struct mlx5_indexed_pool *ipool;
10237 
10238 	DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
10239 		dev->data->port_id, tunnel->tunnel_id);
10240 	LIST_REMOVE(tunnel, chain);
10241 	mlx5_hlist_destroy(tunnel->groups);
10242 	ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
10243 	mlx5_ipool_free(ipool, tunnel->tunnel_id);
10244 }
10245 
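/*
 * Walk the tunnel hub list under its spinlock and run the caller's
 * match/hit/miss protocol against it. With lock_op set the hit/miss
 * callbacks also execute under the lock, which lets a miss callback
 * such as get_tunnel_miss() temporarily drop it; otherwise the lock is
 * released right after the scan.
 */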
10246 static bool
10247 mlx5_access_tunnel_offload_db
10248 	(struct rte_eth_dev *dev,
10249 	 bool (*match)(struct rte_eth_dev *,
10250 		       struct mlx5_flow_tunnel *, const void *),
10251 	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
10252 	 void (*miss)(struct rte_eth_dev *, void *),
10253 	 void *ctx, bool lock_op)
10254 {
10255 	bool verdict = false;
10256 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10257 	struct mlx5_flow_tunnel *tunnel;
10258 
10259 	rte_spinlock_lock(&thub->sl);
10260 	LIST_FOREACH(tunnel, &thub->tunnels, chain) {
10261 		verdict = match(dev, tunnel, (const void *)ctx);
10262 		if (verdict)
10263 			break;
10264 	}
10265 	if (!lock_op)
10266 		rte_spinlock_unlock(&thub->sl);
10267 	if (verdict && hit)
10268 		hit(dev, tunnel, ctx);
10269 	if (!verdict && miss)
10270 		miss(dev, ctx);
10271 	if (lock_op)
10272 		rte_spinlock_unlock(&thub->sl);
10273 
10274 	return verdict;
10275 }
10276 
10277 struct tunnel_db_find_tunnel_id_ctx {
10278 	uint32_t tunnel_id;
10279 	struct mlx5_flow_tunnel *tunnel;
10280 };
10281 
10282 static bool
10283 find_tunnel_id_match(struct rte_eth_dev *dev,
10284 		     struct mlx5_flow_tunnel *tunnel, const void *x)
10285 {
10286 	const struct tunnel_db_find_tunnel_id_ctx *ctx = x;
10287 
10288 	RTE_SET_USED(dev);
10289 	return tunnel->tunnel_id == ctx->tunnel_id;
10290 }
10291 
10292 static void
10293 find_tunnel_id_hit(struct rte_eth_dev *dev,
10294 		   struct mlx5_flow_tunnel *tunnel, void *x)
10295 {
10296 	struct tunnel_db_find_tunnel_id_ctx *ctx = x;
10297 	RTE_SET_USED(dev);
10298 	ctx->tunnel = tunnel;
10299 }
10300 
10301 static struct mlx5_flow_tunnel *
10302 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
10303 {
10304 	struct tunnel_db_find_tunnel_id_ctx ctx = {
10305 		.tunnel_id = id,
10306 	};
10307 
10308 	mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match,
10309 				      find_tunnel_id_hit, NULL, &ctx, true);
10310 
10311 	return ctx.tunnel;
10312 }
10313 
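/**
 * Allocate and initialize a PMD tunnel object.
 *
 * The object id comes from the TUNNEL_ID indexed pool and is capped at
 * MLX5_MAX_TUNNELS; each tunnel owns a private group hash list plus the
 * pre-built PMD item and action that the application receives from the
 * tunnel offload API.
 */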
10314 static struct mlx5_flow_tunnel *
10315 mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
10316 			  const struct rte_flow_tunnel *app_tunnel)
10317 {
10318 	struct mlx5_priv *priv = dev->data->dev_private;
10319 	struct mlx5_indexed_pool *ipool;
10320 	struct mlx5_flow_tunnel *tunnel;
10321 	uint32_t id;
10322 
10323 	ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
10324 	tunnel = mlx5_ipool_zmalloc(ipool, &id);
10325 	if (!tunnel)
10326 		return NULL;
10327 	if (id >= MLX5_MAX_TUNNELS) {
10328 		mlx5_ipool_free(ipool, id);
10329 		DRV_LOG(ERR, "Tunnel ID %d exceed max limit.", id);
10330 		return NULL;
10331 	}
10332 	tunnel->groups = mlx5_hlist_create("tunnel groups", 64, false, true,
10333 					   priv->sh,
10334 					   mlx5_flow_tunnel_grp2tbl_create_cb,
10335 					   mlx5_flow_tunnel_grp2tbl_match_cb,
10336 					   mlx5_flow_tunnel_grp2tbl_remove_cb,
10337 					   mlx5_flow_tunnel_grp2tbl_clone_cb,
10338 					mlx5_flow_tunnel_grp2tbl_clone_free_cb);
10339 	if (!tunnel->groups) {
10340 		mlx5_ipool_free(ipool, id);
10341 		return NULL;
10342 	}
10343 	/* Initialize the new PMD tunnel. */
10344 	memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
10345 	tunnel->tunnel_id = id;
10346 	tunnel->action.type = (typeof(tunnel->action.type))
10347 			      MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
10348 	tunnel->action.conf = tunnel;
10349 	tunnel->item.type = (typeof(tunnel->item.type))
10350 			    MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
10351 	tunnel->item.spec = tunnel;
10352 	tunnel->item.last = NULL;
10353 	tunnel->item.mask = NULL;
10354 
10355 	DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
10356 		dev->data->port_id, tunnel->tunnel_id);
10357 
10358 	return tunnel;
10359 }
10360 
10361 struct tunnel_db_get_tunnel_ctx {
10362 	const struct rte_flow_tunnel *app_tunnel;
10363 	struct mlx5_flow_tunnel *tunnel;
10364 };
10365 
10366 static bool get_tunnel_match(struct rte_eth_dev *dev,
10367 			     struct mlx5_flow_tunnel *tunnel, const void *x)
10368 {
10369 	const struct tunnel_db_get_tunnel_ctx *ctx = x;
10370 
10371 	RTE_SET_USED(dev);
10372 	return !memcmp(ctx->app_tunnel, &tunnel->app_tunnel,
10373 		       sizeof(*ctx->app_tunnel));
10374 }
10375 
10376 static void get_tunnel_hit(struct rte_eth_dev *dev,
10377 			   struct mlx5_flow_tunnel *tunnel, void *x)
10378 {
10379 	/* called under tunnel spinlock protection */
10380 	struct tunnel_db_get_tunnel_ctx *ctx = x;
10381 
10382 	RTE_SET_USED(dev);
10383 	tunnel->refctn++;
10384 	ctx->tunnel = tunnel;
10385 }
10386 
10387 static void get_tunnel_miss(struct rte_eth_dev *dev, void *x)
10388 {
10389 	/* called under tunnel spinlock protection */
10390 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10391 	struct tunnel_db_get_tunnel_ctx *ctx = x;
10392 
10393 	rte_spinlock_unlock(&thub->sl);
10394 	ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel);
10395 	rte_spinlock_lock(&thub->sl);
10396 	if (ctx->tunnel) {
10397 		ctx->tunnel->refctn = 1;
10398 		LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain);
10399 	}
10400 }
10401 
10402 
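/*
 * Get-or-create entry point for tunnel objects: an existing tunnel that
 * byte-wise matches the application description is reused with its
 * reference count bumped, otherwise a new one is allocated outside the
 * hub lock and linked in by the miss callback.
 */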
10403 static int
10404 mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
10405 		     const struct rte_flow_tunnel *app_tunnel,
10406 		     struct mlx5_flow_tunnel **tunnel)
10407 {
10408 	struct tunnel_db_get_tunnel_ctx ctx = {
10409 		.app_tunnel = app_tunnel,
10410 	};
10411 
10412 	mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit,
10413 				      get_tunnel_miss, &ctx, true);
10414 	*tunnel = ctx.tunnel;
10415 	return ctx.tunnel ? 0 : -ENOMEM;
10416 }
10417 
10418 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
10419 {
10420 	struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
10421 
10422 	if (!thub)
10423 		return;
10424 	if (!LIST_EMPTY(&thub->tunnels))
10425 		DRV_LOG(WARNING, "port %u tunnels present", port_id);
10426 	mlx5_hlist_destroy(thub->groups);
10427 	mlx5_free(thub);
10428 }
10429 
10430 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
10431 {
10432 	int err;
10433 	struct mlx5_flow_tunnel_hub *thub;
10434 
10435 	thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
10436 			   0, SOCKET_ID_ANY);
10437 	if (!thub)
10438 		return -ENOMEM;
10439 	LIST_INIT(&thub->tunnels);
10440 	rte_spinlock_init(&thub->sl);
10441 	thub->groups = mlx5_hlist_create("flow groups", 64,
10442 					 false, true, sh,
10443 					 mlx5_flow_tunnel_grp2tbl_create_cb,
10444 					 mlx5_flow_tunnel_grp2tbl_match_cb,
10445 					 mlx5_flow_tunnel_grp2tbl_remove_cb,
10446 					 mlx5_flow_tunnel_grp2tbl_clone_cb,
10447 					mlx5_flow_tunnel_grp2tbl_clone_free_cb);
10448 	if (!thub->groups) {
10449 		err = -rte_errno;
10450 		goto err;
10451 	}
10452 	sh->tunnel_hub = thub;
10453 
10454 	return 0;
10455 
10456 err:
10457 	if (thub->groups)
10458 		mlx5_hlist_destroy(thub->groups);
10459 	if (thub)
10460 		mlx5_free(thub);
10461 	return err;
10462 }
10463 
10464 static inline int
10465 mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
10466 			  struct rte_flow_tunnel *tunnel,
10467 			  struct rte_flow_error *error)
10468 {
10469 	struct mlx5_priv *priv = dev->data->dev_private;
10470 
10471 	if (!priv->sh->config.dv_flow_en)
10472 		return rte_flow_error_set(error, ENOTSUP,
10473 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10474 					  "flow DV interface is off");
10475 	if (!is_tunnel_offload_active(dev))
10476 		return rte_flow_error_set(error, ENOTSUP,
10477 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10478 					  "tunnel offload was not activated");
10479 	if (!tunnel)
10480 		return rte_flow_error_set(error, EINVAL,
10481 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10482 					  "no application tunnel");
10483 	switch (tunnel->type) {
10484 	default:
10485 		return rte_flow_error_set(error, EINVAL,
10486 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10487 					  "unsupported tunnel type");
10488 	case RTE_FLOW_ITEM_TYPE_VXLAN:
10489 	case RTE_FLOW_ITEM_TYPE_GRE:
10490 	case RTE_FLOW_ITEM_TYPE_NVGRE:
10491 	case RTE_FLOW_ITEM_TYPE_GENEVE:
10492 		break;
10493 	}
10494 	return 0;
10495 }
10496 
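/*
 * PMD hook behind rte_flow_tunnel_decap_set(): validates the tunnel
 * description, binds (or creates) the PMD tunnel object and hands the
 * application a single private action to prepend to its rule. A rough
 * application-side sketch, with error handling elided:
 *
 *   struct rte_flow_action *pmd_actions;
 *   uint32_t n_actions;
 *   rte_flow_tunnel_decap_set(port_id, &tunnel, &pmd_actions,
 *                             &n_actions, &error);
 *
 * mlx5_flow_tunnel_match() below is the symmetric hook for pattern
 * items.
 */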
10497 static int
10498 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
10499 		    struct rte_flow_tunnel *app_tunnel,
10500 		    struct rte_flow_action **actions,
10501 		    uint32_t *num_of_actions,
10502 		    struct rte_flow_error *error)
10503 {
10504 	struct mlx5_flow_tunnel *tunnel;
10505 	int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
10506 
10507 	if (ret)
10508 		return ret;
10509 	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
10510 	if (ret < 0) {
10511 		return rte_flow_error_set(error, ret,
10512 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10513 					  "failed to initialize pmd tunnel");
10514 	}
10515 	*actions = &tunnel->action;
10516 	*num_of_actions = 1;
10517 	return 0;
10518 }
10519 
10520 static int
10521 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
10522 		       struct rte_flow_tunnel *app_tunnel,
10523 		       struct rte_flow_item **items,
10524 		       uint32_t *num_of_items,
10525 		       struct rte_flow_error *error)
10526 {
10527 	struct mlx5_flow_tunnel *tunnel;
10528 	int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
10529 
10530 	if (ret)
10531 		return ret;
10532 	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
10533 	if (ret < 0) {
10534 		return rte_flow_error_set(error, ret,
10535 					  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
10536 					  "failed to initialize pmd tunnel");
10537 	}
10538 	*items = &tunnel->item;
10539 	*num_of_items = 1;
10540 	return 0;
10541 }
10542 
10543 struct tunnel_db_element_release_ctx {
10544 	struct rte_flow_item *items;
10545 	struct rte_flow_action *actions;
10546 	uint32_t num_elements;
10547 	struct rte_flow_error *error;
10548 	int ret;
10549 };
10550 
10551 static bool
10552 tunnel_element_release_match(struct rte_eth_dev *dev,
10553 			     struct mlx5_flow_tunnel *tunnel, const void *x)
10554 {
10555 	const struct tunnel_db_element_release_ctx *ctx = x;
10556 
10557 	RTE_SET_USED(dev);
10558 	if (ctx->num_elements != 1)
10559 		return false;
10560 	else if (ctx->items)
10561 		return ctx->items == &tunnel->item;
10562 	else if (ctx->actions)
10563 		return ctx->actions == &tunnel->action;
10564 
10565 	return false;
10566 }
10567 
10568 static void
10569 tunnel_element_release_hit(struct rte_eth_dev *dev,
10570 			   struct mlx5_flow_tunnel *tunnel, void *x)
10571 {
10572 	struct tunnel_db_element_release_ctx *ctx = x;
10573 	ctx->ret = 0;
10574 	if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
10575 		mlx5_flow_tunnel_free(dev, tunnel);
10576 }
10577 
10578 static void
10579 tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
10580 {
10581 	struct tunnel_db_element_release_ctx *ctx = x;
10582 	RTE_SET_USED(dev);
10583 	ctx->ret = rte_flow_error_set(ctx->error, EINVAL,
10584 				      RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
10585 				      "invalid argument");
10586 }
10587 
10588 static int
10589 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
10590 		       struct rte_flow_item *pmd_items,
10591 		       uint32_t num_items, struct rte_flow_error *err)
10592 {
10593 	struct tunnel_db_element_release_ctx ctx = {
10594 		.items = pmd_items,
10595 		.actions = NULL,
10596 		.num_elements = num_items,
10597 		.error = err,
10598 	};
10599 
10600 	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
10601 				      tunnel_element_release_hit,
10602 				      tunnel_element_release_miss, &ctx, false);
10603 
10604 	return ctx.ret;
10605 }
10606 
10607 static int
10608 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
10609 			 struct rte_flow_action *pmd_actions,
10610 			 uint32_t num_actions, struct rte_flow_error *err)
10611 {
10612 	struct tunnel_db_element_release_ctx ctx = {
10613 		.items = NULL,
10614 		.actions = pmd_actions,
10615 		.num_elements = num_actions,
10616 		.error = err,
10617 	};
10618 
10619 	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
10620 				      tunnel_element_release_hit,
10621 				      tunnel_element_release_miss, &ctx, false);
10622 
10623 	return ctx.ret;
10624 }
10625 
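/*
 * Recover tunnel restore info from a received mbuf: both FDIR mark
 * flags must be present, and the 32-bit mark is decoded back into the
 * flow table entry that carries the originating tunnel description.
 */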
10626 static int
10627 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
10628 				  struct rte_mbuf *m,
10629 				  struct rte_flow_restore_info *info,
10630 				  struct rte_flow_error *err)
10631 {
10632 	uint64_t ol_flags = m->ol_flags;
10633 	const struct mlx5_flow_tbl_data_entry *tble;
10634 	const uint64_t mask = RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
10635 
10636 	if (!is_tunnel_offload_active(dev)) {
10637 		info->flags = 0;
10638 		return 0;
10639 	}
10640 
10641 	if ((ol_flags & mask) != mask)
10642 		goto err;
10643 	tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
10644 	if (!tble) {
10645 		DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
10646 			dev->data->port_id, m->hash.fdir.hi);
10647 		goto err;
10648 	}
10649 	MLX5_ASSERT(tble->tunnel);
10650 	memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel));
10651 	info->group_id = tble->group_id;
10652 	info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
10653 		      RTE_FLOW_RESTORE_INFO_GROUP_ID |
10654 		      RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
10655 
10656 	return 0;
10657 
10658 err:
10659 	return rte_flow_error_set(err, EINVAL,
10660 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10661 				  "failed to get restore info");
10662 }
10663 
10664 #else /* HAVE_IBV_FLOW_DV_SUPPORT */
10665 static int
10666 mlx5_flow_tunnel_decap_set(__rte_unused struct rte_eth_dev *dev,
10667 			   __rte_unused struct rte_flow_tunnel *app_tunnel,
10668 			   __rte_unused struct rte_flow_action **actions,
10669 			   __rte_unused uint32_t *num_of_actions,
10670 			   __rte_unused struct rte_flow_error *error)
10671 {
10672 	return -ENOTSUP;
10673 }
10674 
10675 static int
10676 mlx5_flow_tunnel_match(__rte_unused struct rte_eth_dev *dev,
10677 		       __rte_unused struct rte_flow_tunnel *app_tunnel,
10678 		       __rte_unused struct rte_flow_item **items,
10679 		       __rte_unused uint32_t *num_of_items,
10680 		       __rte_unused struct rte_flow_error *error)
10681 {
10682 	return -ENOTSUP;
10683 }
10684 
10685 static int
10686 mlx5_flow_tunnel_item_release(__rte_unused struct rte_eth_dev *dev,
10687 			      __rte_unused struct rte_flow_item *pmd_items,
10688 			      __rte_unused uint32_t num_items,
10689 			      __rte_unused struct rte_flow_error *err)
10690 {
10691 	return -ENOTSUP;
10692 }
10693 
10694 static int
10695 mlx5_flow_tunnel_action_release(__rte_unused struct rte_eth_dev *dev,
10696 				__rte_unused struct rte_flow_action *pmd_action,
10697 				__rte_unused uint32_t num_actions,
10698 				__rte_unused struct rte_flow_error *err)
10699 {
10700 	return -ENOTSUP;
10701 }
10702 
10703 static int
10704 mlx5_flow_tunnel_get_restore_info(__rte_unused struct rte_eth_dev *dev,
10705 				  __rte_unused struct rte_mbuf *m,
10706 				  __rte_unused struct rte_flow_restore_info *i,
10707 				  __rte_unused struct rte_flow_error *err)
10708 {
10709 	return -ENOTSUP;
10710 }
10711 
10712 static int
10713 flow_tunnel_add_default_miss(__rte_unused struct rte_eth_dev *dev,
10714 			     __rte_unused struct rte_flow *flow,
10715 			     __rte_unused const struct rte_flow_attr *attr,
10716 			     __rte_unused const struct rte_flow_action *actions,
10717 			     __rte_unused uint32_t flow_idx,
10718 			     __rte_unused const struct mlx5_flow_tunnel *tunnel,
10719 			     __rte_unused struct tunnel_default_miss_ctx *ctx,
10720 			     __rte_unused struct rte_flow_error *error)
10721 {
10722 	return -ENOTSUP;
10723 }
10724 
10725 static struct mlx5_flow_tunnel *
10726 mlx5_find_tunnel_id(__rte_unused struct rte_eth_dev *dev,
10727 		    __rte_unused uint32_t id)
10728 {
10729 	return NULL;
10730 }
10731 
10732 static void
10733 mlx5_flow_tunnel_free(__rte_unused struct rte_eth_dev *dev,
10734 		      __rte_unused struct mlx5_flow_tunnel *tunnel)
10735 {
10736 }
10737 
10738 static uint32_t
10739 tunnel_flow_group_to_flow_table(__rte_unused struct rte_eth_dev *dev,
10740 				__rte_unused const struct mlx5_flow_tunnel *t,
10741 				__rte_unused uint32_t group,
10742 				__rte_unused uint32_t *table,
10743 				struct rte_flow_error *error)
10744 {
10745 	return rte_flow_error_set(error, ENOTSUP,
10746 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10747 				  "tunnel offload requires DV support");
10748 }
10749 
10750 void
10751 mlx5_release_tunnel_hub(__rte_unused struct mlx5_dev_ctx_shared *sh,
10752 			__rte_unused uint16_t port_id)
10753 {
10754 }
10755 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
10756 
10757 /* Flex flow item API */
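/*
 * Flex item creation is allowed only when the port has an underlying
 * PCI device with one of the listed BlueField ids; other ports are
 * rejected before the per-driver item_create callback runs.
 */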
10758 static struct rte_flow_item_flex_handle *
10759 mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
10760 			   const struct rte_flow_item_flex_conf *conf,
10761 			   struct rte_flow_error *error)
10762 {
10763 	static const char err_msg[] = "flex item creation unsupported";
10764 	struct mlx5_priv *priv = dev->data->dev_private;
10765 	struct rte_flow_attr attr = { .transfer = 0 };
10766 	const struct mlx5_flow_driver_ops *fops =
10767 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10768 
10769 	if (!priv->pci_dev) {
10770 		rte_flow_error_set(error, ENOTSUP,
10771 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10772 				   "create flex item on PF only");
10773 		return NULL;
10774 	}
10775 	switch (priv->pci_dev->id.device_id) {
10776 	case PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF:
10777 	case PCI_DEVICE_ID_MELLANOX_CONNECTX7BF:
10778 		break;
10779 	default:
10780 		rte_flow_error_set(error, ENOTSUP,
10781 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10782 				   "flex item available on BlueField ports only");
10783 		return NULL;
10784 	}
10785 	if (!fops->item_create) {
10786 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10787 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10788 				   NULL, err_msg);
10789 		return NULL;
10790 	}
10791 	return fops->item_create(dev, conf, error);
10792 }
10793 
10794 static int
10795 mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
10796 			    const struct rte_flow_item_flex_handle *handle,
10797 			    struct rte_flow_error *error)
10798 {
10799 	static const char err_msg[] = "flex item release unsupported";
10800 	struct rte_flow_attr attr = { .transfer = 0 };
10801 	const struct mlx5_flow_driver_ops *fops =
10802 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10803 
10804 	if (!fops->item_release) {
10805 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10806 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10807 				   NULL, err_msg);
10808 		return -rte_errno;
10809 	}
10810 	return fops->item_release(dev, handle, error);
10811 }
10812 
10813 static void
10814 mlx5_dbg__print_pattern(const struct rte_flow_item *item)
10815 {
10816 	int ret;
10817 	struct rte_flow_error error;
10818 
10819 	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
10820 		char *item_name;
10821 		ret = rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME_PTR, &item_name,
10822 				    sizeof(item_name),
10823 				    (void *)(uintptr_t)item->type, &error);
10824 		if (ret > 0)
10825 			printf("%s ", item_name);
10826 		else
10827 			printf("%d\n", (int)item->type);
10828 	}
10829 	printf("END\n");
10830 }
10831 
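/*
 * Check whether a UDP item can only match the standard VXLAN port:
 * returns true when the (masked) destination port is unspecified or
 * equals MLX5_UDP_PORT_VXLAN (4789).
 */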
10832 static int
10833 mlx5_flow_is_std_vxlan_port(const struct rte_flow_item *udp_item)
10834 {
10835 	const struct rte_flow_item_udp *spec = udp_item->spec;
10836 	const struct rte_flow_item_udp *mask = udp_item->mask;
10837 	uint16_t udp_dport = 0;
10838 
10839 	if (spec != NULL) {
10840 		if (!mask)
10841 			mask = &rte_flow_item_udp_mask;
10842 		udp_dport = rte_be_to_cpu_16(spec->hdr.dst_port &
10843 				mask->hdr.dst_port);
10844 	}
10845 	return (!udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN);
10846 }
10847 
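/*
 * Pick the effective expansion node: when a VXLAN item is the last
 * expandable one in the pattern, choose between the standard-port and
 * L3 VXLAN expansion branches based on the UDP destination port that
 * precedes it.
 */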
10848 static const struct mlx5_flow_expand_node *
10849 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
10850 		unsigned int item_idx,
10851 		const struct mlx5_flow_expand_node graph[],
10852 		const struct mlx5_flow_expand_node *node)
10853 {
10854 	const struct rte_flow_item *item = pattern + item_idx, *prev_item;
10855 
10856 	if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN &&
10857 			node != NULL &&
10858 			node->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
10859 		/*
10860 		 * The expansion node is VXLAN and it is also the last
10861 		 * expandable item in the pattern, so we need to continue
10862 		 * expansion of the inner tunnel.
10863 		 */
10864 		MLX5_ASSERT(item_idx > 0);
10865 		prev_item = pattern + item_idx - 1;
10866 		MLX5_ASSERT(prev_item->type == RTE_FLOW_ITEM_TYPE_UDP);
10867 		if (mlx5_flow_is_std_vxlan_port(prev_item))
10868 			return &graph[MLX5_EXPANSION_STD_VXLAN];
10869 		return &graph[MLX5_EXPANSION_L3_VXLAN];
10870 	}
10871 	return node;
10872 }
10873 
10874 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
10875 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
10876 	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
10877 };
10878 
10879 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
10880 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
10881 	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
10882 	{ 9, 10, 11 }, { 12, 13, 14 },
10883 };
10884 
10885 /**
10886  * Discover the number of available flow priorities.
10887  *
10888  * @param dev
10889  *   Ethernet device.
10890  *
10891  * @return
10892  *   On success, number of available flow priorities.
10893  *   On failure, a negative errno-style code and rte_errno is set.
10894  */
10895 int
10896 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
10897 {
10898 	static const uint16_t vprio[] = {8, 16};
10899 	const struct mlx5_priv *priv = dev->data->dev_private;
10900 	const struct mlx5_flow_driver_ops *fops;
10901 	enum mlx5_flow_drv_type type;
10902 	int ret;
10903 
10904 	type = mlx5_flow_os_get_type();
10905 	if (type == MLX5_FLOW_TYPE_MAX) {
10906 		type = MLX5_FLOW_TYPE_VERBS;
10907 		if (priv->sh->cdev->config.devx && priv->sh->config.dv_flow_en)
10908 			type = MLX5_FLOW_TYPE_DV;
10909 	}
10910 	fops = flow_get_drv_ops(type);
10911 	if (fops->discover_priorities == NULL) {
10912 		DRV_LOG(ERR, "Priority discovery not supported");
10913 		rte_errno = ENOTSUP;
10914 		return -rte_errno;
10915 	}
10916 	ret = fops->discover_priorities(dev, vprio, RTE_DIM(vprio));
10917 	if (ret < 0)
10918 		return ret;
10919 	switch (ret) {
10920 	case 8:
10921 		ret = RTE_DIM(priority_map_3);
10922 		break;
10923 	case 16:
10924 		ret = RTE_DIM(priority_map_5);
10925 		break;
10926 	default:
10927 		rte_errno = ENOTSUP;
10928 		DRV_LOG(ERR,
10929 			"port %u maximum priority: %d expected 8/16",
10930 			dev->data->port_id, ret);
10931 		return -rte_errno;
10932 	}
10933 	DRV_LOG(INFO, "port %u supported flow priorities:"
10934 		" 0-%d for ingress or egress root table,"
10935 		" 0-%d for non-root table or transfer root table.",
10936 		dev->data->port_id, ret - 2,
10937 		MLX5_NON_ROOT_FLOW_MAX_PRIO - 1);
10938 	return ret;
10939 }
10940 
10941 /**
10942  * Adjust flow priority based on the highest layer and the request priority.
10943  *
10944  * @param[in] dev
10945  *   Pointer to the Ethernet device structure.
10946  * @param[in] priority
10947  *   The rule base priority.
10948  * @param[in] subpriority
10949  *   The priority based on the items.
10950  *
10951  * @return
10952  *   The new priority.
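 *   For example, with 16 Verbs priorities (priority_map_5), base
 *   priority 1 and subpriority 2 map to Verbs priority 5.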
10953  */
10954 uint32_t
10955 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
10956 			  uint32_t subpriority)
10957 {
10958 	uint32_t res = 0;
10959 	struct mlx5_priv *priv = dev->data->dev_private;
10960 
10961 	switch (priv->sh->flow_max_priority) {
10962 	case RTE_DIM(priority_map_3):
10963 		res = priority_map_3[priority][subpriority];
10964 		break;
10965 	case RTE_DIM(priority_map_5):
10966 		res = priority_map_5[priority][subpriority];
10967 		break;
10968 	}
10969 	return res;
10970 }
10971 
10972 /**
10973  * Get the priority for sending traffic to kernel table.
10974  *
10975  * @param[in] dev
10976  *   Pointer to the Ethernet device structure.
10977  *
10978  * @return
10979  *   On success: the value of priority for sending traffic to kernel table
10980  *   On failure: -1
10981  */
10982 uint32_t
10983 mlx5_get_send_to_kernel_priority(struct rte_eth_dev *dev)
10984 {
10985 	struct mlx5_priv *priv = dev->data->dev_private;
10986 	uint32_t res;
10987 
10988 	switch (priv->sh->flow_max_priority) {
10989 	case RTE_DIM(priority_map_5):
10990 		res = 15;
10991 		break;
10992 	case RTE_DIM(priority_map_3):
10993 		res = 7;
10994 		break;
10995 	default:
10996 		DRV_LOG(ERR,
10997 			"port %u maximum priority: %d expected 8/16",
10998 			dev->data->port_id, priv->sh->flow_max_priority);
10999 		res = (uint32_t)-1;
11000 	}
11001 	return res;
11002 }
11003 
11004 /**
11005  * Get the E-Switch Manager vport id.
11006  *
11007  * @param[in] dev
11008  *   Pointer to the Ethernet device structure.
11009  *
11010  * @return
11011  *   The vport id.
11012  */
11013 int16_t mlx5_flow_get_esw_manager_vport_id(struct rte_eth_dev *dev)
11014 {
11015 	struct mlx5_priv *priv = dev->data->dev_private;
11016 	struct mlx5_common_device *cdev = priv->sh->cdev;
11017 
11018 	/* New FW exposes E-Switch Manager vport ID, can use it directly. */
11019 	if (cdev->config.hca_attr.esw_mgr_vport_id_valid)
11020 		return (int16_t)cdev->config.hca_attr.esw_mgr_vport_id;
11021 
11022 	if (priv->pci_dev == NULL)
11023 		return 0;
11024 	switch (priv->pci_dev->id.device_id) {
11025 	case PCI_DEVICE_ID_MELLANOX_CONNECTX5BF:
11026 	case PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF:
11027 	case PCI_DEVICE_ID_MELLANOX_CONNECTX7BF:
11028 	/*
11029 	 * In old FW which doesn't expose the E-Switch Manager vport ID in the capability,
11030 	 * only the BF embedded CPUs control the E-Switch Manager port. Hence,
11031 	 * ECPF vport ID is selected and not the host port (0) in any BF case.
11032 	 */
11033 		return (int16_t)MLX5_ECPF_VPORT_ID;
11034 	default:
11035 		return MLX5_PF_VPORT_ID;
11036 	}
11037 }
11038 
11039 /**
11040  * Parse item to get the vport id.
11041  *
11042  * @param[in] dev
11043  *   Pointer to the Ethernet device structure.
11044  * @param[in] item
11045  *   The src port id match item.
11046  * @param[out] vport_id
11047  *   Pointer to put the vport id.
11048  * @param[out] all_ports
11049  *   Indicate if the item matches all ports.
11050  * @param[out] error
11051  *   Pointer to error structure.
11052  *
11053  * @return
11054  *   0 on success, a negative errno value otherwise and rte_errno is set.
11055  */
11056 int mlx5_flow_get_item_vport_id(struct rte_eth_dev *dev,
11057 				const struct rte_flow_item *item,
11058 				uint16_t *vport_id,
11059 				bool *all_ports,
11060 				struct rte_flow_error *error)
11061 {
11062 	struct mlx5_priv *port_priv;
11063 	const struct rte_flow_item_port_id *pid_v;
11064 	uint32_t esw_mgr_port;
11065 
11066 	if (item->type != RTE_FLOW_ITEM_TYPE_PORT_ID &&
11067 	    item->type != RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT)
11068 		return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
11069 					  NULL, "Incorrect item type.");
11070 	pid_v = item->spec;
11071 	if (!pid_v) {
11072 		if (all_ports)
11073 			*all_ports = (item->type == RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT);
11074 		return 0;
11075 	}
11076 	if (all_ports)
11077 		*all_ports = false;
11078 	esw_mgr_port = (item->type == RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT) ?
11079 				MLX5_REPRESENTED_PORT_ESW_MGR : MLX5_PORT_ESW_MGR;
11080 	if (pid_v->id == esw_mgr_port) {
11081 		*vport_id = mlx5_flow_get_esw_manager_vport_id(dev);
11082 	} else {
11083 		port_priv = mlx5_port_to_eswitch_info(pid_v->id, false);
11084 		if (!port_priv)
11085 			return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
11086 						  NULL, "Failed to get port info.");
11087 		*vport_id = port_priv->representor_id;
11088 	}
11089 
11090 	return 0;
11091 }
11092