/* xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision bc70e55948380ce57cbc079930f217c73ea59b39) */
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <stdalign.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_ether.h>
#include <ethdev_driver.h>
#include <rte_eal_paging.h>
#include <rte_flow.h>
#include <rte_cycles.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_prm.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_flow_os.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_common_os.h"
#include "rte_pmd_mlx5.h"

struct tunnel_default_miss_ctx {
	uint16_t *queue;
	__extension__
	union {
		struct rte_flow_action_rss action_rss;
		struct rte_flow_action_queue miss_queue;
		struct rte_flow_action_jump miss_jump;
		uint8_t raw[0];
	};
};

static int
flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
			     struct rte_flow *flow,
			     const struct rte_flow_attr *attr,
			     const struct rte_flow_action *app_actions,
			     uint32_t flow_idx,
			     const struct mlx5_flow_tunnel *tunnel,
			     struct tunnel_default_miss_ctx *ctx,
			     struct rte_flow_error *error);
static struct mlx5_flow_tunnel *
mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id);
static void
mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel);
static uint32_t
tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
				const struct mlx5_flow_tunnel *tunnel,
				uint32_t group, uint32_t *table,
				struct rte_flow_error *error);

static struct mlx5_flow_workspace *mlx5_flow_push_thread_workspace(void);
static void mlx5_flow_pop_thread_workspace(void);


/** Device flow drivers. */
extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;

const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;

const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
#if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
	[MLX5_FLOW_TYPE_HW] = &mlx5_flow_hw_drv_ops,
#endif
	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
};

/** Helper macro to build input graph for mlx5_flow_expand_rss(). */
#define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
	(const int []){ \
		__VA_ARGS__, 0, \
	}
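
/*
 * For example,
 *
 *   MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6)
 *
 * expands to the compound literal
 *
 *   (const int []){ MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6, 0, }
 *
 * hence a node index of 0 terminates every next-node list.
 */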

/** Node object of input graph for mlx5_flow_expand_rss(). */
struct mlx5_flow_expand_node {
	const int *const next;
	/**<
	 * List of next node indexes. Index 0 is interpreted as a terminator.
	 */
	const enum rte_flow_item_type type;
	/**< Pattern item type of current node. */
	uint64_t rss_types;
	/**<
	 * RSS types bit-field associated with this node
	 * (see RTE_ETH_RSS_* definitions).
	 */
	uint64_t node_flags;
	/**<
	 * Bit-fields that define how the node is used in the expansion.
	 * (see MLX5_EXPANSION_NODE_* definitions).
	 */
};

/* Optional expand field. The expansion alg will not go deeper. */
#define MLX5_EXPANSION_NODE_OPTIONAL (UINT64_C(1) << 0)

/* The node is not added implicitly as expansion to the flow pattern.
 * If the node type does not match the flow pattern item type, the
 * expansion alg will go deeper to its next items.
 * In the current implementation, the list of next node indexes can
 * have up to one node with this flag set and it has to be the last
 * node index (before the list terminator).
 */
#define MLX5_EXPANSION_NODE_EXPLICIT (UINT64_C(1) << 1)

/** Object returned by mlx5_flow_expand_rss(). */
struct mlx5_flow_expand_rss {
	uint32_t entries;
	/**< Number of entries in @p patterns and @p priorities. */
	struct {
		struct rte_flow_item *pattern; /**< Expanded pattern array. */
		uint32_t priority; /**< Priority offset for each expansion. */
	} entry[];
};

static void
mlx5_dbg__print_pattern(const struct rte_flow_item *item);

static const struct mlx5_flow_expand_node *
mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
		unsigned int item_idx,
		const struct mlx5_flow_expand_node graph[],
		const struct mlx5_flow_expand_node *node);

static bool
mlx5_flow_is_rss_expandable_item(const struct rte_flow_item *item)
{
	switch (item->type) {
	case RTE_FLOW_ITEM_TYPE_ETH:
	case RTE_FLOW_ITEM_TYPE_VLAN:
	case RTE_FLOW_ITEM_TYPE_IPV4:
	case RTE_FLOW_ITEM_TYPE_IPV6:
	case RTE_FLOW_ITEM_TYPE_UDP:
	case RTE_FLOW_ITEM_TYPE_TCP:
	case RTE_FLOW_ITEM_TYPE_ESP:
	case RTE_FLOW_ITEM_TYPE_VXLAN:
	case RTE_FLOW_ITEM_TYPE_NVGRE:
	case RTE_FLOW_ITEM_TYPE_GRE:
	case RTE_FLOW_ITEM_TYPE_GENEVE:
	case RTE_FLOW_ITEM_TYPE_MPLS:
	case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
	case RTE_FLOW_ITEM_TYPE_GRE_KEY:
	case RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT:
	case RTE_FLOW_ITEM_TYPE_GTP:
		return true;
	default:
		break;
	}
	return false;
}

/**
 * Network Service Header (NSH) and its next protocol values
 * are described in RFC-8393.
 */
static enum rte_flow_item_type
mlx5_nsh_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
{
	enum rte_flow_item_type type;

	switch (proto_mask & proto_spec) {
	case 0:
		type = RTE_FLOW_ITEM_TYPE_VOID;
		break;
	case RTE_VXLAN_GPE_TYPE_IPV4:
		type = RTE_FLOW_ITEM_TYPE_IPV4;
		break;
	case RTE_VXLAN_GPE_TYPE_IPV6:
		type = RTE_FLOW_ITEM_TYPE_IPV6;
		break;
	case RTE_VXLAN_GPE_TYPE_ETH:
		type = RTE_FLOW_ITEM_TYPE_ETH;
		break;
	default:
		type = RTE_FLOW_ITEM_TYPE_END;
	}
	return type;
}

static enum rte_flow_item_type
mlx5_inet_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
{
	enum rte_flow_item_type type;

	switch (proto_mask & proto_spec) {
	case 0:
		type = RTE_FLOW_ITEM_TYPE_VOID;
		break;
	case IPPROTO_UDP:
		type = RTE_FLOW_ITEM_TYPE_UDP;
		break;
	case IPPROTO_TCP:
		type = RTE_FLOW_ITEM_TYPE_TCP;
		break;
	case IPPROTO_IPIP:
		type = RTE_FLOW_ITEM_TYPE_IPV4;
		break;
	case IPPROTO_IPV6:
		type = RTE_FLOW_ITEM_TYPE_IPV6;
		break;
	case IPPROTO_ESP:
		type = RTE_FLOW_ITEM_TYPE_ESP;
		break;
	default:
		type = RTE_FLOW_ITEM_TYPE_END;
	}
	return type;
}

static enum rte_flow_item_type
mlx5_ethertype_to_item_type(rte_be16_t type_spec,
			    rte_be16_t type_mask, bool is_tunnel)
{
	enum rte_flow_item_type type;

	switch (rte_be_to_cpu_16(type_spec & type_mask)) {
	case 0:
		type = RTE_FLOW_ITEM_TYPE_VOID;
		break;
	case RTE_ETHER_TYPE_TEB:
		type = is_tunnel ?
		       RTE_FLOW_ITEM_TYPE_ETH : RTE_FLOW_ITEM_TYPE_END;
		break;
	case RTE_ETHER_TYPE_VLAN:
		type = !is_tunnel ?
		       RTE_FLOW_ITEM_TYPE_VLAN : RTE_FLOW_ITEM_TYPE_END;
		break;
	case RTE_ETHER_TYPE_IPV4:
		type = RTE_FLOW_ITEM_TYPE_IPV4;
		break;
	case RTE_ETHER_TYPE_IPV6:
		type = RTE_FLOW_ITEM_TYPE_IPV6;
		break;
	default:
		type = RTE_FLOW_ITEM_TYPE_END;
	}
	return type;
}

static enum rte_flow_item_type
mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
{
#define MLX5_XSET_ITEM_MASK_SPEC(type, fld)                              \
	do {                                                             \
		const void *m = item->mask;                              \
		const void *s = item->spec;                              \
		mask = m ?                                               \
			((const struct rte_flow_item_##type *)m)->fld :  \
			rte_flow_item_##type##_mask.fld;                 \
		spec = ((const struct rte_flow_item_##type *)s)->fld;    \
	} while (0)

	enum rte_flow_item_type ret;
	uint16_t spec, mask;

	if (item == NULL || item->spec == NULL)
		return RTE_FLOW_ITEM_TYPE_VOID;
	switch (item->type) {
	case RTE_FLOW_ITEM_TYPE_ETH:
		MLX5_XSET_ITEM_MASK_SPEC(eth, type);
		if (!mask)
			return RTE_FLOW_ITEM_TYPE_VOID;
		ret = mlx5_ethertype_to_item_type(spec, mask, false);
		break;
	case RTE_FLOW_ITEM_TYPE_VLAN:
		MLX5_XSET_ITEM_MASK_SPEC(vlan, inner_type);
		if (!mask)
			return RTE_FLOW_ITEM_TYPE_VOID;
		ret = mlx5_ethertype_to_item_type(spec, mask, false);
		break;
	case RTE_FLOW_ITEM_TYPE_IPV4:
		MLX5_XSET_ITEM_MASK_SPEC(ipv4, hdr.next_proto_id);
		if (!mask)
			return RTE_FLOW_ITEM_TYPE_VOID;
		ret = mlx5_inet_proto_to_item_type(spec, mask);
		break;
	case RTE_FLOW_ITEM_TYPE_IPV6:
		MLX5_XSET_ITEM_MASK_SPEC(ipv6, hdr.proto);
		if (!mask)
			return RTE_FLOW_ITEM_TYPE_VOID;
		ret = mlx5_inet_proto_to_item_type(spec, mask);
		break;
	case RTE_FLOW_ITEM_TYPE_GENEVE:
		MLX5_XSET_ITEM_MASK_SPEC(geneve, protocol);
		ret = mlx5_ethertype_to_item_type(spec, mask, true);
		break;
	case RTE_FLOW_ITEM_TYPE_GRE:
		MLX5_XSET_ITEM_MASK_SPEC(gre, protocol);
		ret = mlx5_ethertype_to_item_type(spec, mask, true);
		break;
	case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
		MLX5_XSET_ITEM_MASK_SPEC(vxlan_gpe, protocol);
		ret = mlx5_nsh_proto_to_item_type(spec, mask);
		break;
	default:
		ret = RTE_FLOW_ITEM_TYPE_VOID;
		break;
	}
	return ret;
#undef MLX5_XSET_ITEM_MASK_SPEC
}
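
/*
 * Completion example (illustrative, not part of the driver logic): given
 *
 *   struct rte_flow_item_ipv4 spec = { .hdr.next_proto_id = IPPROTO_UDP };
 *   struct rte_flow_item item = {
 *           .type = RTE_FLOW_ITEM_TYPE_IPV4,
 *           .spec = &spec,
 *           .mask = NULL,   (the default IPv4 item mask applies)
 *   };
 *
 * mlx5_flow_expand_rss_item_complete(&item) returns RTE_FLOW_ITEM_TYPE_UDP,
 * i.e. the item the user implied but did not write. A zeroed (spec & mask)
 * yields RTE_FLOW_ITEM_TYPE_VOID, and an unknown protocol yields
 * RTE_FLOW_ITEM_TYPE_END, which disables the expansion.
 */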

static const int *
mlx5_flow_expand_rss_skip_explicit(const struct mlx5_flow_expand_node graph[],
		const int *next_node)
{
	const struct mlx5_flow_expand_node *node = NULL;
	const int *next = next_node;

	while (next && *next) {
		/*
		 * Skip the nodes with the MLX5_EXPANSION_NODE_EXPLICIT
		 * flag set, because they were not found in the flow pattern.
		 */
		node = &graph[*next];
		if (!(node->node_flags & MLX5_EXPANSION_NODE_EXPLICIT))
			break;
		next = node->next;
	}
	return next;
}

#define MLX5_RSS_EXP_ELT_N 16

/**
 * Expand RSS flows into several possible flows according to the RSS hash
 * fields requested and the driver capabilities.
 *
 * @param[out] buf
 *   Buffer to store the result expansion.
 * @param[in] size
 *   Buffer size in bytes. If 0, @p buf can be NULL.
 * @param[in] pattern
 *   User flow pattern.
 * @param[in] types
 *   RSS types to expand (see RTE_ETH_RSS_* definitions).
 * @param[in] graph
 *   Input graph to expand @p pattern according to @p types.
 * @param[in] graph_root_index
 *   Index of root node in @p graph, typically 0.
 *
 * @return
 *   A positive value representing the size of @p buf in bytes regardless of
 *   @p size on success, a negative errno value otherwise and rte_errno is
 *   set, the following errors are defined:
 *
 *   -E2BIG: graph @p graph is too deep.
 *   -EINVAL: @p size does not have enough space for the expanded pattern.
 */
static int
mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
		     const struct rte_flow_item *pattern, uint64_t types,
		     const struct mlx5_flow_expand_node graph[],
		     int graph_root_index)
{
	const struct rte_flow_item *item;
	const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
	const int *next_node;
	const int *stack[MLX5_RSS_EXP_ELT_N];
	int stack_pos = 0;
	struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N];
	unsigned int i, item_idx, last_expand_item_idx = 0;
	size_t lsize;
	size_t user_pattern_size = 0;
	void *addr = NULL;
	const struct mlx5_flow_expand_node *next = NULL;
	struct rte_flow_item missed_item;
	int missed = 0;
	int elt = 0;
	const struct rte_flow_item *last_expand_item = NULL;

	memset(&missed_item, 0, sizeof(missed_item));
	lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
		MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]);
	if (lsize > size)
		return -EINVAL;
	buf->entry[0].priority = 0;
	buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N];
	buf->entries = 0;
	addr = buf->entry[0].pattern;
	for (item = pattern, item_idx = 0;
			item->type != RTE_FLOW_ITEM_TYPE_END;
			item++, item_idx++) {
		if (!mlx5_flow_is_rss_expandable_item(item)) {
			user_pattern_size += sizeof(*item);
			continue;
		}
		last_expand_item = item;
		last_expand_item_idx = item_idx;
		i = 0;
		while (node->next && node->next[i]) {
			next = &graph[node->next[i]];
			if (next->type == item->type)
				break;
			if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
				node = next;
				i = 0;
			} else {
				++i;
			}
		}
		if (next)
			node = next;
		user_pattern_size += sizeof(*item);
	}
	user_pattern_size += sizeof(*item); /* Handle END item. */
	lsize += user_pattern_size;
	if (lsize > size)
		return -EINVAL;
	/* Copy the user pattern in the first entry of the buffer. */
	rte_memcpy(addr, pattern, user_pattern_size);
	addr = (void *)(((uintptr_t)addr) + user_pattern_size);
	buf->entries = 1;
	/* Start expanding. */
	memset(flow_items, 0, sizeof(flow_items));
	user_pattern_size -= sizeof(*item);
	/*
	 * Check if the last valid item has spec set; if so, complete the
	 * pattern so it can be used for expansion.
	 */
	missed_item.type = mlx5_flow_expand_rss_item_complete(last_expand_item);
	if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
		/* Item type END indicates expansion is not required. */
		return lsize;
	}
	if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
		next = NULL;
		missed = 1;
		i = 0;
		while (node->next && node->next[i]) {
			next = &graph[node->next[i]];
			if (next->type == missed_item.type) {
				flow_items[0].type = missed_item.type;
				flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
				break;
			}
			if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
				node = next;
				i = 0;
			} else {
				++i;
			}
			next = NULL;
		}
	}
	if (next && missed) {
		elt = 2; /* missed item + item end. */
		node = next;
		lsize += elt * sizeof(*item) + user_pattern_size;
		if (lsize > size)
			return -EINVAL;
		if (node->rss_types & types) {
			buf->entry[buf->entries].priority = 1;
			buf->entry[buf->entries].pattern = addr;
			buf->entries++;
			rte_memcpy(addr, buf->entry[0].pattern,
				   user_pattern_size);
			addr = (void *)(((uintptr_t)addr) + user_pattern_size);
			rte_memcpy(addr, flow_items, elt * sizeof(*item));
			addr = (void *)(((uintptr_t)addr) +
					elt * sizeof(*item));
		}
	} else if (last_expand_item != NULL) {
		node = mlx5_flow_expand_rss_adjust_node(pattern,
				last_expand_item_idx, graph, node);
	}
	memset(flow_items, 0, sizeof(flow_items));
	next_node = mlx5_flow_expand_rss_skip_explicit(graph,
			node->next);
	stack[stack_pos] = next_node;
	node = next_node ? &graph[*next_node] : NULL;
	while (node) {
		flow_items[stack_pos].type = node->type;
		if (node->rss_types & types) {
			size_t n;
			/*
			 * Compute the number of items to copy from the
			 * expansion and copy it.
			 * When stack_pos is 0, there is one element in it,
			 * plus the additional END item.
			 */
			elt = stack_pos + 2;
			flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
			lsize += elt * sizeof(*item) + user_pattern_size;
			if (lsize > size)
				return -EINVAL;
			n = elt * sizeof(*item);
			buf->entry[buf->entries].priority =
				stack_pos + 1 + missed;
			buf->entry[buf->entries].pattern = addr;
			buf->entries++;
			rte_memcpy(addr, buf->entry[0].pattern,
				   user_pattern_size);
			addr = (void *)(((uintptr_t)addr) +
					user_pattern_size);
			rte_memcpy(addr, &missed_item,
				   missed * sizeof(*item));
			addr = (void *)(((uintptr_t)addr) +
				missed * sizeof(*item));
			rte_memcpy(addr, flow_items, n);
			addr = (void *)(((uintptr_t)addr) + n);
		}
		/* Go deeper. */
		if (!(node->node_flags & MLX5_EXPANSION_NODE_OPTIONAL) &&
				node->next) {
			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
					node->next);
			if (stack_pos++ == MLX5_RSS_EXP_ELT_N) {
				rte_errno = E2BIG;
				return -rte_errno;
			}
			stack[stack_pos] = next_node;
		} else if (*(next_node + 1)) {
			/* Follow up with the next possibility. */
			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
					++next_node);
		} else if (!stack_pos) {
			/*
			 * Completing the traverse over the different paths.
			 * The next_node is advanced to the terminator.
			 */
			++next_node;
		} else {
			/* Move to the next path. */
			while (stack_pos) {
				next_node = stack[--stack_pos];
				next_node++;
				if (*next_node)
					break;
			}
			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
					next_node);
			stack[stack_pos] = next_node;
		}
		node = next_node && *next_node ? &graph[*next_node] : NULL;
	}
	return lsize;
}
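
/*
 * Usage sketch for the expansion above (illustrative values only):
 * expanding an "eth / ipv4 / end" pattern for 4-tuple RSS over the
 * mlx5_support_expansion graph defined below.
 *
 *   uint8_t raw[4096];
 *   struct mlx5_flow_expand_rss *exp = (void *)raw;
 *   int ret = mlx5_flow_expand_rss(exp, sizeof(raw), pattern,
 *                                  RTE_ETH_RSS_NONFRAG_IPV4_UDP |
 *                                  RTE_ETH_RSS_NONFRAG_IPV4_TCP,
 *                                  mlx5_support_expansion,
 *                                  MLX5_EXPANSION_ROOT_OUTER);
 *
 * On success, entry[0] keeps the user pattern "eth / ipv4" at priority
 * offset 0, and every graph node reached in the walk whose rss_types
 * intersects the requested types contributes one expanded pattern, e.g.
 * "eth / ipv4 / udp" and "eth / ipv4 / tcp", with priority offsets that
 * let the more specific rules win.
 */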

enum mlx5_expansion {
	MLX5_EXPANSION_ROOT,
	MLX5_EXPANSION_ROOT_OUTER,
	MLX5_EXPANSION_OUTER_ETH,
	MLX5_EXPANSION_OUTER_VLAN,
	MLX5_EXPANSION_OUTER_IPV4,
	MLX5_EXPANSION_OUTER_IPV4_UDP,
	MLX5_EXPANSION_OUTER_IPV4_TCP,
	MLX5_EXPANSION_OUTER_IPV4_ESP,
	MLX5_EXPANSION_OUTER_IPV6,
	MLX5_EXPANSION_OUTER_IPV6_UDP,
	MLX5_EXPANSION_OUTER_IPV6_TCP,
	MLX5_EXPANSION_OUTER_IPV6_ESP,
	MLX5_EXPANSION_VXLAN,
	MLX5_EXPANSION_STD_VXLAN,
	MLX5_EXPANSION_L3_VXLAN,
	MLX5_EXPANSION_VXLAN_GPE,
	MLX5_EXPANSION_GRE,
	MLX5_EXPANSION_NVGRE,
	MLX5_EXPANSION_GRE_KEY,
	MLX5_EXPANSION_MPLS,
	MLX5_EXPANSION_ETH,
	MLX5_EXPANSION_VLAN,
	MLX5_EXPANSION_IPV4,
	MLX5_EXPANSION_IPV4_UDP,
	MLX5_EXPANSION_IPV4_TCP,
	MLX5_EXPANSION_IPV4_ESP,
	MLX5_EXPANSION_IPV6,
	MLX5_EXPANSION_IPV6_UDP,
	MLX5_EXPANSION_IPV6_TCP,
	MLX5_EXPANSION_IPV6_ESP,
	MLX5_EXPANSION_IPV6_FRAG_EXT,
	MLX5_EXPANSION_GTP,
	MLX5_EXPANSION_GENEVE,
};

/** Supported expansion of items. */
static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
	[MLX5_EXPANSION_ROOT] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
						  MLX5_EXPANSION_OUTER_IPV4,
						  MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_OUTER_ETH] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						  MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
		.node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
	},
	[MLX5_EXPANSION_OUTER_IPV4] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV4_UDP,
			 MLX5_EXPANSION_OUTER_IPV4_TCP,
			 MLX5_EXPANSION_OUTER_IPV4_ESP,
			 MLX5_EXPANSION_GRE,
			 MLX5_EXPANSION_NVGRE,
			 MLX5_EXPANSION_IPV4,
			 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
			RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						  MLX5_EXPANSION_VXLAN_GPE,
						  MLX5_EXPANSION_MPLS,
						  MLX5_EXPANSION_GENEVE,
						  MLX5_EXPANSION_GTP),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_OUTER_IPV4_ESP] = {
		.type = RTE_FLOW_ITEM_TYPE_ESP,
		.rss_types = RTE_ETH_RSS_ESP,
	},
	[MLX5_EXPANSION_OUTER_IPV6] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV6_UDP,
			 MLX5_EXPANSION_OUTER_IPV6_TCP,
			 MLX5_EXPANSION_OUTER_IPV6_ESP,
			 MLX5_EXPANSION_IPV4,
			 MLX5_EXPANSION_IPV6,
			 MLX5_EXPANSION_GRE,
			 MLX5_EXPANSION_NVGRE),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
			RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						  MLX5_EXPANSION_VXLAN_GPE,
						  MLX5_EXPANSION_MPLS,
						  MLX5_EXPANSION_GENEVE,
						  MLX5_EXPANSION_GTP),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
	},
	[MLX5_EXPANSION_OUTER_IPV6_ESP] = {
		.type = RTE_FLOW_ITEM_TYPE_ESP,
		.rss_types = RTE_ETH_RSS_ESP,
	},
	[MLX5_EXPANSION_VXLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	},
	[MLX5_EXPANSION_STD_VXLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	},
	[MLX5_EXPANSION_L3_VXLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	},
	[MLX5_EXPANSION_VXLAN_GPE] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
	},
	[MLX5_EXPANSION_GRE] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6,
						  MLX5_EXPANSION_GRE_KEY,
						  MLX5_EXPANSION_MPLS),
		.type = RTE_FLOW_ITEM_TYPE_GRE,
	},
	[MLX5_EXPANSION_GRE_KEY] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6,
						  MLX5_EXPANSION_MPLS),
		.type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
		.node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
	},
	[MLX5_EXPANSION_NVGRE] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
		.type = RTE_FLOW_ITEM_TYPE_NVGRE,
	},
	[MLX5_EXPANSION_MPLS] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6,
						  MLX5_EXPANSION_ETH),
		.type = RTE_FLOW_ITEM_TYPE_MPLS,
		.node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
	},
	[MLX5_EXPANSION_ETH] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
		.node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
	},
	[MLX5_EXPANSION_IPV4] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
						  MLX5_EXPANSION_IPV4_TCP,
						  MLX5_EXPANSION_IPV4_ESP),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
			RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_IPV4_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_IPV4_ESP] = {
		.type = RTE_FLOW_ITEM_TYPE_ESP,
		.rss_types = RTE_ETH_RSS_ESP,
	},
	[MLX5_EXPANSION_IPV6] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
						  MLX5_EXPANSION_IPV6_TCP,
						  MLX5_EXPANSION_IPV6_ESP,
						  MLX5_EXPANSION_IPV6_FRAG_EXT),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
			RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_IPV6_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
	},
	[MLX5_EXPANSION_IPV6_ESP] = {
		.type = RTE_FLOW_ITEM_TYPE_ESP,
		.rss_types = RTE_ETH_RSS_ESP,
	},
	[MLX5_EXPANSION_IPV6_FRAG_EXT] = {
		.type = RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
	},
	[MLX5_EXPANSION_GTP] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_GTP,
	},
	[MLX5_EXPANSION_GENEVE] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_GENEVE,
	},
};
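
/*
 * Reading the table above, for illustration: starting from
 * MLX5_EXPANSION_ROOT_OUTER the walk may follow
 *
 *   OUTER_ETH -> OUTER_IPV4 -> OUTER_IPV4_UDP -> VXLAN -> ETH -> IPV4 ...
 *
 * Nodes flagged MLX5_EXPANSION_NODE_EXPLICIT (the VLAN nodes) are skipped
 * unless the user pattern itself contains a VLAN item, and nodes flagged
 * MLX5_EXPANSION_NODE_OPTIONAL (GRE key, MPLS) are never descended into.
 */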

static struct rte_flow_action_handle *
mlx5_action_handle_create(struct rte_eth_dev *dev,
			  const struct rte_flow_indir_action_conf *conf,
			  const struct rte_flow_action *action,
			  struct rte_flow_error *error);
static int mlx5_action_handle_destroy
				(struct rte_eth_dev *dev,
				 struct rte_flow_action_handle *handle,
				 struct rte_flow_error *error);
static int mlx5_action_handle_update
				(struct rte_eth_dev *dev,
				 struct rte_flow_action_handle *handle,
				 const void *update,
				 struct rte_flow_error *error);
static int mlx5_action_handle_query
				(struct rte_eth_dev *dev,
				 const struct rte_flow_action_handle *handle,
				 void *data,
				 struct rte_flow_error *error);
static int
mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
		    struct rte_flow_tunnel *app_tunnel,
		    struct rte_flow_action **actions,
		    uint32_t *num_of_actions,
		    struct rte_flow_error *error);
static int
mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
		       struct rte_flow_tunnel *app_tunnel,
		       struct rte_flow_item **items,
		       uint32_t *num_of_items,
		       struct rte_flow_error *error);
static int
mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
			      struct rte_flow_item *pmd_items,
			      uint32_t num_items, struct rte_flow_error *err);
static int
mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
				struct rte_flow_action *pmd_actions,
				uint32_t num_actions,
				struct rte_flow_error *err);
static int
mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
				  struct rte_mbuf *m,
				  struct rte_flow_restore_info *info,
				  struct rte_flow_error *err);
static struct rte_flow_item_flex_handle *
mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
			   const struct rte_flow_item_flex_conf *conf,
			   struct rte_flow_error *error);
static int
mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
			    const struct rte_flow_item_flex_handle *handle,
			    struct rte_flow_error *error);
static int
mlx5_flow_info_get(struct rte_eth_dev *dev,
		   struct rte_flow_port_info *port_info,
		   struct rte_flow_queue_info *queue_info,
		   struct rte_flow_error *error);
static int
mlx5_flow_port_configure(struct rte_eth_dev *dev,
			 const struct rte_flow_port_attr *port_attr,
			 uint16_t nb_queue,
			 const struct rte_flow_queue_attr *queue_attr[],
			 struct rte_flow_error *err);

static struct rte_flow_pattern_template *
mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
		const struct rte_flow_pattern_template_attr *attr,
		const struct rte_flow_item items[],
		struct rte_flow_error *error);

static int
mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
				   struct rte_flow_pattern_template *template,
				   struct rte_flow_error *error);
static struct rte_flow_actions_template *
mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
			const struct rte_flow_actions_template_attr *attr,
			const struct rte_flow_action actions[],
			const struct rte_flow_action masks[],
			struct rte_flow_error *error);
static int
mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
				   struct rte_flow_actions_template *template,
				   struct rte_flow_error *error);

static struct rte_flow_template_table *
mlx5_flow_table_create(struct rte_eth_dev *dev,
		       const struct rte_flow_template_table_attr *attr,
		       struct rte_flow_pattern_template *item_templates[],
		       uint8_t nb_item_templates,
		       struct rte_flow_actions_template *action_templates[],
		       uint8_t nb_action_templates,
		       struct rte_flow_error *error);
static int
mlx5_flow_table_destroy(struct rte_eth_dev *dev,
			struct rte_flow_template_table *table,
			struct rte_flow_error *error);
static struct rte_flow *
mlx5_flow_async_flow_create(struct rte_eth_dev *dev,
			    uint32_t queue,
			    const struct rte_flow_op_attr *attr,
			    struct rte_flow_template_table *table,
			    const struct rte_flow_item items[],
			    uint8_t pattern_template_index,
			    const struct rte_flow_action actions[],
			    uint8_t action_template_index,
			    void *user_data,
			    struct rte_flow_error *error);
static int
mlx5_flow_async_flow_destroy(struct rte_eth_dev *dev,
			     uint32_t queue,
			     const struct rte_flow_op_attr *attr,
			     struct rte_flow *flow,
			     void *user_data,
			     struct rte_flow_error *error);
static int
mlx5_flow_pull(struct rte_eth_dev *dev,
	       uint32_t queue,
	       struct rte_flow_op_result res[],
	       uint16_t n_res,
	       struct rte_flow_error *error);
static int
mlx5_flow_push(struct rte_eth_dev *dev,
	       uint32_t queue,
	       struct rte_flow_error *error);

static struct rte_flow_action_handle *
mlx5_flow_async_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
				 const struct rte_flow_op_attr *attr,
				 const struct rte_flow_indir_action_conf *conf,
				 const struct rte_flow_action *action,
				 void *user_data,
				 struct rte_flow_error *error);

static int
mlx5_flow_async_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
				 const struct rte_flow_op_attr *attr,
				 struct rte_flow_action_handle *handle,
				 const void *update,
				 void *user_data,
				 struct rte_flow_error *error);

static int
mlx5_flow_async_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
				  const struct rte_flow_op_attr *attr,
				  struct rte_flow_action_handle *handle,
				  void *user_data,
				  struct rte_flow_error *error);

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.isolate = mlx5_flow_isolate,
	.query = mlx5_flow_query,
	.dev_dump = mlx5_flow_dev_dump,
	.get_aged_flows = mlx5_flow_get_aged_flows,
	.action_handle_create = mlx5_action_handle_create,
	.action_handle_destroy = mlx5_action_handle_destroy,
	.action_handle_update = mlx5_action_handle_update,
	.action_handle_query = mlx5_action_handle_query,
	.tunnel_decap_set = mlx5_flow_tunnel_decap_set,
	.tunnel_match = mlx5_flow_tunnel_match,
	.tunnel_action_decap_release = mlx5_flow_tunnel_action_release,
	.tunnel_item_release = mlx5_flow_tunnel_item_release,
	.get_restore_info = mlx5_flow_tunnel_get_restore_info,
	.flex_item_create = mlx5_flow_flex_item_create,
	.flex_item_release = mlx5_flow_flex_item_release,
	.info_get = mlx5_flow_info_get,
	.configure = mlx5_flow_port_configure,
	.pattern_template_create = mlx5_flow_pattern_template_create,
	.pattern_template_destroy = mlx5_flow_pattern_template_destroy,
	.actions_template_create = mlx5_flow_actions_template_create,
	.actions_template_destroy = mlx5_flow_actions_template_destroy,
	.template_table_create = mlx5_flow_table_create,
	.template_table_destroy = mlx5_flow_table_destroy,
	.async_create = mlx5_flow_async_flow_create,
	.async_destroy = mlx5_flow_async_flow_destroy,
	.pull = mlx5_flow_pull,
	.push = mlx5_flow_push,
	.async_action_handle_create = mlx5_flow_async_action_handle_create,
	.async_action_handle_update = mlx5_flow_async_action_handle_update,
	.async_action_handle_destroy = mlx5_flow_async_action_handle_destroy,
};

/* Tunnel information. */
struct mlx5_flow_tunnel_info {
	uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GENEVE,
		.ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GRE,
		.ptype = RTE_PTYPE_TUNNEL_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_NVGRE,
		.ptype = RTE_PTYPE_TUNNEL_NVGRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPIP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GTP,
		.ptype = RTE_PTYPE_TUNNEL_GTPU,
	},
};

/**
 * Translate tag ID to register.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] feature
 *   The feature that requests the register.
 * @param[in] id
 *   The requested register ID.
 * @param[out] error
 *   Error description in case of any.
 *
 * @return
 *   The requested register on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
		     enum mlx5_feature_name feature,
		     uint32_t id,
		     struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_sh_config *config = &priv->sh->config;
	enum modify_reg start_reg;
	bool skip_mtr_reg = false;

	switch (feature) {
	case MLX5_HAIRPIN_RX:
		return REG_B;
	case MLX5_HAIRPIN_TX:
		return REG_A;
	case MLX5_METADATA_RX:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_B;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_METADATA_TX:
		return REG_A;
	case MLX5_METADATA_FDB:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NON;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_FLOW_MARK:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NON;
		case MLX5_XMETA_MODE_META16:
			return REG_C_1;
		case MLX5_XMETA_MODE_META32:
			return REG_C_0;
		}
		break;
	case MLX5_MTR_ID:
		/*
		 * If meter color and meter id share one register, flow match
		 * should use the meter color register for match.
		 */
		if (priv->mtr_reg_share)
			return priv->mtr_color_reg;
		else
			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			       REG_C_3;
	case MLX5_MTR_COLOR:
	case MLX5_ASO_FLOW_HIT:
	case MLX5_ASO_CONNTRACK:
	case MLX5_SAMPLE_ID:
		/* All features use the same REG_C. */
		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
		return priv->mtr_color_reg;
	case MLX5_COPY_MARK:
		/*
		 * The metadata COPY_MARK register is used only in the meter
		 * suffix sub-flow when a meter is present, so it is safe to
		 * share the same register.
		 */
		return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
	case MLX5_APP_TAG:
		/*
		 * If a meter is enabled, it engages registers for both color
		 * match and flow match. If the meter color match is not using
		 * REG_C_2, tag allocation starts at REG_C_2 and must skip the
		 * REG_C_x used by the meter color match.
		 * If no meter is enabled, all available registers can be used.
		 */
		start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			    (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
		skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
		if (id > (uint32_t)(REG_C_7 - start_reg))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "invalid tag id");
		if (priv->sh->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		/*
		 * This case means the meter is using a REG_C_x greater than
		 * REG_C_2. Take care not to conflict with the meter color
		 * REG_C_x. If the available index REG_C_y >= REG_C_x, skip
		 * the color register.
		 */
		if (skip_mtr_reg && priv->sh->flow_mreg_c
		    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
			if (id >= (uint32_t)(REG_C_7 - start_reg))
				return rte_flow_error_set(error, EINVAL,
						       RTE_FLOW_ERROR_TYPE_ITEM,
							NULL, "invalid tag id");
			if (priv->sh->flow_mreg_c
			    [id + 1 + start_reg - REG_C_0] != REG_NON)
				return priv->sh->flow_mreg_c
					       [id + 1 + start_reg - REG_C_0];
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		}
		return priv->sh->flow_mreg_c[id + start_reg - REG_C_0];
	}
	MLX5_ASSERT(false);
	return rte_flow_error_set(error, EINVAL,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL, "invalid feature name");
}
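
/*
 * A minimal caller sketch (illustrative): resolving the REG_C register
 * backing application tag index 0 before building a TAG match.
 *
 *   int reg = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
 *   if (reg < 0)
 *           return reg;
 *
 * On failure rte_errno is already set via rte_flow_error_set(). With
 * dv_xmeta_en == MLX5_XMETA_MODE_META16 the same helper maps Rx metadata
 * to REG_C_0 and the flow mark to REG_C_1, which is why those two features
 * cannot reuse each other's register.
 */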

/**
 * Check extensive flow metadata register support.
 *
 * @param dev
 *   Pointer to rte_eth_dev structure.
 *
 * @return
 *   True if device supports extensive flow metadata register, otherwise false.
 */
bool
mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	/*
	 * Having an available reg_c can be regarded as supporting extensive
	 * flow metadata registers, which means:
	 * - metadata register copy action by modify header.
	 * - 16 modify header actions are supported.
	 * - reg_c's are preserved across different domains (FDB and NIC) on
	 *   packet loopback by flow lookup miss.
	 */
	return priv->sh->flow_mreg_c[2] != REG_NON;
}

/**
 * Get the lowest priority.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Pointer to device flow rule attributes.
 *
 * @return
 *   The value of lowest priority of flow.
 */
uint32_t
mlx5_get_lowest_priority(struct rte_eth_dev *dev,
			  const struct rte_flow_attr *attr)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	if (!attr->group && !attr->transfer)
		return priv->sh->flow_max_priority - 2;
	return MLX5_NON_ROOT_FLOW_MAX_PRIO - 1;
}

/**
 * Calculate matcher priority of the flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Pointer to device flow rule attributes.
 * @param[in] subpriority
 *   The priority based on the items.
 * @param[in] external
 *   Flow is user flow.
 * @return
 *   The matcher priority of the flow.
 */
uint16_t
mlx5_get_matcher_priority(struct rte_eth_dev *dev,
			  const struct rte_flow_attr *attr,
			  uint32_t subpriority, bool external)
{
	uint16_t priority = (uint16_t)attr->priority;
	struct mlx5_priv *priv = dev->data->dev_private;

	if (!attr->group && !attr->transfer) {
		if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
			priority = priv->sh->flow_max_priority - 1;
		return mlx5_os_flow_adjust_priority(dev, priority, subpriority);
	} else if (!external && attr->transfer && attr->group == 0 &&
		   attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR) {
		return (priv->sh->flow_max_priority - 1) * 3;
	}
	if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
		priority = MLX5_NON_ROOT_FLOW_MAX_PRIO;
	return priority * 3 + subpriority;
}
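
/*
 * Worked example for the computation above (illustrative): for a non-root
 * user flow with attr->priority == 2 and subpriority == 1, the matcher
 * priority is 2 * 3 + 1 = 7. The "* 3" spreads each flow priority over
 * three hardware priorities, leaving room for the subpriority derived from
 * the matched items.
 */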

/**
 * Verify the @p item specifications (spec, last, mask) are compatible with the
 * NIC capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   @p item->mask or flow default bit-masks.
 * @param[in] nic_mask
 *   Bit-masks covering supported fields by the NIC to compare with user mask.
 * @param[in] size
 *   Bit-masks size in bytes.
 * @param[in] range_accepted
 *   True if range of values is accepted for specific fields, false otherwise.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
			  const uint8_t *mask,
			  const uint8_t *nic_mask,
			  unsigned int size,
			  bool range_accepted,
			  struct rte_flow_error *error)
{
	unsigned int i;

	MLX5_ASSERT(nic_mask);
	for (i = 0; i < size; ++i)
		if ((nic_mask[i] | mask[i]) != nic_mask[i])
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "mask enables non supported"
						  " bits");
	if (!item->spec && (item->mask || item->last))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "mask/last without a spec is not"
					  " supported");
	if (item->spec && item->last && !range_accepted) {
		uint8_t spec[size];
		uint8_t last[size];
		unsigned int i;
		int ret;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
		}
		ret = memcmp(spec, last, size);
		if (ret != 0)
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "range is not valid");
	}
	return 0;
}
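
/*
 * A minimal caller sketch for the check above (illustrative, with a
 * hypothetical nic_mask): an item validator passes the default item mask
 * when the user gave none, plus the NIC capability mask.
 *
 *   const struct rte_flow_item_udp nic_mask = {
 *           .hdr = { .src_port = RTE_BE16(0xffff),
 *                    .dst_port = RTE_BE16(0xffff) },
 *   };
 *   const struct rte_flow_item_udp *mask =
 *           item->mask ? item->mask : &rte_flow_item_udp_mask;
 *   int ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
 *                                       (const uint8_t *)&nic_mask,
 *                                       sizeof(nic_mask),
 *                                       MLX5_ITEM_RANGE_NOT_ACCEPTED,
 *                                       error);
 *
 * A mask bit set outside nic_mask fails with ENOTSUP; a spec/last pair
 * describing a real range fails unless range_accepted is true.
 */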

/**
 * Adjust the hash fields according to the @p flow information.
 *
 * @param[in] rss_desc
 *   Pointer to the mlx5 RSS descriptor.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   RTE_ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 *
 * @return
 *   The hash fields that should be used.
 */
uint64_t
mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
			    int tunnel __rte_unused, uint64_t layer_types,
			    uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	int rss_request_inner = rss_desc->level >= 2;

	/* Check RSS hash level for tunnel. */
	if (tunnel && rss_request_inner)
		hash_fields |= IBV_RX_HASH_INNER;
	else if (tunnel || rss_request_inner)
		return 0;
#endif
	/* Check if requested layer matches RSS hash fields. */
	if (!(rss_desc->types & layer_types))
		return 0;
	return hash_fields;
}
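
/*
 * Illustrative example of the adjustment above: with rss_desc->types
 * containing RTE_ETH_RSS_NONFRAG_IPV4_UDP and rss_desc->level == 2, a
 * tunnel UDP sub-flow passing layer_types == RTE_ETH_RSS_NONFRAG_IPV4_UDP
 * gets IBV_RX_HASH_INNER added to its hash fields, while a sub-flow whose
 * layer does not intersect rss_desc->types hashes on nothing (returns 0).
 */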

/**
 * Look up and set the ptype in the Rx queue data. Only a single tunnel
 * ptype can be used; if several tunnel rules are attached to this queue,
 * the tunnel ptype will be cleared.
 *
 * @param rxq_ctrl
 *   Rx queue to update.
 */
static void
flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	unsigned int i;
	uint32_t tunnel_ptype = 0;

	/* Look up for the ptype to use. */
	for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
		if (!rxq_ctrl->flow_tunnels_n[i])
			continue;
		if (!tunnel_ptype) {
			tunnel_ptype = tunnels_info[i].ptype;
		} else {
			tunnel_ptype = 0;
			break;
		}
	}
	rxq_ctrl->rxq.tunnel = tunnel_ptype;
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
 * flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] dev_handle
 *   Pointer to device flow handle structure.
 */
void
flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
		       struct mlx5_flow_handle *dev_handle)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct mlx5_ind_table_obj *ind_tbl = NULL;
	unsigned int i;

	if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
		struct mlx5_hrxq *hrxq;

		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
			      dev_handle->rix_hrxq);
		if (hrxq)
			ind_tbl = hrxq->ind_table;
	} else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
		struct mlx5_shared_action_rss *shared_rss;

		shared_rss = mlx5_ipool_get
			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
			 dev_handle->rix_srss);
		if (shared_rss)
			ind_tbl = shared_rss->ind_tbl;
	}
	if (!ind_tbl)
		return;
	for (i = 0; i != ind_tbl->queues_n; ++i) {
		int idx = ind_tbl->queues[i];
		struct mlx5_rxq_ctrl *rxq_ctrl;

		if (mlx5_is_external_rxq(dev, idx))
			continue;
		rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
		MLX5_ASSERT(rxq_ctrl != NULL);
		if (rxq_ctrl == NULL)
			continue;
		/*
		 * To support metadata register copy on Tx loopback, this must
		 * always be enabled (metadata may arrive from another port,
		 * not only from local flows).
		 */
		if (tunnel) {
			unsigned int j;

			/* Increase the counter matching the flow. */
			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
				if ((tunnels_info[j].tunnel &
				     dev_handle->layers) ==
				    tunnels_info[j].tunnel) {
					rxq_ctrl->flow_tunnels_n[j]++;
					break;
				}
			}
			flow_rxq_tunnel_ptype_update(rxq_ctrl);
		}
	}
}

static void
flow_rxq_mark_flag_set(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ctrl *rxq_ctrl;

	if (priv->mark_enabled)
		return;
	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
		rxq_ctrl->rxq.mark = 1;
	}
	priv->mark_enabled = 1;
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] flow
 *   Pointer to flow structure.
 */
static void
flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t handle_idx;
	struct mlx5_flow_handle *dev_handle;
	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();

	MLX5_ASSERT(wks);
	if (wks->mark)
		flow_rxq_mark_flag_set(dev);
	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
		       handle_idx, dev_handle, next)
		flow_drv_rxq_flags_set(dev, dev_handle);
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * device flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] dev_handle
 *   Pointer to the device flow handle structure.
 */
static void
flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
			struct mlx5_flow_handle *dev_handle)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct mlx5_ind_table_obj *ind_tbl = NULL;
	unsigned int i;

	if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
		struct mlx5_hrxq *hrxq;

		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
			      dev_handle->rix_hrxq);
		if (hrxq)
			ind_tbl = hrxq->ind_table;
	} else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
		struct mlx5_shared_action_rss *shared_rss;

		shared_rss = mlx5_ipool_get
			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
			 dev_handle->rix_srss);
		if (shared_rss)
			ind_tbl = shared_rss->ind_tbl;
	}
	if (!ind_tbl)
		return;
	MLX5_ASSERT(dev->data->dev_started);
	for (i = 0; i != ind_tbl->queues_n; ++i) {
		int idx = ind_tbl->queues[i];
		struct mlx5_rxq_ctrl *rxq_ctrl;

		if (mlx5_is_external_rxq(dev, idx))
			continue;
		rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
		MLX5_ASSERT(rxq_ctrl != NULL);
		if (rxq_ctrl == NULL)
			continue;
		if (tunnel) {
			unsigned int j;

			/* Decrease the counter matching the flow. */
			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
				if ((tunnels_info[j].tunnel &
				     dev_handle->layers) ==
				    tunnels_info[j].tunnel) {
					rxq_ctrl->flow_tunnels_n[j]--;
					break;
				}
			}
			flow_rxq_tunnel_ptype_update(rxq_ctrl);
		}
	}
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * @p flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to the flow.
 */
static void
flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t handle_idx;
	struct mlx5_flow_handle *dev_handle;

	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
		       handle_idx, dev_handle, next)
		flow_drv_rxq_flags_trim(dev, dev_handle);
}

/**
 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
flow_rxq_flags_clear(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
		unsigned int j;

		if (rxq == NULL || rxq->ctrl == NULL)
			continue;
		rxq->ctrl->rxq.mark = 0;
		for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
			rxq->ctrl->flow_tunnels_n[j] = 0;
		rxq->ctrl->rxq.tunnel = 0;
	}
	priv->mark_enabled = 0;
}

/**
 * Set the Rx queue dynamic metadata (mask and offset) for a flow
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 */
void
mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
		struct mlx5_rxq_data *data;

		if (rxq == NULL || rxq->ctrl == NULL)
			continue;
		data = &rxq->ctrl->rxq;
		if (!rte_flow_dynf_metadata_avail()) {
			data->dynf_meta = 0;
			data->flow_meta_mask = 0;
			data->flow_meta_offset = -1;
			data->flow_meta_port_mask = 0;
		} else {
			data->dynf_meta = 1;
			data->flow_meta_mask = rte_flow_dynf_metadata_mask;
			data->flow_meta_offset = rte_flow_dynf_metadata_offs;
			data->flow_meta_port_mask = priv->sh->dv_meta_mask;
		}
	}
}
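
/*
 * The dynamic metadata field and flag consumed above only exist after an
 * application has registered them, e.g. (illustrative):
 *
 *   if (rte_flow_dynf_metadata_register() < 0)
 *           return -rte_errno;
 *
 * Once registered, rte_flow_dynf_metadata_avail() returns true and the Rx
 * queues pick up the mask/offset pair the next time this helper runs
 * (typically at port start).
 */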

/*
 * Return a pointer to the desired action in the list of actions.
 *
 * @param[in] actions
 *   The list of actions to search the action in.
 * @param[in] action
 *   The action to find.
 *
 * @return
 *   Pointer to the action in the list, if found. NULL otherwise.
 */
const struct rte_flow_action *
mlx5_flow_find_action(const struct rte_flow_action *actions,
		      enum rte_flow_action_type action)
{
	if (actions == NULL)
		return NULL;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
		if (actions->type == action)
			return actions;
	return NULL;
}
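
/*
 * Illustrative use of the lookup above: checking whether an action list
 * carries an RSS action before translating it.
 *
 *   const struct rte_flow_action *rss =
 *           mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);
 *   if (rss != NULL)
 *           conf = rss->conf;
 *
 * Only the first matching action is returned; duplicate actions are caught
 * separately, e.g. by the per-action validators below.
 */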

/*
 * Validate the flag action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_flag(uint64_t action_flags,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	if (action_flags & MLX5_FLOW_ACTION_MARK)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't mark and flag in same flow");
	if (action_flags & MLX5_FLOW_ACTION_FLAG)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 flag"
					  " actions in same flow");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "flag action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the mark action.
 *
 * @param[in] action
 *   Pointer to the mark action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
			       uint64_t action_flags,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	const struct rte_flow_action_mark *mark = action->conf;

	if (!mark)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "configuration cannot be null");
1708 	if (mark->id >= MLX5_FLOW_MARK_MAX)
1709 		return rte_flow_error_set(error, EINVAL,
1710 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1711 					  &mark->id,
1712 					  "mark id must be in 0 <= id < "
1713 					  RTE_STR(MLX5_FLOW_MARK_MAX));
1714 	if (action_flags & MLX5_FLOW_ACTION_FLAG)
1715 		return rte_flow_error_set(error, EINVAL,
1716 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1717 					  "can't flag and mark in same flow");
1718 	if (action_flags & MLX5_FLOW_ACTION_MARK)
1719 		return rte_flow_error_set(error, EINVAL,
1720 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1721 					  "can't have 2 mark actions in same"
1722 					  " flow");
1723 	if (attr->egress)
1724 		return rte_flow_error_set(error, ENOTSUP,
1725 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1726 					  "mark action not supported for "
1727 					  "egress");
1728 	return 0;
1729 }
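
/*
 * Illustrative sketch (the mlx5_example_* name is hypothetical) of a mark
 * action that passes the checks above: non-NULL conf, an id below
 * MLX5_FLOW_MARK_MAX, no prior FLAG/MARK action, ingress attributes.
 */
static __rte_unused int
mlx5_example_check_mark(const struct rte_flow_attr *attr,
			struct rte_flow_error *error)
{
	static const struct rte_flow_action_mark mark_conf = { .id = 0xbeef };
	const struct rte_flow_action mark_act = {
		.type = RTE_FLOW_ACTION_TYPE_MARK,
		.conf = &mark_conf,
	};

	return mlx5_flow_validate_action_mark(&mark_act, 0, attr, error);
}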
1730 
1731 /**
1732  * Validate the drop action.
1733  *
1734  * @param[in] action_flags
1735  *   Bit-fields that hold the actions detected until now.
1736  * @param[in] attr
1737  *   Attributes of flow that includes this action.
1738  * @param[out] error
1739  *   Pointer to error structure.
1740  *
1741  * @return
1742  *   0 on success, a negative errno value otherwise and rte_errno is set.
1743  */
1744 int
1745 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1746 			       const struct rte_flow_attr *attr,
1747 			       struct rte_flow_error *error)
1748 {
1749 	if (attr->egress)
1750 		return rte_flow_error_set(error, ENOTSUP,
1751 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1752 					  "drop action not supported for "
1753 					  "egress");
1754 	return 0;
1755 }
1756 
1757 /**
1758  * Validate the queue action.
1759  *
1760  * @param[in] action
1761  *   Pointer to the queue action.
1762  * @param[in] action_flags
1763  *   Bit-fields that hold the actions detected until now.
1764  * @param[in] dev
1765  *   Pointer to the Ethernet device structure.
1766  * @param[in] attr
1767  *   Attributes of flow that includes this action.
1768  * @param[out] error
1769  *   Pointer to error structure.
1770  *
1771  * @return
1772  *   0 on success, a negative errno value otherwise and rte_errno is set.
1773  */
1774 int
1775 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1776 				uint64_t action_flags,
1777 				struct rte_eth_dev *dev,
1778 				const struct rte_flow_attr *attr,
1779 				struct rte_flow_error *error)
1780 {
1781 	struct mlx5_priv *priv = dev->data->dev_private;
1782 	const struct rte_flow_action_queue *queue = action->conf;
1783 
1784 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1785 		return rte_flow_error_set(error, EINVAL,
1786 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1787 					  "can't have 2 fate actions in"
1788 					  " same flow");
1789 	if (attr->egress)
1790 		return rte_flow_error_set(error, ENOTSUP,
1791 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1792 					  "queue action not supported for egress.");
1793 	if (mlx5_is_external_rxq(dev, queue->index))
1794 		return 0;
1795 	if (!priv->rxqs_n)
1796 		return rte_flow_error_set(error, EINVAL,
1797 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1798 					  NULL, "No Rx queues configured");
1799 	if (queue->index >= priv->rxqs_n)
1800 		return rte_flow_error_set(error, EINVAL,
1801 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1802 					  &queue->index,
1803 					  "queue index out of range");
1804 	if (mlx5_rxq_get(dev, queue->index) == NULL)
1805 		return rte_flow_error_set(error, EINVAL,
1806 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1807 					  &queue->index,
1808 					  "queue is not configured");
1809 	return 0;
1810 }
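
/*
 * Illustrative sketch of a queue action accepted above, assuming Rx queue 0
 * is configured and no fate action was seen yet (action_flags == 0):
 */
static __rte_unused int
mlx5_example_check_queue(struct rte_eth_dev *dev,
			 const struct rte_flow_attr *attr,
			 struct rte_flow_error *error)
{
	static const struct rte_flow_action_queue q_conf = { .index = 0 };
	const struct rte_flow_action q_act = {
		.type = RTE_FLOW_ACTION_TYPE_QUEUE,
		.conf = &q_conf,
	};

	return mlx5_flow_validate_action_queue(&q_act, 0, dev, attr, error);
}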
1811 
1812 /**
1813  * Validate queue numbers for device RSS.
1814  *
1815  * @param[in] dev
1816  *   Configured device.
1817  * @param[in] queues
1818  *   Array of queue numbers.
1819  * @param[in] queues_n
1820  *   Size of the @p queues array.
1821  * @param[out] error
1822  *   On error, filled with a textual error description.
1823  * @param[out] queue_idx
1824  *   On error, filled with an offending queue index in @p queues array.
1825  *
1826  * @return
1827  *   0 on success, a negative errno code on error.
1828  */
1829 static int
1830 mlx5_validate_rss_queues(struct rte_eth_dev *dev,
1831 			 const uint16_t *queues, uint32_t queues_n,
1832 			 const char **error, uint32_t *queue_idx)
1833 {
1834 	const struct mlx5_priv *priv = dev->data->dev_private;
1835 	bool is_hairpin = false;
1836 	bool is_ext_rss = false;
1837 	uint32_t i;
1838 
1839 	for (i = 0; i != queues_n; ++i) {
1840 		struct mlx5_rxq_ctrl *rxq_ctrl;
1841 
1842 		if (mlx5_is_external_rxq(dev, queues[i])) {
1843 			is_ext_rss = true;
1844 			continue;
1845 		}
1846 		if (is_ext_rss) {
1847 			*error = "Combining external and regular RSS queues is not supported";
1848 			*queue_idx = i;
1849 			return -ENOTSUP;
1850 		}
1851 		if (queues[i] >= priv->rxqs_n) {
1852 			*error = "queue index out of range";
1853 			*queue_idx = i;
1854 			return -EINVAL;
1855 		}
1856 		rxq_ctrl = mlx5_rxq_ctrl_get(dev, queues[i]);
1857 		if (rxq_ctrl == NULL) {
1858 			*error = "queue is not configured";
1859 			*queue_idx = i;
1860 			return -EINVAL;
1861 		}
1862 		if (i == 0 && rxq_ctrl->is_hairpin)
1863 			is_hairpin = true;
1864 		if (is_hairpin != rxq_ctrl->is_hairpin) {
1865 			*error = "combining hairpin and regular RSS queues is not supported";
1866 			*queue_idx = i;
1867 			return -ENOTSUP;
1868 		}
1869 	}
1870 	return 0;
1871 }
1872 
1873 /**
1874  * Validate the RSS action configuration.
1875  *
1876  * @param[in] dev
1877  *   Pointer to the Ethernet device structure.
1878  * @param[in] action
1879  *   Pointer to the RSS action.
1880  * @param[out] error
1881  *   Pointer to error structure.
1882  *
1883  * @return
1884  *   0 on success, a negative errno value otherwise and rte_errno is set.
1885  */
1886 int
1887 mlx5_validate_action_rss(struct rte_eth_dev *dev,
1888 			 const struct rte_flow_action *action,
1889 			 struct rte_flow_error *error)
1890 {
1891 	struct mlx5_priv *priv = dev->data->dev_private;
1892 	const struct rte_flow_action_rss *rss = action->conf;
1893 	int ret;
1894 	const char *message;
1895 	uint32_t queue_idx;
1896 
1897 	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1898 	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1899 		return rte_flow_error_set(error, ENOTSUP,
1900 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1901 					  &rss->func,
1902 					  "RSS hash function not supported");
1903 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1904 	if (rss->level > 2)
1905 #else
1906 	if (rss->level > 1)
1907 #endif
1908 		return rte_flow_error_set(error, ENOTSUP,
1909 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1910 					  &rss->level,
1911 					  "tunnel RSS is not supported");
1912 	/* allow RSS key_len 0 in case of NULL (default) RSS key. */
1913 	if (rss->key_len == 0 && rss->key != NULL)
1914 		return rte_flow_error_set(error, ENOTSUP,
1915 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1916 					  &rss->key_len,
1917 					  "RSS hash key length 0");
1918 	if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1919 		return rte_flow_error_set(error, ENOTSUP,
1920 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1921 					  &rss->key_len,
1922 					  "RSS hash key too small");
1923 	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1924 		return rte_flow_error_set(error, ENOTSUP,
1925 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1926 					  &rss->key_len,
1927 					  "RSS hash key too large");
1928 	if (rss->queue_num > priv->sh->dev_cap.ind_table_max_size)
1929 		return rte_flow_error_set(error, ENOTSUP,
1930 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1931 					  &rss->queue_num,
1932 					  "number of queues too large");
1933 	if (rss->types & MLX5_RSS_HF_MASK)
1934 		return rte_flow_error_set(error, ENOTSUP,
1935 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1936 					  &rss->types,
1937 					  "some RSS protocols are not"
1938 					  " supported");
1939 	if ((rss->types & (RTE_ETH_RSS_L3_SRC_ONLY | RTE_ETH_RSS_L3_DST_ONLY)) &&
1940 	    !(rss->types & RTE_ETH_RSS_IP))
1941 		return rte_flow_error_set(error, EINVAL,
1942 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1943 					  "L3 partial RSS requested but L3 RSS"
1944 					  " type not specified");
1945 	if ((rss->types & (RTE_ETH_RSS_L4_SRC_ONLY | RTE_ETH_RSS_L4_DST_ONLY)) &&
1946 	    !(rss->types & (RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP)))
1947 		return rte_flow_error_set(error, EINVAL,
1948 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1949 					  "L4 partial RSS requested but L4 RSS"
1950 					  " type not specified");
1951 	if (!priv->rxqs_n && priv->ext_rxqs == NULL)
1952 		return rte_flow_error_set(error, EINVAL,
1953 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1954 					  NULL, "No Rx queues configured");
1955 	if (!rss->queue_num)
1956 		return rte_flow_error_set(error, EINVAL,
1957 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1958 					  NULL, "No queues configured");
1959 	ret = mlx5_validate_rss_queues(dev, rss->queue, rss->queue_num,
1960 				       &message, &queue_idx);
1961 	if (ret != 0) {
1962 		return rte_flow_error_set(error, -ret,
1963 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1964 					  &rss->queue[queue_idx], message);
1965 	}
1966 	return 0;
1967 }
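
/*
 * Illustrative sketch of an RSS action satisfying the checks above:
 * Toeplitz hash, outer level, full-length key and consistent L3/L4 type
 * bits. The key bytes and queue list are placeholders; the queues must be
 * configured on the device for the call to succeed.
 */
static __rte_unused int
mlx5_example_check_rss(struct rte_eth_dev *dev, struct rte_flow_error *error)
{
	static const uint8_t key[MLX5_RSS_HASH_KEY_LEN] = { 0x6d, 0x5a };
	static const uint16_t queues[] = { 0, 1 };
	const struct rte_flow_action_rss rss_conf = {
		.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
		.level = 1,
		.types = RTE_ETH_RSS_IP | RTE_ETH_RSS_UDP,
		.key_len = MLX5_RSS_HASH_KEY_LEN,
		.key = key,
		.queue_num = RTE_DIM(queues),
		.queue = queues,
	};
	const struct rte_flow_action rss_act = {
		.type = RTE_FLOW_ACTION_TYPE_RSS,
		.conf = &rss_conf,
	};

	return mlx5_validate_action_rss(dev, &rss_act, error);
}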
1968 
1969 /**
1970  * Validate the RSS action within a flow rule.
1971  *
1972  * @param[in] action
1973  *   Pointer to the RSS action.
1974  * @param[in] action_flags
1975  *   Bit-fields that hold the actions detected until now.
1976  * @param[in] dev
1977  *   Pointer to the Ethernet device structure.
1978  * @param[in] attr
1979  *   Attributes of flow that includes this action.
1980  * @param[in] item_flags
1981  *   Items that were detected.
1982  * @param[out] error
1983  *   Pointer to error structure.
1984  *
1985  * @return
1986  *   0 on success, a negative errno value otherwise and rte_errno is set.
1987  */
1988 int
1989 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1990 			      uint64_t action_flags,
1991 			      struct rte_eth_dev *dev,
1992 			      const struct rte_flow_attr *attr,
1993 			      uint64_t item_flags,
1994 			      struct rte_flow_error *error)
1995 {
1996 	const struct rte_flow_action_rss *rss = action->conf;
1997 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1998 	int ret;
1999 
2000 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2001 		return rte_flow_error_set(error, EINVAL,
2002 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2003 					  "can't have 2 fate actions"
2004 					  " in same flow");
2005 	ret = mlx5_validate_action_rss(dev, action, error);
2006 	if (ret)
2007 		return ret;
2008 	if (attr->egress)
2009 		return rte_flow_error_set(error, ENOTSUP,
2010 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2011 					  "rss action not supported for "
2012 					  "egress");
2013 	if (rss->level > 1 && !tunnel)
2014 		return rte_flow_error_set(error, EINVAL,
2015 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2016 					  "inner RSS is not supported for "
2017 					  "non-tunnel flows");
2018 	if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
2019 	    !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
2020 		return rte_flow_error_set(error, EINVAL,
2021 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2022 					  "RSS on eCPRI is not yet supported");
2023 	}
2024 	if ((item_flags & MLX5_FLOW_LAYER_MPLS) &&
2025 	    !(item_flags &
2026 	      (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3)) &&
2027 	    rss->level > 1)
2028 		return rte_flow_error_set(error, EINVAL,
2029 					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
2030 					  "MPLS inner RSS needs to specify inner L2/L3 items after MPLS in pattern");
2031 	return 0;
2032 }
2033 
2034 /**
2035  * Validate the default miss action.
2036  *
2037  * @param[in] action_flags
2038  *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
2039  * @param[out] error
2040  *   Pointer to error structure.
2041  *
2042  * @return
2043  *   0 on success, a negative errno value otherwise and rte_errno is set.
2044  */
2045 int
2046 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
2047 				const struct rte_flow_attr *attr,
2048 				struct rte_flow_error *error)
2049 {
2050 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2051 		return rte_flow_error_set(error, EINVAL,
2052 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2053 					  "can't have 2 fate actions in"
2054 					  " same flow");
2055 	if (attr->egress)
2056 		return rte_flow_error_set(error, ENOTSUP,
2057 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2058 					  "default miss action not supported "
2059 					  "for egress");
2060 	if (attr->group)
2061 		return rte_flow_error_set(error, ENOTSUP,
2062 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
2063 					  "only group 0 is supported");
2064 	if (attr->transfer)
2065 		return rte_flow_error_set(error, ENOTSUP,
2066 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
2067 					  NULL, "transfer is not supported");
2068 	return 0;
2069 }
2070 
2071 /**
2072  * Validate the count action.
2073  *
2074  * @param[in] dev
2075  *   Pointer to the Ethernet device structure.
2076  * @param[in] attr
2077  *   Attributes of flow that includes this action.
2078  * @param[out] error
2079  *   Pointer to error structure.
2080  *
2081  * @return
2082  *   0 on success, a negative errno value otherwise and rte_errno is set.
2083  */
2084 int
2085 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
2086 				const struct rte_flow_attr *attr,
2087 				struct rte_flow_error *error)
2088 {
2089 	if (attr->egress)
2090 		return rte_flow_error_set(error, ENOTSUP,
2091 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2092 					  "count action not supported for "
2093 					  "egress");
2094 	return 0;
2095 }
2096 
2097 /**
2098  * Validate the ASO CT action.
2099  *
2100  * @param[in] dev
2101  *   Pointer to the Ethernet device structure.
2102  * @param[in] conntrack
2103  *   Pointer to the CT action profile.
2104  * @param[out] error
2105  *   Pointer to error structure.
2106  *
2107  * @return
2108  *   0 on success, a negative errno value otherwise and rte_errno is set.
2109  */
2110 int
2111 mlx5_validate_action_ct(struct rte_eth_dev *dev,
2112 			const struct rte_flow_action_conntrack *conntrack,
2113 			struct rte_flow_error *error)
2114 {
2115 	RTE_SET_USED(dev);
2116 
2117 	if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
2118 		return rte_flow_error_set(error, EINVAL,
2119 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2120 					  "Invalid CT state");
2121 	if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
2122 		return rte_flow_error_set(error, EINVAL,
2123 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2124 					  "Invalid last TCP packet flag");
2125 	return 0;
2126 }
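
/*
 * Minimal conntrack profile accepted by the check above: state and last
 * TCP flag within the supported ranges, every other field left zeroed.
 * The helper name is hypothetical.
 */
static __rte_unused int
mlx5_example_check_ct(struct rte_eth_dev *dev, struct rte_flow_error *error)
{
	const struct rte_flow_action_conntrack profile = {
		.state = RTE_FLOW_CONNTRACK_STATE_ESTABLISHED,
		.last_index = RTE_FLOW_CONNTRACK_FLAG_ACK,
	};

	return mlx5_validate_action_ct(dev, &profile, error);
}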
2127 
2128 /**
2129  * Verify the @p attributes will be correctly understood by the NIC and are
2130  * valid for this flow engine (ingress only, group 0, in-range priority).
2131  *
2132  * @param[in] dev
2133  *   Pointer to the Ethernet device structure.
2134  * @param[in] attributes
2135  *   Pointer to flow attributes.
2136  * @param[out] error
2137  *   Pointer to error structure.
2138  *
2139  * @return
2140  *   0 on success, a negative errno value otherwise and rte_errno is set.
2141  */
2142 int
2143 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
2144 			      const struct rte_flow_attr *attributes,
2145 			      struct rte_flow_error *error)
2146 {
2147 	struct mlx5_priv *priv = dev->data->dev_private;
2148 	uint32_t priority_max = priv->sh->flow_max_priority - 1;
2149 
2150 	if (attributes->group)
2151 		return rte_flow_error_set(error, ENOTSUP,
2152 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
2153 					  NULL, "groups are not supported");
2154 	if (attributes->priority != MLX5_FLOW_LOWEST_PRIO_INDICATOR &&
2155 	    attributes->priority >= priority_max)
2156 		return rte_flow_error_set(error, ENOTSUP,
2157 					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
2158 					  NULL, "priority out of range");
2159 	if (attributes->egress)
2160 		return rte_flow_error_set(error, ENOTSUP,
2161 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2162 					  "egress is not supported");
2163 	if (attributes->transfer && !priv->sh->config.dv_esw_en)
2164 		return rte_flow_error_set(error, ENOTSUP,
2165 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
2166 					  NULL, "transfer is not supported");
2167 	if (!attributes->ingress)
2168 		return rte_flow_error_set(error, EINVAL,
2169 					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
2170 					  NULL,
2171 					  "ingress attribute is mandatory");
2172 	return 0;
2173 }
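
/*
 * Attributes that pass the checks above on this path: ingress, group 0,
 * default priority, no transfer (illustrative constant):
 */
static __rte_unused const struct rte_flow_attr mlx5_example_ingress_attr = {
	.group = 0,	/* non-zero groups are rejected here */
	.priority = 0,	/* must stay below flow_max_priority */
	.ingress = 1,	/* mandatory for this validation path */
};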
2174 
2175 /**
2176  * Validate ICMP6 item.
2177  *
2178  * @param[in] item
2179  *   Item specification.
2180  * @param[in] item_flags
2181  *   Bit-fields that holds the items detected until now.
2182  * @param[in] ext_vlan_sup
2183  *   Whether extended VLAN features are supported or not.
2184  * @param[out] error
2185  *   Pointer to error structure.
2186  *
2187  * @return
2188  *   0 on success, a negative errno value otherwise and rte_errno is set.
2189  */
2190 int
2191 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
2192 			       uint64_t item_flags,
2193 			       uint8_t target_protocol,
2194 			       struct rte_flow_error *error)
2195 {
2196 	const struct rte_flow_item_icmp6 *mask = item->mask;
2197 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2198 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
2199 				      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2200 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2201 				      MLX5_FLOW_LAYER_OUTER_L4;
2202 	int ret;
2203 
2204 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
2205 		return rte_flow_error_set(error, EINVAL,
2206 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2207 					  "protocol filtering not compatible"
2208 					  " with ICMP6 layer");
2209 	if (!(item_flags & l3m))
2210 		return rte_flow_error_set(error, EINVAL,
2211 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2212 					  "IPv6 is mandatory to filter on"
2213 					  " ICMP6");
2214 	if (item_flags & l4m)
2215 		return rte_flow_error_set(error, EINVAL,
2216 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2217 					  "multiple L4 layers not supported");
2218 	if (!mask)
2219 		mask = &rte_flow_item_icmp6_mask;
2220 	ret = mlx5_flow_item_acceptable
2221 		(item, (const uint8_t *)mask,
2222 		 (const uint8_t *)&rte_flow_item_icmp6_mask,
2223 		 sizeof(struct rte_flow_item_icmp6),
2224 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2225 	if (ret < 0)
2226 		return ret;
2227 	return 0;
2228 }
2229 
2230 /**
2231  * Validate ICMP item.
2232  *
2233  * @param[in] item
2234  *   Item specification.
2235  * @param[in] item_flags
2236  *   Bit-fields that hold the items detected until now.
2237  * @param[out] error
2238  *   Pointer to error structure.
2239  *
2240  * @return
2241  *   0 on success, a negative errno value otherwise and rte_errno is set.
2242  */
2243 int
2244 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
2245 			     uint64_t item_flags,
2246 			     uint8_t target_protocol,
2247 			     struct rte_flow_error *error)
2248 {
2249 	const struct rte_flow_item_icmp *mask = item->mask;
2250 	const struct rte_flow_item_icmp nic_mask = {
2251 		.hdr.icmp_type = 0xff,
2252 		.hdr.icmp_code = 0xff,
2253 		.hdr.icmp_ident = RTE_BE16(0xffff),
2254 		.hdr.icmp_seq_nb = RTE_BE16(0xffff),
2255 	};
2256 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2257 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
2258 				      MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2259 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2260 				      MLX5_FLOW_LAYER_OUTER_L4;
2261 	int ret;
2262 
2263 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
2264 		return rte_flow_error_set(error, EINVAL,
2265 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2266 					  "protocol filtering not compatible"
2267 					  " with ICMP layer");
2268 	if (!(item_flags & l3m))
2269 		return rte_flow_error_set(error, EINVAL,
2270 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2271 					  "IPv4 is mandatory to filter"
2272 					  " on ICMP");
2273 	if (item_flags & l4m)
2274 		return rte_flow_error_set(error, EINVAL,
2275 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2276 					  "multiple L4 layers not supported");
2277 	if (!mask)
2278 		mask = &nic_mask;
2279 	ret = mlx5_flow_item_acceptable
2280 		(item, (const uint8_t *)mask,
2281 		 (const uint8_t *)&nic_mask,
2282 		 sizeof(struct rte_flow_item_icmp),
2283 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2284 	if (ret < 0)
2285 		return ret;
2286 	return 0;
2287 }
2288 
2289 /**
2290  * Validate Ethernet item.
2291  *
2292  * @param[in] item
2293  *   Item specification.
2294  * @param[in] item_flags
2295  *   Bit-fields that holds the items detected until now.
2296  * @param[out] error
2297  *   Pointer to error structure.
2298  *
2299  * @return
2300  *   0 on success, a negative errno value otherwise and rte_errno is set.
2301  */
2302 int
2303 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
2304 			    uint64_t item_flags, bool ext_vlan_sup,
2305 			    struct rte_flow_error *error)
2306 {
2307 	const struct rte_flow_item_eth *mask = item->mask;
2308 	const struct rte_flow_item_eth nic_mask = {
2309 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2310 		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2311 		.type = RTE_BE16(0xffff),
2312 		.has_vlan = ext_vlan_sup ? 1 : 0,
2313 	};
2314 	int ret;
2315 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2316 	const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2	:
2317 				       MLX5_FLOW_LAYER_OUTER_L2;
2318 
2319 	if (item_flags & ethm)
2320 		return rte_flow_error_set(error, ENOTSUP,
2321 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2322 					  "multiple L2 layers not supported");
2323 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
2324 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
2325 		return rte_flow_error_set(error, EINVAL,
2326 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2327 					  "L2 layer should not follow "
2328 					  "L3 layers");
2329 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
2330 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
2331 		return rte_flow_error_set(error, EINVAL,
2332 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2333 					  "L2 layer should not follow VLAN");
2334 	if (item_flags & MLX5_FLOW_LAYER_GTP)
2335 		return rte_flow_error_set(error, EINVAL,
2336 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2337 					  "L2 layer should not follow GTP");
2338 	if (!mask)
2339 		mask = &rte_flow_item_eth_mask;
2340 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2341 					(const uint8_t *)&nic_mask,
2342 					sizeof(struct rte_flow_item_eth),
2343 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2344 	return ret;
2345 }
2346 
2347 /**
2348  * Validate VLAN item.
2349  *
2350  * @param[in] item
2351  *   Item specification.
2352  * @param[in] item_flags
2353  *   Bit-fields that hold the items detected until now.
2354  * @param[in] dev
2355  *   Ethernet device flow is being created on.
2356  * @param[out] error
2357  *   Pointer to error structure.
2358  *
2359  * @return
2360  *   0 on success, a negative errno value otherwise and rte_errno is set.
2361  */
2362 int
2363 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
2364 			     uint64_t item_flags,
2365 			     struct rte_eth_dev *dev,
2366 			     struct rte_flow_error *error)
2367 {
2368 	const struct rte_flow_item_vlan *spec = item->spec;
2369 	const struct rte_flow_item_vlan *mask = item->mask;
2370 	const struct rte_flow_item_vlan nic_mask = {
2371 		.tci = RTE_BE16(UINT16_MAX),
2372 		.inner_type = RTE_BE16(UINT16_MAX),
2373 	};
2374 	uint16_t vlan_tag = 0;
2375 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2376 	int ret;
2377 	const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
2378 					MLX5_FLOW_LAYER_INNER_L4) :
2379 				       (MLX5_FLOW_LAYER_OUTER_L3 |
2380 					MLX5_FLOW_LAYER_OUTER_L4);
2381 	const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
2382 					MLX5_FLOW_LAYER_OUTER_VLAN;
2383 
2384 	if (item_flags & vlanm)
2385 		return rte_flow_error_set(error, EINVAL,
2386 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2387 					  "multiple VLAN layers not supported");
2388 	else if ((item_flags & l34m) != 0)
2389 		return rte_flow_error_set(error, EINVAL,
2390 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2391 					  "VLAN cannot follow L3/L4 layer");
2392 	if (!mask)
2393 		mask = &rte_flow_item_vlan_mask;
2394 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2395 					(const uint8_t *)&nic_mask,
2396 					sizeof(struct rte_flow_item_vlan),
2397 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2398 	if (ret)
2399 		return ret;
2400 	if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
2401 		struct mlx5_priv *priv = dev->data->dev_private;
2402 
2403 		if (priv->vmwa_context) {
2404 			/*
2405 			 * Non-NULL context means we have a virtual machine
2406 			 * and SR-IOV enabled, we have to create VLAN interface
2407 			 * to make hypervisor to setup E-Switch vport
2408 			 * context correctly. We avoid creating the multiple
2409 			 * VLAN interfaces, so we cannot support VLAN tag mask.
2410 			 */
2411 			return rte_flow_error_set(error, EINVAL,
2412 						  RTE_FLOW_ERROR_TYPE_ITEM,
2413 						  item,
2414 						  "VLAN tag mask is not"
2415 						  " supported in virtual"
2416 						  " environment");
2417 		}
2418 	}
2419 	if (spec) {
2420 		vlan_tag = spec->tci;
2421 		vlan_tag &= mask->tci;
2422 	}
2423 	/*
2424 	 * From verbs perspective an empty VLAN is equivalent
2425 	 * From the Verbs perspective, an empty VLAN is equivalent
2426 	 */
2427 	if (!vlan_tag)
2428 		return rte_flow_error_set(error, EINVAL,
2429 					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2430 					  item->spec,
2431 					  "VLAN cannot be empty");
2432 	return 0;
2433 }
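
/*
 * Illustrative VLAN item this function accepts: the mask is left NULL so
 * the default VID-only mask applies (other TCI masks are refused when an
 * SR-IOV VM workaround context exists) and the tag is non-zero, since an
 * empty VLAN is rejected above.
 */
static __rte_unused const struct rte_flow_item mlx5_example_vlan_item = {
	.type = RTE_FLOW_ITEM_TYPE_VLAN,
	.spec = &(const struct rte_flow_item_vlan){
		.tci = RTE_BE16(100),	/* VID 100, PCP/DEI zero */
	},
};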
2434 
2435 /**
2436  * Validate IPV4 item.
2437  *
2438  * @param[in] item
2439  *   Item specification.
2440  * @param[in] item_flags
2441  *   Bit-fields that hold the items detected until now.
2442  * @param[in] last_item
2443  *   Previous validated item in the pattern items.
2444  * @param[in] ether_type
2445  *   Type in the ethernet layer header (including dot1q).
2446  * @param[in] acc_mask
2447  *   Acceptable mask; if NULL, the internal default mask
2448  *   is used to check whether item fields are supported.
2449  * @param[in] range_accepted
2450  *   True if range of values is accepted for specific fields, false otherwise.
2451  * @param[out] error
2452  *   Pointer to error structure.
2453  *
2454  * @return
2455  *   0 on success, a negative errno value otherwise and rte_errno is set.
2456  */
2457 int
2458 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2459 			     uint64_t item_flags,
2460 			     uint64_t last_item,
2461 			     uint16_t ether_type,
2462 			     const struct rte_flow_item_ipv4 *acc_mask,
2463 			     bool range_accepted,
2464 			     struct rte_flow_error *error)
2465 {
2466 	const struct rte_flow_item_ipv4 *mask = item->mask;
2467 	const struct rte_flow_item_ipv4 *spec = item->spec;
2468 	const struct rte_flow_item_ipv4 nic_mask = {
2469 		.hdr = {
2470 			.src_addr = RTE_BE32(0xffffffff),
2471 			.dst_addr = RTE_BE32(0xffffffff),
2472 			.type_of_service = 0xff,
2473 			.next_proto_id = 0xff,
2474 		},
2475 	};
2476 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2477 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2478 				      MLX5_FLOW_LAYER_OUTER_L3;
2479 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2480 				      MLX5_FLOW_LAYER_OUTER_L4;
2481 	int ret;
2482 	uint8_t next_proto = 0xFF;
2483 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2484 				  MLX5_FLOW_LAYER_OUTER_VLAN |
2485 				  MLX5_FLOW_LAYER_INNER_VLAN);
2486 
2487 	if ((last_item & l2_vlan) && ether_type &&
2488 	    ether_type != RTE_ETHER_TYPE_IPV4)
2489 		return rte_flow_error_set(error, EINVAL,
2490 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2491 					  "IPv4 cannot follow L2/VLAN layer "
2492 					  "whose ether type is not IPv4");
2493 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2494 		if (mask && spec)
2495 			next_proto = mask->hdr.next_proto_id &
2496 				     spec->hdr.next_proto_id;
2497 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2498 			return rte_flow_error_set(error, EINVAL,
2499 						  RTE_FLOW_ERROR_TYPE_ITEM,
2500 						  item,
2501 						  "multiple tunnel "
2502 						  "not supported");
2503 	}
2504 	if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
2505 		return rte_flow_error_set(error, EINVAL,
2506 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2507 					  "wrong tunnel type - IPv6 specified "
2508 					  "but IPv4 item provided");
2509 	if (item_flags & l3m)
2510 		return rte_flow_error_set(error, ENOTSUP,
2511 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2512 					  "multiple L3 layers not supported");
2513 	else if (item_flags & l4m)
2514 		return rte_flow_error_set(error, EINVAL,
2515 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2516 					  "L3 cannot follow an L4 layer.");
2517 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2518 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2519 		return rte_flow_error_set(error, EINVAL,
2520 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2521 					  "L3 cannot follow an NVGRE layer.");
2522 	if (!mask)
2523 		mask = &rte_flow_item_ipv4_mask;
2524 	else if (mask->hdr.next_proto_id != 0 &&
2525 		 mask->hdr.next_proto_id != 0xff)
2526 		return rte_flow_error_set(error, EINVAL,
2527 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2528 					  "partial mask is not supported"
2529 					  " for protocol");
2530 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2531 					acc_mask ? (const uint8_t *)acc_mask
2532 						 : (const uint8_t *)&nic_mask,
2533 					sizeof(struct rte_flow_item_ipv4),
2534 					range_accepted, error);
2535 	if (ret < 0)
2536 		return ret;
2537 	return 0;
2538 }
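
/*
 * Illustrative IPv4 item that clears the checks above when it follows an
 * Ethernet item: note the full next_proto_id mask, as partial protocol
 * masks are refused.
 */
static __rte_unused const struct rte_flow_item mlx5_example_ipv4_item = {
	.type = RTE_FLOW_ITEM_TYPE_IPV4,
	.spec = &(const struct rte_flow_item_ipv4){
		.hdr.next_proto_id = IPPROTO_UDP,
	},
	.mask = &(const struct rte_flow_item_ipv4){
		.hdr.next_proto_id = 0xff,
	},
};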
2539 
2540 /**
2541  * Validate IPV6 item.
2542  *
2543  * @param[in] item
2544  *   Item specification.
2545  * @param[in] item_flags
2546  *   Bit-fields that hold the items detected until now.
2547  * @param[in] last_item
2548  *   Previous validated item in the pattern items.
2549  * @param[in] ether_type
2550  *   Type in the ethernet layer header (including dot1q).
2551  * @param[in] acc_mask
2552  *   Acceptable mask; if NULL, the internal default mask
2553  *   is used to check whether item fields are supported.
2554  * @param[out] error
2555  *   Pointer to error structure.
2556  *
2557  * @return
2558  *   0 on success, a negative errno value otherwise and rte_errno is set.
2559  */
2560 int
2561 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2562 			     uint64_t item_flags,
2563 			     uint64_t last_item,
2564 			     uint16_t ether_type,
2565 			     const struct rte_flow_item_ipv6 *acc_mask,
2566 			     struct rte_flow_error *error)
2567 {
2568 	const struct rte_flow_item_ipv6 *mask = item->mask;
2569 	const struct rte_flow_item_ipv6 *spec = item->spec;
2570 	const struct rte_flow_item_ipv6 nic_mask = {
2571 		.hdr = {
2572 			.src_addr =
2573 				"\xff\xff\xff\xff\xff\xff\xff\xff"
2574 				"\xff\xff\xff\xff\xff\xff\xff\xff",
2575 			.dst_addr =
2576 				"\xff\xff\xff\xff\xff\xff\xff\xff"
2577 				"\xff\xff\xff\xff\xff\xff\xff\xff",
2578 			.vtc_flow = RTE_BE32(0xffffffff),
2579 			.proto = 0xff,
2580 		},
2581 	};
2582 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2583 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2584 				      MLX5_FLOW_LAYER_OUTER_L3;
2585 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2586 				      MLX5_FLOW_LAYER_OUTER_L4;
2587 	int ret;
2588 	uint8_t next_proto = 0xFF;
2589 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2590 				  MLX5_FLOW_LAYER_OUTER_VLAN |
2591 				  MLX5_FLOW_LAYER_INNER_VLAN);
2592 
2593 	if ((last_item & l2_vlan) && ether_type &&
2594 	    ether_type != RTE_ETHER_TYPE_IPV6)
2595 		return rte_flow_error_set(error, EINVAL,
2596 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2597 					  "IPv6 cannot follow L2/VLAN layer "
2598 					  "whose ether type is not IPv6");
2599 	if (mask && mask->hdr.proto == UINT8_MAX && spec)
2600 		next_proto = spec->hdr.proto;
2601 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2602 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2603 			return rte_flow_error_set(error, EINVAL,
2604 						  RTE_FLOW_ERROR_TYPE_ITEM,
2605 						  item,
2606 						  "multiple tunnel "
2607 						  "not supported");
2608 	}
2609 	if (next_proto == IPPROTO_HOPOPTS  ||
2610 	    next_proto == IPPROTO_ROUTING  ||
2611 	    next_proto == IPPROTO_FRAGMENT ||
2612 	    next_proto == IPPROTO_ESP	   ||
2613 	    next_proto == IPPROTO_AH	   ||
2614 	    next_proto == IPPROTO_DSTOPTS)
2615 		return rte_flow_error_set(error, EINVAL,
2616 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2617 					  "IPv6 proto (next header) should "
2618 					  "not be set as extension header");
2619 	if (item_flags & MLX5_FLOW_LAYER_IPIP)
2620 		return rte_flow_error_set(error, EINVAL,
2621 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2622 					  "wrong tunnel type - IPv4 specified "
2623 					  "but IPv6 item provided");
2624 	if (item_flags & l3m)
2625 		return rte_flow_error_set(error, ENOTSUP,
2626 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2627 					  "multiple L3 layers not supported");
2628 	else if (item_flags & l4m)
2629 		return rte_flow_error_set(error, EINVAL,
2630 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2631 					  "L3 cannot follow an L4 layer.");
2632 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2633 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2634 		return rte_flow_error_set(error, EINVAL,
2635 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2636 					  "L3 cannot follow an NVGRE layer.");
2637 	if (!mask)
2638 		mask = &rte_flow_item_ipv6_mask;
2639 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2640 					acc_mask ? (const uint8_t *)acc_mask
2641 						 : (const uint8_t *)&nic_mask,
2642 					sizeof(struct rte_flow_item_ipv6),
2643 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2644 	if (ret < 0)
2645 		return ret;
2646 	return 0;
2647 }
2648 
2649 /**
2650  * Validate UDP item.
2651  *
2652  * @param[in] item
2653  *   Item specification.
2654  * @param[in] item_flags
2655  *   Bit-fields that hold the items detected until now.
2656  * @param[in] target_protocol
2657  *   The next protocol in the previous item.
2660  * @param[out] error
2661  *   Pointer to error structure.
2662  *
2663  * @return
2664  *   0 on success, a negative errno value otherwise and rte_errno is set.
2665  */
2666 int
2667 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2668 			    uint64_t item_flags,
2669 			    uint8_t target_protocol,
2670 			    struct rte_flow_error *error)
2671 {
2672 	const struct rte_flow_item_udp *mask = item->mask;
2673 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2674 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2675 				      MLX5_FLOW_LAYER_OUTER_L3;
2676 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2677 				      MLX5_FLOW_LAYER_OUTER_L4;
2678 	int ret;
2679 
2680 	if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
2681 		return rte_flow_error_set(error, EINVAL,
2682 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2683 					  "protocol filtering not compatible"
2684 					  " with UDP layer");
2685 	if (!(item_flags & l3m))
2686 		return rte_flow_error_set(error, EINVAL,
2687 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2688 					  "L3 is mandatory to filter on L4");
2689 	if (item_flags & l4m)
2690 		return rte_flow_error_set(error, EINVAL,
2691 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2692 					  "multiple L4 layers not supported");
2693 	if (!mask)
2694 		mask = &rte_flow_item_udp_mask;
2695 	ret = mlx5_flow_item_acceptable
2696 		(item, (const uint8_t *)mask,
2697 		 (const uint8_t *)&rte_flow_item_udp_mask,
2698 		 sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2699 		 error);
2700 	if (ret < 0)
2701 		return ret;
2702 	return 0;
2703 }
2704 
2705 /**
2706  * Validate TCP item.
2707  *
2708  * @param[in] item
2709  *   Item specification.
2710  * @param[in] item_flags
2711  *   Bit-fields that hold the items detected until now.
2712  * @param[in] target_protocol
2713  *   The next protocol in the previous item.
 * @param[in] flow_mask
 *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
2714  * @param[out] error
2715  *   Pointer to error structure.
2716  *
2717  * @return
2718  *   0 on success, a negative errno value otherwise and rte_errno is set.
2719  */
2720 int
2721 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2722 			    uint64_t item_flags,
2723 			    uint8_t target_protocol,
2724 			    const struct rte_flow_item_tcp *flow_mask,
2725 			    struct rte_flow_error *error)
2726 {
2727 	const struct rte_flow_item_tcp *mask = item->mask;
2728 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2729 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2730 				      MLX5_FLOW_LAYER_OUTER_L3;
2731 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2732 				      MLX5_FLOW_LAYER_OUTER_L4;
2733 	int ret;
2734 
2735 	MLX5_ASSERT(flow_mask);
2736 	if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
2737 		return rte_flow_error_set(error, EINVAL,
2738 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2739 					  "protocol filtering not compatible"
2740 					  " with TCP layer");
2741 	if (!(item_flags & l3m))
2742 		return rte_flow_error_set(error, EINVAL,
2743 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2744 					  "L3 is mandatory to filter on L4");
2745 	if (item_flags & l4m)
2746 		return rte_flow_error_set(error, EINVAL,
2747 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2748 					  "multiple L4 layers not supported");
2749 	if (!mask)
2750 		mask = &rte_flow_item_tcp_mask;
2751 	ret = mlx5_flow_item_acceptable
2752 		(item, (const uint8_t *)mask,
2753 		 (const uint8_t *)flow_mask,
2754 		 sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2755 		 error);
2756 	if (ret < 0)
2757 		return ret;
2758 	return 0;
2759 }
2760 
2761 /**
2762  * Validate VXLAN item.
2763  *
2764  * @param[in] dev
2765  *   Pointer to the Ethernet device structure.
2766  * @param[in] udp_dport
2767  *   UDP destination port
2768  * @param[in] item
2769  *   Item specification.
2770  * @param[in] item_flags
2771  *   Bit-fields that hold the items detected until now.
2772  * @param[in] attr
2773  *   Flow rule attributes.
2774  * @param[out] error
2775  *   Pointer to error structure.
2776  *
2777  * @return
2778  *   0 on success, a negative errno value otherwise and rte_errno is set.
2779  */
2780 int
2781 mlx5_flow_validate_item_vxlan(struct rte_eth_dev *dev,
2782 			      uint16_t udp_dport,
2783 			      const struct rte_flow_item *item,
2784 			      uint64_t item_flags,
2785 			      const struct rte_flow_attr *attr,
2786 			      struct rte_flow_error *error)
2787 {
2788 	const struct rte_flow_item_vxlan *spec = item->spec;
2789 	const struct rte_flow_item_vxlan *mask = item->mask;
2790 	int ret;
2791 	struct mlx5_priv *priv = dev->data->dev_private;
2792 	union vni {
2793 		uint32_t vlan_id;
2794 		uint8_t vni[4];
2795 	} id = { .vlan_id = 0, };
2796 	const struct rte_flow_item_vxlan nic_mask = {
2797 		.vni = "\xff\xff\xff",
2798 		.rsvd1 = 0xff,
2799 	};
2800 	const struct rte_flow_item_vxlan *valid_mask;
2801 
2802 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2803 		return rte_flow_error_set(error, ENOTSUP,
2804 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2805 					  "multiple tunnel layers not"
2806 					  " supported");
2807 	valid_mask = &rte_flow_item_vxlan_mask;
2808 	/*
2809 	 * Verify only UDPv4 is present as defined in
2810 	 * https://tools.ietf.org/html/rfc7348
2811 	 */
2812 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2813 		return rte_flow_error_set(error, EINVAL,
2814 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2815 					  "no outer UDP layer found");
2816 	if (!mask)
2817 		mask = &rte_flow_item_vxlan_mask;
2818 
2819 	if (priv->sh->steering_format_version !=
2820 	    MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
2821 	    !udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN) {
2822 		/* FDB domain & NIC domain non-zero group */
2823 		if ((attr->transfer || attr->group) && priv->sh->misc5_cap)
2824 			valid_mask = &nic_mask;
2825 		/* Group zero in NIC domain */
2826 		if (!attr->group && !attr->transfer &&
2827 		    priv->sh->tunnel_header_0_1)
2828 			valid_mask = &nic_mask;
2829 	}
2830 	ret = mlx5_flow_item_acceptable
2831 		(item, (const uint8_t *)mask,
2832 		 (const uint8_t *)valid_mask,
2833 		 sizeof(struct rte_flow_item_vxlan),
2834 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2835 	if (ret < 0)
2836 		return ret;
2837 	if (spec) {
2838 		memcpy(&id.vni[1], spec->vni, 3);
		/* Combine the 24-bit VNI from the spec with its mask. */
		id.vni[1] &= mask->vni[0];
		id.vni[2] &= mask->vni[1];
		id.vni[3] &= mask->vni[2];
2840 	}
2841 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2842 		return rte_flow_error_set(error, ENOTSUP,
2843 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2844 					  "VXLAN tunnel must be fully defined");
2845 	return 0;
2846 }
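
/*
 * Illustrative pattern satisfying the VXLAN checks above: the outer
 * L2/L3/L4 chain is fully specified, the outer UDP destination port is the
 * IANA-assigned VXLAN port and the VNI relies on the default full mask.
 */
static __rte_unused const struct rte_flow_item mlx5_example_vxlan_pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
	{
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.spec = &(const struct rte_flow_item_udp){
			.hdr.dst_port = RTE_BE16(MLX5_UDP_PORT_VXLAN),
		},
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
		.spec = &(const struct rte_flow_item_vxlan){
			.vni = "\x00\x01\x02",
		},
	},
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};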
2847 
2848 /**
2849  * Validate VXLAN_GPE item.
2850  *
2851  * @param[in] item
2852  *   Item specification.
2853  * @param[in] item_flags
2854  *   Bit-fields that hold the items detected until now.
2855  * @param[in] dev
2856  *   Pointer to the Ethernet device structure.
2859  * @param[out] error
2860  *   Pointer to error structure.
2861  *
2862  * @return
2863  *   0 on success, a negative errno value otherwise and rte_errno is set.
2864  */
2865 int
2866 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
2867 				  uint64_t item_flags,
2868 				  struct rte_eth_dev *dev,
2869 				  struct rte_flow_error *error)
2870 {
2871 	struct mlx5_priv *priv = dev->data->dev_private;
2872 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
2873 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
2874 	int ret;
2875 	union vni {
2876 		uint32_t vlan_id;
2877 		uint8_t vni[4];
2878 	} id = { .vlan_id = 0, };
2879 
2880 	if (!priv->sh->config.l3_vxlan_en)
2881 		return rte_flow_error_set(error, ENOTSUP,
2882 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2883 					  "L3 VXLAN is not enabled by device"
2884 					  " parameter and/or not configured in"
2885 					  " firmware");
2886 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2887 		return rte_flow_error_set(error, ENOTSUP,
2888 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2889 					  "multiple tunnel layers not"
2890 					  " supported");
2891 	/*
2892 	 * Verify only UDPv4 is present as defined in
2893 	 * https://tools.ietf.org/html/draft-ietf-nvo3-vxlan-gpe
2894 	 */
2895 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2896 		return rte_flow_error_set(error, EINVAL,
2897 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2898 					  "no outer UDP layer found");
2899 	if (!mask)
2900 		mask = &rte_flow_item_vxlan_gpe_mask;
2901 	ret = mlx5_flow_item_acceptable
2902 		(item, (const uint8_t *)mask,
2903 		 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2904 		 sizeof(struct rte_flow_item_vxlan_gpe),
2905 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2906 	if (ret < 0)
2907 		return ret;
2908 	if (spec) {
2909 		if (spec->protocol)
2910 			return rte_flow_error_set(error, ENOTSUP,
2911 						  RTE_FLOW_ERROR_TYPE_ITEM,
2912 						  item,
2913 						  "VxLAN-GPE protocol"
2914 						  " not supported");
2915 		memcpy(&id.vni[1], spec->vni, 3);
		/* Combine the 24-bit VNI from the spec with its mask. */
		id.vni[1] &= mask->vni[0];
		id.vni[2] &= mask->vni[1];
		id.vni[3] &= mask->vni[2];
2917 	}
2918 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2919 		return rte_flow_error_set(error, ENOTSUP,
2920 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2921 					  "VXLAN-GPE tunnel must be fully"
2922 					  " defined");
2923 	return 0;
2924 }

2925 /**
2926  * Validate GRE Key item.
2927  *
2928  * @param[in] item
2929  *   Item specification.
2930  * @param[in] item_flags
2931  *   Bit-fields that hold the items detected until now.
2932  * @param[in] gre_item
2933  *   Pointer to gre_item
2934  * @param[out] error
2935  *   Pointer to error structure.
2936  *
2937  * @return
2938  *   0 on success, a negative errno value otherwise and rte_errno is set.
2939  */
2940 int
2941 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2942 				uint64_t item_flags,
2943 				const struct rte_flow_item *gre_item,
2944 				struct rte_flow_error *error)
2945 {
2946 	const rte_be32_t *mask = item->mask;
2947 	int ret = 0;
2948 	rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2949 	const struct rte_flow_item_gre *gre_spec;
2950 	const struct rte_flow_item_gre *gre_mask;
2951 
2952 	if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2953 		return rte_flow_error_set(error, ENOTSUP,
2954 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2955 					  "Multiple GRE keys are not supported");
2956 	if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2957 		return rte_flow_error_set(error, ENOTSUP,
2958 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2959 					  "No preceding GRE header");
2960 	if (item_flags & MLX5_FLOW_LAYER_INNER)
2961 		return rte_flow_error_set(error, ENOTSUP,
2962 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2963 					  "GRE key following a wrong item");
2964 	gre_mask = gre_item->mask;
2965 	if (!gre_mask)
2966 		gre_mask = &rte_flow_item_gre_mask;
2967 	gre_spec = gre_item->spec;
2968 	if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2969 			 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2970 		return rte_flow_error_set(error, EINVAL,
2971 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2972 					  "Key bit must be on");
2973 
2974 	if (!mask)
2975 		mask = &gre_key_default_mask;
2976 	ret = mlx5_flow_item_acceptable
2977 		(item, (const uint8_t *)mask,
2978 		 (const uint8_t *)&gre_key_default_mask,
2979 		 sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2980 	return ret;
2981 }
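
/*
 * Illustrative GRE/GRE_KEY item pair accepted above: the GRE spec and mask
 * set the K bit (0x2000 in c_rsvd0_ver), which the key item requires.
 */
static __rte_unused const struct rte_flow_item mlx5_example_gre_key_items[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_GRE,
		.spec = &(const struct rte_flow_item_gre){
			.c_rsvd0_ver = RTE_BE16(0x2000),
		},
		.mask = &(const struct rte_flow_item_gre){
			.c_rsvd0_ver = RTE_BE16(0x2000),
		},
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
		.spec = &(const rte_be32_t){ RTE_BE32(0x2a) },
	},
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};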
2982 
2983 /**
2984  * Validate GRE optional item.
2985  *
2986  * @param[in] dev
2987  *   Pointer to the Ethernet device structure.
2988  * @param[in] item
2989  *   Item specification.
2990  * @param[in] item_flags
2991  *   Bit-fields that hold the items detected until now.
2992  * @param[in] attr
2993  *   Flow rule attributes.
2994  * @param[in] gre_item
2995  *   Pointer to gre_item
2996  * @param[out] error
2997  *   Pointer to error structure.
2998  *
2999  * @return
3000  *   0 on success, a negative errno value otherwise and rte_errno is set.
3001  */
3002 int
3003 mlx5_flow_validate_item_gre_option(struct rte_eth_dev *dev,
3004 				   const struct rte_flow_item *item,
3005 				   uint64_t item_flags,
3006 				   const struct rte_flow_attr *attr,
3007 				   const struct rte_flow_item *gre_item,
3008 				   struct rte_flow_error *error)
3009 {
3010 	const struct rte_flow_item_gre *gre_spec = gre_item->spec;
3011 	const struct rte_flow_item_gre *gre_mask = gre_item->mask;
3012 	const struct rte_flow_item_gre_opt *spec = item->spec;
3013 	const struct rte_flow_item_gre_opt *mask = item->mask;
3014 	struct mlx5_priv *priv = dev->data->dev_private;
3015 	int ret = 0;
3016 	struct rte_flow_item_gre_opt nic_mask = {
3017 		.checksum_rsvd = {
3018 			.checksum = RTE_BE16(UINT16_MAX),
3019 			.reserved1 = 0x0,
3020 		},
3021 		.key = {
3022 			.key = RTE_BE32(UINT32_MAX),
3023 		},
3024 		.sequence = {
3025 			.sequence = RTE_BE32(UINT32_MAX),
3026 		},
3027 	};
3028 
3029 	if (!(item_flags & MLX5_FLOW_LAYER_GRE))
3030 		return rte_flow_error_set(error, ENOTSUP,
3031 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3032 					  "No preceding GRE header");
3033 	if (item_flags & MLX5_FLOW_LAYER_INNER)
3034 		return rte_flow_error_set(error, ENOTSUP,
3035 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3036 					  "GRE option following a wrong item");
3037 	if (!spec || !mask)
3038 		return rte_flow_error_set(error, EINVAL,
3039 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3040 					  "At least one field of gre_option (checksum/key/sequence) must be specified");
3041 	if (!gre_mask)
3042 		gre_mask = &rte_flow_item_gre_mask;
3043 	if (mask->checksum_rsvd.checksum)
3044 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x8000)) &&
3045 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x8000)))
3046 			return rte_flow_error_set(error, EINVAL,
3047 						  RTE_FLOW_ERROR_TYPE_ITEM,
3048 						  item,
3049 						  "Checksum bit must be on");
3050 	if (mask->key.key)
3051 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
3052 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
3053 			return rte_flow_error_set(error, EINVAL,
3054 						  RTE_FLOW_ERROR_TYPE_ITEM,
3055 						  item, "Key bit must be on");
3056 	if (mask->sequence.sequence)
3057 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x1000)) &&
3058 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x1000)))
3059 			return rte_flow_error_set(error, EINVAL,
3060 						  RTE_FLOW_ERROR_TYPE_ITEM,
3061 						  item,
3062 						  "Sequence bit must be on");
3063 	if (mask->checksum_rsvd.checksum || mask->sequence.sequence) {
3064 		if (priv->sh->steering_format_version ==
3065 		    MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
3066 		    ((attr->group || attr->transfer) &&
3067 		     !priv->sh->misc5_cap) ||
3068 		    (!(priv->sh->tunnel_header_0_1 &&
3069 		       priv->sh->tunnel_header_2_3) &&
3070 		    !attr->group && !attr->transfer))
3071 			return rte_flow_error_set(error, EINVAL,
3072 						  RTE_FLOW_ERROR_TYPE_ITEM,
3073 						  item,
3074 						  "Checksum/Sequence not supported");
3075 	}
3076 	ret = mlx5_flow_item_acceptable
3077 		(item, (const uint8_t *)mask,
3078 		 (const uint8_t *)&nic_mask,
3079 		 sizeof(struct rte_flow_item_gre_opt),
3080 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3081 	return ret;
3082 }
3083 
3084 /**
3085  * Validate GRE item.
3086  *
3087  * @param[in] item
3088  *   Item specification.
3089  * @param[in] item_flags
3090  *   Bit-fields that hold the items detected until now.
3091  * @param[in] target_protocol
3092  *   The next protocol in the previous item.
3093  * @param[out] error
3094  *   Pointer to error structure.
3095  *
3096  * @return
3097  *   0 on success, a negative errno value otherwise and rte_errno is set.
3098  */
3099 int
3100 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
3101 			    uint64_t item_flags,
3102 			    uint8_t target_protocol,
3103 			    struct rte_flow_error *error)
3104 {
3105 	const struct rte_flow_item_gre *spec __rte_unused = item->spec;
3106 	const struct rte_flow_item_gre *mask = item->mask;
3107 	int ret;
3108 	const struct rte_flow_item_gre nic_mask = {
3109 		.c_rsvd0_ver = RTE_BE16(0xB000),
3110 		.protocol = RTE_BE16(UINT16_MAX),
3111 	};
3112 
3113 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3114 		return rte_flow_error_set(error, EINVAL,
3115 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3116 					  "protocol filtering not compatible"
3117 					  " with this GRE layer");
3118 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3119 		return rte_flow_error_set(error, ENOTSUP,
3120 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3121 					  "multiple tunnel layers not"
3122 					  " supported");
3123 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3124 		return rte_flow_error_set(error, ENOTSUP,
3125 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3126 					  "L3 Layer is missing");
3127 	if (!mask)
3128 		mask = &rte_flow_item_gre_mask;
3129 	ret = mlx5_flow_item_acceptable
3130 		(item, (const uint8_t *)mask,
3131 		 (const uint8_t *)&nic_mask,
3132 		 sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
3133 		 error);
3134 	if (ret < 0)
3135 		return ret;
3136 #ifndef HAVE_MLX5DV_DR
3137 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
3138 	if (spec && (spec->protocol & mask->protocol))
3139 		return rte_flow_error_set(error, ENOTSUP,
3140 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3141 					  "without MPLS support the"
3142 					  " specification cannot be used for"
3143 					  " filtering");
3144 #endif
3145 #endif
3146 	return 0;
3147 }
3148 
3149 /**
3150  * Validate Geneve item.
3151  *
3152  * @param[in] item
3153  *   Item specification.
3154  * @param[in] item_flags
3155  *   Bit-fields that hold the items detected until now.
3156  * @param[in] dev
3157  *   Pointer to the Ethernet device structure.
3158  * @param[out] error
3159  *   Pointer to error structure.
3160  *
3161  * @return
3162  *   0 on success, a negative errno value otherwise and rte_errno is set.
3163  */
3165 int
3166 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
3167 			       uint64_t item_flags,
3168 			       struct rte_eth_dev *dev,
3169 			       struct rte_flow_error *error)
3170 {
3171 	struct mlx5_priv *priv = dev->data->dev_private;
3172 	const struct rte_flow_item_geneve *spec = item->spec;
3173 	const struct rte_flow_item_geneve *mask = item->mask;
3174 	int ret;
3175 	uint16_t gbhdr;
3176 	uint8_t opt_len = priv->sh->cdev->config.hca_attr.geneve_max_opt_len ?
3177 			  MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
3178 	const struct rte_flow_item_geneve nic_mask = {
3179 		.ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
3180 		.vni = "\xff\xff\xff",
3181 		.protocol = RTE_BE16(UINT16_MAX),
3182 	};
3183 
3184 	if (!priv->sh->cdev->config.hca_attr.tunnel_stateless_geneve_rx)
3185 		return rte_flow_error_set(error, ENOTSUP,
3186 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3187 					  "Geneve is not enabled by device"
3188 					  " parameter and/or not configured in"
3189 					  " firmware");
3190 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3191 		return rte_flow_error_set(error, ENOTSUP,
3192 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3193 					  "multiple tunnel layers not"
3194 					  " supported");
3195 	/*
3196 	 * Verify only UDPv4 is present as defined in
3197 	 * https://tools.ietf.org/html/rfc8926
3198 	 */
3199 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
3200 		return rte_flow_error_set(error, EINVAL,
3201 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3202 					  "no outer UDP layer found");
3203 	if (!mask)
3204 		mask = &rte_flow_item_geneve_mask;
3205 	ret = mlx5_flow_item_acceptable
3206 				  (item, (const uint8_t *)mask,
3207 				   (const uint8_t *)&nic_mask,
3208 				   sizeof(struct rte_flow_item_geneve),
3209 				   MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3210 	if (ret)
3211 		return ret;
3212 	if (spec) {
3213 		gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
3214 		if (MLX5_GENEVE_VER_VAL(gbhdr) ||
3215 		     MLX5_GENEVE_CRITO_VAL(gbhdr) ||
3216 		     MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
3217 			return rte_flow_error_set(error, ENOTSUP,
3218 						  RTE_FLOW_ERROR_TYPE_ITEM,
3219 						  item,
3220 						  "Geneve protocol unsupported"
3221 						  " fields are being used");
3222 		if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
3223 			return rte_flow_error_set
3224 					(error, ENOTSUP,
3225 					 RTE_FLOW_ERROR_TYPE_ITEM,
3226 					 item,
3227 					 "Unsupported Geneve options length");
3228 	}
3229 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
3230 		return rte_flow_error_set
3231 				    (error, ENOTSUP,
3232 				     RTE_FLOW_ERROR_TYPE_ITEM, item,
3233 				     "Geneve tunnel must be fully defined");
3234 	return 0;
3235 }
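
/*
 * Illustrative Geneve item passing the header checks above: version, OAM
 * and critical bits zero, no options, protocol and VNI matched through the
 * default mask.
 */
static __rte_unused const struct rte_flow_item mlx5_example_geneve_item = {
	.type = RTE_FLOW_ITEM_TYPE_GENEVE,
	.spec = &(const struct rte_flow_item_geneve){
		.ver_opt_len_o_c_rsvd0 = RTE_BE16(0),
		.vni = "\x00\x00\x2a",
		.protocol = RTE_BE16(RTE_ETHER_TYPE_IPV4),
	},
};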
3236 
3237 /**
3238  * Validate Geneve TLV option item.
3239  *
3240  * @param[in] item
3241  *   Item specification.
3242  * @param[in] last_item
3243  *   Previous validated item in the pattern items.
3244  * @param[in] geneve_item
3245  *   Previous GENEVE item specification.
3246  * @param[in] dev
3247  *   Pointer to the rte_eth_dev structure.
3248  * @param[out] error
3249  *   Pointer to error structure.
3250  *
3251  * @return
3252  *   0 on success, a negative errno value otherwise and rte_errno is set.
3253  */
3254 int
3255 mlx5_flow_validate_item_geneve_opt(const struct rte_flow_item *item,
3256 				   uint64_t last_item,
3257 				   const struct rte_flow_item *geneve_item,
3258 				   struct rte_eth_dev *dev,
3259 				   struct rte_flow_error *error)
3260 {
3261 	struct mlx5_priv *priv = dev->data->dev_private;
3262 	struct mlx5_dev_ctx_shared *sh = priv->sh;
3263 	struct mlx5_geneve_tlv_option_resource *geneve_opt_resource;
3264 	struct mlx5_hca_attr *hca_attr = &sh->cdev->config.hca_attr;
3265 	uint8_t data_max_supported =
3266 			hca_attr->max_geneve_tlv_option_data_len * 4;
3267 	const struct rte_flow_item_geneve *geneve_spec;
3268 	const struct rte_flow_item_geneve *geneve_mask;
3269 	const struct rte_flow_item_geneve_opt *spec = item->spec;
3270 	const struct rte_flow_item_geneve_opt *mask = item->mask;
3271 	unsigned int i;
3272 	unsigned int data_len;
3273 	uint8_t tlv_option_len;
3274 	uint16_t optlen_m, optlen_v;
3275 	const struct rte_flow_item_geneve_opt full_mask = {
3276 		.option_class = RTE_BE16(0xffff),
3277 		.option_type = 0xff,
3278 		.option_len = 0x1f,
3279 	};
3280 
3281 	if (!mask)
3282 		mask = &rte_flow_item_geneve_opt_mask;
3283 	if (!spec)
3284 		return rte_flow_error_set
3285 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3286 			"Geneve TLV opt class/type/length must be specified");
3287 	if ((uint32_t)spec->option_len > MLX5_GENEVE_OPTLEN_MASK)
3288 		return rte_flow_error_set
3289 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3290 			"Geneve TLV opt length exceeds the limit (31)");
3291 	/* Check if class, type and length masks are full. */
3292 	if (full_mask.option_class != mask->option_class ||
3293 	    full_mask.option_type != mask->option_type ||
3294 	    full_mask.option_len != (mask->option_len & full_mask.option_len))
3295 		return rte_flow_error_set
3296 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3297 			"Geneve TLV opt class/type/length masks must be full");
3298 	/* Check if length is supported */
3299 	if ((uint32_t)spec->option_len >
3300 			hca_attr->max_geneve_tlv_option_data_len)
3301 		return rte_flow_error_set
3302 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3303 			"Geneve TLV opt length not supported");
3304 	if (hca_attr->max_geneve_tlv_options > 1)
3305 		DRV_LOG(DEBUG,
3306 			"device supports more than 1 GENEVE TLV option");
3307 	/* Check for a preceding GENEVE item. */
3308 	if (!geneve_item || !(last_item & MLX5_FLOW_LAYER_GENEVE))
3309 		return rte_flow_error_set
3310 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3311 			"Geneve opt item must be preceded with Geneve item");
3312 	geneve_spec = geneve_item->spec;
3313 	geneve_mask = geneve_item->mask ? geneve_item->mask :
3314 					  &rte_flow_item_geneve_mask;
3315 	/* Check that the TLV option length fits within the GENEVE header optlen. */
3316 	if (geneve_spec && (geneve_mask->ver_opt_len_o_c_rsvd0 ||
3317 			    geneve_spec->ver_opt_len_o_c_rsvd0)) {
3318 		tlv_option_len = spec->option_len & mask->option_len;
3319 		optlen_v = rte_be_to_cpu_16(geneve_spec->ver_opt_len_o_c_rsvd0);
3320 		optlen_v = MLX5_GENEVE_OPTLEN_VAL(optlen_v);
3321 		optlen_m = rte_be_to_cpu_16(geneve_mask->ver_opt_len_o_c_rsvd0);
3322 		optlen_m = MLX5_GENEVE_OPTLEN_VAL(optlen_m);
3323 		if ((optlen_v & optlen_m) <= tlv_option_len)
3324 			return rte_flow_error_set
3325 				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3326 				 "GENEVE TLV option length exceeds optlen");
3327 	}
3328 	/* Check if length is 0 or data is 0. */
3329 	/* Reject a NULL data pointer or a zero option length. */
3330 		return rte_flow_error_set
3331 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3332 			"Geneve TLV opt with zero data/length not supported");
3333 	/* Check not all data & mask are 0. */
3334 	/* Check that the data and mask are not all zeros. */
3335 	if (mask->data == NULL) {
3336 		for (i = 0; i < data_len; i++)
3337 			if (spec->data[i])
3338 				break;
3339 		if (i == data_len)
3340 			return rte_flow_error_set(error, ENOTSUP,
3341 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3342 				"Can't match on Geneve option data 0");
3343 	} else {
3344 		for (i = 0; i < data_len; i++)
3345 			if (spec->data[i] & mask->data[i])
3346 				break;
3347 		if (i == data_len)
3348 			return rte_flow_error_set(error, ENOTSUP,
3349 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3350 				"Can't match on Geneve option data and mask 0");
3351 		/* Check data mask supported. */
3352 		for (i = data_max_supported; i < data_len ; i++)
3353 			if (mask->data[i])
3354 				return rte_flow_error_set(error, ENOTSUP,
3355 					RTE_FLOW_ERROR_TYPE_ITEM, item,
3356 					"Data mask is of unsupported size");
3357 	}
3358 	/* Check the GENEVE option is supported by the NIC. */
3359 	if (!hca_attr->geneve_tlv_opt)
3360 		return rte_flow_error_set
3361 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3362 			"Geneve TLV opt not supported");
3363 	/* Check if we already have geneve option with different type/class. */
3364 	rte_spinlock_lock(&sh->geneve_tlv_opt_sl);
3365 	geneve_opt_resource = sh->geneve_tlv_option_resource;
3366 	if (geneve_opt_resource != NULL)
3367 		if (geneve_opt_resource->option_class != spec->option_class ||
3368 		    geneve_opt_resource->option_type != spec->option_type ||
3369 		    geneve_opt_resource->length != spec->option_len) {
3370 			rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3371 			return rte_flow_error_set(error, ENOTSUP,
3372 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3373 				"Only one Geneve TLV option supported");
3374 		}
3375 	rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3376 	return 0;
3377 }
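
/*
 * Editorial sketch (assumption, not part of the driver): a minimal pattern
 * that satisfies the checks in mlx5_flow_validate_item_geneve_opt() above.
 * The class/type/length values and the option data are illustrative only;
 * the outer items are left without spec for brevity. Full class/type/length
 * masks and non-zero option data are mandatory, and option_len counts
 * 4-byte words (at most 31).
 */
static __rte_unused void
example_geneve_opt_pattern(struct rte_flow_item items[6])
{
	static uint32_t opt_data[1] = { RTE_BE32(0xdeadbeef) };
	static uint32_t opt_data_mask[1] = { RTE_BE32(0xffffffff) };
	static const struct rte_flow_item_geneve_opt opt_spec = {
		.option_class = RTE_BE16(0x0102), /* Assumed option class. */
		.option_type = 0x42, /* Assumed option type. */
		.option_len = 1, /* One 4-byte data word. */
		.data = opt_data,
	};
	static const struct rte_flow_item_geneve_opt opt_mask = {
		.option_class = RTE_BE16(0xffff), /* Full masks are mandatory. */
		.option_type = 0xff,
		.option_len = 0x1f,
		.data = opt_data_mask,
	};

	items[0] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_ETH };
	items[1] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_IPV4 };
	items[2] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_UDP };
	items[3] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_GENEVE };
	items[4] = (struct rte_flow_item){
		.type = RTE_FLOW_ITEM_TYPE_GENEVE_OPT,
		.spec = &opt_spec,
		.mask = &opt_mask,
	};
	items[5] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_END };
}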
3378 
3379 /**
3380  * Validate MPLS item.
3381  *
3382  * @param[in] dev
3383  *   Pointer to the rte_eth_dev structure.
3384  * @param[in] item
3385  *   Item specification.
3386  * @param[in] item_flags
3387  *   Bit-fields that holds the items detected until now.
3388  * @param[in] prev_layer
3389  *   The protocol layer indicated in previous item.
3390  * @param[out] error
3391  *   Pointer to error structure.
3392  *
3393  * @return
3394  *   0 on success, a negative errno value otherwise and rte_errno is set.
3395  */
3396 int
3397 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
3398 			     const struct rte_flow_item *item __rte_unused,
3399 			     uint64_t item_flags __rte_unused,
3400 			     uint64_t prev_layer __rte_unused,
3401 			     struct rte_flow_error *error)
3402 {
3403 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
3404 	const struct rte_flow_item_mpls *mask = item->mask;
3405 	struct mlx5_priv *priv = dev->data->dev_private;
3406 	int ret;
3407 
3408 	if (!priv->sh->dev_cap.mpls_en)
3409 		return rte_flow_error_set(error, ENOTSUP,
3410 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3411 					  "MPLS not supported or"
3412 					  " disabled in firmware"
3413 					  " configuration.");
3414 	/* MPLS over UDP or GRE is allowed. */
3415 	if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L4_UDP |
3416 			    MLX5_FLOW_LAYER_GRE |
3417 			    MLX5_FLOW_LAYER_GRE_KEY)))
3418 		return rte_flow_error_set(error, EINVAL,
3419 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3420 					  "protocol filtering not compatible"
3421 					  " with MPLS layer");
3422 	/* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
3423 	if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
3424 	    !(item_flags & MLX5_FLOW_LAYER_GRE))
3425 		return rte_flow_error_set(error, ENOTSUP,
3426 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3427 					  "multiple tunnel layers not"
3428 					  " supported");
3429 	if (!mask)
3430 		mask = &rte_flow_item_mpls_mask;
3431 	ret = mlx5_flow_item_acceptable
3432 		(item, (const uint8_t *)mask,
3433 		 (const uint8_t *)&rte_flow_item_mpls_mask,
3434 		 sizeof(struct rte_flow_item_mpls),
3435 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3436 	if (ret < 0)
3437 		return ret;
3438 	return 0;
3439 #else
3440 	return rte_flow_error_set(error, ENOTSUP,
3441 				  RTE_FLOW_ERROR_TYPE_ITEM, item,
3442 				  "MPLS is not supported by Verbs, please"
3443 				  " update the Verbs library.");
3444 #endif
3445 }
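
/*
 * Editorial sketch (assumption, not part of the driver): a pattern skeleton
 * accepted by mlx5_flow_validate_item_mpls() above. MPLS must follow an
 * outer UDP or a GRE/GRE-key layer; item specs are omitted for brevity.
 */
static __rte_unused void
example_mpls_over_gre_pattern(struct rte_flow_item items[5])
{
	items[0] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_ETH };
	items[1] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_IPV4 };
	items[2] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_GRE };
	items[3] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_MPLS };
	items[4] = (struct rte_flow_item){ .type = RTE_FLOW_ITEM_TYPE_END };
}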
3446 
3447 /**
3448  * Validate NVGRE item.
3449  *
3450  * @param[in] item
3451  *   Item specification.
3452  * @param[in] item_flags
3453  *   Bit flags to mark detected items.
3454  * @param[in] target_protocol
3455  *   The next protocol in the previous item.
3456  * @param[out] error
3457  *   Pointer to error structure.
3458  *
3459  * @return
3460  *   0 on success, a negative errno value otherwise and rte_errno is set.
3461  */
3462 int
3463 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
3464 			      uint64_t item_flags,
3465 			      uint8_t target_protocol,
3466 			      struct rte_flow_error *error)
3467 {
3468 	const struct rte_flow_item_nvgre *mask = item->mask;
3469 	int ret;
3470 
3471 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3472 		return rte_flow_error_set(error, EINVAL,
3473 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3474 					  "protocol filtering not compatible"
3475 					  " with this GRE layer");
3476 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3477 		return rte_flow_error_set(error, ENOTSUP,
3478 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3479 					  "multiple tunnel layers not"
3480 					  " supported");
3481 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3482 		return rte_flow_error_set(error, ENOTSUP,
3483 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3484 					  "L3 Layer is missing");
3485 	if (!mask)
3486 		mask = &rte_flow_item_nvgre_mask;
3487 	ret = mlx5_flow_item_acceptable
3488 		(item, (const uint8_t *)mask,
3489 		 (const uint8_t *)&rte_flow_item_nvgre_mask,
3490 		 sizeof(struct rte_flow_item_nvgre),
3491 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3492 	if (ret < 0)
3493 		return ret;
3494 	return 0;
3495 }
3496 
3497 /**
3498  * Validate eCPRI item.
3499  *
3500  * @param[in] item
3501  *   Item specification.
3502  * @param[in] item_flags
3503  *   Bit-fields that holds the items detected until now.
3504  * @param[in] last_item
3505  *   Previous validated item in the pattern items.
3506  * @param[in] ether_type
3507  *   Type in the ethernet layer header (including dot1q).
3508  * @param[in] acc_mask
3509  *   Acceptable mask, if NULL the default internal mask
3510  *   will be used to check whether item fields are supported.
3511  * @param[out] error
3512  *   Pointer to error structure.
3513  *
3514  * @return
3515  *   0 on success, a negative errno value otherwise and rte_errno is set.
3516  */
3517 int
3518 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
3519 			      uint64_t item_flags,
3520 			      uint64_t last_item,
3521 			      uint16_t ether_type,
3522 			      const struct rte_flow_item_ecpri *acc_mask,
3523 			      struct rte_flow_error *error)
3524 {
3525 	const struct rte_flow_item_ecpri *mask = item->mask;
3526 	const struct rte_flow_item_ecpri nic_mask = {
3527 		.hdr = {
3528 			.common = {
3529 				.u32 =
3530 				RTE_BE32(((const struct rte_ecpri_common_hdr) {
3531 					.type = 0xFF,
3532 					}).u32),
3533 			},
3534 			.dummy[0] = 0xFFFFFFFF,
3535 		},
3536 	};
3537 	const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
3538 					MLX5_FLOW_LAYER_OUTER_VLAN);
3539 	struct rte_flow_item_ecpri mask_lo;
3540 
3541 	if (!(last_item & outer_l2_vlan) &&
3542 	    last_item != MLX5_FLOW_LAYER_OUTER_L4_UDP)
3543 		return rte_flow_error_set(error, EINVAL,
3544 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3545 					  "eCPRI can only follow L2/VLAN layer or UDP layer");
3546 	if ((last_item & outer_l2_vlan) && ether_type &&
3547 	    ether_type != RTE_ETHER_TYPE_ECPRI)
3548 		return rte_flow_error_set(error, EINVAL,
3549 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3550 					  "eCPRI cannot follow an L2/VLAN layer whose ether type is not 0xAEFE");
3551 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3552 		return rte_flow_error_set(error, EINVAL,
3553 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3554 					  "eCPRI with tunnel is not supported right now");
3555 	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
3556 		return rte_flow_error_set(error, ENOTSUP,
3557 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3558 					  "multiple L3 layers not supported");
3559 	else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
3560 		return rte_flow_error_set(error, EINVAL,
3561 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3562 					  "eCPRI cannot coexist with a TCP layer");
3563 	/* Per the specification, eCPRI can be carried over a UDP layer. */
3564 	else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
3565 		return rte_flow_error_set(error, EINVAL,
3566 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3567 					  "eCPRI over UDP layer is not yet supported");
3568 	/* Mask for type field in common header could be zero. */
3569 	if (!mask)
3570 		mask = &rte_flow_item_ecpri_mask;
3571 	mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
3572 	/* Input mask is in big-endian format. */
3573 	if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
3574 		return rte_flow_error_set(error, EINVAL,
3575 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3576 					  "partial mask is not supported for protocol");
3577 	else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
3578 		return rte_flow_error_set(error, EINVAL,
3579 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3580 					  "message header mask must be after a type mask");
3581 	return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
3582 					 acc_mask ? (const uint8_t *)acc_mask
3583 						  : (const uint8_t *)&nic_mask,
3584 					 sizeof(struct rte_flow_item_ecpri),
3585 					 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3586 }
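
/*
 * Editorial sketch (assumption, not part of the driver): building an eCPRI
 * spec/mask pair that passes the checks in mlx5_flow_validate_item_ecpri()
 * above. The common header type mask must be either 0 or fully set before
 * any message header (dummy) bits may be masked.
 */
static __rte_unused void
example_ecpri_item(struct rte_flow_item *item,
		   struct rte_flow_item_ecpri *spec,
		   struct rte_flow_item_ecpri *mask)
{
	memset(spec, 0, sizeof(*spec));
	memset(mask, 0, sizeof(*mask));
	/* Match IQ data messages (type 0); the value is big-endian. */
	spec->hdr.common.u32 =
		RTE_BE32(((const struct rte_ecpri_common_hdr) {
			.type = RTE_ECPRI_MSG_TYPE_IQ_DATA,
		}).u32);
	/* A full type mask must precede any message header mask. */
	mask->hdr.common.u32 =
		RTE_BE32(((const struct rte_ecpri_common_hdr) {
			.type = 0xFF,
		}).u32);
	mask->hdr.dummy[0] = 0xFFFFFFFF;
	*item = (struct rte_flow_item){
		.type = RTE_FLOW_ITEM_TYPE_ECPRI,
		.spec = spec,
		.mask = mask,
	};
}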
3587 
3588 static int
3589 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
3590 		   const struct rte_flow_attr *attr __rte_unused,
3591 		   const struct rte_flow_item items[] __rte_unused,
3592 		   const struct rte_flow_action actions[] __rte_unused,
3593 		   bool external __rte_unused,
3594 		   int hairpin __rte_unused,
3595 		   struct rte_flow_error *error)
3596 {
3597 	return rte_flow_error_set(error, ENOTSUP,
3598 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3599 }
3600 
3601 static struct mlx5_flow *
3602 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
3603 		  const struct rte_flow_attr *attr __rte_unused,
3604 		  const struct rte_flow_item items[] __rte_unused,
3605 		  const struct rte_flow_action actions[] __rte_unused,
3606 		  struct rte_flow_error *error)
3607 {
3608 	rte_flow_error_set(error, ENOTSUP,
3609 			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3610 	return NULL;
3611 }
3612 
3613 static int
3614 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
3615 		    struct mlx5_flow *dev_flow __rte_unused,
3616 		    const struct rte_flow_attr *attr __rte_unused,
3617 		    const struct rte_flow_item items[] __rte_unused,
3618 		    const struct rte_flow_action actions[] __rte_unused,
3619 		    struct rte_flow_error *error)
3620 {
3621 	return rte_flow_error_set(error, ENOTSUP,
3622 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3623 }
3624 
3625 static int
3626 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
3627 		struct rte_flow *flow __rte_unused,
3628 		struct rte_flow_error *error)
3629 {
3630 	return rte_flow_error_set(error, ENOTSUP,
3631 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3632 }
3633 
3634 static void
3635 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
3636 		 struct rte_flow *flow __rte_unused)
3637 {
3638 }
3639 
3640 static void
3641 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
3642 		  struct rte_flow *flow __rte_unused)
3643 {
3644 }
3645 
3646 static int
3647 flow_null_query(struct rte_eth_dev *dev __rte_unused,
3648 		struct rte_flow *flow __rte_unused,
3649 		const struct rte_flow_action *actions __rte_unused,
3650 		void *data __rte_unused,
3651 		struct rte_flow_error *error)
3652 {
3653 	return rte_flow_error_set(error, ENOTSUP,
3654 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3655 }
3656 
3657 static int
3658 flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused,
3659 		      uint32_t domains __rte_unused,
3660 		      uint32_t flags __rte_unused)
3661 {
3662 	return 0;
3663 }
3664 
3665 /* Void driver to protect from null pointer reference. */
3666 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
3667 	.validate = flow_null_validate,
3668 	.prepare = flow_null_prepare,
3669 	.translate = flow_null_translate,
3670 	.apply = flow_null_apply,
3671 	.remove = flow_null_remove,
3672 	.destroy = flow_null_destroy,
3673 	.query = flow_null_query,
3674 	.sync_domain = flow_null_sync_domain,
3675 };
3676 
3677 /**
3678  * Select flow driver type according to flow attributes and device
3679  * configuration.
3680  *
3681  * @param[in] dev
3682  *   Pointer to the dev structure.
3683  * @param[in] attr
3684  *   Pointer to the flow attributes.
3685  *
3686  * @return
3687  *   Flow driver type on success, MLX5_FLOW_TYPE_MAX otherwise.
3688  */
3689 static enum mlx5_flow_drv_type
3690 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
3691 {
3692 	struct mlx5_priv *priv = dev->data->dev_private;
3693 	/* The OS can determine a specific flow type (DV, VERBS) first. */
3694 	enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
3695 
3696 	if (type != MLX5_FLOW_TYPE_MAX)
3697 		return type;
3698 	/*
3699 	 * Currently when dv_flow_en == 2, only HW steering engine is
3700 	 * supported. New engines can also be chosen here if ready.
3701 	 */
3702 	if (priv->sh->config.dv_flow_en == 2)
3703 		return MLX5_FLOW_TYPE_HW;
3704 	/* If no OS specific type - continue with DV/VERBS selection */
3705 	if (attr->transfer && priv->sh->config.dv_esw_en)
3706 		type = MLX5_FLOW_TYPE_DV;
3707 	if (!attr->transfer)
3708 		type = priv->sh->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
3709 						     MLX5_FLOW_TYPE_VERBS;
3710 	return type;
3711 }
3712 
3713 #define flow_get_drv_ops(type) flow_drv_ops[type]
3714 
3715 /**
3716  * Flow driver validation API. This abstracts calling driver specific functions.
3717  * The type of flow driver is determined according to flow attributes.
3718  *
3719  * @param[in] dev
3720  *   Pointer to the dev structure.
3721  * @param[in] attr
3722  *   Pointer to the flow attributes.
3723  * @param[in] items
3724  *   Pointer to the list of items.
3725  * @param[in] actions
3726  *   Pointer to the list of actions.
3727  * @param[in] external
3728  *   This flow rule is created by a request external to the PMD.
3729  * @param[in] hairpin
3730  *   Number of hairpin TX actions, 0 means classic flow.
3731  * @param[out] error
3732  *   Pointer to the error structure.
3733  *
3734  * @return
3735  *   0 on success, a negative errno value otherwise and rte_errno is set.
3736  */
3737 static inline int
3738 flow_drv_validate(struct rte_eth_dev *dev,
3739 		  const struct rte_flow_attr *attr,
3740 		  const struct rte_flow_item items[],
3741 		  const struct rte_flow_action actions[],
3742 		  bool external, int hairpin, struct rte_flow_error *error)
3743 {
3744 	const struct mlx5_flow_driver_ops *fops;
3745 	enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
3746 
3747 	fops = flow_get_drv_ops(type);
3748 	return fops->validate(dev, attr, items, actions, external,
3749 			      hairpin, error);
3750 }
3751 
3752 /**
3753  * Flow driver preparation API. This abstracts calling driver specific
3754  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3755  * calculates the size of memory required for device flow, allocates the memory,
3756  * initializes the device flow and returns the pointer.
3757  *
3758  * @note
3759  *   This function initializes the device flow structure, such as dv or verbs
3760  *   in struct mlx5_flow. However, it is the caller's responsibility to
3761  *   initialize the rest. For example, adding the returned device flow to the
3762  *   flow->dev_flow list and setting the backward reference to the flow should
3763  *   be done outside of this function. The layers field is not filled either.
3764  *
3765  * @param[in] dev
3766  *   Pointer to the dev structure.
3767  * @param[in] attr
3768  *   Pointer to the flow attributes.
3769  * @param[in] items
3770  *   Pointer to the list of items.
3771  * @param[in] actions
3772  *   Pointer to the list of actions.
3773  * @param[in] flow_idx
3774  *   The memory pool index of this flow.
3775  * @param[out] error
3776  *   Pointer to the error structure.
3777  *
3778  * @return
3779  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
3780  */
3781 static inline struct mlx5_flow *
3782 flow_drv_prepare(struct rte_eth_dev *dev,
3783 		 const struct rte_flow *flow,
3784 		 const struct rte_flow_attr *attr,
3785 		 const struct rte_flow_item items[],
3786 		 const struct rte_flow_action actions[],
3787 		 uint32_t flow_idx,
3788 		 struct rte_flow_error *error)
3789 {
3790 	const struct mlx5_flow_driver_ops *fops;
3791 	enum mlx5_flow_drv_type type = flow->drv_type;
3792 	struct mlx5_flow *mlx5_flow = NULL;
3793 
3794 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3795 	fops = flow_get_drv_ops(type);
3796 	mlx5_flow = fops->prepare(dev, attr, items, actions, error);
3797 	if (mlx5_flow)
3798 		mlx5_flow->flow_idx = flow_idx;
3799 	return mlx5_flow;
3800 }
3801 
3802 /**
3803  * Flow driver translation API. This abstracts calling driver specific
3804  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3805  * translates a generic flow into a driver flow. flow_drv_prepare() must
3806  * precede.
3807  *
3808  * @note
3809  *   dev_flow->layers could be filled as a result of parsing during translation
3810  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
3811  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
3812  *   flow->actions could be overwritten even though all the expanded dev_flows
3813  *   have the same actions.
3814  *
3815  * @param[in] dev
3816  *   Pointer to the rte dev structure.
3817  * @param[in, out] dev_flow
3818  *   Pointer to the mlx5 flow.
3819  * @param[in] attr
3820  *   Pointer to the flow attributes.
3821  * @param[in] items
3822  *   Pointer to the list of items.
3823  * @param[in] actions
3824  *   Pointer to the list of actions.
3825  * @param[out] error
3826  *   Pointer to the error structure.
3827  *
3828  * @return
3829  *   0 on success, a negative errno value otherwise and rte_errno is set.
3830  */
3831 static inline int
3832 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
3833 		   const struct rte_flow_attr *attr,
3834 		   const struct rte_flow_item items[],
3835 		   const struct rte_flow_action actions[],
3836 		   struct rte_flow_error *error)
3837 {
3838 	const struct mlx5_flow_driver_ops *fops;
3839 	enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
3840 
3841 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3842 	fops = flow_get_drv_ops(type);
3843 	return fops->translate(dev, dev_flow, attr, items, actions, error);
3844 }
3845 
3846 /**
3847  * Flow driver apply API. This abstracts calling driver specific functions.
3848  * Parent flow (rte_flow) should have driver type (drv_type). It applies
3849  * translated driver flows on to device. flow_drv_translate() must precede.
3850  *
3851  * @param[in] dev
3852  *   Pointer to Ethernet device structure.
3853  * @param[in, out] flow
3854  *   Pointer to flow structure.
3855  * @param[out] error
3856  *   Pointer to error structure.
3857  *
3858  * @return
3859  *   0 on success, a negative errno value otherwise and rte_errno is set.
3860  */
3861 static inline int
3862 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3863 	       struct rte_flow_error *error)
3864 {
3865 	const struct mlx5_flow_driver_ops *fops;
3866 	enum mlx5_flow_drv_type type = flow->drv_type;
3867 
3868 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3869 	fops = flow_get_drv_ops(type);
3870 	return fops->apply(dev, flow, error);
3871 }
3872 
3873 /**
3874  * Flow driver destroy API. This abstracts calling driver specific functions.
3875  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3876  * on device and releases resources of the flow.
3877  *
3878  * @param[in] dev
3879  *   Pointer to Ethernet device.
3880  * @param[in, out] flow
3881  *   Pointer to flow structure.
3882  */
3883 static inline void
3884 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3885 {
3886 	const struct mlx5_flow_driver_ops *fops;
3887 	enum mlx5_flow_drv_type type = flow->drv_type;
3888 
3889 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3890 	fops = flow_get_drv_ops(type);
3891 	fops->destroy(dev, flow);
3892 }
3893 
3894 /**
3895  * Flow driver find RSS policy tbl API. This abstracts calling driver
3896  * specific functions. Parent flow (rte_flow) should have driver
3897  * type (drv_type). It will find the RSS policy table that has the rss_desc.
3898  *
3899  * @param[in] dev
3900  *   Pointer to Ethernet device.
3901  * @param[in, out] flow
3902  *   Pointer to flow structure.
3903  * @param[in] policy
3904  *   Pointer to meter policy table.
3905  * @param[in] rss_desc
3906  *   Pointer to rss_desc
3907  *   Pointer to the rss_desc.
3908 static struct mlx5_flow_meter_sub_policy *
3909 flow_drv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
3910 		struct rte_flow *flow,
3911 		struct mlx5_flow_meter_policy *policy,
3912 		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS])
3913 {
3914 	const struct mlx5_flow_driver_ops *fops;
3915 	enum mlx5_flow_drv_type type = flow->drv_type;
3916 
3917 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3918 	fops = flow_get_drv_ops(type);
3919 	return fops->meter_sub_policy_rss_prepare(dev, policy, rss_desc);
3920 }
3921 
3922 /**
3923  * Flow driver color tag rule API. This abstracts calling driver
3924  * specific functions. Parent flow (rte_flow) should have driver
3925  * type (drv_type). It will create the color tag rules in the meter hierarchy.
3926  *
3927  * @param[in] dev
3928  *   Pointer to Ethernet device.
3929  * @param[in, out] flow
3930  *   Pointer to flow structure.
3931  * @param[in] fm
3932  *   Pointer to flow meter structure.
3933  * @param[in] src_port
3934  *   The src port this extra rule should use.
3935  * @param[in] item
3936  *   The src port id match item.
3937  * @param[out] error
3938  *   Pointer to error structure.
3939  */
3940 static int
3941 flow_drv_mtr_hierarchy_rule_create(struct rte_eth_dev *dev,
3942 		struct rte_flow *flow,
3943 		struct mlx5_flow_meter_info *fm,
3944 		int32_t src_port,
3945 		const struct rte_flow_item *item,
3946 		struct rte_flow_error *error)
3947 {
3948 	const struct mlx5_flow_driver_ops *fops;
3949 	enum mlx5_flow_drv_type type = flow->drv_type;
3950 
3951 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3952 	fops = flow_get_drv_ops(type);
3953 	return fops->meter_hierarchy_rule_create(dev, fm,
3954 						src_port, item, error);
3955 }
3956 
3957 /**
3958  * Get RSS action from the action list.
3959  *
3960  * @param[in] dev
3961  *   Pointer to Ethernet device.
3962  * @param[in] actions
3963  *   Pointer to the list of actions.
3964  * @param[in] flow
3965  *   Parent flow structure pointer.
3966  *
3967  * @return
3968  *   Pointer to the RSS action if it exists, NULL otherwise.
3969  */
3970 static const struct rte_flow_action_rss*
3971 flow_get_rss_action(struct rte_eth_dev *dev,
3972 		    const struct rte_flow_action actions[])
3973 {
3974 	struct mlx5_priv *priv = dev->data->dev_private;
3975 	const struct rte_flow_action_rss *rss = NULL;
3976 	struct mlx5_meter_policy_action_container *acg;
3977 	struct mlx5_meter_policy_action_container *acy;
3978 
3979 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3980 		switch (actions->type) {
3981 		case RTE_FLOW_ACTION_TYPE_RSS:
3982 			rss = actions->conf;
3983 			break;
3984 		case RTE_FLOW_ACTION_TYPE_SAMPLE:
3985 		{
3986 			const struct rte_flow_action_sample *sample =
3987 								actions->conf;
3988 			const struct rte_flow_action *act = sample->actions;
3989 			for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++)
3990 				if (act->type == RTE_FLOW_ACTION_TYPE_RSS)
3991 					rss = act->conf;
3992 			break;
3993 		}
3994 		case RTE_FLOW_ACTION_TYPE_METER:
3995 		{
3996 			uint32_t mtr_idx;
3997 			struct mlx5_flow_meter_info *fm;
3998 			struct mlx5_flow_meter_policy *policy;
3999 			const struct rte_flow_action_meter *mtr = actions->conf;
4000 
4001 			fm = mlx5_flow_meter_find(priv, mtr->mtr_id, &mtr_idx);
4002 			if (fm && !fm->def_policy) {
4003 				policy = mlx5_flow_meter_policy_find(dev,
4004 						fm->policy_id, NULL);
4005 				MLX5_ASSERT(policy);
4006 				if (policy->is_hierarchy) {
4007 					policy =
4008 				mlx5_flow_meter_hierarchy_get_final_policy(dev,
4009 									policy);
4010 					if (!policy)
4011 						return NULL;
4012 				}
4013 				if (policy->is_rss) {
4014 					acg =
4015 					&policy->act_cnt[RTE_COLOR_GREEN];
4016 					acy =
4017 					&policy->act_cnt[RTE_COLOR_YELLOW];
4018 					if (acg->fate_action ==
4019 					    MLX5_FLOW_FATE_SHARED_RSS)
4020 						rss = acg->rss->conf;
4021 					else if (acy->fate_action ==
4022 						 MLX5_FLOW_FATE_SHARED_RSS)
4023 						rss = acy->rss->conf;
4024 				}
4025 			}
4026 			break;
4027 		}
4028 		default:
4029 			break;
4030 		}
4031 	}
4032 	return rss;
4033 }
4034 
4035 /**
4036  * Get ASO age action by index.
4037  *
4038  * @param[in] dev
4039  *   Pointer to the Ethernet device structure.
4040  * @param[in] age_idx
4041  *   Index to the ASO age action.
4042  *
4043  * @return
4044  *   The specified ASO age action.
4045  */
4046 struct mlx5_aso_age_action*
4047 flow_aso_age_get_by_idx(struct rte_eth_dev *dev, uint32_t age_idx)
4048 {
4049 	uint16_t pool_idx = age_idx & UINT16_MAX;
4050 	uint16_t offset = (age_idx >> 16) & UINT16_MAX;
4051 	struct mlx5_priv *priv = dev->data->dev_private;
4052 	struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
4053 	struct mlx5_aso_age_pool *pool;
4054 
4055 	rte_rwlock_read_lock(&mng->resize_rwl);
4056 	pool = mng->pools[pool_idx];
4057 	rte_rwlock_read_unlock(&mng->resize_rwl);
4058 	return &pool->actions[offset - 1];
4059 }
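
/*
 * Editorial sketch (assumption, not part of the driver): the inverse of the
 * decoding performed by flow_aso_age_get_by_idx() above. The low 16 bits of
 * the index select the pool and the high 16 bits carry the 1-based action
 * offset within that pool.
 */
static __rte_unused uint32_t
example_aso_age_idx_encode(uint16_t pool_idx, uint16_t action_offset)
{
	/* action_offset is 1-based; an encoded offset of 0 would be invalid. */
	return ((uint32_t)action_offset << 16) | pool_idx;
}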
4060 
4061 /* Maps an indirect action to its translated direct action in an actions array. */
4062 struct mlx5_translated_action_handle {
4063 	struct rte_flow_action_handle *action; /**< Indirect action handle. */
4064 	int index; /**< Index in related array of rte_flow_action. */
4065 };
4066 
4067 /**
4068  * Translates actions of type RTE_FLOW_ACTION_TYPE_INDIRECT to related
4069  * direct action if translation is possible.
4070  * This functionality is used to run the same execution path for both direct
4071  * and indirect actions on flow create. All necessary preparations for
4072  * indirect action handling should be performed on the *handle* actions list
4073  * returned from this call.
4074  *
4075  * @param[in] dev
4076  *   Pointer to Ethernet device.
4077  * @param[in] actions
4078  *   List of actions to translate.
4079  * @param[out] handle
4080  *   List to store translated indirect action object handles.
4081  * @param[in, out] indir_n
4082  *   Size of the *handle* array. On return, it is updated with the number of
4083  *   indirect actions retrieved from the *actions* list.
4084  * @param[out] translated_actions
4085  *   List of actions where all indirect actions were translated to direct
4086  *   if possible. NULL if no translation took place.
4087  * @param[out] error
4088  *   Pointer to the error structure.
4089  *
4090  * @return
4091  *   0 on success, a negative errno value otherwise and rte_errno is set.
4092  */
4093 static int
4094 flow_action_handles_translate(struct rte_eth_dev *dev,
4095 			      const struct rte_flow_action actions[],
4096 			      struct mlx5_translated_action_handle *handle,
4097 			      int *indir_n,
4098 			      struct rte_flow_action **translated_actions,
4099 			      struct rte_flow_error *error)
4100 {
4101 	struct mlx5_priv *priv = dev->data->dev_private;
4102 	struct rte_flow_action *translated = NULL;
4103 	size_t actions_size;
4104 	int n;
4105 	int copied_n = 0;
4106 	struct mlx5_translated_action_handle *handle_end = NULL;
4107 
4108 	for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) {
4109 		if (actions[n].type != RTE_FLOW_ACTION_TYPE_INDIRECT)
4110 			continue;
4111 		if (copied_n == *indir_n) {
4112 			return rte_flow_error_set
4113 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM,
4114 				 NULL, "too many shared actions");
4115 		}
4116 		rte_memcpy(&handle[copied_n].action, &actions[n].conf,
4117 			   sizeof(actions[n].conf));
4118 		handle[copied_n].index = n;
4119 		copied_n++;
4120 	}
4121 	n++;
4122 	*indir_n = copied_n;
4123 	if (!copied_n)
4124 		return 0;
4125 	actions_size = sizeof(struct rte_flow_action) * n;
4126 	translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY);
4127 	if (!translated) {
4128 		rte_errno = ENOMEM;
4129 		return -ENOMEM;
4130 	}
4131 	memcpy(translated, actions, actions_size);
4132 	for (handle_end = handle + copied_n; handle < handle_end; handle++) {
4133 		struct mlx5_shared_action_rss *shared_rss;
4134 		uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
4135 		uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
4136 		uint32_t idx = act_idx &
4137 			       ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
4138 
4139 		switch (type) {
4140 		case MLX5_INDIRECT_ACTION_TYPE_RSS:
4141 			shared_rss = mlx5_ipool_get
4142 			  (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx);
4143 			translated[handle->index].type =
4144 				RTE_FLOW_ACTION_TYPE_RSS;
4145 			translated[handle->index].conf =
4146 				&shared_rss->origin;
4147 			break;
4148 		case MLX5_INDIRECT_ACTION_TYPE_COUNT:
4149 			translated[handle->index].type =
4150 						(enum rte_flow_action_type)
4151 						MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
4152 			translated[handle->index].conf = (void *)(uintptr_t)idx;
4153 			break;
4154 		case MLX5_INDIRECT_ACTION_TYPE_AGE:
4155 			if (priv->sh->flow_hit_aso_en) {
4156 				translated[handle->index].type =
4157 					(enum rte_flow_action_type)
4158 					MLX5_RTE_FLOW_ACTION_TYPE_AGE;
4159 				translated[handle->index].conf =
4160 							 (void *)(uintptr_t)idx;
4161 				break;
4162 			}
4163 			/* Fall-through */
4164 		case MLX5_INDIRECT_ACTION_TYPE_CT:
4165 			if (priv->sh->ct_aso_en) {
4166 				translated[handle->index].type =
4167 					RTE_FLOW_ACTION_TYPE_CONNTRACK;
4168 				translated[handle->index].conf =
4169 							 (void *)(uintptr_t)idx;
4170 				break;
4171 			}
4172 			/* Fall-through */
4173 		default:
4174 			mlx5_free(translated);
4175 			return rte_flow_error_set
4176 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
4177 				 NULL, "invalid indirect action type");
4178 		}
4179 	}
4180 	*translated_actions = translated;
4181 	return 0;
4182 }
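
/*
 * Editorial sketch (not part of the driver): decoding an indirect action
 * handle exactly as the translation loop above does. The handle value
 * embeds the action type in its high bits and the pool index in the
 * remaining low bits.
 */
static __rte_unused void
example_decode_action_handle(const struct rte_flow_action_handle *handle,
			     uint32_t *type, uint32_t *idx)
{
	uint32_t act_idx = (uint32_t)(uintptr_t)handle;

	*type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
	*idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
}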
4183 
4184 /**
4185  * Get Shared RSS action from the action list.
4186  *
4187  * @param[in] dev
4188  *   Pointer to Ethernet device.
4189  * @param[in] shared
4190  *   Pointer to the list of actions.
4191  * @param[in] shared_n
4192  *   Actions list length.
4193  *
4194  * @return
4195  *   The MLX5 RSS action ID if it exists, 0 otherwise.
4196  */
4197 static uint32_t
4198 flow_get_shared_rss_action(struct rte_eth_dev *dev,
4199 			   struct mlx5_translated_action_handle *handle,
4200 			   int shared_n)
4201 {
4202 	struct mlx5_translated_action_handle *handle_end;
4203 	struct mlx5_priv *priv = dev->data->dev_private;
4204 	struct mlx5_shared_action_rss *shared_rss;
4205 
4206 
4208 		uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
4209 		uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
4210 		uint32_t idx = act_idx &
4211 			       ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
4212 		switch (type) {
4213 		case MLX5_INDIRECT_ACTION_TYPE_RSS:
4214 			shared_rss = mlx5_ipool_get
4215 				(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
4216 									   idx);
4217 			__atomic_add_fetch(&shared_rss->refcnt, 1,
4218 					   __ATOMIC_RELAXED);
4219 			return idx;
4220 		default:
4221 			break;
4222 		}
4223 	}
4224 	return 0;
4225 }
4226 
4227 static unsigned int
4228 find_graph_root(uint32_t rss_level)
4229 {
4230 	return rss_level < 2 ? MLX5_EXPANSION_ROOT :
4231 			       MLX5_EXPANSION_ROOT_OUTER;
4232 }
4233 
4234 /**
4235  *  Get layer flags from the prefix flow.
4236  *
4237  *  Some flows may be split into several subflows: the prefix subflow gets the
4238  *  match items and the suffix subflow gets the actions.
4239  *  Some actions need the user-defined match item flags to get the details of
4240  *  the action.
4241  *  This function helps the suffix flow to get the item layer flags from the
4242  *  prefix subflow.
4243  *
4244  * @param[in] dev_flow
4245  *   Pointer the created prefix subflow.
4246  *
4247  * @return
4248  *   The layers obtained from the prefix subflow.
4249  */
4250 static inline uint64_t
4251 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
4252 {
4253 	uint64_t layers = 0;
4254 
4255 	/*
4256 	 * The layer bits could be cached in a local variable, but usually the
4257 	 * compiler will do this optimization on its own.
4258 	 * If no decap actions, use the layers directly.
4259 	 */
4260 	if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
4261 		return dev_flow->handle->layers;
4262 	/* Convert L3 layers with decap action. */
4263 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
4264 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
4265 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
4266 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
4267 	/* Convert L4 layers with decap action.  */
4268 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
4269 		layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
4270 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
4271 		layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
4272 	return layers;
4273 }
4274 
4275 /**
4276  * Get metadata split action information.
4277  *
4278  * @param[in] actions
4279  *   Pointer to the list of actions.
4280  * @param[out] qrss
4281  *   Pointer to the return pointer.
4282  * @param[out] qrss_type
4283  *   Pointer to the action type to return. RTE_FLOW_ACTION_TYPE_END is returned
4284  *   if no QUEUE/RSS is found.
4285  * @param[out] encap_idx
4286  *   Pointer to the index of the encap action if exists, otherwise the last
4287  *   Pointer to the index of the encap action if it exists, otherwise the last
4288  *
4289  * @return
4290  *   Total number of actions.
4291  */
4292 static int
4293 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
4294 				       const struct rte_flow_action **qrss,
4295 				       int *encap_idx)
4296 {
4297 	const struct rte_flow_action_raw_encap *raw_encap;
4298 	int actions_n = 0;
4299 	int raw_decap_idx = -1;
4300 
4301 	*encap_idx = -1;
4302 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4303 		switch (actions->type) {
4304 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4305 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4306 			*encap_idx = actions_n;
4307 			break;
4308 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4309 			raw_decap_idx = actions_n;
4310 			break;
4311 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4312 			raw_encap = actions->conf;
4313 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4314 				*encap_idx = raw_decap_idx != -1 ?
4315 						      raw_decap_idx : actions_n;
4316 			break;
4317 		case RTE_FLOW_ACTION_TYPE_QUEUE:
4318 		case RTE_FLOW_ACTION_TYPE_RSS:
4319 			*qrss = actions;
4320 			break;
4321 		default:
4322 			break;
4323 		}
4324 		actions_n++;
4325 	}
4326 	if (*encap_idx == -1)
4327 		*encap_idx = actions_n;
4328 	/* Count RTE_FLOW_ACTION_TYPE_END. */
4329 	return actions_n + 1;
4330 }
4331 
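
/*
 * Editorial sketch (assumption, not part of the driver): how a caller might
 * use flow_parse_metadata_split_actions_info(). The returned total counts
 * the terminating END action; *qrss stays NULL when the list carries no
 * QUEUE/RSS action.
 */
static __rte_unused int
example_parse_metadata_split(const struct rte_flow_action actions[])
{
	const struct rte_flow_action *qrss = NULL;
	int encap_idx;
	int total;

	total = flow_parse_metadata_split_actions_info(actions, &qrss,
						       &encap_idx);
	return qrss ? total : -1;
}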
4332 /**
4333  * Check if the action will change the packet.
4334  *
4335  * @param dev
4336  *   Pointer to Ethernet device.
4337  * @param[in] type
4338  *   action type.
4339  *
4340  * @return
4341  *   true if the action will change the packet, false otherwise.
4342  */
4343 static bool flow_check_modify_action_type(struct rte_eth_dev *dev,
4344 					  enum rte_flow_action_type type)
4345 {
4346 	struct mlx5_priv *priv = dev->data->dev_private;
4347 
4348 	switch (type) {
4349 	case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
4350 	case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
4351 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
4352 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
4353 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
4354 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
4355 	case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
4356 	case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
4357 	case RTE_FLOW_ACTION_TYPE_DEC_TTL:
4358 	case RTE_FLOW_ACTION_TYPE_SET_TTL:
4359 	case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
4360 	case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
4361 	case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
4362 	case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
4363 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
4364 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
4365 	case RTE_FLOW_ACTION_TYPE_SET_META:
4366 	case RTE_FLOW_ACTION_TYPE_SET_TAG:
4367 	case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
4368 	case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4369 	case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4370 	case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4371 	case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4372 	case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4373 	case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4374 	case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4375 	case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4376 	case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4377 	case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
4378 		return true;
4379 	case RTE_FLOW_ACTION_TYPE_FLAG:
4380 	case RTE_FLOW_ACTION_TYPE_MARK:
4381 		if (priv->sh->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY)
4382 			return true;
4383 		else
4384 			return false;
4385 	default:
4386 		return false;
4387 	}
4388 }
4389 
4390 /**
4391  * Check meter action from the action list.
4392  *
4393  * @param dev
4394  *   Pointer to Ethernet device.
4395  * @param[in] actions
4396  *   Pointer to the list of actions.
4397  * @param[out] has_mtr
4398  *   Pointer to the meter exist flag.
4399  * @param[out] has_modify
4400  *   Pointer to the flag indicating there is a packet-modifying action.
4401  * @param[out] meter_id
4402  *   Pointer to the meter id.
4403  *
4404  * @return
4405  *   Total number of actions.
4406  */
4407 static int
4408 flow_check_meter_action(struct rte_eth_dev *dev,
4409 			const struct rte_flow_action actions[],
4410 			bool *has_mtr, bool *has_modify, uint32_t *meter_id)
4411 {
4412 	const struct rte_flow_action_meter *mtr = NULL;
4413 	int actions_n = 0;
4414 
4415 	MLX5_ASSERT(has_mtr);
4416 	*has_mtr = false;
4417 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4418 		switch (actions->type) {
4419 		case RTE_FLOW_ACTION_TYPE_METER:
4420 			mtr = actions->conf;
4421 			*meter_id = mtr->mtr_id;
4422 			*has_mtr = true;
4423 			break;
4424 		default:
4425 			break;
4426 		}
4427 		if (!*has_mtr)
4428 			*has_modify |= flow_check_modify_action_type(dev,
4429 								actions->type);
4430 		actions_n++;
4431 	}
4432 	/* Count RTE_FLOW_ACTION_TYPE_END. */
4433 	return actions_n + 1;
4434 }
4435 
4436 /**
4437  * Check if the flow should be split due to hairpin.
4438  * The reason for the split is that in current HW we can't
4439  * support encap and push-vlan on Rx, so if a flow contains
4440  * these actions we move it to Tx.
4441  *
4442  * @param dev
4443  *   Pointer to Ethernet device.
4444  * @param[in] attr
4445  *   Flow rule attributes.
4446  * @param[in] actions
4447  *   Associated actions (list terminated by the END action).
4448  *
4449  * @return
4450  *   > 0 the number of actions and the flow should be split,
4451  *   0 when no split required.
4452  */
4453 static int
4454 flow_check_hairpin_split(struct rte_eth_dev *dev,
4455 			 const struct rte_flow_attr *attr,
4456 			 const struct rte_flow_action actions[])
4457 {
4458 	int queue_action = 0;
4459 	int action_n = 0;
4460 	int split = 0;
4461 	const struct rte_flow_action_queue *queue;
4462 	const struct rte_flow_action_rss *rss;
4463 	const struct rte_flow_action_raw_encap *raw_encap;
4464 	const struct rte_eth_hairpin_conf *conf;
4465 
4466 	if (!attr->ingress)
4467 		return 0;
4468 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4469 		switch (actions->type) {
4470 		case RTE_FLOW_ACTION_TYPE_QUEUE:
4471 			queue = actions->conf;
4472 			if (queue == NULL)
4473 				return 0;
4474 			conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
4475 			if (conf == NULL || conf->tx_explicit != 0)
4476 				return 0;
4477 			queue_action = 1;
4478 			action_n++;
4479 			break;
4480 		case RTE_FLOW_ACTION_TYPE_RSS:
4481 			rss = actions->conf;
4482 			if (rss == NULL || rss->queue_num == 0)
4483 				return 0;
4484 			conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
4485 			if (conf == NULL || conf->tx_explicit != 0)
4486 				return 0;
4487 			queue_action = 1;
4488 			action_n++;
4489 			break;
4490 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4491 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4492 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4493 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4494 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4495 			split++;
4496 			action_n++;
4497 			break;
4498 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4499 			raw_encap = actions->conf;
4500 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4501 				split++;
4502 			action_n++;
4503 			break;
4504 		default:
4505 			action_n++;
4506 			break;
4507 		}
4508 	}
4509 	if (split && queue_action)
4510 		return action_n;
4511 	return 0;
4512 }
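
/*
 * Editorial sketch (assumption, not part of the driver): an ingress action
 * list that flow_check_hairpin_split() reports as needing a split - an
 * encap action combined with a QUEUE action whose target is an
 * implicit-Tx hairpin queue. Queue index 1 is illustrative only.
 */
static __rte_unused void
example_hairpin_split_actions(struct rte_flow_action actions[3],
			      const struct rte_flow_action_vxlan_encap *encap)
{
	static const struct rte_flow_action_queue queue = { .index = 1 };

	actions[0] = (struct rte_flow_action){
		.type = RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP,
		.conf = encap,
	};
	actions[1] = (struct rte_flow_action){
		.type = RTE_FLOW_ACTION_TYPE_QUEUE,
		.conf = &queue,
	};
	actions[2] = (struct rte_flow_action){
		.type = RTE_FLOW_ACTION_TYPE_END,
	};
}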
4513 
4514 /* Declare flow create/destroy prototype in advance. */
4515 static uint32_t
4516 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4517 		 const struct rte_flow_attr *attr,
4518 		 const struct rte_flow_item items[],
4519 		 const struct rte_flow_action actions[],
4520 		 bool external, struct rte_flow_error *error);
4521 
4522 static void
4523 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4524 		  uint32_t flow_idx);
4525 
4526 int
4527 flow_dv_mreg_match_cb(void *tool_ctx __rte_unused,
4528 		      struct mlx5_list_entry *entry, void *cb_ctx)
4529 {
4530 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4531 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4532 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4533 
4534 	return mcp_res->mark_id != *(uint32_t *)(ctx->data);
4535 }
4536 
4537 struct mlx5_list_entry *
4538 flow_dv_mreg_create_cb(void *tool_ctx, void *cb_ctx)
4539 {
4540 	struct rte_eth_dev *dev = tool_ctx;
4541 	struct mlx5_priv *priv = dev->data->dev_private;
4542 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4543 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4544 	struct rte_flow_error *error = ctx->error;
4545 	uint32_t idx = 0;
4546 	int ret;
4547 	uint32_t mark_id = *(uint32_t *)(ctx->data);
4548 	struct rte_flow_attr attr = {
4549 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4550 		.ingress = 1,
4551 	};
4552 	struct mlx5_rte_flow_item_tag tag_spec = {
4553 		.data = mark_id,
4554 	};
4555 	struct rte_flow_item items[] = {
4556 		[1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
4557 	};
4558 	struct rte_flow_action_mark ftag = {
4559 		.id = mark_id,
4560 	};
4561 	struct mlx5_flow_action_copy_mreg cp_mreg = {
4562 		.dst = REG_B,
4563 		.src = REG_NON,
4564 	};
4565 	struct rte_flow_action_jump jump = {
4566 		.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4567 	};
4568 	struct rte_flow_action actions[] = {
4569 		[3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
4570 	};
4571 
4572 	/* Fill the register fields in the flow. */
4573 	ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
4574 	if (ret < 0)
4575 		return NULL;
4576 	tag_spec.id = ret;
4577 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4578 	if (ret < 0)
4579 		return NULL;
4580 	cp_mreg.src = ret;
4581 	/* Provide the full width of FLAG specific value. */
4582 	if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
4583 		tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
4584 	/* Build a new flow. */
4585 	if (mark_id != MLX5_DEFAULT_COPY_ID) {
4586 		items[0] = (struct rte_flow_item){
4587 			.type = (enum rte_flow_item_type)
4588 				MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4589 			.spec = &tag_spec,
4590 		};
4591 		items[1] = (struct rte_flow_item){
4592 			.type = RTE_FLOW_ITEM_TYPE_END,
4593 		};
4594 		actions[0] = (struct rte_flow_action){
4595 			.type = (enum rte_flow_action_type)
4596 				MLX5_RTE_FLOW_ACTION_TYPE_MARK,
4597 			.conf = &ftag,
4598 		};
4599 		actions[1] = (struct rte_flow_action){
4600 			.type = (enum rte_flow_action_type)
4601 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4602 			.conf = &cp_mreg,
4603 		};
4604 		actions[2] = (struct rte_flow_action){
4605 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
4606 			.conf = &jump,
4607 		};
4608 		actions[3] = (struct rte_flow_action){
4609 			.type = RTE_FLOW_ACTION_TYPE_END,
4610 		};
4611 	} else {
4612 		/* Default rule, wildcard match. */
4613 		attr.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR;
4614 		items[0] = (struct rte_flow_item){
4615 			.type = RTE_FLOW_ITEM_TYPE_END,
4616 		};
4617 		actions[0] = (struct rte_flow_action){
4618 			.type = (enum rte_flow_action_type)
4619 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4620 			.conf = &cp_mreg,
4621 		};
4622 		actions[1] = (struct rte_flow_action){
4623 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
4624 			.conf = &jump,
4625 		};
4626 		actions[2] = (struct rte_flow_action){
4627 			.type = RTE_FLOW_ACTION_TYPE_END,
4628 		};
4629 	}
4630 	/* Build a new entry. */
4631 	mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4632 	if (!mcp_res) {
4633 		rte_errno = ENOMEM;
4634 		return NULL;
4635 	}
4636 	mcp_res->idx = idx;
4637 	mcp_res->mark_id = mark_id;
4638 	/*
4639 	 * The copy flows are not included in any list. These
4640 	 * flows are referenced from other flows and cannot
4641 	 * be applied, removed or deleted in arbitrary order
4642 	 * by list traversal.
4643 	 */
4644 	mcp_res->rix_flow = flow_list_create(dev, MLX5_FLOW_TYPE_MCP,
4645 					&attr, items, actions, false, error);
4646 	if (!mcp_res->rix_flow) {
4647 		mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], idx);
4648 		return NULL;
4649 	}
4650 	return &mcp_res->hlist_ent;
4651 }
4652 
4653 struct mlx5_list_entry *
4654 flow_dv_mreg_clone_cb(void *tool_ctx, struct mlx5_list_entry *oentry,
4655 		      void *cb_ctx __rte_unused)
4656 {
4657 	struct rte_eth_dev *dev = tool_ctx;
4658 	struct mlx5_priv *priv = dev->data->dev_private;
4659 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4660 	uint32_t idx = 0;
4661 
4662 	mcp_res = mlx5_ipool_malloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4663 	if (!mcp_res) {
4664 		rte_errno = ENOMEM;
4665 		return NULL;
4666 	}
4667 	memcpy(mcp_res, oentry, sizeof(*mcp_res));
4668 	mcp_res->idx = idx;
4669 	return &mcp_res->hlist_ent;
4670 }
4671 
4672 void
4673 flow_dv_mreg_clone_free_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4674 {
4675 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4676 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4677 	struct rte_eth_dev *dev = tool_ctx;
4678 	struct mlx5_priv *priv = dev->data->dev_private;
4679 
4680 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4681 }
4682 
4683 /**
4684  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4685  *
4686  * As mark_id is unique, if there's already a registered flow for the mark_id,
4687  * return by increasing the reference counter of the resource. Otherwise, create
4688  * the resource (mcp_res) and flow.
4689  *
4690  * Flow looks like,
4691  *   - If ingress port is ANY and reg_c[1] is mark_id,
4692  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4693  *
4694  * For default flow (zero mark_id), flow is like,
4695  *   - If ingress port is ANY,
4696  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
4697  *
4698  * @param dev
4699  *   Pointer to Ethernet device.
4700  * @param mark_id
4701  *   ID of MARK action, zero means default flow for META.
4702  * @param[out] error
4703  *   Perform verbose error reporting if not NULL.
4704  *
4705  * @return
4706  *   Associated resource on success, NULL otherwise and rte_errno is set.
4707  */
4708 static struct mlx5_flow_mreg_copy_resource *
4709 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
4710 			  struct rte_flow_error *error)
4711 {
4712 	struct mlx5_priv *priv = dev->data->dev_private;
4713 	struct mlx5_list_entry *entry;
4714 	struct mlx5_flow_cb_ctx ctx = {
4715 		.dev = dev,
4716 		.error = error,
4717 		.data = &mark_id,
4718 	};
4719 
4720 	/* Check if already registered. */
4721 	MLX5_ASSERT(priv->mreg_cp_tbl);
4722 	entry = mlx5_hlist_register(priv->mreg_cp_tbl, mark_id, &ctx);
4723 	if (!entry)
4724 		return NULL;
4725 	return container_of(entry, struct mlx5_flow_mreg_copy_resource,
4726 			    hlist_ent);
4727 }
4728 
4729 void
4730 flow_dv_mreg_remove_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4731 {
4732 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4733 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4734 	struct rte_eth_dev *dev = tool_ctx;
4735 	struct mlx5_priv *priv = dev->data->dev_private;
4736 
4737 	MLX5_ASSERT(mcp_res->rix_flow);
4738 	flow_list_destroy(dev, MLX5_FLOW_TYPE_MCP, mcp_res->rix_flow);
4739 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4740 }
4741 
4742 /**
4743  * Release flow in RX_CP_TBL.
4744  *
4745  * @param dev
4746  *   Pointer to Ethernet device.
4747  * @param flow
4748  *   Parent flow for which copying is provided.
4749  */
4750 static void
4751 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
4752 			  struct rte_flow *flow)
4753 {
4754 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4755 	struct mlx5_priv *priv = dev->data->dev_private;
4756 
4757 	if (!flow->rix_mreg_copy)
4758 		return;
4759 	mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
4760 				 flow->rix_mreg_copy);
4761 	if (!mcp_res || !priv->mreg_cp_tbl)
4762 		return;
4763 	MLX5_ASSERT(mcp_res->rix_flow);
4764 	mlx5_hlist_unregister(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
4765 	flow->rix_mreg_copy = 0;
4766 }
4767 
4768 /**
4769  * Remove the default copy action from RX_CP_TBL.
4770  *
4771  * This function is called in mlx5_dev_start(). Thread safety
4772  * is not guaranteed.
4773  *
4774  * @param dev
4775  *   Pointer to Ethernet device.
4776  */
4777 static void
4778 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
4779 {
4780 	struct mlx5_list_entry *entry;
4781 	struct mlx5_priv *priv = dev->data->dev_private;
4782 	struct mlx5_flow_cb_ctx ctx;
4783 	uint32_t mark_id;
4784 
4785 	/* Check if default flow is registered. */
4786 	if (!priv->mreg_cp_tbl)
4787 		return;
4788 	mark_id = MLX5_DEFAULT_COPY_ID;
4789 	ctx.data = &mark_id;
4790 	entry = mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx);
4791 	if (!entry)
4792 		return;
4793 	mlx5_hlist_unregister(priv->mreg_cp_tbl, entry);
4794 }
4795 
4796 /**
4797  * Add the default copy action in RX_CP_TBL.
4798  *
4799  * This function is called in mlx5_dev_start(). Thread safety
4800  * is not guaranteed.
4801  *
4802  * @param dev
4803  *   Pointer to Ethernet device.
4804  * @param[out] error
4805  *   Perform verbose error reporting if not NULL.
4806  *
4807  * @return
4808  *   0 for success, negative value otherwise and rte_errno is set.
4809  */
4810 static int
4811 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
4812 				  struct rte_flow_error *error)
4813 {
4814 	struct mlx5_priv *priv = dev->data->dev_private;
4815 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4816 	struct mlx5_flow_cb_ctx ctx;
4817 	uint32_t mark_id;
4818 
4819 	/* Check whether extensive metadata feature is engaged. */
4820 	if (!priv->sh->config.dv_flow_en ||
4821 	    priv->sh->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4822 	    !mlx5_flow_ext_mreg_supported(dev) ||
4823 	    !priv->sh->dv_regc0_mask)
4824 		return 0;
4825 	/*
4826 	 * Adding the default mreg copy flow may be called multiple times,
4827 	 * but it is removed only once on stop. Avoid registering it twice.
4828 	 */
4829 	mark_id = MLX5_DEFAULT_COPY_ID;
4830 	ctx.data = &mark_id;
4831 	if (mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx))
4832 		return 0;
4833 	mcp_res = flow_mreg_add_copy_action(dev, mark_id, error);
4834 	if (!mcp_res)
4835 		return -rte_errno;
4836 	return 0;
4837 }
4838 
4839 /**
4840  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4841  *
4842  * All the flows having a Q/RSS action should be split by
4843  * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL
4844  * performs the following,
4845  *   - CQE->flow_tag := reg_c[1] (MARK)
4846  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4847  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
4848  * but there should be a flow per each MARK ID set by MARK action.
4849  *
4850  * For the aforementioned reason, if there's a MARK action in flow's action
4851  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
4852  * the MARK ID to CQE's flow_tag like,
4853  *   - If reg_c[1] is mark_id,
4854  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4855  *
4856  * For SET_META action which stores value in reg_c[0], as the destination is
4857  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
4858  * MARK ID means the default flow. The default flow looks like,
4859  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4860  *
4861  * @param dev
4862  *   Pointer to Ethernet device.
4863  * @param flow
4864  *   Pointer to flow structure.
4865  * @param[in] actions
4866  *   Pointer to the list of actions.
4867  * @param[out] error
4868  *   Perform verbose error reporting if not NULL.
4869  *
4870  * @return
4871  *   0 on success, negative value otherwise and rte_errno is set.
4872  */
4873 static int
4874 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
4875 			    struct rte_flow *flow,
4876 			    const struct rte_flow_action *actions,
4877 			    struct rte_flow_error *error)
4878 {
4879 	struct mlx5_priv *priv = dev->data->dev_private;
4880 	struct mlx5_sh_config *config = &priv->sh->config;
4881 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4882 	const struct rte_flow_action_mark *mark;
4883 
4884 	/* Check whether extensive metadata feature is engaged. */
4885 	if (!config->dv_flow_en ||
4886 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4887 	    !mlx5_flow_ext_mreg_supported(dev) ||
4888 	    !priv->sh->dv_regc0_mask)
4889 		return 0;
4890 	/* Find MARK action. */
4891 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4892 		switch (actions->type) {
4893 		case RTE_FLOW_ACTION_TYPE_FLAG:
4894 			mcp_res = flow_mreg_add_copy_action
4895 				(dev, MLX5_FLOW_MARK_DEFAULT, error);
4896 			if (!mcp_res)
4897 				return -rte_errno;
4898 			flow->rix_mreg_copy = mcp_res->idx;
4899 			return 0;
4900 		case RTE_FLOW_ACTION_TYPE_MARK:
4901 			mark = (const struct rte_flow_action_mark *)
4902 				actions->conf;
4903 			mcp_res =
4904 				flow_mreg_add_copy_action(dev, mark->id, error);
4905 			if (!mcp_res)
4906 				return -rte_errno;
4907 			flow->rix_mreg_copy = mcp_res->idx;
4908 			return 0;
4909 		default:
4910 			break;
4911 		}
4912 	}
4913 	return 0;
4914 }
4915 
4916 #define MLX5_MAX_SPLIT_ACTIONS 24
4917 #define MLX5_MAX_SPLIT_ITEMS 24
4918 
4919 /**
4920  * Split the hairpin flow.
4921  * Since HW can't support encap and push-vlan on Rx, we move these
4922  * actions to Tx.
4923  * If the count action comes after the encap, then we also
4924  * move the count action; in this case the count will also measure
4925  * the outer bytes.
4926  *
4927  * @param dev
4928  *   Pointer to Ethernet device.
4929  * @param[in] actions
4930  *   Associated actions (list terminated by the END action).
4931  * @param[out] actions_rx
4932  *   Rx flow actions.
4933  * @param[out] actions_tx
4934  *   Tx flow actions.
4935  * @param[out] pattern_tx
4936  *   The pattern items for the Tx flow.
4937  * @param[out] flow_id
4938  *   The flow ID connected to this flow.
4939  *
4940  * @return
4941  *   0 on success.
4942  */
4943 static int
4944 flow_hairpin_split(struct rte_eth_dev *dev,
4945 		   const struct rte_flow_action actions[],
4946 		   struct rte_flow_action actions_rx[],
4947 		   struct rte_flow_action actions_tx[],
4948 		   struct rte_flow_item pattern_tx[],
4949 		   uint32_t flow_id)
4950 {
4951 	const struct rte_flow_action_raw_encap *raw_encap;
4952 	const struct rte_flow_action_raw_decap *raw_decap;
4953 	struct mlx5_rte_flow_action_set_tag *set_tag;
4954 	struct rte_flow_action *tag_action;
4955 	struct mlx5_rte_flow_item_tag *tag_item;
4956 	struct rte_flow_item *item;
4957 	char *addr;
4958 	int encap = 0;
4959 
4960 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4961 		switch (actions->type) {
4962 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4963 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4964 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4965 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4966 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4967 			rte_memcpy(actions_tx, actions,
4968 			       sizeof(struct rte_flow_action));
4969 			actions_tx++;
4970 			break;
4971 		case RTE_FLOW_ACTION_TYPE_COUNT:
4972 			if (encap) {
4973 				rte_memcpy(actions_tx, actions,
4974 					   sizeof(struct rte_flow_action));
4975 				actions_tx++;
4976 			} else {
4977 				rte_memcpy(actions_rx, actions,
4978 					   sizeof(struct rte_flow_action));
4979 				actions_rx++;
4980 			}
4981 			break;
4982 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4983 			raw_encap = actions->conf;
4984 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) {
4985 				memcpy(actions_tx, actions,
4986 				       sizeof(struct rte_flow_action));
4987 				actions_tx++;
4988 				encap = 1;
4989 			} else {
4990 				rte_memcpy(actions_rx, actions,
4991 					   sizeof(struct rte_flow_action));
4992 				actions_rx++;
4993 			}
4994 			break;
4995 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4996 			raw_decap = actions->conf;
4997 			if (raw_decap->size < MLX5_ENCAPSULATION_DECISION_SIZE) {
4998 				memcpy(actions_tx, actions,
4999 				       sizeof(struct rte_flow_action));
5000 				actions_tx++;
5001 			} else {
5002 				rte_memcpy(actions_rx, actions,
5003 					   sizeof(struct rte_flow_action));
5004 				actions_rx++;
5005 			}
5006 			break;
5007 		default:
5008 			rte_memcpy(actions_rx, actions,
5009 				   sizeof(struct rte_flow_action));
5010 			actions_rx++;
5011 			break;
5012 		}
5013 	}
5014 	/* Add the set tag action and the end action for the Rx flow. */
5015 	tag_action = actions_rx;
5016 	tag_action->type = (enum rte_flow_action_type)
5017 			   MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5018 	actions_rx++;
5019 	rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
5020 	actions_rx++;
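	/*
	 * The SET_TAG configuration is stored in the same buffer,
	 * right past the END action that was just copied.
	 */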
5021 	set_tag = (void *)actions_rx;
5022 	*set_tag = (struct mlx5_rte_flow_action_set_tag) {
5023 		.id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL),
5024 		.data = flow_id,
5025 	};
5026 	MLX5_ASSERT(set_tag->id > REG_NON);
5027 	tag_action->conf = set_tag;
5028 	/* Create Tx item list. */
5029 	rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
5030 	addr = (void *)&pattern_tx[2];
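	/*
	 * The tag spec and mask structures are stored right past the two
	 * pattern items (tag + END) in the same pattern_tx buffer.
	 */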
5031 	item = pattern_tx;
5032 	item->type = (enum rte_flow_item_type)
5033 		     MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5034 	tag_item = (void *)addr;
5035 	tag_item->data = flow_id;
5036 	tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
5037 	MLX5_ASSERT(tag_item->id > REG_NON);
5038 	item->spec = tag_item;
5039 	addr += sizeof(struct mlx5_rte_flow_item_tag);
5040 	tag_item = (void *)addr;
5041 	tag_item->data = UINT32_MAX;
5042 	tag_item->id = UINT16_MAX;
5043 	item->mask = tag_item;
5044 	item->last = NULL;
5045 	item++;
5046 	item->type = RTE_FLOW_ITEM_TYPE_END;
5047 	return 0;
5048 }
5049 
5050 /**
5051  * The last stage of the splitting chain; it just creates the subflow
5052  * without any modification.
5053  *
5054  * @param[in] dev
5055  *   Pointer to Ethernet device.
5056  * @param[in] flow
5057  *   Parent flow structure pointer.
5058  * @param[in, out] sub_flow
5059  *   Pointer to return the created subflow, may be NULL.
5060  * @param[in] attr
5061  *   Flow rule attributes.
5062  * @param[in] items
5063  *   Pattern specification (list terminated by the END pattern item).
5064  * @param[in] actions
5065  *   Associated actions (list terminated by the END action).
5066  * @param[in] flow_split_info
5067  *   Pointer to flow split info structure.
5068  * @param[out] error
5069  *   Perform verbose error reporting if not NULL.
5070  * @return
5071  *   0 on success, negative value otherwise
5072  */
5073 static int
5074 flow_create_split_inner(struct rte_eth_dev *dev,
5075 			struct rte_flow *flow,
5076 			struct mlx5_flow **sub_flow,
5077 			const struct rte_flow_attr *attr,
5078 			const struct rte_flow_item items[],
5079 			const struct rte_flow_action actions[],
5080 			struct mlx5_flow_split_info *flow_split_info,
5081 			struct rte_flow_error *error)
5082 {
5083 	struct mlx5_flow *dev_flow;
5084 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
5085 
5086 	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
5087 				    flow_split_info->flow_idx, error);
5088 	if (!dev_flow)
5089 		return -rte_errno;
5090 	dev_flow->flow = flow;
5091 	dev_flow->external = flow_split_info->external;
5092 	dev_flow->skip_scale = flow_split_info->skip_scale;
5093 	/* Subflow object was created, we must include it in the list. */
5094 	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
5095 		      dev_flow->handle, next);
5096 	/*
5097 	 * If dev_flow is one of the suffix flows, some actions in the suffix
5098 	 * flow may need user-defined item layer flags; pass the
5099 	 * metadata rxq mark flag to the suffix flow as well.
5100 	 */
5101 	if (flow_split_info->prefix_layers)
5102 		dev_flow->handle->layers = flow_split_info->prefix_layers;
5103 	if (flow_split_info->prefix_mark) {
5104 		MLX5_ASSERT(wks);
5105 		wks->mark = 1;
5106 	}
5107 	if (sub_flow)
5108 		*sub_flow = dev_flow;
5109 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5110 	dev_flow->dv.table_id = flow_split_info->table_id;
5111 #endif
5112 	return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
5113 }
5114 
5115 /**
5116  * Get the sub policy of a meter.
5117  *
5118  * @param[in] dev
5119  *   Pointer to Ethernet device.
5120  * @param[in] flow
5121  *   Parent flow structure pointer.
5122  * @param wks
5123  *   Pointer to thread flow work space.
5124  * @param[in] attr
5125  *   Flow rule attributes.
5126  * @param[in] items
5127  *   Pattern specification (list terminated by the END pattern item).
5128  * @param[out] error
5129  *   Perform verbose error reporting if not NULL.
5130  *
5131  * @return
5132  *   Pointer to the meter sub policy, NULL otherwise and rte_errno is set.
5133  */
5134 static struct mlx5_flow_meter_sub_policy *
5135 get_meter_sub_policy(struct rte_eth_dev *dev,
5136 		     struct rte_flow *flow,
5137 		     struct mlx5_flow_workspace *wks,
5138 		     const struct rte_flow_attr *attr,
5139 		     const struct rte_flow_item items[],
5140 		     struct rte_flow_error *error)
5141 {
5142 	struct mlx5_flow_meter_policy *policy;
5143 	struct mlx5_flow_meter_policy *final_policy;
5144 	struct mlx5_flow_meter_sub_policy *sub_policy = NULL;
5145 
5146 	policy = wks->policy;
5147 	final_policy = policy->is_hierarchy ? wks->final_policy : policy;
5148 	if (final_policy->is_rss || final_policy->is_queue) {
5149 		struct mlx5_flow_rss_desc rss_desc_v[MLX5_MTR_RTE_COLORS];
5150 		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS] = {0};
5151 		uint32_t i;
5152 
5153 		/*
5154 		 * This is a temporary dev_flow;
5155 		 * no need to register any matcher for it during translation.
5156 		 */
5157 		wks->skip_matcher_reg = 1;
5158 		for (i = 0; i < MLX5_MTR_RTE_COLORS; i++) {
5159 			struct mlx5_flow dev_flow = {0};
5160 			struct mlx5_flow_handle dev_handle = { {0} };
5161 			uint8_t fate = final_policy->act_cnt[i].fate_action;
5162 
5163 			if (fate == MLX5_FLOW_FATE_SHARED_RSS) {
5164 				const struct rte_flow_action_rss *rss_act =
5165 					final_policy->act_cnt[i].rss->conf;
5166 				struct rte_flow_action rss_actions[2] = {
5167 					[0] = {
5168 					.type = RTE_FLOW_ACTION_TYPE_RSS,
5169 					.conf = rss_act,
5170 					},
5171 					[1] = {
5172 					.type = RTE_FLOW_ACTION_TYPE_END,
5173 					.conf = NULL,
5174 					}
5175 				};
5176 
5177 				dev_flow.handle = &dev_handle;
5178 				dev_flow.ingress = attr->ingress;
5179 				dev_flow.flow = flow;
5180 				dev_flow.external = 0;
5181 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5182 				dev_flow.dv.transfer = attr->transfer;
5183 #endif
5184 				/*
5185 				 * Translate the RSS action to get the RSS hash fields.
5186 				 */
5187 				if (flow_drv_translate(dev, &dev_flow, attr,
5188 						items, rss_actions, error))
5189 					goto exit;
5190 				rss_desc_v[i] = wks->rss_desc;
5191 				rss_desc_v[i].key_len = MLX5_RSS_HASH_KEY_LEN;
5192 				rss_desc_v[i].hash_fields =
5193 						dev_flow.hash_fields;
5194 				rss_desc_v[i].queue_num =
5195 						rss_desc_v[i].hash_fields ?
5196 						rss_desc_v[i].queue_num : 1;
5197 				rss_desc_v[i].tunnel =
5198 						!!(dev_flow.handle->layers &
5199 						   MLX5_FLOW_LAYER_TUNNEL);
5200 				/* Use the RSS queues in the containers. */
5201 				rss_desc_v[i].queue =
5202 					(uint16_t *)(uintptr_t)rss_act->queue;
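				/* The cast only drops constness from the flow API queue array. */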
5203 				rss_desc[i] = &rss_desc_v[i];
5204 			} else if (fate == MLX5_FLOW_FATE_QUEUE) {
5205 				/* This is a queue action. */
5206 				rss_desc_v[i] = wks->rss_desc;
5207 				rss_desc_v[i].key_len = 0;
5208 				rss_desc_v[i].hash_fields = 0;
5209 				rss_desc_v[i].queue =
5210 					&final_policy->act_cnt[i].queue;
5211 				rss_desc_v[i].queue_num = 1;
5212 				rss_desc[i] = &rss_desc_v[i];
5213 			} else {
5214 				rss_desc[i] = NULL;
5215 			}
5216 		}
5217 		sub_policy = flow_drv_meter_sub_policy_rss_prepare(dev,
5218 						flow, policy, rss_desc);
5219 	} else {
5220 		enum mlx5_meter_domain mtr_domain =
5221 			attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5222 				(attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5223 						MLX5_MTR_DOMAIN_INGRESS);
5224 		sub_policy = policy->sub_policys[mtr_domain][0];
5225 	}
5226 	if (!sub_policy)
5227 		rte_flow_error_set(error, EINVAL,
5228 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5229 				   "Failed to get meter sub-policy.");
5230 exit:
5231 	return sub_policy;
5232 }
5233 
5234 /**
5235  * Split the meter flow.
5236  *
5237  * As the meter flow will be split into three sub flows, the actions
5238  * other than the meter action make sense only if the meter accepts
5239  * the packet. If it needs to be dropped, no other additional
5240  * actions should be taken.
5241  *
5242  * One special kind of action, which decapsulates the L3 tunnel
5243  * header, will be put in the prefix sub flow, so as not to take the
5244  * L3 tunnel header into account.
5245  *
5246  * @param[in] dev
5247  *   Pointer to Ethernet device.
5248  * @param[in] flow
5249  *   Parent flow structure pointer.
5250  * @param wks
5251  *   Pointer to thread flow work space.
5252  * @param[in] attr
5253  *   Flow rule attributes.
5254  * @param[in] items
5255  *   Pattern specification (list terminated by the END pattern item).
5256  * @param[out] sfx_items
5257  *   Suffix flow match items (list terminated by the END pattern item).
5258  * @param[in] actions
5259  *   Associated actions (list terminated by the END action).
5260  * @param[out] actions_sfx
5261  *   Suffix flow actions.
5262  * @param[out] actions_pre
5263  *   Prefix flow actions.
5264  * @param[out] mtr_flow_id
5265  *   Pointer to meter flow id.
5266  * @param[out] error
5267  *   Perform verbose error reporting if not NULL.
5268  *
5269  * @return
5270  *   0 on success, a negative errno value otherwise and rte_errno is set.
5271  */
5272 static int
5273 flow_meter_split_prep(struct rte_eth_dev *dev,
5274 		      struct rte_flow *flow,
5275 		      struct mlx5_flow_workspace *wks,
5276 		      const struct rte_flow_attr *attr,
5277 		      const struct rte_flow_item items[],
5278 		      struct rte_flow_item sfx_items[],
5279 		      const struct rte_flow_action actions[],
5280 		      struct rte_flow_action actions_sfx[],
5281 		      struct rte_flow_action actions_pre[],
5282 		      uint32_t *mtr_flow_id,
5283 		      struct rte_flow_error *error)
5284 {
5285 	struct mlx5_priv *priv = dev->data->dev_private;
5286 	struct mlx5_flow_meter_info *fm = wks->fm;
5287 	struct rte_flow_action *tag_action = NULL;
5288 	struct rte_flow_item *tag_item;
5289 	struct mlx5_rte_flow_action_set_tag *set_tag;
5290 	const struct rte_flow_action_raw_encap *raw_encap;
5291 	const struct rte_flow_action_raw_decap *raw_decap;
5292 	struct mlx5_rte_flow_item_tag *tag_item_spec;
5293 	struct mlx5_rte_flow_item_tag *tag_item_mask;
5294 	uint32_t tag_id = 0;
5295 	struct rte_flow_item *vlan_item_dst = NULL;
5296 	const struct rte_flow_item *vlan_item_src = NULL;
5297 	const struct rte_flow_item *orig_items = items;
5298 	struct rte_flow_action *hw_mtr_action;
5299 	struct rte_flow_action *action_pre_head = NULL;
5300 	int32_t flow_src_port = priv->representor_id;
5301 	bool mtr_first;
5302 	uint8_t mtr_id_offset = priv->mtr_reg_share ? MLX5_MTR_COLOR_BITS : 0;
5303 	uint8_t mtr_reg_bits = priv->mtr_reg_share ?
5304 				MLX5_MTR_IDLE_BITS_IN_COLOR_REG : MLX5_REG_BITS;
5305 	uint32_t flow_id = 0;
5306 	uint32_t flow_id_reversed = 0;
5307 	uint8_t flow_id_bits = 0;
5308 	bool after_meter = false;
5309 	int shift;
5310 
5311 	/* Prepare the suffix subflow items. */
5312 	tag_item = sfx_items++;
5313 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
5314 		struct mlx5_priv *port_priv;
5315 		const struct rte_flow_item_port_id *pid_v;
5316 		int item_type = items->type;
5317 
5318 		switch (item_type) {
5319 		case RTE_FLOW_ITEM_TYPE_PORT_ID:
5320 			pid_v = items->spec;
5321 			MLX5_ASSERT(pid_v);
5322 			port_priv = mlx5_port_to_eswitch_info(pid_v->id, false);
5323 			if (!port_priv)
5324 				return rte_flow_error_set(error,
5325 						rte_errno,
5326 						RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
5327 						pid_v,
5328 						"Failed to get port info.");
5329 			flow_src_port = port_priv->representor_id;
5330 			if (!fm->def_policy && wks->policy->is_hierarchy &&
5331 			    flow_src_port != priv->representor_id) {
5332 				if (flow_drv_mtr_hierarchy_rule_create(dev,
5333 								flow, fm,
5334 								flow_src_port,
5335 								items,
5336 								error))
5337 					return -rte_errno;
5338 			}
5339 			memcpy(sfx_items, items, sizeof(*sfx_items));
5340 			sfx_items++;
5341 			break;
5342 		case RTE_FLOW_ITEM_TYPE_VLAN:
5343 			/* Determine whether to copy the VLAN item below. */
5344 			vlan_item_src = items;
5345 			vlan_item_dst = sfx_items++;
5346 			vlan_item_dst->type = RTE_FLOW_ITEM_TYPE_VOID;
5347 			break;
5348 		default:
5349 			break;
5350 		}
5351 	}
5352 	sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
5353 	sfx_items++;
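	/*
	 * The tag item reserved at the head of sfx_items is filled in
	 * at the end of this function, once the meter flow ID is known.
	 */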
5354 	mtr_first = priv->sh->meter_aso_en &&
5355 		(attr->egress || (attr->transfer && flow_src_port != UINT16_MAX));
5356 	/* For ASO meter, meter must be before tag in TX direction. */
5357 	if (mtr_first) {
5358 		action_pre_head = actions_pre++;
5359 		/* Leave space for tag action. */
5360 		tag_action = actions_pre++;
5361 	}
5362 	/* Prepare the actions for prefix and suffix flow. */
5363 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5364 		struct rte_flow_action *action_cur = NULL;
5365 
5366 		switch (actions->type) {
5367 		case RTE_FLOW_ACTION_TYPE_METER:
5368 			if (mtr_first) {
5369 				action_cur = action_pre_head;
5370 			} else {
5371 				/* Leave space for tag action. */
5372 				tag_action = actions_pre++;
5373 				action_cur = actions_pre++;
5374 			}
5375 			after_meter = true;
5376 			break;
5377 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5378 		case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5379 			action_cur = actions_pre++;
5380 			break;
5381 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5382 			raw_encap = actions->conf;
5383 			if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
5384 				action_cur = actions_pre++;
5385 			break;
5386 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5387 			raw_decap = actions->conf;
5388 			if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
5389 				action_cur = actions_pre++;
5390 			break;
5391 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5392 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5393 			if (vlan_item_dst && vlan_item_src) {
5394 				memcpy(vlan_item_dst, vlan_item_src,
5395 					sizeof(*vlan_item_dst));
5396 				/*
5397 				 * Convert to an internal match item; it is used
5398 				 * for VLAN push and set VID.
5399 				 */
5400 				vlan_item_dst->type = (enum rte_flow_item_type)
5401 						MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
5402 			}
5403 			break;
5404 		case RTE_FLOW_ACTION_TYPE_COUNT:
5405 			if (fm->def_policy)
5406 				action_cur = after_meter ?
5407 						actions_sfx++ : actions_pre++;
5408 			break;
5409 		default:
5410 			break;
5411 		}
5412 		if (!action_cur)
5413 			action_cur = (fm->def_policy) ?
5414 					actions_sfx++ : actions_pre++;
5415 		memcpy(action_cur, actions, sizeof(struct rte_flow_action));
5416 	}
5417 	/* Add end action to the actions. */
5418 	actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
5419 	if (priv->sh->meter_aso_en) {
5420 		/*
5421 		 * For ASO meter, an extra jump action must be added explicitly
5422 		 * to jump from the meter table to the policer table.
5423 		 */
5424 		struct mlx5_flow_meter_sub_policy *sub_policy;
5425 		struct mlx5_flow_tbl_data_entry *tbl_data;
5426 
5427 		if (!fm->def_policy) {
5428 			sub_policy = get_meter_sub_policy(dev, flow, wks,
5429 							  attr, orig_items,
5430 							  error);
5431 			if (!sub_policy)
5432 				return -rte_errno;
5433 		} else {
5434 			enum mlx5_meter_domain mtr_domain =
5435 			attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5436 				(attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5437 						MLX5_MTR_DOMAIN_INGRESS);
5438 
5439 			sub_policy =
5440 			&priv->sh->mtrmng->def_policy[mtr_domain]->sub_policy;
5441 		}
5442 		tbl_data = container_of(sub_policy->tbl_rsc,
5443 					struct mlx5_flow_tbl_data_entry, tbl);
5444 		hw_mtr_action = actions_pre++;
5445 		hw_mtr_action->type = (enum rte_flow_action_type)
5446 				      MLX5_RTE_FLOW_ACTION_TYPE_JUMP;
5447 		hw_mtr_action->conf = tbl_data->jump.action;
5448 	}
5449 	actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
5450 	actions_pre++;
5451 	if (!tag_action)
5452 		return rte_flow_error_set(error, ENOMEM,
5453 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5454 					  NULL, "No tag action space.");
5455 	if (!mtr_flow_id) {
5456 		tag_action->type = RTE_FLOW_ACTION_TYPE_VOID;
5457 		goto exit;
5458 	}
5459 	/* Only default-policy Meter creates mtr flow id. */
5460 	if (fm->def_policy) {
5461 		mlx5_ipool_malloc(fm->flow_ipool, &tag_id);
5462 		if (!tag_id)
5463 			return rte_flow_error_set(error, ENOMEM,
5464 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5465 					"Failed to allocate meter flow id.");
5466 		flow_id = tag_id - 1;
5467 		flow_id_bits = (!flow_id) ? 1 :
5468 				(MLX5_REG_BITS - __builtin_clz(flow_id));
5469 		if ((flow_id_bits + priv->sh->mtrmng->max_mtr_bits) >
5470 		    mtr_reg_bits) {
5471 			mlx5_ipool_free(fm->flow_ipool, tag_id);
5472 			return rte_flow_error_set(error, EINVAL,
5473 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5474 					"Meter flow id exceeds max limit.");
5475 		}
5476 		if (flow_id_bits > priv->sh->mtrmng->max_mtr_flow_bits)
5477 			priv->sh->mtrmng->max_mtr_flow_bits = flow_id_bits;
5478 	}
5479 	/* Build tag actions and items for meter_id/meter flow_id. */
5480 	set_tag = (struct mlx5_rte_flow_action_set_tag *)actions_pre;
5481 	tag_item_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
5482 	tag_item_mask = tag_item_spec + 1;
5483 	/* Both flow_id and meter_id share the same register. */
5484 	*set_tag = (struct mlx5_rte_flow_action_set_tag) {
5485 		.id = (enum modify_reg)mlx5_flow_get_reg_id(dev, MLX5_MTR_ID,
5486 							    0, error),
5487 		.offset = mtr_id_offset,
5488 		.length = mtr_reg_bits,
5489 		.data = flow->meter,
5490 	};
5491 	/*
5492 	 * The color register bits used by flow_id grow from
5493 	 * MSB to LSB, so the flow_id value must be bit-reversed in RegC.
5494 	 */
5495 	for (shift = 0; shift < flow_id_bits; shift++)
5496 		flow_id_reversed = (flow_id_reversed << 1) |
5497 				((flow_id >> shift) & 0x1);
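	/*
	 * E.g. flow_id = 0b110 with flow_id_bits = 3 yields
	 * flow_id_reversed = 0b011, which lands in the most
	 * significant bits of the meter register below.
	 */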
5498 	set_tag->data |=
5499 		flow_id_reversed << (mtr_reg_bits - flow_id_bits);
5500 	tag_item_spec->id = set_tag->id;
5501 	tag_item_spec->data = set_tag->data << mtr_id_offset;
5502 	tag_item_mask->data = UINT32_MAX << mtr_id_offset;
5503 	tag_action->type = (enum rte_flow_action_type)
5504 				MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5505 	tag_action->conf = set_tag;
5506 	tag_item->type = (enum rte_flow_item_type)
5507 				MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5508 	tag_item->spec = tag_item_spec;
5509 	tag_item->last = NULL;
5510 	tag_item->mask = tag_item_mask;
5511 exit:
5512 	if (mtr_flow_id)
5513 		*mtr_flow_id = tag_id;
5514 	return 0;
5515 }
5516 
5517 /**
5518  * Split action list having QUEUE/RSS for metadata register copy.
5519  *
5520  * Once Q/RSS action is detected in user's action list, the flow action
5521  * should be split in order to copy metadata registers, which will happen in
5522  * RX_CP_TBL like,
5523  *   - CQE->flow_tag := reg_c[1] (MARK)
5524  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
5525  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
5526  * This is because the last action of each flow must be a terminal action
5527  * (QUEUE, RSS or DROP).
5528  *
5529  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
5530  * stored and kept in the mlx5_flow structure per each sub_flow.
5531  *
5532  * The Q/RSS action is replaced with,
5533  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
5534  * And the following JUMP action is added at the end,
5535  *   - JUMP, to RX_CP_TBL.
5536  *
5537  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL by
5538  * flow_create_split_metadata() routine. The flow will look like,
5539  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
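 *
 * For example, original actions
 *   ... / QUEUE / END
 * are rewritten into split_actions as
 *   ... / SET_TAG (flow ID to reg_c[2]) / JUMP (to RX_CP_TBL) / END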
5540  *
5541  * @param dev
5542  *   Pointer to Ethernet device.
5543  * @param[out] split_actions
5544  *   Pointer to store split actions to jump to CP_TBL.
5545  * @param[in] actions
5546  *   Pointer to the list of original flow actions.
5547  * @param[in] qrss
5548  *   Pointer to the Q/RSS action.
5549  * @param[in] actions_n
5550  *   Number of original actions.
5551  * @param[in] mtr_sfx
5552  *   Nonzero if the flow is in the meter suffix table.
5553  * @param[out] error
5554  *   Perform verbose error reporting if not NULL.
5555  *
5556  * @return
5557  *   non-zero unique flow_id on success, otherwise 0 and
5558  *   error/rte_errno are set.
5559  */
5560 static uint32_t
5561 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
5562 			  struct rte_flow_action *split_actions,
5563 			  const struct rte_flow_action *actions,
5564 			  const struct rte_flow_action *qrss,
5565 			  int actions_n, int mtr_sfx,
5566 			  struct rte_flow_error *error)
5567 {
5568 	struct mlx5_priv *priv = dev->data->dev_private;
5569 	struct mlx5_rte_flow_action_set_tag *set_tag;
5570 	struct rte_flow_action_jump *jump;
5571 	const int qrss_idx = qrss - actions;
5572 	uint32_t flow_id = 0;
5573 	int ret = 0;
5574 
5575 	/*
5576 	 * Given actions will be split
5577 	 * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
5578 	 * - Add jump to mreg CP_TBL.
5579 	 * As a result, there will be one more action.
5580 	 */
5581 	memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
5582 	/* Count MLX5_RTE_FLOW_ACTION_TYPE_TAG. */
5583 	++actions_n;
5584 	set_tag = (void *)(split_actions + actions_n);
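	/*
	 * The SET_TAG and JUMP configurations are stored past the enlarged
	 * action array; the caller reserved room for them there
	 * (see flow_create_split_metadata()).
	 */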
5585 	/*
5586 	 * If we are not the meter suffix flow, add the tag action,
5587 	 * since the meter suffix flow already has the tag added.
5588 	 */
5589 	if (!mtr_sfx) {
5590 		/*
5591 		 * Allocate the new subflow ID. This one is unique within
5592 		 * the device and not shared with representors. Otherwise,
5593 		 * we would have to resolve a multi-thread access
5594 		 * synchronization issue. Each flow on the shared device is appended
5595 		 * with source vport identifier, so the resulting
5596 		 * flows will be unique in the shared (by master and
5597 		 * representors) domain even if they have coinciding
5598 		 * IDs.
5599 		 */
5600 		mlx5_ipool_malloc(priv->sh->ipool
5601 				  [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &flow_id);
5602 		if (!flow_id)
5603 			return rte_flow_error_set(error, ENOMEM,
5604 						  RTE_FLOW_ERROR_TYPE_ACTION,
5605 						  NULL, "can't allocate id "
5606 						  "for split Q/RSS subflow");
5607 		/* Internal SET_TAG action to set flow ID. */
5608 		*set_tag = (struct mlx5_rte_flow_action_set_tag){
5609 			.data = flow_id,
5610 		};
5611 		ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
5612 		if (ret < 0)
5613 			return ret;
5614 		set_tag->id = ret;
5615 		/* Construct new actions array. */
5616 		/* Replace QUEUE/RSS action. */
5617 		split_actions[qrss_idx] = (struct rte_flow_action){
5618 			.type = (enum rte_flow_action_type)
5619 				MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5620 			.conf = set_tag,
5621 		};
5622 	} else {
5623 		/*
5624 		 * If we are the meter suffix flow, the tag already exists.
5625 		 * Set the QUEUE/RSS action to void.
5626 		 */
5627 		split_actions[qrss_idx].type = RTE_FLOW_ACTION_TYPE_VOID;
5628 	}
5629 	/* JUMP action to jump to mreg copy table (CP_TBL). */
5630 	jump = (void *)(set_tag + 1);
5631 	*jump = (struct rte_flow_action_jump){
5632 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
5633 	};
5634 	split_actions[actions_n - 2] = (struct rte_flow_action){
5635 		.type = RTE_FLOW_ACTION_TYPE_JUMP,
5636 		.conf = jump,
5637 	};
5638 	split_actions[actions_n - 1] = (struct rte_flow_action){
5639 		.type = RTE_FLOW_ACTION_TYPE_END,
5640 	};
5641 	return flow_id;
5642 }
5643 
5644 /**
5645  * Extend the given action list for Tx metadata copy.
5646  *
5647  * Copy the given action list to the ext_actions and add flow metadata register
5648  * copy action in order to copy reg_a set by WQE to reg_c[0].
5649  *
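 * @param dev
 *   Pointer to Ethernet device.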
5650  * @param[out] ext_actions
5651  *   Pointer to the extended action list.
5652  * @param[in] actions
5653  *   Pointer to the list of actions.
5654  * @param[in] actions_n
5655  *   Number of actions in the list.
5656  * @param[out] error
5657  *   Perform verbose error reporting if not NULL.
5658  * @param[in] encap_idx
5659  *   The encap action index.
5660  *
5661  * @return
5662  *   0 on success, negative value otherwise
5663  */
5664 static int
5665 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
5666 		       struct rte_flow_action *ext_actions,
5667 		       const struct rte_flow_action *actions,
5668 		       int actions_n, struct rte_flow_error *error,
5669 		       int encap_idx)
5670 {
5671 	struct mlx5_flow_action_copy_mreg *cp_mreg =
5672 		(struct mlx5_flow_action_copy_mreg *)
5673 			(ext_actions + actions_n + 1);
5674 	int ret;
5675 
5676 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
5677 	if (ret < 0)
5678 		return ret;
5679 	cp_mreg->dst = ret;
5680 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
5681 	if (ret < 0)
5682 		return ret;
5683 	cp_mreg->src = ret;
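	/*
	 * dst is the Rx metadata register (reg_c[0]) and src is the Tx
	 * metadata register (reg_a set by the WQE), as described above.
	 */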
5684 	if (encap_idx != 0)
5685 		memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
5686 	if (encap_idx == actions_n - 1) {
5687 		ext_actions[actions_n - 1] = (struct rte_flow_action){
5688 			.type = (enum rte_flow_action_type)
5689 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5690 			.conf = cp_mreg,
5691 		};
5692 		ext_actions[actions_n] = (struct rte_flow_action){
5693 			.type = RTE_FLOW_ACTION_TYPE_END,
5694 		};
5695 	} else {
5696 		ext_actions[encap_idx] = (struct rte_flow_action){
5697 			.type = (enum rte_flow_action_type)
5698 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5699 			.conf = cp_mreg,
5700 		};
5701 		memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
5702 				sizeof(*ext_actions) * (actions_n - encap_idx));
5703 	}
5704 	return 0;
5705 }
5706 
5707 /**
5708  * Check the match action from the action list.
5709  *
5710  * @param[in] actions
5711  *   Pointer to the list of actions.
5712  * @param[in] attr
5713  *   Flow rule attributes.
5714  * @param[in] action
5715  *   The action to be checked for existence.
5716  * @param[out] match_action_pos
5717  *   Pointer to the position of the matched action if exists, otherwise is -1.
5718  * @param[out] qrss_action_pos
5719  *   Pointer to the position of the Queue/RSS action if exists, otherwise is -1.
5720  * @param[out] modify_after_mirror
5721  *   Pointer to the flag of modify action after FDB mirroring.
5722  *
5723  * @return
5724  *   > 0 the total number of actions.
5725  *   0 if the match action is not found in the action list.
5726  */
5727 static int
5728 flow_check_match_action(const struct rte_flow_action actions[],
5729 			const struct rte_flow_attr *attr,
5730 			enum rte_flow_action_type action,
5731 			int *match_action_pos, int *qrss_action_pos,
5732 			int *modify_after_mirror)
5733 {
5734 	const struct rte_flow_action_sample *sample;
5735 	const struct rte_flow_action_raw_decap *decap;
5736 	int actions_n = 0;
5737 	uint32_t ratio = 0;
5738 	int sub_type = 0;
5739 	int flag = 0;
5740 	int fdb_mirror = 0;
5741 
5742 	*match_action_pos = -1;
5743 	*qrss_action_pos = -1;
5744 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5745 		if (actions->type == action) {
5746 			flag = 1;
5747 			*match_action_pos = actions_n;
5748 		}
5749 		switch (actions->type) {
5750 		case RTE_FLOW_ACTION_TYPE_QUEUE:
5751 		case RTE_FLOW_ACTION_TYPE_RSS:
5752 			*qrss_action_pos = actions_n;
5753 			break;
5754 		case RTE_FLOW_ACTION_TYPE_SAMPLE:
5755 			sample = actions->conf;
5756 			ratio = sample->ratio;
5757 			sub_type = ((const struct rte_flow_action *)
5758 					(sample->actions))->type;
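			/* Sample ratio 1 in the transfer domain means full mirroring. */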
5759 			if (ratio == 1 && attr->transfer)
5760 				fdb_mirror = 1;
5761 			break;
5762 		case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
5763 		case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
5764 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
5765 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
5766 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
5767 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
5768 		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
5769 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
5770 		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
5771 		case RTE_FLOW_ACTION_TYPE_SET_TTL:
5772 		case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
5773 		case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
5774 		case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
5775 		case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
5776 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
5777 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
5778 		case RTE_FLOW_ACTION_TYPE_FLAG:
5779 		case RTE_FLOW_ACTION_TYPE_MARK:
5780 		case RTE_FLOW_ACTION_TYPE_SET_META:
5781 		case RTE_FLOW_ACTION_TYPE_SET_TAG:
5782 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
5783 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5784 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5785 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
5786 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5787 		case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5788 		case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
5789 		case RTE_FLOW_ACTION_TYPE_METER:
5790 			if (fdb_mirror)
5791 				*modify_after_mirror = 1;
5792 			break;
5793 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5794 			decap = actions->conf;
5795 			while ((++actions)->type == RTE_FLOW_ACTION_TYPE_VOID)
5796 				;
5797 			actions_n++;
5798 			if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
5799 				const struct rte_flow_action_raw_encap *encap =
5800 								actions->conf;
5801 				if (decap->size <=
5802 					MLX5_ENCAPSULATION_DECISION_SIZE &&
5803 				    encap->size >
5804 					MLX5_ENCAPSULATION_DECISION_SIZE)
5805 					/* L3 encap. */
5806 					break;
5807 			}
5808 			if (fdb_mirror)
5809 				*modify_after_mirror = 1;
5810 			break;
5811 		default:
5812 			break;
5813 		}
5814 		actions_n++;
5815 	}
5816 	if (flag && fdb_mirror && !*modify_after_mirror) {
5817 		/* FDB mirroring is implemented with the destination array
5818 		 * instead of the FLOW_SAMPLER object.
5819 		 */
5820 		if (sub_type != RTE_FLOW_ACTION_TYPE_END)
5821 			flag = 0;
5822 	}
5823 	/* Count RTE_FLOW_ACTION_TYPE_END. */
5824 	return flag ? actions_n + 1 : 0;
5825 }
5826 
5827 #define SAMPLE_SUFFIX_ITEM 3
5828 
5829 /**
5830  * Split the sample flow.
5831  *
5832  * The sample flow is split into two sub flows: the prefix flow keeps
5833  * the sample action, while the other actions move to a new suffix flow.
5834  *
5835  * A unique tag ID is also added with a tag action in the sample flow;
5836  * the same tag ID is used as a match in the suffix flow.
5837  *
5838  * @param dev
5839  *   Pointer to Ethernet device.
5840  * @param[in] add_tag
5841  *   Add extra tag action flag.
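 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).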
5842  * @param[out] sfx_items
5843  *   Suffix flow match items (list terminated by the END pattern item).
5844  * @param[in] actions
5845  *   Associated actions (list terminated by the END action).
5846  * @param[out] actions_sfx
5847  *   Suffix flow actions.
5848  * @param[out] actions_pre
5849  *   Prefix flow actions.
5850  * @param[in] actions_n
5851  *  The total number of actions.
5852  * @param[in] sample_action_pos
5853  *   The sample action position.
5854  * @param[in] qrss_action_pos
5855  *   The Queue/RSS action position.
5856  * @param[in] jump_table
5857  *   Group to jump to when an extra jump action is needed, 0 otherwise.
5858  * @param[out] error
5859  *   Perform verbose error reporting if not NULL.
5860  *
5861  * @return
5862  *   0 or a unique flow_id on success, a negative errno value
5863  *   otherwise and rte_errno is set.
5864  */
5865 static int
5866 flow_sample_split_prep(struct rte_eth_dev *dev,
5867 		       int add_tag,
5868 		       const struct rte_flow_item items[],
5869 		       struct rte_flow_item sfx_items[],
5870 		       const struct rte_flow_action actions[],
5871 		       struct rte_flow_action actions_sfx[],
5872 		       struct rte_flow_action actions_pre[],
5873 		       int actions_n,
5874 		       int sample_action_pos,
5875 		       int qrss_action_pos,
5876 		       int jump_table,
5877 		       struct rte_flow_error *error)
5878 {
5879 	struct mlx5_priv *priv = dev->data->dev_private;
5880 	struct mlx5_rte_flow_action_set_tag *set_tag;
5881 	struct mlx5_rte_flow_item_tag *tag_spec;
5882 	struct mlx5_rte_flow_item_tag *tag_mask;
5883 	struct rte_flow_action_jump *jump_action;
5884 	uint32_t tag_id = 0;
5885 	int append_index = 0;
5886 	int set_tag_idx = -1;
5887 	int index;
5888 	int ret;
5889 
5890 	if (sample_action_pos < 0)
5891 		return rte_flow_error_set(error, EINVAL,
5892 					  RTE_FLOW_ERROR_TYPE_ACTION,
5893 					  NULL, "invalid position of sample "
5894 					  "action in list");
5895 	/* Prepare the actions for prefix and suffix flow. */
5896 	if (add_tag) {
5897 		/* Update the newly added tag action index so that it precedes
5898 		 * the PUSH_VLAN or ENCAP action.
5899 		 */
5900 		const struct rte_flow_action_raw_encap *raw_encap;
5901 		const struct rte_flow_action *action = actions;
5902 		int encap_idx;
5903 		int action_idx = 0;
5904 		int raw_decap_idx = -1;
5905 		int push_vlan_idx = -1;
5906 		for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
5907 			switch (action->type) {
5908 			case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5909 				raw_decap_idx = action_idx;
5910 				break;
5911 			case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5912 				raw_encap = action->conf;
5913 				if (raw_encap->size >
5914 					MLX5_ENCAPSULATION_DECISION_SIZE) {
5915 					encap_idx = raw_decap_idx != -1 ?
5916 						    raw_decap_idx : action_idx;
5917 					if (encap_idx < sample_action_pos &&
5918 					    push_vlan_idx == -1)
5919 						set_tag_idx = encap_idx;
5920 				}
5921 				break;
5922 			case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
5923 			case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
5924 				encap_idx = action_idx;
5925 				if (encap_idx < sample_action_pos &&
5926 				    push_vlan_idx == -1)
5927 					set_tag_idx = encap_idx;
5928 				break;
5929 			case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5930 			case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5931 				push_vlan_idx = action_idx;
5932 				if (push_vlan_idx < sample_action_pos)
5933 					set_tag_idx = action_idx;
5934 				break;
5935 			default:
5936 				break;
5937 			}
5938 			action_idx++;
5939 		}
5940 	}
5941 	/* Prepare the actions for prefix and suffix flow. */
5942 	if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
5943 		index = qrss_action_pos;
5944 		/* Put the actions preceding the Queue/RSS action into the prefix flow. */
5945 		if (index != 0)
5946 			memcpy(actions_pre, actions,
5947 			       sizeof(struct rte_flow_action) * index);
5948 		/* Put the other actions preceding the sample action into the prefix flow. */
5949 		if (sample_action_pos > index + 1)
5950 			memcpy(actions_pre + index, actions + index + 1,
5951 			       sizeof(struct rte_flow_action) *
5952 			       (sample_action_pos - index - 1));
5953 		index = sample_action_pos - 1;
5954 		/* Put Queue/RSS action into Suffix flow. */
5955 		memcpy(actions_sfx, actions + qrss_action_pos,
5956 		       sizeof(struct rte_flow_action));
5957 		actions_sfx++;
5958 	} else if (add_tag && set_tag_idx >= 0) {
5959 		if (set_tag_idx > 0)
5960 			memcpy(actions_pre, actions,
5961 			       sizeof(struct rte_flow_action) * set_tag_idx);
5962 		memcpy(actions_pre + set_tag_idx + 1, actions + set_tag_idx,
5963 		       sizeof(struct rte_flow_action) *
5964 		       (sample_action_pos - set_tag_idx));
5965 		index = sample_action_pos;
5966 	} else {
5967 		index = sample_action_pos;
5968 		if (index != 0)
5969 			memcpy(actions_pre, actions,
5970 			       sizeof(struct rte_flow_action) * index);
5971 	}
5972 	/* For CX5, add an extra tag action for NIC-RX and E-Switch ingress.
5973 	 * For CX6DX and above, where metadata registers Cx preserve their value,
5974 	 * add an extra tag action for NIC-RX and the E-Switch domain.
5975 	 */
5976 	if (add_tag) {
5977 		/* Prepare the prefix tag action. */
5978 		append_index++;
5979 		set_tag = (void *)(actions_pre + actions_n + append_index);
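		/*
		 * The SET_TAG configuration is stored past the reserved
		 * prefix action array, as is the jump action below.
		 */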
5980 		ret = mlx5_flow_get_reg_id(dev, MLX5_SAMPLE_ID, 0, error);
5981 		/* A trusted VF/SF on CX5 does not support meter, so the reserved
5982 		 * metadata regC is REG_NON; fall back to using application tag
5983 		 * index 0.
5984 		 */
5985 		if (unlikely(ret == REG_NON))
5986 			ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
5987 		if (ret < 0)
5988 			return ret;
5989 		mlx5_ipool_malloc(priv->sh->ipool
5990 				  [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id);
5991 		*set_tag = (struct mlx5_rte_flow_action_set_tag) {
5992 			.id = ret,
5993 			.data = tag_id,
5994 		};
5995 		/* Prepare the suffix subflow items. */
5996 		for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
5997 			if (items->type == RTE_FLOW_ITEM_TYPE_PORT_ID) {
5998 				memcpy(sfx_items, items, sizeof(*sfx_items));
5999 				sfx_items++;
6000 			}
6001 		}
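		/*
		 * The tag spec and mask structures are stored past the
		 * SAMPLE_SUFFIX_ITEM pattern slots of the sfx_items buffer.
		 */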
6002 		tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
6003 		tag_spec->data = tag_id;
6004 		tag_spec->id = set_tag->id;
6005 		tag_mask = tag_spec + 1;
6006 		tag_mask->data = UINT32_MAX;
6007 		sfx_items[0] = (struct rte_flow_item){
6008 			.type = (enum rte_flow_item_type)
6009 				MLX5_RTE_FLOW_ITEM_TYPE_TAG,
6010 			.spec = tag_spec,
6011 			.last = NULL,
6012 			.mask = tag_mask,
6013 		};
6014 		sfx_items[1] = (struct rte_flow_item){
6015 			.type = (enum rte_flow_item_type)
6016 				RTE_FLOW_ITEM_TYPE_END,
6017 		};
6018 		/* Prepare the tag action in prefix subflow. */
6019 		set_tag_idx = (set_tag_idx == -1) ? index : set_tag_idx;
6020 		actions_pre[set_tag_idx] =
6021 			(struct rte_flow_action){
6022 			.type = (enum rte_flow_action_type)
6023 				MLX5_RTE_FLOW_ACTION_TYPE_TAG,
6024 			.conf = set_tag,
6025 		};
6026 		/* Update the next sample position since one tag action was added. */
6027 		index += 1;
6028 	}
6029 	/* Copy the sample action into prefix flow. */
6030 	memcpy(actions_pre + index, actions + sample_action_pos,
6031 	       sizeof(struct rte_flow_action));
6032 	index += 1;
6033 	/* For a modify action after the sample action in E-Switch mirroring,
6034 	 * add the extra jump action in the prefix subflow to jump into the next
6035 	 * table, then do the modify action in the new table.
6036 	 */
6037 	if (jump_table) {
6038 		/* Prepare the prefix jump action. */
6039 		append_index++;
6040 		jump_action = (void *)(actions_pre + actions_n + append_index);
6041 		jump_action->group = jump_table;
6042 		actions_pre[index++] =
6043 			(struct rte_flow_action){
6044 			.type = (enum rte_flow_action_type)
6045 				RTE_FLOW_ACTION_TYPE_JUMP,
6046 			.conf = jump_action,
6047 		};
6048 	}
6049 	actions_pre[index] = (struct rte_flow_action){
6050 		.type = (enum rte_flow_action_type)
6051 			RTE_FLOW_ACTION_TYPE_END,
6052 	};
6053 	/* Put the actions after sample into Suffix flow. */
6054 	memcpy(actions_sfx, actions + sample_action_pos + 1,
6055 	       sizeof(struct rte_flow_action) *
6056 	       (actions_n - sample_action_pos - 1));
6057 	return tag_id;
6058 }
6059 
6060 /**
6061  * The splitting for metadata feature.
6062  *
6063  * - Q/RSS action on NIC Rx should be split in order to pass by
6064  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
6065  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
6066  *
6067  * - All the actions on NIC Tx should have a mreg copy action to
6068  *   copy reg_a from WQE to reg_c[0].
6069  *
6070  * @param dev
6071  *   Pointer to Ethernet device.
6072  * @param[in] flow
6073  *   Parent flow structure pointer.
6074  * @param[in] attr
6075  *   Flow rule attributes.
6076  * @param[in] items
6077  *   Pattern specification (list terminated by the END pattern item).
6078  * @param[in] actions
6079  *   Associated actions (list terminated by the END action).
6080  * @param[in] flow_split_info
6081  *   Pointer to flow split info structure.
6082  * @param[out] error
6083  *   Perform verbose error reporting if not NULL.
6084  * @return
6085  *   0 on success, negative value otherwise
6086  */
6087 static int
6088 flow_create_split_metadata(struct rte_eth_dev *dev,
6089 			   struct rte_flow *flow,
6090 			   const struct rte_flow_attr *attr,
6091 			   const struct rte_flow_item items[],
6092 			   const struct rte_flow_action actions[],
6093 			   struct mlx5_flow_split_info *flow_split_info,
6094 			   struct rte_flow_error *error)
6095 {
6096 	struct mlx5_priv *priv = dev->data->dev_private;
6097 	struct mlx5_sh_config *config = &priv->sh->config;
6098 	const struct rte_flow_action *qrss = NULL;
6099 	struct rte_flow_action *ext_actions = NULL;
6100 	struct mlx5_flow *dev_flow = NULL;
6101 	uint32_t qrss_id = 0;
6102 	int mtr_sfx = 0;
6103 	size_t act_size;
6104 	int actions_n;
6105 	int encap_idx;
6106 	int ret;
6107 
6108 	/* Check whether extensive metadata feature is engaged. */
6109 	if (!config->dv_flow_en ||
6110 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
6111 	    !mlx5_flow_ext_mreg_supported(dev))
6112 		return flow_create_split_inner(dev, flow, NULL, attr, items,
6113 					       actions, flow_split_info, error);
6114 	actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
6115 							   &encap_idx);
6116 	if (qrss) {
6117 		/* Exclude hairpin flows from splitting. */
6118 		if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
6119 			const struct rte_flow_action_queue *queue;
6120 
6121 			queue = qrss->conf;
6122 			if (mlx5_rxq_is_hairpin(dev, queue->index))
6123 				qrss = NULL;
6124 		} else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
6125 			const struct rte_flow_action_rss *rss;
6126 
6127 			rss = qrss->conf;
6128 			if (mlx5_rxq_is_hairpin(dev, rss->queue[0]))
6129 				qrss = NULL;
6130 		}
6131 	}
6132 	if (qrss) {
6133 		/* Check if it is in meter suffix table. */
6134 		mtr_sfx = attr->group == (attr->transfer ?
6135 			  (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6136 			  MLX5_FLOW_TABLE_LEVEL_METER);
6137 		/*
6138 		 * Q/RSS action on NIC Rx should be split in order to pass by
6139 		 * the mreg copy table (RX_CP_TBL) and then it jumps to the
6140 		 * action table (RX_ACT_TBL) which has the split Q/RSS action.
6141 		 */
6142 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
6143 			   sizeof(struct rte_flow_action_set_tag) +
6144 			   sizeof(struct rte_flow_action_jump);
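		/*
		 * One extra action slot is reserved for the appended JUMP;
		 * the SET_TAG and JUMP configurations are laid out right
		 * after the enlarged action array.
		 */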
6145 		ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
6146 					  SOCKET_ID_ANY);
6147 		if (!ext_actions)
6148 			return rte_flow_error_set(error, ENOMEM,
6149 						  RTE_FLOW_ERROR_TYPE_ACTION,
6150 						  NULL, "no memory to split "
6151 						  "metadata flow");
6152 		/*
6153 		 * Create the new actions list with the Q/RSS action removed
6154 		 * and a set tag plus a jump to the register copy table
6155 		 * (RX_CP_TBL) appended. The unique tag ID must be preallocated
6156 		 * here in advance, because it is needed for the set tag action.
6157 		 */
6158 		qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
6159 						    qrss, actions_n,
6160 						    mtr_sfx, error);
6161 		if (!mtr_sfx && !qrss_id) {
6162 			ret = -rte_errno;
6163 			goto exit;
6164 		}
6165 	} else if (attr->egress && !attr->transfer) {
6166 		/*
6167 		 * All the actions on NIC Tx should have a metadata register
6168 		 * copy action to copy reg_a from WQE to reg_c[meta]
6169 		 */
6170 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
6171 			   sizeof(struct mlx5_flow_action_copy_mreg);
6172 		ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
6173 					  SOCKET_ID_ANY);
6174 		if (!ext_actions)
6175 			return rte_flow_error_set(error, ENOMEM,
6176 						  RTE_FLOW_ERROR_TYPE_ACTION,
6177 						  NULL, "no memory to split "
6178 						  "metadata flow");
6179 		/* Create the action list appended with copy register. */
6180 		ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
6181 					     actions_n, error, encap_idx);
6182 		if (ret < 0)
6183 			goto exit;
6184 	}
6185 	/* Add the unmodified original or prefix subflow. */
6186 	ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6187 				      items, ext_actions ? ext_actions :
6188 				      actions, flow_split_info, error);
6189 	if (ret < 0)
6190 		goto exit;
6191 	MLX5_ASSERT(dev_flow);
6192 	if (qrss) {
6193 		const struct rte_flow_attr q_attr = {
6194 			.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
6195 			.ingress = 1,
6196 		};
6197 		/* Internal PMD action to set register. */
6198 		struct mlx5_rte_flow_item_tag q_tag_spec = {
6199 			.data = qrss_id,
6200 			.id = REG_NON,
6201 		};
6202 		struct rte_flow_item q_items[] = {
6203 			{
6204 				.type = (enum rte_flow_item_type)
6205 					MLX5_RTE_FLOW_ITEM_TYPE_TAG,
6206 				.spec = &q_tag_spec,
6207 				.last = NULL,
6208 				.mask = NULL,
6209 			},
6210 			{
6211 				.type = RTE_FLOW_ITEM_TYPE_END,
6212 			},
6213 		};
6214 		struct rte_flow_action q_actions[] = {
6215 			{
6216 				.type = qrss->type,
6217 				.conf = qrss->conf,
6218 			},
6219 			{
6220 				.type = RTE_FLOW_ACTION_TYPE_END,
6221 			},
6222 		};
6223 		uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
6224 
6225 		/*
6226 		 * Configure the tag item only if there is no meter subflow.
6227 		 * Since the tag is already set in the meter suffix subflow,
6228 		 * we can just use the meter suffix items as they are.
6229 		 */
6230 		if (qrss_id) {
6231 			/* Not meter subflow. */
6232 			MLX5_ASSERT(!mtr_sfx);
6233 			/*
6234 			 * Put the unique ID in the prefix flow because it is destroyed
6235 			 * after the suffix flow; the ID is freed once there
6236 			 * are no actual flows with this ID, at which point identifier
6237 			 * reallocation becomes possible (for example, for
6238 			 * other flows in other threads).
6239 			 */
6240 			dev_flow->handle->split_flow_id = qrss_id;
6241 			ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
6242 						   error);
6243 			if (ret < 0)
6244 				goto exit;
6245 			q_tag_spec.id = ret;
6246 		}
6247 		dev_flow = NULL;
6248 		/* Add suffix subflow to execute Q/RSS. */
6249 		flow_split_info->prefix_layers = layers;
6250 		flow_split_info->prefix_mark = 0;
6251 		flow_split_info->table_id = 0;
6252 		ret = flow_create_split_inner(dev, flow, &dev_flow,
6253 					      &q_attr, mtr_sfx ? items :
6254 					      q_items, q_actions,
6255 					      flow_split_info, error);
6256 		if (ret < 0)
6257 			goto exit;
6258 		/* The qrss ID is kept only to be freed on failure; clear it on success. */
6259 		qrss_id = 0;
6260 		MLX5_ASSERT(dev_flow);
6261 	}
6262 
6263 exit:
6264 	/*
6265 	 * We do not destroy the partially created sub_flows in case of error.
6266 	 * They are included in the parent flow list and will be destroyed
6267 	 * by flow_drv_destroy.
6268 	 */
6269 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
6270 			qrss_id);
6271 	mlx5_free(ext_actions);
6272 	return ret;
6273 }
6274 
6275 /**
6276  * Create meter internal drop flow with the original pattern.
6277  *
6278  * @param dev
6279  *   Pointer to Ethernet device.
6280  * @param[in] flow
6281  *   Parent flow structure pointer.
6282  * @param[in] attr
6283  *   Flow rule attributes.
6284  * @param[in] items
6285  *   Pattern specification (list terminated by the END pattern item).
6286  * @param[in] flow_split_info
6287  *   Pointer to flow split info structure.
6288  * @param[in] fm
6289  *   Pointer to flow meter structure.
6290  * @param[out] error
6291  *   Perform verbose error reporting if not NULL.
6292  * @return
6293  *   0 on success, negative value otherwise
6294  */
6295 static uint32_t
6296 flow_meter_create_drop_flow_with_org_pattern(struct rte_eth_dev *dev,
6297 			struct rte_flow *flow,
6298 			const struct rte_flow_attr *attr,
6299 			const struct rte_flow_item items[],
6300 			struct mlx5_flow_split_info *flow_split_info,
6301 			struct mlx5_flow_meter_info *fm,
6302 			struct rte_flow_error *error)
6303 {
6304 	struct mlx5_flow *dev_flow = NULL;
6305 	struct rte_flow_attr drop_attr = *attr;
6306 	struct rte_flow_action drop_actions[3];
6307 	struct mlx5_flow_split_info drop_split_info = *flow_split_info;
6308 
6309 	MLX5_ASSERT(fm->drop_cnt);
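	/* Build the internal action list: COUNT (meter drop counter), DROP, END. */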
6310 	drop_actions[0].type =
6311 		(enum rte_flow_action_type)MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
6312 	drop_actions[0].conf = (void *)(uintptr_t)fm->drop_cnt;
6313 	drop_actions[1].type = RTE_FLOW_ACTION_TYPE_DROP;
6314 	drop_actions[1].conf = NULL;
6315 	drop_actions[2].type = RTE_FLOW_ACTION_TYPE_END;
6316 	drop_actions[2].conf = NULL;
6317 	drop_split_info.external = false;
6318 	drop_split_info.skip_scale |= 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6319 	drop_split_info.table_id = MLX5_MTR_TABLE_ID_DROP;
6320 	drop_attr.group = MLX5_FLOW_TABLE_LEVEL_METER;
6321 	return flow_create_split_inner(dev, flow, &dev_flow,
6322 				&drop_attr, items, drop_actions,
6323 				&drop_split_info, error);
6324 }
6325 
6326 /**
6327  * The splitting for meter feature.
6328  *
6329  * - The meter flow will be split into two flows, a prefix and a
6330  *   suffix flow. The packets make sense only if they pass the prefix
6331  *   meter action.
6332  *
6333  * - Reg_C_5 is used for the packet to match between the prefix and
6334  *   suffix flows.
6335  *
6336  * @param dev
6337  *   Pointer to Ethernet device.
6338  * @param[in] flow
6339  *   Parent flow structure pointer.
6340  * @param[in] attr
6341  *   Flow rule attributes.
6342  * @param[in] items
6343  *   Pattern specification (list terminated by the END pattern item).
6344  * @param[in] actions
6345  *   Associated actions (list terminated by the END action).
6346  * @param[in] flow_split_info
6347  *   Pointer to flow split info structure.
6348  * @param[out] error
6349  *   Perform verbose error reporting if not NULL.
6350  * @return
6351  *   0 on success, negative value otherwise
6352  */
6353 static int
6354 flow_create_split_meter(struct rte_eth_dev *dev,
6355 			struct rte_flow *flow,
6356 			const struct rte_flow_attr *attr,
6357 			const struct rte_flow_item items[],
6358 			const struct rte_flow_action actions[],
6359 			struct mlx5_flow_split_info *flow_split_info,
6360 			struct rte_flow_error *error)
6361 {
6362 	struct mlx5_priv *priv = dev->data->dev_private;
6363 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6364 	struct rte_flow_action *sfx_actions = NULL;
6365 	struct rte_flow_action *pre_actions = NULL;
6366 	struct rte_flow_item *sfx_items = NULL;
6367 	struct mlx5_flow *dev_flow = NULL;
6368 	struct rte_flow_attr sfx_attr = *attr;
6369 	struct mlx5_flow_meter_info *fm = NULL;
6370 	uint8_t skip_scale_restore;
6371 	bool has_mtr = false;
6372 	bool has_modify = false;
6373 	bool set_mtr_reg = true;
6374 	bool is_mtr_hierarchy = false;
6375 	uint32_t meter_id = 0;
6376 	uint32_t mtr_idx = 0;
6377 	uint32_t mtr_flow_id = 0;
6378 	size_t act_size;
6379 	size_t item_size;
6380 	int actions_n = 0;
6381 	int ret = 0;
6382 
6383 	if (priv->mtr_en)
6384 		actions_n = flow_check_meter_action(dev, actions, &has_mtr,
6385 						    &has_modify, &meter_id);
6386 	if (has_mtr) {
6387 		if (flow->meter) {
6388 			fm = flow_dv_meter_find_by_idx(priv, flow->meter);
6389 			if (!fm)
6390 				return rte_flow_error_set(error, EINVAL,
6391 						RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6392 						NULL, "Meter not found.");
6393 		} else {
6394 			fm = mlx5_flow_meter_find(priv, meter_id, &mtr_idx);
6395 			if (!fm)
6396 				return rte_flow_error_set(error, EINVAL,
6397 						RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6398 						NULL, "Meter not found.");
6399 			ret = mlx5_flow_meter_attach(priv, fm,
6400 						     &sfx_attr, error);
6401 			if (ret)
6402 				return -rte_errno;
6403 			flow->meter = mtr_idx;
6404 		}
6405 		MLX5_ASSERT(wks);
6406 		wks->fm = fm;
6407 		if (!fm->def_policy) {
6408 			wks->policy = mlx5_flow_meter_policy_find(dev,
6409 								  fm->policy_id,
6410 								  NULL);
6411 			MLX5_ASSERT(wks->policy);
6412 			if (wks->policy->mark)
6413 				wks->mark = 1;
6414 			if (wks->policy->is_hierarchy) {
6415 				wks->final_policy =
6416 				mlx5_flow_meter_hierarchy_get_final_policy(dev,
6417 								wks->policy);
6418 				if (!wks->final_policy)
6419 					return rte_flow_error_set(error,
6420 					EINVAL,
6421 					RTE_FLOW_ERROR_TYPE_ACTION, NULL,
6422 				"Failed to find terminal policy of hierarchy.");
6423 				is_mtr_hierarchy = true;
6424 			}
6425 		}
6426 		/*
6427 		 * If it isn't a default-policy meter, and
6428 		 * 1. there's no action in the flow to change
6429 		 *    the packet (modify/encap/decap etc.), OR
6430 		 * 2. no drop count is needed for this meter, AND
6431 		 * 3. it's not a meter hierarchy,
6432 		 * then there is no need to use regC to save the meter ID anymore.
6433 		 */
6434 		if (!fm->def_policy && !is_mtr_hierarchy &&
6435 		    (!has_modify || !fm->drop_cnt))
6436 			set_mtr_reg = false;
6437 		/* Prefix actions: meter, decap, encap, tag, jump, end, cnt. */
6438 #define METER_PREFIX_ACTION 7
6439 		act_size = (sizeof(struct rte_flow_action) *
6440 			    (actions_n + METER_PREFIX_ACTION)) +
6441 			   sizeof(struct mlx5_rte_flow_action_set_tag);
6442 		/* Suffix items: tag, vlan, port id, end. */
6443 #define METER_SUFFIX_ITEM 4
6444 		item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
6445 			    sizeof(struct mlx5_rte_flow_item_tag) * 2;
6446 		sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
6447 					  0, SOCKET_ID_ANY);
6448 		if (!sfx_actions)
6449 			return rte_flow_error_set(error, ENOMEM,
6450 						  RTE_FLOW_ERROR_TYPE_ACTION,
6451 						  NULL, "no memory to split "
6452 						  "meter flow");
6453 		sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
6454 			     act_size);
6455 		/* There is no suffix flow for a meter with a non-default policy. */
6456 		if (!fm->def_policy)
6457 			pre_actions = sfx_actions + 1;
6458 		else
6459 			pre_actions = sfx_actions + actions_n;
6460 		ret = flow_meter_split_prep(dev, flow, wks, &sfx_attr,
6461 					    items, sfx_items, actions,
6462 					    sfx_actions, pre_actions,
6463 					    (set_mtr_reg ? &mtr_flow_id : NULL),
6464 					    error);
6465 		if (ret) {
6466 			ret = -rte_errno;
6467 			goto exit;
6468 		}
6469 		/* Add the prefix subflow. */
6470 		skip_scale_restore = flow_split_info->skip_scale;
6471 		flow_split_info->skip_scale |=
6472 			1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6473 		ret = flow_create_split_inner(dev, flow, &dev_flow,
6474 					      attr, items, pre_actions,
6475 					      flow_split_info, error);
6476 		flow_split_info->skip_scale = skip_scale_restore;
6477 		if (ret) {
6478 			if (mtr_flow_id)
6479 				mlx5_ipool_free(fm->flow_ipool, mtr_flow_id);
6480 			ret = -rte_errno;
6481 			goto exit;
6482 		}
6483 		if (mtr_flow_id) {
6484 			dev_flow->handle->split_flow_id = mtr_flow_id;
6485 			dev_flow->handle->is_meter_flow_id = 1;
6486 		}
6487 		if (!fm->def_policy) {
6488 			if (!set_mtr_reg && fm->drop_cnt)
6489 				ret =
6490 			flow_meter_create_drop_flow_with_org_pattern(dev, flow,
6491 							&sfx_attr, items,
6492 							flow_split_info,
6493 							fm, error);
6494 			goto exit;
6495 		}
6496 		/* Set the sfx group attr. */
6497 		sfx_attr.group = sfx_attr.transfer ?
6498 				(MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6499 				 MLX5_FLOW_TABLE_LEVEL_METER;
6500 		flow_split_info->prefix_layers =
6501 				flow_get_prefix_layer_flags(dev_flow);
6502 		flow_split_info->prefix_mark |= wks->mark;
6503 		flow_split_info->table_id = MLX5_MTR_TABLE_ID_SUFFIX;
6504 	}
6505 	/* Add the suffix subflow. */
6506 	ret = flow_create_split_metadata(dev, flow,
6507 					 &sfx_attr, sfx_items ?
6508 					 sfx_items : items,
6509 					 sfx_actions ? sfx_actions : actions,
6510 					 flow_split_info, error);
6511 exit:
6512 	if (sfx_actions)
6513 		mlx5_free(sfx_actions);
6514 	return ret;
6515 }
6516 
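/*
 * Illustrative sketch (not part of the driver): a minimal application-side
 * rule that the meter splitter above would process. The meter id 1 and the
 * queue index are hypothetical example values; the meter is assumed to be
 * created beforehand with rte_mtr_create().
 *
 * @code
 * struct rte_flow_action_meter meter = { .mtr_id = 1 };
 * struct rte_flow_action_queue queue = { .index = 0 };
 * const struct rte_flow_action actions[] = {
 *	{ .type = RTE_FLOW_ACTION_TYPE_METER, .conf = &meter },
 *	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *	{ .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * @endcode
 */
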
6517 /**
6518  * The splitting for the sample feature.
6519  *
6520  * Once a sample action is detected in the action list, the flow actions
6521  * are split into a prefix sub flow and a suffix sub flow.
6522  *
6523  * The original items remain in the prefix sub flow. All actions preceding
6524  * the sample action, and the sample action itself, are copied to the prefix
6525  * sub flow; the actions following it are copied to the suffix sub flow.
6526  * The queue action is always located in the suffix sub flow.
6527  *
6528  * To make a packet from the prefix sub flow match the suffix sub flow, an
6529  * extra tag action is added to the prefix sub flow, and the suffix sub
6530  * flow uses a tag item with the unique flow id.
6531  *
6532  * @param dev
6533  *   Pointer to Ethernet device.
6534  * @param[in] flow
6535  *   Parent flow structure pointer.
6536  * @param[in] attr
6537  *   Flow rule attributes.
6538  * @param[in] items
6539  *   Pattern specification (list terminated by the END pattern item).
6540  * @param[in] actions
6541  *   Associated actions (list terminated by the END action).
6542  * @param[in] flow_split_info
6543  *   Pointer to flow split info structure.
6544  * @param[out] error
6545  *   Perform verbose error reporting if not NULL.
6546  * @return
6547  *   0 on success, negative value otherwise
6548  */
6549 static int
6550 flow_create_split_sample(struct rte_eth_dev *dev,
6551 			 struct rte_flow *flow,
6552 			 const struct rte_flow_attr *attr,
6553 			 const struct rte_flow_item items[],
6554 			 const struct rte_flow_action actions[],
6555 			 struct mlx5_flow_split_info *flow_split_info,
6556 			 struct rte_flow_error *error)
6557 {
6558 	struct mlx5_priv *priv = dev->data->dev_private;
6559 	struct rte_flow_action *sfx_actions = NULL;
6560 	struct rte_flow_action *pre_actions = NULL;
6561 	struct rte_flow_item *sfx_items = NULL;
6562 	struct mlx5_flow *dev_flow = NULL;
6563 	struct rte_flow_attr sfx_attr = *attr;
6564 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6565 	struct mlx5_flow_dv_sample_resource *sample_res;
6566 	struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
6567 	struct mlx5_flow_tbl_resource *sfx_tbl;
6568 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6569 #endif
6570 	size_t act_size;
6571 	size_t item_size;
6572 	uint32_t fdb_tx = 0;
6573 	int32_t tag_id = 0;
6574 	int actions_n = 0;
6575 	int sample_action_pos;
6576 	int qrss_action_pos;
6577 	int add_tag = 0;
6578 	int modify_after_mirror = 0;
6579 	uint16_t jump_table = 0;
6580 	const uint32_t next_ft_step = 1;
6581 	int ret = 0;
6582 
6583 	if (priv->sampler_en)
6584 		actions_n = flow_check_match_action(actions, attr,
6585 					RTE_FLOW_ACTION_TYPE_SAMPLE,
6586 					&sample_action_pos, &qrss_action_pos,
6587 					&modify_after_mirror);
6588 	if (actions_n) {
6589 		/* The prefix actions must include sample, tag and end. */
6590 		act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
6591 			   + sizeof(struct mlx5_rte_flow_action_set_tag);
6592 		item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
6593 			    sizeof(struct mlx5_rte_flow_item_tag) * 2;
6594 		sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
6595 					  item_size), 0, SOCKET_ID_ANY);
6596 		if (!sfx_actions)
6597 			return rte_flow_error_set(error, ENOMEM,
6598 						  RTE_FLOW_ERROR_TYPE_ACTION,
6599 						  NULL, "no memory to split "
6600 						  "sample flow");
6601 		/* The representor_id is UINT16_MAX for uplink. */
6602 		fdb_tx = (attr->transfer && priv->representor_id != UINT16_MAX);
6603 		/*
6604 		 * When reg_c_preserve is set, metadata registers Cx preserve
6605 		 * their value even through packet duplication.
6606 		 */
6607 		add_tag = (!fdb_tx ||
6608 			   priv->sh->cdev->config.hca_attr.reg_c_preserve);
6609 		if (add_tag)
6610 			sfx_items = (struct rte_flow_item *)((char *)sfx_actions
6611 					+ act_size);
6612 		if (modify_after_mirror)
6613 			jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
6614 				     next_ft_step;
6615 		pre_actions = sfx_actions + actions_n;
6616 		tag_id = flow_sample_split_prep(dev, add_tag, items, sfx_items,
6617 						actions, sfx_actions,
6618 						pre_actions, actions_n,
6619 						sample_action_pos,
6620 						qrss_action_pos, jump_table,
6621 						error);
6622 		if (tag_id < 0 || (add_tag && !tag_id)) {
6623 			ret = -rte_errno;
6624 			goto exit;
6625 		}
6626 		if (modify_after_mirror)
6627 			flow_split_info->skip_scale =
6628 					1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6629 		/* Add the prefix subflow. */
6630 		ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6631 					      items, pre_actions,
6632 					      flow_split_info, error);
6633 		if (ret) {
6634 			ret = -rte_errno;
6635 			goto exit;
6636 		}
6637 		dev_flow->handle->split_flow_id = tag_id;
6638 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6639 		if (!modify_after_mirror) {
6640 			/* Set the sfx group attr. */
6641 			sample_res = (struct mlx5_flow_dv_sample_resource *)
6642 						dev_flow->dv.sample_res;
6643 			sfx_tbl = (struct mlx5_flow_tbl_resource *)
6644 						sample_res->normal_path_tbl;
6645 			sfx_tbl_data = container_of(sfx_tbl,
6646 						struct mlx5_flow_tbl_data_entry,
6647 						tbl);
6648 			sfx_attr.group = sfx_attr.transfer ?
6649 			(sfx_tbl_data->level - 1) : sfx_tbl_data->level;
6650 		} else {
6651 			MLX5_ASSERT(attr->transfer);
6652 			sfx_attr.group = jump_table;
6653 		}
6654 		flow_split_info->prefix_layers =
6655 				flow_get_prefix_layer_flags(dev_flow);
6656 		MLX5_ASSERT(wks);
6657 		flow_split_info->prefix_mark |= wks->mark;
6658 		/* The suffix group level has already been scaled with the
6659 		 * factor; set MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1
6660 		 * to avoid scaling again in translation.
6661 		 */
6662 		flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6663 #endif
6664 	}
6665 	/* Add the suffix subflow. */
6666 	ret = flow_create_split_meter(dev, flow, &sfx_attr,
6667 				      sfx_items ? sfx_items : items,
6668 				      sfx_actions ? sfx_actions : actions,
6669 				      flow_split_info, error);
6670 exit:
6671 	if (sfx_actions)
6672 		mlx5_free(sfx_actions);
6673 	return ret;
6674 }
6675 
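/*
 * Illustrative sketch (not part of the driver): a sample action as an
 * application would request it; rules carrying it are split by the
 * function above. The ratio of 2 (one of every two packets sampled) and
 * the mirror queue index are hypothetical example values.
 *
 * @code
 * struct rte_flow_action_queue mirror_queue = { .index = 1 };
 * const struct rte_flow_action sample_actions[] = {
 *	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &mirror_queue },
 *	{ .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * struct rte_flow_action_sample sample = {
 *	.ratio = 2,
 *	.actions = sample_actions,
 * };
 * @endcode
 */
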
6676 /**
6677  * Split the flow into a set of subflows. The splitters might be
6678  * chained, like this:
6679  * flow_create_split_outer() calls:
6680  *   flow_create_split_meter() calls:
6681  *     flow_create_split_metadata(meter_subflow_0) calls:
6682  *       flow_create_split_inner(metadata_subflow_0)
6683  *       flow_create_split_inner(metadata_subflow_1)
6684  *       flow_create_split_inner(metadata_subflow_2)
6685  *     flow_create_split_metadata(meter_subflow_1) calls:
6686  *       flow_create_split_inner(metadata_subflow_0)
6687  *       flow_create_split_inner(metadata_subflow_1)
6688  *       flow_create_split_inner(metadata_subflow_2)
6689  *
6690  * This provides a flexible way to add new levels of flow splitting.
6691  * All successfully created subflows are included in the parent
6692  * flow's dev_flow list.
6693  *
6694  * @param dev
6695  *   Pointer to Ethernet device.
6696  * @param[in] flow
6697  *   Parent flow structure pointer.
6698  * @param[in] attr
6699  *   Flow rule attributes.
6700  * @param[in] items
6701  *   Pattern specification (list terminated by the END pattern item).
6702  * @param[in] actions
6703  *   Associated actions (list terminated by the END action).
6704  * @param[in] flow_split_info
6705  *   Pointer to flow split info structure.
6706  * @param[out] error
6707  *   Perform verbose error reporting if not NULL.
6708  * @return
6709  *   0 on success, negative value otherwise
6710  */
6711 static int
6712 flow_create_split_outer(struct rte_eth_dev *dev,
6713 			struct rte_flow *flow,
6714 			const struct rte_flow_attr *attr,
6715 			const struct rte_flow_item items[],
6716 			const struct rte_flow_action actions[],
6717 			struct mlx5_flow_split_info *flow_split_info,
6718 			struct rte_flow_error *error)
6719 {
6720 	int ret;
6721 
6722 	ret = flow_create_split_sample(dev, flow, attr, items,
6723 				       actions, flow_split_info, error);
6724 	MLX5_ASSERT(ret <= 0);
6725 	return ret;
6726 }
6727 
6728 static inline struct mlx5_flow_tunnel *
6729 flow_tunnel_from_rule(const struct mlx5_flow *flow)
6730 {
6731 	struct mlx5_flow_tunnel *tunnel;
6732 
6733 #pragma GCC diagnostic push
6734 #pragma GCC diagnostic ignored "-Wcast-qual"
6735 	tunnel = (typeof(tunnel))flow->tunnel;
6736 #pragma GCC diagnostic pop
6737 
6738 	return tunnel;
6739 }
6740 
6741 /**
6742  * Adjust flow RSS workspace if needed.
6743  *
6744  * @param wks
6745  *   Pointer to thread flow work space.
6746  * @param rss_desc
6747  *   Pointer to RSS descriptor.
6748  * @param[in] nrssq_num
6749  *   New RSS queue number.
6750  *
6751  * @return
6752  *   0 on success, -1 otherwise and rte_errno is set.
6753  */
6754 static int
6755 flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
6756 			  struct mlx5_flow_rss_desc *rss_desc,
6757 			  uint32_t nrssq_num)
6758 {
6759 	if (likely(nrssq_num <= wks->rssq_num))
6760 		return 0;
6761 	rss_desc->queue = realloc(rss_desc->queue,
6762 			  sizeof(*rss_desc->queue) * RTE_ALIGN(nrssq_num, 2));
6763 	if (!rss_desc->queue) {
6764 		rte_errno = ENOMEM;
6765 		return -1;
6766 	}
6767 	wks->rssq_num = RTE_ALIGN(nrssq_num, 2);
6768 	return 0;
6769 }
6770 
6771 /**
6772  * Create a flow and add it to the flow list of the given type.
6773  *
6774  * @param dev
6775  *   Pointer to Ethernet device.
6776  * @param type
6777  *   Flow type to be created.
6781  * @param[in] attr
6782  *   Flow rule attributes.
6783  * @param[in] items
6784  *   Pattern specification (list terminated by the END pattern item).
6785  * @param[in] actions
6786  *   Associated actions (list terminated by the END action).
6787  * @param[in] external
6788  *   True if this flow rule is created by a request external to the PMD.
6789  * @param[out] error
6790  *   Perform verbose error reporting if not NULL.
6791  *
6792  * @return
6793  *   A flow index on success, 0 otherwise and rte_errno is set.
6794  */
6795 static uint32_t
6796 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6797 		 const struct rte_flow_attr *attr,
6798 		 const struct rte_flow_item items[],
6799 		 const struct rte_flow_action original_actions[],
6800 		 bool external, struct rte_flow_error *error)
6801 {
6802 	struct mlx5_priv *priv = dev->data->dev_private;
6803 	struct rte_flow *flow = NULL;
6804 	struct mlx5_flow *dev_flow;
6805 	const struct rte_flow_action_rss *rss = NULL;
6806 	struct mlx5_translated_action_handle
6807 		indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
6808 	int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
6809 	union {
6810 		struct mlx5_flow_expand_rss buf;
6811 		uint8_t buffer[4096];
6812 	} expand_buffer;
6813 	union {
6814 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6815 		uint8_t buffer[2048];
6816 	} actions_rx;
6817 	union {
6818 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6819 		uint8_t buffer[2048];
6820 	} actions_hairpin_tx;
6821 	union {
6822 		struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
6823 		uint8_t buffer[2048];
6824 	} items_tx;
6825 	struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
6826 	struct mlx5_flow_rss_desc *rss_desc;
6827 	const struct rte_flow_action *p_actions_rx;
6828 	uint32_t i;
6829 	uint32_t idx = 0;
6830 	int hairpin_flow;
6831 	struct rte_flow_attr attr_tx = { .priority = 0 };
6832 	const struct rte_flow_action *actions;
6833 	struct rte_flow_action *translated_actions = NULL;
6834 	struct mlx5_flow_tunnel *tunnel;
6835 	struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
6836 	struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
6837 	struct mlx5_flow_split_info flow_split_info = {
6838 		.external = !!external,
6839 		.skip_scale = 0,
6840 		.flow_idx = 0,
6841 		.prefix_mark = 0,
6842 		.prefix_layers = 0,
6843 		.table_id = 0
6844 	};
6845 	int ret;
6846 
6847 	MLX5_ASSERT(wks);
6848 	rss_desc = &wks->rss_desc;
6849 	ret = flow_action_handles_translate(dev, original_actions,
6850 					    indir_actions,
6851 					    &indir_actions_n,
6852 					    &translated_actions, error);
6853 	if (ret < 0) {
6854 		MLX5_ASSERT(translated_actions == NULL);
6855 		return 0;
6856 	}
6857 	actions = translated_actions ? translated_actions : original_actions;
6858 	p_actions_rx = actions;
6859 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
6860 	ret = flow_drv_validate(dev, attr, items, p_actions_rx,
6861 				external, hairpin_flow, error);
6862 	if (ret < 0)
6863 		goto error_before_hairpin_split;
6864 	flow = mlx5_ipool_zmalloc(priv->flows[type], &idx);
6865 	if (!flow) {
6866 		rte_errno = ENOMEM;
6867 		goto error_before_hairpin_split;
6868 	}
6869 	if (hairpin_flow > 0) {
6870 		if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
6871 			rte_errno = EINVAL;
6872 			goto error_before_hairpin_split;
6873 		}
6874 		flow_hairpin_split(dev, actions, actions_rx.actions,
6875 				   actions_hairpin_tx.actions, items_tx.items,
6876 				   idx);
6877 		p_actions_rx = actions_rx.actions;
6878 	}
6879 	flow_split_info.flow_idx = idx;
6880 	flow->drv_type = flow_get_drv_type(dev, attr);
6881 	MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
6882 		    flow->drv_type < MLX5_FLOW_TYPE_MAX);
6883 	memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
6884 	/* RSS Action only works on NIC RX domain. */
6885 	if (attr->ingress && !attr->transfer)
6886 		rss = flow_get_rss_action(dev, p_actions_rx);
6887 	if (rss) {
6888 		if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
6889 			return 0;
6890 		/*
6891 		 * The following information is required by
6892 		 * mlx5_flow_hashfields_adjust() in advance.
6893 		 */
6894 		rss_desc->level = rss->level;
6895 		/* RSS type 0 indicates default RSS type (RTE_ETH_RSS_IP). */
6896 		rss_desc->types = !rss->types ? RTE_ETH_RSS_IP : rss->types;
6897 	}
6898 	flow->dev_handles = 0;
6899 	if (rss && rss->types) {
6900 		unsigned int graph_root;
6901 
6902 		graph_root = find_graph_root(rss->level);
6903 		ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
6904 					   items, rss->types,
6905 					   mlx5_support_expansion, graph_root);
6906 		MLX5_ASSERT(ret > 0 &&
6907 		       (unsigned int)ret < sizeof(expand_buffer.buffer));
6908 		if (rte_log_can_log(mlx5_logtype, RTE_LOG_DEBUG)) {
6909 			for (i = 0; i < buf->entries; ++i)
6910 				mlx5_dbg__print_pattern(buf->entry[i].pattern);
6911 		}
6912 	} else {
6913 		buf->entries = 1;
6914 		buf->entry[0].pattern = (void *)(uintptr_t)items;
6915 	}
6916 	rss_desc->shared_rss = flow_get_shared_rss_action(dev, indir_actions,
6917 						      indir_actions_n);
6918 	for (i = 0; i < buf->entries; ++i) {
6919 		/* Initialize flow split data. */
6920 		flow_split_info.prefix_layers = 0;
6921 		flow_split_info.prefix_mark = 0;
6922 		flow_split_info.skip_scale = 0;
6923 		/*
6924 		 * The splitter may create multiple dev_flows,
6925 		 * depending on configuration. In the simplest
6926 		 * case it just creates unmodified original flow.
6927 		 */
6928 		ret = flow_create_split_outer(dev, flow, attr,
6929 					      buf->entry[i].pattern,
6930 					      p_actions_rx, &flow_split_info,
6931 					      error);
6932 		if (ret < 0)
6933 			goto error;
6934 		if (is_flow_tunnel_steer_rule(wks->flows[0].tof_type)) {
6935 			ret = flow_tunnel_add_default_miss(dev, flow, attr,
6936 							   p_actions_rx,
6937 							   idx,
6938 							   wks->flows[0].tunnel,
6939 							   &default_miss_ctx,
6940 							   error);
6941 			if (ret < 0) {
6942 				mlx5_free(default_miss_ctx.queue);
6943 				goto error;
6944 			}
6945 		}
6946 	}
6947 	/* Create the tx flow. */
6948 	if (hairpin_flow) {
6949 		attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
6950 		attr_tx.ingress = 0;
6951 		attr_tx.egress = 1;
6952 		dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
6953 					 actions_hairpin_tx.actions,
6954 					 idx, error);
6955 		if (!dev_flow)
6956 			goto error;
6957 		dev_flow->flow = flow;
6958 		dev_flow->external = 0;
6959 		SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
6960 			      dev_flow->handle, next);
6961 		ret = flow_drv_translate(dev, dev_flow, &attr_tx,
6962 					 items_tx.items,
6963 					 actions_hairpin_tx.actions, error);
6964 		if (ret < 0)
6965 			goto error;
6966 	}
6967 	/*
6968 	 * Update the metadata register copy table. If extensive
6969 	 * metadata feature is enabled and registers are supported
6970 	 * we might create the extra rte_flow for each unique
6971 	 * MARK/FLAG action ID.
6972 	 *
6973 	 * The table is updated for ingress Flows only, because
6974 	 * the egress Flows belong to a different device and the
6975 	 * copy table should be updated in the peer NIC Rx domain.
6976 	 */
6977 	if (attr->ingress &&
6978 	    (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
6979 		ret = flow_mreg_update_copy_table(dev, flow, actions, error);
6980 		if (ret)
6981 			goto error;
6982 	}
6983 	/*
6984 	 * If the flow is external (from the application), or the device is
6985 	 * started, or this is an mreg discover flow, apply it immediately.
6986 	 */
6987 	if (external || dev->data->dev_started ||
6988 	    (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
6989 	     attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
6990 		ret = flow_drv_apply(dev, flow, error);
6991 		if (ret < 0)
6992 			goto error;
6993 	}
6994 	flow->type = type;
6995 	flow_rxq_flags_set(dev, flow);
6996 	rte_free(translated_actions);
6997 	tunnel = flow_tunnel_from_rule(wks->flows);
6998 	if (tunnel) {
6999 		flow->tunnel = 1;
7000 		flow->tunnel_id = tunnel->tunnel_id;
7001 		__atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
7002 		mlx5_free(default_miss_ctx.queue);
7003 	}
7004 	mlx5_flow_pop_thread_workspace();
7005 	return idx;
7006 error:
7007 	MLX5_ASSERT(flow);
7008 	ret = rte_errno; /* Save rte_errno before cleanup. */
7009 	flow_mreg_del_copy_action(dev, flow);
7010 	flow_drv_destroy(dev, flow);
7011 	if (rss_desc->shared_rss)
7012 		__atomic_sub_fetch(&((struct mlx5_shared_action_rss *)
7013 			mlx5_ipool_get
7014 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
7015 			rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
7016 	mlx5_ipool_free(priv->flows[type], idx);
7017 	rte_errno = ret; /* Restore rte_errno. */
7020 	mlx5_flow_pop_thread_workspace();
7021 error_before_hairpin_split:
7022 	rte_free(translated_actions);
7023 	return 0;
7024 }
7025 
7026 /**
7027  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
7028  * incoming packets to table 1.
7029  *
7030  * Other flow rules, requested for group n, will be created in
7031  * e-switch table n+1.
7032  * A jump action to e-switch group n will be created as a jump to group n+1.
7033  *
7034  * Used when working in switchdev mode, to utilise advantages of table 1
7035  * and above.
7036  *
7037  * @param dev
7038  *   Pointer to Ethernet device.
7039  *
7040  * @return
7041  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
7042  */
7043 struct rte_flow *
7044 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
7045 {
7046 	const struct rte_flow_attr attr = {
7047 		.group = 0,
7048 		.priority = 0,
7049 		.ingress = 1,
7050 		.egress = 0,
7051 		.transfer = 1,
7052 	};
7053 	const struct rte_flow_item pattern = {
7054 		.type = RTE_FLOW_ITEM_TYPE_END,
7055 	};
7056 	struct rte_flow_action_jump jump = {
7057 		.group = 1,
7058 	};
7059 	const struct rte_flow_action actions[] = {
7060 		{
7061 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
7062 			.conf = &jump,
7063 		},
7064 		{
7065 			.type = RTE_FLOW_ACTION_TYPE_END,
7066 		},
7067 	};
7068 	struct rte_flow_error error;
7069 
7070 	return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7071 						   &attr, &pattern,
7072 						   actions, false, &error);
7073 }
7074 
7075 /**
7076  * Create a dedicated flow rule on e-switch table 1, which matches the
7077  * ESW manager and the SQ number, and directs all packets to the peer vport.
7078  *
7079  * @param dev
7080  *   Pointer to Ethernet device.
7081  * @param txq
7082  *   Txq index.
7083  *
7084  * @return
7085  *   Flow ID on success, 0 otherwise and rte_errno is set.
7086  */
7087 uint32_t
7088 mlx5_flow_create_devx_sq_miss_flow(struct rte_eth_dev *dev, uint32_t txq)
7089 {
7090 	struct rte_flow_attr attr = {
7091 		.group = 0,
7092 		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7093 		.ingress = 1,
7094 		.egress = 0,
7095 		.transfer = 1,
7096 	};
7097 	struct rte_flow_item_port_id port_spec = {
7098 		.id = MLX5_PORT_ESW_MGR,
7099 	};
7100 	struct mlx5_rte_flow_item_tx_queue txq_spec = {
7101 		.queue = txq,
7102 	};
7103 	struct rte_flow_item pattern[] = {
7104 		{
7105 			.type = RTE_FLOW_ITEM_TYPE_PORT_ID,
7106 			.spec = &port_spec,
7107 		},
7108 		{
7109 			.type = (enum rte_flow_item_type)
7110 				MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
7111 			.spec = &txq_spec,
7112 		},
7113 		{
7114 			.type = RTE_FLOW_ITEM_TYPE_END,
7115 		},
7116 	};
7117 	struct rte_flow_action_jump jump = {
7118 		.group = 1,
7119 	};
7120 	struct rte_flow_action_port_id port = {
7121 		.id = dev->data->port_id,
7122 	};
7123 	struct rte_flow_action actions[] = {
7124 		{
7125 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
7126 			.conf = &jump,
7127 		},
7128 		{
7129 			.type = RTE_FLOW_ACTION_TYPE_END,
7130 		},
7131 	};
7132 	struct rte_flow_error error;
7133 
7134 	/*
7135 	 * Creates group 0, highest priority jump flow.
7136 	 * Matches txq to bypass kernel packets.
7137 	 */
7138 	if (flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern, actions,
7139 			     false, &error) == 0)
7140 		return 0;
7141 	/* Create group 1, lowest priority redirect flow for txq. */
7142 	attr.group = 1;
7143 	actions[0].conf = &port;
7144 	actions[0].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
7145 	return flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern,
7146 				actions, false, &error);
7147 }
7148 
7149 /**
7150  * Validate a flow supported by the NIC.
7151  *
7152  * @see rte_flow_validate()
7153  * @see rte_flow_ops
7154  */
7155 int
7156 mlx5_flow_validate(struct rte_eth_dev *dev,
7157 		   const struct rte_flow_attr *attr,
7158 		   const struct rte_flow_item items[],
7159 		   const struct rte_flow_action original_actions[],
7160 		   struct rte_flow_error *error)
7161 {
7162 	int hairpin_flow;
7163 	struct mlx5_translated_action_handle
7164 		indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
7165 	int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
7166 	const struct rte_flow_action *actions;
7167 	struct rte_flow_action *translated_actions = NULL;
7168 	int ret = flow_action_handles_translate(dev, original_actions,
7169 						indir_actions,
7170 						&indir_actions_n,
7171 						&translated_actions, error);
7172 
7173 	if (ret)
7174 		return ret;
7175 	actions = translated_actions ? translated_actions : original_actions;
7176 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
7177 	ret = flow_drv_validate(dev, attr, items, actions,
7178 				true, hairpin_flow, error);
7179 	rte_free(translated_actions);
7180 	return ret;
7181 }
7182 
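/*
 * Illustrative usage (not part of the driver): an application validates a
 * rule through the generic API before creating it. Port id 0 is a
 * hypothetical example; attr, pattern and actions are built as for
 * rte_flow_create().
 *
 * @code
 * struct rte_flow_error err;
 * if (rte_flow_validate(0, &attr, pattern, actions, &err))
 *	printf("rule rejected: %s\n", err.message ? err.message : "unknown");
 * @endcode
 */
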
7183 /**
7184  * Create a flow.
7185  *
7186  * @see rte_flow_create()
7187  * @see rte_flow_ops
7188  */
7189 struct rte_flow *
7190 mlx5_flow_create(struct rte_eth_dev *dev,
7191 		 const struct rte_flow_attr *attr,
7192 		 const struct rte_flow_item items[],
7193 		 const struct rte_flow_action actions[],
7194 		 struct rte_flow_error *error)
7195 {
7196 	struct mlx5_priv *priv = dev->data->dev_private;
7197 
7198 	if (priv->sh->config.dv_flow_en == 2) {
7199 		rte_flow_error_set(error, ENOTSUP,
7200 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7201 			  NULL,
7202 			  "Flow non-Q creation not supported");
7203 		return NULL;
7204 	}
7205 	/*
7206 	 * If the device is not started yet, it is not allowed to create a
7207 	 * flow from the application. PMD default flows and traffic control flows
7208 	 * are not affected.
7209 	 */
7210 	if (unlikely(!dev->data->dev_started)) {
7211 		DRV_LOG(DEBUG, "port %u is not started when "
7212 			"inserting a flow", dev->data->port_id);
7213 		rte_flow_error_set(error, ENODEV,
7214 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7215 				   NULL,
7216 				   "port not started");
7217 		return NULL;
7218 	}
7219 
7220 	return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_GEN,
7221 						   attr, items, actions,
7222 						   true, error);
7223 }
7224 
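/*
 * Illustrative usage (not part of the driver): creating a simple rule via
 * the generic API, which lands in mlx5_flow_create() above. Port id 0, the
 * queue index and the ETH/IPV4 match are hypothetical example values.
 *
 * @code
 * struct rte_flow_attr attr = { .ingress = 1 };
 * struct rte_flow_item pattern[] = {
 *	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *	{ .type = RTE_FLOW_ITEM_TYPE_END },
 * };
 * struct rte_flow_action_queue queue = { .index = 0 };
 * struct rte_flow_action actions[] = {
 *	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *	{ .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * struct rte_flow_error err;
 * struct rte_flow *f = rte_flow_create(0, &attr, pattern, actions, &err);
 * @endcode
 */
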
7225 /**
7226  * Destroy a flow in a list.
7227  *
7228  * @param dev
7229  *   Pointer to Ethernet device.
7230  * @param[in] flow_idx
7231  *   Index of flow to destroy.
7232  */
7233 static void
7234 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7235 		  uint32_t flow_idx)
7236 {
7237 	struct mlx5_priv *priv = dev->data->dev_private;
7238 	struct rte_flow *flow = mlx5_ipool_get(priv->flows[type], flow_idx);
7239 
7240 	if (!flow)
7241 		return;
7242 	MLX5_ASSERT(flow->type == type);
7243 	/*
7244 	 * Update RX queue flags only if port is started, otherwise it is
7245 	 * already clean.
7246 	 */
7247 	if (dev->data->dev_started)
7248 		flow_rxq_flags_trim(dev, flow);
7249 	flow_drv_destroy(dev, flow);
7250 	if (flow->tunnel) {
7251 		struct mlx5_flow_tunnel *tunnel;
7252 
7253 		tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
7254 		RTE_VERIFY(tunnel);
7255 		if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
7256 			mlx5_flow_tunnel_free(dev, tunnel);
7257 	}
7258 	flow_mreg_del_copy_action(dev, flow);
7259 	mlx5_ipool_free(priv->flows[type], flow_idx);
7260 }
7261 
7262 /**
7263  * Destroy all flows.
7264  *
7265  * @param dev
7266  *   Pointer to Ethernet device.
7267  * @param type
7268  *   Flow type to be flushed.
7269  * @param active
7270  *   If flushing is called actively.
7271  */
7272 void
7273 mlx5_flow_list_flush(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7274 		     bool active)
7275 {
7276 	struct mlx5_priv *priv = dev->data->dev_private;
7277 	uint32_t num_flushed = 0, fidx = 1;
7278 	struct rte_flow *flow;
7279 
7280 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
7281 	if (priv->sh->config.dv_flow_en == 2 &&
7282 	    type == MLX5_FLOW_TYPE_GEN) {
7283 		flow_hw_q_flow_flush(dev, NULL);
7284 		return;
7285 	}
7286 #endif
7287 
7288 	MLX5_IPOOL_FOREACH(priv->flows[type], fidx, flow) {
7289 		flow_list_destroy(dev, type, fidx);
7290 		num_flushed++;
7291 	}
7292 	if (active) {
7293 		DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
7294 			dev->data->port_id, num_flushed);
7295 	}
7296 }
7297 
7298 /**
7299  * Stop all default actions for flows.
7300  *
7301  * @param dev
7302  *   Pointer to Ethernet device.
7303  */
7304 void
7305 mlx5_flow_stop_default(struct rte_eth_dev *dev)
7306 {
7307 	flow_mreg_del_default_copy_action(dev);
7308 	flow_rxq_flags_clear(dev);
7309 }
7310 
7311 /**
7312  * Start all default actions for flows.
7313  *
7314  * @param dev
7315  *   Pointer to Ethernet device.
7316  * @return
7317  *   0 on success, a negative errno value otherwise and rte_errno is set.
7318  */
7319 int
7320 mlx5_flow_start_default(struct rte_eth_dev *dev)
7321 {
7322 	struct rte_flow_error error;
7323 
7324 	/* Make sure default copy action (reg_c[0] -> reg_b) is created. */
7325 	return flow_mreg_add_default_copy_action(dev, &error);
7326 }
7327 
7328 /**
7329  * Release the thread-specific flow workspace data.
7330  */
7331 void
7332 flow_release_workspace(void *data)
7333 {
7334 	struct mlx5_flow_workspace *wks = data;
7335 	struct mlx5_flow_workspace *next;
7336 
7337 	while (wks) {
7338 		next = wks->next;
7339 		free(wks->rss_desc.queue);
7340 		free(wks);
7341 		wks = next;
7342 	}
7343 }
7344 
7345 /**
7346  * Get thread specific current flow workspace.
7347  *
7348  * @return pointer to thread specific flow workspace data, NULL on error.
7349  */
7350 struct mlx5_flow_workspace*
7351 mlx5_flow_get_thread_workspace(void)
7352 {
7353 	struct mlx5_flow_workspace *data;
7354 
7355 	data = mlx5_flow_os_get_specific_workspace();
7356 	MLX5_ASSERT(data && data->inuse);
7357 	if (!data || !data->inuse)
7358 		DRV_LOG(ERR, "flow workspace not initialized.");
7359 	return data;
7360 }
7361 
7362 /**
7363  * Allocate and init new flow workspace.
7364  *
7365  * @return pointer to flow workspace data, NULL on error.
7366  */
7367 static struct mlx5_flow_workspace*
7368 flow_alloc_thread_workspace(void)
7369 {
7370 	struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
7371 
7372 	if (!data) {
7373 		DRV_LOG(ERR, "Failed to allocate flow workspace "
7374 			"memory.");
7375 		return NULL;
7376 	}
7377 	data->rss_desc.queue = calloc(1,
7378 			sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
7379 	if (!data->rss_desc.queue)
7380 		goto err;
7381 	data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
7382 	return data;
7383 err:
7384 	free(data->rss_desc.queue);
7385 	free(data);
7386 	return NULL;
7387 }
7388 
7389 /**
7390  * Get new thread specific flow workspace.
7391  *
7392  * If the current workspace is in use, create a new one and set it as current.
7393  *
7394  * @return pointer to thread specific flow workspace data, NULL on error.
7395  */
7396 static struct mlx5_flow_workspace*
7397 mlx5_flow_push_thread_workspace(void)
7398 {
7399 	struct mlx5_flow_workspace *curr;
7400 	struct mlx5_flow_workspace *data;
7401 
7402 	curr = mlx5_flow_os_get_specific_workspace();
7403 	if (!curr) {
7404 		data = flow_alloc_thread_workspace();
7405 		if (!data)
7406 			return NULL;
7407 	} else if (!curr->inuse) {
7408 		data = curr;
7409 	} else if (curr->next) {
7410 		data = curr->next;
7411 	} else {
7412 		data = flow_alloc_thread_workspace();
7413 		if (!data)
7414 			return NULL;
7415 		curr->next = data;
7416 		data->prev = curr;
7417 	}
7418 	data->inuse = 1;
7419 	data->flow_idx = 0;
7420 	/* Set as current workspace */
7421 	if (mlx5_flow_os_set_specific_workspace(data))
7422 		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
7423 	return data;
7424 }
7425 
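/*
 * Driver-internal pairing sketch (for illustration only): every push must
 * be matched by a pop on the same thread, as done in flow_list_create().
 *
 * @code
 * struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
 * if (wks) {
 *	... use wks->rss_desc, wks->flows ...
 *	mlx5_flow_pop_thread_workspace();
 * }
 * @endcode
 */
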
7426 /**
7427  * Close current thread specific flow workspace.
7428  *
7429  * If the previous workspace is available, set it as current.
7432  */
7433 static void
7434 mlx5_flow_pop_thread_workspace(void)
7435 {
7436 	struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();
7437 
7438 	if (!data)
7439 		return;
7440 	if (!data->inuse) {
7441 		DRV_LOG(ERR, "Failed to close unused flow workspace.");
7442 		return;
7443 	}
7444 	data->inuse = 0;
7445 	if (!data->prev)
7446 		return;
7447 	if (mlx5_flow_os_set_specific_workspace(data->prev))
7448 		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
7449 }
7450 
7451 /**
7452  * Verify that the flow list is empty.
7453  *
7454  * @param dev
7455  *   Pointer to Ethernet device.
7456  *
7457  * @return the number of flows not released.
7458  */
7459 int
7460 mlx5_flow_verify(struct rte_eth_dev *dev)
7461 {
7462 	struct mlx5_priv *priv = dev->data->dev_private;
7463 	struct rte_flow *flow;
7464 	uint32_t idx = 0;
7465 	int ret = 0, i;
7466 
7467 	for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
7468 		MLX5_IPOOL_FOREACH(priv->flows[i], idx, flow) {
7469 			DRV_LOG(DEBUG, "port %u flow %p still referenced",
7470 				dev->data->port_id, (void *)flow);
7471 			ret++;
7472 		}
7473 	}
7474 	return ret;
7475 }
7476 
7477 /**
7478  * Enable default hairpin egress flow.
7479  *
7480  * @param dev
7481  *   Pointer to Ethernet device.
7482  * @param queue
7483  *   The queue index.
7484  *
7485  * @return
7486  *   0 on success, a negative errno value otherwise and rte_errno is set.
7487  */
7488 int
7489 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
7490 			    uint32_t queue)
7491 {
7492 	const struct rte_flow_attr attr = {
7493 		.egress = 1,
7494 		.priority = 0,
7495 	};
7496 	struct mlx5_rte_flow_item_tx_queue queue_spec = {
7497 		.queue = queue,
7498 	};
7499 	struct mlx5_rte_flow_item_tx_queue queue_mask = {
7500 		.queue = UINT32_MAX,
7501 	};
7502 	struct rte_flow_item items[] = {
7503 		{
7504 			.type = (enum rte_flow_item_type)
7505 				MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
7506 			.spec = &queue_spec,
7507 			.last = NULL,
7508 			.mask = &queue_mask,
7509 		},
7510 		{
7511 			.type = RTE_FLOW_ITEM_TYPE_END,
7512 		},
7513 	};
7514 	struct rte_flow_action_jump jump = {
7515 		.group = MLX5_HAIRPIN_TX_TABLE,
7516 	};
7517 	struct rte_flow_action actions[2];
7518 	uint32_t flow_idx;
7519 	struct rte_flow_error error;
7520 
7521 	actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
7522 	actions[0].conf = &jump;
7523 	actions[1].type = RTE_FLOW_ACTION_TYPE_END;
7524 	flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7525 				    &attr, items, actions, false, &error);
7526 	if (!flow_idx) {
7527 		DRV_LOG(DEBUG,
7528 			"Failed to create ctrl flow: rte_errno(%d),"
7529 			" type(%d), message(%s)",
7530 			rte_errno, error.type,
7531 			error.message ? error.message : " (no stated reason)");
7532 		return -rte_errno;
7533 	}
7534 	return 0;
7535 }
7536 
7537 /**
7538  * Enable a control flow configured from the control plane.
7539  *
7540  * @param dev
7541  *   Pointer to Ethernet device.
7542  * @param eth_spec
7543  *   An Ethernet flow spec to apply.
7544  * @param eth_mask
7545  *   An Ethernet flow mask to apply.
7546  * @param vlan_spec
7547  *   A VLAN flow spec to apply.
7548  * @param vlan_mask
7549  *   A VLAN flow mask to apply.
7550  *
7551  * @return
7552  *   0 on success, a negative errno value otherwise and rte_errno is set.
7553  */
7554 int
7555 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
7556 		    struct rte_flow_item_eth *eth_spec,
7557 		    struct rte_flow_item_eth *eth_mask,
7558 		    struct rte_flow_item_vlan *vlan_spec,
7559 		    struct rte_flow_item_vlan *vlan_mask)
7560 {
7561 	struct mlx5_priv *priv = dev->data->dev_private;
7562 	const struct rte_flow_attr attr = {
7563 		.ingress = 1,
7564 		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7565 	};
7566 	struct rte_flow_item items[] = {
7567 		{
7568 			.type = RTE_FLOW_ITEM_TYPE_ETH,
7569 			.spec = eth_spec,
7570 			.last = NULL,
7571 			.mask = eth_mask,
7572 		},
7573 		{
7574 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
7575 					      RTE_FLOW_ITEM_TYPE_END,
7576 			.spec = vlan_spec,
7577 			.last = NULL,
7578 			.mask = vlan_mask,
7579 		},
7580 		{
7581 			.type = RTE_FLOW_ITEM_TYPE_END,
7582 		},
7583 	};
7584 	uint16_t queue[priv->reta_idx_n];
7585 	struct rte_flow_action_rss action_rss = {
7586 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
7587 		.level = 0,
7588 		.types = priv->rss_conf.rss_hf,
7589 		.key_len = priv->rss_conf.rss_key_len,
7590 		.queue_num = priv->reta_idx_n,
7591 		.key = priv->rss_conf.rss_key,
7592 		.queue = queue,
7593 	};
7594 	struct rte_flow_action actions[] = {
7595 		{
7596 			.type = RTE_FLOW_ACTION_TYPE_RSS,
7597 			.conf = &action_rss,
7598 		},
7599 		{
7600 			.type = RTE_FLOW_ACTION_TYPE_END,
7601 		},
7602 	};
7603 	uint32_t flow_idx;
7604 	struct rte_flow_error error;
7605 	unsigned int i;
7606 
7607 	if (!priv->reta_idx_n || !priv->rxqs_n) {
7608 		return 0;
7609 	}
7610 	if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
7611 		action_rss.types = 0;
7612 	for (i = 0; i != priv->reta_idx_n; ++i)
7613 		queue[i] = (*priv->reta_idx)[i];
7614 	flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7615 				    &attr, items, actions, false, &error);
7616 	if (!flow_idx)
7617 		return -rte_errno;
7618 	return 0;
7619 }
7620 
7621 /**
7622  * Enable a control flow configured from the control plane.
7623  *
7624  * @param dev
7625  *   Pointer to Ethernet device.
7626  * @param eth_spec
7627  *   An Ethernet flow spec to apply.
7628  * @param eth_mask
7629  *   An Ethernet flow mask to apply.
7630  *
7631  * @return
7632  *   0 on success, a negative errno value otherwise and rte_errno is set.
7633  */
7634 int
7635 mlx5_ctrl_flow(struct rte_eth_dev *dev,
7636 	       struct rte_flow_item_eth *eth_spec,
7637 	       struct rte_flow_item_eth *eth_mask)
7638 {
7639 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
7640 }
7641 
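/*
 * Driver-internal usage sketch (illustrative): enabling broadcast traffic
 * the way mlx5_traffic_enable() does, by matching on the destination MAC.
 *
 * @code
 * struct rte_flow_item_eth bcast = {
 *	.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 * };
 * mlx5_ctrl_flow(dev, &bcast, &bcast);
 * @endcode
 */
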
7642 /**
7643  * Create a default miss flow rule matching LACP traffic.
7644  *
7645  * @param dev
7646  *   Pointer to Ethernet device.
7649  *
7650  * @return
7651  *   0 on success, a negative errno value otherwise and rte_errno is set.
7652  */
7653 int
7654 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
7655 {
7656 	/*
7657 	 * The LACP matching is done using only the ether type, since a
7658 	 * multicast dst MAC causes the kernel to give low priority to this flow.
7659 	 */
7660 	static const struct rte_flow_item_eth lacp_spec = {
7661 		.type = RTE_BE16(0x8809),
7662 	};
7663 	static const struct rte_flow_item_eth lacp_mask = {
7664 		.type = 0xffff,
7665 	};
7666 	const struct rte_flow_attr attr = {
7667 		.ingress = 1,
7668 	};
7669 	struct rte_flow_item items[] = {
7670 		{
7671 			.type = RTE_FLOW_ITEM_TYPE_ETH,
7672 			.spec = &lacp_spec,
7673 			.mask = &lacp_mask,
7674 		},
7675 		{
7676 			.type = RTE_FLOW_ITEM_TYPE_END,
7677 		},
7678 	};
7679 	struct rte_flow_action actions[] = {
7680 		{
7681 			.type = (enum rte_flow_action_type)
7682 				MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
7683 		},
7684 		{
7685 			.type = RTE_FLOW_ACTION_TYPE_END,
7686 		},
7687 	};
7688 	struct rte_flow_error error;
7689 	uint32_t flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7690 					&attr, items, actions,
7691 					false, &error);
7692 
7693 	if (!flow_idx)
7694 		return -rte_errno;
7695 	return 0;
7696 }
7697 
7698 /**
7699  * Destroy a flow.
7700  *
7701  * @see rte_flow_destroy()
7702  * @see rte_flow_ops
7703  */
7704 int
7705 mlx5_flow_destroy(struct rte_eth_dev *dev,
7706 		  struct rte_flow *flow,
7707 		  struct rte_flow_error *error __rte_unused)
7708 {
7709 	struct mlx5_priv *priv = dev->data->dev_private;
7710 
7711 	if (priv->sh->config.dv_flow_en == 2)
7712 		return rte_flow_error_set(error, ENOTSUP,
7713 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7714 			  NULL,
7715 			  "Flow non-Q destruction not supported");
7716 	flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN,
7717 				(uintptr_t)(void *)flow);
7718 	return 0;
7719 }
7720 
7721 /**
7722  * Destroy all flows.
7723  *
7724  * @see rte_flow_flush()
7725  * @see rte_flow_ops
7726  */
7727 int
7728 mlx5_flow_flush(struct rte_eth_dev *dev,
7729 		struct rte_flow_error *error __rte_unused)
7730 {
7731 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, false);
7732 	return 0;
7733 }
7734 
7735 /**
7736  * Isolated mode.
7737  *
7738  * @see rte_flow_isolate()
7739  * @see rte_flow_ops
7740  */
7741 int
7742 mlx5_flow_isolate(struct rte_eth_dev *dev,
7743 		  int enable,
7744 		  struct rte_flow_error *error)
7745 {
7746 	struct mlx5_priv *priv = dev->data->dev_private;
7747 
7748 	if (dev->data->dev_started) {
7749 		rte_flow_error_set(error, EBUSY,
7750 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7751 				   NULL,
7752 				   "port must be stopped first");
7753 		return -rte_errno;
7754 	}
7755 	priv->isolated = !!enable;
7756 	if (enable)
7757 		dev->dev_ops = &mlx5_dev_ops_isolate;
7758 	else
7759 		dev->dev_ops = &mlx5_dev_ops;
7760 
7761 	dev->rx_descriptor_status = mlx5_rx_descriptor_status;
7762 	dev->tx_descriptor_status = mlx5_tx_descriptor_status;
7763 
7764 	return 0;
7765 }
7766 
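/*
 * Illustrative usage (not part of the driver): isolated mode must be
 * toggled while the port is stopped, per the check above. Port id 0 is a
 * hypothetical example.
 *
 * @code
 * struct rte_flow_error err;
 * rte_eth_dev_stop(0);
 * if (rte_flow_isolate(0, 1, &err))
 *	printf("isolate failed: %s\n", err.message ? err.message : "?");
 * rte_eth_dev_start(0);
 * @endcode
 */
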
7767 /**
7768  * Query a flow.
7769  *
7770  * @see rte_flow_query()
7771  * @see rte_flow_ops
7772  */
7773 static int
7774 flow_drv_query(struct rte_eth_dev *dev,
7775 	       uint32_t flow_idx,
7776 	       const struct rte_flow_action *actions,
7777 	       void *data,
7778 	       struct rte_flow_error *error)
7779 {
7780 	struct mlx5_priv *priv = dev->data->dev_private;
7781 	const struct mlx5_flow_driver_ops *fops;
7782 	struct rte_flow *flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
7783 					       flow_idx);
7784 	enum mlx5_flow_drv_type ftype;
7785 
7786 	if (!flow) {
7787 		return rte_flow_error_set(error, ENOENT,
7788 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7789 			  NULL,
7790 			  "invalid flow handle");
7791 	}
7792 	ftype = flow->drv_type;
7793 	MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
7794 	fops = flow_get_drv_ops(ftype);
7795 
7796 	return fops->query(dev, flow, actions, data, error);
7797 }
7798 
7799 /**
7800  * Query a flow.
7801  *
7802  * @see rte_flow_query()
7803  * @see rte_flow_ops
7804  */
7805 int
7806 mlx5_flow_query(struct rte_eth_dev *dev,
7807 		struct rte_flow *flow,
7808 		const struct rte_flow_action *actions,
7809 		void *data,
7810 		struct rte_flow_error *error)
7811 {
7812 	int ret;
7813 	struct mlx5_priv *priv = dev->data->dev_private;
7814 
7815 	if (priv->sh->config.dv_flow_en == 2)
7816 		return rte_flow_error_set(error, ENOTSUP,
7817 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7818 			  NULL,
7819 			  "Flow non-Q query not supported");
7820 	ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
7821 			     error);
7822 	if (ret < 0)
7823 		return ret;
7824 	return 0;
7825 }
7826 
7827 /**
7828  * Get rte_flow callbacks.
7829  *
7830  * @param dev
7831  *   Pointer to Ethernet device structure.
7832  * @param ops
7833  *   Pointer to operation-specific structure.
7834  *
7835  * @return 0
7836  */
7837 int
7838 mlx5_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
7839 		  const struct rte_flow_ops **ops)
7840 {
7841 	*ops = &mlx5_flow_ops;
7842 	return 0;
7843 }
7844 
7845 /**
7846  * Validate meter policy actions.
7847  * Dispatcher for action type specific validation.
7848  *
7849  * @param[in] dev
7850  *   Pointer to the Ethernet device structure.
7851  * @param[in] action
7852  *   The meter policy action object to validate.
7853  * @param[in] attr
7854  *   Attributes of flow to determine steering domain.
7855  * @param[out] is_rss
7856  *   Is RSS or not.
7857  * @param[out] domain_bitmap
7858  *   Domain bitmap.
7859  * @param[out] policy_mode
7860  *   Meter policy mode.
7861  * @param[out] error
7862  *   Perform verbose error reporting if not NULL. Initialized in case of
7863  *   error only.
7864  *
7865  * @return
7866  *   0 on success, otherwise negative errno value.
7867  */
7868 int
7869 mlx5_flow_validate_mtr_acts(struct rte_eth_dev *dev,
7870 			const struct rte_flow_action *actions[RTE_COLORS],
7871 			struct rte_flow_attr *attr,
7872 			bool *is_rss,
7873 			uint8_t *domain_bitmap,
7874 			uint8_t *policy_mode,
7875 			struct rte_mtr_error *error)
7876 {
7877 	const struct mlx5_flow_driver_ops *fops;
7878 
7879 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7880 	return fops->validate_mtr_acts(dev, actions, attr, is_rss,
7881 				       domain_bitmap, policy_mode, error);
7882 }
7883 
7884 /**
7885  * Destroy the meter policy actions.
7886  *
7887  * @param[in] dev
7888  *   Pointer to Ethernet device.
7889  * @param[in] mtr_policy
7890  *   Meter policy struct.
7891  */
7892 void
7893 mlx5_flow_destroy_mtr_acts(struct rte_eth_dev *dev,
7894 		      struct mlx5_flow_meter_policy *mtr_policy)
7895 {
7896 	const struct mlx5_flow_driver_ops *fops;
7897 
7898 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7899 	fops->destroy_mtr_acts(dev, mtr_policy);
7900 }
7901 
7902 /**
7903  * Create policy action, lock free
7904  * (mutex should be acquired by the caller).
7905  * Dispatcher for action type specific call.
7906  *
7907  * @param[in] dev
7908  *   Pointer to the Ethernet device structure.
7909  * @param[in] mtr_policy
7910  *   Meter policy struct.
7911  * @param[in] action
7912  *   Action specification used to create meter actions.
7913  * @param[out] error
7914  *   Perform verbose error reporting if not NULL. Initialized in case of
7915  *   error only.
7916  *
7917  * @return
7918  *   0 on success, otherwise negative errno value.
7919  */
7920 int
7921 mlx5_flow_create_mtr_acts(struct rte_eth_dev *dev,
7922 		      struct mlx5_flow_meter_policy *mtr_policy,
7923 		      const struct rte_flow_action *actions[RTE_COLORS],
7924 		      struct rte_mtr_error *error)
7925 {
7926 	const struct mlx5_flow_driver_ops *fops;
7927 
7928 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7929 	return fops->create_mtr_acts(dev, mtr_policy, actions, error);
7930 }
7931 
7932 /**
7933  * Create policy rules, lock free
7934  * (mutex should be acquired by the caller).
7935  * Dispatcher for action type specific call.
7936  *
7937  * @param[in] dev
7938  *   Pointer to the Ethernet device structure.
7939  * @param[in] mtr_policy
7940  *   Meter policy struct.
7941  *
7942  * @return
7943  *   0 on success, -1 otherwise.
7944  */
7945 int
7946 mlx5_flow_create_policy_rules(struct rte_eth_dev *dev,
7947 			     struct mlx5_flow_meter_policy *mtr_policy)
7948 {
7949 	const struct mlx5_flow_driver_ops *fops;
7950 
7951 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7952 	return fops->create_policy_rules(dev, mtr_policy);
7953 }
7954 
7955 /**
7956  * Destroy policy rules, lock free
7957  * (mutex should be acquired by the caller).
7958  * Dispatcher for action type specific call.
7959  *
7960  * @param[in] dev
7961  *   Pointer to the Ethernet device structure.
7962  * @param[in] mtr_policy
7963  *   Meter policy struct.
7964  */
7965 void
7966 mlx5_flow_destroy_policy_rules(struct rte_eth_dev *dev,
7967 			     struct mlx5_flow_meter_policy *mtr_policy)
7968 {
7969 	const struct mlx5_flow_driver_ops *fops;
7970 
7971 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7972 	fops->destroy_policy_rules(dev, mtr_policy);
7973 }
7974 
7975 /**
7976  * Destroy the default policy table set.
7977  *
7978  * @param[in] dev
7979  *   Pointer to Ethernet device.
7980  */
7981 void
7982 mlx5_flow_destroy_def_policy(struct rte_eth_dev *dev)
7983 {
7984 	const struct mlx5_flow_driver_ops *fops;
7985 
7986 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7987 	fops->destroy_def_policy(dev);
7988 }
7989 
7990 /**
7991  * Create the default policy table set.
7992  *
7993  * @param[in] dev
7994  *   Pointer to Ethernet device.
7995  *
7996  * @return
7997  *   0 on success, -1 otherwise.
7998  */
7999 int
8000 mlx5_flow_create_def_policy(struct rte_eth_dev *dev)
8001 {
8002 	const struct mlx5_flow_driver_ops *fops;
8003 
8004 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8005 	return fops->create_def_policy(dev);
8006 }
8007 
8008 /**
8009  * Create the needed meter and suffix tables.
8010  *
8011  * @param[in] dev
8012  *   Pointer to Ethernet device.
8013  * @param[in] fm
8014  *   Pointer to the flow meter.
8015  * @param[in] mtr_idx
8016  *   Meter index.
8017  * @param[in] domain_bitmap
8018  *   Domain bitmap.
8013  *
8014  * @return
8015  *   0 on success, -1 otherwise.
8016  */
8017 int
8018 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
8019 			struct mlx5_flow_meter_info *fm,
8020 			uint32_t mtr_idx,
8021 			uint8_t domain_bitmap)
8022 {
8023 	const struct mlx5_flow_driver_ops *fops;
8024 
8025 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8026 	return fops->create_mtr_tbls(dev, fm, mtr_idx, domain_bitmap);
8027 }
8028 
8029 /**
8030  * Destroy the meter table set.
8031  *
8032  * @param[in] dev
8033  *   Pointer to Ethernet device.
8034  * @param[in] fm
8035  *   Pointer to the flow meter.
8036  */
8037 void
8038 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
8039 			   struct mlx5_flow_meter_info *fm)
8040 {
8041 	const struct mlx5_flow_driver_ops *fops;
8042 
8043 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8044 	fops->destroy_mtr_tbls(dev, fm);
8045 }
8046 
8047 /**
8048  * Destroy the global meter drop table.
8049  *
8050  * @param[in] dev
8051  *   Pointer to Ethernet device.
8052  */
8053 void
8054 mlx5_flow_destroy_mtr_drop_tbls(struct rte_eth_dev *dev)
8055 {
8056 	const struct mlx5_flow_driver_ops *fops;
8057 
8058 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8059 	fops->destroy_mtr_drop_tbls(dev);
8060 }
8061 
8062 /**
8063  * Destroy the sub policy table with RX queue.
8064  *
8065  * @param[in] dev
8066  *   Pointer to Ethernet device.
8067  * @param[in] mtr_policy
8068  *   Pointer to meter policy table.
8069  */
8070 void
8071 mlx5_flow_destroy_sub_policy_with_rxq(struct rte_eth_dev *dev,
8072 		struct mlx5_flow_meter_policy *mtr_policy)
8073 {
8074 	const struct mlx5_flow_driver_ops *fops;
8075 
8076 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8077 	fops->destroy_sub_policy_with_rxq(dev, mtr_policy);
8078 }
8079 
8080 /**
8081  * Allocate the needed ASO flow meter id.
8082  *
8083  * @param[in] dev
8084  *   Pointer to Ethernet device.
8085  *
8086  * @return
8087  *   Index to the ASO flow meter on success, 0 otherwise.
8088  */
8089 uint32_t
8090 mlx5_flow_mtr_alloc(struct rte_eth_dev *dev)
8091 {
8092 	const struct mlx5_flow_driver_ops *fops;
8093 
8094 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8095 	return fops->create_meter(dev);
8096 }
8097 
8098 /**
8099  * Free the ASO flow meter id.
8100  *
8101  * @param[in] dev
8102  *   Pointer to Ethernet device.
8103  * @param[in] mtr_idx
8104  *   Index to the ASO flow meter to be freed.
8108  */
8109 void
8110 mlx5_flow_mtr_free(struct rte_eth_dev *dev, uint32_t mtr_idx)
8111 {
8112 	const struct mlx5_flow_driver_ops *fops;
8113 
8114 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8115 	fops->free_meter(dev, mtr_idx);
8116 }
8117 
8118 /**
8119  * Allocate a counter.
8120  *
8121  * @param[in] dev
8122  *   Pointer to Ethernet device structure.
8123  *
8124  * @return
8125  *   Index to the allocated counter on success, 0 otherwise.
8126  */
8127 uint32_t
8128 mlx5_counter_alloc(struct rte_eth_dev *dev)
8129 {
8130 	const struct mlx5_flow_driver_ops *fops;
8131 	struct rte_flow_attr attr = { .transfer = 0 };
8132 
8133 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8134 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8135 		return fops->counter_alloc(dev);
8136 	}
8137 	DRV_LOG(ERR,
8138 		"port %u counter allocate is not supported.",
8139 		 dev->data->port_id);
8140 	return 0;
8141 }
8142 
8143 /**
8144  * Free a counter.
8145  *
8146  * @param[in] dev
8147  *   Pointer to Ethernet device structure.
8148  * @param[in] cnt
8149  *   Index to the counter to be freed.
8150  */
8151 void
8152 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
8153 {
8154 	const struct mlx5_flow_driver_ops *fops;
8155 	struct rte_flow_attr attr = { .transfer = 0 };
8156 
8157 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8158 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8159 		fops->counter_free(dev, cnt);
8160 		return;
8161 	}
8162 	DRV_LOG(ERR,
8163 		"port %u counter free is not supported.",
8164 		 dev->data->port_id);
8165 }
8166 
8167 /**
8168  * Query counter statistics.
8169  *
8170  * @param[in] dev
8171  *   Pointer to Ethernet device structure.
8172  * @param[in] cnt
8173  *   Index to counter to query.
8174  * @param[in] clear
8175  *   Set to clear counter statistics.
8176  * @param[out] pkts
8177  *   Where to save the number of packets hit by the counter.
8178  * @param[out] bytes
8179  *   Where to save the number of bytes hit by the counter.
8180  * @param[out] action
8181  *   Where to save the related counter action, if any.
8180  *
8181  * @return
8182  *   0 on success, a negative errno value otherwise.
8183  */
8184 int
8185 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
8186 		   bool clear, uint64_t *pkts, uint64_t *bytes, void **action)
8187 {
8188 	const struct mlx5_flow_driver_ops *fops;
8189 	struct rte_flow_attr attr = { .transfer = 0 };
8190 
8191 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8192 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8193 		return fops->counter_query(dev, cnt, clear, pkts,
8194 					bytes, action);
8195 	}
8196 	DRV_LOG(ERR,
8197 		"port %u counter query is not supported.",
8198 		 dev->data->port_id);
8199 	return -ENOTSUP;
8200 }
8201 
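/*
 * Driver-internal usage sketch (illustrative): the typical counter life
 * cycle through the wrappers above, assuming a DV-capable port.
 *
 * @code
 * uint64_t pkts, bytes;
 * void *action;
 * uint32_t cnt = mlx5_counter_alloc(dev);
 * if (cnt) {
 *	mlx5_counter_query(dev, cnt, false, &pkts, &bytes, &action);
 *	mlx5_counter_free(dev, cnt);
 * }
 * @endcode
 */
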
8202 /**
8203  * Get information about HWS pre-configurable resources.
8204  *
8205  * @param[in] dev
8206  *   Pointer to the rte_eth_dev structure.
8207  * @param[out] port_info
8208  *   Pointer to port information.
8209  * @param[out] queue_info
8210  *   Pointer to queue information.
8211  * @param[out] error
8212  *   Pointer to error structure.
8213  *
8214  * @return
8215  *   0 on success, a negative errno value otherwise and rte_errno is set.
8216  */
8217 static int
8218 mlx5_flow_info_get(struct rte_eth_dev *dev,
8219 		   struct rte_flow_port_info *port_info,
8220 		   struct rte_flow_queue_info *queue_info,
8221 		   struct rte_flow_error *error)
8222 {
8223 	const struct mlx5_flow_driver_ops *fops;
8224 
8225 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8226 		return rte_flow_error_set(error, ENOTSUP,
8227 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8228 				NULL,
8229 				"info get with incorrect steering mode");
8230 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8231 	return fops->info_get(dev, port_info, queue_info, error);
8232 }
8233 
8234 /**
8235  * Configure port HWS resources.
8236  *
8237  * @param[in] dev
8238  *   Pointer to the rte_eth_dev structure.
8239  * @param[in] port_attr
8240  *   Port configuration attributes.
8241  * @param[in] nb_queue
8242  *   Number of queue.
8243  * @param[in] queue_attr
8244  *   Array that holds attributes for each flow queue.
8245  * @param[out] error
8246  *   Pointer to error structure.
8247  *
8248  * @return
8249  *   0 on success, a negative errno value otherwise and rte_errno is set.
8250  */
8251 static int
8252 mlx5_flow_port_configure(struct rte_eth_dev *dev,
8253 			 const struct rte_flow_port_attr *port_attr,
8254 			 uint16_t nb_queue,
8255 			 const struct rte_flow_queue_attr *queue_attr[],
8256 			 struct rte_flow_error *error)
8257 {
8258 	const struct mlx5_flow_driver_ops *fops;
8259 
8260 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8261 		return rte_flow_error_set(error, ENOTSUP,
8262 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8263 				NULL,
8264 				"port configure with incorrect steering mode");
8265 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8266 	return fops->configure(dev, port_attr, nb_queue, queue_attr, error);
8267 }
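
/*
 * Illustrative sketch, not part of the driver: the typical application call
 * sequence behind the two wrappers above - query the HWS limits first, then
 * commit a configuration. The counter count and queue size are arbitrary
 * assumptions, and the guard macro is hypothetical.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* hypothetical guard: never compiled */
static int
example_configure_hws(uint16_t port_id)
{
	struct rte_flow_port_info port_info;
	struct rte_flow_queue_info queue_info;
	const struct rte_flow_port_attr port_attr = {
		.nb_counters = 1 << 16, /* assumed pre-allocation */
	};
	const struct rte_flow_queue_attr queue_attr = { .size = 256 };
	const struct rte_flow_queue_attr *queue_attr_list[] = { &queue_attr };
	struct rte_flow_error error;
	int ret;

	ret = rte_flow_info_get(port_id, &port_info, &queue_info, &error);
	if (ret)
		return ret;
	/* One flow queue of 256 entries; must stay within queue_info limits. */
	return rte_flow_configure(port_id, &port_attr, 1, queue_attr_list,
				  &error);
}
#endif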
8268 
8269 /**
8270  * Create flow pattern template.
8271  *
8272  * @param[in] dev
8273  *   Pointer to the rte_eth_dev structure.
8274  * @param[in] attr
8275  *   Pointer to the item template attributes.
8276  * @param[in] items
8277  *   The template item pattern.
8278  * @param[out] error
8279  *   Pointer to error structure.
8280  *
8281  * @return
8282  *   Template pointer on success, NULL otherwise and rte_errno is set.
8283  */
8284 static struct rte_flow_pattern_template *
8285 mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
8286 		const struct rte_flow_pattern_template_attr *attr,
8287 		const struct rte_flow_item items[],
8288 		struct rte_flow_error *error)
8289 {
8290 	const struct mlx5_flow_driver_ops *fops;
8291 
8292 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8293 		rte_flow_error_set(error, ENOTSUP,
8294 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8295 				NULL,
8296 				"pattern create with incorrect steering mode");
8297 		return NULL;
8298 	}
8299 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8300 	return fops->pattern_template_create(dev, attr, items, error);
8301 }
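
/*
 * Illustrative sketch, not part of the driver: creating a pattern template
 * through the public API wrapper above. The item masks define which fields
 * are matched; per-rule spec values come later at enqueue time. The guard
 * macro is hypothetical and the attribute fields may differ between rte_flow
 * releases.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* hypothetical guard: never compiled */
static struct rte_flow_pattern_template *
example_create_pattern_template(uint16_t port_id)
{
	static const struct rte_flow_pattern_template_attr attr = {
		.relaxed_matching = 1,
	};
	/* Match on the full IPv4 destination address. */
	static const struct rte_flow_item_ipv4 ipv4_mask = {
		.hdr.dst_addr = RTE_BE32(0xffffffff),
	};
	static const struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4, .mask = &ipv4_mask },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_error error;

	return rte_flow_pattern_template_create(port_id, &attr, pattern,
						&error);
}
#endif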
8302 
8303 /**
8304  * Destroy flow pattern template.
8305  *
8306  * @param[in] dev
8307  *   Pointer to the rte_eth_dev structure.
8308  * @param[in] template
8309  *   Pointer to the item template to be destroyed.
8310  * @param[out] error
8311  *   Pointer to error structure.
8312  *
8313  * @return
8314  *   0 on success, a negative errno value otherwise and rte_errno is set.
8315  */
8316 static int
8317 mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
8318 				   struct rte_flow_pattern_template *template,
8319 				   struct rte_flow_error *error)
8320 {
8321 	const struct mlx5_flow_driver_ops *fops;
8322 
8323 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8324 		return rte_flow_error_set(error, ENOTSUP,
8325 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8326 				NULL,
8327 				"pattern destroy with incorrect steering mode");
8328 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8329 	return fops->pattern_template_destroy(dev, template, error);
8330 }
8331 
8332 /**
8333  * Create flow actions template.
8334  *
8335  * @param[in] dev
8336  *   Pointer to the rte_eth_dev structure.
8337  * @param[in] attr
8338  *   Pointer to the action template attributes.
8339  * @param[in] actions
8340  *   Associated actions (list terminated by the END action).
8341  * @param[in] masks
8342  *   List of actions that mark which of each action's members are constant.
8343  * @param[out] error
8344  *   Pointer to error structure.
8345  *
8346  * @return
8347  *   Template pointer on success, NULL otherwise and rte_errno is set.
8348  */
8349 static struct rte_flow_actions_template *
8350 mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
8351 			const struct rte_flow_actions_template_attr *attr,
8352 			const struct rte_flow_action actions[],
8353 			const struct rte_flow_action masks[],
8354 			struct rte_flow_error *error)
8355 {
8356 	const struct mlx5_flow_driver_ops *fops;
8357 
8358 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8359 		rte_flow_error_set(error, ENOTSUP,
8360 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8361 				NULL,
8362 				"action create with incorrect steering mode");
8363 		return NULL;
8364 	}
8365 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8366 	return fops->actions_template_create(dev, attr, actions, masks, error);
8367 }
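
/*
 * Illustrative sketch, not part of the driver: an actions template for the
 * wrapper above. A mask entry with a NULL/zeroed configuration leaves that
 * action's members to be provided per rule at enqueue time. The guard macro
 * is hypothetical.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* hypothetical guard: never compiled */
static struct rte_flow_actions_template *
example_create_actions_template(uint16_t port_id)
{
	static const struct rte_flow_actions_template_attr attr = { 0 };
	static const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_MARK },
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	/* NULL confs in the mask row: MARK id and QUEUE index stay variable. */
	static const struct rte_flow_action masks[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_MARK },
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error error;

	return rte_flow_actions_template_create(port_id, &attr, actions,
						masks, &error);
}
#endif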
8368 
8369 /**
8370  * Destroy flow action template.
8371  *
8372  * @param[in] dev
8373  *   Pointer to the rte_eth_dev structure.
8374  * @param[in] template
8375  *   Pointer to the action template to be destroyed.
8376  * @param[out] error
8377  *   Pointer to error structure.
8378  *
8379  * @return
8380  *   0 on success, a negative errno value otherwise and rte_errno is set.
8381  */
8382 static int
8383 mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
8384 				   struct rte_flow_actions_template *template,
8385 				   struct rte_flow_error *error)
8386 {
8387 	const struct mlx5_flow_driver_ops *fops;
8388 
8389 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8390 		return rte_flow_error_set(error, ENOTSUP,
8391 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8392 				NULL,
8393 				"action destroy with incorrect steering mode");
8394 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8395 	return fops->actions_template_destroy(dev, template, error);
8396 }
8397 
8398 /**
8399  * Create flow table.
8400  *
8401  * @param[in] dev
8402  *   Pointer to the rte_eth_dev structure.
8403  * @param[in] attr
8404  *   Pointer to the table attributes.
8405  * @param[in] item_templates
8406  *   Item template array to be bound to the table.
8407  * @param[in] nb_item_templates
8408  *   Number of item templates.
8409  * @param[in] action_templates
8410  *   Action template array to be bound to the table.
8411  * @param[in] nb_action_templates
8412  *   Number of action templates.
8413  * @param[out] error
8414  *   Pointer to error structure.
8415  *
8416  * @return
8417  *    Table on success, NULL otherwise and rte_errno is set.
8418  */
8419 static struct rte_flow_template_table *
8420 mlx5_flow_table_create(struct rte_eth_dev *dev,
8421 		       const struct rte_flow_template_table_attr *attr,
8422 		       struct rte_flow_pattern_template *item_templates[],
8423 		       uint8_t nb_item_templates,
8424 		       struct rte_flow_actions_template *action_templates[],
8425 		       uint8_t nb_action_templates,
8426 		       struct rte_flow_error *error)
8427 {
8428 	const struct mlx5_flow_driver_ops *fops;
8429 
8430 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8431 		rte_flow_error_set(error, ENOTSUP,
8432 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8433 				NULL,
8434 				"table create with incorrect steering mode");
8435 		return NULL;
8436 	}
8437 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8438 	return fops->template_table_create(dev,
8439 					   attr,
8440 					   item_templates,
8441 					   nb_item_templates,
8442 					   action_templates,
8443 					   nb_action_templates,
8444 					   error);
8445 }
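
/*
 * Illustrative sketch, not part of the driver: binding one pattern template
 * and one actions template into a table through the wrapper above. The group
 * id and rule capacity are arbitrary assumptions; the guard macro is
 * hypothetical.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* hypothetical guard: never compiled */
static struct rte_flow_template_table *
example_create_table(uint16_t port_id,
		     struct rte_flow_pattern_template *pt,
		     struct rte_flow_actions_template *at)
{
	const struct rte_flow_template_table_attr attr = {
		.flow_attr = { .group = 1, .ingress = 1 },
		.nb_flows = 1 << 16, /* assumed capacity: 64K rules */
	};
	struct rte_flow_pattern_template *pts[] = { pt };
	struct rte_flow_actions_template *ats[] = { at };
	struct rte_flow_error error;

	return rte_flow_template_table_create(port_id, &attr, pts, 1,
					      ats, 1, &error);
}
#endif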
8446 
8447 /**
8448  * PMD destroy flow table.
8449  *
8450  * @param[in] dev
8451  *   Pointer to the rte_eth_dev structure.
8452  * @param[in] table
8453  *   Pointer to the table to be destroyed.
8454  * @param[out] error
8455  *   Pointer to error structure.
8456  *
8457  * @return
8458  *   0 on success, a negative errno value otherwise and rte_errno is set.
8459  */
8460 static int
8461 mlx5_flow_table_destroy(struct rte_eth_dev *dev,
8462 			struct rte_flow_template_table *table,
8463 			struct rte_flow_error *error)
8464 {
8465 	const struct mlx5_flow_driver_ops *fops;
8466 
8467 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8468 		return rte_flow_error_set(error, ENOTSUP,
8469 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8470 				NULL,
8471 				"table destroy with incorrect steering mode");
8472 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8473 	return fops->template_table_destroy(dev, table, error);
8474 }
8475 
8476 /**
8477  * Enqueue flow creation.
8478  *
8479  * @param[in] dev
8480  *   Pointer to the rte_eth_dev structure.
8481  * @param[in] queue_id
8482  *   The queue to create the flow.
8483  * @param[in] attr
8484  *   Pointer to the flow operation attributes.
8485  * @param[in] items
8486  *   Items with flow spec value.
8487  * @param[in] pattern_template_index
8488  *   Index of the pattern template, within the table, that the flow follows.
8489  * @param[in] actions
8490  *   Action with flow spec value.
8491  * @param[in] action_template_index
8492  *   Index of the actions template, within the table, that the flow follows.
8493  * @param[in] user_data
8494  *   Pointer to the user_data.
8495  * @param[out] error
8496  *   Pointer to error structure.
8497  *
8498  * @return
8499  *    Flow pointer on success, NULL otherwise and rte_errno is set.
8500  */
8501 static struct rte_flow *
8502 mlx5_flow_async_flow_create(struct rte_eth_dev *dev,
8503 			    uint32_t queue_id,
8504 			    const struct rte_flow_op_attr *attr,
8505 			    struct rte_flow_template_table *table,
8506 			    const struct rte_flow_item items[],
8507 			    uint8_t pattern_template_index,
8508 			    const struct rte_flow_action actions[],
8509 			    uint8_t action_template_index,
8510 			    void *user_data,
8511 			    struct rte_flow_error *error)
8512 {
8513 	const struct mlx5_flow_driver_ops *fops;
8514 
8515 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8516 		rte_flow_error_set(error, ENOTSUP,
8517 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8518 				NULL,
8519 				"flow_q create with incorrect steering mode");
8520 		return NULL;
8521 	}
8522 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8523 	return fops->async_flow_create(dev, queue_id, attr, table,
8524 				       items, pattern_template_index,
8525 				       actions, action_template_index,
8526 				       user_data, error);
8527 }
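
/*
 * Illustrative sketch, not part of the driver: enqueueing one rule through
 * the wrapper above. The pattern and actions must fit templates 0 and 0 of
 * the table (e.g. the earlier sketches in this file). The address and mark
 * value are arbitrary; the guard macro is hypothetical.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* hypothetical guard: never compiled */
static struct rte_flow *
example_enqueue_rule(uint16_t port_id, uint32_t queue_id,
		     struct rte_flow_template_table *table)
{
	const struct rte_flow_op_attr op_attr = { .postpone = 1 };
	const struct rte_flow_item_ipv4 ipv4_spec = {
		.hdr.dst_addr = RTE_BE32(RTE_IPV4(192, 168, 0, 1)),
	};
	const struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ipv4_spec },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	const struct rte_flow_action_mark mark = { .id = 0x1234 };
	const struct rte_flow_action_queue queue = { .index = 0 };
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error error;

	/* postpone == 1 defers the doorbell to a later rte_flow_push(). */
	return rte_flow_async_create(port_id, queue_id, &op_attr, table,
				     pattern, 0, actions, 0, NULL, &error);
}
#endif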
8528 
8529 /**
8530  * Enqueue flow destruction.
8531  *
8532  * @param[in] dev
8533  *   Pointer to the rte_eth_dev structure.
8534  * @param[in] queue
8535  *   The queue to destroy the flow.
8536  * @param[in] attr
8537  *   Pointer to the flow operation attributes.
8538  * @param[in] flow
8539  *   Pointer to the flow to be destroyed.
8540  * @param[in] user_data
8541  *   Pointer to the user_data.
8542  * @param[out] error
8543  *   Pointer to error structure.
8544  *
8545  * @return
8546  *    0 on success, negative value otherwise and rte_errno is set.
8547  */
8548 static int
8549 mlx5_flow_async_flow_destroy(struct rte_eth_dev *dev,
8550 			     uint32_t queue,
8551 			     const struct rte_flow_op_attr *attr,
8552 			     struct rte_flow *flow,
8553 			     void *user_data,
8554 			     struct rte_flow_error *error)
8555 {
8556 	const struct mlx5_flow_driver_ops *fops;
8557 
8558 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8559 		return rte_flow_error_set(error, ENOTSUP,
8560 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8561 				NULL,
8562 				"flow_q destroy with incorrect steering mode");
8563 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8564 	return fops->async_flow_destroy(dev, queue, attr, flow,
8565 					user_data, error);
8566 }
8567 
8568 /**
8569  * Pull the enqueued flows.
8570  *
8571  * @param[in] dev
8572  *   Pointer to the rte_eth_dev structure.
8573  * @param[in] queue
8574  *   The queue to pull the result.
8575  * @param[in, out] res
8576  *   Array to save the results.
8577  * @param[in] n_res
8578  *   Number of result entries available in the array.
8579  * @param[out] error
8580  *   Pointer to error structure.
8581  *
8582  * @return
8583  *    Number of results on success, negative value otherwise and rte_errno is set.
8584  */
8585 static int
8586 mlx5_flow_pull(struct rte_eth_dev *dev,
8587 	       uint32_t queue,
8588 	       struct rte_flow_op_result res[],
8589 	       uint16_t n_res,
8590 	       struct rte_flow_error *error)
8591 {
8592 	const struct mlx5_flow_driver_ops *fops;
8593 
8594 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8595 		return rte_flow_error_set(error, ENOTSUP,
8596 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8597 				NULL,
8598 				"flow_q pull with incorrect steering mode");
8599 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8600 	return fops->pull(dev, queue, res, n_res, error);
8601 }
8602 
8603 /**
8604  * Push the enqueued flows.
8605  *
8606  * @param[in] dev
8607  *   Pointer to the rte_eth_dev structure.
8608  * @param[in] queue
8609  *   The queue to push the flows.
8610  * @param[out] error
8611  *   Pointer to error structure.
8612  *
8613  * @return
8614  *    0 on success, negative value otherwise and rte_errno is set.
8615  */
8616 static int
8617 mlx5_flow_push(struct rte_eth_dev *dev,
8618 	       uint32_t queue,
8619 	       struct rte_flow_error *error)
8620 {
8621 	const struct mlx5_flow_driver_ops *fops;
8622 
8623 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8624 		return rte_flow_error_set(error, ENOTSUP,
8625 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8626 				NULL,
8627 				"flow_q push with incorrect steering mode");
8628 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8629 	return fops->push(dev, queue, error);
8630 }
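
/*
 * Illustrative sketch, not part of the driver: draining a flow queue with
 * the push/pull wrappers above after a batch of postponed operations. The
 * burst size is arbitrary and the guard macro is hypothetical.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* hypothetical guard: never compiled */
static int
example_push_and_pull(uint16_t port_id, uint32_t queue_id)
{
	struct rte_flow_op_result results[32];
	struct rte_flow_error error;
	int n, i;

	/* Ring the doorbell for everything enqueued with postpone == 1. */
	if (rte_flow_push(port_id, queue_id, &error))
		return -rte_errno;
	/* Collect whatever completions are ready; a real application keeps
	 * polling until all outstanding operations are accounted for. */
	n = rte_flow_pull(port_id, queue_id, results, RTE_DIM(results),
			  &error);
	if (n < 0)
		return n;
	for (i = 0; i < n; i++)
		if (results[i].status != RTE_FLOW_OP_SUCCESS)
			return -1; /* results[i].user_data identifies the op */
	return n;
}
#endif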
8631 
8632 /**
8633  * Enqueue indirect action creation.
8634  *
8635  * @param[in] dev
8636  *   Pointer to the rte_eth_dev structure.
8637  * @param[in] queue
8638  *   The queue to be used.
8639  * @param[in] attr
8640  *   Operation attribute.
8641  * @param[in] conf
8642  *   Indirect action configuration.
8643  * @param[in] action
8644  *   rte_flow action detail.
8645  * @param[in] user_data
8646  *   Pointer to the user_data.
8647  * @param[out] error
8648  *   Pointer to error structure.
8649  *
8650  * @return
8651  *   Action handle on success, NULL otherwise and rte_errno is set.
8652  */
8653 static struct rte_flow_action_handle *
8654 mlx5_flow_async_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
8655 				 const struct rte_flow_op_attr *attr,
8656 				 const struct rte_flow_indir_action_conf *conf,
8657 				 const struct rte_flow_action *action,
8658 				 void *user_data,
8659 				 struct rte_flow_error *error)
8660 {
8661 	const struct mlx5_flow_driver_ops *fops =
8662 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8663 
8664 	return fops->async_action_create(dev, queue, attr, conf, action,
8665 					 user_data, error);
8666 }
8667 
8668 /**
8669  * Enqueue indirect action update.
8670  *
8671  * @param[in] dev
8672  *   Pointer to the rte_eth_dev structure.
8673  * @param[in] queue
8674  *   The queue to be used.
8675  * @param[in] attr
8676  *   Operation attribute.
8677  * @param[in] handle
8678  *   Action handle to be updated.
8679  * @param[in] update
8680  *   Update value.
8681  * @param[in] user_data
8682  *   Pointer to the user_data.
8683  * @param[out] error
8684  *   Pointer to error structure.
8685  *
8686  * @return
8687  *   0 on success, negative value otherwise and rte_errno is set.
8688  */
8689 static int
8690 mlx5_flow_async_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
8691 				     const struct rte_flow_op_attr *attr,
8692 				     struct rte_flow_action_handle *handle,
8693 				     const void *update,
8694 				     void *user_data,
8695 				     struct rte_flow_error *error)
8696 {
8697 	const struct mlx5_flow_driver_ops *fops =
8698 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8699 
8700 	return fops->async_action_update(dev, queue, attr, handle,
8701 					 update, user_data, error);
8702 }
8703 
8704 /**
8705  * Enqueue indirect action destruction.
8706  *
8707  * @param[in] dev
8708  *   Pointer to the rte_eth_dev structure.
8709  * @param[in] queue
8710  *   The queue to be used.
8711  * @param[in] attr
8712  *   Operation attribute.
8713  * @param[in] handle
8714  *   Action handle to be destroyed.
8715  * @param[in] user_data
8716  *   Pointer to the user_data.
8717  * @param[out] error
8718  *   Pointer to error structure.
8719  *
8720  * @return
8721  *   0 on success, negative value otherwise and rte_errno is set.
8722  */
8723 static int
8724 mlx5_flow_async_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
8725 				      const struct rte_flow_op_attr *attr,
8726 				      struct rte_flow_action_handle *handle,
8727 				      void *user_data,
8728 				      struct rte_flow_error *error)
8729 {
8730 	const struct mlx5_flow_driver_ops *fops =
8731 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8732 
8733 	return fops->async_action_destroy(dev, queue, attr, handle,
8734 					  user_data, error);
8735 }
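
/*
 * Illustrative sketch, not part of the driver: enqueueing an indirect COUNT
 * action through the wrapper above. The completion is still reported via
 * rte_flow_pull(). The guard macro is hypothetical.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* hypothetical guard: never compiled */
static struct rte_flow_action_handle *
example_async_create_count_handle(uint16_t port_id, uint32_t queue_id)
{
	const struct rte_flow_op_attr op_attr = { .postpone = 0 };
	const struct rte_flow_indir_action_conf conf = { .ingress = 1 };
	const struct rte_flow_action action = {
		.type = RTE_FLOW_ACTION_TYPE_COUNT,
	};
	struct rte_flow_error error;

	return rte_flow_async_action_handle_create(port_id, queue_id,
						   &op_attr, &conf, &action,
						   NULL, &error);
}
#endif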
8736 
8737 /**
8738  * Allocate new memory for the counter values, wrapped with all the needed
8739  * management structures.
8740  *
8741  * @param[in] sh
8742  *   Pointer to mlx5_dev_ctx_shared object.
8743  *
8744  * @return
8745  *   0 on success, a negative errno value otherwise.
8746  */
8747 static int
8748 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
8749 {
8750 	struct mlx5_counter_stats_mem_mng *mem_mng;
8751 	volatile struct flow_counter_stats *raw_data;
8752 	int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
8753 	int size = (sizeof(struct flow_counter_stats) *
8754 			MLX5_COUNTERS_PER_POOL +
8755 			sizeof(struct mlx5_counter_stats_raw)) * raws_n +
8756 			sizeof(struct mlx5_counter_stats_mem_mng);
8757 	size_t pgsize = rte_mem_page_size();
8758 	uint8_t *mem;
8759 	int ret;
8760 	int i;
8761 
8762 	if (pgsize == (size_t)-1) {
8763 		DRV_LOG(ERR, "Failed to get mem page size");
8764 		rte_errno = ENOMEM;
8765 		return -ENOMEM;
8766 	}
8767 	mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
8768 	if (!mem) {
8769 		rte_errno = ENOMEM;
8770 		return -ENOMEM;
8771 	}
8772 	mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
8773 	size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
8774 	ret = mlx5_os_wrapped_mkey_create(sh->cdev->ctx, sh->cdev->pd,
8775 					  sh->cdev->pdn, mem, size,
8776 					  &mem_mng->wm);
8777 	if (ret) {
8778 		rte_errno = errno;
8779 		mlx5_free(mem);
8780 		return -rte_errno;
8781 	}
8782 	mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
8783 	raw_data = (volatile struct flow_counter_stats *)mem;
8784 	for (i = 0; i < raws_n; ++i) {
8785 		mem_mng->raws[i].mem_mng = mem_mng;
8786 		mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
8787 	}
8788 	for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
8789 		LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
8790 				 mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
8791 				 next);
8792 	LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
8793 	sh->cmng.mem_mng = mem_mng;
8794 	return 0;
8795 }
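
/*
 * Sketch of the single allocation laid out above (per the first "size"
 * computation):
 *
 *   mem                          mem + raw data size           end
 *   |-- raw counter values ------|-- raws_n raw headers --|-- mem_mng --|
 *      raws_n * MLX5_COUNTERS_PER_POOL
 *      * sizeof(struct flow_counter_stats)
 *
 * "size" is recomputed mid-function to cover only the raw counter values,
 * so the wrapped memory key registers just the area the HW writes into.
 */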
8796 
8797 /**
8798  * Set the statistic memory to the new counter pool.
8799  *
8800  * @param[in] sh
8801  *   Pointer to mlx5_dev_ctx_shared object.
8802  * @param[in] pool
8803  *   Pointer to the pool to set the statistic memory.
8804  *
8805  * @return
8806  *   0 on success, a negative errno value otherwise.
8807  */
8808 static int
8809 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
8810 			       struct mlx5_flow_counter_pool *pool)
8811 {
8812 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
8813 	/* Resize the statistic memory when the current chunk is used up. */
8814 	if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
8815 	    mlx5_flow_create_counter_stat_mem_mng(sh)) {
8816 		DRV_LOG(ERR, "Cannot resize counter stat mem.");
8817 		return -1;
8818 	}
8819 	rte_spinlock_lock(&pool->sl);
8820 	pool->raw = cmng->mem_mng->raws + pool->index %
8821 		    MLX5_CNT_CONTAINER_RESIZE;
8822 	rte_spinlock_unlock(&pool->sl);
8823 	pool->raw_hw = NULL;
8824 	return 0;
8825 }
8826 
8827 #define MLX5_POOL_QUERY_FREQ_US 1000000
8828 
8829 /**
8830  * Set the periodic procedure for triggering asynchronous batch queries for all
8831  * the counter pools.
8832  *
8833  * @param[in] sh
8834  *   Pointer to mlx5_dev_ctx_shared object.
8835  */
8836 void
8837 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
8838 {
8839 	uint32_t pools_n, us;
8840 
8841 	pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED);
8842 	us = MLX5_POOL_QUERY_FREQ_US / pools_n;
8843 	DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
8844 	if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
8845 		sh->cmng.query_thread_on = 0;
8846 		DRV_LOG(ERR, "Cannot reinitialize query alarm");
8847 	} else {
8848 		sh->cmng.query_thread_on = 1;
8849 	}
8850 }
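
/*
 * Example of the arithmetic above: with MLX5_POOL_QUERY_FREQ_US at 1 second
 * and, say, 4 valid pools, the alarm fires every 250000 us and advances one
 * pool per invocation, so each pool is queried roughly once per second.
 */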
8851 
8852 /**
8853  * The periodic procedure for triggering asynchronous batch queries for all the
8854  * counter pools. This function is expected to be called from the host thread.
8855  *
8856  * @param[in] arg
8857  *   The parameter for the alarm process.
8858  */
8859 void
8860 mlx5_flow_query_alarm(void *arg)
8861 {
8862 	struct mlx5_dev_ctx_shared *sh = arg;
8863 	int ret;
8864 	uint16_t pool_index = sh->cmng.pool_index;
8865 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
8866 	struct mlx5_flow_counter_pool *pool;
8867 	uint16_t n_valid;
8868 
8869 	if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
8870 		goto set_alarm;
8871 	rte_spinlock_lock(&cmng->pool_update_sl);
8872 	pool = cmng->pools[pool_index];
8873 	n_valid = cmng->n_valid;
8874 	rte_spinlock_unlock(&cmng->pool_update_sl);
8875 	/* Set the statistic memory to the newly created pool. */
8876 	if (!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool))
8877 		goto set_alarm;
8878 	if (pool->raw_hw)
8879 		/* There is a pool query in progress. */
8880 		goto set_alarm;
8881 	pool->raw_hw =
8882 		LIST_FIRST(&sh->cmng.free_stat_raws);
8883 	if (!pool->raw_hw)
8884 		/* No free counter statistics raw memory. */
8885 		goto set_alarm;
8886 	/*
8887 	 * Identify the counters released between query trigger and query
8888 	 * handle more efficiently. The counter released in this gap period
8889 	 * should wait for a new round of query as the new arrived packets
8890 	 * will not be taken into account.
8891 	 */
8892 	pool->query_gen++;
8893 	ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
8894 					       MLX5_COUNTERS_PER_POOL,
8895 					       NULL, NULL,
8896 					       pool->raw_hw->mem_mng->wm.lkey,
8897 					       (void *)(uintptr_t)
8898 					       pool->raw_hw->data,
8899 					       sh->devx_comp,
8900 					       (uint64_t)(uintptr_t)pool);
8901 	if (ret) {
8902 		DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
8903 			" %d", pool->min_dcs->id);
8904 		pool->raw_hw = NULL;
8905 		goto set_alarm;
8906 	}
8907 	LIST_REMOVE(pool->raw_hw, next);
8908 	sh->cmng.pending_queries++;
8909 	pool_index++;
8910 	if (pool_index >= n_valid)
8911 		pool_index = 0;
8912 set_alarm:
8913 	sh->cmng.pool_index = pool_index;
8914 	mlx5_set_query_alarm(sh);
8915 }
8916 
8917 /**
8918  * Check for newly aged flows in the counter pool and raise the aging event.
8919  *
8920  * @param[in] sh
8921  *   Pointer to mlx5_dev_ctx_shared object.
8922  * @param[in] pool
8923  *   Pointer to the current counter pool.
8924  */
8925 static void
8926 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
8927 		   struct mlx5_flow_counter_pool *pool)
8928 {
8929 	struct mlx5_priv *priv;
8930 	struct mlx5_flow_counter *cnt;
8931 	struct mlx5_age_info *age_info;
8932 	struct mlx5_age_param *age_param;
8933 	struct mlx5_counter_stats_raw *cur = pool->raw_hw;
8934 	struct mlx5_counter_stats_raw *prev = pool->raw;
8935 	const uint64_t curr_time = MLX5_CURR_TIME_SEC;
8936 	const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
8937 	uint16_t expected = AGE_CANDIDATE;
8938 	uint32_t i;
8939 
8940 	pool->time_of_last_age_check = curr_time;
8941 	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
8942 		cnt = MLX5_POOL_GET_CNT(pool, i);
8943 		age_param = MLX5_CNT_TO_AGE(cnt);
8944 		if (__atomic_load_n(&age_param->state,
8945 				    __ATOMIC_RELAXED) != AGE_CANDIDATE)
8946 			continue;
8947 		if (cur->data[i].hits != prev->data[i].hits) {
8948 			__atomic_store_n(&age_param->sec_since_last_hit, 0,
8949 					 __ATOMIC_RELAXED);
8950 			continue;
8951 		}
8952 		if (__atomic_add_fetch(&age_param->sec_since_last_hit,
8953 				       time_delta,
8954 				       __ATOMIC_RELAXED) <= age_param->timeout)
8955 			continue;
8956 		/*
8957 		 * Hold the lock first: if the counter is released
8958 		 * between setting the AGE_TMOUT state and the tailq
8959 		 * insertion below, the release procedure could try
8960 		 * to remove a tailq node that was never inserted.
8961 		 */
8962 		priv = rte_eth_devices[age_param->port_id].data->dev_private;
8963 		age_info = GET_PORT_AGE_INFO(priv);
8964 		rte_spinlock_lock(&age_info->aged_sl);
8965 		if (__atomic_compare_exchange_n(&age_param->state, &expected,
8966 						AGE_TMOUT, false,
8967 						__ATOMIC_RELAXED,
8968 						__ATOMIC_RELAXED)) {
8969 			TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
8970 			MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
8971 		}
8972 		rte_spinlock_unlock(&age_info->aged_sl);
8973 	}
8974 	mlx5_age_event_prepare(sh);
8975 }
8976 
8977 /**
8978  * Handler for the HW response with ready values from an asynchronous batch
8979  * query. This function is expected to be called from the host thread.
8980  *
8981  * @param[in] sh
8982  *   The pointer to the shared device context.
8983  * @param[in] async_id
8984  *   The Devx async ID.
8985  * @param[in] status
8986  *   The status of the completion.
8987  */
8988 void
8989 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
8990 				  uint64_t async_id, int status)
8991 {
8992 	struct mlx5_flow_counter_pool *pool =
8993 		(struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
8994 	struct mlx5_counter_stats_raw *raw_to_free;
8995 	uint8_t query_gen = pool->query_gen ^ 1;
8996 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
8997 	enum mlx5_counter_type cnt_type =
8998 		pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
8999 				MLX5_COUNTER_TYPE_ORIGIN;
9000 
9001 	if (unlikely(status)) {
9002 		raw_to_free = pool->raw_hw;
9003 	} else {
9004 		raw_to_free = pool->raw;
9005 		if (pool->is_aged)
9006 			mlx5_flow_aging_check(sh, pool);
9007 		rte_spinlock_lock(&pool->sl);
9008 		pool->raw = pool->raw_hw;
9009 		rte_spinlock_unlock(&pool->sl);
9010 		/* Be sure the new raw counters data is updated in memory. */
9011 		rte_io_wmb();
9012 		if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
9013 			rte_spinlock_lock(&cmng->csl[cnt_type]);
9014 			TAILQ_CONCAT(&cmng->counters[cnt_type],
9015 				     &pool->counters[query_gen], next);
9016 			rte_spinlock_unlock(&cmng->csl[cnt_type]);
9017 		}
9018 	}
9019 	LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
9020 	pool->raw_hw = NULL;
9021 	sh->cmng.pending_queries--;
9022 }
9023 
9024 static int
9025 flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
9026 		    const struct flow_grp_info *grp_info,
9027 		    struct rte_flow_error *error)
9028 {
9029 	if (grp_info->transfer && grp_info->external &&
9030 	    grp_info->fdb_def_rule) {
9031 		if (group == UINT32_MAX)
9032 			return rte_flow_error_set
9033 						(error, EINVAL,
9034 						 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
9035 						 NULL,
9036 						 "group index not supported");
9037 		*table = group + 1;
9038 	} else {
9039 		*table = group;
9040 	}
9041 	DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
9042 	return 0;
9043 }
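
/*
 * Example of the standard mapping above: for an external transfer rule under
 * the FDB default rule, group 0 maps to table 1 and group N to table N + 1
 * (table 0 hosts the FDB default rules); in every other case the group index
 * is used as the table id unchanged.
 */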
9044 
9045 /**
9046  * Translate the rte_flow group index to HW table value.
9047  *
9048  * If tunnel offload is disabled, all group ids are converted to flow
9049  * table ids using the standard method.
9050  * If tunnel offload is enabled, a group id can be converted using either
9051  * the standard or the tunnel conversion method. The conversion method
9052  * selection depends on flags in the `grp_info` parameter:
9053  * - Internal (grp_info.external == 0) groups conversion uses the
9054  *   standard method.
9055  * - Group ids in JUMP action converted with the tunnel conversion.
9056  * - Group id in rule attribute conversion depends on a rule type and
9057  *   group id value:
9058  *   ** non zero group attributes converted with the tunnel method
9059  *   ** zero group attribute in non-tunnel rule is converted using the
9060  *      standard method - there's only one root table
9061  *   ** zero group attribute in steer tunnel rule is converted with the
9062  *      standard method - single root table
9063  *   ** zero group attribute in match tunnel rule is a special OvS
9064  *      case: that value is used for portability reasons. That group
9065  *      id is converted with the tunnel conversion method.
9066  *
9067  * @param[in] dev
9068  *   Port device
9069  * @param[in] tunnel
9070  *   PMD tunnel offload object
9071  * @param[in] group
9072  *   rte_flow group index value.
9073  * @param[out] table
9074  *   HW table value.
9075  * @param[in] grp_info
9076  *   flags used for conversion
9077  * @param[out] error
9078  *   Pointer to error structure.
9079  *
9080  * @return
9081  *   0 on success, a negative errno value otherwise and rte_errno is set.
9082  */
9083 int
9084 mlx5_flow_group_to_table(struct rte_eth_dev *dev,
9085 			 const struct mlx5_flow_tunnel *tunnel,
9086 			 uint32_t group, uint32_t *table,
9087 			 const struct flow_grp_info *grp_info,
9088 			 struct rte_flow_error *error)
9089 {
9090 	int ret;
9091 	bool standard_translation;
9092 
9093 	if (!grp_info->skip_scale && grp_info->external &&
9094 	    group < MLX5_MAX_TABLES_EXTERNAL)
9095 		group *= MLX5_FLOW_TABLE_FACTOR;
9096 	if (is_tunnel_offload_active(dev)) {
9097 		standard_translation = !grp_info->external ||
9098 					grp_info->std_tbl_fix;
9099 	} else {
9100 		standard_translation = true;
9101 	}
9102 	DRV_LOG(DEBUG,
9103 		"port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
9104 		dev->data->port_id, group, grp_info->transfer,
9105 		grp_info->external, grp_info->fdb_def_rule,
9106 		standard_translation ? "STANDARD" : "TUNNEL");
9107 	if (standard_translation)
9108 		ret = flow_group_to_table(dev->data->port_id, group, table,
9109 					  grp_info, error);
9110 	else
9111 		ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
9112 						      table, error);
9113 
9114 	return ret;
9115 }
9116 
9117 /**
9118  * Discover availability of metadata reg_c's.
9119  *
9120  * Iteratively use test flows to check availability.
9121  *
9122  * @param[in] dev
9123  *   Pointer to the Ethernet device structure.
9124  *
9125  * @return
9126  *   0 on success, a negative errno value otherwise and rte_errno is set.
9127  */
9128 int
9129 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
9130 {
9131 	struct mlx5_priv *priv = dev->data->dev_private;
9132 	enum modify_reg idx;
9133 	int n = 0;
9134 
9135 	/* reg_c[0] and reg_c[1] are reserved. */
9136 	priv->sh->flow_mreg_c[n++] = REG_C_0;
9137 	priv->sh->flow_mreg_c[n++] = REG_C_1;
9138 	/* Discover availability of other reg_c's. */
9139 	for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
9140 		struct rte_flow_attr attr = {
9141 			.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
9142 			.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
9143 			.ingress = 1,
9144 		};
9145 		struct rte_flow_item items[] = {
9146 			[0] = {
9147 				.type = RTE_FLOW_ITEM_TYPE_END,
9148 			},
9149 		};
9150 		struct rte_flow_action actions[] = {
9151 			[0] = {
9152 				.type = (enum rte_flow_action_type)
9153 					MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
9154 				.conf = &(struct mlx5_flow_action_copy_mreg){
9155 					.src = REG_C_1,
9156 					.dst = idx,
9157 				},
9158 			},
9159 			[1] = {
9160 				.type = RTE_FLOW_ACTION_TYPE_JUMP,
9161 				.conf = &(struct rte_flow_action_jump){
9162 					.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
9163 				},
9164 			},
9165 			[2] = {
9166 				.type = RTE_FLOW_ACTION_TYPE_END,
9167 			},
9168 		};
9169 		uint32_t flow_idx;
9170 		struct rte_flow *flow;
9171 		struct rte_flow_error error;
9172 
9173 		if (!priv->sh->config.dv_flow_en)
9174 			break;
9175 		/* Create internal flow, validation skips copy action. */
9176 		flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_GEN, &attr,
9177 					items, actions, false, &error);
9178 		flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
9179 				      flow_idx);
9180 		if (!flow)
9181 			continue;
9182 		priv->sh->flow_mreg_c[n++] = idx;
9183 		flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN, flow_idx);
9184 	}
9185 	for (; n < MLX5_MREG_C_NUM; ++n)
9186 		priv->sh->flow_mreg_c[n] = REG_NON;
9187 	priv->sh->metadata_regc_check_flag = 1;
9188 	return 0;
9189 }
9190 
9191 int
9192 save_dump_file(const uint8_t *data, uint32_t size,
9193 	uint32_t type, uint64_t id, void *arg, FILE *file)
9194 {
9195 	char line[BUF_SIZE];
9196 	uint32_t out = 0;
9197 	uint32_t k;
9198 	uint32_t actions_num;
9199 	struct rte_flow_query_count *count;
9200 
9201 	memset(line, 0, BUF_SIZE);
9202 	switch (type) {
9203 	case DR_DUMP_REC_TYPE_PMD_MODIFY_HDR:
9204 		actions_num = *(uint32_t *)(arg);
9205 		out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",%d,",
9206 				type, id, actions_num);
9207 		break;
9208 	case DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT:
9209 		out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",",
9210 				type, id);
9211 		break;
9212 	case DR_DUMP_REC_TYPE_PMD_COUNTER:
9213 		count = (struct rte_flow_query_count *)arg;
9214 		fprintf(file,
9215 			"%d,0x%" PRIx64 ",%" PRIu64 ",%" PRIu64 "\n",
9216 			type, id, count->hits, count->bytes);
9217 		return 0;
9218 	default:
9219 		return -1;
9220 	}
9221 
9222 	for (k = 0; k < size; k++) {
9223 		/* Make sure we do not overrun the line buffer length. */
9224 		if (out >= BUF_SIZE - 4) {
9225 			line[out] = '\0';
9226 			break;
9227 		}
9228 		out += snprintf(line + out, BUF_SIZE - out, "%02x",
9229 				(data[k]) & 0xff);
9230 	}
9231 	fprintf(file, "%s\n", line);
9232 	return 0;
9233 }
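
/*
 * Record shapes emitted above, one per line (illustrative values only):
 *
 *   modify_hdr: "<type>,0x<id>,<actions_num>,<hex dump of actions>"
 *   reformat:   "<type>,0x<id>,<hex dump of buf>"
 *   counter:    "<type>,0x<id>,<hits>,<bytes>"
 */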
9234 
9235 int
9236 mlx5_flow_query_counter(struct rte_eth_dev *dev, struct rte_flow *flow,
9237 	struct rte_flow_query_count *count, struct rte_flow_error *error)
9238 {
9239 	struct rte_flow_action action[2];
9240 	enum mlx5_flow_drv_type ftype;
9241 	const struct mlx5_flow_driver_ops *fops;
9242 
9243 	if (!flow) {
9244 		return rte_flow_error_set(error, ENOENT,
9245 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9246 				NULL,
9247 				"invalid flow handle");
9248 	}
9249 	action[0].type = RTE_FLOW_ACTION_TYPE_COUNT;
9250 	action[1].type = RTE_FLOW_ACTION_TYPE_END;
9251 	if (flow->counter) {
9252 		memset(count, 0, sizeof(struct rte_flow_query_count));
9253 		ftype = (enum mlx5_flow_drv_type)(flow->drv_type);
9254 		MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN &&
9255 						ftype < MLX5_FLOW_TYPE_MAX);
9256 		fops = flow_get_drv_ops(ftype);
9257 		return fops->query(dev, flow, action, count, error);
9258 	}
9259 	return -1;
9260 }
9261 
9262 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9263 /**
9264  * Dump flow ipool data to file
9265  *
9266  * @param[in] dev
9267  *   The pointer to Ethernet device.
9268  * @param[in] file
9269  *   A pointer to a file for output.
9270  * @param[out] error
9271  *   Perform verbose error reporting if not NULL. PMDs initialize this
9272  *   structure in case of error only.
9273  * @return
9274  *   0 on success, a negative value otherwise.
9275  */
9276 int
9277 mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev,
9278 	struct rte_flow *flow, FILE *file,
9279 	struct rte_flow_error *error)
9280 {
9281 	struct mlx5_priv *priv = dev->data->dev_private;
9282 	struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
9283 	struct mlx5_flow_dv_encap_decap_resource *encap_decap;
9284 	uint32_t handle_idx;
9285 	struct mlx5_flow_handle *dh;
9286 	struct rte_flow_query_count count;
9287 	uint32_t actions_num;
9288 	const uint8_t *data;
9289 	size_t size;
9290 	uint64_t id;
9291 	uint32_t type;
9292 	void *action = NULL;
9293 
9294 	if (!flow) {
9295 		return rte_flow_error_set(error, ENOENT,
9296 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9297 				NULL,
9298 				"invalid flow handle");
9299 	}
9300 	handle_idx = flow->dev_handles;
9301 	/* query counter */
9302 	if (flow->counter &&
9303 	(!mlx5_counter_query(dev, flow->counter, false,
9304 	&count.hits, &count.bytes, &action)) && action) {
9305 		id = (uint64_t)(uintptr_t)action;
9306 		type = DR_DUMP_REC_TYPE_PMD_COUNTER;
9307 		save_dump_file(NULL, 0, type,
9308 			id, (void *)&count, file);
9309 	}
9310 
9311 	while (handle_idx) {
9312 		dh = mlx5_ipool_get(priv->sh->ipool
9313 				[MLX5_IPOOL_MLX5_FLOW], handle_idx);
9314 		if (!dh)
9315 			break; /* "continue" would spin forever: handle_idx is unchanged. */
9316 		handle_idx = dh->next.next;
9317 
9318 		/* Get modify_hdr and encap_decap buf from ipools. */
9319 		encap_decap = NULL;
9320 		modify_hdr = dh->dvh.modify_hdr;
9321 
9322 		if (dh->dvh.rix_encap_decap) {
9323 			encap_decap = mlx5_ipool_get(priv->sh->ipool
9324 						[MLX5_IPOOL_DECAP_ENCAP],
9325 						dh->dvh.rix_encap_decap);
9326 		}
9327 		if (modify_hdr) {
9328 			data = (const uint8_t *)modify_hdr->actions;
9329 			size = (size_t)(modify_hdr->actions_num) * 8;
9330 			id = (uint64_t)(uintptr_t)modify_hdr->action;
9331 			actions_num = modify_hdr->actions_num;
9332 			type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
9333 			save_dump_file(data, size, type, id,
9334 						(void *)(&actions_num), file);
9335 		}
9336 		if (encap_decap) {
9337 			data = encap_decap->buf;
9338 			size = encap_decap->size;
9339 			id = (uint64_t)(uintptr_t)encap_decap->action;
9340 			type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
9341 			save_dump_file(data, size, type,
9342 						id, NULL, file);
9343 		}
9344 	}
9345 	return 0;
9346 }
9347 
9348 /**
9349  * Dump all flow's encap_decap/modify_hdr/counter data to file
9350  *
9351  * @param[in] dev
9352  *   The pointer to Ethernet device.
9353  * @param[in] file
9354  *   A pointer to a file for output.
9355  * @param[out] error
9356  *   Perform verbose error reporting if not NULL. PMDs initialize this
9357  *   structure in case of error only.
9358  * @return
9359  *   0 on success, a negative value otherwise.
9360  */
9361 static int
9362 mlx5_flow_dev_dump_sh_all(struct rte_eth_dev *dev,
9363 	FILE *file, struct rte_flow_error *error __rte_unused)
9364 {
9365 	struct mlx5_priv *priv = dev->data->dev_private;
9366 	struct mlx5_dev_ctx_shared *sh = priv->sh;
9367 	struct mlx5_hlist *h;
9368 	struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
9369 	struct mlx5_flow_dv_encap_decap_resource *encap_decap;
9370 	struct rte_flow_query_count count;
9371 	uint32_t actions_num;
9372 	const uint8_t *data;
9373 	size_t size;
9374 	uint64_t id;
9375 	uint32_t type;
9376 	uint32_t i;
9377 	uint32_t j;
9378 	struct mlx5_list_inconst *l_inconst;
9379 	struct mlx5_list_entry *e;
9380 	int lcore_index;
9381 	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
9382 	uint32_t max;
9383 	void *action;
9384 
9385 	/* encap_decap hlist is lcore_share, get global core cache. */
9386 	i = MLX5_LIST_GLOBAL;
9387 	h = sh->encaps_decaps;
9388 	if (h) {
9389 		for (j = 0; j <= h->mask; j++) {
9390 			l_inconst = &h->buckets[j].l;
9391 			if (!l_inconst || !l_inconst->cache[i])
9392 				continue;
9393 
9394 			e = LIST_FIRST(&l_inconst->cache[i]->h);
9395 			while (e) {
9396 				encap_decap =
9397 				(struct mlx5_flow_dv_encap_decap_resource *)e;
9398 				data = encap_decap->buf;
9399 				size = encap_decap->size;
9400 				id = (uint64_t)(uintptr_t)encap_decap->action;
9401 				type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
9402 				save_dump_file(data, size, type,
9403 					id, NULL, file);
9404 				e = LIST_NEXT(e, next);
9405 			}
9406 		}
9407 	}
9408 
9409 	/* get modify_hdr */
9410 	h = sh->modify_cmds;
9411 	if (h) {
9412 		lcore_index = rte_lcore_index(rte_lcore_id());
9413 		if (unlikely(lcore_index == -1)) {
9414 			lcore_index = MLX5_LIST_NLCORE;
9415 			rte_spinlock_lock(&h->l_const.lcore_lock);
9416 		}
9417 		i = lcore_index;
9418 
9419 		for (j = 0; j <= h->mask; j++) {
9420 			l_inconst = &h->buckets[j].l;
9421 			if (!l_inconst || !l_inconst->cache[i])
9422 				continue;
9423 
9424 			e = LIST_FIRST(&l_inconst->cache[i]->h);
9425 			while (e) {
9426 				modify_hdr =
9427 				(struct mlx5_flow_dv_modify_hdr_resource *)e;
9428 				data = (const uint8_t *)modify_hdr->actions;
9429 				size = (size_t)(modify_hdr->actions_num) * 8;
9430 				actions_num = modify_hdr->actions_num;
9431 				id = (uint64_t)(uintptr_t)modify_hdr->action;
9432 				type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
9433 				save_dump_file(data, size, type, id,
9434 						(void *)(&actions_num), file);
9435 				e = LIST_NEXT(e, next);
9436 			}
9437 		}
9438 
9439 		if (unlikely(lcore_index == MLX5_LIST_NLCORE))
9440 			rte_spinlock_unlock(&h->l_const.lcore_lock);
9441 	}
9442 
9443 	/* get counter */
9444 	MLX5_ASSERT(cmng->n_valid <= cmng->n);
9445 	max = MLX5_COUNTERS_PER_POOL * cmng->n_valid;
9446 	for (j = 1; j <= max; j++) {
9447 		action = NULL;
9448 		if ((!mlx5_counter_query(dev, j, false, &count.hits,
9449 		&count.bytes, &action)) && action) {
9450 			id = (uint64_t)(uintptr_t)action;
9451 			type = DR_DUMP_REC_TYPE_PMD_COUNTER;
9452 			save_dump_file(NULL, 0, type,
9453 					id, (void *)&count, file);
9454 		}
9455 	}
9456 	return 0;
9457 }
9458 #endif
9459 
9460 /**
9461  * Dump flow raw hw data to file
9462  *
9463  * @param[in] dev
9464  *    The pointer to Ethernet device.
9465  * @param[in] file
9466  *   A pointer to a file for output.
9467  * @param[out] error
9468  *   Perform verbose error reporting if not NULL. PMDs initialize this
9469  *   structure in case of error only.
9470  * @return
9471  *   0 on success, a negative value otherwise.
9472  */
9473 int
9474 mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow_idx,
9475 		   FILE *file,
9476 		   struct rte_flow_error *error __rte_unused)
9477 {
9478 	struct mlx5_priv *priv = dev->data->dev_private;
9479 	struct mlx5_dev_ctx_shared *sh = priv->sh;
9480 	uint32_t handle_idx;
9481 	int ret;
9482 	struct mlx5_flow_handle *dh;
9483 	struct rte_flow *flow;
9484 
9485 	if (!sh->config.dv_flow_en) {
9486 		if (fputs("device dv flow disabled\n", file) <= 0)
9487 			return -errno;
9488 		return -ENOTSUP;
9489 	}
9490 
9491 	/* dump all */
9492 	if (!flow_idx) {
9493 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9494 		if (mlx5_flow_dev_dump_sh_all(dev, file, error))
9495 			return -EINVAL;
9496 #endif
9497 		return mlx5_devx_cmd_flow_dump(sh->fdb_domain,
9498 					sh->rx_domain,
9499 					sh->tx_domain, file);
9500 	}
9501 	/* dump one */
9502 	flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
9503 			(uintptr_t)(void *)flow_idx);
9504 	if (!flow)
9505 		return -EINVAL;
9506 
9507 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9508 	mlx5_flow_dev_dump_ipool(dev, flow, file, error);
9509 #endif
9510 	handle_idx = flow->dev_handles;
9511 	while (handle_idx) {
9512 		dh = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
9513 				handle_idx);
9514 		if (!dh)
9515 			return -ENOENT;
9516 		if (dh->drv_flow) {
9517 			ret = mlx5_devx_cmd_flow_single_dump(dh->drv_flow,
9518 					file);
9519 			if (ret)
9520 				return -ENOENT;
9521 		}
9522 		handle_idx = dh->next.next;
9523 	}
9524 	return 0;
9525 }
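
/*
 * Illustrative sketch, not part of the driver: triggering the dump above via
 * the public API. A NULL flow handle selects the "dump all" branch. The file
 * path handling and the guard macro are assumptions.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* hypothetical guard: never compiled */
static int
example_dump_all_flows(uint16_t port_id, const char *path)
{
	struct rte_flow_error error;
	FILE *f = fopen(path, "w");
	int ret;

	if (f == NULL)
		return -errno;
	ret = rte_flow_dev_dump(port_id, NULL, f, &error);
	fclose(f);
	return ret;
}
#endif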
9526 
9527 /**
9528  * Get aged-out flows.
9529  *
9530  * @param[in] dev
9531  *   Pointer to the Ethernet device structure.
9532  * @param[in] context
9533  *   The address of an array of pointers to the aged-out flows contexts.
9534  * @param[in] nb_contexts
9535  *   The length of the context array.
9536  * @param[out] error
9537  *   Perform verbose error reporting if not NULL. Initialized in case of
9538  *   error only.
9539  *
9540  * @return
9541  *   The number of contexts retrieved on success, a negative errno value
9542  *   otherwise. If nb_contexts is 0, the total number of aged contexts is
9543  *   returned; otherwise, the number of aged flows reported in the context
9544  *   array is returned.
9545  */
9546 int
9547 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
9548 			uint32_t nb_contexts, struct rte_flow_error *error)
9549 {
9550 	const struct mlx5_flow_driver_ops *fops;
9551 	struct rte_flow_attr attr = { .transfer = 0 };
9552 
9553 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
9554 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
9555 		return fops->get_aged_flows(dev, contexts, nb_contexts,
9556 						    error);
9557 	}
9558 	DRV_LOG(ERR,
9559 		"port %u get aged flows is not supported.",
9560 		 dev->data->port_id);
9561 	return -ENOTSUP;
9562 }
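
/*
 * Illustrative sketch, not part of the driver: the two-call pattern served
 * by the wrapper above - size the context array first, then fetch. The guard
 * macro is hypothetical.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* hypothetical guard: never compiled */
static int
example_fetch_aged_contexts(uint16_t port_id)
{
	struct rte_flow_error error;
	void **contexts;
	int n, ret;

	/* nb_contexts == 0 returns the total number of aged contexts. */
	n = rte_flow_get_aged_flows(port_id, NULL, 0, &error);
	if (n <= 0)
		return n;
	contexts = mlx5_malloc(MLX5_MEM_SYS, sizeof(void *) * n, 0,
			       SOCKET_ID_ANY);
	if (contexts == NULL)
		return -ENOMEM;
	ret = rte_flow_get_aged_flows(port_id, contexts, n, &error);
	/* Each entry is the AGE action context given at rule creation. */
	mlx5_free(contexts);
	return ret;
}
#endif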
9563 
9564 /* Wrapper for driver action_validate op callback */
9565 static int
9566 flow_drv_action_validate(struct rte_eth_dev *dev,
9567 			 const struct rte_flow_indir_action_conf *conf,
9568 			 const struct rte_flow_action *action,
9569 			 const struct mlx5_flow_driver_ops *fops,
9570 			 struct rte_flow_error *error)
9571 {
9572 	static const char err_msg[] = "indirect action validation unsupported";
9573 
9574 	if (!fops->action_validate) {
9575 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9576 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9577 				   NULL, err_msg);
9578 		return -rte_errno;
9579 	}
9580 	return fops->action_validate(dev, conf, action, error);
9581 }
9582 
9583 /**
9584  * Destroys the shared action by handle.
9585  *
9586  * @param dev
9587  *   Pointer to Ethernet device structure.
9588  * @param[in] handle
9589  *   Handle for the indirect action object to be destroyed.
9590  * @param[out] error
9591  *   Perform verbose error reporting if not NULL. PMDs initialize this
9592  *   structure in case of error only.
9593  *
9594  * @return
9595  *   0 on success, a negative errno value otherwise and rte_errno is set.
9596  *
9597  * @note: wrapper for driver action_destroy op callback.
9598  */
9599 static int
9600 mlx5_action_handle_destroy(struct rte_eth_dev *dev,
9601 			   struct rte_flow_action_handle *handle,
9602 			   struct rte_flow_error *error)
9603 {
9604 	static const char err_msg[] = "indirect action destruction unsupported";
9605 	struct rte_flow_attr attr = { .transfer = 0 };
9606 	const struct mlx5_flow_driver_ops *fops =
9607 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9608 
9609 	if (!fops->action_destroy) {
9610 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9611 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9612 				   NULL, err_msg);
9613 		return -rte_errno;
9614 	}
9615 	return fops->action_destroy(dev, handle, error);
9616 }
9617 
9618 /* Wrapper for driver action_update op callback */
9619 static int
9620 flow_drv_action_update(struct rte_eth_dev *dev,
9621 		       struct rte_flow_action_handle *handle,
9622 		       const void *update,
9623 		       const struct mlx5_flow_driver_ops *fops,
9624 		       struct rte_flow_error *error)
9625 {
9626 	static const char err_msg[] = "indirect action update unsupported";
9627 
9628 	if (!fops->action_update) {
9629 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9630 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9631 				   NULL, err_msg);
9632 		return -rte_errno;
9633 	}
9634 	return fops->action_update(dev, handle, update, error);
9635 }
9636 
9637 /* Wrapper for driver action_query op callback */
9638 static int
9639 flow_drv_action_query(struct rte_eth_dev *dev,
9640 		      const struct rte_flow_action_handle *handle,
9641 		      void *data,
9642 		      const struct mlx5_flow_driver_ops *fops,
9643 		      struct rte_flow_error *error)
9644 {
9645 	static const char err_msg[] = "indirect action query unsupported";
9646 
9647 	if (!fops->action_query) {
9648 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9649 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9650 				   NULL, err_msg);
9651 		return -rte_errno;
9652 	}
9653 	return fops->action_query(dev, handle, data, error);
9654 }
9655 
9656 /**
9657  * Create indirect action for reuse in multiple flow rules.
9658  *
9659  * @param dev
9660  *   Pointer to Ethernet device structure.
9661  * @param conf
9662  *   Pointer to indirect action object configuration.
9663  * @param[in] action
9664  *   Action configuration for indirect action object creation.
9665  * @param[out] error
9666  *   Perform verbose error reporting if not NULL. PMDs initialize this
9667  *   structure in case of error only.
9668  * @return
9669  *   A valid handle in case of success, NULL otherwise and rte_errno is set.
9670  */
9671 static struct rte_flow_action_handle *
9672 mlx5_action_handle_create(struct rte_eth_dev *dev,
9673 			  const struct rte_flow_indir_action_conf *conf,
9674 			  const struct rte_flow_action *action,
9675 			  struct rte_flow_error *error)
9676 {
9677 	static const char err_msg[] = "indirect action creation unsupported";
9678 	struct rte_flow_attr attr = { .transfer = 0 };
9679 	const struct mlx5_flow_driver_ops *fops =
9680 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9681 
9682 	if (flow_drv_action_validate(dev, conf, action, fops, error))
9683 		return NULL;
9684 	if (!fops->action_create) {
9685 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9686 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9687 				   NULL, err_msg);
9688 		return NULL;
9689 	}
9690 	return fops->action_create(dev, conf, action, error);
9691 }
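
/*
 * Illustrative sketch, not part of the driver: creating a shared COUNT
 * handle through the wrapper above, for reuse by multiple rules. The guard
 * macro is hypothetical.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* hypothetical guard: never compiled */
static struct rte_flow_action_handle *
example_create_shared_count(uint16_t port_id)
{
	const struct rte_flow_indir_action_conf conf = { .ingress = 1 };
	const struct rte_flow_action action = {
		.type = RTE_FLOW_ACTION_TYPE_COUNT,
	};
	struct rte_flow_error error;

	return rte_flow_action_handle_create(port_id, &conf, &action, &error);
}
#endif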
9692 
9693 /**
9694  * Updates in place the indirect action configuration pointed to by *handle*
9695  * with the configuration provided as the *update* argument.
9696  * The update of the indirect action configuration affects all flow rules
9697  * reusing the action via the handle.
9698  *
9699  * @param dev
9700  *   Pointer to Ethernet device structure.
9701  * @param[in] handle
9702  *   Handle for the indirect action to be updated.
9703  * @param[in] update
9704  *   Action specification used to modify the action pointed by handle.
9705  *   *update* could be of same type with the action pointed by the *handle*
9706  *   handle argument, or some other structures like a wrapper, depending on
9707  *   the indirect action type.
9708  * @param[out] error
9709  *   Perform verbose error reporting if not NULL. PMDs initialize this
9710  *   structure in case of error only.
9711  *
9712  * @return
9713  *   0 on success, a negative errno value otherwise and rte_errno is set.
9714  */
9715 static int
9716 mlx5_action_handle_update(struct rte_eth_dev *dev,
9717 		struct rte_flow_action_handle *handle,
9718 		const void *update,
9719 		struct rte_flow_error *error)
9720 {
9721 	struct rte_flow_attr attr = { .transfer = 0 };
9722 	const struct mlx5_flow_driver_ops *fops =
9723 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9724 	int ret;
9725 
9726 	ret = flow_drv_action_validate(dev, NULL,
9727 			(const struct rte_flow_action *)update, fops, error);
9728 	if (ret)
9729 		return ret;
9730 	return flow_drv_action_update(dev, handle, update, fops,
9731 				      error);
9732 }
9733 
9734 /**
9735  * Query the indirect action by handle.
9736  *
9737  * This function allows retrieving action-specific data such as counters.
9738  * Data is gathered by special action which may be present/referenced in
9739  * more than one flow rule definition.
9740  *
9741  * see @RTE_FLOW_ACTION_TYPE_COUNT
9742  *
9743  * @param dev
9744  *   Pointer to Ethernet device structure.
9745  * @param[in] handle
9746  *   Handle for the indirect action to query.
9747  * @param[in, out] data
9748  *   Pointer to storage for the associated query data type.
9749  * @param[out] error
9750  *   Perform verbose error reporting if not NULL. PMDs initialize this
9751  *   structure in case of error only.
9752  *
9753  * @return
9754  *   0 on success, a negative errno value otherwise and rte_errno is set.
9755  */
9756 static int
9757 mlx5_action_handle_query(struct rte_eth_dev *dev,
9758 			 const struct rte_flow_action_handle *handle,
9759 			 void *data,
9760 			 struct rte_flow_error *error)
9761 {
9762 	struct rte_flow_attr attr = { .transfer = 0 };
9763 	const struct mlx5_flow_driver_ops *fops =
9764 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9765 
9766 	return flow_drv_action_query(dev, handle, data, fops, error);
9767 }
9768 
9769 /**
9770  * Destroy all indirect actions (shared RSS).
9771  *
9772  * @param dev
9773  *   Pointer to Ethernet device.
9774  *
9775  * @return
9776  *   0 on success, a negative errno value otherwise and rte_errno is set.
9777  */
9778 int
9779 mlx5_action_handle_flush(struct rte_eth_dev *dev)
9780 {
9781 	struct rte_flow_error error;
9782 	struct mlx5_priv *priv = dev->data->dev_private;
9783 	struct mlx5_shared_action_rss *shared_rss;
9784 	int ret = 0;
9785 	uint32_t idx;
9786 
9787 	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
9788 		      priv->rss_shared_actions, idx, shared_rss, next) {
9789 		ret |= mlx5_action_handle_destroy(dev,
9790 		       (struct rte_flow_action_handle *)(uintptr_t)idx, &error);
9791 	}
9792 	return ret;
9793 }
9794 
9795 /**
9796  * Validate existing indirect actions against current device configuration
9797  * and attach them to device resources.
9798  *
9799  * @param dev
9800  *   Pointer to Ethernet device.
9801  *
9802  * @return
9803  *   0 on success, a negative errno value otherwise and rte_errno is set.
9804  */
9805 int
9806 mlx5_action_handle_attach(struct rte_eth_dev *dev)
9807 {
9808 	struct mlx5_priv *priv = dev->data->dev_private;
9809 	int ret = 0;
9810 	struct mlx5_ind_table_obj *ind_tbl, *ind_tbl_last;
9811 
9812 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9813 		const char *message;
9814 		uint32_t queue_idx;
9815 
9816 		ret = mlx5_validate_rss_queues(dev, ind_tbl->queues,
9817 					       ind_tbl->queues_n,
9818 					       &message, &queue_idx);
9819 		if (ret != 0) {
9820 			DRV_LOG(ERR, "Port %u cannot use queue %u in RSS: %s",
9821 				dev->data->port_id, ind_tbl->queues[queue_idx],
9822 				message);
9823 			break;
9824 		}
9825 	}
9826 	if (ret != 0)
9827 		return ret;
9828 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9829 		ret = mlx5_ind_table_obj_attach(dev, ind_tbl);
9830 		if (ret != 0) {
9831 			DRV_LOG(ERR, "Port %u could not attach "
9832 				"indirection table obj %p",
9833 				dev->data->port_id, (void *)ind_tbl);
9834 			goto error;
9835 		}
9836 	}
9837 
9838 	return 0;
9839 error:
9840 	ind_tbl_last = ind_tbl;
9841 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9842 		if (ind_tbl == ind_tbl_last)
9843 			break;
9844 		if (mlx5_ind_table_obj_detach(dev, ind_tbl) != 0)
9845 			DRV_LOG(CRIT, "Port %u could not detach "
9846 				"indirection table obj %p on rollback",
9847 				dev->data->port_id, (void *)ind_tbl);
9848 	}
9849 	return ret;
9850 }
9851 
9852 /**
9853  * Detach indirect actions of the device from its resources.
9854  *
9855  * @param dev
9856  *   Pointer to Ethernet device.
9857  *
9858  * @return
9859  *   0 on success, a negative errno value otherwise and rte_errno is set.
9860  */
9861 int
9862 mlx5_action_handle_detach(struct rte_eth_dev *dev)
9863 {
9864 	struct mlx5_priv *priv = dev->data->dev_private;
9865 	int ret = 0;
9866 	struct mlx5_ind_table_obj *ind_tbl, *ind_tbl_last;
9867 
9868 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9869 		ret = mlx5_ind_table_obj_detach(dev, ind_tbl);
9870 		if (ret != 0) {
9871 			DRV_LOG(ERR, "Port %u could not detach "
9872 				"indirection table obj %p",
9873 				dev->data->port_id, (void *)ind_tbl);
9874 			goto error;
9875 		}
9876 	}
9877 	return 0;
9878 error:
9879 	ind_tbl_last = ind_tbl;
9880 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9881 		if (ind_tbl == ind_tbl_last)
9882 			break;
9883 		if (mlx5_ind_table_obj_attach(dev, ind_tbl) != 0)
9884 			DRV_LOG(CRIT, "Port %u could not attach "
9885 				"indirection table obj %p on rollback",
9886 				dev->data->port_id, (void *)ind_tbl);
9887 	}
9888 	return ret;
9889 }
9890 
9891 #ifndef HAVE_MLX5DV_DR
9892 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
9893 #else
9894 #define MLX5_DOMAIN_SYNC_FLOW \
9895 	(MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
9896 #endif
9897 
9898 int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
9899 {
9900 	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
9901 	const struct mlx5_flow_driver_ops *fops;
9902 	int ret;
9903 	struct rte_flow_attr attr = { .transfer = 0 };
9904 
9905 	fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9906 	ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
9907 	if (ret > 0)
9908 		ret = -ret;
9909 	return ret;
9910 }
9911 
9912 const struct mlx5_flow_tunnel *
9913 mlx5_get_tof(const struct rte_flow_item *item,
9914 	     const struct rte_flow_action *action,
9915 	     enum mlx5_tof_rule_type *rule_type)
9916 {
9917 	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
9918 		if (item->type == (typeof(item->type))
9919 				  MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL) {
9920 			*rule_type = MLX5_TUNNEL_OFFLOAD_MATCH_RULE;
9921 			return flow_items_to_tunnel(item);
9922 		}
9923 	}
9924 	for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
9925 		if (action->type == (typeof(action->type))
9926 				    MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET) {
9927 			*rule_type = MLX5_TUNNEL_OFFLOAD_SET_RULE;
9928 			return flow_actions_to_tunnel(action);
9929 		}
9930 	}
9931 	return NULL;
9932 }
9933 
9934 /**
9935  * Tunnel offload functionality is defined for the DV environment only.
9936  */
9937 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9938 __extension__
9939 union tunnel_offload_mark {
9940 	uint32_t val;
9941 	struct {
9942 		uint32_t app_reserve:8;
9943 		uint32_t table_id:15;
9944 		uint32_t transfer:1;
9945 		uint32_t _unused_:8;
9946 	};
9947 };
9948 
9949 static bool
9950 mlx5_access_tunnel_offload_db
9951 	(struct rte_eth_dev *dev,
9952 	 bool (*match)(struct rte_eth_dev *,
9953 		       struct mlx5_flow_tunnel *, const void *),
9954 	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
9955 	 void (*miss)(struct rte_eth_dev *, void *),
9956 	 void *ctx, bool lock_op);
9957 
9958 static int
9959 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
9960 			     struct rte_flow *flow,
9961 			     const struct rte_flow_attr *attr,
9962 			     const struct rte_flow_action *app_actions,
9963 			     uint32_t flow_idx,
9964 			     const struct mlx5_flow_tunnel *tunnel,
9965 			     struct tunnel_default_miss_ctx *ctx,
9966 			     struct rte_flow_error *error)
9967 {
9968 	struct mlx5_priv *priv = dev->data->dev_private;
9969 	struct mlx5_flow *dev_flow;
9970 	struct rte_flow_attr miss_attr = *attr;
9971 	const struct rte_flow_item miss_items[2] = {
9972 		{
9973 			.type = RTE_FLOW_ITEM_TYPE_ETH,
9974 			.spec = NULL,
9975 			.last = NULL,
9976 			.mask = NULL
9977 		},
9978 		{
9979 			.type = RTE_FLOW_ITEM_TYPE_END,
9980 			.spec = NULL,
9981 			.last = NULL,
9982 			.mask = NULL
9983 		}
9984 	};
9985 	union tunnel_offload_mark mark_id;
9986 	struct rte_flow_action_mark miss_mark;
9987 	struct rte_flow_action miss_actions[3] = {
9988 		[0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
9989 		[2] = { .type = RTE_FLOW_ACTION_TYPE_END,  .conf = NULL }
9990 	};
9991 	const struct rte_flow_action_jump *jump_data;
9992 	uint32_t i, flow_table = 0; /* prevent compilation warning */
9993 	struct flow_grp_info grp_info = {
9994 		.external = 1,
9995 		.transfer = attr->transfer,
9996 		.fdb_def_rule = !!priv->fdb_def_rule,
9997 		.std_tbl_fix = 0,
9998 	};
9999 	int ret;
10000 
10001 	if (!attr->transfer) {
10002 		uint32_t q_size;
10003 
10004 		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
10005 		if (!priv->reta_idx_n || !priv->rxqs_n)
10006 			return rte_flow_error_set
10007 				(error, EINVAL,
10008 				RTE_FLOW_ERROR_TYPE_ACTION_CONF,
10009 				NULL, "invalid port configuration");
10010 		q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
10011 		ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
10012 					 0, SOCKET_ID_ANY);
10013 		if (!ctx->queue)
10014 			return rte_flow_error_set
10015 				(error, ENOMEM,
10016 				RTE_FLOW_ERROR_TYPE_ACTION_CONF,
10017 				NULL, "invalid default miss RSS");
10018 		ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT;
10019 		ctx->action_rss.level = 0;
10020 		ctx->action_rss.types = priv->rss_conf.rss_hf;
10021 		ctx->action_rss.key_len = priv->rss_conf.rss_key_len;
10022 		ctx->action_rss.queue_num = priv->reta_idx_n;
10023 		ctx->action_rss.key = priv->rss_conf.rss_key;
10024 		ctx->action_rss.queue = ctx->queue;
10025 		if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
10026 			ctx->action_rss.types = 0;
10027 		for (i = 0; i != priv->reta_idx_n; ++i)
10028 			ctx->queue[i] = (*priv->reta_idx)[i];
10029 	} else {
10030 		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
10031 		ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
10032 	}
10033 	miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
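	/*
	 * Locate the JUMP action; tunnel offload steering rules are
	 * expected to carry one.
	 */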
10034 	for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++);
10035 	jump_data = app_actions->conf;
10036 	miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
10037 	miss_attr.group = jump_data->group;
10038 	ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
10039 				       &flow_table, &grp_info, error);
10040 	if (ret)
10041 		return rte_flow_error_set(error, EINVAL,
10042 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
10043 					  NULL, "invalid tunnel id");
10044 	mark_id.app_reserve = 0;
10045 	mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
10046 	mark_id.transfer = !!attr->transfer;
10047 	mark_id._unused_ = 0;
10048 	miss_mark.id = mark_id.val;
10049 	dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
10050 				    miss_items, miss_actions, flow_idx, error);
10051 	if (!dev_flow)
10052 		return -rte_errno;
10053 	dev_flow->flow = flow;
10054 	dev_flow->external = true;
10055 	dev_flow->tunnel = tunnel;
10056 	dev_flow->tof_type = MLX5_TUNNEL_OFFLOAD_MISS_RULE;
10057 	/* Subflow object was created, we must include it in the list. */
10058 	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
10059 		      dev_flow->handle, next);
10060 	DRV_LOG(DEBUG,
10061 		"port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
10062 		dev->data->port_id, tunnel->app_tunnel.type,
10063 		tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
10064 	ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
10065 				  miss_actions, error);
10066 	if (!ret)
10067 		ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
10068 						  error);
10069 
10070 	return ret;
10071 }
10072 
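/**
 * Decode a tunnel offload MARK value into the flow table entry it
 * references.
 *
 * @return
 *   The matching flow table entry, or NULL if the mark does not refer
 *   to a registered table.
 */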
10073 static const struct mlx5_flow_tbl_data_entry *
10074 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
10075 {
10076 	struct mlx5_priv *priv = dev->data->dev_private;
10077 	struct mlx5_dev_ctx_shared *sh = priv->sh;
10078 	struct mlx5_list_entry *he;
10079 	union tunnel_offload_mark mbits = { .val = mark };
10080 	union mlx5_flow_tbl_key table_key = {
10081 		{
10082 			.level = tunnel_id_to_flow_tbl(mbits.table_id),
10083 			.id = 0,
10084 			.reserved = 0,
10085 			.dummy = 0,
10086 			.is_fdb = !!mbits.transfer,
10087 			.is_egress = 0,
10088 		}
10089 	};
10090 	struct mlx5_flow_cb_ctx ctx = {
10091 		.data = &table_key.v64,
10092 	};
10093 
10094 	he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, &ctx);
10095 	return he ?
10096 	       container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL;
10097 }
10098 
10099 static void
10100 mlx5_flow_tunnel_grp2tbl_remove_cb(void *tool_ctx,
10101 				   struct mlx5_list_entry *entry)
10102 {
10103 	struct mlx5_dev_ctx_shared *sh = tool_ctx;
10104 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10105 
10106 	mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10107 			tunnel_flow_tbl_to_id(tte->flow_table));
10108 	mlx5_free(tte);
10109 }
10110 
10111 static int
10112 mlx5_flow_tunnel_grp2tbl_match_cb(void *tool_ctx __rte_unused,
10113 				  struct mlx5_list_entry *entry, void *cb_ctx)
10114 {
10115 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
10116 	union tunnel_tbl_key tbl = {
10117 		.val = *(uint64_t *)(ctx->data),
10118 	};
10119 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10120 
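	/* Zero means a match per the mlx5 list/hlist callback convention. */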
10121 	return tbl.tunnel_id != tte->tunnel_id || tbl.group != tte->group;
10122 }
10123 
10124 static struct mlx5_list_entry *
10125 mlx5_flow_tunnel_grp2tbl_create_cb(void *tool_ctx, void *cb_ctx)
10126 {
10127 	struct mlx5_dev_ctx_shared *sh = tool_ctx;
10128 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
10129 	struct tunnel_tbl_entry *tte;
10130 	union tunnel_tbl_key tbl = {
10131 		.val = *(uint64_t *)(ctx->data),
10132 	};
10133 
10134 	tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
10135 			  sizeof(*tte), 0,
10136 			  SOCKET_ID_ANY);
10137 	if (!tte)
10138 		goto err;
10139 	mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10140 			  &tte->flow_table);
10141 	if (tte->flow_table >= MLX5_MAX_TABLES) {
10142 		DRV_LOG(ERR, "Tunnel TBL ID %d exceed max limit.",
10143 			tte->flow_table);
10144 		mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10145 				tte->flow_table);
10146 		goto err;
10147 	} else if (!tte->flow_table) {
10148 		goto err;
10149 	}
10150 	tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table);
10151 	tte->tunnel_id = tbl.tunnel_id;
10152 	tte->group = tbl.group;
10153 	return &tte->hash;
10154 err:
10155 	if (tte)
10156 		mlx5_free(tte);
10157 	return NULL;
10158 }
10159 
10160 static struct mlx5_list_entry *
10161 mlx5_flow_tunnel_grp2tbl_clone_cb(void *tool_ctx __rte_unused,
10162 				  struct mlx5_list_entry *oentry,
10163 				  void *cb_ctx __rte_unused)
10164 {
10165 	struct tunnel_tbl_entry *tte = mlx5_malloc(MLX5_MEM_SYS, sizeof(*tte),
10166 						   0, SOCKET_ID_ANY);
10167 
10168 	if (!tte)
10169 		return NULL;
10170 	memcpy(tte, oentry, sizeof(*tte));
10171 	return &tte->hash;
10172 }
10173 
10174 static void
10175 mlx5_flow_tunnel_grp2tbl_clone_free_cb(void *tool_ctx __rte_unused,
10176 				       struct mlx5_list_entry *entry)
10177 {
10178 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10179 
10180 	mlx5_free(tte);
10181 }
10182 
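/**
 * Translate a tunnel flow group to a flow table id, registering a new
 * group-to-table entry in the tunnel (or hub) hash on first use.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */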
10183 static uint32_t
10184 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
10185 				const struct mlx5_flow_tunnel *tunnel,
10186 				uint32_t group, uint32_t *table,
10187 				struct rte_flow_error *error)
10188 {
10189 	struct mlx5_list_entry *he;
10190 	struct tunnel_tbl_entry *tte;
10191 	union tunnel_tbl_key key = {
10192 		.tunnel_id = tunnel ? tunnel->tunnel_id : 0,
10193 		.group = group
10194 	};
10195 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10196 	struct mlx5_hlist *group_hash;
10197 	struct mlx5_flow_cb_ctx ctx = {
10198 		.data = &key.val,
10199 	};
10200 
10201 	group_hash = tunnel ? tunnel->groups : thub->groups;
10202 	he = mlx5_hlist_register(group_hash, key.val, &ctx);
10203 	if (!he)
10204 		return rte_flow_error_set(error, EINVAL,
10205 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
10206 					  NULL,
10207 					  "tunnel group index not supported");
10208 	tte = container_of(he, typeof(*tte), hash);
10209 	*table = tte->flow_table;
10210 	DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
10211 		dev->data->port_id, key.tunnel_id, group, *table);
10212 	return 0;
10213 }
10214 
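/**
 * Release a PMD tunnel object: unlink it from the tunnel hub list,
 * destroy its group hash table and return its id to the indexed pool.
 */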
10215 static void
10216 mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
10217 		      struct mlx5_flow_tunnel *tunnel)
10218 {
10219 	struct mlx5_priv *priv = dev->data->dev_private;
10220 	struct mlx5_indexed_pool *ipool;
10221 
10222 	DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
10223 		dev->data->port_id, tunnel->tunnel_id);
10224 	LIST_REMOVE(tunnel, chain);
10225 	mlx5_hlist_destroy(tunnel->groups);
10226 	ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
10227 	mlx5_ipool_free(ipool, tunnel->tunnel_id);
10228 }
10229 
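/**
 * Iterate over the tunnel offload database under the hub spinlock and
 * run @p match on each tunnel until the first match.
 *
 * @param match
 *   Mandatory predicate; iteration stops on the first tunnel it accepts.
 * @param hit
 *   Optional callback invoked with the matched tunnel.
 * @param miss
 *   Optional callback invoked when no tunnel matches.
 * @param ctx
 *   Opaque context passed to all callbacks.
 * @param lock_op
 *   If true, the hit/miss callbacks run with the spinlock held,
 *   otherwise the lock is released before they are invoked.
 *
 * @return
 *   true when a tunnel matched, false otherwise.
 */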
10230 static bool
10231 mlx5_access_tunnel_offload_db
10232 	(struct rte_eth_dev *dev,
10233 	 bool (*match)(struct rte_eth_dev *,
10234 		       struct mlx5_flow_tunnel *, const void *),
10235 	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
10236 	 void (*miss)(struct rte_eth_dev *, void *),
10237 	 void *ctx, bool lock_op)
10238 {
10239 	bool verdict = false;
10240 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10241 	struct mlx5_flow_tunnel *tunnel;
10242 
10243 	rte_spinlock_lock(&thub->sl);
10244 	LIST_FOREACH(tunnel, &thub->tunnels, chain) {
10245 		verdict = match(dev, tunnel, (const void *)ctx);
10246 		if (verdict)
10247 			break;
10248 	}
10249 	if (!lock_op)
10250 		rte_spinlock_unlock(&thub->sl);
10251 	if (verdict && hit)
10252 		hit(dev, tunnel, ctx);
10253 	if (!verdict && miss)
10254 		miss(dev, ctx);
10255 	if (lock_op)
10256 		rte_spinlock_unlock(&thub->sl);
10257 
10258 	return verdict;
10259 }
10260 
10261 struct tunnel_db_find_tunnel_id_ctx {
10262 	uint32_t tunnel_id;
10263 	struct mlx5_flow_tunnel *tunnel;
10264 };
10265 
10266 static bool
10267 find_tunnel_id_match(struct rte_eth_dev *dev,
10268 		     struct mlx5_flow_tunnel *tunnel, const void *x)
10269 {
10270 	const struct tunnel_db_find_tunnel_id_ctx *ctx = x;
10271 
10272 	RTE_SET_USED(dev);
10273 	return tunnel->tunnel_id == ctx->tunnel_id;
10274 }
10275 
10276 static void
10277 find_tunnel_id_hit(struct rte_eth_dev *dev,
10278 		   struct mlx5_flow_tunnel *tunnel, void *x)
10279 {
10280 	struct tunnel_db_find_tunnel_id_ctx *ctx = x;

10281 	RTE_SET_USED(dev);
10282 	ctx->tunnel = tunnel;
10283 }
10284 
10285 static struct mlx5_flow_tunnel *
10286 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
10287 {
10288 	struct tunnel_db_find_tunnel_id_ctx ctx = {
10289 		.tunnel_id = id,
10290 	};
10291 
10292 	mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match,
10293 				      find_tunnel_id_hit, NULL, &ctx, true);
10294 
10295 	return ctx.tunnel;
10296 }
10297 
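/**
 * Allocate a PMD tunnel object for @p app_tunnel and initialize the
 * private item/action handed back to the application for flow creation.
 *
 * @return
 *   The new tunnel object, or NULL on allocation failure or when the
 *   tunnel id space is exhausted.
 */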
10298 static struct mlx5_flow_tunnel *
10299 mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
10300 			  const struct rte_flow_tunnel *app_tunnel)
10301 {
10302 	struct mlx5_priv *priv = dev->data->dev_private;
10303 	struct mlx5_indexed_pool *ipool;
10304 	struct mlx5_flow_tunnel *tunnel;
10305 	uint32_t id;
10306 
10307 	ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
10308 	tunnel = mlx5_ipool_zmalloc(ipool, &id);
10309 	if (!tunnel)
10310 		return NULL;
10311 	if (id >= MLX5_MAX_TUNNELS) {
10312 		mlx5_ipool_free(ipool, id);
10313 		DRV_LOG(ERR, "Tunnel ID %d exceed max limit.", id);
10314 		return NULL;
10315 	}
10316 	tunnel->groups = mlx5_hlist_create("tunnel groups", 64, false, true,
10317 					   priv->sh,
10318 					   mlx5_flow_tunnel_grp2tbl_create_cb,
10319 					   mlx5_flow_tunnel_grp2tbl_match_cb,
10320 					   mlx5_flow_tunnel_grp2tbl_remove_cb,
10321 					   mlx5_flow_tunnel_grp2tbl_clone_cb,
10322 					mlx5_flow_tunnel_grp2tbl_clone_free_cb);
10323 	if (!tunnel->groups) {
10324 		mlx5_ipool_free(ipool, id);
10325 		return NULL;
10326 	}
10327 	/* Initialize the new PMD tunnel. */
10328 	memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
10329 	tunnel->tunnel_id = id;
10330 	tunnel->action.type = (typeof(tunnel->action.type))
10331 			      MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
10332 	tunnel->action.conf = tunnel;
10333 	tunnel->item.type = (typeof(tunnel->item.type))
10334 			    MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
10335 	tunnel->item.spec = tunnel;
10336 	tunnel->item.last = NULL;
10337 	tunnel->item.mask = NULL;
10338 
10339 	DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
10340 		dev->data->port_id, tunnel->tunnel_id);
10341 
10342 	return tunnel;
10343 }
10344 
10345 struct tunnel_db_get_tunnel_ctx {
10346 	const struct rte_flow_tunnel *app_tunnel;
10347 	struct mlx5_flow_tunnel *tunnel;
10348 };
10349 
10350 static bool get_tunnel_match(struct rte_eth_dev *dev,
10351 			     struct mlx5_flow_tunnel *tunnel, const void *x)
10352 {
10353 	const struct tunnel_db_get_tunnel_ctx *ctx = x;
10354 
10355 	RTE_SET_USED(dev);
10356 	return !memcmp(ctx->app_tunnel, &tunnel->app_tunnel,
10357 		       sizeof(*ctx->app_tunnel));
10358 }
10359 
10360 static void get_tunnel_hit(struct rte_eth_dev *dev,
10361 			   struct mlx5_flow_tunnel *tunnel, void *x)
10362 {
10363 	/* called under tunnel spinlock protection */
10364 	struct tunnel_db_get_tunnel_ctx *ctx = x;
10365 
10366 	RTE_SET_USED(dev);
10367 	__atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
10368 	ctx->tunnel = tunnel;
10369 }
10370 
10371 static void get_tunnel_miss(struct rte_eth_dev *dev, void *x)
10372 {
10373 	/* called under tunnel spinlock protection */
10374 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10375 	struct tunnel_db_get_tunnel_ctx *ctx = x;
10376 
10377 	rte_spinlock_unlock(&thub->sl);
10378 	ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel);
10379 	rte_spinlock_lock(&thub->sl);
10380 	if (ctx->tunnel) {
10381 		ctx->tunnel->refctn = 1;
10382 		LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain);
10383 	}
10384 }
10385 
10386 
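/**
 * Find the PMD tunnel matching @p app_tunnel or allocate a new one.
 * The reference count of the returned tunnel is incremented.
 *
 * @return
 *   0 on success, -ENOMEM otherwise.
 */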
10387 static int
10388 mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
10389 		     const struct rte_flow_tunnel *app_tunnel,
10390 		     struct mlx5_flow_tunnel **tunnel)
10391 {
10392 	struct tunnel_db_get_tunnel_ctx ctx = {
10393 		.app_tunnel = app_tunnel,
10394 	};
10395 
10396 	mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit,
10397 				      get_tunnel_miss, &ctx, true);
10398 	*tunnel = ctx.tunnel;
10399 	return ctx.tunnel ? 0 : -ENOMEM;
10400 }
10401 
10402 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
10403 {
10404 	struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
10405 
10406 	if (!thub)
10407 		return;
10408 	if (!LIST_EMPTY(&thub->tunnels))
10409 		DRV_LOG(WARNING, "port %u tunnels present", port_id);
10410 	mlx5_hlist_destroy(thub->groups);
10411 	mlx5_free(thub);
10412 }
10413 
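/**
 * Allocate and initialize the tunnel hub of a shared device context.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */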
10414 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
10415 {
10416 	int err;
10417 	struct mlx5_flow_tunnel_hub *thub;
10418 
10419 	thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
10420 			   0, SOCKET_ID_ANY);
10421 	if (!thub)
10422 		return -ENOMEM;
10423 	LIST_INIT(&thub->tunnels);
10424 	rte_spinlock_init(&thub->sl);
10425 	thub->groups = mlx5_hlist_create("flow groups", 64,
10426 					 false, true, sh,
10427 					 mlx5_flow_tunnel_grp2tbl_create_cb,
10428 					 mlx5_flow_tunnel_grp2tbl_match_cb,
10429 					 mlx5_flow_tunnel_grp2tbl_remove_cb,
10430 					 mlx5_flow_tunnel_grp2tbl_clone_cb,
10431 					mlx5_flow_tunnel_grp2tbl_clone_free_cb);
10432 	if (!thub->groups) {
10433 		err = -rte_errno;
10434 		goto err;
10435 	}
10436 	sh->tunnel_hub = thub;
10437 
10438 	return 0;
10439 
10440 err:
10441 	/* The only failure path is hlist creation; thub is always valid here. */
10442 	mlx5_free(thub);
10445 	return err;
10446 }
10447 
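/**
 * Check that an application tunnel descriptor can be offloaded:
 * DV flows and tunnel offload must be enabled and the tunnel type
 * must be one of VXLAN, GRE, NVGRE or GENEVE.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */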
10448 static inline int
10449 mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
10450 			  struct rte_flow_tunnel *tunnel,
10451 			  struct rte_flow_error *error)
10452 {
10453 	struct mlx5_priv *priv = dev->data->dev_private;
10454 
10455 	if (!priv->sh->config.dv_flow_en)
10456 		return rte_flow_error_set(error, ENOTSUP,
10457 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10458 					  "flow DV interface is off");
10459 	if (!is_tunnel_offload_active(dev))
10460 		return rte_flow_error_set(error, ENOTSUP,
10461 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10462 					  "tunnel offload was not activated");
10463 	if (!tunnel)
10464 		return rte_flow_error_set(error, EINVAL,
10465 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10466 					  "no application tunnel");
10467 	switch (tunnel->type) {
10468 	default:
10469 		return rte_flow_error_set(error, EINVAL,
10470 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10471 					  "unsupported tunnel type");
10472 	case RTE_FLOW_ITEM_TYPE_VXLAN:
10473 	case RTE_FLOW_ITEM_TYPE_GRE:
10474 	case RTE_FLOW_ITEM_TYPE_NVGRE:
10475 	case RTE_FLOW_ITEM_TYPE_GENEVE:
10476 		break;
10477 	}
10478 	return 0;
10479 }
10480 
10481 static int
10482 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
10483 		    struct rte_flow_tunnel *app_tunnel,
10484 		    struct rte_flow_action **actions,
10485 		    uint32_t *num_of_actions,
10486 		    struct rte_flow_error *error)
10487 {
10488 	struct mlx5_flow_tunnel *tunnel;
10489 	int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
10490 
10491 	if (ret)
10492 		return ret;
10493 	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
10494 	if (ret < 0) {
10495 		return rte_flow_error_set(error, -ret,
10496 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10497 					  "failed to initialize pmd tunnel");
10498 	}
10499 	*actions = &tunnel->action;
10500 	*num_of_actions = 1;
10501 	return 0;
10502 }
10503 
10504 static int
10505 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
10506 		       struct rte_flow_tunnel *app_tunnel,
10507 		       struct rte_flow_item **items,
10508 		       uint32_t *num_of_items,
10509 		       struct rte_flow_error *error)
10510 {
10511 	struct mlx5_flow_tunnel *tunnel;
10512 	int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
10513 
10514 	if (ret)
10515 		return ret;
10516 	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
10517 	if (ret < 0) {
10518 		return rte_flow_error_set(error, -ret,
10519 					  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
10520 					  "failed to initialize pmd tunnel");
10521 	}
10522 	*items = &tunnel->item;
10523 	*num_of_items = 1;
10524 	return 0;
10525 }
10526 
10527 struct tunnel_db_element_release_ctx {
10528 	struct rte_flow_item *items;
10529 	struct rte_flow_action *actions;
10530 	uint32_t num_elements;
10531 	struct rte_flow_error *error;
10532 	int ret;
10533 };
10534 
10535 static bool
10536 tunnel_element_release_match(struct rte_eth_dev *dev,
10537 			     struct mlx5_flow_tunnel *tunnel, const void *x)
10538 {
10539 	const struct tunnel_db_element_release_ctx *ctx = x;
10540 
10541 	RTE_SET_USED(dev);
10542 	if (ctx->num_elements != 1)
10543 		return false;
10544 	else if (ctx->items)
10545 		return ctx->items == &tunnel->item;
10546 	else if (ctx->actions)
10547 		return ctx->actions == &tunnel->action;
10548 
10549 	return false;
10550 }
10551 
10552 static void
10553 tunnel_element_release_hit(struct rte_eth_dev *dev,
10554 			   struct mlx5_flow_tunnel *tunnel, void *x)
10555 {
10556 	struct tunnel_db_element_release_ctx *ctx = x;

10557 	ctx->ret = 0;
10558 	if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
10559 		mlx5_flow_tunnel_free(dev, tunnel);
10560 }
10561 
10562 static void
10563 tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
10564 {
10565 	struct tunnel_db_element_release_ctx *ctx = x;

10566 	RTE_SET_USED(dev);
10567 	ctx->ret = rte_flow_error_set(ctx->error, EINVAL,
10568 				      RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
10569 				      "invalid argument");
10570 }
10571 
10572 static int
10573 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
10574 		       struct rte_flow_item *pmd_items,
10575 		       uint32_t num_items, struct rte_flow_error *err)
10576 {
10577 	struct tunnel_db_element_release_ctx ctx = {
10578 		.items = pmd_items,
10579 		.actions = NULL,
10580 		.num_elements = num_items,
10581 		.error = err,
10582 	};
10583 
10584 	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
10585 				      tunnel_element_release_hit,
10586 				      tunnel_element_release_miss, &ctx, false);
10587 
10588 	return ctx.ret;
10589 }
10590 
10591 static int
10592 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
10593 			 struct rte_flow_action *pmd_actions,
10594 			 uint32_t num_actions, struct rte_flow_error *err)
10595 {
10596 	struct tunnel_db_element_release_ctx ctx = {
10597 		.items = NULL,
10598 		.actions = pmd_actions,
10599 		.num_elements = num_actions,
10600 		.error = err,
10601 	};
10602 
10603 	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
10604 				      tunnel_element_release_hit,
10605 				      tunnel_element_release_miss, &ctx, false);
10606 
10607 	return ctx.ret;
10608 }
10609 
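/**
 * Restore tunnel information from the FDIR mark of a received packet
 * (implements the rte_flow_get_restore_info() callback).
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */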
10610 static int
10611 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
10612 				  struct rte_mbuf *m,
10613 				  struct rte_flow_restore_info *info,
10614 				  struct rte_flow_error *err)
10615 {
10616 	uint64_t ol_flags = m->ol_flags;
10617 	const struct mlx5_flow_tbl_data_entry *tble;
10618 	const uint64_t mask = RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
10619 
10620 	if (!is_tunnel_offload_active(dev)) {
10621 		info->flags = 0;
10622 		return 0;
10623 	}
10624 
10625 	if ((ol_flags & mask) != mask)
10626 		goto err;
10627 	tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
10628 	if (!tble) {
10629 		DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
10630 			dev->data->port_id, m->hash.fdir.hi);
10631 		goto err;
10632 	}
10633 	MLX5_ASSERT(tble->tunnel);
10634 	memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel));
10635 	info->group_id = tble->group_id;
10636 	info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
10637 		      RTE_FLOW_RESTORE_INFO_GROUP_ID |
10638 		      RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
10639 
10640 	return 0;
10641 
10642 err:
10643 	return rte_flow_error_set(err, EINVAL,
10644 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10645 				  "failed to get restore info");
10646 }
10647 
10648 #else /* HAVE_IBV_FLOW_DV_SUPPORT */
10649 static int
10650 mlx5_flow_tunnel_decap_set(__rte_unused struct rte_eth_dev *dev,
10651 			   __rte_unused struct rte_flow_tunnel *app_tunnel,
10652 			   __rte_unused struct rte_flow_action **actions,
10653 			   __rte_unused uint32_t *num_of_actions,
10654 			   __rte_unused struct rte_flow_error *error)
10655 {
10656 	return -ENOTSUP;
10657 }
10658 
10659 static int
10660 mlx5_flow_tunnel_match(__rte_unused struct rte_eth_dev *dev,
10661 		       __rte_unused struct rte_flow_tunnel *app_tunnel,
10662 		       __rte_unused struct rte_flow_item **items,
10663 		       __rte_unused uint32_t *num_of_items,
10664 		       __rte_unused struct rte_flow_error *error)
10665 {
10666 	return -ENOTSUP;
10667 }
10668 
10669 static int
10670 mlx5_flow_tunnel_item_release(__rte_unused struct rte_eth_dev *dev,
10671 			      __rte_unused struct rte_flow_item *pmd_items,
10672 			      __rte_unused uint32_t num_items,
10673 			      __rte_unused struct rte_flow_error *err)
10674 {
10675 	return -ENOTSUP;
10676 }
10677 
10678 static int
10679 mlx5_flow_tunnel_action_release(__rte_unused struct rte_eth_dev *dev,
10680 				__rte_unused struct rte_flow_action *pmd_action,
10681 				__rte_unused uint32_t num_actions,
10682 				__rte_unused struct rte_flow_error *err)
10683 {
10684 	return -ENOTSUP;
10685 }
10686 
10687 static int
10688 mlx5_flow_tunnel_get_restore_info(__rte_unused struct rte_eth_dev *dev,
10689 				  __rte_unused struct rte_mbuf *m,
10690 				  __rte_unused struct rte_flow_restore_info *i,
10691 				  __rte_unused struct rte_flow_error *err)
10692 {
10693 	return -ENOTSUP;
10694 }
10695 
10696 static int
10697 flow_tunnel_add_default_miss(__rte_unused struct rte_eth_dev *dev,
10698 			     __rte_unused struct rte_flow *flow,
10699 			     __rte_unused const struct rte_flow_attr *attr,
10700 			     __rte_unused const struct rte_flow_action *actions,
10701 			     __rte_unused uint32_t flow_idx,
10702 			     __rte_unused const struct mlx5_flow_tunnel *tunnel,
10703 			     __rte_unused struct tunnel_default_miss_ctx *ctx,
10704 			     __rte_unused struct rte_flow_error *error)
10705 {
10706 	return -ENOTSUP;
10707 }
10708 
10709 static struct mlx5_flow_tunnel *
10710 mlx5_find_tunnel_id(__rte_unused struct rte_eth_dev *dev,
10711 		    __rte_unused uint32_t id)
10712 {
10713 	return NULL;
10714 }
10715 
10716 static void
10717 mlx5_flow_tunnel_free(__rte_unused struct rte_eth_dev *dev,
10718 		      __rte_unused struct mlx5_flow_tunnel *tunnel)
10719 {
10720 }
10721 
10722 static uint32_t
10723 tunnel_flow_group_to_flow_table(__rte_unused struct rte_eth_dev *dev,
10724 				__rte_unused const struct mlx5_flow_tunnel *t,
10725 				__rte_unused uint32_t group,
10726 				__rte_unused uint32_t *table,
10727 				struct rte_flow_error *error)
10728 {
10729 	return rte_flow_error_set(error, ENOTSUP,
10730 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10731 				  "tunnel offload requires DV support");
10732 }
10733 
10734 void
10735 mlx5_release_tunnel_hub(__rte_unused struct mlx5_dev_ctx_shared *sh,
10736 			__rte_unused  uint16_t port_id)
10737 {
10738 }
10739 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
10740 
10741 /* Flex flow item API */
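/**
 * Create a flex item handle (rte_flow_flex_item_create() callback).
 * Flex items are supported on BlueField ports only.
 *
 * @return
 *   Non-NULL opaque handle on success, NULL otherwise and rte_errno is set.
 */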
10742 static struct rte_flow_item_flex_handle *
10743 mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
10744 			   const struct rte_flow_item_flex_conf *conf,
10745 			   struct rte_flow_error *error)
10746 {
10747 	static const char err_msg[] = "flex item creation unsupported";
10748 	struct mlx5_priv *priv = dev->data->dev_private;
10749 	struct rte_flow_attr attr = { .transfer = 0 };
10750 	const struct mlx5_flow_driver_ops *fops =
10751 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10752 
10753 	if (!priv->pci_dev) {
10754 		rte_flow_error_set(error, ENOTSUP,
10755 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10756 				   "create flex item on PF only");
10757 		return NULL;
10758 	}
10759 	switch (priv->pci_dev->id.device_id) {
10760 	case PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF:
10761 	case PCI_DEVICE_ID_MELLANOX_CONNECTX7BF:
10762 		break;
10763 	default:
10764 		rte_flow_error_set(error, ENOTSUP,
10765 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10766 				   "flex item available on BlueField ports only");
10767 		return NULL;
10768 	}
10769 	if (!fops->item_create) {
10770 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10771 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10772 				   NULL, err_msg);
10773 		return NULL;
10774 	}
10775 	return fops->item_create(dev, conf, error);
10776 }
10777 
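/**
 * Release a flex item handle (rte_flow_flex_item_release() callback).
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */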
10778 static int
10779 mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
10780 			    const struct rte_flow_item_flex_handle *handle,
10781 			    struct rte_flow_error *error)
10782 {
10783 	static const char err_msg[] = "flex item release unsupported";
10784 	struct rte_flow_attr attr = { .transfer = 0 };
10785 	const struct mlx5_flow_driver_ops *fops =
10786 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10787 
10788 	if (!fops->item_release) {
10789 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10790 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10791 				   NULL, err_msg);
10792 		return -rte_errno;
10793 	}
10794 	return fops->item_release(dev, handle, error);
10795 }
10796 
10797 static void
10798 mlx5_dbg__print_pattern(const struct rte_flow_item *item)
10799 {
10800 	int ret;
10801 	struct rte_flow_error error;
10802 
10803 	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
10804 		char *item_name;
10805 		ret = rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME_PTR, &item_name,
10806 				    sizeof(item_name),
10807 				    (void *)(uintptr_t)item->type, &error);
10808 		if (ret > 0)
10809 			printf("%s ", item_name);
10810 		else
10811 			printf("%d\n", (int)item->type);
10812 	}
10813 	printf("END\n");
10814 }
10815 
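/**
 * Check whether a UDP item can only match the standard VXLAN port.
 *
 * @return
 *   Non-zero when the destination port is unspecified or equals
 *   MLX5_UDP_PORT_VXLAN (4789), zero otherwise.
 */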
10816 static int
10817 mlx5_flow_is_std_vxlan_port(const struct rte_flow_item *udp_item)
10818 {
10819 	const struct rte_flow_item_udp *spec = udp_item->spec;
10820 	const struct rte_flow_item_udp *mask = udp_item->mask;
10821 	uint16_t udp_dport = 0;
10822 
10823 	if (spec != NULL) {
10824 		if (!mask)
10825 			mask = &rte_flow_item_udp_mask;
10826 		udp_dport = rte_be_to_cpu_16(spec->hdr.dst_port &
10827 				mask->hdr.dst_port);
10828 	}
10829 	return (!udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN);
10830 }
10831 
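/**
 * Adjust the last VXLAN expansion node according to the preceding UDP
 * destination port: the standard port keeps the full inner expansion,
 * any other port restricts expansion to L3 VXLAN.
 *
 * @return
 *   The (possibly replaced) expansion node to continue with.
 */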
10832 static const struct mlx5_flow_expand_node *
10833 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
10834 		unsigned int item_idx,
10835 		const struct mlx5_flow_expand_node graph[],
10836 		const struct mlx5_flow_expand_node *node)
10837 {
10838 	const struct rte_flow_item *item = pattern + item_idx, *prev_item;
10839 
10840 	if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN &&
10841 			node != NULL &&
10842 			node->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
10843 		/*
10844 		 * The expansion node is VXLAN and it is also the last
10845 		 * expandable item in the pattern, so need to continue
10846 		 * expansion of the inner tunnel.
10847 		 */
10848 		MLX5_ASSERT(item_idx > 0);
10849 		prev_item = pattern + item_idx - 1;
10850 		MLX5_ASSERT(prev_item->type == RTE_FLOW_ITEM_TYPE_UDP);
10851 		if (mlx5_flow_is_std_vxlan_port(prev_item))
10852 			return &graph[MLX5_EXPANSION_STD_VXLAN];
10853 		return &graph[MLX5_EXPANSION_L3_VXLAN];
10854 	}
10855 	return node;
10856 }
10857 
10858 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
10859 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
10860 	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
10861 };
10862 
10863 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
10864 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
10865 	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
10866 	{ 9, 10, 11 }, { 12, 13, 14 },
10867 };
10868 
10869 /**
10870  * Discover the number of available flow priorities.
10871  *
10872  * @param dev
10873  *   Ethernet device.
10874  *
10875  * @return
10876  *   On success, number of available flow priorities.
10877  *   On failure, a negative errno-style code and rte_errno is set.
10878  */
10879 int
10880 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
10881 {
10882 	static const uint16_t vprio[] = {8, 16};
10883 	const struct mlx5_priv *priv = dev->data->dev_private;
10884 	const struct mlx5_flow_driver_ops *fops;
10885 	enum mlx5_flow_drv_type type;
10886 	int ret;
10887 
10888 	type = mlx5_flow_os_get_type();
10889 	if (type == MLX5_FLOW_TYPE_MAX) {
10890 		type = MLX5_FLOW_TYPE_VERBS;
10891 		if (priv->sh->cdev->config.devx && priv->sh->config.dv_flow_en)
10892 			type = MLX5_FLOW_TYPE_DV;
10893 	}
10894 	fops = flow_get_drv_ops(type);
10895 	if (fops->discover_priorities == NULL) {
10896 		DRV_LOG(ERR, "Priority discovery not supported");
10897 		rte_errno = ENOTSUP;
10898 		return -rte_errno;
10899 	}
10900 	ret = fops->discover_priorities(dev, vprio, RTE_DIM(vprio));
10901 	if (ret < 0)
10902 		return ret;
10903 	switch (ret) {
10904 	case 8:
10905 		ret = RTE_DIM(priority_map_3);
10906 		break;
10907 	case 16:
10908 		ret = RTE_DIM(priority_map_5);
10909 		break;
10910 	default:
10911 		rte_errno = ENOTSUP;
10912 		DRV_LOG(ERR,
10913 			"port %u maximum priority: %d expected 8/16",
10914 			dev->data->port_id, ret);
10915 		return -rte_errno;
10916 	}
10917 	DRV_LOG(INFO, "port %u supported flow priorities:"
10918 		" 0-%d for ingress or egress root table,"
10919 		" 0-%d for non-root table or transfer root table.",
10920 		dev->data->port_id, ret - 2,
10921 		MLX5_NON_ROOT_FLOW_MAX_PRIO - 1);
10922 	return ret;
10923 }
10924 
10925 /**
10926  * Adjust flow priority based on the highest layer and the request priority.
10927  *
10928  * @param[in] dev
10929  *   Pointer to the Ethernet device structure.
10930  * @param[in] priority
10931  *   The rule base priority.
10932  * @param[in] subpriority
10933  *   The priority based on the items.
10934  *
10935  * @return
10936  *   The new priority.
10937  */
10938 uint32_t
10939 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
10940 			  uint32_t subpriority)
10941 {
10942 	uint32_t res = 0;
10943 	struct mlx5_priv *priv = dev->data->dev_private;
10944 
10945 	switch (priv->sh->flow_max_priority) {
10946 	case RTE_DIM(priority_map_3):
10947 		res = priority_map_3[priority][subpriority];
10948 		break;
10949 	case RTE_DIM(priority_map_5):
10950 		res = priority_map_5[priority][subpriority];
10951 		break;
10952 	}
10953 	return res;
10954 }
10955