xref: /dpdk/drivers/net/mlx5/mlx5_flow.c (revision 1b0037b1cb287cd4cc215b2d0fc974e5188a6959)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5 
6 #include <stdalign.h>
7 #include <stdint.h>
8 #include <string.h>
9 #include <stdbool.h>
10 #include <sys/queue.h>
11 
12 #include <rte_common.h>
13 #include <rte_ether.h>
14 #include <ethdev_driver.h>
15 #include <rte_eal_paging.h>
16 #include <rte_flow.h>
17 #include <rte_cycles.h>
18 #include <rte_flow_driver.h>
19 #include <rte_malloc.h>
20 #include <rte_ip.h>
21 
22 #include <mlx5_glue.h>
23 #include <mlx5_devx_cmds.h>
24 #include <mlx5_prm.h>
25 #include <mlx5_malloc.h>
26 
27 #include "mlx5_defs.h"
28 #include "mlx5.h"
29 #include "mlx5_flow.h"
30 #include "mlx5_flow_os.h"
31 #include "mlx5_rx.h"
32 #include "mlx5_tx.h"
33 #include "mlx5_common_os.h"
34 #include "rte_pmd_mlx5.h"
35 
36 struct tunnel_default_miss_ctx {
37 	uint16_t *queue;
38 	__extension__
39 	union {
40 		struct rte_flow_action_rss action_rss;
41 		struct rte_flow_action_queue miss_queue;
42 		struct rte_flow_action_jump miss_jump;
43 		uint8_t raw[0];
44 	};
45 };
46 
47 static int
48 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
49 			     struct rte_flow *flow,
50 			     const struct rte_flow_attr *attr,
51 			     const struct rte_flow_action *app_actions,
52 			     uint32_t flow_idx,
53 			     const struct mlx5_flow_tunnel *tunnel,
54 			     struct tunnel_default_miss_ctx *ctx,
55 			     struct rte_flow_error *error);
56 static struct mlx5_flow_tunnel *
57 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id);
58 static void
59 mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel);
60 static uint32_t
61 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
62 				const struct mlx5_flow_tunnel *tunnel,
63 				uint32_t group, uint32_t *table,
64 				struct rte_flow_error *error);
65 
66 static struct mlx5_flow_workspace *mlx5_flow_push_thread_workspace(void);
67 static void mlx5_flow_pop_thread_workspace(void);
68 
69 
70 /** Device flow drivers. */
71 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
72 
73 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
74 
75 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
76 	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
77 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
78 	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
79 	[MLX5_FLOW_TYPE_HW] = &mlx5_flow_hw_drv_ops,
80 #endif
81 	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
82 	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
83 };
84 
85 /** Helper macro to build input graph for mlx5_flow_expand_rss(). */
86 #define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
87 	(const int []){ \
88 		__VA_ARGS__, 0, \
89 	}
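
/*
 * Illustrative note (no driver logic): the macro above builds a
 * zero-terminated compound literal of node indexes, e.g.
 *
 *   MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6)
 *
 * expands to
 *
 *   (const int []){ MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6, 0, }
 *
 * which is the form expected by the mlx5_flow_expand_node.next field.
 */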
90 
91 /** Node object of input graph for mlx5_flow_expand_rss(). */
92 struct mlx5_flow_expand_node {
93 	const int *const next;
	/**<
	 * List of next node indexes. A zero value is interpreted as
	 * a terminator.
	 */
97 	const enum rte_flow_item_type type;
98 	/**< Pattern item type of current node. */
99 	uint64_t rss_types;
100 	/**<
101 	 * RSS types bit-field associated with this node
102 	 * (see RTE_ETH_RSS_* definitions).
103 	 */
104 	uint64_t node_flags;
	/**<
	 * Bit-fields that define how the node is used in the expansion
	 * (see MLX5_EXPANSION_NODE_* definitions).
	 */
109 };
110 
/* Optional expansion node. The expansion algorithm will not go deeper. */
112 #define MLX5_EXPANSION_NODE_OPTIONAL (UINT64_C(1) << 0)
113 
/* The node is not added implicitly as an expansion to the flow pattern.
 * If the node type does not match the flow pattern item type, the
 * expansion algorithm will go deeper to its next items.
117  * In the current implementation, the list of next nodes indexes can
118  * have up to one node with this flag set and it has to be the last
119  * node index (before the list terminator).
120  */
121 #define MLX5_EXPANSION_NODE_EXPLICIT (UINT64_C(1) << 1)
122 
123 /** Object returned by mlx5_flow_expand_rss(). */
124 struct mlx5_flow_expand_rss {
125 	uint32_t entries;
	/**< Number of entries in the @p entry array. */
127 	struct {
128 		struct rte_flow_item *pattern; /**< Expanded pattern array. */
129 		uint32_t priority; /**< Priority offset for each expansion. */
130 	} entry[];
131 };
132 
133 static void
134 mlx5_dbg__print_pattern(const struct rte_flow_item *item);
135 
136 static const struct mlx5_flow_expand_node *
137 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
138 		unsigned int item_idx,
139 		const struct mlx5_flow_expand_node graph[],
140 		const struct mlx5_flow_expand_node *node);
141 
142 static bool
143 mlx5_flow_is_rss_expandable_item(const struct rte_flow_item *item)
144 {
145 	switch (item->type) {
146 	case RTE_FLOW_ITEM_TYPE_ETH:
147 	case RTE_FLOW_ITEM_TYPE_VLAN:
148 	case RTE_FLOW_ITEM_TYPE_IPV4:
149 	case RTE_FLOW_ITEM_TYPE_IPV6:
150 	case RTE_FLOW_ITEM_TYPE_UDP:
151 	case RTE_FLOW_ITEM_TYPE_TCP:
152 	case RTE_FLOW_ITEM_TYPE_VXLAN:
153 	case RTE_FLOW_ITEM_TYPE_NVGRE:
154 	case RTE_FLOW_ITEM_TYPE_GRE:
155 	case RTE_FLOW_ITEM_TYPE_GENEVE:
156 	case RTE_FLOW_ITEM_TYPE_MPLS:
157 	case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
158 	case RTE_FLOW_ITEM_TYPE_GRE_KEY:
159 	case RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT:
160 	case RTE_FLOW_ITEM_TYPE_GTP:
161 		return true;
162 	default:
163 		break;
164 	}
165 	return false;
166 }
167 
168 /**
169  * Network Service Header (NSH) and its next protocol values
170  * are described in RFC-8393.
171  */
172 static enum rte_flow_item_type
173 mlx5_nsh_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
174 {
175 	enum rte_flow_item_type type;
176 
177 	switch (proto_mask & proto_spec) {
178 	case 0:
179 		type = RTE_FLOW_ITEM_TYPE_VOID;
180 		break;
181 	case RTE_VXLAN_GPE_TYPE_IPV4:
182 		type = RTE_FLOW_ITEM_TYPE_IPV4;
183 		break;
184 	case RTE_VXLAN_GPE_TYPE_IPV6:
		type = RTE_FLOW_ITEM_TYPE_IPV6;
186 		break;
187 	case RTE_VXLAN_GPE_TYPE_ETH:
188 		type = RTE_FLOW_ITEM_TYPE_ETH;
189 		break;
190 	default:
191 		type = RTE_FLOW_ITEM_TYPE_END;
192 	}
193 	return type;
194 }
195 
196 static enum rte_flow_item_type
197 mlx5_inet_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
198 {
199 	enum rte_flow_item_type type;
200 
201 	switch (proto_mask & proto_spec) {
202 	case 0:
203 		type = RTE_FLOW_ITEM_TYPE_VOID;
204 		break;
205 	case IPPROTO_UDP:
206 		type = RTE_FLOW_ITEM_TYPE_UDP;
207 		break;
208 	case IPPROTO_TCP:
209 		type = RTE_FLOW_ITEM_TYPE_TCP;
210 		break;
211 	case IPPROTO_IPIP:
212 		type = RTE_FLOW_ITEM_TYPE_IPV4;
213 		break;
214 	case IPPROTO_IPV6:
215 		type = RTE_FLOW_ITEM_TYPE_IPV6;
216 		break;
217 	default:
218 		type = RTE_FLOW_ITEM_TYPE_END;
219 	}
220 	return type;
221 }
222 
223 static enum rte_flow_item_type
224 mlx5_ethertype_to_item_type(rte_be16_t type_spec,
225 			    rte_be16_t type_mask, bool is_tunnel)
226 {
227 	enum rte_flow_item_type type;
228 
229 	switch (rte_be_to_cpu_16(type_spec & type_mask)) {
230 	case 0:
231 		type = RTE_FLOW_ITEM_TYPE_VOID;
232 		break;
233 	case RTE_ETHER_TYPE_TEB:
234 		type = is_tunnel ?
235 		       RTE_FLOW_ITEM_TYPE_ETH : RTE_FLOW_ITEM_TYPE_END;
236 		break;
237 	case RTE_ETHER_TYPE_VLAN:
238 		type = !is_tunnel ?
239 		       RTE_FLOW_ITEM_TYPE_VLAN : RTE_FLOW_ITEM_TYPE_END;
240 		break;
241 	case RTE_ETHER_TYPE_IPV4:
242 		type = RTE_FLOW_ITEM_TYPE_IPV4;
243 		break;
244 	case RTE_ETHER_TYPE_IPV6:
245 		type = RTE_FLOW_ITEM_TYPE_IPV6;
246 		break;
247 	default:
248 		type = RTE_FLOW_ITEM_TYPE_END;
249 	}
250 	return type;
251 }
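
/*
 * Worked example (illustrative): for an ETH item with
 * spec->type = RTE_BE16(RTE_ETHER_TYPE_IPV4) and a full type mask, the
 * function above returns RTE_FLOW_ITEM_TYPE_IPV4, so the RSS expansion
 * continues under the IPv4 branch. TEB (0x6558) maps to an inner ETH
 * item only when the EtherType comes from a tunnel header (is_tunnel).
 */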
252 
253 static enum rte_flow_item_type
254 mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
255 {
256 #define MLX5_XSET_ITEM_MASK_SPEC(type, fld)                              \
257 	do {                                                             \
258 		const void *m = item->mask;                              \
259 		const void *s = item->spec;                              \
260 		mask = m ?                                               \
261 			((const struct rte_flow_item_##type *)m)->fld :  \
262 			rte_flow_item_##type##_mask.fld;                 \
263 		spec = ((const struct rte_flow_item_##type *)s)->fld;    \
264 	} while (0)
265 
266 	enum rte_flow_item_type ret;
267 	uint16_t spec, mask;
268 
269 	if (item == NULL || item->spec == NULL)
270 		return RTE_FLOW_ITEM_TYPE_VOID;
271 	switch (item->type) {
272 	case RTE_FLOW_ITEM_TYPE_ETH:
273 		MLX5_XSET_ITEM_MASK_SPEC(eth, type);
274 		if (!mask)
275 			return RTE_FLOW_ITEM_TYPE_VOID;
276 		ret = mlx5_ethertype_to_item_type(spec, mask, false);
277 		break;
278 	case RTE_FLOW_ITEM_TYPE_VLAN:
279 		MLX5_XSET_ITEM_MASK_SPEC(vlan, inner_type);
280 		if (!mask)
281 			return RTE_FLOW_ITEM_TYPE_VOID;
282 		ret = mlx5_ethertype_to_item_type(spec, mask, false);
283 		break;
284 	case RTE_FLOW_ITEM_TYPE_IPV4:
285 		MLX5_XSET_ITEM_MASK_SPEC(ipv4, hdr.next_proto_id);
286 		if (!mask)
287 			return RTE_FLOW_ITEM_TYPE_VOID;
288 		ret = mlx5_inet_proto_to_item_type(spec, mask);
289 		break;
290 	case RTE_FLOW_ITEM_TYPE_IPV6:
291 		MLX5_XSET_ITEM_MASK_SPEC(ipv6, hdr.proto);
292 		if (!mask)
293 			return RTE_FLOW_ITEM_TYPE_VOID;
294 		ret = mlx5_inet_proto_to_item_type(spec, mask);
295 		break;
296 	case RTE_FLOW_ITEM_TYPE_GENEVE:
297 		MLX5_XSET_ITEM_MASK_SPEC(geneve, protocol);
298 		ret = mlx5_ethertype_to_item_type(spec, mask, true);
299 		break;
300 	case RTE_FLOW_ITEM_TYPE_GRE:
301 		MLX5_XSET_ITEM_MASK_SPEC(gre, protocol);
302 		ret = mlx5_ethertype_to_item_type(spec, mask, true);
303 		break;
304 	case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
305 		MLX5_XSET_ITEM_MASK_SPEC(vxlan_gpe, protocol);
306 		ret = mlx5_nsh_proto_to_item_type(spec, mask);
307 		break;
308 	default:
309 		ret = RTE_FLOW_ITEM_TYPE_VOID;
310 		break;
311 	}
312 	return ret;
313 #undef MLX5_XSET_ITEM_MASK_SPEC
314 }
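
/*
 * Illustrative example: for a pattern ending with an IPv4 item whose
 * spec sets hdr.next_proto_id = IPPROTO_UDP under a full mask, the
 * function above returns RTE_FLOW_ITEM_TYPE_UDP; the expansion then
 * inserts this missed UDP item before expanding deeper, as if the user
 * pattern had been "... / ipv4 / udp".
 */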
315 
316 static const int *
317 mlx5_flow_expand_rss_skip_explicit(const struct mlx5_flow_expand_node graph[],
318 		const int *next_node)
319 {
320 	const struct mlx5_flow_expand_node *node = NULL;
321 	const int *next = next_node;
322 
323 	while (next && *next) {
324 		/*
325 		 * Skip the nodes with the MLX5_EXPANSION_NODE_EXPLICIT
326 		 * flag set, because they were not found in the flow pattern.
327 		 */
328 		node = &graph[*next];
329 		if (!(node->node_flags & MLX5_EXPANSION_NODE_EXPLICIT))
330 			break;
331 		next = node->next;
332 	}
333 	return next;
334 }
335 
336 #define MLX5_RSS_EXP_ELT_N 16
337 
338 /**
339  * Expand RSS flows into several possible flows according to the RSS hash
340  * fields requested and the driver capabilities.
341  *
342  * @param[out] buf
 *   Buffer to store the resulting expansion.
344  * @param[in] size
345  *   Buffer size in bytes. If 0, @p buf can be NULL.
346  * @param[in] pattern
347  *   User flow pattern.
348  * @param[in] types
349  *   RSS types to expand (see RTE_ETH_RSS_* definitions).
350  * @param[in] graph
351  *   Input graph to expand @p pattern according to @p types.
352  * @param[in] graph_root_index
353  *   Index of root node in @p graph, typically 0.
354  *
355  * @return
356  *   A positive value representing the size of @p buf in bytes regardless of
357  *   @p size on success, a negative errno value otherwise and rte_errno is
358  *   set, the following errors are defined:
359  *
 *   -E2BIG: graph @p graph is too deep.
 *   -EINVAL: @p size is not large enough for the expanded pattern.
362  */
363 static int
364 mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
365 		     const struct rte_flow_item *pattern, uint64_t types,
366 		     const struct mlx5_flow_expand_node graph[],
367 		     int graph_root_index)
368 {
369 	const struct rte_flow_item *item;
370 	const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
371 	const int *next_node;
372 	const int *stack[MLX5_RSS_EXP_ELT_N];
373 	int stack_pos = 0;
374 	struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N];
375 	unsigned int i, item_idx, last_expand_item_idx = 0;
376 	size_t lsize;
377 	size_t user_pattern_size = 0;
378 	void *addr = NULL;
379 	const struct mlx5_flow_expand_node *next = NULL;
380 	struct rte_flow_item missed_item;
381 	int missed = 0;
382 	int elt = 0;
383 	const struct rte_flow_item *last_expand_item = NULL;
384 
385 	memset(&missed_item, 0, sizeof(missed_item));
386 	lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
387 		MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]);
388 	if (lsize > size)
389 		return -EINVAL;
390 	buf->entry[0].priority = 0;
391 	buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N];
392 	buf->entries = 0;
393 	addr = buf->entry[0].pattern;
394 	for (item = pattern, item_idx = 0;
395 			item->type != RTE_FLOW_ITEM_TYPE_END;
396 			item++, item_idx++) {
397 		if (!mlx5_flow_is_rss_expandable_item(item)) {
398 			user_pattern_size += sizeof(*item);
399 			continue;
400 		}
401 		last_expand_item = item;
402 		last_expand_item_idx = item_idx;
403 		i = 0;
404 		while (node->next && node->next[i]) {
405 			next = &graph[node->next[i]];
406 			if (next->type == item->type)
407 				break;
408 			if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
409 				node = next;
410 				i = 0;
411 			} else {
412 				++i;
413 			}
414 		}
415 		if (next)
416 			node = next;
417 		user_pattern_size += sizeof(*item);
418 	}
419 	user_pattern_size += sizeof(*item); /* Handle END item. */
420 	lsize += user_pattern_size;
421 	if (lsize > size)
422 		return -EINVAL;
423 	/* Copy the user pattern in the first entry of the buffer. */
424 	rte_memcpy(addr, pattern, user_pattern_size);
425 	addr = (void *)(((uintptr_t)addr) + user_pattern_size);
426 	buf->entries = 1;
427 	/* Start expanding. */
428 	memset(flow_items, 0, sizeof(flow_items));
429 	user_pattern_size -= sizeof(*item);
	/*
	 * Check if the last valid item has its spec set; if so, derive the
	 * item type that completes the pattern so it can be used for
	 * expansion.
	 */
434 	missed_item.type = mlx5_flow_expand_rss_item_complete(last_expand_item);
435 	if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
436 		/* Item type END indicates expansion is not required. */
437 		return lsize;
438 	}
439 	if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
440 		next = NULL;
441 		missed = 1;
442 		i = 0;
443 		while (node->next && node->next[i]) {
444 			next = &graph[node->next[i]];
445 			if (next->type == missed_item.type) {
446 				flow_items[0].type = missed_item.type;
447 				flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
448 				break;
449 			}
450 			if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
451 				node = next;
452 				i = 0;
453 			} else {
454 				++i;
455 			}
456 			next = NULL;
457 		}
458 	}
459 	if (next && missed) {
460 		elt = 2; /* missed item + item end. */
461 		node = next;
462 		lsize += elt * sizeof(*item) + user_pattern_size;
463 		if (lsize > size)
464 			return -EINVAL;
465 		if (node->rss_types & types) {
466 			buf->entry[buf->entries].priority = 1;
467 			buf->entry[buf->entries].pattern = addr;
468 			buf->entries++;
469 			rte_memcpy(addr, buf->entry[0].pattern,
470 				   user_pattern_size);
471 			addr = (void *)(((uintptr_t)addr) + user_pattern_size);
472 			rte_memcpy(addr, flow_items, elt * sizeof(*item));
473 			addr = (void *)(((uintptr_t)addr) +
474 					elt * sizeof(*item));
475 		}
476 	} else if (last_expand_item != NULL) {
477 		node = mlx5_flow_expand_rss_adjust_node(pattern,
478 				last_expand_item_idx, graph, node);
479 	}
480 	memset(flow_items, 0, sizeof(flow_items));
481 	next_node = mlx5_flow_expand_rss_skip_explicit(graph,
482 			node->next);
483 	stack[stack_pos] = next_node;
484 	node = next_node ? &graph[*next_node] : NULL;
485 	while (node) {
486 		flow_items[stack_pos].type = node->type;
487 		if (node->rss_types & types) {
488 			size_t n;
			/*
			 * Compute the number of items to copy from the
			 * expansion and copy them.
			 * When stack_pos is 0, there is 1 element in it,
			 * plus the additional END item.
			 */
495 			elt = stack_pos + 2;
496 			flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
497 			lsize += elt * sizeof(*item) + user_pattern_size;
498 			if (lsize > size)
499 				return -EINVAL;
500 			n = elt * sizeof(*item);
501 			buf->entry[buf->entries].priority =
502 				stack_pos + 1 + missed;
503 			buf->entry[buf->entries].pattern = addr;
504 			buf->entries++;
505 			rte_memcpy(addr, buf->entry[0].pattern,
506 				   user_pattern_size);
507 			addr = (void *)(((uintptr_t)addr) +
508 					user_pattern_size);
509 			rte_memcpy(addr, &missed_item,
510 				   missed * sizeof(*item));
511 			addr = (void *)(((uintptr_t)addr) +
512 				missed * sizeof(*item));
513 			rte_memcpy(addr, flow_items, n);
514 			addr = (void *)(((uintptr_t)addr) + n);
515 		}
516 		/* Go deeper. */
517 		if (!(node->node_flags & MLX5_EXPANSION_NODE_OPTIONAL) &&
518 				node->next) {
519 			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
520 					node->next);
			if (stack_pos++ == MLX5_RSS_EXP_ELT_N - 1) {
522 				rte_errno = E2BIG;
523 				return -rte_errno;
524 			}
525 			stack[stack_pos] = next_node;
526 		} else if (*(next_node + 1)) {
527 			/* Follow up with the next possibility. */
528 			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
529 					++next_node);
530 		} else if (!stack_pos) {
531 			/*
532 			 * Completing the traverse over the different paths.
533 			 * The next_node is advanced to the terminator.
534 			 */
535 			++next_node;
536 		} else {
537 			/* Move to the next path. */
538 			while (stack_pos) {
539 				next_node = stack[--stack_pos];
540 				next_node++;
541 				if (*next_node)
542 					break;
543 			}
544 			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
545 					next_node);
546 			stack[stack_pos] = next_node;
547 		}
548 		node = next_node && *next_node ? &graph[*next_node] : NULL;
	}
550 	return lsize;
551 }
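
/*
 * Minimal usage sketch (illustrative only, hence compiled out): expand a
 * user pattern over the default expansion graph and walk the produced
 * entries. The on-stack buffer size and the helper name are assumptions,
 * not part of the driver; real callers size the buffer from
 * MLX5_RSS_EXP_ELT_N and the expected pattern length, and
 * mlx5_support_expansion is defined further below.
 */
#if 0
static void
mlx5_flow_expand_rss_usage_sketch(const struct rte_flow_item items[],
				  uint64_t rss_types)
{
	union {
		struct mlx5_flow_expand_rss buf;
		uint8_t buffer[4096]; /* Assumed to be large enough. */
	} expand;
	int ret;
	uint32_t i;

	ret = mlx5_flow_expand_rss(&expand.buf, sizeof(expand.buffer),
				   items, rss_types, mlx5_support_expansion,
				   MLX5_EXPANSION_ROOT);
	if (ret < 0)
		return; /* -E2BIG or -EINVAL. */
	for (i = 0; i != expand.buf.entries; ++i) {
		/*
		 * expand.buf.entry[i].pattern is one expanded pattern and
		 * entry[i].priority its priority offset; one device flow
		 * would be created per entry here.
		 */
	}
}
#endif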
552 
553 enum mlx5_expansion {
554 	MLX5_EXPANSION_ROOT,
555 	MLX5_EXPANSION_ROOT_OUTER,
556 	MLX5_EXPANSION_OUTER_ETH,
557 	MLX5_EXPANSION_OUTER_VLAN,
558 	MLX5_EXPANSION_OUTER_IPV4,
559 	MLX5_EXPANSION_OUTER_IPV4_UDP,
560 	MLX5_EXPANSION_OUTER_IPV4_TCP,
561 	MLX5_EXPANSION_OUTER_IPV6,
562 	MLX5_EXPANSION_OUTER_IPV6_UDP,
563 	MLX5_EXPANSION_OUTER_IPV6_TCP,
564 	MLX5_EXPANSION_VXLAN,
565 	MLX5_EXPANSION_STD_VXLAN,
566 	MLX5_EXPANSION_L3_VXLAN,
567 	MLX5_EXPANSION_VXLAN_GPE,
568 	MLX5_EXPANSION_GRE,
569 	MLX5_EXPANSION_NVGRE,
570 	MLX5_EXPANSION_GRE_KEY,
571 	MLX5_EXPANSION_MPLS,
572 	MLX5_EXPANSION_ETH,
573 	MLX5_EXPANSION_VLAN,
574 	MLX5_EXPANSION_IPV4,
575 	MLX5_EXPANSION_IPV4_UDP,
576 	MLX5_EXPANSION_IPV4_TCP,
577 	MLX5_EXPANSION_IPV6,
578 	MLX5_EXPANSION_IPV6_UDP,
579 	MLX5_EXPANSION_IPV6_TCP,
580 	MLX5_EXPANSION_IPV6_FRAG_EXT,
581 	MLX5_EXPANSION_GTP,
582 	MLX5_EXPANSION_GENEVE,
583 };
584 
585 /** Supported expansion of items. */
586 static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
587 	[MLX5_EXPANSION_ROOT] = {
588 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
589 						  MLX5_EXPANSION_IPV4,
590 						  MLX5_EXPANSION_IPV6),
591 		.type = RTE_FLOW_ITEM_TYPE_END,
592 	},
593 	[MLX5_EXPANSION_ROOT_OUTER] = {
594 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
595 						  MLX5_EXPANSION_OUTER_IPV4,
596 						  MLX5_EXPANSION_OUTER_IPV6),
597 		.type = RTE_FLOW_ITEM_TYPE_END,
598 	},
599 	[MLX5_EXPANSION_OUTER_ETH] = {
600 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
601 		.type = RTE_FLOW_ITEM_TYPE_ETH,
602 		.rss_types = 0,
603 	},
604 	[MLX5_EXPANSION_OUTER_VLAN] = {
605 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
606 						  MLX5_EXPANSION_OUTER_IPV6),
607 		.type = RTE_FLOW_ITEM_TYPE_VLAN,
608 		.node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
609 	},
610 	[MLX5_EXPANSION_OUTER_IPV4] = {
611 		.next = MLX5_FLOW_EXPAND_RSS_NEXT
612 			(MLX5_EXPANSION_OUTER_IPV4_UDP,
613 			 MLX5_EXPANSION_OUTER_IPV4_TCP,
614 			 MLX5_EXPANSION_GRE,
615 			 MLX5_EXPANSION_NVGRE,
616 			 MLX5_EXPANSION_IPV4,
617 			 MLX5_EXPANSION_IPV6),
618 		.type = RTE_FLOW_ITEM_TYPE_IPV4,
619 		.rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
620 			RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
621 	},
622 	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
623 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
624 						  MLX5_EXPANSION_VXLAN_GPE,
625 						  MLX5_EXPANSION_MPLS,
626 						  MLX5_EXPANSION_GENEVE,
627 						  MLX5_EXPANSION_GTP),
628 		.type = RTE_FLOW_ITEM_TYPE_UDP,
629 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
630 	},
631 	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
632 		.type = RTE_FLOW_ITEM_TYPE_TCP,
633 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
634 	},
635 	[MLX5_EXPANSION_OUTER_IPV6] = {
636 		.next = MLX5_FLOW_EXPAND_RSS_NEXT
637 			(MLX5_EXPANSION_OUTER_IPV6_UDP,
638 			 MLX5_EXPANSION_OUTER_IPV6_TCP,
639 			 MLX5_EXPANSION_IPV4,
640 			 MLX5_EXPANSION_IPV6,
641 			 MLX5_EXPANSION_GRE,
642 			 MLX5_EXPANSION_NVGRE),
643 		.type = RTE_FLOW_ITEM_TYPE_IPV6,
644 		.rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
645 			RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
646 	},
647 	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
648 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
649 						  MLX5_EXPANSION_VXLAN_GPE,
650 						  MLX5_EXPANSION_MPLS,
651 						  MLX5_EXPANSION_GENEVE,
652 						  MLX5_EXPANSION_GTP),
653 		.type = RTE_FLOW_ITEM_TYPE_UDP,
654 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
655 	},
656 	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
657 		.type = RTE_FLOW_ITEM_TYPE_TCP,
658 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
659 	},
660 	[MLX5_EXPANSION_VXLAN] = {
661 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
662 						  MLX5_EXPANSION_IPV4,
663 						  MLX5_EXPANSION_IPV6),
664 		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
665 	},
	[MLX5_EXPANSION_STD_VXLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	},
	[MLX5_EXPANSION_L3_VXLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	},
675 	[MLX5_EXPANSION_VXLAN_GPE] = {
676 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
677 						  MLX5_EXPANSION_IPV4,
678 						  MLX5_EXPANSION_IPV6),
679 		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
680 	},
681 	[MLX5_EXPANSION_GRE] = {
682 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
683 						  MLX5_EXPANSION_IPV4,
684 						  MLX5_EXPANSION_IPV6,
685 						  MLX5_EXPANSION_GRE_KEY,
686 						  MLX5_EXPANSION_MPLS),
687 		.type = RTE_FLOW_ITEM_TYPE_GRE,
688 	},
689 	[MLX5_EXPANSION_GRE_KEY] = {
690 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
691 						  MLX5_EXPANSION_IPV6,
692 						  MLX5_EXPANSION_MPLS),
693 		.type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
694 		.node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
695 	},
696 	[MLX5_EXPANSION_NVGRE] = {
697 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
698 		.type = RTE_FLOW_ITEM_TYPE_NVGRE,
699 	},
700 	[MLX5_EXPANSION_MPLS] = {
701 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
702 						  MLX5_EXPANSION_IPV6,
703 						  MLX5_EXPANSION_ETH),
704 		.type = RTE_FLOW_ITEM_TYPE_MPLS,
705 		.node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
706 	},
707 	[MLX5_EXPANSION_ETH] = {
708 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
709 		.type = RTE_FLOW_ITEM_TYPE_ETH,
710 	},
711 	[MLX5_EXPANSION_VLAN] = {
712 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
713 						  MLX5_EXPANSION_IPV6),
714 		.type = RTE_FLOW_ITEM_TYPE_VLAN,
715 		.node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
716 	},
717 	[MLX5_EXPANSION_IPV4] = {
718 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
719 						  MLX5_EXPANSION_IPV4_TCP),
720 		.type = RTE_FLOW_ITEM_TYPE_IPV4,
721 		.rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
722 			RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
723 	},
724 	[MLX5_EXPANSION_IPV4_UDP] = {
725 		.type = RTE_FLOW_ITEM_TYPE_UDP,
726 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
727 	},
728 	[MLX5_EXPANSION_IPV4_TCP] = {
729 		.type = RTE_FLOW_ITEM_TYPE_TCP,
730 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
731 	},
732 	[MLX5_EXPANSION_IPV6] = {
733 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
734 						  MLX5_EXPANSION_IPV6_TCP,
735 						  MLX5_EXPANSION_IPV6_FRAG_EXT),
736 		.type = RTE_FLOW_ITEM_TYPE_IPV6,
737 		.rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
738 			RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
739 	},
740 	[MLX5_EXPANSION_IPV6_UDP] = {
741 		.type = RTE_FLOW_ITEM_TYPE_UDP,
742 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
743 	},
744 	[MLX5_EXPANSION_IPV6_TCP] = {
745 		.type = RTE_FLOW_ITEM_TYPE_TCP,
746 		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
747 	},
748 	[MLX5_EXPANSION_IPV6_FRAG_EXT] = {
749 		.type = RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
750 	},
751 	[MLX5_EXPANSION_GTP] = {
752 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
753 						  MLX5_EXPANSION_IPV6),
754 		.type = RTE_FLOW_ITEM_TYPE_GTP,
755 	},
756 	[MLX5_EXPANSION_GENEVE] = {
757 		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
758 						  MLX5_EXPANSION_IPV4,
759 						  MLX5_EXPANSION_IPV6),
760 		.type = RTE_FLOW_ITEM_TYPE_GENEVE,
761 	},
762 };
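
/*
 * Worked example (illustrative): expanding the pattern "eth / ipv4 / end"
 * (no IPv4 spec) over mlx5_support_expansion from MLX5_EXPANSION_ROOT with
 * types = RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV4_TCP
 * yields three entries:
 *   entry[0]: eth / ipv4 / end        - user pattern, priority offset 0
 *   entry[1]: eth / ipv4 / udp / end  - priority offset 1
 *   entry[2]: eth / ipv4 / tcp / end  - priority offset 1
 * Entry 0 always holds the unmodified user pattern.
 */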
763 
764 static struct rte_flow_action_handle *
765 mlx5_action_handle_create(struct rte_eth_dev *dev,
766 			  const struct rte_flow_indir_action_conf *conf,
767 			  const struct rte_flow_action *action,
768 			  struct rte_flow_error *error);
769 static int mlx5_action_handle_destroy
770 				(struct rte_eth_dev *dev,
771 				 struct rte_flow_action_handle *handle,
772 				 struct rte_flow_error *error);
773 static int mlx5_action_handle_update
774 				(struct rte_eth_dev *dev,
775 				 struct rte_flow_action_handle *handle,
776 				 const void *update,
777 				 struct rte_flow_error *error);
778 static int mlx5_action_handle_query
779 				(struct rte_eth_dev *dev,
780 				 const struct rte_flow_action_handle *handle,
781 				 void *data,
782 				 struct rte_flow_error *error);
783 static int
784 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
785 		    struct rte_flow_tunnel *app_tunnel,
786 		    struct rte_flow_action **actions,
787 		    uint32_t *num_of_actions,
788 		    struct rte_flow_error *error);
789 static int
790 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
791 		       struct rte_flow_tunnel *app_tunnel,
792 		       struct rte_flow_item **items,
793 		       uint32_t *num_of_items,
794 		       struct rte_flow_error *error);
795 static int
796 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
797 			      struct rte_flow_item *pmd_items,
798 			      uint32_t num_items, struct rte_flow_error *err);
799 static int
800 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
801 				struct rte_flow_action *pmd_actions,
802 				uint32_t num_actions,
803 				struct rte_flow_error *err);
804 static int
805 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
806 				  struct rte_mbuf *m,
807 				  struct rte_flow_restore_info *info,
808 				  struct rte_flow_error *err);
809 static struct rte_flow_item_flex_handle *
810 mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
811 			   const struct rte_flow_item_flex_conf *conf,
812 			   struct rte_flow_error *error);
813 static int
814 mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
815 			    const struct rte_flow_item_flex_handle *handle,
816 			    struct rte_flow_error *error);
817 static int
818 mlx5_flow_info_get(struct rte_eth_dev *dev,
819 		   struct rte_flow_port_info *port_info,
820 		   struct rte_flow_queue_info *queue_info,
821 		   struct rte_flow_error *error);
822 static int
823 mlx5_flow_port_configure(struct rte_eth_dev *dev,
824 			 const struct rte_flow_port_attr *port_attr,
825 			 uint16_t nb_queue,
826 			 const struct rte_flow_queue_attr *queue_attr[],
827 			 struct rte_flow_error *err);
828 
829 static struct rte_flow_pattern_template *
830 mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
831 		const struct rte_flow_pattern_template_attr *attr,
832 		const struct rte_flow_item items[],
833 		struct rte_flow_error *error);
834 
835 static int
836 mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
837 				   struct rte_flow_pattern_template *template,
838 				   struct rte_flow_error *error);
839 static struct rte_flow_actions_template *
840 mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
841 			const struct rte_flow_actions_template_attr *attr,
842 			const struct rte_flow_action actions[],
843 			const struct rte_flow_action masks[],
844 			struct rte_flow_error *error);
845 static int
846 mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
847 				   struct rte_flow_actions_template *template,
848 				   struct rte_flow_error *error);
849 
850 static struct rte_flow_template_table *
851 mlx5_flow_table_create(struct rte_eth_dev *dev,
852 		       const struct rte_flow_template_table_attr *attr,
853 		       struct rte_flow_pattern_template *item_templates[],
854 		       uint8_t nb_item_templates,
855 		       struct rte_flow_actions_template *action_templates[],
856 		       uint8_t nb_action_templates,
857 		       struct rte_flow_error *error);
858 static int
859 mlx5_flow_table_destroy(struct rte_eth_dev *dev,
860 			struct rte_flow_template_table *table,
861 			struct rte_flow_error *error);
862 static struct rte_flow *
863 mlx5_flow_async_flow_create(struct rte_eth_dev *dev,
864 			    uint32_t queue,
865 			    const struct rte_flow_op_attr *attr,
866 			    struct rte_flow_template_table *table,
867 			    const struct rte_flow_item items[],
868 			    uint8_t pattern_template_index,
869 			    const struct rte_flow_action actions[],
870 			    uint8_t action_template_index,
871 			    void *user_data,
872 			    struct rte_flow_error *error);
873 static int
874 mlx5_flow_async_flow_destroy(struct rte_eth_dev *dev,
875 			     uint32_t queue,
876 			     const struct rte_flow_op_attr *attr,
877 			     struct rte_flow *flow,
878 			     void *user_data,
879 			     struct rte_flow_error *error);
880 static int
881 mlx5_flow_pull(struct rte_eth_dev *dev,
882 	       uint32_t queue,
883 	       struct rte_flow_op_result res[],
884 	       uint16_t n_res,
885 	       struct rte_flow_error *error);
886 static int
887 mlx5_flow_push(struct rte_eth_dev *dev,
888 	       uint32_t queue,
889 	       struct rte_flow_error *error);
890 
891 static struct rte_flow_action_handle *
892 mlx5_flow_async_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
893 				 const struct rte_flow_op_attr *attr,
894 				 const struct rte_flow_indir_action_conf *conf,
895 				 const struct rte_flow_action *action,
896 				 void *user_data,
897 				 struct rte_flow_error *error);
898 
899 static int
900 mlx5_flow_async_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
901 				 const struct rte_flow_op_attr *attr,
902 				 struct rte_flow_action_handle *handle,
903 				 const void *update,
904 				 void *user_data,
905 				 struct rte_flow_error *error);
906 
907 static int
908 mlx5_flow_async_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
909 				  const struct rte_flow_op_attr *attr,
910 				  struct rte_flow_action_handle *handle,
911 				  void *user_data,
912 				  struct rte_flow_error *error);
913 
914 static const struct rte_flow_ops mlx5_flow_ops = {
915 	.validate = mlx5_flow_validate,
916 	.create = mlx5_flow_create,
917 	.destroy = mlx5_flow_destroy,
918 	.flush = mlx5_flow_flush,
919 	.isolate = mlx5_flow_isolate,
920 	.query = mlx5_flow_query,
921 	.dev_dump = mlx5_flow_dev_dump,
922 	.get_aged_flows = mlx5_flow_get_aged_flows,
923 	.action_handle_create = mlx5_action_handle_create,
924 	.action_handle_destroy = mlx5_action_handle_destroy,
925 	.action_handle_update = mlx5_action_handle_update,
926 	.action_handle_query = mlx5_action_handle_query,
927 	.tunnel_decap_set = mlx5_flow_tunnel_decap_set,
928 	.tunnel_match = mlx5_flow_tunnel_match,
929 	.tunnel_action_decap_release = mlx5_flow_tunnel_action_release,
930 	.tunnel_item_release = mlx5_flow_tunnel_item_release,
931 	.get_restore_info = mlx5_flow_tunnel_get_restore_info,
932 	.flex_item_create = mlx5_flow_flex_item_create,
933 	.flex_item_release = mlx5_flow_flex_item_release,
934 	.info_get = mlx5_flow_info_get,
935 	.configure = mlx5_flow_port_configure,
936 	.pattern_template_create = mlx5_flow_pattern_template_create,
937 	.pattern_template_destroy = mlx5_flow_pattern_template_destroy,
938 	.actions_template_create = mlx5_flow_actions_template_create,
939 	.actions_template_destroy = mlx5_flow_actions_template_destroy,
940 	.template_table_create = mlx5_flow_table_create,
941 	.template_table_destroy = mlx5_flow_table_destroy,
942 	.async_create = mlx5_flow_async_flow_create,
943 	.async_destroy = mlx5_flow_async_flow_destroy,
944 	.pull = mlx5_flow_pull,
945 	.push = mlx5_flow_push,
946 	.async_action_handle_create = mlx5_flow_async_action_handle_create,
947 	.async_action_handle_update = mlx5_flow_async_action_handle_update,
948 	.async_action_handle_destroy = mlx5_flow_async_action_handle_destroy,
949 };
950 
951 /* Tunnel information. */
952 struct mlx5_flow_tunnel_info {
953 	uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
954 	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
955 };
956 
957 static struct mlx5_flow_tunnel_info tunnels_info[] = {
958 	{
959 		.tunnel = MLX5_FLOW_LAYER_VXLAN,
960 		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
961 	},
962 	{
963 		.tunnel = MLX5_FLOW_LAYER_GENEVE,
964 		.ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
965 	},
966 	{
967 		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
968 		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
969 	},
970 	{
971 		.tunnel = MLX5_FLOW_LAYER_GRE,
972 		.ptype = RTE_PTYPE_TUNNEL_GRE,
973 	},
974 	{
975 		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
976 		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
977 	},
978 	{
979 		.tunnel = MLX5_FLOW_LAYER_MPLS,
980 		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
981 	},
982 	{
983 		.tunnel = MLX5_FLOW_LAYER_NVGRE,
984 		.ptype = RTE_PTYPE_TUNNEL_NVGRE,
985 	},
986 	{
987 		.tunnel = MLX5_FLOW_LAYER_IPIP,
988 		.ptype = RTE_PTYPE_TUNNEL_IP,
989 	},
990 	{
991 		.tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
992 		.ptype = RTE_PTYPE_TUNNEL_IP,
993 	},
994 	{
995 		.tunnel = MLX5_FLOW_LAYER_GTP,
996 		.ptype = RTE_PTYPE_TUNNEL_GTPU,
997 	},
998 };
999 
1000 
1001 
1002 /**
1003  * Translate tag ID to register.
1004  *
1005  * @param[in] dev
1006  *   Pointer to the Ethernet device structure.
 * @param[in] feature
 *   The feature that requests the register.
 * @param[in] id
 *   The requested register ID.
1011  * @param[out] error
1012  *   Error description in case of any.
1013  *
1014  * @return
 *   The requested register on success, a negative errno
1016  *   value otherwise and rte_errno is set.
1017  */
1018 int
1019 mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
1020 		     enum mlx5_feature_name feature,
1021 		     uint32_t id,
1022 		     struct rte_flow_error *error)
1023 {
1024 	struct mlx5_priv *priv = dev->data->dev_private;
1025 	struct mlx5_sh_config *config = &priv->sh->config;
1026 	enum modify_reg start_reg;
1027 	bool skip_mtr_reg = false;
1028 
1029 	switch (feature) {
1030 	case MLX5_HAIRPIN_RX:
1031 		return REG_B;
1032 	case MLX5_HAIRPIN_TX:
1033 		return REG_A;
1034 	case MLX5_METADATA_RX:
1035 		switch (config->dv_xmeta_en) {
1036 		case MLX5_XMETA_MODE_LEGACY:
1037 			return REG_B;
1038 		case MLX5_XMETA_MODE_META16:
1039 			return REG_C_0;
1040 		case MLX5_XMETA_MODE_META32:
1041 			return REG_C_1;
1042 		}
1043 		break;
1044 	case MLX5_METADATA_TX:
1045 		return REG_A;
1046 	case MLX5_METADATA_FDB:
1047 		switch (config->dv_xmeta_en) {
1048 		case MLX5_XMETA_MODE_LEGACY:
1049 			return REG_NON;
1050 		case MLX5_XMETA_MODE_META16:
1051 			return REG_C_0;
1052 		case MLX5_XMETA_MODE_META32:
1053 			return REG_C_1;
1054 		}
1055 		break;
1056 	case MLX5_FLOW_MARK:
1057 		switch (config->dv_xmeta_en) {
1058 		case MLX5_XMETA_MODE_LEGACY:
1059 			return REG_NON;
1060 		case MLX5_XMETA_MODE_META16:
1061 			return REG_C_1;
1062 		case MLX5_XMETA_MODE_META32:
1063 			return REG_C_0;
1064 		}
1065 		break;
1066 	case MLX5_MTR_ID:
1067 		/*
1068 		 * If meter color and meter id share one register, flow match
1069 		 * should use the meter color register for match.
1070 		 */
1071 		if (priv->mtr_reg_share)
1072 			return priv->mtr_color_reg;
1073 		else
1074 			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
1075 			       REG_C_3;
1076 	case MLX5_MTR_COLOR:
1077 	case MLX5_ASO_FLOW_HIT:
1078 	case MLX5_ASO_CONNTRACK:
1079 	case MLX5_SAMPLE_ID:
1080 		/* All features use the same REG_C. */
1081 		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
1082 		return priv->mtr_color_reg;
1083 	case MLX5_COPY_MARK:
		/*
		 * The metadata COPY_MARK register is used only in the meter
		 * suffix sub-flow when a meter is present, so it is safe to
		 * share the same register.
		 */
1088 		return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
1089 	case MLX5_APP_TAG:
		/*
		 * If meter is enabled, it engages a register for color match
		 * and flow match. If meter color match does not use REG_C_2,
		 * the REG_C_x used by meter color match must be skipped.
		 * If meter is disabled, all available registers can be used.
		 */
1097 		start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
1098 			    (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
1099 		skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
1100 		if (id > (uint32_t)(REG_C_7 - start_reg))
1101 			return rte_flow_error_set(error, EINVAL,
1102 						  RTE_FLOW_ERROR_TYPE_ITEM,
1103 						  NULL, "invalid tag id");
1104 		if (priv->sh->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
1105 			return rte_flow_error_set(error, ENOTSUP,
1106 						  RTE_FLOW_ERROR_TYPE_ITEM,
1107 						  NULL, "unsupported tag id");
1108 		/*
		 * This case means meter is using a REG_C_x greater than REG_C_2.
1110 		 * Take care not to conflict with meter color REG_C_x.
1111 		 * If the available index REG_C_y >= REG_C_x, skip the
1112 		 * color register.
1113 		 */
1114 		if (skip_mtr_reg && priv->sh->flow_mreg_c
1115 		    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
1116 			if (id >= (uint32_t)(REG_C_7 - start_reg))
1117 				return rte_flow_error_set(error, EINVAL,
1118 						       RTE_FLOW_ERROR_TYPE_ITEM,
1119 							NULL, "invalid tag id");
1120 			if (priv->sh->flow_mreg_c
1121 			    [id + 1 + start_reg - REG_C_0] != REG_NON)
1122 				return priv->sh->flow_mreg_c
1123 					       [id + 1 + start_reg - REG_C_0];
1124 			return rte_flow_error_set(error, ENOTSUP,
1125 						  RTE_FLOW_ERROR_TYPE_ITEM,
1126 						  NULL, "unsupported tag id");
1127 		}
1128 		return priv->sh->flow_mreg_c[id + start_reg - REG_C_0];
1129 	}
1130 	MLX5_ASSERT(false);
1131 	return rte_flow_error_set(error, EINVAL,
1132 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1133 				  NULL, "invalid feature name");
1134 }
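
/*
 * Illustrative example: in MLX5_XMETA_MODE_META16 mode the function above
 * maps MLX5_METADATA_RX to REG_C_0 and MLX5_FLOW_MARK to REG_C_1. For an
 * application tag, a call such as
 *
 *   int reg = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
 *
 * returns the register recorded in priv->sh->flow_mreg_c[] for tag 0,
 * counted from REG_C_2, or from REG_C_3/REG_C_4 when a meter already
 * owns REG_C_2.
 */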
1135 
1136 /**
1137  * Check extensive flow metadata register support.
1138  *
1139  * @param dev
1140  *   Pointer to rte_eth_dev structure.
1141  *
1142  * @return
1143  *   True if device supports extensive flow metadata register, otherwise false.
1144  */
1145 bool
1146 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
1147 {
1148 	struct mlx5_priv *priv = dev->data->dev_private;
1149 
	/*
	 * Having an available reg_c can be regarded as support for
	 * extensive flow metadata registers, which means:
	 * - metadata register copy action by modify header is available.
	 * - 16 modify header actions are supported.
	 * - reg_c's are preserved across different domains (FDB and NIC) on
	 *   packet loopback by flow lookup miss.
	 */
1158 	return priv->sh->flow_mreg_c[2] != REG_NON;
1159 }
1160 
1161 /**
1162  * Get the lowest priority.
1163  *
1164  * @param[in] dev
1165  *   Pointer to the Ethernet device structure.
1166  * @param[in] attributes
1167  *   Pointer to device flow rule attributes.
1168  *
1169  * @return
 *   The value of the lowest flow priority.
1171  */
1172 uint32_t
1173 mlx5_get_lowest_priority(struct rte_eth_dev *dev,
1174 			  const struct rte_flow_attr *attr)
1175 {
1176 	struct mlx5_priv *priv = dev->data->dev_private;
1177 
1178 	if (!attr->group && !attr->transfer)
1179 		return priv->sh->flow_max_priority - 2;
1180 	return MLX5_NON_ROOT_FLOW_MAX_PRIO - 1;
1181 }
1182 
1183 /**
1184  * Calculate matcher priority of the flow.
1185  *
1186  * @param[in] dev
1187  *   Pointer to the Ethernet device structure.
1188  * @param[in] attr
1189  *   Pointer to device flow rule attributes.
1190  * @param[in] subpriority
1191  *   The priority based on the items.
1192  * @param[in] external
1193  *   Flow is user flow.
1194  * @return
1195  *   The matcher priority of the flow.
1196  */
1197 uint16_t
1198 mlx5_get_matcher_priority(struct rte_eth_dev *dev,
1199 			  const struct rte_flow_attr *attr,
1200 			  uint32_t subpriority, bool external)
1201 {
1202 	uint16_t priority = (uint16_t)attr->priority;
1203 	struct mlx5_priv *priv = dev->data->dev_private;
1204 
1205 	if (!attr->group && !attr->transfer) {
1206 		if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1207 			priority = priv->sh->flow_max_priority - 1;
1208 		return mlx5_os_flow_adjust_priority(dev, priority, subpriority);
1209 	} else if (!external && attr->transfer && attr->group == 0 &&
1210 		   attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR) {
1211 		return (priv->sh->flow_max_priority - 1) * 3;
1212 	}
1213 	if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1214 		priority = MLX5_NON_ROOT_FLOW_MAX_PRIO;
1215 	return priority * 3 + subpriority;
1216 }
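
/*
 * Worked example (illustrative): for a non-root flow (attr->group != 0)
 * with attr->priority = 2 and subpriority = 1, the function above returns
 * 2 * 3 + 1 = 7. A root NIC flow (group 0, no transfer) is instead
 * adjusted by mlx5_os_flow_adjust_priority().
 */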
1217 
1218 /**
1219  * Verify the @p item specifications (spec, last, mask) are compatible with the
1220  * NIC capabilities.
1221  *
1222  * @param[in] item
1223  *   Item specification.
1224  * @param[in] mask
1225  *   @p item->mask or flow default bit-masks.
1226  * @param[in] nic_mask
1227  *   Bit-masks covering supported fields by the NIC to compare with user mask.
1228  * @param[in] size
1229  *   Bit-masks size in bytes.
1230  * @param[in] range_accepted
1231  *   True if range of values is accepted for specific fields, false otherwise.
1232  * @param[out] error
1233  *   Pointer to error structure.
1234  *
1235  * @return
1236  *   0 on success, a negative errno value otherwise and rte_errno is set.
1237  */
1238 int
1239 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
1240 			  const uint8_t *mask,
1241 			  const uint8_t *nic_mask,
1242 			  unsigned int size,
1243 			  bool range_accepted,
1244 			  struct rte_flow_error *error)
1245 {
1246 	unsigned int i;
1247 
1248 	MLX5_ASSERT(nic_mask);
1249 	for (i = 0; i < size; ++i)
1250 		if ((nic_mask[i] | mask[i]) != nic_mask[i])
1251 			return rte_flow_error_set(error, ENOTSUP,
1252 						  RTE_FLOW_ERROR_TYPE_ITEM,
1253 						  item,
1254 						  "mask enables non supported"
1255 						  " bits");
1256 	if (!item->spec && (item->mask || item->last))
1257 		return rte_flow_error_set(error, EINVAL,
1258 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
1259 					  "mask/last without a spec is not"
1260 					  " supported");
1261 	if (item->spec && item->last && !range_accepted) {
1262 		uint8_t spec[size];
1263 		uint8_t last[size];
1264 		unsigned int i;
1265 		int ret;
1266 
1267 		for (i = 0; i < size; ++i) {
1268 			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
1269 			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
1270 		}
1271 		ret = memcmp(spec, last, size);
1272 		if (ret != 0)
1273 			return rte_flow_error_set(error, EINVAL,
1274 						  RTE_FLOW_ERROR_TYPE_ITEM,
1275 						  item,
1276 						  "range is not valid");
1277 	}
1278 	return 0;
1279 }
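
/*
 * Illustrative example: a TCP item with spec->hdr.src_port = 100,
 * last->hdr.src_port = 200 and a full src_port mask describes a range;
 * since the masked spec and last differ, the function above rejects it
 * with "range is not valid" unless range_accepted is true.
 */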
1280 
1281 /**
1282  * Adjust the hash fields according to the @p flow information.
1283  *
 * @param[in] rss_desc
 *   Pointer to the RSS descriptor.
1286  * @param[in] tunnel
1287  *   1 when the hash field is for a tunnel item.
1288  * @param[in] layer_types
1289  *   RTE_ETH_RSS_* types.
1290  * @param[in] hash_fields
1291  *   Item hash fields.
1292  *
1293  * @return
1294  *   The hash fields that should be used.
1295  */
1296 uint64_t
1297 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
1298 			    int tunnel __rte_unused, uint64_t layer_types,
1299 			    uint64_t hash_fields)
1300 {
1301 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1302 	int rss_request_inner = rss_desc->level >= 2;
1303 
1304 	/* Check RSS hash level for tunnel. */
1305 	if (tunnel && rss_request_inner)
1306 		hash_fields |= IBV_RX_HASH_INNER;
1307 	else if (tunnel || rss_request_inner)
1308 		return 0;
1309 #endif
1310 	/* Check if requested layer matches RSS hash fields. */
1311 	if (!(rss_desc->types & layer_types))
1312 		return 0;
1313 	return hash_fields;
1314 }
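
/*
 * Illustrative example: for an RSS action with level >= 2 (inner RSS)
 * over a tunneled (e.g. VXLAN) pattern, the inner hash fields are kept
 * and IBV_RX_HASH_INNER is added; requesting inner RSS on a non-tunnel
 * item, or outer RSS on a tunnel item, makes the function above return
 * 0 and disables that hash.
 */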
1315 
1316 /**
 * Look up and set the tunnel ptype in the Rx queue data. Only a single
 * ptype can be used; if several tunnel rules are used on this queue,
 * the tunnel ptype is cleared.
1320  *
1321  * @param rxq_ctrl
1322  *   Rx queue to update.
1323  */
1324 static void
1325 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
1326 {
1327 	unsigned int i;
1328 	uint32_t tunnel_ptype = 0;
1329 
1330 	/* Look up for the ptype to use. */
1331 	for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
1332 		if (!rxq_ctrl->flow_tunnels_n[i])
1333 			continue;
1334 		if (!tunnel_ptype) {
1335 			tunnel_ptype = tunnels_info[i].ptype;
1336 		} else {
1337 			tunnel_ptype = 0;
1338 			break;
1339 		}
1340 	}
1341 	rxq_ctrl->rxq.tunnel = tunnel_ptype;
1342 }
1343 
1344 /**
1345  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
1346  * flow.
1347  *
1348  * @param[in] dev
1349  *   Pointer to the Ethernet device structure.
1350  * @param[in] dev_handle
1351  *   Pointer to device flow handle structure.
1352  */
1353 void
1354 flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
1355 		       struct mlx5_flow_handle *dev_handle)
1356 {
1357 	struct mlx5_priv *priv = dev->data->dev_private;
1358 	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1359 	struct mlx5_ind_table_obj *ind_tbl = NULL;
1360 	unsigned int i;
1361 
1362 	if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1363 		struct mlx5_hrxq *hrxq;
1364 
1365 		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1366 			      dev_handle->rix_hrxq);
1367 		if (hrxq)
1368 			ind_tbl = hrxq->ind_table;
1369 	} else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1370 		struct mlx5_shared_action_rss *shared_rss;
1371 
1372 		shared_rss = mlx5_ipool_get
1373 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1374 			 dev_handle->rix_srss);
1375 		if (shared_rss)
1376 			ind_tbl = shared_rss->ind_tbl;
1377 	}
1378 	if (!ind_tbl)
1379 		return;
1380 	for (i = 0; i != ind_tbl->queues_n; ++i) {
1381 		int idx = ind_tbl->queues[i];
1382 		struct mlx5_rxq_ctrl *rxq_ctrl;
1383 
1384 		if (mlx5_is_external_rxq(dev, idx))
1385 			continue;
1386 		rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
1387 		MLX5_ASSERT(rxq_ctrl != NULL);
1388 		if (rxq_ctrl == NULL)
1389 			continue;
1390 		/*
1391 		 * To support metadata register copy on Tx loopback,
		 * this must always be enabled (metadata may arrive
		 * from another port - not from local flows only).
1394 		 */
1395 		if (tunnel) {
1396 			unsigned int j;
1397 
1398 			/* Increase the counter matching the flow. */
1399 			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1400 				if ((tunnels_info[j].tunnel &
1401 				     dev_handle->layers) ==
1402 				    tunnels_info[j].tunnel) {
1403 					rxq_ctrl->flow_tunnels_n[j]++;
1404 					break;
1405 				}
1406 			}
1407 			flow_rxq_tunnel_ptype_update(rxq_ctrl);
1408 		}
1409 	}
1410 }
1411 
1412 static void
1413 flow_rxq_mark_flag_set(struct rte_eth_dev *dev)
1414 {
1415 	struct mlx5_priv *priv = dev->data->dev_private;
1416 	struct mlx5_rxq_ctrl *rxq_ctrl;
1417 
1418 	if (priv->mark_enabled)
1419 		return;
1420 	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
1421 		rxq_ctrl->rxq.mark = 1;
1422 	}
1423 	priv->mark_enabled = 1;
1424 }
1425 
1426 /**
1427  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
1428  *
1429  * @param[in] dev
1430  *   Pointer to the Ethernet device structure.
1431  * @param[in] flow
1432  *   Pointer to flow structure.
1433  */
1434 static void
1435 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1436 {
1437 	struct mlx5_priv *priv = dev->data->dev_private;
1438 	uint32_t handle_idx;
1439 	struct mlx5_flow_handle *dev_handle;
1440 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
1441 
1442 	MLX5_ASSERT(wks);
1443 	if (wks->mark)
1444 		flow_rxq_mark_flag_set(dev);
1445 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1446 		       handle_idx, dev_handle, next)
1447 		flow_drv_rxq_flags_set(dev, dev_handle);
1448 }
1449 
1450 /**
1451  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1452  * device flow if no other flow uses it with the same kind of request.
1453  *
1454  * @param dev
1455  *   Pointer to Ethernet device.
1456  * @param[in] dev_handle
1457  *   Pointer to the device flow handle structure.
1458  */
1459 static void
1460 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
1461 			struct mlx5_flow_handle *dev_handle)
1462 {
1463 	struct mlx5_priv *priv = dev->data->dev_private;
1464 	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1465 	struct mlx5_ind_table_obj *ind_tbl = NULL;
1466 	unsigned int i;
1467 
1468 	if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1469 		struct mlx5_hrxq *hrxq;
1470 
1471 		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1472 			      dev_handle->rix_hrxq);
1473 		if (hrxq)
1474 			ind_tbl = hrxq->ind_table;
1475 	} else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1476 		struct mlx5_shared_action_rss *shared_rss;
1477 
1478 		shared_rss = mlx5_ipool_get
1479 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1480 			 dev_handle->rix_srss);
1481 		if (shared_rss)
1482 			ind_tbl = shared_rss->ind_tbl;
1483 	}
1484 	if (!ind_tbl)
1485 		return;
1486 	MLX5_ASSERT(dev->data->dev_started);
1487 	for (i = 0; i != ind_tbl->queues_n; ++i) {
1488 		int idx = ind_tbl->queues[i];
1489 		struct mlx5_rxq_ctrl *rxq_ctrl;
1490 
1491 		if (mlx5_is_external_rxq(dev, idx))
1492 			continue;
1493 		rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
1494 		MLX5_ASSERT(rxq_ctrl != NULL);
1495 		if (rxq_ctrl == NULL)
1496 			continue;
1497 		if (tunnel) {
1498 			unsigned int j;
1499 
1500 			/* Decrease the counter matching the flow. */
1501 			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1502 				if ((tunnels_info[j].tunnel &
1503 				     dev_handle->layers) ==
1504 				    tunnels_info[j].tunnel) {
1505 					rxq_ctrl->flow_tunnels_n[j]--;
1506 					break;
1507 				}
1508 			}
1509 			flow_rxq_tunnel_ptype_update(rxq_ctrl);
1510 		}
1511 	}
1512 }
1513 
1514 /**
1515  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1516  * @p flow if no other flow uses it with the same kind of request.
1517  *
1518  * @param dev
1519  *   Pointer to Ethernet device.
1520  * @param[in] flow
1521  *   Pointer to the flow.
1522  */
1523 static void
1524 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1525 {
1526 	struct mlx5_priv *priv = dev->data->dev_private;
1527 	uint32_t handle_idx;
1528 	struct mlx5_flow_handle *dev_handle;
1529 
1530 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1531 		       handle_idx, dev_handle, next)
1532 		flow_drv_rxq_flags_trim(dev, dev_handle);
1533 }
1534 
1535 /**
1536  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
1537  *
1538  * @param dev
1539  *   Pointer to Ethernet device.
1540  */
1541 static void
1542 flow_rxq_flags_clear(struct rte_eth_dev *dev)
1543 {
1544 	struct mlx5_priv *priv = dev->data->dev_private;
1545 	unsigned int i;
1546 
1547 	for (i = 0; i != priv->rxqs_n; ++i) {
1548 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1549 		unsigned int j;
1550 
1551 		if (rxq == NULL || rxq->ctrl == NULL)
1552 			continue;
1553 		rxq->ctrl->rxq.mark = 0;
1554 		for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
1555 			rxq->ctrl->flow_tunnels_n[j] = 0;
1556 		rxq->ctrl->rxq.tunnel = 0;
1557 	}
1558 	priv->mark_enabled = 0;
1559 }
1560 
1561 /**
1562  * Set the Rx queue dynamic metadata (mask and offset) for a flow
1563  *
1564  * @param[in] dev
1565  *   Pointer to the Ethernet device structure.
1566  */
1567 void
1568 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
1569 {
1570 	struct mlx5_priv *priv = dev->data->dev_private;
1571 	unsigned int i;
1572 
1573 	for (i = 0; i != priv->rxqs_n; ++i) {
1574 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1575 		struct mlx5_rxq_data *data;
1576 
1577 		if (rxq == NULL || rxq->ctrl == NULL)
1578 			continue;
1579 		data = &rxq->ctrl->rxq;
1580 		if (!rte_flow_dynf_metadata_avail()) {
1581 			data->dynf_meta = 0;
1582 			data->flow_meta_mask = 0;
1583 			data->flow_meta_offset = -1;
1584 			data->flow_meta_port_mask = 0;
1585 		} else {
1586 			data->dynf_meta = 1;
1587 			data->flow_meta_mask = rte_flow_dynf_metadata_mask;
1588 			data->flow_meta_offset = rte_flow_dynf_metadata_offs;
1589 			data->flow_meta_port_mask = priv->sh->dv_meta_mask;
1590 		}
1591 	}
1592 }
1593 
1594 /*
 * Return a pointer to the desired action in the list of actions.
1596  *
1597  * @param[in] actions
1598  *   The list of actions to search the action in.
1599  * @param[in] action
1600  *   The action to find.
1601  *
1602  * @return
1603  *   Pointer to the action in the list, if found. NULL otherwise.
1604  */
1605 const struct rte_flow_action *
1606 mlx5_flow_find_action(const struct rte_flow_action *actions,
1607 		      enum rte_flow_action_type action)
1608 {
1609 	if (actions == NULL)
1610 		return NULL;
1611 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
1612 		if (actions->type == action)
1613 			return actions;
1614 	return NULL;
1615 }
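
/*
 * Usage sketch (illustrative): locate the RSS action in an action list
 * with
 *
 *   const struct rte_flow_action *rss =
 *           mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);
 *
 * and inspect rss->conf only when the return value is not NULL.
 */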
1616 
1617 /*
1618  * Validate the flag action.
1619  *
1620  * @param[in] action_flags
1621  *   Bit-fields that holds the actions detected until now.
1622  * @param[in] attr
1623  *   Attributes of flow that includes this action.
1624  * @param[out] error
1625  *   Pointer to error structure.
1626  *
1627  * @return
1628  *   0 on success, a negative errno value otherwise and rte_errno is set.
1629  */
1630 int
1631 mlx5_flow_validate_action_flag(uint64_t action_flags,
1632 			       const struct rte_flow_attr *attr,
1633 			       struct rte_flow_error *error)
1634 {
1635 	if (action_flags & MLX5_FLOW_ACTION_MARK)
1636 		return rte_flow_error_set(error, EINVAL,
1637 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1638 					  "can't mark and flag in same flow");
1639 	if (action_flags & MLX5_FLOW_ACTION_FLAG)
1640 		return rte_flow_error_set(error, EINVAL,
1641 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1642 					  "can't have 2 flag"
1643 					  " actions in same flow");
1644 	if (attr->egress)
1645 		return rte_flow_error_set(error, ENOTSUP,
1646 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1647 					  "flag action not supported for "
1648 					  "egress");
1649 	return 0;
1650 }
1651 
1652 /*
1653  * Validate the mark action.
1654  *
1655  * @param[in] action
 *   Pointer to the mark action.
1657  * @param[in] action_flags
1658  *   Bit-fields that holds the actions detected until now.
1659  * @param[in] attr
1660  *   Attributes of flow that includes this action.
1661  * @param[out] error
1662  *   Pointer to error structure.
1663  *
1664  * @return
1665  *   0 on success, a negative errno value otherwise and rte_errno is set.
1666  */
1667 int
1668 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
1669 			       uint64_t action_flags,
1670 			       const struct rte_flow_attr *attr,
1671 			       struct rte_flow_error *error)
1672 {
1673 	const struct rte_flow_action_mark *mark = action->conf;
1674 
1675 	if (!mark)
1676 		return rte_flow_error_set(error, EINVAL,
1677 					  RTE_FLOW_ERROR_TYPE_ACTION,
1678 					  action,
1679 					  "configuration cannot be null");
1680 	if (mark->id >= MLX5_FLOW_MARK_MAX)
1681 		return rte_flow_error_set(error, EINVAL,
1682 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1683 					  &mark->id,
					  "mark id must be in 0 <= id < "
1685 					  RTE_STR(MLX5_FLOW_MARK_MAX));
1686 	if (action_flags & MLX5_FLOW_ACTION_FLAG)
1687 		return rte_flow_error_set(error, EINVAL,
1688 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1689 					  "can't flag and mark in same flow");
1690 	if (action_flags & MLX5_FLOW_ACTION_MARK)
1691 		return rte_flow_error_set(error, EINVAL,
1692 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1693 					  "can't have 2 mark actions in same"
1694 					  " flow");
1695 	if (attr->egress)
1696 		return rte_flow_error_set(error, ENOTSUP,
1697 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1698 					  "mark action not supported for "
1699 					  "egress");
1700 	return 0;
1701 }
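
/*
 * Illustrative sketch: a mark action configuration that passes the checks
 * above, and one that fails them. MLX5_FLOW_MARK_MAX bounds the id; the
 * id values here are hypothetical.
 *
 *	const struct rte_flow_action_mark ok = { .id = 0xabcd };
 *	const struct rte_flow_action_mark bad = { .id = MLX5_FLOW_MARK_MAX };
 *	// "ok" validates (assuming no prior FLAG/MARK action and an ingress
 *	// attribute); "bad" is rejected with RTE_FLOW_ERROR_TYPE_ACTION_CONF.
 */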
1702 
1703 /*
1704  * Validate the drop action.
1705  *
1706  * @param[in] action_flags
1707  *   Bit-fields that holds the actions detected until now.
1708  * @param[in] attr
1709  *   Attributes of flow that includes this action.
1710  * @param[out] error
1711  *   Pointer to error structure.
1712  *
1713  * @return
1714  *   0 on success, a negative errno value otherwise and rte_errno is set.
1715  */
1716 int
1717 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1718 			       const struct rte_flow_attr *attr,
1719 			       struct rte_flow_error *error)
1720 {
1721 	if (attr->egress)
1722 		return rte_flow_error_set(error, ENOTSUP,
1723 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1724 					  "drop action not supported for "
1725 					  "egress");
1726 	return 0;
1727 }
1728 
1729 /*
1730  * Validate the queue action.
1731  *
1732  * @param[in] action
1733  *   Pointer to the queue action.
1734  * @param[in] action_flags
1735  *   Bit-fields that holds the actions detected until now.
1736  * @param[in] dev
1737  *   Pointer to the Ethernet device structure.
1738  * @param[in] attr
1739  *   Attributes of flow that includes this action.
1740  * @param[out] error
1741  *   Pointer to error structure.
1742  *
1743  * @return
1744  *   0 on success, a negative errno value otherwise and rte_errno is set.
1745  */
1746 int
1747 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1748 				uint64_t action_flags,
1749 				struct rte_eth_dev *dev,
1750 				const struct rte_flow_attr *attr,
1751 				struct rte_flow_error *error)
1752 {
1753 	struct mlx5_priv *priv = dev->data->dev_private;
1754 	const struct rte_flow_action_queue *queue = action->conf;
1755 
1756 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1757 		return rte_flow_error_set(error, EINVAL,
1758 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1759 					  "can't have 2 fate actions in"
1760 					  " same flow");
1761 	if (attr->egress)
1762 		return rte_flow_error_set(error, ENOTSUP,
1763 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1764 					  "queue action not supported for egress.");
1765 	if (mlx5_is_external_rxq(dev, queue->index))
1766 		return 0;
1767 	if (!priv->rxqs_n)
1768 		return rte_flow_error_set(error, EINVAL,
1769 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1770 					  NULL, "No Rx queues configured");
1771 	if (queue->index >= priv->rxqs_n)
1772 		return rte_flow_error_set(error, EINVAL,
1773 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1774 					  &queue->index,
1775 					  "queue index out of range");
1776 	if (mlx5_rxq_get(dev, queue->index) == NULL)
1777 		return rte_flow_error_set(error, EINVAL,
1778 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1779 					  &queue->index,
1780 					  "queue is not configured");
1781 	return 0;
1782 }
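
/*
 * Illustrative sketch: the queue action only names an Rx queue index, and
 * all of the checks above concern that single field. Queue 3 is
 * hypothetical and must be below the configured rxqs_n, unless it is an
 * external Rx queue, which skips the range/configuration checks.
 *
 *	const struct rte_flow_action_queue conf = { .index = 3 };
 *	const struct rte_flow_action action = {
 *		.type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *		.conf = &conf,
 *	};
 */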
1783 
1784 /**
1785  * Validate queue numbers for device RSS.
1786  *
1787  * @param[in] dev
1788  *   Configured device.
1789  * @param[in] queues
1790  *   Array of queue numbers.
1791  * @param[in] queues_n
1792  *   Size of the @p queues array.
1793  * @param[out] error
1794  *   On error, filled with a textual error description.
1795  * @param[out] queue_idx
1796  *   On error, filled with an offending queue index in @p queues array.
1797  *
1798  * @return
1799  *   0 on success, a negative errno code on error.
1800  */
1801 static int
1802 mlx5_validate_rss_queues(struct rte_eth_dev *dev,
1803 			 const uint16_t *queues, uint32_t queues_n,
1804 			 const char **error, uint32_t *queue_idx)
1805 {
1806 	const struct mlx5_priv *priv = dev->data->dev_private;
1807 	bool is_hairpin = false;
1808 	bool is_ext_rss = false;
1809 	uint32_t i;
1810 
1811 	for (i = 0; i != queues_n; ++i) {
1812 		struct mlx5_rxq_ctrl *rxq_ctrl;
1813 
1814 		if (mlx5_is_external_rxq(dev, queues[i])) {
1815 			is_ext_rss = true;
1816 			continue;
1817 		}
1818 		if (is_ext_rss) {
1819 			*error = "combining external and regular RSS queues is not supported";
1820 			*queue_idx = i;
1821 			return -ENOTSUP;
1822 		}
1823 		if (queues[i] >= priv->rxqs_n) {
1824 			*error = "queue index out of range";
1825 			*queue_idx = i;
1826 			return -EINVAL;
1827 		}
1828 		rxq_ctrl = mlx5_rxq_ctrl_get(dev, queues[i]);
1829 		if (rxq_ctrl == NULL) {
1830 			*error = "queue is not configured";
1831 			*queue_idx = i;
1832 			return -EINVAL;
1833 		}
1834 		if (i == 0 && rxq_ctrl->is_hairpin)
1835 			is_hairpin = true;
1836 		if (is_hairpin != rxq_ctrl->is_hairpin) {
1837 			*error = "combining hairpin and regular RSS queues is not supported";
1838 			*queue_idx = i;
1839 			return -ENOTSUP;
1840 		}
1841 	}
1842 	return 0;
1843 }
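
/*
 * Illustrative sketch: a queue array that this helper rejects. Assuming a
 * hypothetical setup where queue 0 is a hairpin queue and queue 1 a regular
 * one, the loop reports the mismatch at index 1 via *error and *queue_idx.
 *
 *	const uint16_t queues[] = { 0, 1 };
 *	const char *msg;
 *	uint32_t bad_idx;
 *	int ret = mlx5_validate_rss_queues(dev, queues, 2, &msg, &bad_idx);
 *	// ret == -ENOTSUP, bad_idx == 1, msg describes the hairpin mix.
 */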
1844 
1845 /*
1846  * Validate the RSS action.
1847  *
1848  * @param[in] dev
1849  *   Pointer to the Ethernet device structure.
1850  * @param[in] action
1851  *   Pointer to the RSS action.
1852  * @param[out] error
1853  *   Pointer to error structure.
1854  *
1855  * @return
1856  *   0 on success, a negative errno value otherwise and rte_errno is set.
1857  */
1858 int
1859 mlx5_validate_action_rss(struct rte_eth_dev *dev,
1860 			 const struct rte_flow_action *action,
1861 			 struct rte_flow_error *error)
1862 {
1863 	struct mlx5_priv *priv = dev->data->dev_private;
1864 	const struct rte_flow_action_rss *rss = action->conf;
1865 	int ret;
1866 	const char *message;
1867 	uint32_t queue_idx;
1868 
1869 	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1870 	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1871 		return rte_flow_error_set(error, ENOTSUP,
1872 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1873 					  &rss->func,
1874 					  "RSS hash function not supported");
1875 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1876 	if (rss->level > 2)
1877 #else
1878 	if (rss->level > 1)
1879 #endif
1880 		return rte_flow_error_set(error, ENOTSUP,
1881 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1882 					  &rss->level,
1883 					  "tunnel RSS is not supported");
1884 	/* allow RSS key_len 0 in case of NULL (default) RSS key. */
1885 	if (rss->key_len == 0 && rss->key != NULL)
1886 		return rte_flow_error_set(error, ENOTSUP,
1887 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1888 					  &rss->key_len,
1889 					  "RSS hash key length 0");
1890 	if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1891 		return rte_flow_error_set(error, ENOTSUP,
1892 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1893 					  &rss->key_len,
1894 					  "RSS hash key too small");
1895 	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1896 		return rte_flow_error_set(error, ENOTSUP,
1897 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1898 					  &rss->key_len,
1899 					  "RSS hash key too large");
1900 	if (rss->queue_num > priv->sh->dev_cap.ind_table_max_size)
1901 		return rte_flow_error_set(error, ENOTSUP,
1902 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1903 					  &rss->queue_num,
1904 					  "number of queues too large");
1905 	if (rss->types & MLX5_RSS_HF_MASK)
1906 		return rte_flow_error_set(error, ENOTSUP,
1907 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1908 					  &rss->types,
1909 					  "some RSS protocols are not"
1910 					  " supported");
1911 	if ((rss->types & (RTE_ETH_RSS_L3_SRC_ONLY | RTE_ETH_RSS_L3_DST_ONLY)) &&
1912 	    !(rss->types & RTE_ETH_RSS_IP))
1913 		return rte_flow_error_set(error, EINVAL,
1914 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1915 					  "L3 partial RSS requested but L3 RSS"
1916 					  " type not specified");
1917 	if ((rss->types & (RTE_ETH_RSS_L4_SRC_ONLY | RTE_ETH_RSS_L4_DST_ONLY)) &&
1918 	    !(rss->types & (RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP)))
1919 		return rte_flow_error_set(error, EINVAL,
1920 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1921 					  "L4 partial RSS requested but L4 RSS"
1922 					  " type not specified");
1923 	if (!priv->rxqs_n && priv->ext_rxqs == NULL)
1924 		return rte_flow_error_set(error, EINVAL,
1925 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1926 					  NULL, "No Rx queues configured");
1927 	if (!rss->queue_num)
1928 		return rte_flow_error_set(error, EINVAL,
1929 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1930 					  NULL, "No queues configured");
1931 	ret = mlx5_validate_rss_queues(dev, rss->queue, rss->queue_num,
1932 				       &message, &queue_idx);
1933 	if (ret != 0) {
1934 		return rte_flow_error_set(error, -ret,
1935 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1936 					  &rss->queue[queue_idx], message);
1937 	}
1938 	return 0;
1939 }
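
/*
 * Illustrative sketch: an RSS action configuration that satisfies the
 * checks above (Toeplitz hash, outer level, full-size key, non-empty queue
 * list). The key bytes and queue indices are hypothetical.
 *
 *	static const uint8_t key[MLX5_RSS_HASH_KEY_LEN] = { 0x6d, 0x5a, };
 *	static const uint16_t queues[] = { 0, 1, 2, 3 };
 *	const struct rte_flow_action_rss conf = {
 *		.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *		.level = 1,			// outer headers only
 *		.types = RTE_ETH_RSS_IP | RTE_ETH_RSS_UDP,
 *		.key_len = MLX5_RSS_HASH_KEY_LEN,
 *		.key = key,
 *		.queue_num = RTE_DIM(queues),
 *		.queue = queues,
 *	};
 */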
1940 
1941 /*
1942  * Validate the RSS action.
1943  *
1944  * @param[in] action
1945  *   Pointer to the RSS action.
1946  * @param[in] action_flags
1947  *   Bit-fields that holds the actions detected until now.
1948  * @param[in] dev
1949  *   Pointer to the Ethernet device structure.
1950  * @param[in] attr
1951  *   Attributes of flow that includes this action.
1952  * @param[in] item_flags
1953  *   Items that were detected.
1954  * @param[out] error
1955  *   Pointer to error structure.
1956  *
1957  * @return
1958  *   0 on success, a negative errno value otherwise and rte_errno is set.
1959  */
1960 int
1961 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1962 			      uint64_t action_flags,
1963 			      struct rte_eth_dev *dev,
1964 			      const struct rte_flow_attr *attr,
1965 			      uint64_t item_flags,
1966 			      struct rte_flow_error *error)
1967 {
1968 	const struct rte_flow_action_rss *rss = action->conf;
1969 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1970 	int ret;
1971 
1972 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1973 		return rte_flow_error_set(error, EINVAL,
1974 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1975 					  "can't have 2 fate actions"
1976 					  " in same flow");
1977 	ret = mlx5_validate_action_rss(dev, action, error);
1978 	if (ret)
1979 		return ret;
1980 	if (attr->egress)
1981 		return rte_flow_error_set(error, ENOTSUP,
1982 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1983 					  "rss action not supported for "
1984 					  "egress");
1985 	if (rss->level > 1 && !tunnel)
1986 		return rte_flow_error_set(error, EINVAL,
1987 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1988 					  "inner RSS is not supported for "
1989 					  "non-tunnel flows");
1990 	if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
1991 	    !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
1992 		return rte_flow_error_set(error, EINVAL,
1993 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1994 					  "RSS on eCPRI is not supported now");
1995 	}
1996 	if ((item_flags & MLX5_FLOW_LAYER_MPLS) &&
1997 	    !(item_flags &
1998 	      (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3)) &&
1999 	    rss->level > 1)
2000 		return rte_flow_error_set(error, EINVAL,
2001 					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
2002 					  "MPLS inner RSS needs to specify inner L2/L3 items after MPLS in pattern");
2003 	return 0;
2004 }
2005 
2006 /*
2007  * Validate the default miss action.
2008  *
2009  * @param[in] action_flags
2010  *   Bit-fields that holds the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
2011  * @param[out] error
2012  *   Pointer to error structure.
2013  *
2014  * @return
2015  *   0 on success, a negative errno value otherwise and rte_errno is set.
2016  */
2017 int
2018 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
2019 				const struct rte_flow_attr *attr,
2020 				struct rte_flow_error *error)
2021 {
2022 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2023 		return rte_flow_error_set(error, EINVAL,
2024 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2025 					  "can't have 2 fate actions in"
2026 					  " same flow");
2027 	if (attr->egress)
2028 		return rte_flow_error_set(error, ENOTSUP,
2029 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2030 					  "default miss action not supported "
2031 					  "for egress");
2032 	if (attr->group)
2033 		return rte_flow_error_set(error, ENOTSUP,
2034 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
2035 					  "only group 0 is supported");
2036 	if (attr->transfer)
2037 		return rte_flow_error_set(error, ENOTSUP,
2038 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
2039 					  NULL, "transfer is not supported");
2040 	return 0;
2041 }
2042 
2043 /*
2044  * Validate the count action.
2045  *
2046  * @param[in] dev
2047  *   Pointer to the Ethernet device structure.
2048  * @param[in] attr
2049  *   Attributes of flow that includes this action.
2050  * @param[out] error
2051  *   Pointer to error structure.
2052  *
2053  * @return
2054  *   0 on success, a negative errno value otherwise and rte_errno is set.
2055  */
2056 int
2057 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
2058 				const struct rte_flow_attr *attr,
2059 				struct rte_flow_error *error)
2060 {
2061 	if (attr->egress)
2062 		return rte_flow_error_set(error, ENOTSUP,
2063 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2064 					  "count action not supported for "
2065 					  "egress");
2066 	return 0;
2067 }
2068 
2069 /*
2070  * Validate the ASO CT action.
2071  *
2072  * @param[in] dev
2073  *   Pointer to the Ethernet device structure.
2074  * @param[in] conntrack
2075  *   Pointer to the CT action profile.
2076  * @param[out] error
2077  *   Pointer to error structure.
2078  *
2079  * @return
2080  *   0 on success, a negative errno value otherwise and rte_errno is set.
2081  */
2082 int
2083 mlx5_validate_action_ct(struct rte_eth_dev *dev,
2084 			const struct rte_flow_action_conntrack *conntrack,
2085 			struct rte_flow_error *error)
2086 {
2087 	RTE_SET_USED(dev);
2088 
2089 	if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
2090 		return rte_flow_error_set(error, EINVAL,
2091 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2092 					  "Invalid CT state");
2093 	if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
2094 		return rte_flow_error_set(error, EINVAL,
2095 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2096 					  "Invalid last TCP packet flag");
2097 	return 0;
2098 }
2099 
2100 /**
2101  * Verify the @p attributes will be correctly understood by the NIC and store
2102  * them in the @p flow if everything is correct.
2103  *
2104  * @param[in] dev
2105  *   Pointer to the Ethernet device structure.
2106  * @param[in] attributes
2107  *   Pointer to flow attributes
2108  * @param[out] error
2109  *   Pointer to error structure.
2110  *
2111  * @return
2112  *   0 on success, a negative errno value otherwise and rte_errno is set.
2113  */
2114 int
2115 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
2116 			      const struct rte_flow_attr *attributes,
2117 			      struct rte_flow_error *error)
2118 {
2119 	struct mlx5_priv *priv = dev->data->dev_private;
2120 	uint32_t priority_max = priv->sh->flow_max_priority - 1;
2121 
2122 	if (attributes->group)
2123 		return rte_flow_error_set(error, ENOTSUP,
2124 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
2125 					  NULL, "groups are not supported");
2126 	if (attributes->priority != MLX5_FLOW_LOWEST_PRIO_INDICATOR &&
2127 	    attributes->priority >= priority_max)
2128 		return rte_flow_error_set(error, ENOTSUP,
2129 					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
2130 					  NULL, "priority out of range");
2131 	if (attributes->egress)
2132 		return rte_flow_error_set(error, ENOTSUP,
2133 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2134 					  "egress is not supported");
2135 	if (attributes->transfer && !priv->sh->config.dv_esw_en)
2136 		return rte_flow_error_set(error, ENOTSUP,
2137 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
2138 					  NULL, "transfer is not supported");
2139 	if (!attributes->ingress)
2140 		return rte_flow_error_set(error, EINVAL,
2141 					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
2142 					  NULL,
2143 					  "ingress attribute is mandatory");
2144 	return 0;
2145 }
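
/*
 * Illustrative sketch: the only attribute combination accepted here is a
 * plain ingress rule: group 0, in-range priority (assuming the device
 * reports more than one priority level), no egress, and transfer only when
 * E-Switch DV is enabled.
 *
 *	const struct rte_flow_attr attr = {
 *		.group = 0,
 *		.priority = 0,
 *		.ingress = 1,
 *	};
 */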
2146 
2147 /**
2148  * Validate ICMP6 item.
2149  *
2150  * @param[in] item
2151  *   Item specification.
2152  * @param[in] item_flags
2153  *   Bit-fields that holds the items detected until now.
2154  * @param[in] target_protocol
2155  *   The next protocol in the previous item.
2156  * @param[out] error
2157  *   Pointer to error structure.
2158  *
2159  * @return
2160  *   0 on success, a negative errno value otherwise and rte_errno is set.
2161  */
2162 int
2163 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
2164 			       uint64_t item_flags,
2165 			       uint8_t target_protocol,
2166 			       struct rte_flow_error *error)
2167 {
2168 	const struct rte_flow_item_icmp6 *mask = item->mask;
2169 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2170 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
2171 				      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2172 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2173 				      MLX5_FLOW_LAYER_OUTER_L4;
2174 	int ret;
2175 
2176 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
2177 		return rte_flow_error_set(error, EINVAL,
2178 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2179 					  "protocol filtering not compatible"
2180 					  " with ICMP6 layer");
2181 	if (!(item_flags & l3m))
2182 		return rte_flow_error_set(error, EINVAL,
2183 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2184 					  "IPv6 is mandatory to filter on"
2185 					  " ICMP6");
2186 	if (item_flags & l4m)
2187 		return rte_flow_error_set(error, EINVAL,
2188 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2189 					  "multiple L4 layers not supported");
2190 	if (!mask)
2191 		mask = &rte_flow_item_icmp6_mask;
2192 	ret = mlx5_flow_item_acceptable
2193 		(item, (const uint8_t *)mask,
2194 		 (const uint8_t *)&rte_flow_item_icmp6_mask,
2195 		 sizeof(struct rte_flow_item_icmp6),
2196 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2197 	if (ret < 0)
2198 		return ret;
2199 	return 0;
2200 }
2201 
2202 /**
2203  * Validate ICMP item.
2204  *
2205  * @param[in] item
2206  *   Item specification.
2207  * @param[in] item_flags
2208  *   Bit-fields that holds the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
2209  * @param[out] error
2210  *   Pointer to error structure.
2211  *
2212  * @return
2213  *   0 on success, a negative errno value otherwise and rte_errno is set.
2214  */
2215 int
2216 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
2217 			     uint64_t item_flags,
2218 			     uint8_t target_protocol,
2219 			     struct rte_flow_error *error)
2220 {
2221 	const struct rte_flow_item_icmp *mask = item->mask;
2222 	const struct rte_flow_item_icmp nic_mask = {
2223 		.hdr.icmp_type = 0xff,
2224 		.hdr.icmp_code = 0xff,
2225 		.hdr.icmp_ident = RTE_BE16(0xffff),
2226 		.hdr.icmp_seq_nb = RTE_BE16(0xffff),
2227 	};
2228 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2229 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
2230 				      MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2231 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2232 				      MLX5_FLOW_LAYER_OUTER_L4;
2233 	int ret;
2234 
2235 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
2236 		return rte_flow_error_set(error, EINVAL,
2237 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2238 					  "protocol filtering not compatible"
2239 					  " with ICMP layer");
2240 	if (!(item_flags & l3m))
2241 		return rte_flow_error_set(error, EINVAL,
2242 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2243 					  "IPv4 is mandatory to filter"
2244 					  " on ICMP");
2245 	if (item_flags & l4m)
2246 		return rte_flow_error_set(error, EINVAL,
2247 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2248 					  "multiple L4 layers not supported");
2249 	if (!mask)
2250 		mask = &nic_mask;
2251 	ret = mlx5_flow_item_acceptable
2252 		(item, (const uint8_t *)mask,
2253 		 (const uint8_t *)&nic_mask,
2254 		 sizeof(struct rte_flow_item_icmp),
2255 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2256 	if (ret < 0)
2257 		return ret;
2258 	return 0;
2259 }
2260 
2261 /**
2262  * Validate Ethernet item.
2263  *
2264  * @param[in] item
2265  *   Item specification.
2266  * @param[in] item_flags
2267  *   Bit-fields that holds the items detected until now.
2268  * @param[out] error
2269  *   Pointer to error structure.
2270  *
2271  * @return
2272  *   0 on success, a negative errno value otherwise and rte_errno is set.
2273  */
2274 int
2275 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
2276 			    uint64_t item_flags, bool ext_vlan_sup,
2277 			    struct rte_flow_error *error)
2278 {
2279 	const struct rte_flow_item_eth *mask = item->mask;
2280 	const struct rte_flow_item_eth nic_mask = {
2281 		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2282 		.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2283 		.type = RTE_BE16(0xffff),
2284 		.has_vlan = ext_vlan_sup ? 1 : 0,
2285 	};
2286 	int ret;
2287 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2288 	const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2	:
2289 				       MLX5_FLOW_LAYER_OUTER_L2;
2290 
2291 	if (item_flags & ethm)
2292 		return rte_flow_error_set(error, ENOTSUP,
2293 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2294 					  "multiple L2 layers not supported");
2295 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
2296 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
2297 		return rte_flow_error_set(error, EINVAL,
2298 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2299 					  "L2 layer should not follow "
2300 					  "L3 layers");
2301 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
2302 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
2303 		return rte_flow_error_set(error, EINVAL,
2304 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2305 					  "L2 layer should not follow VLAN");
2306 	if (item_flags & MLX5_FLOW_LAYER_GTP)
2307 		return rte_flow_error_set(error, EINVAL,
2308 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2309 					  "L2 layer should not follow GTP");
2310 	if (!mask)
2311 		mask = &rte_flow_item_eth_mask;
2312 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2313 					(const uint8_t *)&nic_mask,
2314 					sizeof(struct rte_flow_item_eth),
2315 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2316 	return ret;
2317 }
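
/*
 * Illustrative sketch: an Ethernet item matching a hypothetical unicast
 * destination MAC, with a mask that stays within nic_mask above.
 *
 *	const struct rte_flow_item_eth spec = {
 *		.dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *		.type = RTE_BE16(RTE_ETHER_TYPE_IPV4),
 *	};
 *	const struct rte_flow_item_eth mask = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *		.type = RTE_BE16(0xffff),
 *	};
 *	const struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_ETH,
 *		.spec = &spec,
 *		.mask = &mask,
 *	};
 */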
2318 
2319 /**
2320  * Validate VLAN item.
2321  *
2322  * @param[in] item
2323  *   Item specification.
2324  * @param[in] item_flags
2325  *   Bit-fields that holds the items detected until now.
2326  * @param[in] dev
2327  *   Ethernet device flow is being created on.
2328  * @param[out] error
2329  *   Pointer to error structure.
2330  *
2331  * @return
2332  *   0 on success, a negative errno value otherwise and rte_errno is set.
2333  */
2334 int
2335 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
2336 			     uint64_t item_flags,
2337 			     struct rte_eth_dev *dev,
2338 			     struct rte_flow_error *error)
2339 {
2340 	const struct rte_flow_item_vlan *spec = item->spec;
2341 	const struct rte_flow_item_vlan *mask = item->mask;
2342 	const struct rte_flow_item_vlan nic_mask = {
2343 		.tci = RTE_BE16(UINT16_MAX),
2344 		.inner_type = RTE_BE16(UINT16_MAX),
2345 	};
2346 	uint16_t vlan_tag = 0;
2347 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2348 	int ret;
2349 	const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
2350 					MLX5_FLOW_LAYER_INNER_L4) :
2351 				       (MLX5_FLOW_LAYER_OUTER_L3 |
2352 					MLX5_FLOW_LAYER_OUTER_L4);
2353 	const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
2354 					MLX5_FLOW_LAYER_OUTER_VLAN;
2355 
2356 	if (item_flags & vlanm)
2357 		return rte_flow_error_set(error, EINVAL,
2358 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2359 					  "multiple VLAN layers not supported");
2360 	else if ((item_flags & l34m) != 0)
2361 		return rte_flow_error_set(error, EINVAL,
2362 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2363 					  "VLAN cannot follow L3/L4 layer");
2364 	if (!mask)
2365 		mask = &rte_flow_item_vlan_mask;
2366 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2367 					(const uint8_t *)&nic_mask,
2368 					sizeof(struct rte_flow_item_vlan),
2369 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2370 	if (ret)
2371 		return ret;
2372 	if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
2373 		struct mlx5_priv *priv = dev->data->dev_private;
2374 
2375 		if (priv->vmwa_context) {
2376 			/*
2377 			 * Non-NULL context means we have a virtual machine
2378 			 * and SR-IOV enabled, we have to create VLAN interface
2379 			 * to make hypervisor to setup E-Switch vport
2380 			 * context correctly. We avoid creating the multiple
2381 			 * VLAN interfaces, so we cannot support VLAN tag mask.
2382 			 */
2383 			return rte_flow_error_set(error, EINVAL,
2384 						  RTE_FLOW_ERROR_TYPE_ITEM,
2385 						  item,
2386 						  "VLAN tag mask is not"
2387 						  " supported in virtual"
2388 						  " environment");
2389 		}
2390 	}
2391 	if (spec) {
2392 		vlan_tag = spec->tci;
2393 		vlan_tag &= mask->tci;
2394 	}
2395 	/*
2396 	 * From verbs perspective an empty VLAN is equivalent
2397 	 * to a packet without VLAN layer.
2398 	 */
2399 	if (!vlan_tag)
2400 		return rte_flow_error_set(error, EINVAL,
2401 					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2402 					  item->spec,
2403 					  "VLAN cannot be empty");
2404 	return 0;
2405 }
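
/*
 * Illustrative sketch: a VLAN item that survives the empty-TCI check above.
 * TCI 100 with a full 12-bit VID mask is hypothetical; spec->tci & mask->tci
 * must be non-zero or the item is rejected as an empty VLAN.
 *
 *	const struct rte_flow_item_vlan spec = { .tci = RTE_BE16(100) };
 *	const struct rte_flow_item_vlan mask = { .tci = RTE_BE16(0x0fff) };
 */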
2406 
2407 /**
2408  * Validate IPV4 item.
2409  *
2410  * @param[in] item
2411  *   Item specification.
2412  * @param[in] item_flags
2413  *   Bit-fields that holds the items detected until now.
2414  * @param[in] last_item
2415  *   Previous validated item in the pattern items.
2416  * @param[in] ether_type
2417  *   Type in the ethernet layer header (including dot1q).
2418  * @param[in] acc_mask
2419  *   Acceptable mask, if NULL the default internal mask
2420  *   will be used to check whether item fields are supported.
2421  * @param[in] range_accepted
2422  *   True if range of values is accepted for specific fields, false otherwise.
2423  * @param[out] error
2424  *   Pointer to error structure.
2425  *
2426  * @return
2427  *   0 on success, a negative errno value otherwise and rte_errno is set.
2428  */
2429 int
2430 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2431 			     uint64_t item_flags,
2432 			     uint64_t last_item,
2433 			     uint16_t ether_type,
2434 			     const struct rte_flow_item_ipv4 *acc_mask,
2435 			     bool range_accepted,
2436 			     struct rte_flow_error *error)
2437 {
2438 	const struct rte_flow_item_ipv4 *mask = item->mask;
2439 	const struct rte_flow_item_ipv4 *spec = item->spec;
2440 	const struct rte_flow_item_ipv4 nic_mask = {
2441 		.hdr = {
2442 			.src_addr = RTE_BE32(0xffffffff),
2443 			.dst_addr = RTE_BE32(0xffffffff),
2444 			.type_of_service = 0xff,
2445 			.next_proto_id = 0xff,
2446 		},
2447 	};
2448 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2449 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2450 				      MLX5_FLOW_LAYER_OUTER_L3;
2451 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2452 				      MLX5_FLOW_LAYER_OUTER_L4;
2453 	int ret;
2454 	uint8_t next_proto = 0xFF;
2455 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2456 				  MLX5_FLOW_LAYER_OUTER_VLAN |
2457 				  MLX5_FLOW_LAYER_INNER_VLAN);
2458 
2459 	if ((last_item & l2_vlan) && ether_type &&
2460 	    ether_type != RTE_ETHER_TYPE_IPV4)
2461 		return rte_flow_error_set(error, EINVAL,
2462 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2463 					  "IPv4 cannot follow L2/VLAN layer "
2464 					  "which ether type is not IPv4");
2465 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2466 		if (mask && spec)
2467 			next_proto = mask->hdr.next_proto_id &
2468 				     spec->hdr.next_proto_id;
2469 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2470 			return rte_flow_error_set(error, EINVAL,
2471 						  RTE_FLOW_ERROR_TYPE_ITEM,
2472 						  item,
2473 						  "multiple tunnel "
2474 						  "not supported");
2475 	}
2476 	if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
2477 		return rte_flow_error_set(error, EINVAL,
2478 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2479 					  "wrong tunnel type - IPv6 specified "
2480 					  "but IPv4 item provided");
2481 	if (item_flags & l3m)
2482 		return rte_flow_error_set(error, ENOTSUP,
2483 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2484 					  "multiple L3 layers not supported");
2485 	else if (item_flags & l4m)
2486 		return rte_flow_error_set(error, EINVAL,
2487 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2488 					  "L3 cannot follow an L4 layer.");
2489 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2490 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2491 		return rte_flow_error_set(error, EINVAL,
2492 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2493 					  "L3 cannot follow an NVGRE layer.");
2494 	if (!mask)
2495 		mask = &rte_flow_item_ipv4_mask;
2496 	else if (mask->hdr.next_proto_id != 0 &&
2497 		 mask->hdr.next_proto_id != 0xff)
2498 		return rte_flow_error_set(error, EINVAL,
2499 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2500 					  "partial mask is not supported"
2501 					  " for protocol");
2502 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2503 					acc_mask ? (const uint8_t *)acc_mask
2504 						 : (const uint8_t *)&nic_mask,
2505 					sizeof(struct rte_flow_item_ipv4),
2506 					range_accepted, error);
2507 	if (ret < 0)
2508 		return ret;
2509 	return 0;
2510 }
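
/*
 * Illustrative sketch: an IPv4 item using the only protocol mask values the
 * code above accepts (0 or 0xff); a partial next_proto_id mask such as 0x0f
 * would be rejected. The address is hypothetical.
 *
 *	const struct rte_flow_item_ipv4 spec = {
 *		.hdr = {
 *			.dst_addr = RTE_BE32(RTE_IPV4(192, 168, 0, 1)),
 *			.next_proto_id = IPPROTO_UDP,
 *		},
 *	};
 *	const struct rte_flow_item_ipv4 mask = {
 *		.hdr = {
 *			.dst_addr = RTE_BE32(0xffffffff),
 *			.next_proto_id = 0xff,
 *		},
 *	};
 */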
2511 
2512 /**
2513  * Validate IPV6 item.
2514  *
2515  * @param[in] item
2516  *   Item specification.
2517  * @param[in] item_flags
2518  *   Bit-fields that holds the items detected until now.
2519  * @param[in] last_item
2520  *   Previous validated item in the pattern items.
2521  * @param[in] ether_type
2522  *   Type in the ethernet layer header (including dot1q).
2523  * @param[in] acc_mask
2524  *   Acceptable mask, if NULL the default internal mask
2525  *   will be used to check whether item fields are supported.
2526  * @param[out] error
2527  *   Pointer to error structure.
2528  *
2529  * @return
2530  *   0 on success, a negative errno value otherwise and rte_errno is set.
2531  */
2532 int
2533 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2534 			     uint64_t item_flags,
2535 			     uint64_t last_item,
2536 			     uint16_t ether_type,
2537 			     const struct rte_flow_item_ipv6 *acc_mask,
2538 			     struct rte_flow_error *error)
2539 {
2540 	const struct rte_flow_item_ipv6 *mask = item->mask;
2541 	const struct rte_flow_item_ipv6 *spec = item->spec;
2542 	const struct rte_flow_item_ipv6 nic_mask = {
2543 		.hdr = {
2544 			.src_addr =
2545 				"\xff\xff\xff\xff\xff\xff\xff\xff"
2546 				"\xff\xff\xff\xff\xff\xff\xff\xff",
2547 			.dst_addr =
2548 				"\xff\xff\xff\xff\xff\xff\xff\xff"
2549 				"\xff\xff\xff\xff\xff\xff\xff\xff",
2550 			.vtc_flow = RTE_BE32(0xffffffff),
2551 			.proto = 0xff,
2552 		},
2553 	};
2554 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2555 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2556 				      MLX5_FLOW_LAYER_OUTER_L3;
2557 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2558 				      MLX5_FLOW_LAYER_OUTER_L4;
2559 	int ret;
2560 	uint8_t next_proto = 0xFF;
2561 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2562 				  MLX5_FLOW_LAYER_OUTER_VLAN |
2563 				  MLX5_FLOW_LAYER_INNER_VLAN);
2564 
2565 	if ((last_item & l2_vlan) && ether_type &&
2566 	    ether_type != RTE_ETHER_TYPE_IPV6)
2567 		return rte_flow_error_set(error, EINVAL,
2568 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2569 					  "IPv6 cannot follow L2/VLAN layer "
2570 					  "which ether type is not IPv6");
2571 	if (mask && mask->hdr.proto == UINT8_MAX && spec)
2572 		next_proto = spec->hdr.proto;
2573 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2574 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2575 			return rte_flow_error_set(error, EINVAL,
2576 						  RTE_FLOW_ERROR_TYPE_ITEM,
2577 						  item,
2578 						  "multiple tunnel "
2579 						  "not supported");
2580 	}
2581 	if (next_proto == IPPROTO_HOPOPTS  ||
2582 	    next_proto == IPPROTO_ROUTING  ||
2583 	    next_proto == IPPROTO_FRAGMENT ||
2584 	    next_proto == IPPROTO_ESP	   ||
2585 	    next_proto == IPPROTO_AH	   ||
2586 	    next_proto == IPPROTO_DSTOPTS)
2587 		return rte_flow_error_set(error, EINVAL,
2588 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2589 					  "IPv6 proto (next header) should "
2590 					  "not be set as extension header");
2591 	if (item_flags & MLX5_FLOW_LAYER_IPIP)
2592 		return rte_flow_error_set(error, EINVAL,
2593 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2594 					  "wrong tunnel type - IPv4 specified "
2595 					  "but IPv6 item provided");
2596 	if (item_flags & l3m)
2597 		return rte_flow_error_set(error, ENOTSUP,
2598 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2599 					  "multiple L3 layers not supported");
2600 	else if (item_flags & l4m)
2601 		return rte_flow_error_set(error, EINVAL,
2602 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2603 					  "L3 cannot follow an L4 layer.");
2604 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2605 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2606 		return rte_flow_error_set(error, EINVAL,
2607 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2608 					  "L3 cannot follow an NVGRE layer.");
2609 	if (!mask)
2610 		mask = &rte_flow_item_ipv6_mask;
2611 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2612 					acc_mask ? (const uint8_t *)acc_mask
2613 						 : (const uint8_t *)&nic_mask,
2614 					sizeof(struct rte_flow_item_ipv6),
2615 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2616 	if (ret < 0)
2617 		return ret;
2618 	return 0;
2619 }
2620 
2621 /**
2622  * Validate UDP item.
2623  *
2624  * @param[in] item
2625  *   Item specification.
2626  * @param[in] item_flags
2627  *   Bit-fields that holds the items detected until now.
2628  * @param[in] target_protocol
2629  *   The next protocol in the previous item.
2632  * @param[out] error
2633  *   Pointer to error structure.
2634  *
2635  * @return
2636  *   0 on success, a negative errno value otherwise and rte_errno is set.
2637  */
2638 int
2639 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2640 			    uint64_t item_flags,
2641 			    uint8_t target_protocol,
2642 			    struct rte_flow_error *error)
2643 {
2644 	const struct rte_flow_item_udp *mask = item->mask;
2645 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2646 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2647 				      MLX5_FLOW_LAYER_OUTER_L3;
2648 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2649 				      MLX5_FLOW_LAYER_OUTER_L4;
2650 	int ret;
2651 
2652 	if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
2653 		return rte_flow_error_set(error, EINVAL,
2654 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2655 					  "protocol filtering not compatible"
2656 					  " with UDP layer");
2657 	if (!(item_flags & l3m))
2658 		return rte_flow_error_set(error, EINVAL,
2659 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2660 					  "L3 is mandatory to filter on L4");
2661 	if (item_flags & l4m)
2662 		return rte_flow_error_set(error, EINVAL,
2663 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2664 					  "multiple L4 layers not supported");
2665 	if (!mask)
2666 		mask = &rte_flow_item_udp_mask;
2667 	ret = mlx5_flow_item_acceptable
2668 		(item, (const uint8_t *)mask,
2669 		 (const uint8_t *)&rte_flow_item_udp_mask,
2670 		 sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2671 		 error);
2672 	if (ret < 0)
2673 		return ret;
2674 	return 0;
2675 }
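
/*
 * Illustrative sketch: the pattern ordering that the L3/L4 checks above
 * encode. UDP must follow an L3 item and no other L4 item; matching only on
 * the destination port keeps the mask inside rte_flow_item_udp_mask.
 *
 *	const struct rte_flow_item_udp spec = {
 *		.hdr.dst_port = RTE_BE16(4789),	// hypothetical port
 *	};
 *	const struct rte_flow_item_udp mask = {
 *		.hdr.dst_port = RTE_BE16(0xffff),
 *	};
 *	// Valid: ETH -> IPV4 -> UDP. Invalid: ETH -> UDP (no L3),
 *	// or IPV4 -> TCP -> UDP (multiple L4 layers).
 */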
2676 
2677 /**
2678  * Validate TCP item.
2679  *
2680  * @param[in] item
2681  *   Item specification.
2682  * @param[in] item_flags
2683  *   Bit-fields that holds the items detected until now.
2684  * @param[in] target_protocol
2685  *   The next protocol in the previous item.
2686  * @param[out] error
2687  *   Pointer to error structure.
2688  *
2689  * @return
2690  *   0 on success, a negative errno value otherwise and rte_errno is set.
2691  */
2692 int
2693 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2694 			    uint64_t item_flags,
2695 			    uint8_t target_protocol,
2696 			    const struct rte_flow_item_tcp *flow_mask,
2697 			    struct rte_flow_error *error)
2698 {
2699 	const struct rte_flow_item_tcp *mask = item->mask;
2700 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2701 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2702 				      MLX5_FLOW_LAYER_OUTER_L3;
2703 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2704 				      MLX5_FLOW_LAYER_OUTER_L4;
2705 	int ret;
2706 
2707 	MLX5_ASSERT(flow_mask);
2708 	if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
2709 		return rte_flow_error_set(error, EINVAL,
2710 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2711 					  "protocol filtering not compatible"
2712 					  " with TCP layer");
2713 	if (!(item_flags & l3m))
2714 		return rte_flow_error_set(error, EINVAL,
2715 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2716 					  "L3 is mandatory to filter on L4");
2717 	if (item_flags & l4m)
2718 		return rte_flow_error_set(error, EINVAL,
2719 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2720 					  "multiple L4 layers not supported");
2721 	if (!mask)
2722 		mask = &rte_flow_item_tcp_mask;
2723 	ret = mlx5_flow_item_acceptable
2724 		(item, (const uint8_t *)mask,
2725 		 (const uint8_t *)flow_mask,
2726 		 sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2727 		 error);
2728 	if (ret < 0)
2729 		return ret;
2730 	return 0;
2731 }
2732 
2733 /**
2734  * Validate VXLAN item.
2735  *
2736  * @param[in] dev
2737  *   Pointer to the Ethernet device structure.
2738  * @param[in] udp_dport
2739  *   UDP destination port
2740  * @param[in] item
2741  *   Item specification.
2742  * @param[in] item_flags
2743  *   Bit-fields that holds the items detected until now.
2744  * @param[in] attr
2745  *   Flow rule attributes.
2746  * @param[out] error
2747  *   Pointer to error structure.
2748  *
2749  * @return
2750  *   0 on success, a negative errno value otherwise and rte_errno is set.
2751  */
2752 int
2753 mlx5_flow_validate_item_vxlan(struct rte_eth_dev *dev,
2754 			      uint16_t udp_dport,
2755 			      const struct rte_flow_item *item,
2756 			      uint64_t item_flags,
2757 			      const struct rte_flow_attr *attr,
2758 			      struct rte_flow_error *error)
2759 {
2760 	const struct rte_flow_item_vxlan *spec = item->spec;
2761 	const struct rte_flow_item_vxlan *mask = item->mask;
2762 	int ret;
2763 	struct mlx5_priv *priv = dev->data->dev_private;
2764 	union vni {
2765 		uint32_t vlan_id;
2766 		uint8_t vni[4];
2767 	} id = { .vlan_id = 0, };
2768 	const struct rte_flow_item_vxlan nic_mask = {
2769 		.vni = "\xff\xff\xff",
2770 		.rsvd1 = 0xff,
2771 	};
2772 	const struct rte_flow_item_vxlan *valid_mask;
2773 
2774 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2775 		return rte_flow_error_set(error, ENOTSUP,
2776 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2777 					  "multiple tunnel layers not"
2778 					  " supported");
2779 	valid_mask = &rte_flow_item_vxlan_mask;
2780 	/*
2781 	 * Verify only UDPv4 is present as defined in
2782 	 * https://tools.ietf.org/html/rfc7348
2783 	 */
2784 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2785 		return rte_flow_error_set(error, EINVAL,
2786 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2787 					  "no outer UDP layer found");
2788 	if (!mask)
2789 		mask = &rte_flow_item_vxlan_mask;
2790 
2791 	if (priv->sh->steering_format_version !=
2792 	    MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
2793 	    !udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN) {
2794 		/* FDB domain & NIC domain non-zero group */
2795 		if ((attr->transfer || attr->group) && priv->sh->misc5_cap)
2796 			valid_mask = &nic_mask;
2797 		/* Group zero in NIC domain */
2798 		if (!attr->group && !attr->transfer &&
2799 		    priv->sh->tunnel_header_0_1)
2800 			valid_mask = &nic_mask;
2801 	}
2802 	ret = mlx5_flow_item_acceptable
2803 		(item, (const uint8_t *)mask,
2804 		 (const uint8_t *)valid_mask,
2805 		 sizeof(struct rte_flow_item_vxlan),
2806 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2807 	if (ret < 0)
2808 		return ret;
2809 	if (spec) {
2810 		memcpy(&id.vni[1], spec->vni, 3);
2811 		memcpy(&id.vni[1], mask->vni, 3);
2812 	}
2813 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2814 		return rte_flow_error_set(error, ENOTSUP,
2815 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2816 					  "VXLAN tunnel must be fully defined");
2817 	return 0;
2818 }
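
/*
 * Illustrative sketch: the outer-layer requirement above means a VXLAN item
 * is only accepted inside a fully specified outer stack, e.g.:
 *
 *	const struct rte_flow_item_vxlan vxlan_spec = {
 *		.vni = "\x00\x01\x02",	// hypothetical VNI 0x000102
 *	};
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &vxlan_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */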
2819 
2820 /**
2821  * Validate VXLAN_GPE item.
2822  *
2823  * @param[in] item
2824  *   Item specification.
2825  * @param[in] item_flags
2826  *   Bit-fields that holds the items detected until now.
2827  * @param[in] dev
2828  *   Pointer to the Ethernet device structure.
2831  * @param[out] error
2832  *   Pointer to error structure.
2833  *
2834  * @return
2835  *   0 on success, a negative errno value otherwise and rte_errno is set.
2836  */
2837 int
2838 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
2839 				  uint64_t item_flags,
2840 				  struct rte_eth_dev *dev,
2841 				  struct rte_flow_error *error)
2842 {
2843 	struct mlx5_priv *priv = dev->data->dev_private;
2844 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
2845 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
2846 	int ret;
2847 	union vni {
2848 		uint32_t vlan_id;
2849 		uint8_t vni[4];
2850 	} id = { .vlan_id = 0, };
2851 
2852 	if (!priv->sh->config.l3_vxlan_en)
2853 		return rte_flow_error_set(error, ENOTSUP,
2854 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2855 					  "L3 VXLAN is not enabled by device"
2856 					  " parameter and/or not configured in"
2857 					  " firmware");
2858 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2859 		return rte_flow_error_set(error, ENOTSUP,
2860 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2861 					  "multiple tunnel layers not"
2862 					  " supported");
2863 	/*
2864 	 * Verify only UDPv4 is present as defined in the VXLAN-GPE
2865 	 * draft (draft-ietf-nvo3-vxlan-gpe).
2866 	 */
2867 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2868 		return rte_flow_error_set(error, EINVAL,
2869 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2870 					  "no outer UDP layer found");
2871 	if (!mask)
2872 		mask = &rte_flow_item_vxlan_gpe_mask;
2873 	ret = mlx5_flow_item_acceptable
2874 		(item, (const uint8_t *)mask,
2875 		 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2876 		 sizeof(struct rte_flow_item_vxlan_gpe),
2877 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2878 	if (ret < 0)
2879 		return ret;
2880 	if (spec) {
2881 		if (spec->protocol)
2882 			return rte_flow_error_set(error, ENOTSUP,
2883 						  RTE_FLOW_ERROR_TYPE_ITEM,
2884 						  item,
2885 						  "VxLAN-GPE protocol"
2886 						  " not supported");
2887 		memcpy(&id.vni[1], spec->vni, 3);
2888 		memcpy(&id.vni[1], mask->vni, 3);
2889 	}
2890 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2891 		return rte_flow_error_set(error, ENOTSUP,
2892 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2893 					  "VXLAN-GPE tunnel must be fully"
2894 					  " defined");
2895 	return 0;
2896 }
2897 /**
2898  * Validate GRE Key item.
2899  *
2900  * @param[in] item
2901  *   Item specification.
2902  * @param[in] item_flags
2903  *   Bit flags to mark detected items.
2904  * @param[in] gre_item
2905  *   Pointer to the GRE item.
2906  * @param[out] error
2907  *   Pointer to error structure.
2908  *
2909  * @return
2910  *   0 on success, a negative errno value otherwise and rte_errno is set.
2911  */
2912 int
2913 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2914 				uint64_t item_flags,
2915 				const struct rte_flow_item *gre_item,
2916 				struct rte_flow_error *error)
2917 {
2918 	const rte_be32_t *mask = item->mask;
2919 	int ret = 0;
2920 	rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2921 	const struct rte_flow_item_gre *gre_spec;
2922 	const struct rte_flow_item_gre *gre_mask;
2923 
2924 	if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2925 		return rte_flow_error_set(error, ENOTSUP,
2926 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2927 					  "Multiple GRE keys not supported");
2928 	if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2929 		return rte_flow_error_set(error, ENOTSUP,
2930 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2931 					  "No preceding GRE header");
2932 	if (item_flags & MLX5_FLOW_LAYER_INNER)
2933 		return rte_flow_error_set(error, ENOTSUP,
2934 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2935 					  "GRE key following a wrong item");
2936 	gre_mask = gre_item->mask;
2937 	if (!gre_mask)
2938 		gre_mask = &rte_flow_item_gre_mask;
2939 	gre_spec = gre_item->spec;
2940 	if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2941 			 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2942 		return rte_flow_error_set(error, EINVAL,
2943 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2944 					  "Key bit must be on");
2945 
2946 	if (!mask)
2947 		mask = &gre_key_default_mask;
2948 	ret = mlx5_flow_item_acceptable
2949 		(item, (const uint8_t *)mask,
2950 		 (const uint8_t *)&gre_key_default_mask,
2951 		 sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2952 	return ret;
2953 }
2954 
2955 /**
2956  * Validate GRE optional item.
2957  *
2958  * @param[in] dev
2959  *   Pointer to the Ethernet device structure.
2960  * @param[in] item
2961  *   Item specification.
2962  * @param[in] item_flags
2963  *   Bit flags to mark detected items.
2964  * @param[in] attr
2965  *   Flow rule attributes.
2966  * @param[in] gre_item
2967  *   Pointer to the GRE item.
2968  * @param[out] error
2969  *   Pointer to error structure.
2970  *
2971  * @return
2972  *   0 on success, a negative errno value otherwise and rte_errno is set.
2973  */
2974 int
2975 mlx5_flow_validate_item_gre_option(struct rte_eth_dev *dev,
2976 				   const struct rte_flow_item *item,
2977 				   uint64_t item_flags,
2978 				   const struct rte_flow_attr *attr,
2979 				   const struct rte_flow_item *gre_item,
2980 				   struct rte_flow_error *error)
2981 {
2982 	const struct rte_flow_item_gre *gre_spec = gre_item->spec;
2983 	const struct rte_flow_item_gre *gre_mask = gre_item->mask;
2984 	const struct rte_flow_item_gre_opt *spec = item->spec;
2985 	const struct rte_flow_item_gre_opt *mask = item->mask;
2986 	struct mlx5_priv *priv = dev->data->dev_private;
2987 	int ret = 0;
2988 	struct rte_flow_item_gre_opt nic_mask = {
2989 		.checksum_rsvd = {
2990 			.checksum = RTE_BE16(UINT16_MAX),
2991 			.reserved1 = 0x0,
2992 		},
2993 		.key = {
2994 			.key = RTE_BE32(UINT32_MAX),
2995 		},
2996 		.sequence = {
2997 			.sequence = RTE_BE32(UINT32_MAX),
2998 		},
2999 	};
3000 
3001 	if (!(item_flags & MLX5_FLOW_LAYER_GRE))
3002 		return rte_flow_error_set(error, ENOTSUP,
3003 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3004 					  "No preceding GRE header");
3005 	if (item_flags & MLX5_FLOW_LAYER_INNER)
3006 		return rte_flow_error_set(error, ENOTSUP,
3007 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3008 					  "GRE option following a wrong item");
3009 	if (!spec || !mask)
3010 		return rte_flow_error_set(error, EINVAL,
3011 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3012 					  "At least one GRE option field (checksum/key/sequence) must be specified");
3013 	if (!gre_mask)
3014 		gre_mask = &rte_flow_item_gre_mask;
3015 	if (mask->checksum_rsvd.checksum)
3016 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x8000)) &&
3017 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x8000)))
3018 			return rte_flow_error_set(error, EINVAL,
3019 						  RTE_FLOW_ERROR_TYPE_ITEM,
3020 						  item,
3021 						  "Checksum bit must be on");
3022 	if (mask->key.key)
3023 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
3024 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
3025 			return rte_flow_error_set(error, EINVAL,
3026 						  RTE_FLOW_ERROR_TYPE_ITEM,
3027 						  item, "Key bit must be on");
3028 	if (mask->sequence.sequence)
3029 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x1000)) &&
3030 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x1000)))
3031 			return rte_flow_error_set(error, EINVAL,
3032 						  RTE_FLOW_ERROR_TYPE_ITEM,
3033 						  item,
3034 						  "Sequence bit must be on");
3035 	if (mask->checksum_rsvd.checksum || mask->sequence.sequence) {
3036 		if (priv->sh->steering_format_version ==
3037 		    MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
3038 		    ((attr->group || attr->transfer) &&
3039 		     !priv->sh->misc5_cap) ||
3040 		    (!(priv->sh->tunnel_header_0_1 &&
3041 		       priv->sh->tunnel_header_2_3) &&
3042 		    !attr->group && !attr->transfer))
3043 			return rte_flow_error_set(error, EINVAL,
3044 						  RTE_FLOW_ERROR_TYPE_ITEM,
3045 						  item,
3046 						  "Checksum/Sequence not supported");
3047 	}
3048 	ret = mlx5_flow_item_acceptable
3049 		(item, (const uint8_t *)mask,
3050 		 (const uint8_t *)&nic_mask,
3051 		 sizeof(struct rte_flow_item_gre_opt),
3052 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3053 	return ret;
3054 }
3055 
3056 /**
3057  * Validate GRE item.
3058  *
3059  * @param[in] item
3060  *   Item specification.
3061  * @param[in] item_flags
3062  *   Bit flags to mark detected items.
3063  * @param[in] target_protocol
3064  *   The next protocol in the previous item.
3065  * @param[out] error
3066  *   Pointer to error structure.
3067  *
3068  * @return
3069  *   0 on success, a negative errno value otherwise and rte_errno is set.
3070  */
3071 int
3072 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
3073 			    uint64_t item_flags,
3074 			    uint8_t target_protocol,
3075 			    struct rte_flow_error *error)
3076 {
3077 	const struct rte_flow_item_gre *spec __rte_unused = item->spec;
3078 	const struct rte_flow_item_gre *mask = item->mask;
3079 	int ret;
3080 	const struct rte_flow_item_gre nic_mask = {
3081 		.c_rsvd0_ver = RTE_BE16(0xB000),
3082 		.protocol = RTE_BE16(UINT16_MAX),
3083 	};
3084 
3085 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3086 		return rte_flow_error_set(error, EINVAL,
3087 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3088 					  "protocol filtering not compatible"
3089 					  " with this GRE layer");
3090 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3091 		return rte_flow_error_set(error, ENOTSUP,
3092 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3093 					  "multiple tunnel layers not"
3094 					  " supported");
3095 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3096 		return rte_flow_error_set(error, ENOTSUP,
3097 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3098 					  "L3 Layer is missing");
3099 	if (!mask)
3100 		mask = &rte_flow_item_gre_mask;
3101 	ret = mlx5_flow_item_acceptable
3102 		(item, (const uint8_t *)mask,
3103 		 (const uint8_t *)&nic_mask,
3104 		 sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
3105 		 error);
3106 	if (ret < 0)
3107 		return ret;
3108 #ifndef HAVE_MLX5DV_DR
3109 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
3110 	if (spec && (spec->protocol & mask->protocol))
3111 		return rte_flow_error_set(error, ENOTSUP,
3112 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3113 					  "without MPLS support the"
3114 					  " specification cannot be used for"
3115 					  " filtering");
3116 #endif
3117 #endif
3118 	return 0;
3119 }
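
/*
 * Illustrative sketch: GRE rides directly on L3 (no UDP), which is what the
 * outer-L3 check above enforces. nic_mask limits matching to the C/K/S bits
 * and the protocol field.
 *
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },	// proto 47 (GRE)
 *		{ .type = RTE_FLOW_ITEM_TYPE_GRE },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */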
3120 
3121 /**
3122  * Validate Geneve item.
3123  *
3124  * @param[in] item
3125  *   Item specification.
3126  * @param[in] item_flags
3127  *   Bit-fields that holds the items detected until now.
3128  * @param[in] dev
3129  *   Pointer to the Ethernet device structure.
3130  * @param[out] error
3131  *   Pointer to error structure.
3132  *
3133  * @return
3134  *   0 on success, a negative errno value otherwise and rte_errno is set.
3135  */
3136 
3137 int
3138 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
3139 			       uint64_t item_flags,
3140 			       struct rte_eth_dev *dev,
3141 			       struct rte_flow_error *error)
3142 {
3143 	struct mlx5_priv *priv = dev->data->dev_private;
3144 	const struct rte_flow_item_geneve *spec = item->spec;
3145 	const struct rte_flow_item_geneve *mask = item->mask;
3146 	int ret;
3147 	uint16_t gbhdr;
3148 	uint8_t opt_len = priv->sh->cdev->config.hca_attr.geneve_max_opt_len ?
3149 			  MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
3150 	const struct rte_flow_item_geneve nic_mask = {
3151 		.ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
3152 		.vni = "\xff\xff\xff",
3153 		.protocol = RTE_BE16(UINT16_MAX),
3154 	};
3155 
3156 	if (!priv->sh->cdev->config.hca_attr.tunnel_stateless_geneve_rx)
3157 		return rte_flow_error_set(error, ENOTSUP,
3158 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3159 					  "L3 Geneve is not enabled by device"
3160 					  " parameter and/or not configured in"
3161 					  " firmware");
3162 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3163 		return rte_flow_error_set(error, ENOTSUP,
3164 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3165 					  "multiple tunnel layers not"
3166 					  " supported");
3167 	/*
3168 	 * Verify only UDPv4 is present as defined in
3169 	 * https://tools.ietf.org/html/rfc8926 (Geneve)
3170 	 */
3171 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
3172 		return rte_flow_error_set(error, EINVAL,
3173 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3174 					  "no outer UDP layer found");
3175 	if (!mask)
3176 		mask = &rte_flow_item_geneve_mask;
3177 	ret = mlx5_flow_item_acceptable
3178 				  (item, (const uint8_t *)mask,
3179 				   (const uint8_t *)&nic_mask,
3180 				   sizeof(struct rte_flow_item_geneve),
3181 				   MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3182 	if (ret)
3183 		return ret;
3184 	if (spec) {
3185 		gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
3186 		if (MLX5_GENEVE_VER_VAL(gbhdr) ||
3187 		     MLX5_GENEVE_CRITO_VAL(gbhdr) ||
3188 		     MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
3189 			return rte_flow_error_set(error, ENOTSUP,
3190 						  RTE_FLOW_ERROR_TYPE_ITEM,
3191 						  item,
3192 						  "Geneve protocol unsupported"
3193 						  " fields are being used");
3194 		if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
3195 			return rte_flow_error_set
3196 					(error, ENOTSUP,
3197 					 RTE_FLOW_ERROR_TYPE_ITEM,
3198 					 item,
3199 					 "Unsupported Geneve options length");
3200 	}
3201 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
3202 		return rte_flow_error_set
3203 				    (error, ENOTSUP,
3204 				     RTE_FLOW_ERROR_TYPE_ITEM, item,
3205 				     "Geneve tunnel must be fully defined");
3206 	return 0;
3207 }
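
/*
 * Illustrative sketch: the Geneve version, reserved bits and rsvd1 must be
 * zero in the spec, and the advertised option length may not exceed what the
 * HCA reports. A minimal spec matching only a hypothetical VNI:
 *
 *	const struct rte_flow_item_geneve spec = {
 *		.vni = "\x00\x00\x2a",	// hypothetical VNI 42
 *	};
 *	const struct rte_flow_item_geneve mask = {
 *		.vni = "\xff\xff\xff",
 *	};
 */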
3208 
3209 /**
3210  * Validate Geneve TLV option item.
3211  *
3212  * @param[in] item
3213  *   Item specification.
3214  * @param[in] last_item
3215  *   Previous validated item in the pattern items.
3216  * @param[in] geneve_item
3217  *   Previous GENEVE item specification.
3218  * @param[in] dev
3219  *   Pointer to the rte_eth_dev structure.
3220  * @param[out] error
3221  *   Pointer to error structure.
3222  *
3223  * @return
3224  *   0 on success, a negative errno value otherwise and rte_errno is set.
3225  */
3226 int
3227 mlx5_flow_validate_item_geneve_opt(const struct rte_flow_item *item,
3228 				   uint64_t last_item,
3229 				   const struct rte_flow_item *geneve_item,
3230 				   struct rte_eth_dev *dev,
3231 				   struct rte_flow_error *error)
3232 {
3233 	struct mlx5_priv *priv = dev->data->dev_private;
3234 	struct mlx5_dev_ctx_shared *sh = priv->sh;
3235 	struct mlx5_geneve_tlv_option_resource *geneve_opt_resource;
3236 	struct mlx5_hca_attr *hca_attr = &sh->cdev->config.hca_attr;
3237 	uint8_t data_max_supported =
3238 			hca_attr->max_geneve_tlv_option_data_len * 4;
3239 	const struct rte_flow_item_geneve *geneve_spec;
3240 	const struct rte_flow_item_geneve *geneve_mask;
3241 	const struct rte_flow_item_geneve_opt *spec = item->spec;
3242 	const struct rte_flow_item_geneve_opt *mask = item->mask;
3243 	unsigned int i;
3244 	unsigned int data_len;
3245 	uint8_t tlv_option_len;
3246 	uint16_t optlen_m, optlen_v;
3247 	const struct rte_flow_item_geneve_opt full_mask = {
3248 		.option_class = RTE_BE16(0xffff),
3249 		.option_type = 0xff,
3250 		.option_len = 0x1f,
3251 	};
3252 
3253 	if (!mask)
3254 		mask = &rte_flow_item_geneve_opt_mask;
3255 	if (!spec)
3256 		return rte_flow_error_set
3257 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3258 			"Geneve TLV opt class/type/length must be specified");
3259 	if ((uint32_t)spec->option_len > MLX5_GENEVE_OPTLEN_MASK)
3260 		return rte_flow_error_set
3261 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3262 			"Geneve TLV opt length exceeds the limit (31)");
3263 	/* Check if class type and length masks are full. */
3264 	if (full_mask.option_class != mask->option_class ||
3265 	    full_mask.option_type != mask->option_type ||
3266 	    full_mask.option_len != (mask->option_len & full_mask.option_len))
3267 		return rte_flow_error_set
3268 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3269 			"Geneve TLV opt class/type/length masks must be full");
3270 	/* Check if length is supported */
3271 	if ((uint32_t)spec->option_len >
3272 			hca_attr->max_geneve_tlv_option_data_len)
3273 		return rte_flow_error_set
3274 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3275 			"Geneve TLV opt length not supported");
3276 	if (hca_attr->max_geneve_tlv_options > 1)
3277 		DRV_LOG(DEBUG,
3278 			"max_geneve_tlv_options supports more than 1 option");
3279 	/* Check GENEVE item preceding. */
3280 	if (!geneve_item || !(last_item & MLX5_FLOW_LAYER_GENEVE))
3281 		return rte_flow_error_set
3282 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3283 			"Geneve opt item must be preceded with Geneve item");
3284 	geneve_spec = geneve_item->spec;
3285 	geneve_mask = geneve_item->mask ? geneve_item->mask :
3286 					  &rte_flow_item_geneve_mask;
3287 	/* Check if GENEVE TLV option size doesn't exceed option length */
3288 	if (geneve_spec && (geneve_mask->ver_opt_len_o_c_rsvd0 ||
3289 			    geneve_spec->ver_opt_len_o_c_rsvd0)) {
3290 		tlv_option_len = spec->option_len & mask->option_len;
3291 		optlen_v = rte_be_to_cpu_16(geneve_spec->ver_opt_len_o_c_rsvd0);
3292 		optlen_v = MLX5_GENEVE_OPTLEN_VAL(optlen_v);
3293 		optlen_m = rte_be_to_cpu_16(geneve_mask->ver_opt_len_o_c_rsvd0);
3294 		optlen_m = MLX5_GENEVE_OPTLEN_VAL(optlen_m);
3295 		if ((optlen_v & optlen_m) <= tlv_option_len)
3296 			return rte_flow_error_set
3297 				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3298 				 "GENEVE TLV option length exceeds optlen");
3299 	}
3300 	/* Check if length is 0 or data is 0. */
3301 	if (spec->data == NULL || spec->option_len == 0)
3302 		return rte_flow_error_set
3303 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3304 			"Geneve TLV opt with zero data/length not supported");
3305 	/* Check that data & mask are not all 0. */
3306 	data_len = spec->option_len * 4;
3307 	if (mask->data == NULL) {
3308 		for (i = 0; i < data_len; i++)
3309 			if (spec->data[i])
3310 				break;
3311 		if (i == data_len)
3312 			return rte_flow_error_set(error, ENOTSUP,
3313 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3314 				"Can't match on Geneve option data 0");
3315 	} else {
3316 		for (i = 0; i < data_len; i++)
3317 			if (spec->data[i] & mask->data[i])
3318 				break;
3319 		if (i == data_len)
3320 			return rte_flow_error_set(error, ENOTSUP,
3321 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3322 				"Can't match on Geneve option data and mask 0");
3323 		/* Check data mask supported. */
3324 		for (i = data_max_supported; i < data_len; i++)
3325 			if (mask->data[i])
3326 				return rte_flow_error_set(error, ENOTSUP,
3327 					RTE_FLOW_ERROR_TYPE_ITEM, item,
3328 					"Data mask is of unsupported size");
3329 	}
3330 	/* Check GENEVE option is supported in NIC. */
3331 	if (!hca_attr->geneve_tlv_opt)
3332 		return rte_flow_error_set
3333 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3334 			"Geneve TLV opt not supported");
3335 	/* Check if we already have geneve option with different type/class. */
3336 	rte_spinlock_lock(&sh->geneve_tlv_opt_sl);
3337 	geneve_opt_resource = sh->geneve_tlv_option_resource;
3338 	if (geneve_opt_resource != NULL)
3339 		if (geneve_opt_resource->option_class != spec->option_class ||
3340 		    geneve_opt_resource->option_type != spec->option_type ||
3341 		    geneve_opt_resource->length != spec->option_len) {
3342 			rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3343 			return rte_flow_error_set(error, ENOTSUP,
3344 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3345 				"Only one Geneve TLV option supported");
3346 		}
3347 	rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3348 	return 0;
3349 }
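
/*
 * Editorial illustration (hypothetical application snippet): a GENEVE TLV
 * option spec/mask pair that passes the validation above -- full
 * class/type/length masks, a non-zero option length and non-zero data. The
 * item must follow a GENEVE item in the pattern. All values are made up.
 */
static uint32_t mlx5_doc_geneve_opt_data[1] = { RTE_BE32(0xdeadbeef) };
static const struct rte_flow_item_geneve_opt mlx5_doc_geneve_opt_spec = {
	.option_class = RTE_BE16(0x0102),
	.option_type = 0x33,
	.option_len = 1,			/* One 4-byte data word. */
	.data = mlx5_doc_geneve_opt_data,
};
static const struct rte_flow_item_geneve_opt mlx5_doc_geneve_opt_mask = {
	.option_class = RTE_BE16(0xffff),	/* Must be fully masked. */
	.option_type = 0xff,			/* Must be fully masked. */
	.option_len = 0x1f,			/* Must cover all 5 bits. */
	.data = mlx5_doc_geneve_opt_data,	/* At least one bit set. */
};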
3350 
3351 /**
3352  * Validate MPLS item.
3353  *
3354  * @param[in] dev
3355  *   Pointer to the rte_eth_dev structure.
3356  * @param[in] item
3357  *   Item specification.
3358  * @param[in] item_flags
3359  *   Bit-fields that holds the items detected until now.
3360  * @param[in] prev_layer
3361  *   The protocol layer indicated in previous item.
3362  * @param[out] error
3363  *   Pointer to error structure.
3364  *
3365  * @return
3366  *   0 on success, a negative errno value otherwise and rte_errno is set.
3367  */
3368 int
3369 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
3370 			     const struct rte_flow_item *item __rte_unused,
3371 			     uint64_t item_flags __rte_unused,
3372 			     uint64_t prev_layer __rte_unused,
3373 			     struct rte_flow_error *error)
3374 {
3375 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
3376 	const struct rte_flow_item_mpls *mask = item->mask;
3377 	struct mlx5_priv *priv = dev->data->dev_private;
3378 	int ret;
3379 
3380 	if (!priv->sh->dev_cap.mpls_en)
3381 		return rte_flow_error_set(error, ENOTSUP,
3382 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3383 					  "MPLS not supported or"
3384 					  " disabled in firmware"
3385 					  " configuration.");
3386 	/* MPLS over UDP or GRE is allowed. */
3387 	if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L4_UDP |
3388 			    MLX5_FLOW_LAYER_GRE |
3389 			    MLX5_FLOW_LAYER_GRE_KEY)))
3390 		return rte_flow_error_set(error, EINVAL,
3391 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3392 					  "protocol filtering not compatible"
3393 					  " with MPLS layer");
3394 	/* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
3395 	if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
3396 	    !(item_flags & MLX5_FLOW_LAYER_GRE))
3397 		return rte_flow_error_set(error, ENOTSUP,
3398 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3399 					  "multiple tunnel layers not"
3400 					  " supported");
3401 	if (!mask)
3402 		mask = &rte_flow_item_mpls_mask;
3403 	ret = mlx5_flow_item_acceptable
3404 		(item, (const uint8_t *)mask,
3405 		 (const uint8_t *)&rte_flow_item_mpls_mask,
3406 		 sizeof(struct rte_flow_item_mpls),
3407 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3408 	if (ret < 0)
3409 		return ret;
3410 	return 0;
3411 #else
3412 	return rte_flow_error_set(error, ENOTSUP,
3413 				  RTE_FLOW_ERROR_TYPE_ITEM, item,
3414 				  "MPLS is not supported by Verbs, please"
3415 				  " update.");
3416 #endif
3417 }
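
/*
 * Editorial illustration: the two pattern shapes accepted by the checks
 * above, as hypothetical rte_flow item sequences. MPLS may ride over UDP or
 * over GRE (the single allowed tunnel exception); anything else is rejected.
 */
static const struct rte_flow_item mlx5_doc_mpls_over_udp[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
	{ .type = RTE_FLOW_ITEM_TYPE_UDP },
	{ .type = RTE_FLOW_ITEM_TYPE_MPLS },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};
static const struct rte_flow_item mlx5_doc_mpls_over_gre[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
	{ .type = RTE_FLOW_ITEM_TYPE_GRE },
	{ .type = RTE_FLOW_ITEM_TYPE_MPLS },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};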
3418 
3419 /**
3420  * Validate NVGRE item.
3421  *
3422  * @param[in] item
3423  *   Item specification.
3424  * @param[in] item_flags
3425  *   Bit flags to mark detected items.
3426  * @param[in] target_protocol
3427  *   The next protocol in the previous item.
3428  * @param[out] error
3429  *   Pointer to error structure.
3430  *
3431  * @return
3432  *   0 on success, a negative errno value otherwise and rte_errno is set.
3433  */
3434 int
3435 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
3436 			      uint64_t item_flags,
3437 			      uint8_t target_protocol,
3438 			      struct rte_flow_error *error)
3439 {
3440 	const struct rte_flow_item_nvgre *mask = item->mask;
3441 	int ret;
3442 
3443 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3444 		return rte_flow_error_set(error, EINVAL,
3445 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3446 					  "protocol filtering not compatible"
3447 					  " with this GRE layer");
3448 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3449 		return rte_flow_error_set(error, ENOTSUP,
3450 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3451 					  "multiple tunnel layers not"
3452 					  " supported");
3453 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3454 		return rte_flow_error_set(error, ENOTSUP,
3455 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3456 					  "L3 Layer is missing");
3457 	if (!mask)
3458 		mask = &rte_flow_item_nvgre_mask;
3459 	ret = mlx5_flow_item_acceptable
3460 		(item, (const uint8_t *)mask,
3461 		 (const uint8_t *)&rte_flow_item_nvgre_mask,
3462 		 sizeof(struct rte_flow_item_nvgre),
3463 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3464 	if (ret < 0)
3465 		return ret;
3466 	return 0;
3467 }
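
/*
 * Editorial illustration: a hypothetical pattern satisfying the NVGRE checks
 * above -- an outer L3 layer is present, the IP next protocol is GRE, and no
 * other tunnel layer was matched before.
 */
static const struct rte_flow_item mlx5_doc_nvgre_pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },	/* Outer L3, proto GRE. */
	{ .type = RTE_FLOW_ITEM_TYPE_NVGRE },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};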
3468 
3469 /**
3470  * Validate eCPRI item.
3471  *
3472  * @param[in] item
3473  *   Item specification.
3474  * @param[in] item_flags
3475  *   Bit-fields that holds the items detected until now.
3476  * @param[in] last_item
3477  *   Previous validated item in the pattern items.
3478  * @param[in] ether_type
3479  *   Type in the ethernet layer header (including dot1q).
3480  * @param[in] acc_mask
3481  *   Acceptable mask. If NULL, the default internal mask
3482  *   will be used to check whether item fields are supported.
3483  * @param[out] error
3484  *   Pointer to error structure.
3485  *
3486  * @return
3487  *   0 on success, a negative errno value otherwise and rte_errno is set.
3488  */
3489 int
3490 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
3491 			      uint64_t item_flags,
3492 			      uint64_t last_item,
3493 			      uint16_t ether_type,
3494 			      const struct rte_flow_item_ecpri *acc_mask,
3495 			      struct rte_flow_error *error)
3496 {
3497 	const struct rte_flow_item_ecpri *mask = item->mask;
3498 	const struct rte_flow_item_ecpri nic_mask = {
3499 		.hdr = {
3500 			.common = {
3501 				.u32 =
3502 				RTE_BE32(((const struct rte_ecpri_common_hdr) {
3503 					.type = 0xFF,
3504 					}).u32),
3505 			},
3506 			.dummy[0] = 0xFFFFFFFF,
3507 		},
3508 	};
3509 	const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
3510 					MLX5_FLOW_LAYER_OUTER_VLAN);
3511 	struct rte_flow_item_ecpri mask_lo;
3512 
3513 	if (!(last_item & outer_l2_vlan) &&
3514 	    last_item != MLX5_FLOW_LAYER_OUTER_L4_UDP)
3515 		return rte_flow_error_set(error, EINVAL,
3516 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3517 					  "eCPRI can only follow L2/VLAN layer or UDP layer");
3518 	if ((last_item & outer_l2_vlan) && ether_type &&
3519 	    ether_type != RTE_ETHER_TYPE_ECPRI)
3520 		return rte_flow_error_set(error, EINVAL,
3521 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3522 					  "eCPRI cannot follow an L2/VLAN layer whose ether type is not 0xAEFE");
3523 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3524 		return rte_flow_error_set(error, EINVAL,
3525 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3526 					  "eCPRI with tunnel is not supported right now");
3527 	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
3528 		return rte_flow_error_set(error, ENOTSUP,
3529 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3530 					  "multiple L3 layers not supported");
3531 	else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
3532 		return rte_flow_error_set(error, EINVAL,
3533 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3534 					  "eCPRI cannot coexist with a TCP layer");
3535 	/* Per the specification, eCPRI can be over a UDP layer. */
3536 	else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
3537 		return rte_flow_error_set(error, EINVAL,
3538 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3539 					  "eCPRI over UDP layer is not supported yet");
3540 	/* Mask for type field in common header could be zero. */
3541 	if (!mask)
3542 		mask = &rte_flow_item_ecpri_mask;
3543 	mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
3544 	/* Input mask is in big-endian format. */
3545 	if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
3546 		return rte_flow_error_set(error, EINVAL,
3547 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3548 					  "partial mask is not supported for protocol");
3549 	else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
3550 		return rte_flow_error_set(error, EINVAL,
3551 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3552 					  "message header mask must be after a type mask");
3553 	return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
3554 					 acc_mask ? (const uint8_t *)acc_mask
3555 						  : (const uint8_t *)&nic_mask,
3556 					 sizeof(struct rte_flow_item_ecpri),
3557 					 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3558 }
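
/*
 * Editorial illustration (hypothetical snippet): building an eCPRI mask that
 * is accepted by the checks above. The common-header type mask must be
 * either 0 or the full 0xff, and the first message-header word (dummy[0])
 * may only be masked when the type mask is set.
 */
static inline struct rte_flow_item_ecpri
mlx5_doc_ecpri_mask_sketch(void)
{
	struct rte_flow_item_ecpri m = {
		.hdr = {
			.common = {
				.u32 =
				RTE_BE32(((const struct rte_ecpri_common_hdr) {
					.type = 0xFF, /* Full type mask. */
					}).u32),
			},
			/* Allowed only because the type is masked. */
			.dummy[0] = 0xFFFFFFFF,
		},
	};

	return m;
}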
3559 
3560 static int
3561 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
3562 		   const struct rte_flow_attr *attr __rte_unused,
3563 		   const struct rte_flow_item items[] __rte_unused,
3564 		   const struct rte_flow_action actions[] __rte_unused,
3565 		   bool external __rte_unused,
3566 		   int hairpin __rte_unused,
3567 		   struct rte_flow_error *error)
3568 {
3569 	return rte_flow_error_set(error, ENOTSUP,
3570 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3571 }
3572 
3573 static struct mlx5_flow *
3574 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
3575 		  const struct rte_flow_attr *attr __rte_unused,
3576 		  const struct rte_flow_item items[] __rte_unused,
3577 		  const struct rte_flow_action actions[] __rte_unused,
3578 		  struct rte_flow_error *error)
3579 {
3580 	rte_flow_error_set(error, ENOTSUP,
3581 			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3582 	return NULL;
3583 }
3584 
3585 static int
3586 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
3587 		    struct mlx5_flow *dev_flow __rte_unused,
3588 		    const struct rte_flow_attr *attr __rte_unused,
3589 		    const struct rte_flow_item items[] __rte_unused,
3590 		    const struct rte_flow_action actions[] __rte_unused,
3591 		    struct rte_flow_error *error)
3592 {
3593 	return rte_flow_error_set(error, ENOTSUP,
3594 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3595 }
3596 
3597 static int
3598 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
3599 		struct rte_flow *flow __rte_unused,
3600 		struct rte_flow_error *error)
3601 {
3602 	return rte_flow_error_set(error, ENOTSUP,
3603 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3604 }
3605 
3606 static void
3607 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
3608 		 struct rte_flow *flow __rte_unused)
3609 {
3610 }
3611 
3612 static void
3613 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
3614 		  struct rte_flow *flow __rte_unused)
3615 {
3616 }
3617 
3618 static int
3619 flow_null_query(struct rte_eth_dev *dev __rte_unused,
3620 		struct rte_flow *flow __rte_unused,
3621 		const struct rte_flow_action *actions __rte_unused,
3622 		void *data __rte_unused,
3623 		struct rte_flow_error *error)
3624 {
3625 	return rte_flow_error_set(error, ENOTSUP,
3626 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3627 }
3628 
3629 static int
3630 flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused,
3631 		      uint32_t domains __rte_unused,
3632 		      uint32_t flags __rte_unused)
3633 {
3634 	return 0;
3635 }
3636 
3637 /* Void driver to protect from null pointer reference. */
3638 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
3639 	.validate = flow_null_validate,
3640 	.prepare = flow_null_prepare,
3641 	.translate = flow_null_translate,
3642 	.apply = flow_null_apply,
3643 	.remove = flow_null_remove,
3644 	.destroy = flow_null_destroy,
3645 	.query = flow_null_query,
3646 	.sync_domain = flow_null_sync_domain,
3647 };
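
/*
 * Editorial note: mlx5_flow_null_drv_ops is the classic null-object pattern.
 * Slots of flow_drv_ops[] without a real backend still dispatch safely; a
 * hypothetical call through such a slot:
 *
 *	const struct mlx5_flow_driver_ops *fops =
 *		flow_drv_ops[MLX5_FLOW_TYPE_MIN];
 *	ret = fops->validate(dev, attr, items, actions, false, 0, &error);
 *
 * This returns -ENOTSUP instead of crashing on a NULL callback pointer.
 */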
3648 
3649 /**
3650  * Select flow driver type according to flow attributes and device
3651  * configuration.
3652  *
3653  * @param[in] dev
3654  *   Pointer to the dev structure.
3655  * @param[in] attr
3656  *   Pointer to the flow attributes.
3657  *
3658  * @return
3659  *   flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
3660  */
3661 static enum mlx5_flow_drv_type
3662 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
3663 {
3664 	struct mlx5_priv *priv = dev->data->dev_private;
3665 	/* The OS layer may first determine a specific flow type (DV, VERBS). */
3666 	enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
3667 
3668 	if (type != MLX5_FLOW_TYPE_MAX)
3669 		return type;
3670 	/*
3671 	 * Currently when dv_flow_en == 2, only HW steering engine is
3672 	 * supported. New engines can also be chosen here if ready.
3673 	 */
3674 	if (priv->sh->config.dv_flow_en == 2)
3675 		return MLX5_FLOW_TYPE_HW;
3676 	/* If no OS specific type - continue with DV/VERBS selection */
3677 	if (attr->transfer && priv->sh->config.dv_esw_en)
3678 		type = MLX5_FLOW_TYPE_DV;
3679 	if (!attr->transfer)
3680 		type = priv->sh->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
3681 						     MLX5_FLOW_TYPE_VERBS;
3682 	return type;
3683 }
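
/*
 * Editorial summary of the selection above (assuming no OS-specific
 * override from mlx5_flow_os_get_type()):
 *
 *	dv_flow_en == 2			-> MLX5_FLOW_TYPE_HW
 *	transfer && dv_esw_en		-> MLX5_FLOW_TYPE_DV
 *	!transfer && dv_flow_en == 1	-> MLX5_FLOW_TYPE_DV
 *	!transfer && dv_flow_en == 0	-> MLX5_FLOW_TYPE_VERBS
 */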
3684 
3685 #define flow_get_drv_ops(type) flow_drv_ops[type]
3686 
3687 /**
3688  * Flow driver validation API. This abstracts calling driver specific functions.
3689  * The type of flow driver is determined according to flow attributes.
3690  *
3691  * @param[in] dev
3692  *   Pointer to the dev structure.
3693  * @param[in] attr
3694  *   Pointer to the flow attributes.
3695  * @param[in] items
3696  *   Pointer to the list of items.
3697  * @param[in] actions
3698  *   Pointer to the list of actions.
3699  * @param[in] external
3700  *   This flow rule is created by a request external to the PMD.
3701  * @param[in] hairpin
3702  *   Number of hairpin TX actions, 0 means classic flow.
3703  * @param[out] error
3704  *   Pointer to the error structure.
3705  *
3706  * @return
3707  *   0 on success, a negative errno value otherwise and rte_errno is set.
3708  */
3709 static inline int
3710 flow_drv_validate(struct rte_eth_dev *dev,
3711 		  const struct rte_flow_attr *attr,
3712 		  const struct rte_flow_item items[],
3713 		  const struct rte_flow_action actions[],
3714 		  bool external, int hairpin, struct rte_flow_error *error)
3715 {
3716 	const struct mlx5_flow_driver_ops *fops;
3717 	enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
3718 
3719 	fops = flow_get_drv_ops(type);
3720 	return fops->validate(dev, attr, items, actions, external,
3721 			      hairpin, error);
3722 }
3723 
3724 /**
3725  * Flow driver preparation API. This abstracts calling driver specific
3726  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3727  * calculates the size of memory required for device flow, allocates the memory,
3728  * initializes the device flow and returns the pointer.
3729  *
3730  * @note
3731  *   This function initializes device flow structure such as dv or verbs in
3732  *   struct mlx5_flow. However, it is the caller's responsibility to initialize
3733  *   the rest. For example, adding the returned device flow to the
3734  *   flow->dev_flow list and setting the backward reference to the flow should
3735  *   be done outside of this function. The layers field is not filled either.
3736  *
3737  * @param[in] dev
3738  *   Pointer to the dev structure.
3739  * @param[in] attr
3740  *   Pointer to the flow attributes.
3741  * @param[in] items
3742  *   Pointer to the list of items.
3743  * @param[in] actions
3744  *   Pointer to the list of actions.
3745  * @param[in] flow_idx
3746  *   The memory pool index of the flow.
3747  * @param[out] error
3748  *   Pointer to the error structure.
3749  *
3750  * @return
3751  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
3752  */
3753 static inline struct mlx5_flow *
3754 flow_drv_prepare(struct rte_eth_dev *dev,
3755 		 const struct rte_flow *flow,
3756 		 const struct rte_flow_attr *attr,
3757 		 const struct rte_flow_item items[],
3758 		 const struct rte_flow_action actions[],
3759 		 uint32_t flow_idx,
3760 		 struct rte_flow_error *error)
3761 {
3762 	const struct mlx5_flow_driver_ops *fops;
3763 	enum mlx5_flow_drv_type type = flow->drv_type;
3764 	struct mlx5_flow *mlx5_flow = NULL;
3765 
3766 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3767 	fops = flow_get_drv_ops(type);
3768 	mlx5_flow = fops->prepare(dev, attr, items, actions, error);
3769 	if (mlx5_flow)
3770 		mlx5_flow->flow_idx = flow_idx;
3771 	return mlx5_flow;
3772 }
3773 
3774 /**
3775  * Flow driver translation API. This abstracts calling driver specific
3776  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3777  * translates a generic flow into a driver flow. flow_drv_prepare() must
3778  * precede.
3779  *
3780  * @note
3781  *   dev_flow->layers could be filled as a result of parsing during translation
3782  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
3783  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
3784  *   flow->actions could be overwritten even though all the expanded dev_flows
3785  *   have the same actions.
3786  *
3787  * @param[in] dev
3788  *   Pointer to the rte dev structure.
3789  * @param[in, out] dev_flow
3790  *   Pointer to the mlx5 flow.
3791  * @param[in] attr
3792  *   Pointer to the flow attributes.
3793  * @param[in] items
3794  *   Pointer to the list of items.
3795  * @param[in] actions
3796  *   Pointer to the list of actions.
3797  * @param[out] error
3798  *   Pointer to the error structure.
3799  *
3800  * @return
3801  *   0 on success, a negative errno value otherwise and rte_errno is set.
3802  */
3803 static inline int
3804 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
3805 		   const struct rte_flow_attr *attr,
3806 		   const struct rte_flow_item items[],
3807 		   const struct rte_flow_action actions[],
3808 		   struct rte_flow_error *error)
3809 {
3810 	const struct mlx5_flow_driver_ops *fops;
3811 	enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
3812 
3813 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3814 	fops = flow_get_drv_ops(type);
3815 	return fops->translate(dev, dev_flow, attr, items, actions, error);
3816 }
3817 
3818 /**
3819  * Flow driver apply API. This abstracts calling driver specific functions.
3820  * Parent flow (rte_flow) should have driver type (drv_type). It applies
3821  * translated driver flows on to device. flow_drv_translate() must precede.
3822  *
3823  * @param[in] dev
3824  *   Pointer to Ethernet device structure.
3825  * @param[in, out] flow
3826  *   Pointer to flow structure.
3827  * @param[out] error
3828  *   Pointer to error structure.
3829  *
3830  * @return
3831  *   0 on success, a negative errno value otherwise and rte_errno is set.
3832  */
3833 static inline int
3834 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3835 	       struct rte_flow_error *error)
3836 {
3837 	const struct mlx5_flow_driver_ops *fops;
3838 	enum mlx5_flow_drv_type type = flow->drv_type;
3839 
3840 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3841 	fops = flow_get_drv_ops(type);
3842 	return fops->apply(dev, flow, error);
3843 }
3844 
3845 /**
3846  * Flow driver destroy API. This abstracts calling driver specific functions.
3847  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3848  * on device and releases resources of the flow.
3849  *
3850  * @param[in] dev
3851  *   Pointer to Ethernet device.
3852  * @param[in, out] flow
3853  *   Pointer to flow structure.
3854  */
3855 static inline void
3856 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3857 {
3858 	const struct mlx5_flow_driver_ops *fops;
3859 	enum mlx5_flow_drv_type type = flow->drv_type;
3860 
3861 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3862 	fops = flow_get_drv_ops(type);
3863 	fops->destroy(dev, flow);
3864 }
3865 
3866 /**
3867  * Flow driver find RSS policy tbl API. This abstracts calling driver
3868  * specific functions. Parent flow (rte_flow) should have driver
3869  * type (drv_type). It will find the RSS policy table that has the rss_desc.
3870  *
3871  * @param[in] dev
3872  *   Pointer to Ethernet device.
3873  * @param[in, out] flow
3874  *   Pointer to flow structure.
3875  * @param[in] policy
3876  *   Pointer to meter policy table.
3877  * @param[in] rss_desc
3878  *   Pointer to rss_desc
3879  */
3880 static struct mlx5_flow_meter_sub_policy *
3881 flow_drv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
3882 		struct rte_flow *flow,
3883 		struct mlx5_flow_meter_policy *policy,
3884 		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS])
3885 {
3886 	const struct mlx5_flow_driver_ops *fops;
3887 	enum mlx5_flow_drv_type type = flow->drv_type;
3888 
3889 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3890 	fops = flow_get_drv_ops(type);
3891 	return fops->meter_sub_policy_rss_prepare(dev, policy, rss_desc);
3892 }
3893 
3894 /**
3895  * Flow driver color tag rule API. This abstracts calling driver
3896  * specific functions. Parent flow (rte_flow) should have driver
3897  * type (drv_type). It will create the color tag rules in hierarchy meter.
3898  *
3899  * @param[in] dev
3900  *   Pointer to Ethernet device.
3901  * @param[in, out] flow
3902  *   Pointer to flow structure.
3903  * @param[in] fm
3904  *   Pointer to flow meter structure.
3905  * @param[in] src_port
3906  *   The src port this extra rule should use.
3907  * @param[in] item
3908  *   The src port id match item.
3909  * @param[out] error
3910  *   Pointer to error structure.
3911  */
3912 static int
3913 flow_drv_mtr_hierarchy_rule_create(struct rte_eth_dev *dev,
3914 		struct rte_flow *flow,
3915 		struct mlx5_flow_meter_info *fm,
3916 		int32_t src_port,
3917 		const struct rte_flow_item *item,
3918 		struct rte_flow_error *error)
3919 {
3920 	const struct mlx5_flow_driver_ops *fops;
3921 	enum mlx5_flow_drv_type type = flow->drv_type;
3922 
3923 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3924 	fops = flow_get_drv_ops(type);
3925 	return fops->meter_hierarchy_rule_create(dev, fm,
3926 						src_port, item, error);
3927 }
3928 
3929 /**
3930  * Get RSS action from the action list.
3931  *
3932  * @param[in] dev
3933  *   Pointer to Ethernet device.
3934  * @param[in] actions
3935  *   Pointer to the list of actions.
3936  * @param[in] flow
3937  *   Parent flow structure pointer.
3938  *
3939  * @return
3940  *   Pointer to the RSS action if it exists, otherwise NULL.
3941  */
3942 static const struct rte_flow_action_rss*
3943 flow_get_rss_action(struct rte_eth_dev *dev,
3944 		    const struct rte_flow_action actions[])
3945 {
3946 	struct mlx5_priv *priv = dev->data->dev_private;
3947 	const struct rte_flow_action_rss *rss = NULL;
3948 	struct mlx5_meter_policy_action_container *acg;
3949 	struct mlx5_meter_policy_action_container *acy;
3950 
3951 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3952 		switch (actions->type) {
3953 		case RTE_FLOW_ACTION_TYPE_RSS:
3954 			rss = actions->conf;
3955 			break;
3956 		case RTE_FLOW_ACTION_TYPE_SAMPLE:
3957 		{
3958 			const struct rte_flow_action_sample *sample =
3959 								actions->conf;
3960 			const struct rte_flow_action *act = sample->actions;
3961 			for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++)
3962 				if (act->type == RTE_FLOW_ACTION_TYPE_RSS)
3963 					rss = act->conf;
3964 			break;
3965 		}
3966 		case RTE_FLOW_ACTION_TYPE_METER:
3967 		{
3968 			uint32_t mtr_idx;
3969 			struct mlx5_flow_meter_info *fm;
3970 			struct mlx5_flow_meter_policy *policy;
3971 			const struct rte_flow_action_meter *mtr = actions->conf;
3972 
3973 			fm = mlx5_flow_meter_find(priv, mtr->mtr_id, &mtr_idx);
3974 			if (fm && !fm->def_policy) {
3975 				policy = mlx5_flow_meter_policy_find(dev,
3976 						fm->policy_id, NULL);
3977 				MLX5_ASSERT(policy);
3978 				if (policy->is_hierarchy) {
3979 					policy =
3980 				mlx5_flow_meter_hierarchy_get_final_policy(dev,
3981 									policy);
3982 					if (!policy)
3983 						return NULL;
3984 				}
3985 				if (policy->is_rss) {
3986 					acg =
3987 					&policy->act_cnt[RTE_COLOR_GREEN];
3988 					acy =
3989 					&policy->act_cnt[RTE_COLOR_YELLOW];
3990 					if (acg->fate_action ==
3991 					    MLX5_FLOW_FATE_SHARED_RSS)
3992 						rss = acg->rss->conf;
3993 					else if (acy->fate_action ==
3994 						 MLX5_FLOW_FATE_SHARED_RSS)
3995 						rss = acy->rss->conf;
3996 				}
3997 			}
3998 			break;
3999 		}
4000 		default:
4001 			break;
4002 		}
4003 	}
4004 	return rss;
4005 }
4006 
4007 /**
4008  * Get ASO age action by index.
4009  *
4010  * @param[in] dev
4011  *   Pointer to the Ethernet device structure.
4012  * @param[in] age_idx
4013  *   Index to the ASO age action.
4014  *
4015  * @return
4016  *   The specified ASO age action.
4017  */
4018 struct mlx5_aso_age_action*
4019 flow_aso_age_get_by_idx(struct rte_eth_dev *dev, uint32_t age_idx)
4020 {
4021 	uint16_t pool_idx = age_idx & UINT16_MAX;
4022 	uint16_t offset = (age_idx >> 16) & UINT16_MAX;
4023 	struct mlx5_priv *priv = dev->data->dev_private;
4024 	struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
4025 	struct mlx5_aso_age_pool *pool;
4026 
4027 	rte_rwlock_read_lock(&mng->resize_rwl);
4028 	pool = mng->pools[pool_idx];
4029 	rte_rwlock_read_unlock(&mng->resize_rwl);
4030 	return &pool->actions[offset - 1];
4031 }
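
/*
 * Editorial illustration: the 32-bit age_idx decoded above packs the pool
 * index into the lower 16 bits and a 1-based action offset into the upper
 * 16 bits (offset 0 is reserved as invalid). A hypothetical encoder for the
 * same layout:
 */
static inline uint32_t
mlx5_doc_aso_age_make_idx(uint16_t pool_idx, uint16_t one_based_offset)
{
	return ((uint32_t)one_based_offset << 16) | pool_idx;
}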
4032 
4033 /* Maps an indirect action to the translated direct action in an actions array. */
4034 struct mlx5_translated_action_handle {
4035 	struct rte_flow_action_handle *action; /**< Indirect action handle. */
4036 	int index; /**< Index in related array of rte_flow_action. */
4037 };
4038 
4039 /**
4040  * Translates actions of type RTE_FLOW_ACTION_TYPE_INDIRECT to related
4041  * direct action if translation is possible.
4042  * This functionality is used to run the same execution path for both direct
4043  * and indirect actions on flow create. All necessary preparations for
4044  * indirect action handling should be performed on the *handle* actions list
4045  * returned from this call.
4046  *
4047  * @param[in] dev
4048  *   Pointer to Ethernet device.
4049  * @param[in] actions
4050  *   List of actions to translate.
4051  * @param[out] handle
4052  *   List to store translated indirect action object handles.
4053  * @param[in, out] indir_n
4054  *   Size of the *handle* array. On return, it is updated with the number of
4055  *   indirect actions retrieved from the *actions* list.
4056  * @param[out] translated_actions
4057  *   List of actions where all indirect actions were translated to direct
4058  *   if possible. NULL if no translation took place.
4059  * @param[out] error
4060  *   Pointer to the error structure.
4061  *
4062  * @return
4063  *   0 on success, a negative errno value otherwise and rte_errno is set.
4064  */
4065 static int
4066 flow_action_handles_translate(struct rte_eth_dev *dev,
4067 			      const struct rte_flow_action actions[],
4068 			      struct mlx5_translated_action_handle *handle,
4069 			      int *indir_n,
4070 			      struct rte_flow_action **translated_actions,
4071 			      struct rte_flow_error *error)
4072 {
4073 	struct mlx5_priv *priv = dev->data->dev_private;
4074 	struct rte_flow_action *translated = NULL;
4075 	size_t actions_size;
4076 	int n;
4077 	int copied_n = 0;
4078 	struct mlx5_translated_action_handle *handle_end = NULL;
4079 
4080 	for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) {
4081 		if (actions[n].type != RTE_FLOW_ACTION_TYPE_INDIRECT)
4082 			continue;
4083 		if (copied_n == *indir_n) {
4084 			return rte_flow_error_set
4085 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM,
4086 				 NULL, "too many shared actions");
4087 		}
4088 		rte_memcpy(&handle[copied_n].action, &actions[n].conf,
4089 			   sizeof(actions[n].conf));
4090 		handle[copied_n].index = n;
4091 		copied_n++;
4092 	}
4093 	n++;
4094 	*indir_n = copied_n;
4095 	if (!copied_n)
4096 		return 0;
4097 	actions_size = sizeof(struct rte_flow_action) * n;
4098 	translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY);
4099 	if (!translated) {
4100 		rte_errno = ENOMEM;
4101 		return -ENOMEM;
4102 	}
4103 	memcpy(translated, actions, actions_size);
4104 	for (handle_end = handle + copied_n; handle < handle_end; handle++) {
4105 		struct mlx5_shared_action_rss *shared_rss;
4106 		uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
4107 		uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
4108 		uint32_t idx = act_idx &
4109 			       ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
4110 
4111 		switch (type) {
4112 		case MLX5_INDIRECT_ACTION_TYPE_RSS:
4113 			shared_rss = mlx5_ipool_get
4114 			  (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx);
4115 			translated[handle->index].type =
4116 				RTE_FLOW_ACTION_TYPE_RSS;
4117 			translated[handle->index].conf =
4118 				&shared_rss->origin;
4119 			break;
4120 		case MLX5_INDIRECT_ACTION_TYPE_COUNT:
4121 			translated[handle->index].type =
4122 						(enum rte_flow_action_type)
4123 						MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
4124 			translated[handle->index].conf = (void *)(uintptr_t)idx;
4125 			break;
4126 		case MLX5_INDIRECT_ACTION_TYPE_AGE:
4127 			if (priv->sh->flow_hit_aso_en) {
4128 				translated[handle->index].type =
4129 					(enum rte_flow_action_type)
4130 					MLX5_RTE_FLOW_ACTION_TYPE_AGE;
4131 				translated[handle->index].conf =
4132 							 (void *)(uintptr_t)idx;
4133 				break;
4134 			}
4135 			/* Fall-through */
4136 		case MLX5_INDIRECT_ACTION_TYPE_CT:
4137 			if (priv->sh->ct_aso_en) {
4138 				translated[handle->index].type =
4139 					RTE_FLOW_ACTION_TYPE_CONNTRACK;
4140 				translated[handle->index].conf =
4141 							 (void *)(uintptr_t)idx;
4142 				break;
4143 			}
4144 			/* Fall-through */
4145 		default:
4146 			mlx5_free(translated);
4147 			return rte_flow_error_set
4148 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
4149 				 NULL, "invalid indirect action type");
4150 		}
4151 	}
4152 	*translated_actions = translated;
4153 	return 0;
4154 }
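
/*
 * Editorial illustration: an indirect action handle here is not a real
 * pointer but an encoded 32-bit value -- the action type sits above
 * MLX5_INDIRECT_ACTION_TYPE_OFFSET, the object index below it. A
 * hypothetical decoder matching the loop above:
 */
static inline void
mlx5_doc_indirect_handle_decode(const struct rte_flow_action_handle *handle,
				uint32_t *type, uint32_t *idx)
{
	uint32_t act_idx = (uint32_t)(uintptr_t)handle;

	*type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
	*idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
}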
4155 
4156 /**
4157  * Get Shared RSS action from the action list.
4158  *
4159  * @param[in] dev
4160  *   Pointer to Ethernet device.
4161  * @param[in] handle
4162  *   Pointer to the list of translated action handles.
4163  * @param[in] shared_n
4164  *   Length of the *handle* list.
4165  *
4166  * @return
4167  *   The MLX5 RSS action ID if it exists, otherwise 0.
4168  */
4169 static uint32_t
4170 flow_get_shared_rss_action(struct rte_eth_dev *dev,
4171 			   struct mlx5_translated_action_handle *handle,
4172 			   int shared_n)
4173 {
4174 	struct mlx5_translated_action_handle *handle_end;
4175 	struct mlx5_priv *priv = dev->data->dev_private;
4176 	struct mlx5_shared_action_rss *shared_rss;
4177 
4178 
4179 	for (handle_end = handle + shared_n; handle < handle_end; handle++) {
4180 		uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
4181 		uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
4182 		uint32_t idx = act_idx &
4183 			       ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
4184 		switch (type) {
4185 		case MLX5_INDIRECT_ACTION_TYPE_RSS:
4186 			shared_rss = mlx5_ipool_get
4187 				(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
4188 									   idx);
4189 			__atomic_add_fetch(&shared_rss->refcnt, 1,
4190 					   __ATOMIC_RELAXED);
4191 			return idx;
4192 		default:
4193 			break;
4194 		}
4195 	}
4196 	return 0;
4197 }
4198 
4199 static unsigned int
4200 find_graph_root(uint32_t rss_level)
4201 {
4202 	return rss_level < 2 ? MLX5_EXPANSION_ROOT :
4203 			       MLX5_EXPANSION_ROOT_OUTER;
4204 }
4205 
4206 /**
4207  *  Get layer flags from the prefix flow.
4208  *
4209  *  Some flows may be split into several subflows: the prefix subflow gets the
4210  *  match items and the suffix subflow gets the actions.
4211  *  Some actions need the user-defined match item flags to get the details for
4212  *  the action.
4213  *  This function helps the suffix flow to get the item layer flags from the
4214  *  prefix subflow.
4215  *
4216  * @param[in] dev_flow
4217  *   Pointer to the created prefix subflow.
4218  *
4219  * @return
4220  *   The layers obtained from the prefix subflow.
4221  */
4222 static inline uint64_t
4223 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
4224 {
4225 	uint64_t layers = 0;
4226 
4227 	/*
4228 	 * The layers bits could be kept in local variables, but usually the
4229 	 * compiler does this optimization on its own.
4230 	 * If there is no decap action, use the layers directly.
4231 	 */
4232 	if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
4233 		return dev_flow->handle->layers;
4234 	/* Convert L3 layers with decap action. */
4235 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
4236 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
4237 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
4238 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
4239 	/* Convert L4 layers with decap action.  */
4240 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
4241 		layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
4242 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
4243 		layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
4244 	return layers;
4245 }
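
/*
 * Editorial example: for a prefix subflow matching
 * "eth / ipv4 / udp / vxlan / eth / ipv4 / tcp" with a VXLAN_DECAP action,
 * the inner IPv4/TCP layer bits are reported to the suffix subflow as outer
 * IPv4/TCP, i.e. as the packet looks after decapsulation.
 */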
4246 
4247 /**
4248  * Get metadata split action information.
4249  *
4250  * @param[in] actions
4251  *   Pointer to the list of actions.
4252  * @param[out] qrss
4253  *   Pointer to the return pointer.
4254  * @param[out] qrss_type
4255  *   Pointer to the action type to return. RTE_FLOW_ACTION_TYPE_END is returned
4256  *   if no QUEUE/RSS is found.
4257  * @param[out] encap_idx
4258  *   Pointer to the index of the encap action if exists, otherwise the last
4259  *   action index.
4260  *
4261  * @return
4262  *   Total number of actions.
4263  */
4264 static int
4265 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
4266 				       const struct rte_flow_action **qrss,
4267 				       int *encap_idx)
4268 {
4269 	const struct rte_flow_action_raw_encap *raw_encap;
4270 	int actions_n = 0;
4271 	int raw_decap_idx = -1;
4272 
4273 	*encap_idx = -1;
4274 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4275 		switch (actions->type) {
4276 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4277 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4278 			*encap_idx = actions_n;
4279 			break;
4280 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4281 			raw_decap_idx = actions_n;
4282 			break;
4283 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4284 			raw_encap = actions->conf;
4285 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4286 				*encap_idx = raw_decap_idx != -1 ?
4287 						      raw_decap_idx : actions_n;
4288 			break;
4289 		case RTE_FLOW_ACTION_TYPE_QUEUE:
4290 		case RTE_FLOW_ACTION_TYPE_RSS:
4291 			*qrss = actions;
4292 			break;
4293 		default:
4294 			break;
4295 		}
4296 		actions_n++;
4297 	}
4298 	if (*encap_idx == -1)
4299 		*encap_idx = actions_n;
4300 	/* Count RTE_FLOW_ACTION_TYPE_END. */
4301 	return actions_n + 1;
4302 }
4303 
4304 /**
4305  * Check if the action will change packet.
4306  *
4307  * @param dev
4308  *   Pointer to Ethernet device.
4309  * @param[in] type
4310  *   action type.
4311  *
4312  * @return
4313  *   true if action will change packet, false otherwise.
4314  */
4315 static bool flow_check_modify_action_type(struct rte_eth_dev *dev,
4316 					  enum rte_flow_action_type type)
4317 {
4318 	struct mlx5_priv *priv = dev->data->dev_private;
4319 
4320 	switch (type) {
4321 	case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
4322 	case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
4323 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
4324 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
4325 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
4326 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
4327 	case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
4328 	case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
4329 	case RTE_FLOW_ACTION_TYPE_DEC_TTL:
4330 	case RTE_FLOW_ACTION_TYPE_SET_TTL:
4331 	case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
4332 	case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
4333 	case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
4334 	case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
4335 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
4336 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
4337 	case RTE_FLOW_ACTION_TYPE_SET_META:
4338 	case RTE_FLOW_ACTION_TYPE_SET_TAG:
4339 	case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
4340 	case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4341 	case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4342 	case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4343 	case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4344 	case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4345 	case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4346 	case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4347 	case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4348 	case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4349 	case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
4350 		return true;
4351 	case RTE_FLOW_ACTION_TYPE_FLAG:
4352 	case RTE_FLOW_ACTION_TYPE_MARK:
4353 		if (priv->sh->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY)
4354 			return true;
4355 		else
4356 			return false;
4357 	default:
4358 		return false;
4359 	}
4360 }
4361 
4362 /**
4363  * Check meter action from the action list.
4364  *
4365  * @param dev
4366  *   Pointer to Ethernet device.
4367  * @param[in] actions
4368  *   Pointer to the list of actions.
4369  * @param[out] has_mtr
4370  *   Pointer to the meter exist flag.
4371  * @param[out] has_modify
4372  *   Pointer to the flag showing there's packet change action.
4373  * @param[out] meter_id
4374  *   Pointer to the meter id.
4375  *
4376  * @return
4377  *   Total number of actions.
4378  */
4379 static int
4380 flow_check_meter_action(struct rte_eth_dev *dev,
4381 			const struct rte_flow_action actions[],
4382 			bool *has_mtr, bool *has_modify, uint32_t *meter_id)
4383 {
4384 	const struct rte_flow_action_meter *mtr = NULL;
4385 	int actions_n = 0;
4386 
4387 	MLX5_ASSERT(has_mtr);
4388 	*has_mtr = false;
4389 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4390 		switch (actions->type) {
4391 		case RTE_FLOW_ACTION_TYPE_METER:
4392 			mtr = actions->conf;
4393 			*meter_id = mtr->mtr_id;
4394 			*has_mtr = true;
4395 			break;
4396 		default:
4397 			break;
4398 		}
4399 		if (!*has_mtr)
4400 			*has_modify |= flow_check_modify_action_type(dev,
4401 								actions->type);
4402 		actions_n++;
4403 	}
4404 	/* Count RTE_FLOW_ACTION_TYPE_END. */
4405 	return actions_n + 1;
4406 }
4407 
4408 /**
4409  * Check if the flow should be split due to hairpin.
4410  * The reason for the split is that in current HW we can't
4411  * support encap and push-vlan on Rx, so if a flow contains
4412  * these actions we move it to Tx.
4413  *
4414  * @param dev
4415  *   Pointer to Ethernet device.
4416  * @param[in] attr
4417  *   Flow rule attributes.
4418  * @param[in] actions
4419  *   Associated actions (list terminated by the END action).
4420  *
4421  * @return
4422  *   > 0 the number of actions and the flow should be split,
4423  *   0 when no split required.
4424  */
4425 static int
4426 flow_check_hairpin_split(struct rte_eth_dev *dev,
4427 			 const struct rte_flow_attr *attr,
4428 			 const struct rte_flow_action actions[])
4429 {
4430 	int queue_action = 0;
4431 	int action_n = 0;
4432 	int split = 0;
4433 	const struct rte_flow_action_queue *queue;
4434 	const struct rte_flow_action_rss *rss;
4435 	const struct rte_flow_action_raw_encap *raw_encap;
4436 	const struct rte_eth_hairpin_conf *conf;
4437 
4438 	if (!attr->ingress)
4439 		return 0;
4440 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4441 		switch (actions->type) {
4442 		case RTE_FLOW_ACTION_TYPE_QUEUE:
4443 			queue = actions->conf;
4444 			if (queue == NULL)
4445 				return 0;
4446 			conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
4447 			if (conf == NULL || conf->tx_explicit != 0)
4448 				return 0;
4449 			queue_action = 1;
4450 			action_n++;
4451 			break;
4452 		case RTE_FLOW_ACTION_TYPE_RSS:
4453 			rss = actions->conf;
4454 			if (rss == NULL || rss->queue_num == 0)
4455 				return 0;
4456 			conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
4457 			if (conf == NULL || conf->tx_explicit != 0)
4458 				return 0;
4459 			queue_action = 1;
4460 			action_n++;
4461 			break;
4462 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4463 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4464 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4465 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4466 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4467 			split++;
4468 			action_n++;
4469 			break;
4470 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4471 			raw_encap = actions->conf;
4472 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4473 				split++;
4474 			action_n++;
4475 			break;
4476 		default:
4477 			action_n++;
4478 			break;
4479 		}
4480 	}
4481 	if (split && queue_action)
4482 		return action_n;
4483 	return 0;
4484 }
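
/*
 * Editorial illustration (hypothetical rule): an ingress flow reported by
 * the check above as needing a split, e.g.
 *
 *	actions: raw_encap (size > MLX5_ENCAPSULATION_DECISION_SIZE) /
 *		 queue (hairpin queue with tx_explicit == 0)
 *
 * Encap is a Tx-only capability here, so the caller moves it to the Tx part
 * of the hairpin flow and the function returns the total action count.
 */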
4485 
4486 /* Declare flow create/destroy prototype in advance. */
4487 static uint32_t
4488 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4489 		 const struct rte_flow_attr *attr,
4490 		 const struct rte_flow_item items[],
4491 		 const struct rte_flow_action actions[],
4492 		 bool external, struct rte_flow_error *error);
4493 
4494 static void
4495 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4496 		  uint32_t flow_idx);
4497 
4498 int
4499 flow_dv_mreg_match_cb(void *tool_ctx __rte_unused,
4500 		      struct mlx5_list_entry *entry, void *cb_ctx)
4501 {
4502 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4503 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4504 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4505 
4506 	return mcp_res->mark_id != *(uint32_t *)(ctx->data);
4507 }
4508 
4509 struct mlx5_list_entry *
4510 flow_dv_mreg_create_cb(void *tool_ctx, void *cb_ctx)
4511 {
4512 	struct rte_eth_dev *dev = tool_ctx;
4513 	struct mlx5_priv *priv = dev->data->dev_private;
4514 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4515 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4516 	struct rte_flow_error *error = ctx->error;
4517 	uint32_t idx = 0;
4518 	int ret;
4519 	uint32_t mark_id = *(uint32_t *)(ctx->data);
4520 	struct rte_flow_attr attr = {
4521 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4522 		.ingress = 1,
4523 	};
4524 	struct mlx5_rte_flow_item_tag tag_spec = {
4525 		.data = mark_id,
4526 	};
4527 	struct rte_flow_item items[] = {
4528 		[1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
4529 	};
4530 	struct rte_flow_action_mark ftag = {
4531 		.id = mark_id,
4532 	};
4533 	struct mlx5_flow_action_copy_mreg cp_mreg = {
4534 		.dst = REG_B,
4535 		.src = REG_NON,
4536 	};
4537 	struct rte_flow_action_jump jump = {
4538 		.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4539 	};
4540 	struct rte_flow_action actions[] = {
4541 		[3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
4542 	};
4543 
4544 	/* Fill the register fields in the flow. */
4545 	ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
4546 	if (ret < 0)
4547 		return NULL;
4548 	tag_spec.id = ret;
4549 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4550 	if (ret < 0)
4551 		return NULL;
4552 	cp_mreg.src = ret;
4553 	/* Provide the full width of FLAG specific value. */
4554 	if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
4555 		tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
4556 	/* Build a new flow. */
4557 	if (mark_id != MLX5_DEFAULT_COPY_ID) {
4558 		items[0] = (struct rte_flow_item){
4559 			.type = (enum rte_flow_item_type)
4560 				MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4561 			.spec = &tag_spec,
4562 		};
4563 		items[1] = (struct rte_flow_item){
4564 			.type = RTE_FLOW_ITEM_TYPE_END,
4565 		};
4566 		actions[0] = (struct rte_flow_action){
4567 			.type = (enum rte_flow_action_type)
4568 				MLX5_RTE_FLOW_ACTION_TYPE_MARK,
4569 			.conf = &ftag,
4570 		};
4571 		actions[1] = (struct rte_flow_action){
4572 			.type = (enum rte_flow_action_type)
4573 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4574 			.conf = &cp_mreg,
4575 		};
4576 		actions[2] = (struct rte_flow_action){
4577 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
4578 			.conf = &jump,
4579 		};
4580 		actions[3] = (struct rte_flow_action){
4581 			.type = RTE_FLOW_ACTION_TYPE_END,
4582 		};
4583 	} else {
4584 		/* Default rule, wildcard match. */
4585 		attr.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR;
4586 		items[0] = (struct rte_flow_item){
4587 			.type = RTE_FLOW_ITEM_TYPE_END,
4588 		};
4589 		actions[0] = (struct rte_flow_action){
4590 			.type = (enum rte_flow_action_type)
4591 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4592 			.conf = &cp_mreg,
4593 		};
4594 		actions[1] = (struct rte_flow_action){
4595 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
4596 			.conf = &jump,
4597 		};
4598 		actions[2] = (struct rte_flow_action){
4599 			.type = RTE_FLOW_ACTION_TYPE_END,
4600 		};
4601 	}
4602 	/* Build a new entry. */
4603 	mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4604 	if (!mcp_res) {
4605 		rte_errno = ENOMEM;
4606 		return NULL;
4607 	}
4608 	mcp_res->idx = idx;
4609 	mcp_res->mark_id = mark_id;
4610 	/*
4611 	 * The copy Flows are not included in any list. There
4612 	 * The copy Flows are not included in any list. They
4613 	 * are referenced from other Flows and cannot be
4614 	 * applied, removed or deleted in arbitrary order
4615 	 * by list traversal.
4616 	mcp_res->rix_flow = flow_list_create(dev, MLX5_FLOW_TYPE_MCP,
4617 					&attr, items, actions, false, error);
4618 	if (!mcp_res->rix_flow) {
4619 		mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], idx);
4620 		return NULL;
4621 	}
4622 	return &mcp_res->hlist_ent;
4623 }
4624 
4625 struct mlx5_list_entry *
4626 flow_dv_mreg_clone_cb(void *tool_ctx, struct mlx5_list_entry *oentry,
4627 		      void *cb_ctx __rte_unused)
4628 {
4629 	struct rte_eth_dev *dev = tool_ctx;
4630 	struct mlx5_priv *priv = dev->data->dev_private;
4631 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4632 	uint32_t idx = 0;
4633 
4634 	mcp_res = mlx5_ipool_malloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4635 	if (!mcp_res) {
4636 		rte_errno = ENOMEM;
4637 		return NULL;
4638 	}
4639 	memcpy(mcp_res, oentry, sizeof(*mcp_res));
4640 	mcp_res->idx = idx;
4641 	return &mcp_res->hlist_ent;
4642 }
4643 
4644 void
4645 flow_dv_mreg_clone_free_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4646 {
4647 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4648 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4649 	struct rte_eth_dev *dev = tool_ctx;
4650 	struct mlx5_priv *priv = dev->data->dev_private;
4651 
4652 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4653 }
4654 
4655 /**
4656  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4657  *
4658  * As mark_id is unique, if there's already a registered flow for the mark_id,
4659  * return by increasing the reference counter of the resource. Otherwise, create
4660  * the resource (mcp_res) and flow.
4661  *
4662  * Flow looks like,
4663  *   - If ingress port is ANY and reg_c[1] is mark_id,
4664  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4665  *
4666  * For default flow (zero mark_id), flow is like,
4667  *   - If ingress port is ANY,
4668  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
4669  *
4670  * @param dev
4671  *   Pointer to Ethernet device.
4672  * @param mark_id
4673  *   ID of MARK action, zero means default flow for META.
4674  * @param[out] error
4675  *   Perform verbose error reporting if not NULL.
4676  *
4677  * @return
4678  *   Associated resource on success, NULL otherwise and rte_errno is set.
4679  */
4680 static struct mlx5_flow_mreg_copy_resource *
4681 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
4682 			  struct rte_flow_error *error)
4683 {
4684 	struct mlx5_priv *priv = dev->data->dev_private;
4685 	struct mlx5_list_entry *entry;
4686 	struct mlx5_flow_cb_ctx ctx = {
4687 		.dev = dev,
4688 		.error = error,
4689 		.data = &mark_id,
4690 	};
4691 
4692 	/* Check if already registered. */
4693 	MLX5_ASSERT(priv->mreg_cp_tbl);
4694 	entry = mlx5_hlist_register(priv->mreg_cp_tbl, mark_id, &ctx);
4695 	if (!entry)
4696 		return NULL;
4697 	return container_of(entry, struct mlx5_flow_mreg_copy_resource,
4698 			    hlist_ent);
4699 }
4700 
4701 void
4702 flow_dv_mreg_remove_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4703 {
4704 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4705 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4706 	struct rte_eth_dev *dev = tool_ctx;
4707 	struct mlx5_priv *priv = dev->data->dev_private;
4708 
4709 	MLX5_ASSERT(mcp_res->rix_flow);
4710 	flow_list_destroy(dev, MLX5_FLOW_TYPE_MCP, mcp_res->rix_flow);
4711 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4712 }
4713 
4714 /**
4715  * Release flow in RX_CP_TBL.
4716  *
4717  * @param dev
4718  *   Pointer to Ethernet device.
4719  * @param flow
4720  *   Parent flow for which copying is provided.
4721  */
4722 static void
4723 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
4724 			  struct rte_flow *flow)
4725 {
4726 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4727 	struct mlx5_priv *priv = dev->data->dev_private;
4728 
4729 	if (!flow->rix_mreg_copy)
4730 		return;
4731 	mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
4732 				 flow->rix_mreg_copy);
4733 	if (!mcp_res || !priv->mreg_cp_tbl)
4734 		return;
4735 	MLX5_ASSERT(mcp_res->rix_flow);
4736 	mlx5_hlist_unregister(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
4737 	flow->rix_mreg_copy = 0;
4738 }
4739 
4740 /**
4741  * Remove the default copy action from RX_CP_TBL.
4742  *
4743  * This function is called in mlx5_dev_start(). Thread safety is not
4744  * guaranteed.
4745  *
4746  * @param dev
4747  *   Pointer to Ethernet device.
4748  */
4749 static void
4750 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
4751 {
4752 	struct mlx5_list_entry *entry;
4753 	struct mlx5_priv *priv = dev->data->dev_private;
4754 	struct mlx5_flow_cb_ctx ctx;
4755 	uint32_t mark_id;
4756 
4757 	/* Check if default flow is registered. */
4758 	if (!priv->mreg_cp_tbl)
4759 		return;
4760 	mark_id = MLX5_DEFAULT_COPY_ID;
4761 	ctx.data = &mark_id;
4762 	entry = mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx);
4763 	if (!entry)
4764 		return;
4765 	mlx5_hlist_unregister(priv->mreg_cp_tbl, entry);
4766 }
4767 
4768 /**
4769  * Add the default copy action in RX_CP_TBL.
4770  *
4771  * This function is called in mlx5_dev_start(). Thread safety is not
4772  * guaranteed.
4773  *
4774  * @param dev
4775  *   Pointer to Ethernet device.
4776  * @param[out] error
4777  *   Perform verbose error reporting if not NULL.
4778  *
4779  * @return
4780  *   0 for success, negative value otherwise and rte_errno is set.
4781  */
4782 static int
4783 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
4784 				  struct rte_flow_error *error)
4785 {
4786 	struct mlx5_priv *priv = dev->data->dev_private;
4787 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4788 	struct mlx5_flow_cb_ctx ctx;
4789 	uint32_t mark_id;
4790 
4791 	/* Check whether extensive metadata feature is engaged. */
4792 	if (!priv->sh->config.dv_flow_en ||
4793 	    priv->sh->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4794 	    !mlx5_flow_ext_mreg_supported(dev) ||
4795 	    !priv->sh->dv_regc0_mask)
4796 		return 0;
4797 	/*
4798 	 * Adding the default mreg copy flow may be called multiple times,
4799 	 * but removal happens only once in stop. Avoid registering it twice.
4800 	 */
4801 	mark_id = MLX5_DEFAULT_COPY_ID;
4802 	ctx.data = &mark_id;
4803 	if (mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx))
4804 		return 0;
4805 	mcp_res = flow_mreg_add_copy_action(dev, mark_id, error);
4806 	if (!mcp_res)
4807 		return -rte_errno;
4808 	return 0;
4809 }
4810 
4811 /**
4812  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4813  *
4814  * All the flow having Q/RSS action should be split by
4815  * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL
4816  * performs the following,
4817  *   - CQE->flow_tag := reg_c[1] (MARK)
4818  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4819  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
4820  * but there should be a flow per each MARK ID set by MARK action.
4821  *
4822  * For the aforementioned reason, if there's a MARK action in flow's action
4823  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
4824  * the MARK ID to CQE's flow_tag like,
4825  *   - If reg_c[1] is mark_id,
4826  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4827  *
4828  * For SET_META action which stores value in reg_c[0], as the destination is
4829  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
4830  * MARK ID means the default flow. The default flow looks like,
4831  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4832  *
4833  * @param dev
4834  *   Pointer to Ethernet device.
4835  * @param flow
4836  *   Pointer to flow structure.
4837  * @param[in] actions
4838  *   Pointer to the list of actions.
4839  * @param[out] error
4840  *   Perform verbose error reporting if not NULL.
4841  *
4842  * @return
4843  *   0 on success, negative value otherwise and rte_errno is set.
4844  */
4845 static int
4846 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
4847 			    struct rte_flow *flow,
4848 			    const struct rte_flow_action *actions,
4849 			    struct rte_flow_error *error)
4850 {
4851 	struct mlx5_priv *priv = dev->data->dev_private;
4852 	struct mlx5_sh_config *config = &priv->sh->config;
4853 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4854 	const struct rte_flow_action_mark *mark;
4855 
4856 	/* Check whether extensive metadata feature is engaged. */
4857 	if (!config->dv_flow_en ||
4858 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4859 	    !mlx5_flow_ext_mreg_supported(dev) ||
4860 	    !priv->sh->dv_regc0_mask)
4861 		return 0;
4862 	/* Find MARK action. */
4863 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4864 		switch (actions->type) {
4865 		case RTE_FLOW_ACTION_TYPE_FLAG:
4866 			mcp_res = flow_mreg_add_copy_action
4867 				(dev, MLX5_FLOW_MARK_DEFAULT, error);
4868 			if (!mcp_res)
4869 				return -rte_errno;
4870 			flow->rix_mreg_copy = mcp_res->idx;
4871 			return 0;
4872 		case RTE_FLOW_ACTION_TYPE_MARK:
4873 			mark = (const struct rte_flow_action_mark *)
4874 				actions->conf;
4875 			mcp_res =
4876 				flow_mreg_add_copy_action(dev, mark->id, error);
4877 			if (!mcp_res)
4878 				return -rte_errno;
4879 			flow->rix_mreg_copy = mcp_res->idx;
4880 			return 0;
4881 		default:
4882 			break;
4883 		}
4884 	}
4885 	return 0;
4886 }
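
/*
 * Illustrative sketch (not part of the driver logic): for a flow created
 * with a MARK action such as the hypothetical rule below, the function
 * above installs one extra copy flow in RX_CP_TBL ('queue' is an assumed
 * struct rte_flow_action_queue):
 *
 *	struct rte_flow_action_mark mark = { .id = 0xcafe };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *
 * The resulting RX_CP_TBL entry behaves as described above:
 *	if (reg_c[1] == 0xcafe)
 *		flow_tag := 0xcafe, reg_b := reg_c[0], jump to RX_ACT_TBL
 * while a FLAG action registers the copy flow with MLX5_FLOW_MARK_DEFAULT
 * instead of the user-provided ID.
 */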
4887 
4888 #define MLX5_MAX_SPLIT_ACTIONS 24
4889 #define MLX5_MAX_SPLIT_ITEMS 24
4890 
4891 /**
4892  * Split the hairpin flow.
4893  * Since HW can't support encap and push-vlan on Rx, we move these
4894  * actions to Tx.
4895  * If the count action is after the encap then we also
4896  * move the count action. In this case, the count also measures
4897  * the outer bytes.
4898  *
4899  * @param dev
4900  *   Pointer to Ethernet device.
4901  * @param[in] actions
4902  *   Associated actions (list terminated by the END action).
4903  * @param[out] actions_rx
4904  *   Rx flow actions.
4905  * @param[out] actions_tx
4906  *   Tx flow actions.
4907  * @param[out] pattern_tx
4908  *   The pattern items for the Tx flow.
4909  * @param[out] flow_id
4910  *   The flow ID connected to this flow.
4911  *
4912  * @return
4913  *   0 on success.
4914  */
4915 static int
4916 flow_hairpin_split(struct rte_eth_dev *dev,
4917 		   const struct rte_flow_action actions[],
4918 		   struct rte_flow_action actions_rx[],
4919 		   struct rte_flow_action actions_tx[],
4920 		   struct rte_flow_item pattern_tx[],
4921 		   uint32_t flow_id)
4922 {
4923 	const struct rte_flow_action_raw_encap *raw_encap;
4924 	const struct rte_flow_action_raw_decap *raw_decap;
4925 	struct mlx5_rte_flow_action_set_tag *set_tag;
4926 	struct rte_flow_action *tag_action;
4927 	struct mlx5_rte_flow_item_tag *tag_item;
4928 	struct rte_flow_item *item;
4929 	char *addr;
4930 	int encap = 0;
4931 
4932 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4933 		switch (actions->type) {
4934 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4935 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4936 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4937 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4938 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4939 			rte_memcpy(actions_tx, actions,
4940 			       sizeof(struct rte_flow_action));
4941 			actions_tx++;
4942 			break;
4943 		case RTE_FLOW_ACTION_TYPE_COUNT:
4944 			if (encap) {
4945 				rte_memcpy(actions_tx, actions,
4946 					   sizeof(struct rte_flow_action));
4947 				actions_tx++;
4948 			} else {
4949 				rte_memcpy(actions_rx, actions,
4950 					   sizeof(struct rte_flow_action));
4951 				actions_rx++;
4952 			}
4953 			break;
4954 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4955 			raw_encap = actions->conf;
4956 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) {
4957 				memcpy(actions_tx, actions,
4958 				       sizeof(struct rte_flow_action));
4959 				actions_tx++;
4960 				encap = 1;
4961 			} else {
4962 				rte_memcpy(actions_rx, actions,
4963 					   sizeof(struct rte_flow_action));
4964 				actions_rx++;
4965 			}
4966 			break;
4967 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4968 			raw_decap = actions->conf;
4969 			if (raw_decap->size < MLX5_ENCAPSULATION_DECISION_SIZE) {
4970 				memcpy(actions_tx, actions,
4971 				       sizeof(struct rte_flow_action));
4972 				actions_tx++;
4973 			} else {
4974 				rte_memcpy(actions_rx, actions,
4975 					   sizeof(struct rte_flow_action));
4976 				actions_rx++;
4977 			}
4978 			break;
4979 		default:
4980 			rte_memcpy(actions_rx, actions,
4981 				   sizeof(struct rte_flow_action));
4982 			actions_rx++;
4983 			break;
4984 		}
4985 	}
4986 	/* Add the set tag action and the end action for the Rx flow. */
4987 	tag_action = actions_rx;
4988 	tag_action->type = (enum rte_flow_action_type)
4989 			   MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4990 	actions_rx++;
4991 	rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
4992 	actions_rx++;
4993 	set_tag = (void *)actions_rx;
4994 	*set_tag = (struct mlx5_rte_flow_action_set_tag) {
4995 		.id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL),
4996 		.data = flow_id,
4997 	};
4998 	MLX5_ASSERT(set_tag->id > REG_NON);
4999 	tag_action->conf = set_tag;
5000 	/* Create Tx item list. */
5001 	rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
5002 	addr = (void *)&pattern_tx[2];
5003 	item = pattern_tx;
5004 	item->type = (enum rte_flow_item_type)
5005 		     MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5006 	tag_item = (void *)addr;
5007 	tag_item->data = flow_id;
5008 	tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
5009 	MLX5_ASSERT(tag_item->id > REG_NON);
5010 	item->spec = tag_item;
5011 	addr += sizeof(struct mlx5_rte_flow_item_tag);
5012 	tag_item = (void *)addr;
5013 	tag_item->data = UINT32_MAX;
5014 	tag_item->id = UINT16_MAX;
5015 	item->mask = tag_item;
5016 	item->last = NULL;
5017 	item++;
5018 	item->type = RTE_FLOW_ITEM_TYPE_END;
5019 	return 0;
5020 }
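
/*
 * Worked example (illustrative only): for a hairpin flow created with the
 * action list
 *	[ RAW_ENCAP (size > MLX5_ENCAPSULATION_DECISION_SIZE), COUNT, QUEUE, END ]
 * the split above produces
 *	actions_rx: [ QUEUE, TAG (set flow_id), END ]
 *	actions_tx: [ RAW_ENCAP, COUNT, END ]
 * COUNT follows the encap, so it is moved to Tx and also measures the outer
 * bytes, and the Tx pattern matches the internal TAG item carrying the same
 * flow_id.
 */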
5021 
5022 /**
5023  * The last stage of the splitting chain; it just creates the subflow
5024  * without any modification.
5025  *
5026  * @param[in] dev
5027  *   Pointer to Ethernet device.
5028  * @param[in] flow
5029  *   Parent flow structure pointer.
5030  * @param[in, out] sub_flow
5031  *   Pointer to return the created subflow, may be NULL.
5032  * @param[in] attr
5033  *   Flow rule attributes.
5034  * @param[in] items
5035  *   Pattern specification (list terminated by the END pattern item).
5036  * @param[in] actions
5037  *   Associated actions (list terminated by the END action).
5038  * @param[in] flow_split_info
5039  *   Pointer to flow split info structure.
5040  * @param[out] error
5041  *   Perform verbose error reporting if not NULL.
5042  * @return
5043  *   0 on success, negative value otherwise
5044  */
5045 static int
5046 flow_create_split_inner(struct rte_eth_dev *dev,
5047 			struct rte_flow *flow,
5048 			struct mlx5_flow **sub_flow,
5049 			const struct rte_flow_attr *attr,
5050 			const struct rte_flow_item items[],
5051 			const struct rte_flow_action actions[],
5052 			struct mlx5_flow_split_info *flow_split_info,
5053 			struct rte_flow_error *error)
5054 {
5055 	struct mlx5_flow *dev_flow;
5056 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
5057 
5058 	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
5059 				    flow_split_info->flow_idx, error);
5060 	if (!dev_flow)
5061 		return -rte_errno;
5062 	dev_flow->flow = flow;
5063 	dev_flow->external = flow_split_info->external;
5064 	dev_flow->skip_scale = flow_split_info->skip_scale;
5065 	/* A subflow object was created; it must be included in the list. */
5066 	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
5067 		      dev_flow->handle, next);
5068 	/*
5069 	 * If dev_flow is one of the suffix flows, some actions in the suffix
5070 	 * flow may need the user-defined item layer flags; pass the metadata
5071 	 * rxq mark flag to the suffix flow as well.
5072 	 */
5073 	if (flow_split_info->prefix_layers)
5074 		dev_flow->handle->layers = flow_split_info->prefix_layers;
5075 	if (flow_split_info->prefix_mark) {
5076 		MLX5_ASSERT(wks);
5077 		wks->mark = 1;
5078 	}
5079 	if (sub_flow)
5080 		*sub_flow = dev_flow;
5081 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5082 	dev_flow->dv.table_id = flow_split_info->table_id;
5083 #endif
5084 	return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
5085 }
5086 
5087 /**
5088  * Get the sub policy of a meter.
5089  *
5090  * @param[in] dev
5091  *   Pointer to Ethernet device.
5092  * @param[in] flow
5093  *   Parent flow structure pointer.
5094  * @param wks
5095  *   Pointer to thread flow work space.
5096  * @param[in] attr
5097  *   Flow rule attributes.
5098  * @param[in] items
5099  *   Pattern specification (list terminated by the END pattern item).
5100  * @param[out] error
5101  *   Perform verbose error reporting if not NULL.
5102  *
5103  * @return
5104  *   Pointer to the meter sub policy, NULL otherwise and rte_errno is set.
5105  */
5106 static struct mlx5_flow_meter_sub_policy *
5107 get_meter_sub_policy(struct rte_eth_dev *dev,
5108 		     struct rte_flow *flow,
5109 		     struct mlx5_flow_workspace *wks,
5110 		     const struct rte_flow_attr *attr,
5111 		     const struct rte_flow_item items[],
5112 		     struct rte_flow_error *error)
5113 {
5114 	struct mlx5_flow_meter_policy *policy;
5115 	struct mlx5_flow_meter_policy *final_policy;
5116 	struct mlx5_flow_meter_sub_policy *sub_policy = NULL;
5117 
5118 	policy = wks->policy;
5119 	final_policy = policy->is_hierarchy ? wks->final_policy : policy;
5120 	if (final_policy->is_rss || final_policy->is_queue) {
5121 		struct mlx5_flow_rss_desc rss_desc_v[MLX5_MTR_RTE_COLORS];
5122 		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS] = {0};
5123 		uint32_t i;
5124 
5125 		/*
5126 		 * This is a temporary dev_flow;
5127 		 * no need to register any matcher for it in translate.
5128 		 */
5129 		wks->skip_matcher_reg = 1;
5130 		for (i = 0; i < MLX5_MTR_RTE_COLORS; i++) {
5131 			struct mlx5_flow dev_flow = {0};
5132 			struct mlx5_flow_handle dev_handle = { {0} };
5133 			uint8_t fate = final_policy->act_cnt[i].fate_action;
5134 
5135 			if (fate == MLX5_FLOW_FATE_SHARED_RSS) {
5136 				const struct rte_flow_action_rss *rss_act =
5137 					final_policy->act_cnt[i].rss->conf;
5138 				struct rte_flow_action rss_actions[2] = {
5139 					[0] = {
5140 					.type = RTE_FLOW_ACTION_TYPE_RSS,
5141 					.conf = rss_act,
5142 					},
5143 					[1] = {
5144 					.type = RTE_FLOW_ACTION_TYPE_END,
5145 					.conf = NULL,
5146 					}
5147 				};
5148 
5149 				dev_flow.handle = &dev_handle;
5150 				dev_flow.ingress = attr->ingress;
5151 				dev_flow.flow = flow;
5152 				dev_flow.external = 0;
5153 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5154 				dev_flow.dv.transfer = attr->transfer;
5155 #endif
5156 				/*
5157 				 * Translate RSS action to get RSS hash fields.
5158 				 */
5159 				if (flow_drv_translate(dev, &dev_flow, attr,
5160 						items, rss_actions, error))
5161 					goto exit;
5162 				rss_desc_v[i] = wks->rss_desc;
5163 				rss_desc_v[i].key_len = MLX5_RSS_HASH_KEY_LEN;
5164 				rss_desc_v[i].hash_fields =
5165 						dev_flow.hash_fields;
5166 				rss_desc_v[i].queue_num =
5167 						rss_desc_v[i].hash_fields ?
5168 						rss_desc_v[i].queue_num : 1;
5169 				rss_desc_v[i].tunnel =
5170 						!!(dev_flow.handle->layers &
5171 						   MLX5_FLOW_LAYER_TUNNEL);
5172 				/* Use the RSS queues in the containers. */
5173 				rss_desc_v[i].queue =
5174 					(uint16_t *)(uintptr_t)rss_act->queue;
5175 				rss_desc[i] = &rss_desc_v[i];
5176 			} else if (fate == MLX5_FLOW_FATE_QUEUE) {
5177 				/* This is a queue action. */
5178 				rss_desc_v[i] = wks->rss_desc;
5179 				rss_desc_v[i].key_len = 0;
5180 				rss_desc_v[i].hash_fields = 0;
5181 				rss_desc_v[i].queue =
5182 					&final_policy->act_cnt[i].queue;
5183 				rss_desc_v[i].queue_num = 1;
5184 				rss_desc[i] = &rss_desc_v[i];
5185 			} else {
5186 				rss_desc[i] = NULL;
5187 			}
5188 		}
5189 		sub_policy = flow_drv_meter_sub_policy_rss_prepare(dev,
5190 						flow, policy, rss_desc);
5191 	} else {
5192 		enum mlx5_meter_domain mtr_domain =
5193 			attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5194 				(attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5195 						MLX5_MTR_DOMAIN_INGRESS);
5196 		sub_policy = policy->sub_policys[mtr_domain][0];
5197 	}
5198 	if (!sub_policy)
5199 		rte_flow_error_set(error, EINVAL,
5200 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5201 				   "Failed to get meter sub-policy.");
5202 exit:
5203 	return sub_policy;
5204 }
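
/*
 * Illustrative note: for a non-default meter policy such as
 *	GREEN -> RSS, YELLOW -> DROP
 * the loop above fills rss_desc[RTE_COLOR_GREEN] from the translated RSS
 * action and leaves rss_desc[RTE_COLOR_YELLOW] as NULL, so the RSS
 * sub-policy preparation only creates queue resources for the green color.
 * A policy without Q/RSS fate actions takes the pre-created
 * policy->sub_policys[domain][0] directly.
 */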
5205 
5206 /**
5207  * Split the meter flow.
5208  *
5209  * As the meter flow will be split into three sub flows, the actions
5210  * other than the meter action make sense only if the meter accepts
5211  * the packet. If the packet is to be dropped, no additional
5212  * actions should be taken.
5213  *
5214  * One kind of special action, which decapsulates the L3 tunnel
5215  * header, is kept in the prefix sub flow so as not to take the
5216  * L3 tunnel header into account.
5217  *
5218  * @param[in] dev
5219  *   Pointer to Ethernet device.
5220  * @param[in] flow
5221  *   Parent flow structure pointer.
5222  * @param wks
5223  *   Pointer to thread flow work space.
5224  * @param[in] attr
5225  *   Flow rule attributes.
5226  * @param[in] items
5227  *   Pattern specification (list terminated by the END pattern item).
5228  * @param[out] sfx_items
5229  *   Suffix flow match items (list terminated by the END pattern item).
5230  * @param[in] actions
5231  *   Associated actions (list terminated by the END action).
5232  * @param[out] actions_sfx
5233  *   Suffix flow actions.
5234  * @param[out] actions_pre
5235  *   Prefix flow actions.
5236  * @param[out] mtr_flow_id
5237  *   Pointer to meter flow id.
5238  * @param[out] error
5239  *   Perform verbose error reporting if not NULL.
5240  *
5241  * @return
5242  *   0 on success, a negative errno value otherwise and rte_errno is set.
5243  */
5244 static int
5245 flow_meter_split_prep(struct rte_eth_dev *dev,
5246 		      struct rte_flow *flow,
5247 		      struct mlx5_flow_workspace *wks,
5248 		      const struct rte_flow_attr *attr,
5249 		      const struct rte_flow_item items[],
5250 		      struct rte_flow_item sfx_items[],
5251 		      const struct rte_flow_action actions[],
5252 		      struct rte_flow_action actions_sfx[],
5253 		      struct rte_flow_action actions_pre[],
5254 		      uint32_t *mtr_flow_id,
5255 		      struct rte_flow_error *error)
5256 {
5257 	struct mlx5_priv *priv = dev->data->dev_private;
5258 	struct mlx5_flow_meter_info *fm = wks->fm;
5259 	struct rte_flow_action *tag_action = NULL;
5260 	struct rte_flow_item *tag_item;
5261 	struct mlx5_rte_flow_action_set_tag *set_tag;
5262 	const struct rte_flow_action_raw_encap *raw_encap;
5263 	const struct rte_flow_action_raw_decap *raw_decap;
5264 	struct mlx5_rte_flow_item_tag *tag_item_spec;
5265 	struct mlx5_rte_flow_item_tag *tag_item_mask;
5266 	uint32_t tag_id = 0;
5267 	struct rte_flow_item *vlan_item_dst = NULL;
5268 	const struct rte_flow_item *vlan_item_src = NULL;
5269 	const struct rte_flow_item *orig_items = items;
5270 	struct rte_flow_action *hw_mtr_action;
5271 	struct rte_flow_action *action_pre_head = NULL;
5272 	int32_t flow_src_port = priv->representor_id;
5273 	bool mtr_first;
5274 	uint8_t mtr_id_offset = priv->mtr_reg_share ? MLX5_MTR_COLOR_BITS : 0;
5275 	uint8_t mtr_reg_bits = priv->mtr_reg_share ?
5276 				MLX5_MTR_IDLE_BITS_IN_COLOR_REG : MLX5_REG_BITS;
5277 	uint32_t flow_id = 0;
5278 	uint32_t flow_id_reversed = 0;
5279 	uint8_t flow_id_bits = 0;
5280 	bool after_meter = false;
5281 	int shift;
5282 
5283 	/* Prepare the suffix subflow items. */
5284 	tag_item = sfx_items++;
5285 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
5286 		struct mlx5_priv *port_priv;
5287 		const struct rte_flow_item_port_id *pid_v;
5288 		int item_type = items->type;
5289 
5290 		switch (item_type) {
5291 		case RTE_FLOW_ITEM_TYPE_PORT_ID:
5292 			pid_v = items->spec;
5293 			MLX5_ASSERT(pid_v);
5294 			port_priv = mlx5_port_to_eswitch_info(pid_v->id, false);
5295 			if (!port_priv)
5296 				return rte_flow_error_set(error,
5297 						rte_errno,
5298 						RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
5299 						pid_v,
5300 						"Failed to get port info.");
5301 			flow_src_port = port_priv->representor_id;
5302 			if (!fm->def_policy && wks->policy->is_hierarchy &&
5303 			    flow_src_port != priv->representor_id) {
5304 				if (flow_drv_mtr_hierarchy_rule_create(dev,
5305 								flow, fm,
5306 								flow_src_port,
5307 								items,
5308 								error))
5309 					return -rte_errno;
5310 			}
5311 			memcpy(sfx_items, items, sizeof(*sfx_items));
5312 			sfx_items++;
5313 			break;
5314 		case RTE_FLOW_ITEM_TYPE_VLAN:
5315 			/* Determine whether to copy the VLAN item below. */
5316 			vlan_item_src = items;
5317 			vlan_item_dst = sfx_items++;
5318 			vlan_item_dst->type = RTE_FLOW_ITEM_TYPE_VOID;
5319 			break;
5320 		default:
5321 			break;
5322 		}
5323 	}
5324 	sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
5325 	sfx_items++;
5326 	mtr_first = priv->sh->meter_aso_en &&
5327 		(attr->egress || (attr->transfer && flow_src_port != UINT16_MAX));
5328 	/* For ASO meter, the meter must precede the tag in the Tx direction. */
5329 	if (mtr_first) {
5330 		action_pre_head = actions_pre++;
5331 		/* Leave space for tag action. */
5332 		tag_action = actions_pre++;
5333 	}
5334 	/* Prepare the actions for prefix and suffix flow. */
5335 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5336 		struct rte_flow_action *action_cur = NULL;
5337 
5338 		switch (actions->type) {
5339 		case RTE_FLOW_ACTION_TYPE_METER:
5340 			if (mtr_first) {
5341 				action_cur = action_pre_head;
5342 			} else {
5343 				/* Leave space for tag action. */
5344 				tag_action = actions_pre++;
5345 				action_cur = actions_pre++;
5346 			}
5347 			after_meter = true;
5348 			break;
5349 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5350 		case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5351 			action_cur = actions_pre++;
5352 			break;
5353 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5354 			raw_encap = actions->conf;
5355 			if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
5356 				action_cur = actions_pre++;
5357 			break;
5358 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5359 			raw_decap = actions->conf;
5360 			if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
5361 				action_cur = actions_pre++;
5362 			break;
5363 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5364 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5365 			if (vlan_item_dst && vlan_item_src) {
5366 				memcpy(vlan_item_dst, vlan_item_src,
5367 					sizeof(*vlan_item_dst));
5368 				/*
5369 				 * Convert to an internal match item; it is
5370 				 * used for VLAN push and set VID.
5371 				 */
5372 				vlan_item_dst->type = (enum rte_flow_item_type)
5373 						MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
5374 			}
5375 			break;
5376 		case RTE_FLOW_ACTION_TYPE_COUNT:
5377 			if (fm->def_policy)
5378 				action_cur = after_meter ?
5379 						actions_sfx++ : actions_pre++;
5380 			break;
5381 		default:
5382 			break;
5383 		}
5384 		if (!action_cur)
5385 			action_cur = (fm->def_policy) ?
5386 					actions_sfx++ : actions_pre++;
5387 		memcpy(action_cur, actions, sizeof(struct rte_flow_action));
5388 	}
5389 	/* Add end action to the actions. */
5390 	actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
5391 	if (priv->sh->meter_aso_en) {
5392 		/*
5393 		 * For ASO meter, an extra jump action needs to be added
5394 		 * explicitly, to jump from the meter table to the policer table.
5395 		 */
5396 		struct mlx5_flow_meter_sub_policy *sub_policy;
5397 		struct mlx5_flow_tbl_data_entry *tbl_data;
5398 
5399 		if (!fm->def_policy) {
5400 			sub_policy = get_meter_sub_policy(dev, flow, wks,
5401 							  attr, orig_items,
5402 							  error);
5403 			if (!sub_policy)
5404 				return -rte_errno;
5405 		} else {
5406 			enum mlx5_meter_domain mtr_domain =
5407 			attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5408 				(attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5409 						MLX5_MTR_DOMAIN_INGRESS);
5410 
5411 			sub_policy =
5412 			&priv->sh->mtrmng->def_policy[mtr_domain]->sub_policy;
5413 		}
5414 		tbl_data = container_of(sub_policy->tbl_rsc,
5415 					struct mlx5_flow_tbl_data_entry, tbl);
5416 		hw_mtr_action = actions_pre++;
5417 		hw_mtr_action->type = (enum rte_flow_action_type)
5418 				      MLX5_RTE_FLOW_ACTION_TYPE_JUMP;
5419 		hw_mtr_action->conf = tbl_data->jump.action;
5420 	}
5421 	actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
5422 	actions_pre++;
5423 	if (!tag_action)
5424 		return rte_flow_error_set(error, ENOMEM,
5425 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5426 					  NULL, "No tag action space.");
5427 	if (!mtr_flow_id) {
5428 		tag_action->type = RTE_FLOW_ACTION_TYPE_VOID;
5429 		goto exit;
5430 	}
5431 	/* Only default-policy Meter creates mtr flow id. */
5432 	if (fm->def_policy) {
5433 		mlx5_ipool_malloc(fm->flow_ipool, &tag_id);
5434 		if (!tag_id)
5435 			return rte_flow_error_set(error, ENOMEM,
5436 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5437 					"Failed to allocate meter flow id.");
5438 		flow_id = tag_id - 1;
5439 		flow_id_bits = (!flow_id) ? 1 :
5440 				(MLX5_REG_BITS - __builtin_clz(flow_id));
5441 		if ((flow_id_bits + priv->sh->mtrmng->max_mtr_bits) >
5442 		    mtr_reg_bits) {
5443 			mlx5_ipool_free(fm->flow_ipool, tag_id);
5444 			return rte_flow_error_set(error, EINVAL,
5445 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5446 					"Meter flow id exceeds max limit.");
5447 		}
5448 		if (flow_id_bits > priv->sh->mtrmng->max_mtr_flow_bits)
5449 			priv->sh->mtrmng->max_mtr_flow_bits = flow_id_bits;
5450 	}
5451 	/* Build tag actions and items for meter_id/meter flow_id. */
5452 	set_tag = (struct mlx5_rte_flow_action_set_tag *)actions_pre;
5453 	tag_item_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
5454 	tag_item_mask = tag_item_spec + 1;
5455 	/* Both flow_id and meter_id share the same register. */
5456 	*set_tag = (struct mlx5_rte_flow_action_set_tag) {
5457 		.id = (enum modify_reg)mlx5_flow_get_reg_id(dev, MLX5_MTR_ID,
5458 							    0, error),
5459 		.offset = mtr_id_offset,
5460 		.length = mtr_reg_bits,
5461 		.data = flow->meter,
5462 	};
5463 	/*
5464 	 * The color register bits used by flow_id grow from MSB to LSB,
5465 	 * so the flow_id value must be bit-reversed within reg_c.
5466 	 */
5467 	for (shift = 0; shift < flow_id_bits; shift++)
5468 		flow_id_reversed = (flow_id_reversed << 1) |
5469 				((flow_id >> shift) & 0x1);
5470 	set_tag->data |=
5471 		flow_id_reversed << (mtr_reg_bits - flow_id_bits);
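	/*
	 * Worked example (illustrative values): for flow_id = 0x6
	 * (binary 110, flow_id_bits = 3) the loop above yields
	 * flow_id_reversed = 0x3 (binary 011), which is then placed in the
	 * top three bits of the meter register field by the shift of
	 * (mtr_reg_bits - flow_id_bits), leaving the low bits to the
	 * meter ID.
	 */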
5472 	tag_item_spec->id = set_tag->id;
5473 	tag_item_spec->data = set_tag->data << mtr_id_offset;
5474 	tag_item_mask->data = UINT32_MAX << mtr_id_offset;
5475 	tag_action->type = (enum rte_flow_action_type)
5476 				MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5477 	tag_action->conf = set_tag;
5478 	tag_item->type = (enum rte_flow_item_type)
5479 				MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5480 	tag_item->spec = tag_item_spec;
5481 	tag_item->last = NULL;
5482 	tag_item->mask = tag_item_mask;
5483 exit:
5484 	if (mtr_flow_id)
5485 		*mtr_flow_id = tag_id;
5486 	return 0;
5487 }
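
/*
 * Illustrative sketch of the resulting split for a default-policy ASO meter
 * on Rx, given the user actions [ METER, QUEUE, END ]:
 *	actions_pre: [ TAG (meter_id/flow_id), METER, JUMP (policer table), END ]
 *	actions_sfx: [ QUEUE, END ]
 *	sfx_items:   [ TAG (match meter_id/flow_id), END ]
 * The suffix flow is installed in the meter suffix table and only sees
 * packets which the meter did not drop.
 */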
5488 
5489 /**
5490  * Split action list having QUEUE/RSS for metadata register copy.
5491  *
5492  * Once a Q/RSS action is detected in the user's action list, the flow
5493  * actions should be split in order to copy the metadata registers, which
5494  * happens in RX_CP_TBL like:
5495  *   - CQE->flow_tag := reg_c[1] (MARK)
5496  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
5497  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
5498  * This is because the last action of each flow must be a terminal action
5499  * (QUEUE, RSS or DROP).
5500  *
5501  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
5502  * stored and kept in the mlx5_flow structure per each sub_flow.
5503  *
5504  * The Q/RSS action is replaced with:
5505  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
5506  * And the following JUMP action is added at the end:
5507  *   - JUMP, to RX_CP_TBL.
5508  *
5509  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL
5510  * by the flow_create_split_metadata() routine. The flow will look like:
5511  *   - If the flow ID matches (reg_c[2]), perform Q/RSS.
5512  *
5513  * @param dev
5514  *   Pointer to Ethernet device.
5515  * @param[out] split_actions
5516  *   Pointer to store split actions to jump to CP_TBL.
5517  * @param[in] actions
5518  *   Pointer to the list of original flow actions.
5519  * @param[in] qrss
5520  *   Pointer to the Q/RSS action.
5521  * @param[in] actions_n
5522  *   Number of original actions.
5523  * @param[in] mtr_sfx
5524  *   Check if it is in meter suffix table.
5525  * @param[out] error
5526  *   Perform verbose error reporting if not NULL.
5527  *
5528  * @return
5529  *   non-zero unique flow_id on success, otherwise 0 and
5530  *   error/rte_errno are set.
5531  */
5532 static uint32_t
5533 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
5534 			  struct rte_flow_action *split_actions,
5535 			  const struct rte_flow_action *actions,
5536 			  const struct rte_flow_action *qrss,
5537 			  int actions_n, int mtr_sfx,
5538 			  struct rte_flow_error *error)
5539 {
5540 	struct mlx5_priv *priv = dev->data->dev_private;
5541 	struct mlx5_rte_flow_action_set_tag *set_tag;
5542 	struct rte_flow_action_jump *jump;
5543 	const int qrss_idx = qrss - actions;
5544 	uint32_t flow_id = 0;
5545 	int ret = 0;
5546 
5547 	/*
5548 	 * The given actions will be split:
5549 	 * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
5550 	 * - Add jump to mreg CP_TBL.
5551 	 * As a result, there will be one more action.
5552 	 */
5553 	memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
5554 	/* Count MLX5_RTE_FLOW_ACTION_TYPE_TAG. */
5555 	++actions_n;
5556 	set_tag = (void *)(split_actions + actions_n);
5557 	/*
5558 	 * If we are not the meter suffix flow, add the tag action,
5559 	 * since the meter suffix flow already has the tag added.
5560 	 */
5561 	if (!mtr_sfx) {
5562 		/*
5563 		 * Allocate the new subflow ID. This one is unique within
5564 		 * device and not shared with representors. Otherwise,
5565 		 * we would have to resolve a multi-thread access
5566 		 * synchronization issue. Each flow on the shared device is appended
5567 		 * with source vport identifier, so the resulting
5568 		 * flows will be unique in the shared (by master and
5569 		 * representors) domain even if they have coinciding
5570 		 * IDs.
5571 		 */
5572 		mlx5_ipool_malloc(priv->sh->ipool
5573 				  [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &flow_id);
5574 		if (!flow_id)
5575 			return rte_flow_error_set(error, ENOMEM,
5576 						  RTE_FLOW_ERROR_TYPE_ACTION,
5577 						  NULL, "can't allocate id "
5578 						  "for split Q/RSS subflow");
5579 		/* Internal SET_TAG action to set flow ID. */
5580 		*set_tag = (struct mlx5_rte_flow_action_set_tag){
5581 			.data = flow_id,
5582 		};
5583 		ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
5584 		if (ret < 0)
5585 			return ret;
5586 		set_tag->id = ret;
5587 		/* Construct new actions array. */
5588 		/* Replace QUEUE/RSS action. */
5589 		split_actions[qrss_idx] = (struct rte_flow_action){
5590 			.type = (enum rte_flow_action_type)
5591 				MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5592 			.conf = set_tag,
5593 		};
5594 	} else {
5595 		/*
5596 		 * If we are the meter suffix flow, the tag already exists.
5597 		 * Set the QUEUE/RSS action to void.
5598 		 */
5599 		split_actions[qrss_idx].type = RTE_FLOW_ACTION_TYPE_VOID;
5600 	}
5601 	/* JUMP action to jump to mreg copy table (CP_TBL). */
5602 	jump = (void *)(set_tag + 1);
5603 	*jump = (struct rte_flow_action_jump){
5604 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
5605 	};
5606 	split_actions[actions_n - 2] = (struct rte_flow_action){
5607 		.type = RTE_FLOW_ACTION_TYPE_JUMP,
5608 		.conf = jump,
5609 	};
5610 	split_actions[actions_n - 1] = (struct rte_flow_action){
5611 		.type = RTE_FLOW_ACTION_TYPE_END,
5612 	};
5613 	return flow_id;
5614 }
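
/*
 * Worked example (illustrative): given the actions [ MARK, RSS, END ]
 * (actions_n = 3) and mtr_sfx == 0, the function allocates a flow_id and
 * builds
 *	split_actions: [ MARK, TAG (reg_c[2] := flow_id), JUMP (CP_TBL), END ]
 * The RSS itself is applied later in RX_ACT_TBL by the suffix flow that
 * matches reg_c[2] == flow_id.
 */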
5615 
5616 /**
5617  * Extend the given action list for Tx metadata copy.
5618  *
5619  * Copy the given action list to the ext_actions and add flow metadata register
5620  * copy action in order to copy reg_a set by WQE to reg_c[0].
5621  *
5622  * @param[out] ext_actions
5623  *   Pointer to the extended action list.
5624  * @param[in] actions
5625  *   Pointer to the list of actions.
5626  * @param[in] actions_n
5627  *   Number of actions in the list.
5628  * @param[out] error
5629  *   Perform verbose error reporting if not NULL.
5630  * @param[in] encap_idx
5631  *   The encap action index.
5632  *
5633  * @return
5634  *   0 on success, negative value otherwise
5635  */
5636 static int
5637 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
5638 		       struct rte_flow_action *ext_actions,
5639 		       const struct rte_flow_action *actions,
5640 		       int actions_n, struct rte_flow_error *error,
5641 		       int encap_idx)
5642 {
5643 	struct mlx5_flow_action_copy_mreg *cp_mreg =
5644 		(struct mlx5_flow_action_copy_mreg *)
5645 			(ext_actions + actions_n + 1);
5646 	int ret;
5647 
5648 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
5649 	if (ret < 0)
5650 		return ret;
5651 	cp_mreg->dst = ret;
5652 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
5653 	if (ret < 0)
5654 		return ret;
5655 	cp_mreg->src = ret;
5656 	if (encap_idx != 0)
5657 		memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
5658 	if (encap_idx == actions_n - 1) {
5659 		ext_actions[actions_n - 1] = (struct rte_flow_action){
5660 			.type = (enum rte_flow_action_type)
5661 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5662 			.conf = cp_mreg,
5663 		};
5664 		ext_actions[actions_n] = (struct rte_flow_action){
5665 			.type = RTE_FLOW_ACTION_TYPE_END,
5666 		};
5667 	} else {
5668 		ext_actions[encap_idx] = (struct rte_flow_action){
5669 			.type = (enum rte_flow_action_type)
5670 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5671 			.conf = cp_mreg,
5672 		};
5673 		memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
5674 				sizeof(*ext_actions) * (actions_n - encap_idx));
5675 	}
5676 	return 0;
5677 }
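
/*
 * Worked example (illustrative): with the actions [ MODIFY_FIELD, END ]
 * (actions_n = 2, encap_idx = 1, i.e. no L3 encap) the result is
 *	ext_actions: [ MODIFY_FIELD, COPY_MREG (reg_a -> reg_c[0]), END ]
 * With an L3 RAW_ENCAP at index 0 (encap_idx = 0) the copy must happen
 * before the packet gets rewritten:
 *	ext_actions: [ COPY_MREG, RAW_ENCAP, END ]
 */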
5678 
5679 /**
5680  * Check the match action from the action list.
5681  *
5682  * @param[in] actions
5683  *   Pointer to the list of actions.
5684  * @param[in] attr
5685  *   Flow rule attributes.
5686  * @param[in] action
5687  *   The action to check for in the list.
5688  * @param[out] match_action_pos
5689  *   Pointer to the position of the matched action if it exists, -1 otherwise.
5690  * @param[out] qrss_action_pos
5691  *   Pointer to the position of the Queue/RSS action if it exists, -1 otherwise.
5692  * @param[out] modify_after_mirror
5693  *   Pointer to the flag of modify action after FDB mirroring.
5694  *
5695  * @return
5696  *   > 0 the total number of actions.
5697  *   0 if no match action is found in the action list.
5698  */
5699 static int
5700 flow_check_match_action(const struct rte_flow_action actions[],
5701 			const struct rte_flow_attr *attr,
5702 			enum rte_flow_action_type action,
5703 			int *match_action_pos, int *qrss_action_pos,
5704 			int *modify_after_mirror)
5705 {
5706 	const struct rte_flow_action_sample *sample;
5707 	const struct rte_flow_action_raw_decap *decap;
5708 	int actions_n = 0;
5709 	uint32_t ratio = 0;
5710 	int sub_type = 0;
5711 	int flag = 0;
5712 	int fdb_mirror = 0;
5713 
5714 	*match_action_pos = -1;
5715 	*qrss_action_pos = -1;
5716 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5717 		if (actions->type == action) {
5718 			flag = 1;
5719 			*match_action_pos = actions_n;
5720 		}
5721 		switch (actions->type) {
5722 		case RTE_FLOW_ACTION_TYPE_QUEUE:
5723 		case RTE_FLOW_ACTION_TYPE_RSS:
5724 			*qrss_action_pos = actions_n;
5725 			break;
5726 		case RTE_FLOW_ACTION_TYPE_SAMPLE:
5727 			sample = actions->conf;
5728 			ratio = sample->ratio;
5729 			sub_type = ((const struct rte_flow_action *)
5730 					(sample->actions))->type;
5731 			if (ratio == 1 && attr->transfer)
5732 				fdb_mirror = 1;
5733 			break;
5734 		case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
5735 		case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
5736 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
5737 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
5738 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
5739 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
5740 		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
5741 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
5742 		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
5743 		case RTE_FLOW_ACTION_TYPE_SET_TTL:
5744 		case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
5745 		case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
5746 		case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
5747 		case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
5748 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
5749 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
5750 		case RTE_FLOW_ACTION_TYPE_FLAG:
5751 		case RTE_FLOW_ACTION_TYPE_MARK:
5752 		case RTE_FLOW_ACTION_TYPE_SET_META:
5753 		case RTE_FLOW_ACTION_TYPE_SET_TAG:
5754 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
5755 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5756 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5757 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
5758 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5759 		case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5760 		case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
5761 		case RTE_FLOW_ACTION_TYPE_METER:
5762 			if (fdb_mirror)
5763 				*modify_after_mirror = 1;
5764 			break;
5765 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5766 			decap = actions->conf;
5767 			while ((++actions)->type == RTE_FLOW_ACTION_TYPE_VOID)
5768 				;
5769 			actions_n++;
5770 			if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
5771 				const struct rte_flow_action_raw_encap *encap =
5772 								actions->conf;
5773 				if (decap->size <=
5774 					MLX5_ENCAPSULATION_DECISION_SIZE &&
5775 				    encap->size >
5776 					MLX5_ENCAPSULATION_DECISION_SIZE)
5777 					/* L3 encap. */
5778 					break;
5779 			}
5780 			if (fdb_mirror)
5781 				*modify_after_mirror = 1;
5782 			break;
5783 		default:
5784 			break;
5785 		}
5786 		actions_n++;
5787 	}
5788 	if (flag && fdb_mirror && !*modify_after_mirror) {
5789 		/* FDB mirroring is implemented with the destination array
5790 		 * instead of the FLOW_SAMPLER object.
5791 		 */
5792 		if (sub_type != RTE_FLOW_ACTION_TYPE_END)
5793 			flag = 0;
5794 	}
5795 	/* Count RTE_FLOW_ACTION_TYPE_END. */
5796 	return flag ? actions_n + 1 : 0;
5797 }
5798 
5799 #define SAMPLE_SUFFIX_ITEM 3
5800 
5801 /**
5802  * Split the sample flow.
5803  *
5804  * As sample flow will split to two sub flow, sample flow with
5805  * sample action, the other actions will move to new suffix flow.
5806  *
5807  * Also add unique tag id with tag action in the sample flow,
5808  * the same tag id will be as match in the suffix flow.
5809  *
5810  * @param dev
5811  *   Pointer to Ethernet device.
5812  * @param[in] add_tag
5813  *   Add extra tag action flag.
5814  * @param[out] sfx_items
5815  *   Suffix flow match items (list terminated by the END pattern item).
5816  * @param[in] actions
5817  *   Associated actions (list terminated by the END action).
5818  * @param[out] actions_sfx
5819  *   Suffix flow actions.
5820  * @param[out] actions_pre
5821  *   Prefix flow actions.
5822  * @param[in] actions_n
5823  *  The total number of actions.
5824  * @param[in] sample_action_pos
5825  *   The sample action position.
5826  * @param[in] qrss_action_pos
5827  *   The Queue/RSS action position.
5828  * @param[in] jump_table
5829  *   Add extra jump action flag.
5830  * @param[out] error
5831  *   Perform verbose error reporting if not NULL.
5832  *
5833  * @return
5834  *   0 or a unique non-zero flow_id on success, a negative errno value
5835  *   otherwise and rte_errno is set.
5836  */
5837 static int
5838 flow_sample_split_prep(struct rte_eth_dev *dev,
5839 		       int add_tag,
5840 		       const struct rte_flow_item items[],
5841 		       struct rte_flow_item sfx_items[],
5842 		       const struct rte_flow_action actions[],
5843 		       struct rte_flow_action actions_sfx[],
5844 		       struct rte_flow_action actions_pre[],
5845 		       int actions_n,
5846 		       int sample_action_pos,
5847 		       int qrss_action_pos,
5848 		       int jump_table,
5849 		       struct rte_flow_error *error)
5850 {
5851 	struct mlx5_priv *priv = dev->data->dev_private;
5852 	struct mlx5_rte_flow_action_set_tag *set_tag;
5853 	struct mlx5_rte_flow_item_tag *tag_spec;
5854 	struct mlx5_rte_flow_item_tag *tag_mask;
5855 	struct rte_flow_action_jump *jump_action;
5856 	uint32_t tag_id = 0;
5857 	int append_index = 0;
5858 	int set_tag_idx = -1;
5859 	int index;
5860 	int ret;
5861 
5862 	if (sample_action_pos < 0)
5863 		return rte_flow_error_set(error, EINVAL,
5864 					  RTE_FLOW_ERROR_TYPE_ACTION,
5865 					  NULL, "invalid position of sample "
5866 					  "action in list");
5867 	/* Find the position for the tag action to be added. */
5868 	if (add_tag) {
5869 		/* Update the newly added tag action index so that it
5870 		 * precedes the PUSH_VLAN or ENCAP action.
5871 		 */
5872 		const struct rte_flow_action_raw_encap *raw_encap;
5873 		const struct rte_flow_action *action = actions;
5874 		int encap_idx;
5875 		int action_idx = 0;
5876 		int raw_decap_idx = -1;
5877 		int push_vlan_idx = -1;
5878 		for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
5879 			switch (action->type) {
5880 			case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5881 				raw_decap_idx = action_idx;
5882 				break;
5883 			case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5884 				raw_encap = action->conf;
5885 				if (raw_encap->size >
5886 					MLX5_ENCAPSULATION_DECISION_SIZE) {
5887 					encap_idx = raw_decap_idx != -1 ?
5888 						    raw_decap_idx : action_idx;
5889 					if (encap_idx < sample_action_pos &&
5890 					    push_vlan_idx == -1)
5891 						set_tag_idx = encap_idx;
5892 				}
5893 				break;
5894 			case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
5895 			case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
5896 				encap_idx = action_idx;
5897 				if (encap_idx < sample_action_pos &&
5898 				    push_vlan_idx == -1)
5899 					set_tag_idx = encap_idx;
5900 				break;
5901 			case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5902 			case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5903 				push_vlan_idx = action_idx;
5904 				if (push_vlan_idx < sample_action_pos)
5905 					set_tag_idx = action_idx;
5906 				break;
5907 			default:
5908 				break;
5909 			}
5910 			action_idx++;
5911 		}
5912 	}
5913 	/* Prepare the actions for prefix and suffix flow. */
5914 	if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
5915 		index = qrss_action_pos;
5916 		/* Put the actions preceding the Queue/RSS action into the prefix flow. */
5917 		if (index != 0)
5918 			memcpy(actions_pre, actions,
5919 			       sizeof(struct rte_flow_action) * index);
5920 		/* Put the other actions preceding the sample action into the prefix flow. */
5921 		if (sample_action_pos > index + 1)
5922 			memcpy(actions_pre + index, actions + index + 1,
5923 			       sizeof(struct rte_flow_action) *
5924 			       (sample_action_pos - index - 1));
5925 		index = sample_action_pos - 1;
5926 		/* Put Queue/RSS action into Suffix flow. */
5927 		memcpy(actions_sfx, actions + qrss_action_pos,
5928 		       sizeof(struct rte_flow_action));
5929 		actions_sfx++;
5930 	} else if (add_tag && set_tag_idx >= 0) {
5931 		if (set_tag_idx > 0)
5932 			memcpy(actions_pre, actions,
5933 			       sizeof(struct rte_flow_action) * set_tag_idx);
5934 		memcpy(actions_pre + set_tag_idx + 1, actions + set_tag_idx,
5935 		       sizeof(struct rte_flow_action) *
5936 		       (sample_action_pos - set_tag_idx));
5937 		index = sample_action_pos;
5938 	} else {
5939 		index = sample_action_pos;
5940 		if (index != 0)
5941 			memcpy(actions_pre, actions,
5942 			       sizeof(struct rte_flow_action) * index);
5943 	}
5944 	/* For CX5, add an extra tag action for NIC-RX and E-Switch ingress.
5945 	 * For CX6DX and above, where metadata registers Cx preserve their
5946 	 * value, add an extra tag action for NIC-RX and the E-Switch domain.
5947 	 */
5948 	if (add_tag) {
5949 		/* Prepare the prefix tag action. */
5950 		append_index++;
5951 		set_tag = (void *)(actions_pre + actions_n + append_index);
5952 		ret = mlx5_flow_get_reg_id(dev, MLX5_SAMPLE_ID, 0, error);
5953 		/* Trusted VF/SF on CX5 does not support meter, so the
5954 		 * reserved metadata regC is REG_NON; fall back to using
5955 		 * application tag index 0.
5956 		 */
5957 		if (unlikely(ret == REG_NON))
5958 			ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
5959 		if (ret < 0)
5960 			return ret;
5961 		mlx5_ipool_malloc(priv->sh->ipool
5962 				  [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id);
5963 		*set_tag = (struct mlx5_rte_flow_action_set_tag) {
5964 			.id = ret,
5965 			.data = tag_id,
5966 		};
5967 		/* Prepare the suffix subflow items. */
5968 		for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
5969 			if (items->type == RTE_FLOW_ITEM_TYPE_PORT_ID) {
5970 				memcpy(sfx_items, items, sizeof(*sfx_items));
5971 				sfx_items++;
5972 			}
5973 		}
5974 		tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
5975 		tag_spec->data = tag_id;
5976 		tag_spec->id = set_tag->id;
5977 		tag_mask = tag_spec + 1;
5978 		tag_mask->data = UINT32_MAX;
5979 		sfx_items[0] = (struct rte_flow_item){
5980 			.type = (enum rte_flow_item_type)
5981 				MLX5_RTE_FLOW_ITEM_TYPE_TAG,
5982 			.spec = tag_spec,
5983 			.last = NULL,
5984 			.mask = tag_mask,
5985 		};
5986 		sfx_items[1] = (struct rte_flow_item){
5987 			.type = (enum rte_flow_item_type)
5988 				RTE_FLOW_ITEM_TYPE_END,
5989 		};
5990 		/* Prepare the tag action in prefix subflow. */
5991 		set_tag_idx = (set_tag_idx == -1) ? index : set_tag_idx;
5992 		actions_pre[set_tag_idx] =
5993 			(struct rte_flow_action){
5994 			.type = (enum rte_flow_action_type)
5995 				MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5996 			.conf = set_tag,
5997 		};
5998 		/* Update the next sample position since one tag action was added. */
5999 		index += 1;
6000 	}
6001 	/* Copy the sample action into prefix flow. */
6002 	memcpy(actions_pre + index, actions + sample_action_pos,
6003 	       sizeof(struct rte_flow_action));
6004 	index += 1;
6005 	/* For the modify action after the sample action in E-Switch mirroring,
6006 	 * add an extra jump action in the prefix subflow to jump into the next
6007 	 * table, then do the modify action in the new table.
6008 	 */
6009 	if (jump_table) {
6010 		/* Prepare the prefix jump action. */
6011 		append_index++;
6012 		jump_action = (void *)(actions_pre + actions_n + append_index);
6013 		jump_action->group = jump_table;
6014 		actions_pre[index++] =
6015 			(struct rte_flow_action){
6016 			.type = (enum rte_flow_action_type)
6017 				RTE_FLOW_ACTION_TYPE_JUMP,
6018 			.conf = jump_action,
6019 		};
6020 	}
6021 	actions_pre[index] = (struct rte_flow_action){
6022 		.type = (enum rte_flow_action_type)
6023 			RTE_FLOW_ACTION_TYPE_END,
6024 	};
6025 	/* Put the actions after the sample action into the suffix flow. */
6026 	memcpy(actions_sfx, actions + sample_action_pos + 1,
6027 	       sizeof(struct rte_flow_action) *
6028 	       (actions_n - sample_action_pos - 1));
6029 	return tag_id;
6030 }
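
/*
 * Illustrative sketch: for the actions [ SAMPLE, QUEUE, END ] with add_tag
 * set (sample_action_pos = 0, qrss_action_pos = 1, actions_n = 3):
 *	actions_pre: [ TAG (reg := tag_id), SAMPLE, END ]
 *	actions_sfx: [ QUEUE, END ]
 *	sfx_items:   [ TAG (match tag_id), END ]
 * The prefix flow samples the packet and marks it with the unique tag_id;
 * the suffix flow matches the tag and applies the Queue action.
 */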
6031 
6032 /**
6033  * The splitting for metadata feature.
6034  *
6035  * - Q/RSS action on NIC Rx should be split in order to pass by
6036  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
6037  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
6038  *
6039  * - All the actions on NIC Tx should have a mreg copy action to
6040  *   copy reg_a from WQE to reg_c[0].
6041  *
6042  * @param dev
6043  *   Pointer to Ethernet device.
6044  * @param[in] flow
6045  *   Parent flow structure pointer.
6046  * @param[in] attr
6047  *   Flow rule attributes.
6048  * @param[in] items
6049  *   Pattern specification (list terminated by the END pattern item).
6050  * @param[in] actions
6051  *   Associated actions (list terminated by the END action).
6052  * @param[in] flow_split_info
6053  *   Pointer to flow split info structure.
6054  * @param[out] error
6055  *   Perform verbose error reporting if not NULL.
6056  * @return
6057  *   0 on success, negative value otherwise
6058  */
6059 static int
6060 flow_create_split_metadata(struct rte_eth_dev *dev,
6061 			   struct rte_flow *flow,
6062 			   const struct rte_flow_attr *attr,
6063 			   const struct rte_flow_item items[],
6064 			   const struct rte_flow_action actions[],
6065 			   struct mlx5_flow_split_info *flow_split_info,
6066 			   struct rte_flow_error *error)
6067 {
6068 	struct mlx5_priv *priv = dev->data->dev_private;
6069 	struct mlx5_sh_config *config = &priv->sh->config;
6070 	const struct rte_flow_action *qrss = NULL;
6071 	struct rte_flow_action *ext_actions = NULL;
6072 	struct mlx5_flow *dev_flow = NULL;
6073 	uint32_t qrss_id = 0;
6074 	int mtr_sfx = 0;
6075 	size_t act_size;
6076 	int actions_n;
6077 	int encap_idx;
6078 	int ret;
6079 
6080 	/* Check whether extensive metadata feature is engaged. */
6081 	if (!config->dv_flow_en ||
6082 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
6083 	    !mlx5_flow_ext_mreg_supported(dev))
6084 		return flow_create_split_inner(dev, flow, NULL, attr, items,
6085 					       actions, flow_split_info, error);
6086 	actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
6087 							   &encap_idx);
6088 	if (qrss) {
6089 		/* Exclude hairpin flows from splitting. */
6090 		if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
6091 			const struct rte_flow_action_queue *queue;
6092 
6093 			queue = qrss->conf;
6094 			if (mlx5_rxq_is_hairpin(dev, queue->index))
6095 				qrss = NULL;
6096 		} else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
6097 			const struct rte_flow_action_rss *rss;
6098 
6099 			rss = qrss->conf;
6100 			if (mlx5_rxq_is_hairpin(dev, rss->queue[0]))
6101 				qrss = NULL;
6102 		}
6103 	}
6104 	if (qrss) {
6105 		/* Check if it is in meter suffix table. */
6106 		mtr_sfx = attr->group == (attr->transfer ?
6107 			  (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6108 			  MLX5_FLOW_TABLE_LEVEL_METER);
6109 		/*
6110 		 * Q/RSS action on NIC Rx should be split in order to pass by
6111 		 * the mreg copy table (RX_CP_TBL) and then it jumps to the
6112 		 * action table (RX_ACT_TBL) which has the split Q/RSS action.
6113 		 */
6114 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
6115 			   sizeof(struct rte_flow_action_set_tag) +
6116 			   sizeof(struct rte_flow_action_jump);
6117 		ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
6118 					  SOCKET_ID_ANY);
6119 		if (!ext_actions)
6120 			return rte_flow_error_set(error, ENOMEM,
6121 						  RTE_FLOW_ERROR_TYPE_ACTION,
6122 						  NULL, "no memory to split "
6123 						  "metadata flow");
6124 		/*
6125 		 * Create the new actions list with the Q/RSS action removed
6126 		 * and a set tag plus a jump to the register copy table
6127 		 * (RX_CP_TBL) appended. The unique tag ID must be
6128 		 * preallocated here in advance, as the set tag action needs it.
6129 		 */
6130 		qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
6131 						    qrss, actions_n,
6132 						    mtr_sfx, error);
6133 		if (!mtr_sfx && !qrss_id) {
6134 			ret = -rte_errno;
6135 			goto exit;
6136 		}
6137 	} else if (attr->egress && !attr->transfer) {
6138 		/*
6139 		 * All the actions on NIC Tx should have a metadata register
6140 		 * copy action to copy reg_a from WQE to reg_c[meta].
6141 		 */
6142 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
6143 			   sizeof(struct mlx5_flow_action_copy_mreg);
6144 		ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
6145 					  SOCKET_ID_ANY);
6146 		if (!ext_actions)
6147 			return rte_flow_error_set(error, ENOMEM,
6148 						  RTE_FLOW_ERROR_TYPE_ACTION,
6149 						  NULL, "no memory to split "
6150 						  "metadata flow");
6151 		/* Create the action list appended with copy register. */
6152 		ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
6153 					     actions_n, error, encap_idx);
6154 		if (ret < 0)
6155 			goto exit;
6156 	}
6157 	/* Add the unmodified original or prefix subflow. */
6158 	ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6159 				      items, ext_actions ? ext_actions :
6160 				      actions, flow_split_info, error);
6161 	if (ret < 0)
6162 		goto exit;
6163 	MLX5_ASSERT(dev_flow);
6164 	if (qrss) {
6165 		const struct rte_flow_attr q_attr = {
6166 			.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
6167 			.ingress = 1,
6168 		};
6169 		/* Internal PMD action to set register. */
6170 		struct mlx5_rte_flow_item_tag q_tag_spec = {
6171 			.data = qrss_id,
6172 			.id = REG_NON,
6173 		};
6174 		struct rte_flow_item q_items[] = {
6175 			{
6176 				.type = (enum rte_flow_item_type)
6177 					MLX5_RTE_FLOW_ITEM_TYPE_TAG,
6178 				.spec = &q_tag_spec,
6179 				.last = NULL,
6180 				.mask = NULL,
6181 			},
6182 			{
6183 				.type = RTE_FLOW_ITEM_TYPE_END,
6184 			},
6185 		};
6186 		struct rte_flow_action q_actions[] = {
6187 			{
6188 				.type = qrss->type,
6189 				.conf = qrss->conf,
6190 			},
6191 			{
6192 				.type = RTE_FLOW_ACTION_TYPE_END,
6193 			},
6194 		};
6195 		uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
6196 
6197 		/*
6198 		 * Configure the tag item only if there is no meter subflow,
6199 		 * since the tag is already marked in the meter suffix subflow
6200 		 * and we can just use the meter suffix items as is.
6201 		 */
6202 		if (qrss_id) {
6203 			/* Not meter subflow. */
6204 			MLX5_ASSERT(!mtr_sfx);
6205 			/*
6206 			 * Put the unique ID in the prefix flow because it is
6207 			 * destroyed after the suffix flow; the ID is freed
6208 			 * only when no actual flows use it, at which point
6209 			 * identifier reallocation becomes possible (for
6210 			 * example, for other flows in other threads).
6211 			 */
6212 			dev_flow->handle->split_flow_id = qrss_id;
6213 			ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
6214 						   error);
6215 			if (ret < 0)
6216 				goto exit;
6217 			q_tag_spec.id = ret;
6218 		}
6219 		dev_flow = NULL;
6220 		/* Add suffix subflow to execute Q/RSS. */
6221 		flow_split_info->prefix_layers = layers;
6222 		flow_split_info->prefix_mark = 0;
6223 		flow_split_info->table_id = 0;
6224 		ret = flow_create_split_inner(dev, flow, &dev_flow,
6225 					      &q_attr, mtr_sfx ? items :
6226 					      q_items, q_actions,
6227 					      flow_split_info, error);
6228 		if (ret < 0)
6229 			goto exit;
6230 		/* The qrss ID must be freed only on failure; clear it on success. */
6231 		qrss_id = 0;
6232 		MLX5_ASSERT(dev_flow);
6233 	}
6234 
6235 exit:
6236 	/*
6237 	 * We do not destroy the partially created sub_flows in case of error.
6238 	 * These ones are included into parent flow list and will be destroyed
6239 	 * by flow_drv_destroy.
6240 	 */
6241 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
6242 			qrss_id);
6243 	mlx5_free(ext_actions);
6244 	return ret;
6245 }
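
/*
 * Illustrative sketch: with extensive metadata enabled, an ingress flow
 *	pattern: [ ETH, END ], actions: [ MARK, RSS, END ]
 * ends up as two subflows:
 *	1. prefix, original table: [ MARK, TAG (reg_c[2] := qrss_id),
 *	   JUMP (RX_CP_TBL), END ]
 *	2. suffix, group MLX5_FLOW_MREG_ACT_TABLE_GROUP: match
 *	   TAG (reg_c[2] == qrss_id), apply [ RSS, END ]
 * RX_CP_TBL itself copies MARK/META to the CQE fields in between.
 */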
6246 
6247 /**
6248  * Create meter internal drop flow with the original pattern.
6249  *
6250  * @param dev
6251  *   Pointer to Ethernet device.
6252  * @param[in] flow
6253  *   Parent flow structure pointer.
6254  * @param[in] attr
6255  *   Flow rule attributes.
6256  * @param[in] items
6257  *   Pattern specification (list terminated by the END pattern item).
6258  * @param[in] flow_split_info
6259  *   Pointer to flow split info structure.
6260  * @param[in] fm
6261  *   Pointer to flow meter structure.
6262  * @param[out] error
6263  *   Perform verbose error reporting if not NULL.
6264  * @return
6265  *   0 on success, negative value otherwise
6266  */
6267 static int
6268 flow_meter_create_drop_flow_with_org_pattern(struct rte_eth_dev *dev,
6269 			struct rte_flow *flow,
6270 			const struct rte_flow_attr *attr,
6271 			const struct rte_flow_item items[],
6272 			struct mlx5_flow_split_info *flow_split_info,
6273 			struct mlx5_flow_meter_info *fm,
6274 			struct rte_flow_error *error)
6275 {
6276 	struct mlx5_flow *dev_flow = NULL;
6277 	struct rte_flow_attr drop_attr = *attr;
6278 	struct rte_flow_action drop_actions[3];
6279 	struct mlx5_flow_split_info drop_split_info = *flow_split_info;
6280 
6281 	MLX5_ASSERT(fm->drop_cnt);
6282 	drop_actions[0].type =
6283 		(enum rte_flow_action_type)MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
6284 	drop_actions[0].conf = (void *)(uintptr_t)fm->drop_cnt;
6285 	drop_actions[1].type = RTE_FLOW_ACTION_TYPE_DROP;
6286 	drop_actions[1].conf = NULL;
6287 	drop_actions[2].type = RTE_FLOW_ACTION_TYPE_END;
6288 	drop_actions[2].conf = NULL;
6289 	drop_split_info.external = false;
6290 	drop_split_info.skip_scale |= 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6291 	drop_split_info.table_id = MLX5_MTR_TABLE_ID_DROP;
6292 	drop_attr.group = MLX5_FLOW_TABLE_LEVEL_METER;
6293 	return flow_create_split_inner(dev, flow, &dev_flow,
6294 				&drop_attr, items, drop_actions,
6295 				&drop_split_info, error);
6296 }
6297 
6298 /**
6299  * The splitting for meter feature.
6300  *
6301  * - The meter flow will be split into two flows: a prefix and a
6302  *   suffix flow. The packets make sense only if they pass the prefix
6303  *   meter action.
6304  *
6305  * - Reg_C_5 is used for the packet to match between the prefix and
6306  *   suffix flows.
6307  *
6308  * @param dev
6309  *   Pointer to Ethernet device.
6310  * @param[in] flow
6311  *   Parent flow structure pointer.
6312  * @param[in] attr
6313  *   Flow rule attributes.
6314  * @param[in] items
6315  *   Pattern specification (list terminated by the END pattern item).
6316  * @param[in] actions
6317  *   Associated actions (list terminated by the END action).
6318  * @param[in] flow_split_info
6319  *   Pointer to flow split info structure.
6320  * @param[out] error
6321  *   Perform verbose error reporting if not NULL.
6322  * @return
6323  *   0 on success, negative value otherwise
6324  */
6325 static int
6326 flow_create_split_meter(struct rte_eth_dev *dev,
6327 			struct rte_flow *flow,
6328 			const struct rte_flow_attr *attr,
6329 			const struct rte_flow_item items[],
6330 			const struct rte_flow_action actions[],
6331 			struct mlx5_flow_split_info *flow_split_info,
6332 			struct rte_flow_error *error)
6333 {
6334 	struct mlx5_priv *priv = dev->data->dev_private;
6335 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6336 	struct rte_flow_action *sfx_actions = NULL;
6337 	struct rte_flow_action *pre_actions = NULL;
6338 	struct rte_flow_item *sfx_items = NULL;
6339 	struct mlx5_flow *dev_flow = NULL;
6340 	struct rte_flow_attr sfx_attr = *attr;
6341 	struct mlx5_flow_meter_info *fm = NULL;
6342 	uint8_t skip_scale_restore;
6343 	bool has_mtr = false;
6344 	bool has_modify = false;
6345 	bool set_mtr_reg = true;
6346 	bool is_mtr_hierarchy = false;
6347 	uint32_t meter_id = 0;
6348 	uint32_t mtr_idx = 0;
6349 	uint32_t mtr_flow_id = 0;
6350 	size_t act_size;
6351 	size_t item_size;
6352 	int actions_n = 0;
6353 	int ret = 0;
6354 
6355 	if (priv->mtr_en)
6356 		actions_n = flow_check_meter_action(dev, actions, &has_mtr,
6357 						    &has_modify, &meter_id);
6358 	if (has_mtr) {
6359 		if (flow->meter) {
6360 			fm = flow_dv_meter_find_by_idx(priv, flow->meter);
6361 			if (!fm)
6362 				return rte_flow_error_set(error, EINVAL,
6363 						RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6364 						NULL, "Meter not found.");
6365 		} else {
6366 			fm = mlx5_flow_meter_find(priv, meter_id, &mtr_idx);
6367 			if (!fm)
6368 				return rte_flow_error_set(error, EINVAL,
6369 						RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6370 						NULL, "Meter not found.");
6371 			ret = mlx5_flow_meter_attach(priv, fm,
6372 						     &sfx_attr, error);
6373 			if (ret)
6374 				return -rte_errno;
6375 			flow->meter = mtr_idx;
6376 		}
6377 		MLX5_ASSERT(wks);
6378 		wks->fm = fm;
6379 		if (!fm->def_policy) {
6380 			wks->policy = mlx5_flow_meter_policy_find(dev,
6381 								  fm->policy_id,
6382 								  NULL);
6383 			MLX5_ASSERT(wks->policy);
6384 			if (wks->policy->mark)
6385 				wks->mark = 1;
6386 			if (wks->policy->is_hierarchy) {
6387 				wks->final_policy =
6388 				mlx5_flow_meter_hierarchy_get_final_policy(dev,
6389 								wks->policy);
6390 				if (!wks->final_policy)
6391 					return rte_flow_error_set(error,
6392 					EINVAL,
6393 					RTE_FLOW_ERROR_TYPE_ACTION, NULL,
6394 				"Failed to find terminal policy of hierarchy.");
6395 				is_mtr_hierarchy = true;
6396 			}
6397 		}
6398 		/*
6399 		 * If it isn't a default-policy meter, it is not a meter
6400 		 * hierarchy, and either
6401 		 * 1. there's no action in the flow changing the
6402 		 *    packet (modify/encap/decap etc.), OR
6403 		 * 2. no drop count is needed for this meter,
6404 		 * then there is no need to use regC to save the meter ID.
6405 		 */
6406 		if (!fm->def_policy && !is_mtr_hierarchy &&
6407 		    (!has_modify || !fm->drop_cnt))
6408 			set_mtr_reg = false;
6409 		/* Prefix actions: meter, decap, encap, tag, jump, end, cnt. */
6410 #define METER_PREFIX_ACTION 7
6411 		act_size = (sizeof(struct rte_flow_action) *
6412 			    (actions_n + METER_PREFIX_ACTION)) +
6413 			   sizeof(struct mlx5_rte_flow_action_set_tag);
6414 		/* Suffix items: tag, vlan, port id, end. */
6415 #define METER_SUFFIX_ITEM 4
6416 		item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
6417 			    sizeof(struct mlx5_rte_flow_item_tag) * 2;
6418 		sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
6419 					  0, SOCKET_ID_ANY);
6420 		if (!sfx_actions)
6421 			return rte_flow_error_set(error, ENOMEM,
6422 						  RTE_FLOW_ERROR_TYPE_ACTION,
6423 						  NULL, "no memory to split "
6424 						  "meter flow");
6425 		sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
6426 			     act_size);
6427 		/* There's no suffix flow for meter of non-default policy. */
6428 		if (!fm->def_policy)
6429 			pre_actions = sfx_actions + 1;
6430 		else
6431 			pre_actions = sfx_actions + actions_n;
6432 		ret = flow_meter_split_prep(dev, flow, wks, &sfx_attr,
6433 					    items, sfx_items, actions,
6434 					    sfx_actions, pre_actions,
6435 					    (set_mtr_reg ? &mtr_flow_id : NULL),
6436 					    error);
6437 		if (ret) {
6438 			ret = -rte_errno;
6439 			goto exit;
6440 		}
6441 		/* Add the prefix subflow. */
6442 		skip_scale_restore = flow_split_info->skip_scale;
6443 		flow_split_info->skip_scale |=
6444 			1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6445 		ret = flow_create_split_inner(dev, flow, &dev_flow,
6446 					      attr, items, pre_actions,
6447 					      flow_split_info, error);
6448 		flow_split_info->skip_scale = skip_scale_restore;
6449 		if (ret) {
6450 			if (mtr_flow_id)
6451 				mlx5_ipool_free(fm->flow_ipool, mtr_flow_id);
6452 			ret = -rte_errno;
6453 			goto exit;
6454 		}
6455 		if (mtr_flow_id) {
6456 			dev_flow->handle->split_flow_id = mtr_flow_id;
6457 			dev_flow->handle->is_meter_flow_id = 1;
6458 		}
6459 		if (!fm->def_policy) {
6460 			if (!set_mtr_reg && fm->drop_cnt)
6461 				ret =
6462 			flow_meter_create_drop_flow_with_org_pattern(dev, flow,
6463 							&sfx_attr, items,
6464 							flow_split_info,
6465 							fm, error);
6466 			goto exit;
6467 		}
6468 		/* Setting the sfx group attr. */
6469 		sfx_attr.group = sfx_attr.transfer ?
6470 				(MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6471 				 MLX5_FLOW_TABLE_LEVEL_METER;
6472 		flow_split_info->prefix_layers =
6473 				flow_get_prefix_layer_flags(dev_flow);
6474 		flow_split_info->prefix_mark |= wks->mark;
6475 		flow_split_info->table_id = MLX5_MTR_TABLE_ID_SUFFIX;
6476 	}
6477 	/* Add the suffix subflow (the whole flow when there is no meter split). */
6478 	ret = flow_create_split_metadata(dev, flow,
6479 					 &sfx_attr, sfx_items ?
6480 					 sfx_items : items,
6481 					 sfx_actions ? sfx_actions : actions,
6482 					 flow_split_info, error);
6483 exit:
6484 	if (sfx_actions)
6485 		mlx5_free(sfx_actions);
6486 	return ret;
6487 }
6488 
6489 /**
6490  * The splitting for sample feature.
6491  *
6492  * Once a sample action is detected in the action list, the flow actions are
6493  * split into a prefix sub flow and a suffix sub flow.
6494  *
6495  * The original items remain in the prefix sub flow. All actions preceding
6496  * the sample action, and the sample action itself, are copied to the prefix
6497  * sub flow; the actions following the sample action are copied to the
6498  * suffix sub flow. A queue action is always located in the suffix sub flow.
6499  *
6500  * In order to make packets from the prefix sub flow match the suffix sub
6501  * flow, an extra tag action is added to the prefix sub flow, and the suffix
6502  * sub flow uses a tag item with the unique flow id.
6503  *
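 * For example (illustrative only): with the actions
 * COUNT / SAMPLE / QUEUE / END, the prefix sub flow carries COUNT and
 * SAMPLE plus the appended tag action, while the suffix sub flow matches
 * on that tag and carries QUEUE / END.
 *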
6504  * @param dev
6505  *   Pointer to Ethernet device.
6506  * @param[in] flow
6507  *   Parent flow structure pointer.
6508  * @param[in] attr
6509  *   Flow rule attributes.
6510  * @param[in] items
6511  *   Pattern specification (list terminated by the END pattern item).
6512  * @param[in] actions
6513  *   Associated actions (list terminated by the END action).
6514  * @param[in] flow_split_info
6515  *   Pointer to flow split info structure.
6516  * @param[out] error
6517  *   Perform verbose error reporting if not NULL.
6518  * @return
6519  *   0 on success, negative value otherwise
6520  */
6521 static int
6522 flow_create_split_sample(struct rte_eth_dev *dev,
6523 			 struct rte_flow *flow,
6524 			 const struct rte_flow_attr *attr,
6525 			 const struct rte_flow_item items[],
6526 			 const struct rte_flow_action actions[],
6527 			 struct mlx5_flow_split_info *flow_split_info,
6528 			 struct rte_flow_error *error)
6529 {
6530 	struct mlx5_priv *priv = dev->data->dev_private;
6531 	struct rte_flow_action *sfx_actions = NULL;
6532 	struct rte_flow_action *pre_actions = NULL;
6533 	struct rte_flow_item *sfx_items = NULL;
6534 	struct mlx5_flow *dev_flow = NULL;
6535 	struct rte_flow_attr sfx_attr = *attr;
6536 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6537 	struct mlx5_flow_dv_sample_resource *sample_res;
6538 	struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
6539 	struct mlx5_flow_tbl_resource *sfx_tbl;
6540 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6541 #endif
6542 	size_t act_size;
6543 	size_t item_size;
6544 	uint32_t fdb_tx = 0;
6545 	int32_t tag_id = 0;
6546 	int actions_n = 0;
6547 	int sample_action_pos;
6548 	int qrss_action_pos;
6549 	int add_tag = 0;
6550 	int modify_after_mirror = 0;
6551 	uint16_t jump_table = 0;
6552 	const uint32_t next_ft_step = 1;
6553 	int ret = 0;
6554 
6555 	if (priv->sampler_en)
6556 		actions_n = flow_check_match_action(actions, attr,
6557 					RTE_FLOW_ACTION_TYPE_SAMPLE,
6558 					&sample_action_pos, &qrss_action_pos,
6559 					&modify_after_mirror);
6560 	if (actions_n) {
6561 		/* The prefix actions must include sample, tag, end. */
6562 		act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
6563 			   + sizeof(struct mlx5_rte_flow_action_set_tag);
6564 		item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
6565 			    sizeof(struct mlx5_rte_flow_item_tag) * 2;
6566 		sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
6567 					  item_size), 0, SOCKET_ID_ANY);
6568 		if (!sfx_actions)
6569 			return rte_flow_error_set(error, ENOMEM,
6570 						  RTE_FLOW_ERROR_TYPE_ACTION,
6571 						  NULL, "no memory to split "
6572 						  "sample flow");
6573 		/* The representor_id is UINT16_MAX for uplink. */
6574 		fdb_tx = (attr->transfer && priv->representor_id != UINT16_MAX);
6575 		/*
6576 		 * When reg_c_preserve is set, metadata registers Cx preserve
6577 		 * their value even through packet duplication.
6578 		 */
6579 		add_tag = (!fdb_tx ||
6580 			   priv->sh->cdev->config.hca_attr.reg_c_preserve);
6581 		if (add_tag)
6582 			sfx_items = (struct rte_flow_item *)((char *)sfx_actions
6583 					+ act_size);
6584 		if (modify_after_mirror)
6585 			jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
6586 				     next_ft_step;
6587 		pre_actions = sfx_actions + actions_n;
6588 		tag_id = flow_sample_split_prep(dev, add_tag, items, sfx_items,
6589 						actions, sfx_actions,
6590 						pre_actions, actions_n,
6591 						sample_action_pos,
6592 						qrss_action_pos, jump_table,
6593 						error);
6594 		if (tag_id < 0 || (add_tag && !tag_id)) {
6595 			ret = -rte_errno;
6596 			goto exit;
6597 		}
6598 		if (modify_after_mirror)
6599 			flow_split_info->skip_scale =
6600 					1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6601 		/* Add the prefix subflow. */
6602 		ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6603 					      items, pre_actions,
6604 					      flow_split_info, error);
6605 		if (ret) {
6606 			ret = -rte_errno;
6607 			goto exit;
6608 		}
6609 		dev_flow->handle->split_flow_id = tag_id;
6610 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6611 		if (!modify_after_mirror) {
6612 			/* Set the sfx group attr. */
6613 			sample_res = (struct mlx5_flow_dv_sample_resource *)
6614 						dev_flow->dv.sample_res;
6615 			sfx_tbl = (struct mlx5_flow_tbl_resource *)
6616 						sample_res->normal_path_tbl;
6617 			sfx_tbl_data = container_of(sfx_tbl,
6618 						struct mlx5_flow_tbl_data_entry,
6619 						tbl);
6620 			sfx_attr.group = sfx_attr.transfer ?
6621 			(sfx_tbl_data->level - 1) : sfx_tbl_data->level;
6622 		} else {
6623 			MLX5_ASSERT(attr->transfer);
6624 			sfx_attr.group = jump_table;
6625 		}
6626 		flow_split_info->prefix_layers =
6627 				flow_get_prefix_layer_flags(dev_flow);
6628 		MLX5_ASSERT(wks);
6629 		flow_split_info->prefix_mark |= wks->mark;
6630 		/* The suffix group level has already been scaled with the
6631 		 * factor, set MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1
6632 		 * to avoid scaling again in translation.
6633 		 */
6634 		flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6635 #endif
6636 	}
6637 	/* Add the suffix subflow. */
6638 	ret = flow_create_split_meter(dev, flow, &sfx_attr,
6639 				      sfx_items ? sfx_items : items,
6640 				      sfx_actions ? sfx_actions : actions,
6641 				      flow_split_info, error);
6642 exit:
6643 	if (sfx_actions)
6644 		mlx5_free(sfx_actions);
6645 	return ret;
6646 }
6647 
6648 /**
6649  * Split the flow to subflow set. The splitters might be linked
6650  * in the chain, like this:
6651  * flow_create_split_outer() calls:
6652  *   flow_create_split_meter() calls:
6653  *     flow_create_split_metadata(meter_subflow_0) calls:
6654  *       flow_create_split_inner(metadata_subflow_0)
6655  *       flow_create_split_inner(metadata_subflow_1)
6656  *       flow_create_split_inner(metadata_subflow_2)
6657  *     flow_create_split_metadata(meter_subflow_1) calls:
6658  *       flow_create_split_inner(metadata_subflow_0)
6659  *       flow_create_split_inner(metadata_subflow_1)
6660  *       flow_create_split_inner(metadata_subflow_2)
6661  *
6662  * This provides a flexible way to add new levels of flow splitting.
6663  * All successfully created subflows are included in the
6664  * parent flow dev_flow list.
6665  *
6666  * @param dev
6667  *   Pointer to Ethernet device.
6668  * @param[in] flow
6669  *   Parent flow structure pointer.
6670  * @param[in] attr
6671  *   Flow rule attributes.
6672  * @param[in] items
6673  *   Pattern specification (list terminated by the END pattern item).
6674  * @param[in] actions
6675  *   Associated actions (list terminated by the END action).
6676  * @param[in] flow_split_info
6677  *   Pointer to flow split info structure.
6678  * @param[out] error
6679  *   Perform verbose error reporting if not NULL.
6680  * @return
6681  *   0 on success, negative value otherwise
6682  */
6683 static int
6684 flow_create_split_outer(struct rte_eth_dev *dev,
6685 			struct rte_flow *flow,
6686 			const struct rte_flow_attr *attr,
6687 			const struct rte_flow_item items[],
6688 			const struct rte_flow_action actions[],
6689 			struct mlx5_flow_split_info *flow_split_info,
6690 			struct rte_flow_error *error)
6691 {
6692 	int ret;
6693 
6694 	ret = flow_create_split_sample(dev, flow, attr, items,
6695 				       actions, flow_split_info, error);
6696 	MLX5_ASSERT(ret <= 0);
6697 	return ret;
6698 }
6699 
6700 static inline struct mlx5_flow_tunnel *
6701 flow_tunnel_from_rule(const struct mlx5_flow *flow)
6702 {
6703 	struct mlx5_flow_tunnel *tunnel;
6704 
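	/*
	 * flow->tunnel is const-qualified; the qualifier is cast away here
	 * (hence the -Wcast-qual suppression) because the returned tunnel
	 * object is reference-counted and updated by the callers.
	 */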
6705 #pragma GCC diagnostic push
6706 #pragma GCC diagnostic ignored "-Wcast-qual"
6707 	tunnel = (typeof(tunnel))flow->tunnel;
6708 #pragma GCC diagnostic pop
6709 
6710 	return tunnel;
6711 }
6712 
6713 /**
6714  * Adjust flow RSS workspace if needed.
6715  *
6716  * @param wks
6717  *   Pointer to thread flow work space.
6718  * @param rss_desc
6719  *   Pointer to RSS descriptor.
6720  * @param[in] nrssq_num
6721  *   New RSS queue number.
6722  *
6723  * @return
6724  *   0 on success, -1 otherwise and rte_errno is set.
6725  */
6726 static int
6727 flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
6728 			  struct mlx5_flow_rss_desc *rss_desc,
6729 			  uint32_t nrssq_num)
6730 {
6731 	uint16_t *queue;
6732 
6733 	if (likely(nrssq_num <= wks->rssq_num))
6734 		return 0;
6735 	/* Temporary pointer: do not leak the old array if realloc() fails. */
6736 	queue = realloc(rss_desc->queue,
6737 			sizeof(*rss_desc->queue) * RTE_ALIGN(nrssq_num, 2));
6738 	if (!queue) {
6739 		rte_errno = ENOMEM;
6740 		return -1;
6741 	}
6742 	rss_desc->queue = queue;
6743 	wks->rssq_num = RTE_ALIGN(nrssq_num, 2);
6744 	return 0;
6745 }
6742 
6743 /**
6744  * Create a flow and add it to the flow list of the given type.
6745  *
6746  * @param dev
6747  *   Pointer to Ethernet device.
6748  * @param[in] type
6749  *   Flow type (e.g. MLX5_FLOW_TYPE_CTL or MLX5_FLOW_TYPE_GEN); it selects
6750  *   the indexed pool the created flow is inserted into.
6753  * @param[in] attr
6754  *   Flow rule attributes.
6755  * @param[in] items
6756  *   Pattern specification (list terminated by the END pattern item).
6757  * @param[in] actions
6758  *   Associated actions (list terminated by the END action).
6759  * @param[in] external
6760  *   True when this flow rule is created by a request external to the PMD.
6761  * @param[out] error
6762  *   Perform verbose error reporting if not NULL.
6763  *
6764  * @return
6765  *   A flow index on success, 0 otherwise and rte_errno is set.
6766  */
6767 static uint32_t
6768 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6769 		 const struct rte_flow_attr *attr,
6770 		 const struct rte_flow_item items[],
6771 		 const struct rte_flow_action original_actions[],
6772 		 bool external, struct rte_flow_error *error)
6773 {
6774 	struct mlx5_priv *priv = dev->data->dev_private;
6775 	struct rte_flow *flow = NULL;
6776 	struct mlx5_flow *dev_flow;
6777 	const struct rte_flow_action_rss *rss = NULL;
6778 	struct mlx5_translated_action_handle
6779 		indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
6780 	int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
6781 	union {
6782 		struct mlx5_flow_expand_rss buf;
6783 		uint8_t buffer[4096];
6784 	} expand_buffer;
6785 	union {
6786 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6787 		uint8_t buffer[2048];
6788 	} actions_rx;
6789 	union {
6790 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6791 		uint8_t buffer[2048];
6792 	} actions_hairpin_tx;
6793 	union {
6794 		struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
6795 		uint8_t buffer[2048];
6796 	} items_tx;
6797 	struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
6798 	struct mlx5_flow_rss_desc *rss_desc;
6799 	const struct rte_flow_action *p_actions_rx;
6800 	uint32_t i;
6801 	uint32_t idx = 0;
6802 	int hairpin_flow;
6803 	struct rte_flow_attr attr_tx = { .priority = 0 };
6804 	const struct rte_flow_action *actions;
6805 	struct rte_flow_action *translated_actions = NULL;
6806 	struct mlx5_flow_tunnel *tunnel;
6807 	struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
6808 	struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
6809 	struct mlx5_flow_split_info flow_split_info = {
6810 		.external = !!external,
6811 		.skip_scale = 0,
6812 		.flow_idx = 0,
6813 		.prefix_mark = 0,
6814 		.prefix_layers = 0,
6815 		.table_id = 0
6816 	};
6817 	int ret;
6818 
6819 	MLX5_ASSERT(wks);
6820 	rss_desc = &wks->rss_desc;
6821 	ret = flow_action_handles_translate(dev, original_actions,
6822 					    indir_actions,
6823 					    &indir_actions_n,
6824 					    &translated_actions, error);
6825 	if (ret < 0) {
6826 		MLX5_ASSERT(translated_actions == NULL);
6827 		return 0;
6828 	}
6829 	actions = translated_actions ? translated_actions : original_actions;
6830 	p_actions_rx = actions;
6831 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
6832 	ret = flow_drv_validate(dev, attr, items, p_actions_rx,
6833 				external, hairpin_flow, error);
6834 	if (ret < 0)
6835 		goto error_before_hairpin_split;
6836 	flow = mlx5_ipool_zmalloc(priv->flows[type], &idx);
6837 	if (!flow) {
6838 		rte_errno = ENOMEM;
6839 		goto error_before_hairpin_split;
6840 	}
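	/*
	 * A hairpin flow is handled as two rules: the Rx part keeps
	 * p_actions_rx, while flow_hairpin_split() below collects the
	 * items/actions for a dedicated Tx rule that is built at the
	 * "Create the tx flow" step further down.
	 */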
6841 	if (hairpin_flow > 0) {
6842 		if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
6843 			rte_errno = EINVAL;
6844 			goto error_before_hairpin_split;
6845 		}
6846 		flow_hairpin_split(dev, actions, actions_rx.actions,
6847 				   actions_hairpin_tx.actions, items_tx.items,
6848 				   idx);
6849 		p_actions_rx = actions_rx.actions;
6850 	}
6851 	flow_split_info.flow_idx = idx;
6852 	flow->drv_type = flow_get_drv_type(dev, attr);
6853 	MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
6854 		    flow->drv_type < MLX5_FLOW_TYPE_MAX);
6855 	memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
6856 	/* RSS Action only works on NIC RX domain */
6857 	if (attr->ingress && !attr->transfer)
6858 		rss = flow_get_rss_action(dev, p_actions_rx);
6859 	if (rss) {
6860 		if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
6861 			goto error;
6862 		/*
6863 		 * The following information is required by
6864 		 * mlx5_flow_hashfields_adjust() in advance.
6865 		 */
6866 		rss_desc->level = rss->level;
6867 		/* RSS type 0 indicates default RSS type (RTE_ETH_RSS_IP). */
6868 		rss_desc->types = !rss->types ? RTE_ETH_RSS_IP : rss->types;
6869 	}
6870 	flow->dev_handles = 0;
6871 	if (rss && rss->types) {
6872 		unsigned int graph_root;
6873 
6874 		graph_root = find_graph_root(rss->level);
6875 		ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
6876 					   items, rss->types,
6877 					   mlx5_support_expansion, graph_root);
6878 		MLX5_ASSERT(ret > 0 &&
6879 		       (unsigned int)ret < sizeof(expand_buffer.buffer));
6880 		if (rte_log_can_log(mlx5_logtype, RTE_LOG_DEBUG)) {
6881 			for (i = 0; i < buf->entries; ++i)
6882 				mlx5_dbg__print_pattern(buf->entry[i].pattern);
6883 		}
6884 	} else {
6885 		buf->entries = 1;
6886 		buf->entry[0].pattern = (void *)(uintptr_t)items;
6887 	}
6888 	rss_desc->shared_rss = flow_get_shared_rss_action(dev, indir_actions,
6889 						      indir_actions_n);
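	/*
	 * Illustrative example: a pattern like ETH / IPV4 / END with RSS
	 * types including TCP typically expands to both ETH / IPV4 / END
	 * and ETH / IPV4 / TCP / END entries, and the loop below creates
	 * the subflow set for every entry.
	 */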
6890 	for (i = 0; i < buf->entries; ++i) {
6891 		/* Initialize flow split data. */
6892 		flow_split_info.prefix_layers = 0;
6893 		flow_split_info.prefix_mark = 0;
6894 		flow_split_info.skip_scale = 0;
6895 		/*
6896 		 * The splitter may create multiple dev_flows,
6897 		 * depending on configuration. In the simplest
6898 		 * case it just creates the unmodified original flow.
6899 		 */
6900 		ret = flow_create_split_outer(dev, flow, attr,
6901 					      buf->entry[i].pattern,
6902 					      p_actions_rx, &flow_split_info,
6903 					      error);
6904 		if (ret < 0)
6905 			goto error;
6906 		if (is_flow_tunnel_steer_rule(wks->flows[0].tof_type)) {
6907 			ret = flow_tunnel_add_default_miss(dev, flow, attr,
6908 							   p_actions_rx,
6909 							   idx,
6910 							   wks->flows[0].tunnel,
6911 							   &default_miss_ctx,
6912 							   error);
6913 			if (ret < 0) {
6914 				mlx5_free(default_miss_ctx.queue);
6915 				goto error;
6916 			}
6917 		}
6918 	}
6919 	/* Create the tx flow. */
6920 	if (hairpin_flow) {
6921 		attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
6922 		attr_tx.ingress = 0;
6923 		attr_tx.egress = 1;
6924 		dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
6925 					 actions_hairpin_tx.actions,
6926 					 idx, error);
6927 		if (!dev_flow)
6928 			goto error;
6929 		dev_flow->flow = flow;
6930 		dev_flow->external = 0;
6931 		SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
6932 			      dev_flow->handle, next);
6933 		ret = flow_drv_translate(dev, dev_flow, &attr_tx,
6934 					 items_tx.items,
6935 					 actions_hairpin_tx.actions, error);
6936 		if (ret < 0)
6937 			goto error;
6938 	}
6939 	/*
6940 	 * Update the metadata register copy table. If extensive
6941 	 * metadata feature is enabled and registers are supported
6942 	 * we might create the extra rte_flow for each unique
6943 	 * MARK/FLAG action ID.
6944 	 *
6945 	 * The table is updated for ingress Flows only, because
6946 	 * the egress Flows belong to the different device and
6947 	 * copy table should be updated in peer NIC Rx domain.
6948 	 */
6949 	if (attr->ingress &&
6950 	    (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
6951 		ret = flow_mreg_update_copy_table(dev, flow, actions, error);
6952 		if (ret)
6953 			goto error;
6954 	}
6955 	/*
6956 	 * If the flow is external (from application) OR device is started,
6957 	 * OR mreg discover, then apply immediately.
6958 	 */
6959 	if (external || dev->data->dev_started ||
6960 	    (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
6961 	     attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
6962 		ret = flow_drv_apply(dev, flow, error);
6963 		if (ret < 0)
6964 			goto error;
6965 	}
6966 	flow->type = type;
6967 	flow_rxq_flags_set(dev, flow);
6968 	rte_free(translated_actions);
6969 	tunnel = flow_tunnel_from_rule(wks->flows);
6970 	if (tunnel) {
6971 		flow->tunnel = 1;
6972 		flow->tunnel_id = tunnel->tunnel_id;
6973 		__atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
6974 		mlx5_free(default_miss_ctx.queue);
6975 	}
6976 	mlx5_flow_pop_thread_workspace();
6977 	return idx;
6978 error:
6979 	MLX5_ASSERT(flow);
6980 	ret = rte_errno; /* Save rte_errno before cleanup. */
6981 	flow_mreg_del_copy_action(dev, flow);
6982 	flow_drv_destroy(dev, flow);
6983 	if (rss_desc->shared_rss)
6984 		__atomic_sub_fetch(&((struct mlx5_shared_action_rss *)
6985 			mlx5_ipool_get
6986 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
6987 			rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
6988 	mlx5_ipool_free(priv->flows[type], idx);
6989 	rte_errno = ret; /* Restore rte_errno. */
6992 	mlx5_flow_pop_thread_workspace();
6993 error_before_hairpin_split:
6994 	rte_free(translated_actions);
6995 	return 0;
6996 }
6997 
6998 /**
6999  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
7000  * incoming packets to table 1.
7001  *
7002  * Other flow rules, requested for group n, will be created in
7003  * e-switch table n+1.
7004  * A jump action to e-switch group n will actually jump to e-switch table n+1.
7005  *
7006  * Used when working in switchdev mode, to utilise advantages of table 1
7007  * and above.
7008  *
7009  * @param dev
7010  *   Pointer to Ethernet device.
7011  *
7012  * @return
7013  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
7014  */
7015 struct rte_flow *
7016 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
7017 {
7018 	const struct rte_flow_attr attr = {
7019 		.group = 0,
7020 		.priority = 0,
7021 		.ingress = 1,
7022 		.egress = 0,
7023 		.transfer = 1,
7024 	};
7025 	const struct rte_flow_item pattern = {
7026 		.type = RTE_FLOW_ITEM_TYPE_END,
7027 	};
7028 	struct rte_flow_action_jump jump = {
7029 		.group = 1,
7030 	};
7031 	const struct rte_flow_action actions[] = {
7032 		{
7033 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
7034 			.conf = &jump,
7035 		},
7036 		{
7037 			.type = RTE_FLOW_ACTION_TYPE_END,
7038 		},
7039 	};
7040 	struct rte_flow_error error;
7041 
7042 	return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7043 						   &attr, &pattern,
7044 						   actions, false, &error);
7045 }
7046 
7047 /**
7048  * Create a dedicated flow rule on e-switch table 1; it matches the ESW
7049  * manager and the SQ number and directs all packets to the peer vport.
7050  *
7051  * @param dev
7052  *   Pointer to Ethernet device.
7053  * @param txq
7054  *   Txq index.
7055  *
7056  * @return
7057  *   Flow ID on success, 0 otherwise and rte_errno is set.
7058  */
7059 uint32_t
7060 mlx5_flow_create_devx_sq_miss_flow(struct rte_eth_dev *dev, uint32_t txq)
7061 {
7062 	struct rte_flow_attr attr = {
7063 		.group = 0,
7064 		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7065 		.ingress = 1,
7066 		.egress = 0,
7067 		.transfer = 1,
7068 	};
7069 	struct rte_flow_item_port_id port_spec = {
7070 		.id = MLX5_PORT_ESW_MGR,
7071 	};
7072 	struct mlx5_rte_flow_item_tx_queue txq_spec = {
7073 		.queue = txq,
7074 	};
7075 	struct rte_flow_item pattern[] = {
7076 		{
7077 			.type = RTE_FLOW_ITEM_TYPE_PORT_ID,
7078 			.spec = &port_spec,
7079 		},
7080 		{
7081 			.type = (enum rte_flow_item_type)
7082 				MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
7083 			.spec = &txq_spec,
7084 		},
7085 		{
7086 			.type = RTE_FLOW_ITEM_TYPE_END,
7087 		},
7088 	};
7089 	struct rte_flow_action_jump jump = {
7090 		.group = 1,
7091 	};
7092 	struct rte_flow_action_port_id port = {
7093 		.id = dev->data->port_id,
7094 	};
7095 	struct rte_flow_action actions[] = {
7096 		{
7097 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
7098 			.conf = &jump,
7099 		},
7100 		{
7101 			.type = RTE_FLOW_ACTION_TYPE_END,
7102 		},
7103 	};
7104 	struct rte_flow_error error;
7105 
7106 	/*
7107 	 * Create the group 0, highest priority jump flow.
7108 	 * It matches the txq to bypass kernel packets.
7109 	 */
7110 	if (flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern, actions,
7111 			     false, &error) == 0)
7112 		return 0;
7113 	/* Create group 1, lowest priority redirect flow for txq. */
7114 	attr.group = 1;
7115 	actions[0].conf = &port;
7116 	actions[0].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
7117 	return flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern,
7118 				actions, false, &error);
7119 }
7120 
7121 /**
7122  * Validate a flow supported by the NIC.
7123  *
7124  * @see rte_flow_validate()
7125  * @see rte_flow_ops
7126  */
7127 int
7128 mlx5_flow_validate(struct rte_eth_dev *dev,
7129 		   const struct rte_flow_attr *attr,
7130 		   const struct rte_flow_item items[],
7131 		   const struct rte_flow_action original_actions[],
7132 		   struct rte_flow_error *error)
7133 {
7134 	int hairpin_flow;
7135 	struct mlx5_translated_action_handle
7136 		indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
7137 	int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
7138 	const struct rte_flow_action *actions;
7139 	struct rte_flow_action *translated_actions = NULL;
7140 	int ret = flow_action_handles_translate(dev, original_actions,
7141 						indir_actions,
7142 						&indir_actions_n,
7143 						&translated_actions, error);
7144 
7145 	if (ret)
7146 		return ret;
7147 	actions = translated_actions ? translated_actions : original_actions;
7148 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
7149 	ret = flow_drv_validate(dev, attr, items, actions,
7150 				true, hairpin_flow, error);
7151 	rte_free(translated_actions);
7152 	return ret;
7153 }
7154 
7155 /**
7156  * Create a flow.
7157  *
7158  * @see rte_flow_create()
7159  * @see rte_flow_ops
7160  */
7161 struct rte_flow *
7162 mlx5_flow_create(struct rte_eth_dev *dev,
7163 		 const struct rte_flow_attr *attr,
7164 		 const struct rte_flow_item items[],
7165 		 const struct rte_flow_action actions[],
7166 		 struct rte_flow_error *error)
7167 {
7168 	struct mlx5_priv *priv = dev->data->dev_private;
7169 
7170 	if (priv->sh->config.dv_flow_en == 2) {
7171 		rte_flow_error_set(error, ENOTSUP,
7172 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7173 			  NULL,
7174 			  "Flow non-Q creation not supported");
7175 		return NULL;
7176 	}
7177 	/*
7178 	 * If the device is not started yet, it is not allowed to create a
7179 	 * flow from the application. PMD default flows and traffic control
7180 	 * flows are not affected.
7181 	 */
7182 	if (unlikely(!dev->data->dev_started)) {
7183 		DRV_LOG(DEBUG, "port %u is not started when "
7184 			"inserting a flow", dev->data->port_id);
7185 		rte_flow_error_set(error, ENODEV,
7186 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7187 				   NULL,
7188 				   "port not started");
7189 		return NULL;
7190 	}
7191 
7192 	return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_GEN,
7193 						   attr, items, actions,
7194 						   true, error);
7195 }
7196 
7197 /**
7198  * Destroy a flow in a list.
7199  *
7200  * @param dev
7201  *   Pointer to Ethernet device.
7202  * @param[in] flow_idx
7203  *   Index of flow to destroy.
7204  */
7205 static void
7206 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7207 		  uint32_t flow_idx)
7208 {
7209 	struct mlx5_priv *priv = dev->data->dev_private;
7210 	struct rte_flow *flow = mlx5_ipool_get(priv->flows[type], flow_idx);
7211 
7212 	if (!flow)
7213 		return;
7214 	MLX5_ASSERT(flow->type == type);
7215 	/*
7216 	 * Update RX queue flags only if port is started, otherwise it is
7217 	 * already clean.
7218 	 */
7219 	if (dev->data->dev_started)
7220 		flow_rxq_flags_trim(dev, flow);
7221 	flow_drv_destroy(dev, flow);
7222 	if (flow->tunnel) {
7223 		struct mlx5_flow_tunnel *tunnel;
7224 
7225 		tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
7226 		RTE_VERIFY(tunnel);
7227 		if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
7228 			mlx5_flow_tunnel_free(dev, tunnel);
7229 	}
7230 	flow_mreg_del_copy_action(dev, flow);
7231 	mlx5_ipool_free(priv->flows[type], flow_idx);
7232 }
7233 
7234 /**
7235  * Destroy all flows.
7236  *
7237  * @param dev
7238  *   Pointer to Ethernet device.
7239  * @param type
7240  *   Flow type to be flushed.
7241  * @param active
7242  *   If flushing is called actively.
7243  */
7244 void
7245 mlx5_flow_list_flush(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7246 		     bool active)
7247 {
7248 	struct mlx5_priv *priv = dev->data->dev_private;
7249 	uint32_t num_flushed = 0, fidx = 1;
7250 	struct rte_flow *flow;
7251 
7252 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
7253 	if (priv->sh->config.dv_flow_en == 2 &&
7254 	    type == MLX5_FLOW_TYPE_GEN) {
7255 		flow_hw_q_flow_flush(dev, NULL);
7256 		return;
7257 	}
7258 #endif
7259 
7260 	MLX5_IPOOL_FOREACH(priv->flows[type], fidx, flow) {
7261 		flow_list_destroy(dev, type, fidx);
7262 		num_flushed++;
7263 	}
7264 	if (active) {
7265 		DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
7266 			dev->data->port_id, num_flushed);
7267 	}
7268 }
7269 
7270 /**
7271  * Stop all default actions for flows.
7272  *
7273  * @param dev
7274  *   Pointer to Ethernet device.
7275  */
7276 void
7277 mlx5_flow_stop_default(struct rte_eth_dev *dev)
7278 {
7279 	flow_mreg_del_default_copy_action(dev);
7280 	flow_rxq_flags_clear(dev);
7281 }
7282 
7283 /**
7284  * Start all default actions for flows.
7285  *
7286  * @param dev
7287  *   Pointer to Ethernet device.
7288  * @return
7289  *   0 on success, a negative errno value otherwise and rte_errno is set.
7290  */
7291 int
7292 mlx5_flow_start_default(struct rte_eth_dev *dev)
7293 {
7294 	struct rte_flow_error error;
7295 
7296 	/* Make sure default copy action (reg_c[0] -> reg_b) is created. */
7297 	return flow_mreg_add_default_copy_action(dev, &error);
7298 }
7299 
7300 /**
7301  * Release thread specific flow workspace data (destructor of the workspace key).
7302  */
7303 void
7304 flow_release_workspace(void *data)
7305 {
7306 	struct mlx5_flow_workspace *wks = data;
7307 	struct mlx5_flow_workspace *next;
7308 
7309 	while (wks) {
7310 		next = wks->next;
7311 		free(wks->rss_desc.queue);
7312 		free(wks);
7313 		wks = next;
7314 	}
7315 }
7316 
7317 /**
7318  * Get thread specific current flow workspace.
7319  *
7320  * @return pointer to thread specific flow workspace data, NULL on error.
7321  */
7322 struct mlx5_flow_workspace*
7323 mlx5_flow_get_thread_workspace(void)
7324 {
7325 	struct mlx5_flow_workspace *data;
7326 
7327 	data = mlx5_flow_os_get_specific_workspace();
7328 	MLX5_ASSERT(data && data->inuse);
7329 	if (!data || !data->inuse)
7330 		DRV_LOG(ERR, "flow workspace not initialized.");
7331 	return data;
7332 }
7333 
7334 /**
7335  * Allocate and init new flow workspace.
7336  *
7337  * @return pointer to flow workspace data, NULL on error.
7338  */
7339 static struct mlx5_flow_workspace*
7340 flow_alloc_thread_workspace(void)
7341 {
7342 	struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
7343 
7344 	if (!data) {
7345 		DRV_LOG(ERR, "Failed to allocate flow workspace "
7346 			"memory.");
7347 		return NULL;
7348 	}
7349 	data->rss_desc.queue = calloc(1,
7350 			sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
7351 	if (!data->rss_desc.queue)
7352 		goto err;
7353 	data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
7354 	return data;
7355 err:
7356 	free(data->rss_desc.queue);
7357 	free(data);
7358 	return NULL;
7359 }
7360 
7361 /**
7362  * Get new thread specific flow workspace.
7363  *
7364  * If the current workspace is in use, create a new one and set it as current.
7365  *
7366  * @return pointer to thread specific flow workspace data, NULL on error.
7367  */
7368 static struct mlx5_flow_workspace*
7369 mlx5_flow_push_thread_workspace(void)
7370 {
7371 	struct mlx5_flow_workspace *curr;
7372 	struct mlx5_flow_workspace *data;
7373 
7374 	curr = mlx5_flow_os_get_specific_workspace();
7375 	if (!curr) {
7376 		data = flow_alloc_thread_workspace();
7377 		if (!data)
7378 			return NULL;
7379 	} else if (!curr->inuse) {
7380 		data = curr;
7381 	} else if (curr->next) {
7382 		data = curr->next;
7383 	} else {
7384 		data = flow_alloc_thread_workspace();
7385 		if (!data)
7386 			return NULL;
7387 		curr->next = data;
7388 		data->prev = curr;
7389 	}
7390 	data->inuse = 1;
7391 	data->flow_idx = 0;
7392 	/* Set as current workspace */
7393 	if (mlx5_flow_os_set_specific_workspace(data))
7394 		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
7395 	return data;
7396 }
7397 
7398 /**
7399  * Close the current thread specific flow workspace.
7400  *
7401  * If a previous workspace is available, set it as current.
7404  */
7405 static void
7406 mlx5_flow_pop_thread_workspace(void)
7407 {
7408 	struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();
7409 
7410 	if (!data)
7411 		return;
7412 	if (!data->inuse) {
7413 		DRV_LOG(ERR, "Failed to close unused flow workspace.");
7414 		return;
7415 	}
7416 	data->inuse = 0;
7417 	if (!data->prev)
7418 		return;
7419 	if (mlx5_flow_os_set_specific_workspace(data->prev))
7420 		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
7421 }
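
/*
 * Typical nesting of the workspace helpers above (illustrative sketch,
 * mirroring how flow_list_create() uses them):
 *
 *	struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
 *
 *	if (wks) {
 *		... build subflows using wks->rss_desc ...
 *		mlx5_flow_pop_thread_workspace();
 *	}
 */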
7422 
7423 /**
7424  * Verify the flow list is empty.
7425  *
7426  * @param dev
7427  *  Pointer to Ethernet device.
7428  *
7429  * @return the number of flows not released.
7430  */
7431 int
7432 mlx5_flow_verify(struct rte_eth_dev *dev)
7433 {
7434 	struct mlx5_priv *priv = dev->data->dev_private;
7435 	struct rte_flow *flow;
7436 	uint32_t idx = 0;
7437 	int ret = 0, i;
7438 
7439 	for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
7440 		MLX5_IPOOL_FOREACH(priv->flows[i], idx, flow) {
7441 			DRV_LOG(DEBUG, "port %u flow %p still referenced",
7442 				dev->data->port_id, (void *)flow);
7443 			ret++;
7444 		}
7445 	}
7446 	return ret;
7447 }
7448 
7449 /**
7450  * Enable default hairpin egress flow.
7451  *
7452  * @param dev
7453  *   Pointer to Ethernet device.
7454  * @param queue
7455  *   The queue index.
7456  *
7457  * @return
7458  *   0 on success, a negative errno value otherwise and rte_errno is set.
7459  */
7460 int
7461 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
7462 			    uint32_t queue)
7463 {
7464 	const struct rte_flow_attr attr = {
7465 		.egress = 1,
7466 		.priority = 0,
7467 	};
7468 	struct mlx5_rte_flow_item_tx_queue queue_spec = {
7469 		.queue = queue,
7470 	};
7471 	struct mlx5_rte_flow_item_tx_queue queue_mask = {
7472 		.queue = UINT32_MAX,
7473 	};
7474 	struct rte_flow_item items[] = {
7475 		{
7476 			.type = (enum rte_flow_item_type)
7477 				MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
7478 			.spec = &queue_spec,
7479 			.last = NULL,
7480 			.mask = &queue_mask,
7481 		},
7482 		{
7483 			.type = RTE_FLOW_ITEM_TYPE_END,
7484 		},
7485 	};
7486 	struct rte_flow_action_jump jump = {
7487 		.group = MLX5_HAIRPIN_TX_TABLE,
7488 	};
7489 	struct rte_flow_action actions[2];
7490 	uint32_t flow_idx;
7491 	struct rte_flow_error error;
7492 
7493 	actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
7494 	actions[0].conf = &jump;
7495 	actions[1].type = RTE_FLOW_ACTION_TYPE_END;
7496 	flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7497 				    &attr, items, actions, false, &error);
7498 	if (!flow_idx) {
7499 		DRV_LOG(DEBUG,
7500 			"Failed to create ctrl flow: rte_errno(%d),"
7501 			" type(%d), message(%s)",
7502 			rte_errno, error.type,
7503 			error.message ? error.message : " (no stated reason)");
7504 		return -rte_errno;
7505 	}
7506 	return 0;
7507 }
7508 
7509 /**
7510  * Enable a control flow configured from the control plane.
7511  *
7512  * @param dev
7513  *   Pointer to Ethernet device.
7514  * @param eth_spec
7515  *   An Ethernet flow spec to apply.
7516  * @param eth_mask
7517  *   An Ethernet flow mask to apply.
7518  * @param vlan_spec
7519  *   A VLAN flow spec to apply.
7520  * @param vlan_mask
7521  *   A VLAN flow mask to apply.
7522  *
7523  * @return
7524  *   0 on success, a negative errno value otherwise and rte_errno is set.
7525  */
7526 int
7527 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
7528 		    struct rte_flow_item_eth *eth_spec,
7529 		    struct rte_flow_item_eth *eth_mask,
7530 		    struct rte_flow_item_vlan *vlan_spec,
7531 		    struct rte_flow_item_vlan *vlan_mask)
7532 {
7533 	struct mlx5_priv *priv = dev->data->dev_private;
7534 	const struct rte_flow_attr attr = {
7535 		.ingress = 1,
7536 		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7537 	};
7538 	struct rte_flow_item items[] = {
7539 		{
7540 			.type = RTE_FLOW_ITEM_TYPE_ETH,
7541 			.spec = eth_spec,
7542 			.last = NULL,
7543 			.mask = eth_mask,
7544 		},
7545 		{
7546 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
7547 					      RTE_FLOW_ITEM_TYPE_END,
7548 			.spec = vlan_spec,
7549 			.last = NULL,
7550 			.mask = vlan_mask,
7551 		},
7552 		{
7553 			.type = RTE_FLOW_ITEM_TYPE_END,
7554 		},
7555 	};
7556 	uint16_t queue[priv->reta_idx_n];
7557 	struct rte_flow_action_rss action_rss = {
7558 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
7559 		.level = 0,
7560 		.types = priv->rss_conf.rss_hf,
7561 		.key_len = priv->rss_conf.rss_key_len,
7562 		.queue_num = priv->reta_idx_n,
7563 		.key = priv->rss_conf.rss_key,
7564 		.queue = queue,
7565 	};
7566 	struct rte_flow_action actions[] = {
7567 		{
7568 			.type = RTE_FLOW_ACTION_TYPE_RSS,
7569 			.conf = &action_rss,
7570 		},
7571 		{
7572 			.type = RTE_FLOW_ACTION_TYPE_END,
7573 		},
7574 	};
7575 	uint32_t flow_idx;
7576 	struct rte_flow_error error;
7577 	unsigned int i;
7578 
7579 	if (!priv->reta_idx_n || !priv->rxqs_n)
7580 		return 0;
7582 	if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
7583 		action_rss.types = 0;
7584 	for (i = 0; i != priv->reta_idx_n; ++i)
7585 		queue[i] = (*priv->reta_idx)[i];
7586 	flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7587 				    &attr, items, actions, false, &error);
7588 	if (!flow_idx)
7589 		return -rte_errno;
7590 	return 0;
7591 }
7592 
7593 /**
7594  * Enable a control flow configured from the control plane.
7595  *
7596  * @param dev
7597  *   Pointer to Ethernet device.
7598  * @param eth_spec
7599  *   An Ethernet flow spec to apply.
7600  * @param eth_mask
7601  *   An Ethernet flow mask to apply.
7602  *
7603  * @return
7604  *   0 on success, a negative errno value otherwise and rte_errno is set.
7605  */
7606 int
7607 mlx5_ctrl_flow(struct rte_eth_dev *dev,
7608 	       struct rte_flow_item_eth *eth_spec,
7609 	       struct rte_flow_item_eth *eth_mask)
7610 {
7611 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
7612 }
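
/*
 * Usage sketch for the helper above (illustrative; the broadcast spec
 * mirrors the way the PMD enables broadcast traffic at start time):
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *		DRV_LOG(ERR, "cannot enable broadcast ctrl flow");
 */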
7613 
7614 /**
7615  * Create the default miss flow rule matching LACP traffic.
7616  *
7617  * @param dev
7618  *   Pointer to Ethernet device.
7621  *
7622  * @return
7623  *   0 on success, a negative errno value otherwise and rte_errno is set.
7624  */
7625 int
7626 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
7627 {
7628 	/*
7629 	 * The LACP matching is done only by the ether type, since matching on
7630 	 * the multicast dst MAC causes the kernel to give low priority to this flow.
7631 	 */
7632 	static const struct rte_flow_item_eth lacp_spec = {
7633 		.type = RTE_BE16(0x8809),
7634 	};
7635 	static const struct rte_flow_item_eth lacp_mask = {
7636 		.type = 0xffff,
7637 	};
7638 	const struct rte_flow_attr attr = {
7639 		.ingress = 1,
7640 	};
7641 	struct rte_flow_item items[] = {
7642 		{
7643 			.type = RTE_FLOW_ITEM_TYPE_ETH,
7644 			.spec = &lacp_spec,
7645 			.mask = &lacp_mask,
7646 		},
7647 		{
7648 			.type = RTE_FLOW_ITEM_TYPE_END,
7649 		},
7650 	};
7651 	struct rte_flow_action actions[] = {
7652 		{
7653 			.type = (enum rte_flow_action_type)
7654 				MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
7655 		},
7656 		{
7657 			.type = RTE_FLOW_ACTION_TYPE_END,
7658 		},
7659 	};
7660 	struct rte_flow_error error;
7661 	uint32_t flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7662 					&attr, items, actions,
7663 					false, &error);
7664 
7665 	if (!flow_idx)
7666 		return -rte_errno;
7667 	return 0;
7668 }
7669 
7670 /**
7671  * Destroy a flow.
7672  *
7673  * @see rte_flow_destroy()
7674  * @see rte_flow_ops
7675  */
7676 int
7677 mlx5_flow_destroy(struct rte_eth_dev *dev,
7678 		  struct rte_flow *flow,
7679 		  struct rte_flow_error *error __rte_unused)
7680 {
7681 	struct mlx5_priv *priv = dev->data->dev_private;
7682 
7683 	if (priv->sh->config.dv_flow_en == 2)
7684 		return rte_flow_error_set(error, ENOTSUP,
7685 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7686 			  NULL,
7687 			  "Flow non-Q destruction not supported");
7688 	flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN,
7689 				(uintptr_t)(void *)flow);
7690 	return 0;
7691 }
7692 
7693 /**
7694  * Destroy all flows.
7695  *
7696  * @see rte_flow_flush()
7697  * @see rte_flow_ops
7698  */
7699 int
7700 mlx5_flow_flush(struct rte_eth_dev *dev,
7701 		struct rte_flow_error *error __rte_unused)
7702 {
7703 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, false);
7704 	return 0;
7705 }
7706 
7707 /**
7708  * Isolated mode.
7709  *
7710  * @see rte_flow_isolate()
7711  * @see rte_flow_ops
7712  */
7713 int
7714 mlx5_flow_isolate(struct rte_eth_dev *dev,
7715 		  int enable,
7716 		  struct rte_flow_error *error)
7717 {
7718 	struct mlx5_priv *priv = dev->data->dev_private;
7719 
7720 	if (dev->data->dev_started) {
7721 		rte_flow_error_set(error, EBUSY,
7722 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7723 				   NULL,
7724 				   "port must be stopped first");
7725 		return -rte_errno;
7726 	}
7727 	priv->isolated = !!enable;
7728 	if (enable)
7729 		dev->dev_ops = &mlx5_dev_ops_isolate;
7730 	else
7731 		dev->dev_ops = &mlx5_dev_ops;
7732 
7733 	dev->rx_descriptor_status = mlx5_rx_descriptor_status;
7734 	dev->tx_descriptor_status = mlx5_tx_descriptor_status;
7735 
7736 	return 0;
7737 }
7738 
7739 /**
7740  * Query a flow.
7741  *
7742  * @see rte_flow_query()
7743  * @see rte_flow_ops
7744  */
7745 static int
7746 flow_drv_query(struct rte_eth_dev *dev,
7747 	       uint32_t flow_idx,
7748 	       const struct rte_flow_action *actions,
7749 	       void *data,
7750 	       struct rte_flow_error *error)
7751 {
7752 	struct mlx5_priv *priv = dev->data->dev_private;
7753 	const struct mlx5_flow_driver_ops *fops;
7754 	struct rte_flow *flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
7755 					       flow_idx);
7756 	enum mlx5_flow_drv_type ftype;
7757 
7758 	if (!flow) {
7759 		return rte_flow_error_set(error, ENOENT,
7760 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7761 			  NULL,
7762 			  "invalid flow handle");
7763 	}
7764 	ftype = flow->drv_type;
7765 	MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
7766 	fops = flow_get_drv_ops(ftype);
7767 
7768 	return fops->query(dev, flow, actions, data, error);
7769 }
7770 
7771 /**
7772  * Query a flow.
7773  *
7774  * @see rte_flow_query()
7775  * @see rte_flow_ops
7776  */
7777 int
7778 mlx5_flow_query(struct rte_eth_dev *dev,
7779 		struct rte_flow *flow,
7780 		const struct rte_flow_action *actions,
7781 		void *data,
7782 		struct rte_flow_error *error)
7783 {
7784 	int ret;
7785 	struct mlx5_priv *priv = dev->data->dev_private;
7786 
7787 	if (priv->sh->config.dv_flow_en == 2)
7788 		return rte_flow_error_set(error, ENOTSUP,
7789 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7790 			  NULL,
7791 			  "Flow non-Q query not supported");
7792 	ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
7793 			     error);
7794 	if (ret < 0)
7795 		return ret;
7796 	return 0;
7797 }
7798 
7799 /**
7800  * Get rte_flow callbacks.
7801  *
7802  * @param dev
7803  *   Pointer to Ethernet device structure.
7804  * @param ops
7805  *   Pointer to operation-specific structure.
7806  *
7807  * @return 0
7808  */
7809 int
7810 mlx5_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
7811 		  const struct rte_flow_ops **ops)
7812 {
7813 	*ops = &mlx5_flow_ops;
7814 	return 0;
7815 }
7816 
7817 /**
7818  * Validate meter policy actions.
7819  * Dispatcher for action type specific validation.
7820  *
7821  * @param[in] dev
7822  *   Pointer to the Ethernet device structure.
7823  * @param[in] action
7824  *   The meter policy action object to validate.
7825  * @param[in] attr
7826  *   Attributes of flow to determine steering domain.
7827  * @param[out] is_rss
7828  *   Is RSS or not.
7829  * @param[out] domain_bitmap
7830  *   Domain bitmap.
7831  * @param[out] policy_mode
7832  *   The detected meter policy mode.
7833  * @param[out] error
7834  *   Perform verbose error reporting if not NULL. Initialized in case of
7835  *   error only.
7836  *
7837  * @return
7838  *   0 on success, otherwise negative errno value.
7839  */
7840 int
7841 mlx5_flow_validate_mtr_acts(struct rte_eth_dev *dev,
7842 			const struct rte_flow_action *actions[RTE_COLORS],
7843 			struct rte_flow_attr *attr,
7844 			bool *is_rss,
7845 			uint8_t *domain_bitmap,
7846 			uint8_t *policy_mode,
7847 			struct rte_mtr_error *error)
7848 {
7849 	const struct mlx5_flow_driver_ops *fops;
7850 
7851 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7852 	return fops->validate_mtr_acts(dev, actions, attr, is_rss,
7853 				       domain_bitmap, policy_mode, error);
7854 }
7855 
7856 /**
7857  * Destroy the meter policy actions.
7858  *
7859  * @param[in] dev
7860  *   Pointer to Ethernet device.
7861  * @param[in] mtr_policy
7862  *   Meter policy struct.
7863  */
7864 void
7865 mlx5_flow_destroy_mtr_acts(struct rte_eth_dev *dev,
7866 		      struct mlx5_flow_meter_policy *mtr_policy)
7867 {
7868 	const struct mlx5_flow_driver_ops *fops;
7869 
7870 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7871 	fops->destroy_mtr_acts(dev, mtr_policy);
7872 }
7873 
7874 /**
7875  * Create policy action, lock free,
7876  * (mutex should be acquired by caller).
7877  * Dispatcher for action type specific call.
7878  *
7879  * @param[in] dev
7880  *   Pointer to the Ethernet device structure.
7881  * @param[in] mtr_policy
7882  *   Meter policy struct.
7883  * @param[in] actions
7884  *   Action specification used to create meter actions.
7885  * @param[out] error
7886  *   Perform verbose error reporting if not NULL. Initialized in case of
7887  *   error only.
7888  *
7889  * @return
7890  *   0 on success, otherwise negative errno value.
7891  */
7892 int
7893 mlx5_flow_create_mtr_acts(struct rte_eth_dev *dev,
7894 		      struct mlx5_flow_meter_policy *mtr_policy,
7895 		      const struct rte_flow_action *actions[RTE_COLORS],
7896 		      struct rte_mtr_error *error)
7897 {
7898 	const struct mlx5_flow_driver_ops *fops;
7899 
7900 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7901 	return fops->create_mtr_acts(dev, mtr_policy, actions, error);
7902 }
7903 
7904 /**
7905  * Create policy rules, lock free,
7906  * (mutex should be acquired by caller).
7907  * Dispatcher for action type specific call.
7908  *
7909  * @param[in] dev
7910  *   Pointer to the Ethernet device structure.
7911  * @param[in] mtr_policy
7912  *   Meter policy struct.
7913  *
7914  * @return
7915  *   0 on success, -1 otherwise.
7916  */
7917 int
7918 mlx5_flow_create_policy_rules(struct rte_eth_dev *dev,
7919 			     struct mlx5_flow_meter_policy *mtr_policy)
7920 {
7921 	const struct mlx5_flow_driver_ops *fops;
7922 
7923 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7924 	return fops->create_policy_rules(dev, mtr_policy);
7925 }
7926 
7927 /**
7928  * Destroy policy rules, lock free,
7929  * (mutex should be acquired by caller).
7930  * Dispatcher for action type specific call.
7931  *
7932  * @param[in] dev
7933  *   Pointer to the Ethernet device structure.
7934  * @param[in] mtr_policy
7935  *   Meter policy struct.
7936  */
7937 void
7938 mlx5_flow_destroy_policy_rules(struct rte_eth_dev *dev,
7939 			     struct mlx5_flow_meter_policy *mtr_policy)
7940 {
7941 	const struct mlx5_flow_driver_ops *fops;
7942 
7943 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7944 	fops->destroy_policy_rules(dev, mtr_policy);
7945 }
7946 
7947 /**
7948  * Destroy the default policy table set.
7949  *
7950  * @param[in] dev
7951  *   Pointer to Ethernet device.
7952  */
7953 void
7954 mlx5_flow_destroy_def_policy(struct rte_eth_dev *dev)
7955 {
7956 	const struct mlx5_flow_driver_ops *fops;
7957 
7958 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7959 	fops->destroy_def_policy(dev);
7960 }
7961 
7962 /**
7963  * Create the default policy table set.
7964  *
7965  * @param[in] dev
7966  *   Pointer to Ethernet device.
7967  *
7968  * @return
7969  *   0 on success, -1 otherwise.
7970  */
7971 int
7972 mlx5_flow_create_def_policy(struct rte_eth_dev *dev)
7973 {
7974 	const struct mlx5_flow_driver_ops *fops;
7975 
7976 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7977 	return fops->create_def_policy(dev);
7978 }
7979 
7980 /**
7981  * Create the needed meter and suffix tables.
7982  *
7983  * @param[in] dev
7984  *   Pointer to Ethernet device.
7985  * @param[in] fm
7986  *   Pointer to the flow meter info.
7987  * @param[in] mtr_idx
7988  *   Meter index.
7989  * @param[in] domain_bitmap
7990  *   Bitmap of the steering domains to create the tables for.
7991  *
7986  * @return
7987  *   0 on success, -1 otherwise.
7988  */
7989 int
7990 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
7991 			struct mlx5_flow_meter_info *fm,
7992 			uint32_t mtr_idx,
7993 			uint8_t domain_bitmap)
7994 {
7995 	const struct mlx5_flow_driver_ops *fops;
7996 
7997 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7998 	return fops->create_mtr_tbls(dev, fm, mtr_idx, domain_bitmap);
7999 }
8000 
8001 /**
8002  * Destroy the meter table set.
8003  *
8004  * @param[in] dev
8005  *   Pointer to Ethernet device.
8006  * @param[in] fm
8007  *   Pointer to the flow meter whose table set is destroyed.
8008  */
8009 void
8010 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
8011 			   struct mlx5_flow_meter_info *fm)
8012 {
8013 	const struct mlx5_flow_driver_ops *fops;
8014 
8015 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8016 	fops->destroy_mtr_tbls(dev, fm);
8017 }
8018 
8019 /**
8020  * Destroy the global meter drop table.
8021  *
8022  * @param[in] dev
8023  *   Pointer to Ethernet device.
8024  */
8025 void
8026 mlx5_flow_destroy_mtr_drop_tbls(struct rte_eth_dev *dev)
8027 {
8028 	const struct mlx5_flow_driver_ops *fops;
8029 
8030 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8031 	fops->destroy_mtr_drop_tbls(dev);
8032 }
8033 
8034 /**
8035  * Destroy the sub policy table with RX queue.
8036  *
8037  * @param[in] dev
8038  *   Pointer to Ethernet device.
8039  * @param[in] mtr_policy
8040  *   Pointer to meter policy table.
8041  */
8042 void
8043 mlx5_flow_destroy_sub_policy_with_rxq(struct rte_eth_dev *dev,
8044 		struct mlx5_flow_meter_policy *mtr_policy)
8045 {
8046 	const struct mlx5_flow_driver_ops *fops;
8047 
8048 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8049 	fops->destroy_sub_policy_with_rxq(dev, mtr_policy);
8050 }
8051 
8052 /**
8053  * Allocate the needed ASO flow meter id.
8054  *
8055  * @param[in] dev
8056  *   Pointer to Ethernet device.
8057  *
8058  * @return
8059  *   Index to the ASO flow meter on success, 0 otherwise.
8060  */
8061 uint32_t
8062 mlx5_flow_mtr_alloc(struct rte_eth_dev *dev)
8063 {
8064 	const struct mlx5_flow_driver_ops *fops;
8065 
8066 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8067 	return fops->create_meter(dev);
8068 }
8069 
8070 /**
8071  * Free the ASO flow meter id.
8072  *
8073  * @param[in] dev
8074  *   Pointer to Ethernet device.
8075  * @param[in] mtr_idx
8076  *   Index to the ASO flow meter to be freed.
8080  */
8081 void
8082 mlx5_flow_mtr_free(struct rte_eth_dev *dev, uint32_t mtr_idx)
8083 {
8084 	const struct mlx5_flow_driver_ops *fops;
8085 
8086 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8087 	fops->free_meter(dev, mtr_idx);
8088 }
8089 
8090 /**
8091  * Allocate a counter.
8092  *
8093  * @param[in] dev
8094  *   Pointer to Ethernet device structure.
8095  *
8096  * @return
8097  *   Index to allocated counter on success, 0 otherwise.
8098  */
8099 uint32_t
8100 mlx5_counter_alloc(struct rte_eth_dev *dev)
8101 {
8102 	const struct mlx5_flow_driver_ops *fops;
8103 	struct rte_flow_attr attr = { .transfer = 0 };
8104 
8105 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8106 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8107 		return fops->counter_alloc(dev);
8108 	}
8109 	DRV_LOG(ERR,
8110 		"port %u counter allocate is not supported.",
8111 		 dev->data->port_id);
8112 	return 0;
8113 }
8114 
8115 /**
8116  * Free a counter.
8117  *
8118  * @param[in] dev
8119  *   Pointer to Ethernet device structure.
8120  * @param[in] cnt
8121  *   Index to the counter to be freed.
8122  */
8123 void
8124 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
8125 {
8126 	const struct mlx5_flow_driver_ops *fops;
8127 	struct rte_flow_attr attr = { .transfer = 0 };
8128 
8129 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8130 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8131 		fops->counter_free(dev, cnt);
8132 		return;
8133 	}
8134 	DRV_LOG(ERR,
8135 		"port %u counter free is not supported.",
8136 		 dev->data->port_id);
8137 }
8138 
8139 /**
8140  * Query counter statistics.
8141  *
8142  * @param[in] dev
8143  *   Pointer to Ethernet device structure.
8144  * @param[in] cnt
8145  *   Index to counter to query.
8146  * @param[in] clear
8147  *   Set to clear counter statistics.
8148  * @param[out] pkts
8149  *   Where the number of counter packet hits is saved.
8150  * @param[out] bytes
8151  *   Where the number of counter byte hits is saved.
8152  * @param[out] action
8153  *   Where the counter action pointer is saved.
 *
8153  * @return
8154  *   0 on success, a negative errno value otherwise.
8155  */
8156 int
8157 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
8158 		   bool clear, uint64_t *pkts, uint64_t *bytes, void **action)
8159 {
8160 	const struct mlx5_flow_driver_ops *fops;
8161 	struct rte_flow_attr attr = { .transfer = 0 };
8162 
8163 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8164 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8165 		return fops->counter_query(dev, cnt, clear, pkts,
8166 					bytes, action);
8167 	}
8168 	DRV_LOG(ERR,
8169 		"port %u counter query is not supported.",
8170 		 dev->data->port_id);
8171 	return -ENOTSUP;
8172 }
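
/*
 * Usage sketch (illustrative): read and clear a counter previously
 * returned by mlx5_counter_alloc(); passing NULL for the action output
 * is an assumption of this sketch, not a documented contract.
 *
 *	uint64_t pkts, bytes;
 *
 *	if (!mlx5_counter_query(dev, cnt, true, &pkts, &bytes, NULL))
 *		DRV_LOG(DEBUG, "counter hits: %" PRIu64 " packets", pkts);
 */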
8173 
8174 /**
8175  * Get information about HWS pre-configurable resources.
8176  *
8177  * @param[in] dev
8178  *   Pointer to the rte_eth_dev structure.
8179  * @param[out] port_info
8180  *   Pointer to port information.
8181  * @param[out] queue_info
8182  *   Pointer to queue information.
8183  * @param[out] error
8184  *   Pointer to error structure.
8185  *
8186  * @return
8187  *   0 on success, a negative errno value otherwise and rte_errno is set.
8188  */
8189 static int
8190 mlx5_flow_info_get(struct rte_eth_dev *dev,
8191 		   struct rte_flow_port_info *port_info,
8192 		   struct rte_flow_queue_info *queue_info,
8193 		   struct rte_flow_error *error)
8194 {
8195 	const struct mlx5_flow_driver_ops *fops;
8196 
8197 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8198 		return rte_flow_error_set(error, ENOTSUP,
8199 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8200 				NULL,
8201 				"info get with incorrect steering mode");
8202 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8203 	return fops->info_get(dev, port_info, queue_info, error);
8204 }
8205 
8206 /**
8207  * Configure port HWS resources.
8208  *
8209  * @param[in] dev
8210  *   Pointer to the rte_eth_dev structure.
8211  * @param[in] port_attr
8212  *   Port configuration attributes.
8213  * @param[in] nb_queue
8214  *   Number of queues.
8215  * @param[in] queue_attr
8216  *   Array that holds attributes for each flow queue.
8217  * @param[out] error
8218  *   Pointer to error structure.
8219  *
8220  * @return
8221  *   0 on success, a negative errno value otherwise and rte_errno is set.
8222  */
8223 static int
8224 mlx5_flow_port_configure(struct rte_eth_dev *dev,
8225 			 const struct rte_flow_port_attr *port_attr,
8226 			 uint16_t nb_queue,
8227 			 const struct rte_flow_queue_attr *queue_attr[],
8228 			 struct rte_flow_error *error)
8229 {
8230 	const struct mlx5_flow_driver_ops *fops;
8231 
8232 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8233 		return rte_flow_error_set(error, ENOTSUP,
8234 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8235 				NULL,
8236 				"port configure with incorrect steering mode");
8237 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8238 	return fops->configure(dev, port_attr, nb_queue, queue_attr, error);
8239 }
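
/*
 * Usage sketch (illustrative) of the generic rte_flow calls that land in
 * the two handlers above; all attribute values are arbitrary examples:
 *
 *	struct rte_flow_port_info info;
 *	struct rte_flow_queue_info qinfo;
 *	const struct rte_flow_port_attr pattr = { .nb_counters = 1 << 10 };
 *	const struct rte_flow_queue_attr qattr = { .size = 64 };
 *	const struct rte_flow_queue_attr *qattrs[] = { &qattr };
 *	struct rte_flow_error err;
 *
 *	if (!rte_flow_info_get(port_id, &info, &qinfo, &err))
 *		rte_flow_configure(port_id, &pattr, 1, qattrs, &err);
 */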
8240 
8241 /**
8242  * Create flow pattern template.
8243  *
8244  * @param[in] dev
8245  *   Pointer to the rte_eth_dev structure.
8246  * @param[in] attr
8247  *   Pointer to the item template attributes.
8248  * @param[in] items
8249  *   The template item pattern.
8250  * @param[out] error
8251  *   Pointer to error structure.
8252  *
8253  * @return
8254  *   Template pointer on success, NULL otherwise and rte_errno is set.
8255  */
8256 static struct rte_flow_pattern_template *
8257 mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
8258 		const struct rte_flow_pattern_template_attr *attr,
8259 		const struct rte_flow_item items[],
8260 		struct rte_flow_error *error)
8261 {
8262 	const struct mlx5_flow_driver_ops *fops;
8263 
8264 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8265 		rte_flow_error_set(error, ENOTSUP,
8266 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8267 				NULL,
8268 				"pattern create with incorrect steering mode");
8269 		return NULL;
8270 	}
8271 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8272 	return fops->pattern_template_create(dev, attr, items, error);
8273 }
8274 
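/*
 * A minimal usage sketch (hypothetical application code): a pattern
 * template matching any IPv4/UDP packet on ingress. Item masks (all NULL
 * here) decide which fields become per-rule arguments at flow creation.
 *
 *	const struct rte_flow_pattern_template_attr pt_attr = {
 *		.relaxed_matching = 1,
 *		.ingress = 1,
 *	};
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_pattern_template *pt =
 *		rte_flow_pattern_template_create(port_id, &pt_attr,
 *						 pattern, &error);
 */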
8275 /**
8276  * Destroy flow item template.
8277  *
8278  * @param[in] dev
8279  *   Pointer to the rte_eth_dev structure.
8280  * @param[in] template
8281  *   Pointer to the item template to be destroyed.
8282  * @param[out] error
8283  *   Pointer to error structure.
8284  *
8285  * @return
8286  *   0 on success, a negative errno value otherwise and rte_errno is set.
8287  */
8288 static int
8289 mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
8290 				   struct rte_flow_pattern_template *template,
8291 				   struct rte_flow_error *error)
8292 {
8293 	const struct mlx5_flow_driver_ops *fops;
8294 
8295 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8296 		return rte_flow_error_set(error, ENOTSUP,
8297 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8298 				NULL,
8299 				"pattern destroy with incorrect steering mode");
8300 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8301 	return fops->pattern_template_destroy(dev, template, error);
8302 }
8303 
8304 /**
8305  * Create flow actions template.
8306  *
8307  * @param[in] dev
8308  *   Pointer to the rte_eth_dev structure.
8309  * @param[in] attr
8310  *   Pointer to the action template attributes.
8311  * @param[in] actions
8312  *   Associated actions (list terminated by the END action).
8313  * @param[in] masks
8314  *   List of actions that mark which of each action's members are constant.
8315  * @param[out] error
8316  *   Pointer to error structure.
8317  *
8318  * @return
8319  *   Template on success, NULL otherwise and rte_errno is set.
8320  */
8321 static struct rte_flow_actions_template *
8322 mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
8323 			const struct rte_flow_actions_template_attr *attr,
8324 			const struct rte_flow_action actions[],
8325 			const struct rte_flow_action masks[],
8326 			struct rte_flow_error *error)
8327 {
8328 	const struct mlx5_flow_driver_ops *fops;
8329 
8330 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8331 		rte_flow_error_set(error, ENOTSUP,
8332 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8333 				NULL,
8334 				"action create with incorrect steering mode");
8335 		return NULL;
8336 	}
8337 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8338 	return fops->actions_template_create(dev, attr, actions, masks, error);
8339 }
8340 
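/*
 * A minimal usage sketch (hypothetical application code): an actions
 * template where the target Rx queue stays a per-rule argument - the
 * QUEUE entry in "masks" carries a NULL conf, so the index is variable.
 *
 *	const struct rte_flow_actions_template_attr at_attr = {
 *		.ingress = 1,
 *	};
 *	const struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	const struct rte_flow_action masks[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_actions_template *at =
 *		rte_flow_actions_template_create(port_id, &at_attr,
 *						 actions, masks, &error);
 */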
8341 /**
8342  * Destroy flow action template.
8343  *
8344  * @param[in] dev
8345  *   Pointer to the rte_eth_dev structure.
8346  * @param[in] template
8347  *   Pointer to the action template to be destroyed.
8348  * @param[out] error
8349  *   Pointer to error structure.
8350  *
8351  * @return
8352  *   0 on success, a negative errno value otherwise and rte_errno is set.
8353  */
8354 static int
8355 mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
8356 				   struct rte_flow_actions_template *template,
8357 				   struct rte_flow_error *error)
8358 {
8359 	const struct mlx5_flow_driver_ops *fops;
8360 
8361 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8362 		return rte_flow_error_set(error, ENOTSUP,
8363 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8364 				NULL,
8365 				"action destroy with incorrect steering mode");
8366 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8367 	return fops->actions_template_destroy(dev, template, error);
8368 }
8369 
8370 /**
8371  * Create flow template table.
8372  *
8373  * @param[in] dev
8374  *   Pointer to the rte_eth_dev structure.
8375  * @param[in] attr
8376  *   Pointer to the table attributes.
8377  * @param[in] item_templates
8378  *   Item template array to be bound to the table.
8379  * @param[in] nb_item_templates
8380  *   Number of item templates.
8381  * @param[in] action_templates
8382  *   Action template array to be bound to the table.
8383  * @param[in] nb_action_templates
8384  *   Number of action templates.
8385  * @param[out] error
8386  *   Pointer to error structure.
8387  *
8388  * @return
8389  *    Table on success, NULL otherwise and rte_errno is set.
8390  */
8391 static struct rte_flow_template_table *
8392 mlx5_flow_table_create(struct rte_eth_dev *dev,
8393 		       const struct rte_flow_template_table_attr *attr,
8394 		       struct rte_flow_pattern_template *item_templates[],
8395 		       uint8_t nb_item_templates,
8396 		       struct rte_flow_actions_template *action_templates[],
8397 		       uint8_t nb_action_templates,
8398 		       struct rte_flow_error *error)
8399 {
8400 	const struct mlx5_flow_driver_ops *fops;
8401 
8402 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8403 		rte_flow_error_set(error, ENOTSUP,
8404 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8405 				NULL,
8406 				"table create with incorrect steering mode");
8407 		return NULL;
8408 	}
8409 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8410 	return fops->template_table_create(dev,
8411 					   attr,
8412 					   item_templates,
8413 					   nb_item_templates,
8414 					   action_templates,
8415 					   nb_action_templates,
8416 					   error);
8417 }
8418 
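/*
 * A minimal usage sketch (hypothetical application code): combining the
 * "pt" and "at" templates from the sketches above into a table sized for
 * one million rules in ingress group 1.
 *
 *	const struct rte_flow_template_table_attr tbl_attr = {
 *		.flow_attr = { .group = 1, .ingress = 1 },
 *		.nb_flows = 1 << 20,
 *	};
 *	struct rte_flow_template_table *tbl =
 *		rte_flow_template_table_create(port_id, &tbl_attr,
 *					       &pt, 1, &at, 1, &error);
 */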
8419 /**
8420  * Destroy flow template table.
8421  *
8422  * @param[in] dev
8423  *   Pointer to the rte_eth_dev structure.
8424  * @param[in] table
8425  *   Pointer to the table to be destroyed.
8426  * @param[out] error
8427  *   Pointer to error structure.
8428  *
8429  * @return
8430  *   0 on success, a negative errno value otherwise and rte_errno is set.
8431  */
8432 static int
8433 mlx5_flow_table_destroy(struct rte_eth_dev *dev,
8434 			struct rte_flow_template_table *table,
8435 			struct rte_flow_error *error)
8436 {
8437 	const struct mlx5_flow_driver_ops *fops;
8438 
8439 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8440 		return rte_flow_error_set(error, ENOTSUP,
8441 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8442 				NULL,
8443 				"table destroy with incorrect steering mode");
8444 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8445 	return fops->template_table_destroy(dev, table, error);
8446 }
8447 
8448 /**
8449  * Enqueue flow creation.
8450  *
8451  * @param[in] dev
8452  *   Pointer to the rte_eth_dev structure.
8453  * @param[in] queue_id
8454  *   The queue to create the flow.
8455  * @param[in] attr
8456  *   Pointer to the flow operation attributes.
8457  * @param[in] items
8458  *   Items with flow spec values.
8459  * @param[in] pattern_template_index
8460  *   Index of the pattern template, within the table, that the flow follows.
8461  * @param[in] actions
8462  *   Actions with flow spec values.
8463  * @param[in] action_template_index
8464  *   Index of the actions template, within the table, that the flow follows.
8465  * @param[in] user_data
8466  *   Pointer to the user_data.
8467  * @param[out] error
8468  *   Pointer to error structure.
8469  *
8470  * @return
8471  *    Flow pointer on success, NULL otherwise and rte_errno is set.
8472  */
8473 static struct rte_flow *
8474 mlx5_flow_async_flow_create(struct rte_eth_dev *dev,
8475 			    uint32_t queue_id,
8476 			    const struct rte_flow_op_attr *attr,
8477 			    struct rte_flow_template_table *table,
8478 			    const struct rte_flow_item items[],
8479 			    uint8_t pattern_template_index,
8480 			    const struct rte_flow_action actions[],
8481 			    uint8_t action_template_index,
8482 			    void *user_data,
8483 			    struct rte_flow_error *error)
8484 {
8485 	const struct mlx5_flow_driver_ops *fops;
8486 
8487 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8488 		rte_flow_error_set(error, ENOTSUP,
8489 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8490 				NULL,
8491 				"flow_q create with incorrect steering mode");
8492 		return NULL;
8493 	}
8494 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8495 	return fops->async_flow_create(dev, queue_id, attr, table,
8496 				       items, pattern_template_index,
8497 				       actions, action_template_index,
8498 				       user_data, error);
8499 }
8500 
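/*
 * A minimal usage sketch (hypothetical application code): enqueuing a rule
 * on flow queue 0 with template indexes 0/0 from the "tbl" sketch above.
 * Only the fields left variable by the templates need values here.
 *
 *	const struct rte_flow_op_attr op_attr = { .postpone = 1 };
 *	const struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	const struct rte_flow_action_queue queue_conf = { .index = 3 };
 *	const struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue_conf },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow *f =
 *		rte_flow_async_create(port_id, 0, &op_attr, tbl,
 *				      pattern, 0, actions, 0, NULL, &error);
 */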
8501 /**
8502  * Enqueue flow destruction.
8503  *
8504  * @param[in] dev
8505  *   Pointer to the rte_eth_dev structure.
8506  * @param[in] queue
8507  *   The queue to destroy the flow.
8508  * @param[in] attr
8509  *   Pointer to the flow operation attributes.
8510  * @param[in] flow
8511  *   Pointer to the flow to be destroyed.
8512  * @param[in] user_data
8513  *   Pointer to the user_data.
8514  * @param[out] error
8515  *   Pointer to error structure.
8516  *
8517  * @return
8518  *    0 on success, negative value otherwise and rte_errno is set.
8519  */
8520 static int
8521 mlx5_flow_async_flow_destroy(struct rte_eth_dev *dev,
8522 			     uint32_t queue,
8523 			     const struct rte_flow_op_attr *attr,
8524 			     struct rte_flow *flow,
8525 			     void *user_data,
8526 			     struct rte_flow_error *error)
8527 {
8528 	const struct mlx5_flow_driver_ops *fops;
8529 
8530 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8531 		return rte_flow_error_set(error, ENOTSUP,
8532 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8533 				NULL,
8534 				"flow_q destroy with incorrect steering mode");
8535 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8536 	return fops->async_flow_destroy(dev, queue, attr, flow,
8537 					user_data, error);
8538 }
8539 
8540 /**
8541  * Pull the enqueued flows.
8542  *
8543  * @param[in] dev
8544  *   Pointer to the rte_eth_dev structure.
8545  * @param[in] queue
8546  *   The queue to pull the result.
8547  * @param[out] res
8548  *   Array to save the results.
8549  * @param[in] n_res
8550  *   Number of result entries available in the array.
8551  * @param[out] error
8552  *   Pointer to error structure.
8553  *
8554  * @return
8555  *    Number of results on success, a negative value otherwise and rte_errno is set.
8556  */
8557 static int
8558 mlx5_flow_pull(struct rte_eth_dev *dev,
8559 	       uint32_t queue,
8560 	       struct rte_flow_op_result res[],
8561 	       uint16_t n_res,
8562 	       struct rte_flow_error *error)
8563 {
8564 	const struct mlx5_flow_driver_ops *fops;
8565 
8566 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8567 		return rte_flow_error_set(error, ENOTSUP,
8568 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8569 				NULL,
8570 				"flow_q pull with incorrect steering mode");
8571 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8572 	return fops->pull(dev, queue, res, n_res, error);
8573 }
8574 
8575 /**
8576  * Push the enqueued flows.
8577  *
8578  * @param[in] dev
8579  *   Pointer to the rte_eth_dev structure.
8580  * @param[in] queue
8581  *   The queue to push the flows.
8582  * @param[out] error
8583  *   Pointer to error structure.
8584  *
8585  * @return
8586  *    0 on success, negative value otherwise and rte_errno is set.
8587  */
8588 static int
8589 mlx5_flow_push(struct rte_eth_dev *dev,
8590 	       uint32_t queue,
8591 	       struct rte_flow_error *error)
8592 {
8593 	const struct mlx5_flow_driver_ops *fops;
8594 
8595 	if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8596 		return rte_flow_error_set(error, ENOTSUP,
8597 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8598 				NULL,
8599 				"flow_q push with incorrect steering mode");
8600 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8601 	return fops->push(dev, queue, error);
8602 }
8603 
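/*
 * A minimal usage sketch (hypothetical application code): flushing the
 * operations postponed above and polling their completions.
 *
 *	struct rte_flow_op_result res[32];
 *	int i, n;
 *
 *	rte_flow_push(port_id, 0, &error);
 *	do {
 *		n = rte_flow_pull(port_id, 0, res, RTE_DIM(res), &error);
 *	} while (n == 0);
 *	for (i = 0; i < n; i++)
 *		if (res[i].status != RTE_FLOW_OP_SUCCESS)
 *			printf("operation %d failed\n", i);
 */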
8604 /**
8605  * Enqueue indirect action creation.
8606  *
8607  * @param[in] dev
8608  *   Pointer to the rte_eth_dev structure.
8609  * @param[in] queue
8610  *   Which queue to be used.
8611  * @param[in] attr
8612  *   Operation attribute.
8613  * @param[in] conf
8614  *   Indirect action configuration.
8615  * @param[in] action
8616  *   rte_flow action detail.
8617  * @param[in] user_data
8618  *   Pointer to the user_data.
8619  * @param[out] error
8620  *   Pointer to error structure.
8621  *
8622  * @return
8623  *   Action handle on success, NULL otherwise and rte_errno is set.
8624  */
8625 static struct rte_flow_action_handle *
8626 mlx5_flow_async_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
8627 				 const struct rte_flow_op_attr *attr,
8628 				 const struct rte_flow_indir_action_conf *conf,
8629 				 const struct rte_flow_action *action,
8630 				 void *user_data,
8631 				 struct rte_flow_error *error)
8632 {
8633 	const struct mlx5_flow_driver_ops *fops =
8634 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8635 
8636 	return fops->async_action_create(dev, queue, attr, conf, action,
8637 					 user_data, error);
8638 }
8639 
8640 /**
8641  * Enqueue indirect action update.
8642  *
8643  * @param[in] dev
8644  *   Pointer to the rte_eth_dev structure.
8645  * @param[in] queue
8646  *   Which queue to be used.
8647  * @param[in] attr
8648  *   Operation attribute.
8649  * @param[in] handle
8650  *   Action handle to be updated.
8651  * @param[in] update
8652  *   Update value.
8653  * @param[in] user_data
8654  *   Pointer to the user_data.
8655  * @param[out] error
8656  *   Pointer to error structure.
8657  *
8658  * @return
8659  *   0 on success, negative value otherwise and rte_errno is set.
8660  */
8661 static int
8662 mlx5_flow_async_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
8663 				     const struct rte_flow_op_attr *attr,
8664 				     struct rte_flow_action_handle *handle,
8665 				     const void *update,
8666 				     void *user_data,
8667 				     struct rte_flow_error *error)
8668 {
8669 	const struct mlx5_flow_driver_ops *fops =
8670 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8671 
8672 	return fops->async_action_update(dev, queue, attr, handle,
8673 					 update, user_data, error);
8674 }
8675 
8676 /**
8677  * Enqueue indirect action destruction.
8678  *
8679  * @param[in] dev
8680  *   Pointer to the rte_eth_dev structure.
8681  * @param[in] queue
8682  *   Which queue to be used.
8683  * @param[in] attr
8684  *   Operation attribute.
8685  * @param[in] handle
8686  *   Action handle to be destroyed.
8687  * @param[in] user_data
8688  *   Pointer to the user_data.
8689  * @param[out] error
8690  *   Pointer to error structure.
8691  *
8692  * @return
8693  *   0 on success, negative value otherwise and rte_errno is set.
8694  */
8695 static int
8696 mlx5_flow_async_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
8697 				      const struct rte_flow_op_attr *attr,
8698 				      struct rte_flow_action_handle *handle,
8699 				      void *user_data,
8700 				      struct rte_flow_error *error)
8701 {
8702 	const struct mlx5_flow_driver_ops *fops =
8703 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8704 
8705 	return fops->async_action_destroy(dev, queue, attr, handle,
8706 					  user_data, error);
8707 }
8708 
8709 /**
8710  * Allocate new memory for the counter values, wrapped by all the needed
8711  * management structures.
8712  *
8713  * @param[in] sh
8714  *   Pointer to mlx5_dev_ctx_shared object.
8715  *
8716  * @return
8717  *   0 on success, a negative errno value otherwise.
8718  */
8719 static int
8720 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
8721 {
8722 	struct mlx5_counter_stats_mem_mng *mem_mng;
8723 	volatile struct flow_counter_stats *raw_data;
8724 	int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
8725 	int size = (sizeof(struct flow_counter_stats) *
8726 			MLX5_COUNTERS_PER_POOL +
8727 			sizeof(struct mlx5_counter_stats_raw)) * raws_n +
8728 			sizeof(struct mlx5_counter_stats_mem_mng);
8729 	size_t pgsize = rte_mem_page_size();
8730 	uint8_t *mem;
8731 	int ret;
8732 	int i;
8733 
8734 	if (pgsize == (size_t)-1) {
8735 		DRV_LOG(ERR, "Failed to get mem page size");
8736 		rte_errno = ENOMEM;
8737 		return -ENOMEM;
8738 	}
8739 	mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
8740 	if (!mem) {
8741 		rte_errno = ENOMEM;
8742 		return -ENOMEM;
8743 	}
8744 	mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
8745 	size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
8746 	ret = mlx5_os_wrapped_mkey_create(sh->cdev->ctx, sh->cdev->pd,
8747 					  sh->cdev->pdn, mem, size,
8748 					  &mem_mng->wm);
8749 	if (ret) {
8750 		rte_errno = errno;
8751 		mlx5_free(mem);
8752 		return -rte_errno;
8753 	}
8754 	mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
8755 	raw_data = (volatile struct flow_counter_stats *)mem;
8756 	for (i = 0; i < raws_n; ++i) {
8757 		mem_mng->raws[i].mem_mng = mem_mng;
8758 		mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
8759 	}
8760 	for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
8761 		LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
8762 				 mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
8763 				 next);
8764 	LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
8765 	sh->cmng.mem_mng = mem_mng;
8766 	return 0;
8767 }
8768 
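/*
 * Layout of the single region allocated above:
 *
 *	+------------------------------------------------+
 *	| raws_n * MLX5_COUNTERS_PER_POOL counter stats  | <- raw_data
 *	+------------------------------------------------+
 *	| raws_n * struct mlx5_counter_stats_raw         | <- mem_mng->raws
 *	+------------------------------------------------+
 *	| struct mlx5_counter_stats_mem_mng              | <- mem_mng
 *	+------------------------------------------------+
 *
 * The first MLX5_CNT_CONTAINER_RESIZE raws back the per-pool statistic
 * memory; the trailing MLX5_MAX_PENDING_QUERIES raws go to the
 * free_stat_raws list for in-flight asynchronous queries.
 */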
8769 /**
8770  * Set the statistic memory to the new counter pool.
8771  *
8772  * @param[in] sh
8773  *   Pointer to mlx5_dev_ctx_shared object.
8774  * @param[in] pool
8775  *   Pointer to the pool to set the statistic memory.
8776  *
8777  * @return
8778  *   0 on success, a negative errno value otherwise.
8779  */
8780 static int
8781 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
8782 			       struct mlx5_flow_counter_pool *pool)
8783 {
8784 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
8785 	/* Resize the statistic memory once it is used up. */
8786 	if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
8787 	    mlx5_flow_create_counter_stat_mem_mng(sh)) {
8788 		DRV_LOG(ERR, "Cannot resize counter stat mem.");
8789 		return -1;
8790 	}
8791 	rte_spinlock_lock(&pool->sl);
8792 	pool->raw = cmng->mem_mng->raws + pool->index %
8793 		    MLX5_CNT_CONTAINER_RESIZE;
8794 	rte_spinlock_unlock(&pool->sl);
8795 	pool->raw_hw = NULL;
8796 	return 0;
8797 }
8798 
8799 #define MLX5_POOL_QUERY_FREQ_US 1000000
8800 
8801 /**
8802  * Set the periodic procedure for triggering asynchronous batch queries for all
8803  * the counter pools.
8804  *
8805  * @param[in] sh
8806  *   Pointer to mlx5_dev_ctx_shared object.
8807  */
8808 void
8809 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
8810 {
8811 	uint32_t pools_n, us;
8812 
8813 	pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED);
8814 	us = MLX5_POOL_QUERY_FREQ_US / pools_n;
8815 	DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
8816 	if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
8817 		sh->cmng.query_thread_on = 0;
8818 		DRV_LOG(ERR, "Cannot reinitialize query alarm");
8819 	} else {
8820 		sh->cmng.query_thread_on = 1;
8821 	}
8822 }
8823 
8824 /**
8825  * The periodic procedure for triggering asynchronous batch queries for all the
8826  * counter pools. This function is expected to be called from the host thread.
8827  *
8828  * @param[in] arg
8829  *   The parameter for the alarm process.
8830  */
8831 void
8832 mlx5_flow_query_alarm(void *arg)
8833 {
8834 	struct mlx5_dev_ctx_shared *sh = arg;
8835 	int ret;
8836 	uint16_t pool_index = sh->cmng.pool_index;
8837 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
8838 	struct mlx5_flow_counter_pool *pool;
8839 	uint16_t n_valid;
8840 
8841 	if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
8842 		goto set_alarm;
8843 	rte_spinlock_lock(&cmng->pool_update_sl);
8844 	pool = cmng->pools[pool_index];
8845 	n_valid = cmng->n_valid;
8846 	rte_spinlock_unlock(&cmng->pool_update_sl);
8847 	/* Set the statistic memory to the newly created pool. */
8848 	if (!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool))
8849 		goto set_alarm;
8850 	if (pool->raw_hw)
8851 		/* There is a pool query in progress. */
8852 		goto set_alarm;
8853 	pool->raw_hw =
8854 		LIST_FIRST(&sh->cmng.free_stat_raws);
8855 	if (!pool->raw_hw)
8856 		/* No free counter statistics raw memory. */
8857 		goto set_alarm;
8858 	/*
8859 	 * Bump the query generation to identify the counters released
8860 	 * between the query trigger and the query handling. Counters
8861 	 * released in this gap period must wait for a new round of query,
8862 	 * as newly arrived packets will not be taken into account.
8863 	 */
8864 	pool->query_gen++;
8865 	ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
8866 					       MLX5_COUNTERS_PER_POOL,
8867 					       NULL, NULL,
8868 					       pool->raw_hw->mem_mng->wm.lkey,
8869 					       (void *)(uintptr_t)
8870 					       pool->raw_hw->data,
8871 					       sh->devx_comp,
8872 					       (uint64_t)(uintptr_t)pool);
8873 	if (ret) {
8874 		DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
8875 			" %d", pool->min_dcs->id);
8876 		pool->raw_hw = NULL;
8877 		goto set_alarm;
8878 	}
8879 	LIST_REMOVE(pool->raw_hw, next);
8880 	sh->cmng.pending_queries++;
8881 	pool_index++;
8882 	if (pool_index >= n_valid)
8883 		pool_index = 0;
8884 set_alarm:
8885 	sh->cmng.pool_index = pool_index;
8886 	mlx5_set_query_alarm(sh);
8887 }
8888 
8889 /**
8890  * Check for newly aged-out flows in the counter pool and raise the aging event.
8891  *
8892  * @param[in] sh
8893  *   Pointer to mlx5_dev_ctx_shared object.
8894  * @param[in] pool
8895  *   Pointer to the current counter pool.
8896  */
8897 static void
8898 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
8899 		   struct mlx5_flow_counter_pool *pool)
8900 {
8901 	struct mlx5_priv *priv;
8902 	struct mlx5_flow_counter *cnt;
8903 	struct mlx5_age_info *age_info;
8904 	struct mlx5_age_param *age_param;
8905 	struct mlx5_counter_stats_raw *cur = pool->raw_hw;
8906 	struct mlx5_counter_stats_raw *prev = pool->raw;
8907 	const uint64_t curr_time = MLX5_CURR_TIME_SEC;
8908 	const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
8909 	uint16_t expected = AGE_CANDIDATE;
8910 	uint32_t i;
8911 
8912 	pool->time_of_last_age_check = curr_time;
8913 	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
8914 		cnt = MLX5_POOL_GET_CNT(pool, i);
8915 		age_param = MLX5_CNT_TO_AGE(cnt);
8916 		if (__atomic_load_n(&age_param->state,
8917 				    __ATOMIC_RELAXED) != AGE_CANDIDATE)
8918 			continue;
8919 		if (cur->data[i].hits != prev->data[i].hits) {
8920 			__atomic_store_n(&age_param->sec_since_last_hit, 0,
8921 					 __ATOMIC_RELAXED);
8922 			continue;
8923 		}
8924 		if (__atomic_add_fetch(&age_param->sec_since_last_hit,
8925 				       time_delta,
8926 				       __ATOMIC_RELAXED) <= age_param->timeout)
8927 			continue;
8928 		/*
8929 		 * Hold the lock first; otherwise, if the release
8930 		 * happens between setting the AGE_TMOUT state and
8931 		 * the tailq operation, the release procedure may
8932 		 * delete a non-existent tailq node.
8933 		 */
8934 		priv = rte_eth_devices[age_param->port_id].data->dev_private;
8935 		age_info = GET_PORT_AGE_INFO(priv);
8936 		rte_spinlock_lock(&age_info->aged_sl);
8937 		if (__atomic_compare_exchange_n(&age_param->state, &expected,
8938 						AGE_TMOUT, false,
8939 						__ATOMIC_RELAXED,
8940 						__ATOMIC_RELAXED)) {
8941 			TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
8942 			MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
8943 		}
8944 		rte_spinlock_unlock(&age_info->aged_sl);
8945 	}
8946 	mlx5_age_event_prepare(sh);
8947 }
8948 
8949 /**
8950  * Handler for the HW response carrying ready values from an asynchronous
8951  * batch query. This function is expected to be called from the host thread.
8952  *
8953  * @param[in] sh
8954  *   The pointer to the shared device context.
8955  * @param[in] async_id
8956  *   The Devx async ID.
8957  * @param[in] status
8958  *   The status of the completion.
8959  */
8960 void
8961 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
8962 				  uint64_t async_id, int status)
8963 {
8964 	struct mlx5_flow_counter_pool *pool =
8965 		(struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
8966 	struct mlx5_counter_stats_raw *raw_to_free;
8967 	uint8_t query_gen = pool->query_gen ^ 1;
8968 	struct mlx5_flow_counter_mng *cmng = &sh->cmng;
8969 	enum mlx5_counter_type cnt_type =
8970 		pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
8971 				MLX5_COUNTER_TYPE_ORIGIN;
8972 
8973 	if (unlikely(status)) {
8974 		raw_to_free = pool->raw_hw;
8975 	} else {
8976 		raw_to_free = pool->raw;
8977 		if (pool->is_aged)
8978 			mlx5_flow_aging_check(sh, pool);
8979 		rte_spinlock_lock(&pool->sl);
8980 		pool->raw = pool->raw_hw;
8981 		rte_spinlock_unlock(&pool->sl);
8982 		/* Be sure the new raw counters data is updated in memory. */
8983 		rte_io_wmb();
8984 		if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
8985 			rte_spinlock_lock(&cmng->csl[cnt_type]);
8986 			TAILQ_CONCAT(&cmng->counters[cnt_type],
8987 				     &pool->counters[query_gen], next);
8988 			rte_spinlock_unlock(&cmng->csl[cnt_type]);
8989 		}
8990 	}
8991 	LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
8992 	pool->raw_hw = NULL;
8993 	sh->cmng.pending_queries--;
8994 }
8995 
8996 static int
8997 flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
8998 		    const struct flow_grp_info *grp_info,
8999 		    struct rte_flow_error *error)
9000 {
9001 	if (grp_info->transfer && grp_info->external &&
9002 	    grp_info->fdb_def_rule) {
9003 		if (group == UINT32_MAX)
9004 			return rte_flow_error_set
9005 						(error, EINVAL,
9006 						 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
9007 						 NULL,
9008 						 "group index not supported");
9009 		*table = group + 1;
9010 	} else {
9011 		*table = group;
9012 	}
9013 	DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
9014 	return 0;
9015 }
9016 
9017 /**
9018  * Translate the rte_flow group index to HW table value.
9019  *
9020  * If tunnel offload is disabled, all group ids are converted to flow
9021  * table ids using the standard method.
9022  * If tunnel offload is enabled, a group id can be converted using either
9023  * the standard or the tunnel conversion method. The method selection
9024  * depends on flags in the `grp_info` parameter:
9025  * - Internal (grp_info.external == 0) groups are converted using the
9026  *   standard method.
9027  * - Group ids in the JUMP action are converted with the tunnel method.
9028  * - Conversion of a group id in a rule attribute depends on the rule
9029  *   type and the group id value:
9030  *   ** non-zero group attributes are converted with the tunnel method
9031  *   ** a zero group attribute in a non-tunnel rule is converted using
9032  *      the standard method - there's only one root table
9033  *   ** a zero group attribute in a steer tunnel rule is converted with
9034  *      the standard method - single root table
9035  *   ** a zero group attribute in a match tunnel rule is a special OvS
9036  *      case: that value is used for portability reasons. That group
9037  *      id is converted with the tunnel conversion method.
9038  *
9039  * @param[in] dev
9040  *   Port device
9041  * @param[in] tunnel
9042  *   PMD tunnel offload object
9043  * @param[in] group
9044  *   rte_flow group index value.
9045  * @param[out] table
9046  *   HW table value.
9047  * @param[in] grp_info
9048  *   flags used for conversion
9049  * @param[out] error
9050  *   Pointer to error structure.
9051  *
9052  * @return
9053  *   0 on success, a negative errno value otherwise and rte_errno is set.
9054  */
9055 int
9056 mlx5_flow_group_to_table(struct rte_eth_dev *dev,
9057 			 const struct mlx5_flow_tunnel *tunnel,
9058 			 uint32_t group, uint32_t *table,
9059 			 const struct flow_grp_info *grp_info,
9060 			 struct rte_flow_error *error)
9061 {
9062 	int ret;
9063 	bool standard_translation;
9064 
9065 	if (!grp_info->skip_scale && grp_info->external &&
9066 	    group < MLX5_MAX_TABLES_EXTERNAL)
9067 		group *= MLX5_FLOW_TABLE_FACTOR;
9068 	if (is_tunnel_offload_active(dev)) {
9069 		standard_translation = !grp_info->external ||
9070 					grp_info->std_tbl_fix;
9071 	} else {
9072 		standard_translation = true;
9073 	}
9074 	DRV_LOG(DEBUG,
9075 		"port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
9076 		dev->data->port_id, group, grp_info->transfer,
9077 		grp_info->external, grp_info->fdb_def_rule,
9078 		standard_translation ? "STANDARD" : "TUNNEL");
9079 	if (standard_translation)
9080 		ret = flow_group_to_table(dev->data->port_id, group, table,
9081 					  grp_info, error);
9082 	else
9083 		ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
9084 						      table, error);
9085 
9086 	return ret;
9087 }
9088 
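/*
 * Worked example: with tunnel offload inactive, an external rule in group 5
 * (skip_scale == 0) is first scaled to group 5 * MLX5_FLOW_TABLE_FACTOR and
 * then translated with the standard method, i.e. table = scaled group, or
 * scaled group + 1 for an external transfer rule under the FDB default
 * rule, where table 0 is reserved.
 */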
9089 /**
9090  * Discover availability of metadata reg_c's.
9091  *
9092  * Iteratively use test flows to check availability.
9093  *
9094  * @param[in] dev
9095  *   Pointer to the Ethernet device structure.
9096  *
9097  * @return
9098  *   0 on success, a negative errno value otherwise and rte_errno is set.
9099  */
9100 int
9101 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
9102 {
9103 	struct mlx5_priv *priv = dev->data->dev_private;
9104 	enum modify_reg idx;
9105 	int n = 0;
9106 
9107 	/* reg_c[0] and reg_c[1] are reserved. */
9108 	priv->sh->flow_mreg_c[n++] = REG_C_0;
9109 	priv->sh->flow_mreg_c[n++] = REG_C_1;
9110 	/* Discover availability of other reg_c's. */
9111 	for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
9112 		struct rte_flow_attr attr = {
9113 			.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
9114 			.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
9115 			.ingress = 1,
9116 		};
9117 		struct rte_flow_item items[] = {
9118 			[0] = {
9119 				.type = RTE_FLOW_ITEM_TYPE_END,
9120 			},
9121 		};
9122 		struct rte_flow_action actions[] = {
9123 			[0] = {
9124 				.type = (enum rte_flow_action_type)
9125 					MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
9126 				.conf = &(struct mlx5_flow_action_copy_mreg){
9127 					.src = REG_C_1,
9128 					.dst = idx,
9129 				},
9130 			},
9131 			[1] = {
9132 				.type = RTE_FLOW_ACTION_TYPE_JUMP,
9133 				.conf = &(struct rte_flow_action_jump){
9134 					.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
9135 				},
9136 			},
9137 			[2] = {
9138 				.type = RTE_FLOW_ACTION_TYPE_END,
9139 			},
9140 		};
9141 		uint32_t flow_idx;
9142 		struct rte_flow *flow;
9143 		struct rte_flow_error error;
9144 
9145 		if (!priv->sh->config.dv_flow_en)
9146 			break;
9147 		/* Create internal flow, validation skips copy action. */
9148 		flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_GEN, &attr,
9149 					items, actions, false, &error);
9150 		flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
9151 				      flow_idx);
9152 		if (!flow)
9153 			continue;
9154 		priv->sh->flow_mreg_c[n++] = idx;
9155 		flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN, flow_idx);
9156 	}
9157 	for (; n < MLX5_MREG_C_NUM; ++n)
9158 		priv->sh->flow_mreg_c[n] = REG_NON;
9159 	priv->sh->metadata_regc_check_flag = 1;
9160 	return 0;
9161 }
9162 
9163 int
9164 save_dump_file(const uint8_t *data, uint32_t size,
9165 	uint32_t type, uint64_t id, void *arg, FILE *file)
9166 {
9167 	char line[BUF_SIZE];
9168 	uint32_t out = 0;
9169 	uint32_t k;
9170 	uint32_t actions_num;
9171 	struct rte_flow_query_count *count;
9172 
9173 	memset(line, 0, BUF_SIZE);
9174 	switch (type) {
9175 	case DR_DUMP_REC_TYPE_PMD_MODIFY_HDR:
9176 		actions_num = *(uint32_t *)(arg);
9177 		out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",%d,",
9178 				type, id, actions_num);
9179 		break;
9180 	case DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT:
9181 		out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",",
9182 				type, id);
9183 		break;
9184 	case DR_DUMP_REC_TYPE_PMD_COUNTER:
9185 		count = (struct rte_flow_query_count *)arg;
9186 		fprintf(file,
9187 			"%d,0x%" PRIx64 ",%" PRIu64 ",%" PRIu64 "\n",
9188 			type, id, count->hits, count->bytes);
9189 		return 0;
9190 	default:
9191 		return -1;
9192 	}
9193 
9194 	for (k = 0; k < size; k++) {
9195 		/* Make sure we do not overrun the line buffer length. */
9196 		if (out >= BUF_SIZE - 4) {
9197 			line[out] = '\0';
9198 			break;
9199 		}
9200 		out += snprintf(line + out, BUF_SIZE - out, "%02x",
9201 				(data[k]) & 0xff);
9202 	}
9203 	fprintf(file, "%s\n", line);
9204 	return 0;
9205 }
9206 
9207 int
9208 mlx5_flow_query_counter(struct rte_eth_dev *dev, struct rte_flow *flow,
9209 	struct rte_flow_query_count *count, struct rte_flow_error *error)
9210 {
9211 	struct rte_flow_action action[2];
9212 	enum mlx5_flow_drv_type ftype;
9213 	const struct mlx5_flow_driver_ops *fops;
9214 
9215 	if (!flow) {
9216 		return rte_flow_error_set(error, ENOENT,
9217 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9218 				NULL,
9219 				"invalid flow handle");
9220 	}
9221 	action[0].type = RTE_FLOW_ACTION_TYPE_COUNT;
9222 	action[1].type = RTE_FLOW_ACTION_TYPE_END;
9223 	if (flow->counter) {
9224 		memset(count, 0, sizeof(struct rte_flow_query_count));
9225 		ftype = (enum mlx5_flow_drv_type)(flow->drv_type);
9226 		MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN &&
9227 						ftype < MLX5_FLOW_TYPE_MAX);
9228 		fops = flow_get_drv_ops(ftype);
9229 		return fops->query(dev, flow, action, count, error);
9230 	}
9231 	return -1;
9232 }
9233 
9234 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9235 /**
9236  * Dump flow ipool data to file
9237  *
9238  * @param[in] dev
9239  *   The pointer to Ethernet device.
9240  * @param[in] file
9241  *   A pointer to a file for output.
9242  * @param[out] error
9243  *   Perform verbose error reporting if not NULL. PMDs initialize this
9244  *   structure in case of error only.
9245  * @return
9246  *   0 on success, a negative value otherwise.
9247  */
9248 int
9249 mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev,
9250 	struct rte_flow *flow, FILE *file,
9251 	struct rte_flow_error *error)
9252 {
9253 	struct mlx5_priv *priv = dev->data->dev_private;
9254 	struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
9255 	struct mlx5_flow_dv_encap_decap_resource *encap_decap;
9256 	uint32_t handle_idx;
9257 	struct mlx5_flow_handle *dh;
9258 	struct rte_flow_query_count count;
9259 	uint32_t actions_num;
9260 	const uint8_t *data;
9261 	size_t size;
9262 	uint64_t id;
9263 	uint32_t type;
9264 	void *action = NULL;
9265 
9266 	if (!flow) {
9267 		return rte_flow_error_set(error, ENOENT,
9268 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9269 				NULL,
9270 				"invalid flow handle");
9271 	}
9272 	handle_idx = flow->dev_handles;
9273 	/* Query the flow counter. */
9274 	if (flow->counter &&
9275 	(!mlx5_counter_query(dev, flow->counter, false,
9276 	&count.hits, &count.bytes, &action)) && action) {
9277 		id = (uint64_t)(uintptr_t)action;
9278 		type = DR_DUMP_REC_TYPE_PMD_COUNTER;
9279 		save_dump_file(NULL, 0, type,
9280 			id, (void *)&count, file);
9281 	}
9282 
9283 	while (handle_idx) {
9284 		dh = mlx5_ipool_get(priv->sh->ipool
9285 				[MLX5_IPOOL_MLX5_FLOW], handle_idx);
9286 		if (!dh)
9287 			break;
9288 		handle_idx = dh->next.next;
9289 
9290 		/* Get modify_hdr and encap_decap buf from ipools. */
9291 		encap_decap = NULL;
9292 		modify_hdr = dh->dvh.modify_hdr;
9293 
9294 		if (dh->dvh.rix_encap_decap) {
9295 			encap_decap = mlx5_ipool_get(priv->sh->ipool
9296 						[MLX5_IPOOL_DECAP_ENCAP],
9297 						dh->dvh.rix_encap_decap);
9298 		}
9299 		if (modify_hdr) {
9300 			data = (const uint8_t *)modify_hdr->actions;
9301 			size = (size_t)(modify_hdr->actions_num) * 8;
9302 			id = (uint64_t)(uintptr_t)modify_hdr->action;
9303 			actions_num = modify_hdr->actions_num;
9304 			type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
9305 			save_dump_file(data, size, type, id,
9306 						(void *)(&actions_num), file);
9307 		}
9308 		if (encap_decap) {
9309 			data = encap_decap->buf;
9310 			size = encap_decap->size;
9311 			id = (uint64_t)(uintptr_t)encap_decap->action;
9312 			type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
9313 			save_dump_file(data, size, type,
9314 						id, NULL, file);
9315 		}
9316 	}
9317 	return 0;
9318 }
9319 
9320 /**
9321  * Dump all flows' encap_decap/modify_hdr/counter data to a file
9322  *
9323  * @param[in] dev
9324  *   The pointer to Ethernet device.
9325  * @param[in] file
9326  *   A pointer to a file for output.
9327  * @param[out] error
9328  *   Perform verbose error reporting if not NULL. PMDs initialize this
9329  *   structure in case of error only.
9330  * @return
9331  *   0 on success, a negative value otherwise.
9332  */
9333 static int
9334 mlx5_flow_dev_dump_sh_all(struct rte_eth_dev *dev,
9335 	FILE *file, struct rte_flow_error *error __rte_unused)
9336 {
9337 	struct mlx5_priv *priv = dev->data->dev_private;
9338 	struct mlx5_dev_ctx_shared *sh = priv->sh;
9339 	struct mlx5_hlist *h;
9340 	struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
9341 	struct mlx5_flow_dv_encap_decap_resource *encap_decap;
9342 	struct rte_flow_query_count count;
9343 	uint32_t actions_num;
9344 	const uint8_t *data;
9345 	size_t size;
9346 	uint64_t id;
9347 	uint32_t type;
9348 	uint32_t i;
9349 	uint32_t j;
9350 	struct mlx5_list_inconst *l_inconst;
9351 	struct mlx5_list_entry *e;
9352 	int lcore_index;
9353 	struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
9354 	uint32_t max;
9355 	void *action;
9356 
9357 	/* The encap_decap hlist is lcore-shared; use the global core cache. */
9358 	i = MLX5_LIST_GLOBAL;
9359 	h = sh->encaps_decaps;
9360 	if (h) {
9361 		for (j = 0; j <= h->mask; j++) {
9362 			l_inconst = &h->buckets[j].l;
9363 			if (!l_inconst || !l_inconst->cache[i])
9364 				continue;
9365 
9366 			e = LIST_FIRST(&l_inconst->cache[i]->h);
9367 			while (e) {
9368 				encap_decap =
9369 				(struct mlx5_flow_dv_encap_decap_resource *)e;
9370 				data = encap_decap->buf;
9371 				size = encap_decap->size;
9372 				id = (uint64_t)(uintptr_t)encap_decap->action;
9373 				type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
9374 				save_dump_file(data, size, type,
9375 					id, NULL, file);
9376 				e = LIST_NEXT(e, next);
9377 			}
9378 		}
9379 	}
9380 
9381 	/* Get modify_hdr. */
9382 	h = sh->modify_cmds;
9383 	if (h) {
9384 		lcore_index = rte_lcore_index(rte_lcore_id());
9385 		if (unlikely(lcore_index == -1)) {
9386 			lcore_index = MLX5_LIST_NLCORE;
9387 			rte_spinlock_lock(&h->l_const.lcore_lock);
9388 		}
9389 		i = lcore_index;
9390 
9391 		for (j = 0; j <= h->mask; j++) {
9392 			l_inconst = &h->buckets[j].l;
9393 			if (!l_inconst || !l_inconst->cache[i])
9394 				continue;
9395 
9396 			e = LIST_FIRST(&l_inconst->cache[i]->h);
9397 			while (e) {
9398 				modify_hdr =
9399 				(struct mlx5_flow_dv_modify_hdr_resource *)e;
9400 				data = (const uint8_t *)modify_hdr->actions;
9401 				size = (size_t)(modify_hdr->actions_num) * 8;
9402 				actions_num = modify_hdr->actions_num;
9403 				id = (uint64_t)(uintptr_t)modify_hdr->action;
9404 				type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
9405 				save_dump_file(data, size, type, id,
9406 						(void *)(&actions_num), file);
9407 				e = LIST_NEXT(e, next);
9408 			}
9409 		}
9410 
9411 		if (unlikely(lcore_index == MLX5_LIST_NLCORE))
9412 			rte_spinlock_unlock(&h->l_const.lcore_lock);
9413 	}
9414 
9415 	/* Get counters. */
9416 	MLX5_ASSERT(cmng->n_valid <= cmng->n);
9417 	max = MLX5_COUNTERS_PER_POOL * cmng->n_valid;
9418 	for (j = 1; j <= max; j++) {
9419 		action = NULL;
9420 		if ((!mlx5_counter_query(dev, j, false, &count.hits,
9421 		&count.bytes, &action)) && action) {
9422 			id = (uint64_t)(uintptr_t)action;
9423 			type = DR_DUMP_REC_TYPE_PMD_COUNTER;
9424 			save_dump_file(NULL, 0, type,
9425 					id, (void *)&count, file);
9426 		}
9427 	}
9428 	return 0;
9429 }
9430 #endif
9431 
9432 /**
9433  * Dump flow raw HW data to a file.
9434  *
9435  * @param[in] dev
9436  *   The pointer to Ethernet device.
9437  * @param[in] file
9438  *   A pointer to a file for output.
9439  * @param[out] error
9440  *   Perform verbose error reporting if not NULL. PMDs initialize this
9441  *   structure in case of error only.
9442  * @return
9443  *   0 on success, a negative value otherwise.
9444  */
9445 int
9446 mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow_idx,
9447 		   FILE *file,
9448 		   struct rte_flow_error *error __rte_unused)
9449 {
9450 	struct mlx5_priv *priv = dev->data->dev_private;
9451 	struct mlx5_dev_ctx_shared *sh = priv->sh;
9452 	uint32_t handle_idx;
9453 	int ret;
9454 	struct mlx5_flow_handle *dh;
9455 	struct rte_flow *flow;
9456 
9457 	if (!sh->config.dv_flow_en) {
9458 		if (fputs("device dv flow disabled\n", file) <= 0)
9459 			return -errno;
9460 		return -ENOTSUP;
9461 	}
9462 
9463 	/* dump all */
9464 	if (!flow_idx) {
9465 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9466 		if (mlx5_flow_dev_dump_sh_all(dev, file, error))
9467 			return -EINVAL;
9468 #endif
9469 		return mlx5_devx_cmd_flow_dump(sh->fdb_domain,
9470 					sh->rx_domain,
9471 					sh->tx_domain, file);
9472 	}
9473 	/* dump one */
9474 	flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
9475 			(uintptr_t)(void *)flow_idx);
9476 	if (!flow)
9477 		return -EINVAL;
9478 
9479 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9480 	mlx5_flow_dev_dump_ipool(dev, flow, file, error);
9481 #endif
9482 	handle_idx = flow->dev_handles;
9483 	while (handle_idx) {
9484 		dh = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
9485 				handle_idx);
9486 		if (!dh)
9487 			return -ENOENT;
9488 		if (dh->drv_flow) {
9489 			ret = mlx5_devx_cmd_flow_single_dump(dh->drv_flow,
9490 					file);
9491 			if (ret)
9492 				return -ENOENT;
9493 		}
9494 		handle_idx = dh->next.next;
9495 	}
9496 	return 0;
9497 }
9498 
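/*
 * A minimal usage sketch (hypothetical application code): dumping all flows
 * of a port through the generic API, which lands in the callback above.
 *
 *	if (rte_flow_dev_dump(port_id, NULL, stderr, &error) != 0)
 *		printf("flow dump failed\n");
 */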
9499 /**
9500  * Get aged-out flows.
9501  *
9502  * @param[in] dev
9503  *   Pointer to the Ethernet device structure.
9504  * @param[in] context
9505  *   The address of an array of pointers to the aged-out flows contexts.
9506  * @param[in] nb_contexts
9507  *   The length of the context array.
9508  * @param[out] error
9509  *   Perform verbose error reporting if not NULL. Initialized in case of
9510  *   error only.
9511  *
9512  * @return
9513  *   Number of aged-out flows on success, a negative errno value otherwise.
9514  *   If nb_contexts is 0, return the total number of aged-out contexts.
9515  *   If nb_contexts is not 0, return the number of aged-out flows reported
9516  *   in the context array.
9517  */
9518 int
9519 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
9520 			uint32_t nb_contexts, struct rte_flow_error *error)
9521 {
9522 	const struct mlx5_flow_driver_ops *fops;
9523 	struct rte_flow_attr attr = { .transfer = 0 };
9524 
9525 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
9526 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
9527 		return fops->get_aged_flows(dev, contexts, nb_contexts,
9528 						    error);
9529 	}
9530 	DRV_LOG(ERR,
9531 		"port %u get aged flows is not supported.",
9532 		 dev->data->port_id);
9533 	return -ENOTSUP;
9534 }
9535 
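/*
 * A minimal usage sketch (hypothetical application code): the two-step
 * pattern enabled by the return convention above - query the count first,
 * then fetch the aged-out contexts.
 *
 *	int n = rte_flow_get_aged_flows(port_id, NULL, 0, &error);
 *
 *	if (n > 0) {
 *		void **ctxs = calloc(n, sizeof(*ctxs));
 *
 *		n = rte_flow_get_aged_flows(port_id, ctxs, n, &error);
 *		... destroy the flows associated with ctxs[0..n-1] ...
 *		free(ctxs);
 *	}
 */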
9536 /* Wrapper for driver action_validate op callback */
9537 static int
9538 flow_drv_action_validate(struct rte_eth_dev *dev,
9539 			 const struct rte_flow_indir_action_conf *conf,
9540 			 const struct rte_flow_action *action,
9541 			 const struct mlx5_flow_driver_ops *fops,
9542 			 struct rte_flow_error *error)
9543 {
9544 	static const char err_msg[] = "indirect action validation unsupported";
9545 
9546 	if (!fops->action_validate) {
9547 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9548 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9549 				   NULL, err_msg);
9550 		return -rte_errno;
9551 	}
9552 	return fops->action_validate(dev, conf, action, error);
9553 }
9554 
9555 /**
9556  * Destroys the shared action by handle.
9557  *
9558  * @param dev
9559  *   Pointer to Ethernet device structure.
9560  * @param[in] handle
9561  *   Handle for the indirect action object to be destroyed.
9562  * @param[out] error
9563  *   Perform verbose error reporting if not NULL. PMDs initialize this
9564  *   structure in case of error only.
9565  *
9566  * @return
9567  *   0 on success, a negative errno value otherwise and rte_errno is set.
9568  *
9569  * @note: wrapper for driver action_destroy op callback.
9570  */
9571 static int
9572 mlx5_action_handle_destroy(struct rte_eth_dev *dev,
9573 			   struct rte_flow_action_handle *handle,
9574 			   struct rte_flow_error *error)
9575 {
9576 	static const char err_msg[] = "indirect action destruction unsupported";
9577 	struct rte_flow_attr attr = { .transfer = 0 };
9578 	const struct mlx5_flow_driver_ops *fops =
9579 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9580 
9581 	if (!fops->action_destroy) {
9582 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9583 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9584 				   NULL, err_msg);
9585 		return -rte_errno;
9586 	}
9587 	return fops->action_destroy(dev, handle, error);
9588 }
9589 
9590 /* Wrapper for driver action_update op callback */
9591 static int
9592 flow_drv_action_update(struct rte_eth_dev *dev,
9593 		       struct rte_flow_action_handle *handle,
9594 		       const void *update,
9595 		       const struct mlx5_flow_driver_ops *fops,
9596 		       struct rte_flow_error *error)
9597 {
9598 	static const char err_msg[] = "indirect action update unsupported";
9599 
9600 	if (!fops->action_update) {
9601 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9602 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9603 				   NULL, err_msg);
9604 		return -rte_errno;
9605 	}
9606 	return fops->action_update(dev, handle, update, error);
9607 }
9608 
9609 /* Wrapper for driver action_query op callback */
9610 static int
9611 flow_drv_action_query(struct rte_eth_dev *dev,
9612 		      const struct rte_flow_action_handle *handle,
9613 		      void *data,
9614 		      const struct mlx5_flow_driver_ops *fops,
9615 		      struct rte_flow_error *error)
9616 {
9617 	static const char err_msg[] = "indirect action query unsupported";
9618 
9619 	if (!fops->action_query) {
9620 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9621 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9622 				   NULL, err_msg);
9623 		return -rte_errno;
9624 	}
9625 	return fops->action_query(dev, handle, data, error);
9626 }
9627 
9628 /**
9629  * Create indirect action for reuse in multiple flow rules.
9630  *
9631  * @param dev
9632  *   Pointer to Ethernet device structure.
9633  * @param conf
9634  *   Pointer to indirect action object configuration.
9635  * @param[in] action
9636  *   Action configuration for indirect action object creation.
9637  * @param[out] error
9638  *   Perform verbose error reporting if not NULL. PMDs initialize this
9639  *   structure in case of error only.
9640  * @return
9641  *   A valid handle in case of success, NULL otherwise and rte_errno is set.
9642  */
9643 static struct rte_flow_action_handle *
9644 mlx5_action_handle_create(struct rte_eth_dev *dev,
9645 			  const struct rte_flow_indir_action_conf *conf,
9646 			  const struct rte_flow_action *action,
9647 			  struct rte_flow_error *error)
9648 {
9649 	static const char err_msg[] = "indirect action creation unsupported";
9650 	struct rte_flow_attr attr = { .transfer = 0 };
9651 	const struct mlx5_flow_driver_ops *fops =
9652 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9653 
9654 	if (flow_drv_action_validate(dev, conf, action, fops, error))
9655 		return NULL;
9656 	if (!fops->action_create) {
9657 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9658 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9659 				   NULL, err_msg);
9660 		return NULL;
9661 	}
9662 	return fops->action_create(dev, conf, action, error);
9663 }
9664 
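/*
 * A minimal usage sketch (hypothetical application code): creating an
 * indirect counter action so several rules can share one counter, then
 * referencing it from a rule's action list.
 *
 *	const struct rte_flow_indir_action_conf conf = { .ingress = 1 };
 *	const struct rte_flow_action count = {
 *		.type = RTE_FLOW_ACTION_TYPE_COUNT,
 *	};
 *	struct rte_flow_action_handle *h =
 *		rte_flow_action_handle_create(port_id, &conf, &count, &error);
 *	const struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_INDIRECT, .conf = h },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 */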
9665 /**
9666  * Updates in place the indirect action configuration pointed to by *handle*
9667  * with the configuration provided as the *update* argument.
9668  * The update of the indirect action configuration affects all flow rules
9669  * reusing the action via the handle.
9670  *
9671  * @param dev
9672  *   Pointer to Ethernet device structure.
9673  * @param[in] handle
9674  *   Handle for the indirect action to be updated.
9675  * @param[in] update
9676  *   Action specification used to modify the action pointed by handle.
9677  *   *update* could be of the same type as the action pointed to by the
9678  *   *handle* argument, or some other structure like a wrapper, depending on
9679  *   the indirect action type.
9680  * @param[out] error
9681  *   Perform verbose error reporting if not NULL. PMDs initialize this
9682  *   structure in case of error only.
9683  *
9684  * @return
9685  *   0 on success, a negative errno value otherwise and rte_errno is set.
9686  */
9687 static int
9688 mlx5_action_handle_update(struct rte_eth_dev *dev,
9689 		struct rte_flow_action_handle *handle,
9690 		const void *update,
9691 		struct rte_flow_error *error)
9692 {
9693 	struct rte_flow_attr attr = { .transfer = 0 };
9694 	const struct mlx5_flow_driver_ops *fops =
9695 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9696 	int ret;
9697 
9698 	ret = flow_drv_action_validate(dev, NULL,
9699 			(const struct rte_flow_action *)update, fops, error);
9700 	if (ret)
9701 		return ret;
9702 	return flow_drv_action_update(dev, handle, update, fops,
9703 				      error);
9704 }
9705 
9706 /**
9707  * Query the indirect action by handle.
9708  *
9709  * This function allows retrieving action-specific data such as counters.
9710  * Data is gathered by special action which may be present/referenced in
9711  * more than one flow rule definition.
9712  *
9713  * see @RTE_FLOW_ACTION_TYPE_COUNT
9714  *
9715  * @param dev
9716  *   Pointer to Ethernet device structure.
9717  * @param[in] handle
9718  *   Handle for the indirect action to query.
9719  * @param[in, out] data
9720  *   Pointer to storage for the associated query data type.
9721  * @param[out] error
9722  *   Perform verbose error reporting if not NULL. PMDs initialize this
9723  *   structure in case of error only.
9724  *
9725  * @return
9726  *   0 on success, a negative errno value otherwise and rte_errno is set.
9727  */
9728 static int
9729 mlx5_action_handle_query(struct rte_eth_dev *dev,
9730 			 const struct rte_flow_action_handle *handle,
9731 			 void *data,
9732 			 struct rte_flow_error *error)
9733 {
9734 	struct rte_flow_attr attr = { .transfer = 0 };
9735 	const struct mlx5_flow_driver_ops *fops =
9736 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9737 
9738 	return flow_drv_action_query(dev, handle, data, fops, error);
9739 }
9740 
9741 /**
9742  * Destroy all indirect actions (shared RSS).
9743  *
9744  * @param dev
9745  *   Pointer to Ethernet device.
9746  *
9747  * @return
9748  *   0 on success, a negative errno value otherwise and rte_errno is set.
9749  */
9750 int
9751 mlx5_action_handle_flush(struct rte_eth_dev *dev)
9752 {
9753 	struct rte_flow_error error;
9754 	struct mlx5_priv *priv = dev->data->dev_private;
9755 	struct mlx5_shared_action_rss *shared_rss;
9756 	int ret = 0;
9757 	uint32_t idx;
9758 
9759 	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
9760 		      priv->rss_shared_actions, idx, shared_rss, next) {
9761 		ret |= mlx5_action_handle_destroy(dev,
9762 		       (struct rte_flow_action_handle *)(uintptr_t)idx, &error);
9763 	}
9764 	return ret;
9765 }
9766 
9767 /**
9768  * Validate existing indirect actions against current device configuration
9769  * and attach them to device resources.
9770  *
9771  * @param dev
9772  *   Pointer to Ethernet device.
9773  *
9774  * @return
9775  *   0 on success, a negative errno value otherwise and rte_errno is set.
9776  */
9777 int
9778 mlx5_action_handle_attach(struct rte_eth_dev *dev)
9779 {
9780 	struct mlx5_priv *priv = dev->data->dev_private;
9781 	int ret = 0;
9782 	struct mlx5_ind_table_obj *ind_tbl, *ind_tbl_last;
9783 
9784 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9785 		const char *message;
9786 		uint32_t queue_idx;
9787 
9788 		ret = mlx5_validate_rss_queues(dev, ind_tbl->queues,
9789 					       ind_tbl->queues_n,
9790 					       &message, &queue_idx);
9791 		if (ret != 0) {
9792 			DRV_LOG(ERR, "Port %u cannot use queue %u in RSS: %s",
9793 				dev->data->port_id, ind_tbl->queues[queue_idx],
9794 				message);
9795 			break;
9796 		}
9797 	}
9798 	if (ret != 0)
9799 		return ret;
9800 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9801 		ret = mlx5_ind_table_obj_attach(dev, ind_tbl);
9802 		if (ret != 0) {
9803 			DRV_LOG(ERR, "Port %u could not attach "
9804 				"indirection table obj %p",
9805 				dev->data->port_id, (void *)ind_tbl);
9806 			goto error;
9807 		}
9808 	}
9809 
9810 	return 0;
9811 error:
9812 	ind_tbl_last = ind_tbl;
9813 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9814 		if (ind_tbl == ind_tbl_last)
9815 			break;
9816 		if (mlx5_ind_table_obj_detach(dev, ind_tbl) != 0)
9817 			DRV_LOG(CRIT, "Port %u could not detach "
9818 				"indirection table obj %p on rollback",
9819 				dev->data->port_id, (void *)ind_tbl);
9820 	}
9821 	return ret;
9822 }
9823 
9824 /**
9825  * Detach indirect actions of the device from its resources.
9826  *
9827  * @param dev
9828  *   Pointer to Ethernet device.
9829  *
9830  * @return
9831  *   0 on success, a negative errno value otherwise and rte_errno is set.
9832  */
9833 int
9834 mlx5_action_handle_detach(struct rte_eth_dev *dev)
9835 {
9836 	struct mlx5_priv *priv = dev->data->dev_private;
9837 	int ret = 0;
9838 	struct mlx5_ind_table_obj *ind_tbl, *ind_tbl_last;
9839 
9840 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9841 		ret = mlx5_ind_table_obj_detach(dev, ind_tbl);
9842 		if (ret != 0) {
9843 			DRV_LOG(ERR, "Port %u could not detach "
9844 				"indirection table obj %p",
9845 				dev->data->port_id, (void *)ind_tbl);
9846 			goto error;
9847 		}
9848 	}
9849 	return 0;
9850 error:
9851 	ind_tbl_last = ind_tbl;
9852 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9853 		if (ind_tbl == ind_tbl_last)
9854 			break;
9855 		if (mlx5_ind_table_obj_attach(dev, ind_tbl) != 0)
9856 			DRV_LOG(CRIT, "Port %u could not attach "
9857 				"indirection table obj %p on rollback",
9858 				dev->data->port_id, (void *)ind_tbl);
9859 	}
9860 	return ret;
9861 }
9862 
9863 #ifndef HAVE_MLX5DV_DR
9864 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
9865 #else
9866 #define MLX5_DOMAIN_SYNC_FLOW \
9867 	(MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
9868 #endif
9869 
9870 int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
9871 {
9872 	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
9873 	const struct mlx5_flow_driver_ops *fops;
9874 	int ret;
9875 	struct rte_flow_attr attr = { .transfer = 0 };
9876 
9877 	fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9878 	ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
9879 	if (ret > 0)
9880 		ret = -ret;
9881 	return ret;
9882 }
9883 
9884 const struct mlx5_flow_tunnel *
9885 mlx5_get_tof(const struct rte_flow_item *item,
9886 	     const struct rte_flow_action *action,
9887 	     enum mlx5_tof_rule_type *rule_type)
9888 {
9889 	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
9890 		if (item->type == (typeof(item->type))
9891 				  MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL) {
9892 			*rule_type = MLX5_TUNNEL_OFFLOAD_MATCH_RULE;
9893 			return flow_items_to_tunnel(item);
9894 		}
9895 	}
9896 	for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
9897 		if (action->type == (typeof(action->type))
9898 				    MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET) {
9899 			*rule_type = MLX5_TUNNEL_OFFLOAD_SET_RULE;
9900 			return flow_actions_to_tunnel(action);
9901 		}
9902 	}
9903 	return NULL;
9904 }
9905 
9906 /**
9907  * Tunnel offload functionality is defined for the DV environment only.
9908  */
9909 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9910 __extension__
9911 union tunnel_offload_mark {
9912 	uint32_t val;
9913 	struct {
9914 		uint32_t app_reserve:8;
9915 		uint32_t table_id:15;
9916 		uint32_t transfer:1;
9917 		uint32_t _unused_:8;
9918 	};
9919 };
9920 
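/*
 * Example encoding (assuming the usual little-endian bit-field layout):
 * a non-transfer miss rule restored from flow table id 5 carries
 * val == 5 << 8, i.e. app_reserve == 0, table_id == 5, transfer == 0.
 */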
9921 static bool
9922 mlx5_access_tunnel_offload_db
9923 	(struct rte_eth_dev *dev,
9924 	 bool (*match)(struct rte_eth_dev *,
9925 		       struct mlx5_flow_tunnel *, const void *),
9926 	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
9927 	 void (*miss)(struct rte_eth_dev *, void *),
9928 	 void *ctx, bool lock_op);
9929 
9930 static int
9931 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
9932 			     struct rte_flow *flow,
9933 			     const struct rte_flow_attr *attr,
9934 			     const struct rte_flow_action *app_actions,
9935 			     uint32_t flow_idx,
9936 			     const struct mlx5_flow_tunnel *tunnel,
9937 			     struct tunnel_default_miss_ctx *ctx,
9938 			     struct rte_flow_error *error)
9939 {
9940 	struct mlx5_priv *priv = dev->data->dev_private;
9941 	struct mlx5_flow *dev_flow;
9942 	struct rte_flow_attr miss_attr = *attr;
9943 	const struct rte_flow_item miss_items[2] = {
9944 		{
9945 			.type = RTE_FLOW_ITEM_TYPE_ETH,
9946 			.spec = NULL,
9947 			.last = NULL,
9948 			.mask = NULL
9949 		},
9950 		{
9951 			.type = RTE_FLOW_ITEM_TYPE_END,
9952 			.spec = NULL,
9953 			.last = NULL,
9954 			.mask = NULL
9955 		}
9956 	};
9957 	union tunnel_offload_mark mark_id;
9958 	struct rte_flow_action_mark miss_mark;
9959 	struct rte_flow_action miss_actions[3] = {
9960 		[0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
9961 		[2] = { .type = RTE_FLOW_ACTION_TYPE_END,  .conf = NULL }
9962 	};
9963 	const struct rte_flow_action_jump *jump_data;
9964 	uint32_t i, flow_table = 0; /* prevent compilation warning */
9965 	struct flow_grp_info grp_info = {
9966 		.external = 1,
9967 		.transfer = attr->transfer,
9968 		.fdb_def_rule = !!priv->fdb_def_rule,
9969 		.std_tbl_fix = 0,
9970 	};
9971 	int ret;
9972 
9973 	if (!attr->transfer) {
9974 		uint32_t q_size;
9975 
9976 		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
9977 		q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
9978 		ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
9979 					 0, SOCKET_ID_ANY);
9980 		if (!ctx->queue)
9981 			return rte_flow_error_set
9982 				(error, ENOMEM,
9983 				RTE_FLOW_ERROR_TYPE_ACTION_CONF,
9984 				NULL, "invalid default miss RSS");
9985 		ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT;
9986 		ctx->action_rss.level = 0;
9987 		ctx->action_rss.types = priv->rss_conf.rss_hf;
9988 		ctx->action_rss.key_len = priv->rss_conf.rss_key_len;
9989 		ctx->action_rss.queue_num = priv->reta_idx_n;
9990 		ctx->action_rss.key = priv->rss_conf.rss_key;
9991 		ctx->action_rss.queue = ctx->queue;
9992 		if (!priv->reta_idx_n || !priv->rxqs_n)
9993 			return rte_flow_error_set
9994 				(error, EINVAL,
9995 				RTE_FLOW_ERROR_TYPE_ACTION_CONF,
9996 				NULL, "invalid port configuration");
9997 		if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
9998 			ctx->action_rss.types = 0;
9999 		for (i = 0; i != priv->reta_idx_n; ++i)
10000 			ctx->queue[i] = (*priv->reta_idx)[i];
10001 	} else {
10002 		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
10003 		ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
10004 	}
10005 	miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
	/* Tunnel set rules are terminated by a JUMP action; locate it. */
10006 	for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++);
10007 	jump_data = app_actions->conf;
10008 	miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
10009 	miss_attr.group = jump_data->group;
10010 	ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
10011 				       &flow_table, &grp_info, error);
10012 	if (ret)
10013 		return rte_flow_error_set(error, EINVAL,
10014 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
10015 					  NULL, "invalid tunnel id");
10016 	mark_id.app_reserve = 0;
10017 	mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
10018 	mark_id.transfer = !!attr->transfer;
10019 	mark_id._unused_ = 0;
10020 	miss_mark.id = mark_id.val;
10021 	dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
10022 				    miss_items, miss_actions, flow_idx, error);
10023 	if (!dev_flow)
10024 		return -rte_errno;
10025 	dev_flow->flow = flow;
10026 	dev_flow->external = true;
10027 	dev_flow->tunnel = tunnel;
10028 	dev_flow->tof_type = MLX5_TUNNEL_OFFLOAD_MISS_RULE;
10029 	/* Subflow object was created, it must be included in the list. */
10030 	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
10031 		      dev_flow->handle, next);
10032 	DRV_LOG(DEBUG,
10033 		"port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
10034 		dev->data->port_id, tunnel->app_tunnel.type,
10035 		tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
10036 	ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
10037 				  miss_actions, error);
10038 	if (!ret)
10039 		ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
10040 						  error);
10041 
10042 	return ret;
10043 }
10044 
10045 static const struct mlx5_flow_tbl_data_entry *
10046 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
10047 {
10048 	struct mlx5_priv *priv = dev->data->dev_private;
10049 	struct mlx5_dev_ctx_shared *sh = priv->sh;
10050 	struct mlx5_list_entry *he;
10051 	union tunnel_offload_mark mbits = { .val = mark };
10052 	union mlx5_flow_tbl_key table_key = {
10053 		{
10054 			.level = tunnel_id_to_flow_tbl(mbits.table_id),
10055 			.id = 0,
10056 			.reserved = 0,
10057 			.dummy = 0,
10058 			.is_fdb = !!mbits.transfer,
10059 			.is_egress = 0,
10060 		}
10061 	};
10062 	struct mlx5_flow_cb_ctx ctx = {
10063 		.data = &table_key.v64,
10064 	};
10065 
10066 	he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, &ctx);
10067 	return he ?
10068 	       container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL;
10069 }
10070 
10071 static void
10072 mlx5_flow_tunnel_grp2tbl_remove_cb(void *tool_ctx,
10073 				   struct mlx5_list_entry *entry)
10074 {
10075 	struct mlx5_dev_ctx_shared *sh = tool_ctx;
10076 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10077 
10078 	mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10079 			tunnel_flow_tbl_to_id(tte->flow_table));
10080 	mlx5_free(tte);
10081 }
10082 
10083 static int
10084 mlx5_flow_tunnel_grp2tbl_match_cb(void *tool_ctx __rte_unused,
10085 				  struct mlx5_list_entry *entry, void *cb_ctx)
10086 {
10087 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
10088 	union tunnel_tbl_key tbl = {
10089 		.val = *(uint64_t *)(ctx->data),
10090 	};
10091 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10092 
10093 	return tbl.tunnel_id != tte->tunnel_id || tbl.group != tte->group;
10094 }
10095 
10096 static struct mlx5_list_entry *
10097 mlx5_flow_tunnel_grp2tbl_create_cb(void *tool_ctx, void *cb_ctx)
10098 {
10099 	struct mlx5_dev_ctx_shared *sh = tool_ctx;
10100 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
10101 	struct tunnel_tbl_entry *tte;
10102 	union tunnel_tbl_key tbl = {
10103 		.val = *(uint64_t *)(ctx->data),
10104 	};
10105 
10106 	tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
10107 			  sizeof(*tte), 0,
10108 			  SOCKET_ID_ANY);
10109 	if (!tte)
10110 		goto err;
10111 	mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10112 			  &tte->flow_table);
10113 	if (tte->flow_table >= MLX5_MAX_TABLES) {
10114 		DRV_LOG(ERR, "Tunnel TBL ID %d exceed max limit.",
10115 			tte->flow_table);
10116 		mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10117 				tte->flow_table);
10118 		goto err;
10119 	} else if (!tte->flow_table) {
10120 		goto err;
10121 	}
10122 	tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table);
10123 	tte->tunnel_id = tbl.tunnel_id;
10124 	tte->group = tbl.group;
10125 	return &tte->hash;
10126 err:
10127 	if (tte)
10128 		mlx5_free(tte);
10129 	return NULL;
10130 }
10131 
10132 static struct mlx5_list_entry *
10133 mlx5_flow_tunnel_grp2tbl_clone_cb(void *tool_ctx __rte_unused,
10134 				  struct mlx5_list_entry *oentry,
10135 				  void *cb_ctx __rte_unused)
10136 {
10137 	struct tunnel_tbl_entry *tte = mlx5_malloc(MLX5_MEM_SYS, sizeof(*tte),
10138 						   0, SOCKET_ID_ANY);
10139 
10140 	if (!tte)
10141 		return NULL;
10142 	memcpy(tte, oentry, sizeof(*tte));
10143 	return &tte->hash;
10144 }
10145 
10146 static void
10147 mlx5_flow_tunnel_grp2tbl_clone_free_cb(void *tool_ctx __rte_unused,
10148 				       struct mlx5_list_entry *entry)
10149 {
10150 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10151 
10152 	mlx5_free(tte);
10153 }
10154 
10155 static uint32_t
10156 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
10157 				const struct mlx5_flow_tunnel *tunnel,
10158 				uint32_t group, uint32_t *table,
10159 				struct rte_flow_error *error)
10160 {
10161 	struct mlx5_list_entry *he;
10162 	struct tunnel_tbl_entry *tte;
10163 	union tunnel_tbl_key key = {
10164 		.tunnel_id = tunnel ? tunnel->tunnel_id : 0,
10165 		.group = group
10166 	};
10167 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10168 	struct mlx5_hlist *group_hash;
10169 	struct mlx5_flow_cb_ctx ctx = {
10170 		.data = &key.val,
10171 	};
10172 
10173 	group_hash = tunnel ? tunnel->groups : thub->groups;
10174 	he = mlx5_hlist_register(group_hash, key.val, &ctx);
10175 	if (!he)
10176 		return rte_flow_error_set(error, EINVAL,
10177 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
10178 					  NULL,
10179 					  "tunnel group index not supported");
10180 	tte = container_of(he, typeof(*tte), hash);
10181 	*table = tte->flow_table;
10182 	DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
10183 		dev->data->port_id, key.tunnel_id, group, *table);
10184 	return 0;
10185 }
10186 
10187 static void
10188 mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
10189 		      struct mlx5_flow_tunnel *tunnel)
10190 {
10191 	struct mlx5_priv *priv = dev->data->dev_private;
10192 	struct mlx5_indexed_pool *ipool;
10193 
10194 	DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
10195 		dev->data->port_id, tunnel->tunnel_id);
10196 	LIST_REMOVE(tunnel, chain);
10197 	mlx5_hlist_destroy(tunnel->groups);
10198 	ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
10199 	mlx5_ipool_free(ipool, tunnel->tunnel_id);
10200 }
10201 
10202 static bool
10203 mlx5_access_tunnel_offload_db
10204 	(struct rte_eth_dev *dev,
10205 	 bool (*match)(struct rte_eth_dev *,
10206 		       struct mlx5_flow_tunnel *, const void *),
10207 	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
10208 	 void (*miss)(struct rte_eth_dev *, void *),
10209 	 void *ctx, bool lock_op)
10210 {
10211 	bool verdict = false;
10212 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10213 	struct mlx5_flow_tunnel *tunnel;
10214 
10215 	rte_spinlock_lock(&thub->sl);
10216 	LIST_FOREACH(tunnel, &thub->tunnels, chain) {
10217 		verdict = match(dev, tunnel, (const void *)ctx);
10218 		if (verdict)
10219 			break;
10220 	}
10221 	if (!lock_op)
10222 		rte_spinlock_unlock(&thub->sl);
10223 	if (verdict && hit)
10224 		hit(dev, tunnel, ctx);
10225 	if (!verdict && miss)
10226 		miss(dev, ctx);
10227 	if (lock_op)
10228 		rte_spinlock_unlock(&thub->sl);
10229 
10230 	return verdict;
10231 }
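
/*
 * Usage sketch (illustrative only, my_* names are hypothetical): callers
 * supply a match predicate plus optional hit/miss callbacks. With lock_op
 * set the callbacks run while the tunnel hub spinlock is still held, so
 * they may safely modify the tunnel list; with lock_op cleared the lock
 * is released before the callbacks fire.
 *
 *	struct my_ctx ctx = { .key = key };
 *	bool found;
 *
 *	found = mlx5_access_tunnel_offload_db(dev, my_match, my_hit,
 *					      my_miss, &ctx, true);
 */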
10232 
10233 struct tunnel_db_find_tunnel_id_ctx {
10234 	uint32_t tunnel_id;
10235 	struct mlx5_flow_tunnel *tunnel;
10236 };
10237 
10238 static bool
10239 find_tunnel_id_match(struct rte_eth_dev *dev,
10240 		     struct mlx5_flow_tunnel *tunnel, const void *x)
10241 {
10242 	const struct tunnel_db_find_tunnel_id_ctx *ctx = x;
10243 
10244 	RTE_SET_USED(dev);
10245 	return tunnel->tunnel_id == ctx->tunnel_id;
10246 }
10247 
10248 static void
10249 find_tunnel_id_hit(struct rte_eth_dev *dev,
10250 		   struct mlx5_flow_tunnel *tunnel, void *x)
10251 {
10252 	struct tunnel_db_find_tunnel_id_ctx *ctx = x;

10253 	RTE_SET_USED(dev);
10254 	ctx->tunnel = tunnel;
10255 }
10256 
10257 static struct mlx5_flow_tunnel *
10258 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
10259 {
10260 	struct tunnel_db_find_tunnel_id_ctx ctx = {
10261 		.tunnel_id = id,
10262 	};
10263 
10264 	mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match,
10265 				      find_tunnel_id_hit, NULL, &ctx, true);
10266 
10267 	return ctx.tunnel;
10268 }
10269 
10270 static struct mlx5_flow_tunnel *
10271 mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
10272 			  const struct rte_flow_tunnel *app_tunnel)
10273 {
10274 	struct mlx5_priv *priv = dev->data->dev_private;
10275 	struct mlx5_indexed_pool *ipool;
10276 	struct mlx5_flow_tunnel *tunnel;
10277 	uint32_t id;
10278 
10279 	ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
10280 	tunnel = mlx5_ipool_zmalloc(ipool, &id);
10281 	if (!tunnel)
10282 		return NULL;
10283 	if (id >= MLX5_MAX_TUNNELS) {
10284 		mlx5_ipool_free(ipool, id);
10285 		DRV_LOG(ERR, "Tunnel ID %d exceed max limit.", id);
10286 		return NULL;
10287 	}
10288 	tunnel->groups = mlx5_hlist_create("tunnel groups", 64, false, true,
10289 					   priv->sh,
10290 					   mlx5_flow_tunnel_grp2tbl_create_cb,
10291 					   mlx5_flow_tunnel_grp2tbl_match_cb,
10292 					   mlx5_flow_tunnel_grp2tbl_remove_cb,
10293 					   mlx5_flow_tunnel_grp2tbl_clone_cb,
10294 					mlx5_flow_tunnel_grp2tbl_clone_free_cb);
10295 	if (!tunnel->groups) {
10296 		mlx5_ipool_free(ipool, id);
10297 		return NULL;
10298 	}
10299 	/* Initialize the new PMD tunnel. */
10300 	memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
10301 	tunnel->tunnel_id = id;
10302 	tunnel->action.type = (typeof(tunnel->action.type))
10303 			      MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
10304 	tunnel->action.conf = tunnel;
10305 	tunnel->item.type = (typeof(tunnel->item.type))
10306 			    MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
10307 	tunnel->item.spec = tunnel;
10308 	tunnel->item.last = NULL;
10309 	tunnel->item.mask = NULL;
10310 
10311 	DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
10312 		dev->data->port_id, tunnel->tunnel_id);
10313 
10314 	return tunnel;
10315 }
10316 
10317 struct tunnel_db_get_tunnel_ctx {
10318 	const struct rte_flow_tunnel *app_tunnel;
10319 	struct mlx5_flow_tunnel *tunnel;
10320 };
10321 
10322 static bool get_tunnel_match(struct rte_eth_dev *dev,
10323 			     struct mlx5_flow_tunnel *tunnel, const void *x)
10324 {
10325 	const struct tunnel_db_get_tunnel_ctx *ctx = x;
10326 
10327 	RTE_SET_USED(dev);
10328 	return !memcmp(ctx->app_tunnel, &tunnel->app_tunnel,
10329 		       sizeof(*ctx->app_tunnel));
10330 }
10331 
10332 static void get_tunnel_hit(struct rte_eth_dev *dev,
10333 			   struct mlx5_flow_tunnel *tunnel, void *x)
10334 {
10335 	/* called under tunnel spinlock protection */
10336 	struct tunnel_db_get_tunnel_ctx *ctx = x;
10337 
10338 	RTE_SET_USED(dev);
10339 	tunnel->refctn++;
10340 	ctx->tunnel = tunnel;
10341 }
10342 
10343 static void get_tunnel_miss(struct rte_eth_dev *dev, void *x)
10344 {
10345 	/* called under tunnel spinlock protection */
10346 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10347 	struct tunnel_db_get_tunnel_ctx *ctx = x;
10348 
	/* Drop the lock while allocating to keep the critical section short. */
10349 	rte_spinlock_unlock(&thub->sl);
10350 	ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel);
10351 	rte_spinlock_lock(&thub->sl);
10352 	if (ctx->tunnel) {
10353 		ctx->tunnel->refctn = 1;
10354 		LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain);
10355 	}
10356 }
10357 
10359 static int
10360 mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
10361 		     const struct rte_flow_tunnel *app_tunnel,
10362 		     struct mlx5_flow_tunnel **tunnel)
10363 {
10364 	struct tunnel_db_get_tunnel_ctx ctx = {
10365 		.app_tunnel = app_tunnel,
10366 	};
10367 
10368 	mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit,
10369 				      get_tunnel_miss, &ctx, true);
10370 	*tunnel = ctx.tunnel;
10371 	return ctx.tunnel ? 0 : -ENOMEM;
10372 }
10373 
10374 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
10375 {
10376 	struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
10377 
10378 	if (!thub)
10379 		return;
10380 	if (!LIST_EMPTY(&thub->tunnels))
10381 		DRV_LOG(WARNING, "port %u tunnels present", port_id);
10382 	mlx5_hlist_destroy(thub->groups);
10383 	mlx5_free(thub);
10384 }
10385 
10386 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
10387 {
10388 	int err;
10389 	struct mlx5_flow_tunnel_hub *thub;
10390 
10391 	thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
10392 			   0, SOCKET_ID_ANY);
10393 	if (!thub)
10394 		return -ENOMEM;
10395 	LIST_INIT(&thub->tunnels);
10396 	rte_spinlock_init(&thub->sl);
10397 	thub->groups = mlx5_hlist_create("flow groups", 64,
10398 					 false, true, sh,
10399 					 mlx5_flow_tunnel_grp2tbl_create_cb,
10400 					 mlx5_flow_tunnel_grp2tbl_match_cb,
10401 					 mlx5_flow_tunnel_grp2tbl_remove_cb,
10402 					 mlx5_flow_tunnel_grp2tbl_clone_cb,
10403 					mlx5_flow_tunnel_grp2tbl_clone_free_cb);
10404 	if (!thub->groups) {
10405 		err = -rte_errno;
10406 		goto err;
10407 	}
10408 	sh->tunnel_hub = thub;
10409 
10410 	return 0;
10411 
10412 err:
10413 	/* Only the group hlist creation can fail here; thub is valid. */
10414 	mlx5_free(thub);
10417 	return err;
10418 }
10419 
10420 static inline int
10421 mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
10422 			  struct rte_flow_tunnel *tunnel,
10423 			  struct rte_flow_error *error)
10424 {
10425 	struct mlx5_priv *priv = dev->data->dev_private;
10426 
10427 	if (!priv->sh->config.dv_flow_en)
10428 		return rte_flow_error_set(error, ENOTSUP,
10429 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10430 					  "flow DV interface is off");
10431 	if (!is_tunnel_offload_active(dev))
10432 		return rte_flow_error_set(error, ENOTSUP,
10433 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10434 					  "tunnel offload was not activated");
10435 	if (!tunnel)
10436 		return rte_flow_error_set(error, EINVAL,
10437 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10438 					  "no application tunnel");
10439 	switch (tunnel->type) {
10440 	default:
10441 		return rte_flow_error_set(error, EINVAL,
10442 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10443 					  "unsupported tunnel type");
10444 	case RTE_FLOW_ITEM_TYPE_VXLAN:
10445 	case RTE_FLOW_ITEM_TYPE_GRE:
10446 	case RTE_FLOW_ITEM_TYPE_NVGRE:
10447 	case RTE_FLOW_ITEM_TYPE_GENEVE:
10448 		break;
10449 	}
10450 	return 0;
10451 }
10452 
10453 static int
10454 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
10455 		    struct rte_flow_tunnel *app_tunnel,
10456 		    struct rte_flow_action **actions,
10457 		    uint32_t *num_of_actions,
10458 		    struct rte_flow_error *error)
10459 {
10460 	struct mlx5_flow_tunnel *tunnel;
10461 	int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
10462 
10463 	if (ret)
10464 		return ret;
10465 	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
10466 	if (ret < 0) {
10467 		return rte_flow_error_set(error, ret,
10468 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10469 					  "failed to initialize pmd tunnel");
10470 	}
10471 	*actions = &tunnel->action;
10472 	*num_of_actions = 1;
10473 	return 0;
10474 }
10475 
10476 static int
10477 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
10478 		       struct rte_flow_tunnel *app_tunnel,
10479 		       struct rte_flow_item **items,
10480 		       uint32_t *num_of_items,
10481 		       struct rte_flow_error *error)
10482 {
10483 	struct mlx5_flow_tunnel *tunnel;
10484 	int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
10485 
10486 	if (ret)
10487 		return ret;
10488 	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
10489 	if (ret < 0) {
10490 		return rte_flow_error_set(error, ret,
10491 					  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
10492 					  "failed to initialize pmd tunnel");
10493 	}
10494 	*items = &tunnel->item;
10495 	*num_of_items = 1;
10496 	return 0;
10497 }
10498 
10499 struct tunnel_db_element_release_ctx {
10500 	struct rte_flow_item *items;
10501 	struct rte_flow_action *actions;
10502 	uint32_t num_elements;
10503 	struct rte_flow_error *error;
10504 	int ret;
10505 };
10506 
10507 static bool
10508 tunnel_element_release_match(struct rte_eth_dev *dev,
10509 			     struct mlx5_flow_tunnel *tunnel, const void *x)
10510 {
10511 	const struct tunnel_db_element_release_ctx *ctx = x;
10512 
10513 	RTE_SET_USED(dev);
10514 	if (ctx->num_elements != 1)
10515 		return false;
10516 	else if (ctx->items)
10517 		return ctx->items == &tunnel->item;
10518 	else if (ctx->actions)
10519 		return ctx->actions == &tunnel->action;
10520 
10521 	return false;
10522 }
10523 
10524 static void
10525 tunnel_element_release_hit(struct rte_eth_dev *dev,
10526 			   struct mlx5_flow_tunnel *tunnel, void *x)
10527 {
10528 	struct tunnel_db_element_release_ctx *ctx = x;

10529 	ctx->ret = 0;
10530 	if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
10531 		mlx5_flow_tunnel_free(dev, tunnel);
10532 }
10533 
10534 static void
10535 tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
10536 {
10537 	struct tunnel_db_element_release_ctx *ctx = x;

10538 	RTE_SET_USED(dev);
10539 	ctx->ret = rte_flow_error_set(ctx->error, EINVAL,
10540 				      RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
10541 				      "invalid argument");
10542 }
10543 
10544 static int
10545 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
10546 		       struct rte_flow_item *pmd_items,
10547 		       uint32_t num_items, struct rte_flow_error *err)
10548 {
10549 	struct tunnel_db_element_release_ctx ctx = {
10550 		.items = pmd_items,
10551 		.actions = NULL,
10552 		.num_elements = num_items,
10553 		.error = err,
10554 	};
10555 
10556 	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
10557 				      tunnel_element_release_hit,
10558 				      tunnel_element_release_miss, &ctx, false);
10559 
10560 	return ctx.ret;
10561 }
10562 
10563 static int
10564 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
10565 			 struct rte_flow_action *pmd_actions,
10566 			 uint32_t num_actions, struct rte_flow_error *err)
10567 {
10568 	struct tunnel_db_element_release_ctx ctx = {
10569 		.items = NULL,
10570 		.actions = pmd_actions,
10571 		.num_elements = num_actions,
10572 		.error = err,
10573 	};
10574 
10575 	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
10576 				      tunnel_element_release_hit,
10577 				      tunnel_element_release_miss, &ctx, false);
10578 
10579 	return ctx.ret;
10580 }
10581 
10582 static int
10583 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
10584 				  struct rte_mbuf *m,
10585 				  struct rte_flow_restore_info *info,
10586 				  struct rte_flow_error *err)
10587 {
10588 	uint64_t ol_flags = m->ol_flags;
10589 	const struct mlx5_flow_tbl_data_entry *tble;
10590 	const uint64_t mask = RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
10591 
10592 	if (!is_tunnel_offload_active(dev)) {
10593 		info->flags = 0;
10594 		return 0;
10595 	}
10596 
10597 	if ((ol_flags & mask) != mask)
10598 		goto err;
10599 	tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
10600 	if (!tble) {
10601 		DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
10602 			dev->data->port_id, m->hash.fdir.hi);
10603 		goto err;
10604 	}
10605 	MLX5_ASSERT(tble->tunnel);
10606 	memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel));
10607 	info->group_id = tble->group_id;
10608 	info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
10609 		      RTE_FLOW_RESTORE_INFO_GROUP_ID |
10610 		      RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
10611 
10612 	return 0;
10613 
10614 err:
10615 	return rte_flow_error_set(err, EINVAL,
10616 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10617 				  "failed to get restore info");
10618 }
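
/*
 * Application-level sketch (illustrative only) of the sequence the
 * callbacks above implement, expressed through the public rte_flow API:
 *
 *	struct rte_flow_tunnel tunnel = {
 *		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
 *	};
 *	struct rte_flow_action *pmd_actions;
 *	struct rte_flow_item *pmd_items;
 *	uint32_t n_actions, n_items;
 *	struct rte_flow_restore_info info;
 *	struct rte_flow_error err;
 *
 *	rte_flow_tunnel_decap_set(port_id, &tunnel, &pmd_actions,
 *				  &n_actions, &err);
 *	rte_flow_tunnel_match(port_id, &tunnel, &pmd_items,
 *			      &n_items, &err);
 *	... combine pmd_items/pmd_actions with application rules ...
 *	rte_flow_get_restore_info(port_id, mbuf, &info, &err);
 */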
10619 
10620 #else /* HAVE_IBV_FLOW_DV_SUPPORT */
10621 static int
10622 mlx5_flow_tunnel_decap_set(__rte_unused struct rte_eth_dev *dev,
10623 			   __rte_unused struct rte_flow_tunnel *app_tunnel,
10624 			   __rte_unused struct rte_flow_action **actions,
10625 			   __rte_unused uint32_t *num_of_actions,
10626 			   __rte_unused struct rte_flow_error *error)
10627 {
10628 	return -ENOTSUP;
10629 }
10630 
10631 static int
10632 mlx5_flow_tunnel_match(__rte_unused struct rte_eth_dev *dev,
10633 		       __rte_unused struct rte_flow_tunnel *app_tunnel,
10634 		       __rte_unused struct rte_flow_item **items,
10635 		       __rte_unused uint32_t *num_of_items,
10636 		       __rte_unused struct rte_flow_error *error)
10637 {
10638 	return -ENOTSUP;
10639 }
10640 
10641 static int
10642 mlx5_flow_tunnel_item_release(__rte_unused struct rte_eth_dev *dev,
10643 			      __rte_unused struct rte_flow_item *pmd_items,
10644 			      __rte_unused uint32_t num_items,
10645 			      __rte_unused struct rte_flow_error *err)
10646 {
10647 	return -ENOTSUP;
10648 }
10649 
10650 static int
10651 mlx5_flow_tunnel_action_release(__rte_unused struct rte_eth_dev *dev,
10652 				__rte_unused struct rte_flow_action *pmd_action,
10653 				__rte_unused uint32_t num_actions,
10654 				__rte_unused struct rte_flow_error *err)
10655 {
10656 	return -ENOTSUP;
10657 }
10658 
10659 static int
10660 mlx5_flow_tunnel_get_restore_info(__rte_unused struct rte_eth_dev *dev,
10661 				  __rte_unused struct rte_mbuf *m,
10662 				  __rte_unused struct rte_flow_restore_info *i,
10663 				  __rte_unused struct rte_flow_error *err)
10664 {
10665 	return -ENOTSUP;
10666 }
10667 
10668 static int
10669 flow_tunnel_add_default_miss(__rte_unused struct rte_eth_dev *dev,
10670 			     __rte_unused struct rte_flow *flow,
10671 			     __rte_unused const struct rte_flow_attr *attr,
10672 			     __rte_unused const struct rte_flow_action *actions,
10673 			     __rte_unused uint32_t flow_idx,
10674 			     __rte_unused const struct mlx5_flow_tunnel *tunnel,
10675 			     __rte_unused struct tunnel_default_miss_ctx *ctx,
10676 			     __rte_unused struct rte_flow_error *error)
10677 {
10678 	return -ENOTSUP;
10679 }
10680 
10681 static struct mlx5_flow_tunnel *
10682 mlx5_find_tunnel_id(__rte_unused struct rte_eth_dev *dev,
10683 		    __rte_unused uint32_t id)
10684 {
10685 	return NULL;
10686 }
10687 
10688 static void
10689 mlx5_flow_tunnel_free(__rte_unused struct rte_eth_dev *dev,
10690 		      __rte_unused struct mlx5_flow_tunnel *tunnel)
10691 {
10692 }
10693 
10694 static uint32_t
10695 tunnel_flow_group_to_flow_table(__rte_unused struct rte_eth_dev *dev,
10696 				__rte_unused const struct mlx5_flow_tunnel *t,
10697 				__rte_unused uint32_t group,
10698 				__rte_unused uint32_t *table,
10699 				struct rte_flow_error *error)
10700 {
10701 	return rte_flow_error_set(error, ENOTSUP,
10702 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10703 				  "tunnel offload requires DV support");
10704 }
10705 
10706 void
10707 mlx5_release_tunnel_hub(__rte_unused struct mlx5_dev_ctx_shared *sh,
10708 			__rte_unused  uint16_t port_id)
10709 {
10710 }
10711 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
10712 
10713 /* Flex flow item API */
10714 static struct rte_flow_item_flex_handle *
10715 mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
10716 			   const struct rte_flow_item_flex_conf *conf,
10717 			   struct rte_flow_error *error)
10718 {
10719 	static const char err_msg[] = "flex item creation unsupported";
10720 	struct mlx5_priv *priv = dev->data->dev_private;
10721 	struct rte_flow_attr attr = { .transfer = 0 };
10722 	const struct mlx5_flow_driver_ops *fops =
10723 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10724 
10725 	if (!priv->pci_dev) {
10726 		rte_flow_error_set(error, ENOTSUP,
10727 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10728 				   "create flex item on PF only");
10729 		return NULL;
10730 	}
10731 	switch (priv->pci_dev->id.device_id) {
10732 	case PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF:
10733 	case PCI_DEVICE_ID_MELLANOX_CONNECTX7BF:
10734 		break;
10735 	default:
10736 		rte_flow_error_set(error, ENOTSUP,
10737 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10738 				   "flex item available on BlueField ports only");
10739 		return NULL;
10740 	}
10741 	if (!fops->item_create) {
10742 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10743 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10744 				   NULL, err_msg);
10745 		return NULL;
10746 	}
10747 	return fops->item_create(dev, conf, error);
10748 }
10749 
10750 static int
10751 mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
10752 			    const struct rte_flow_item_flex_handle *handle,
10753 			    struct rte_flow_error *error)
10754 {
10755 	static const char err_msg[] = "flex item release unsupported";
10756 	struct rte_flow_attr attr = { .transfer = 0 };
10757 	const struct mlx5_flow_driver_ops *fops =
10758 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10759 
10760 	if (!fops->item_release) {
10761 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10762 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10763 				   NULL, err_msg);
10764 		return -rte_errno;
10765 	}
10766 	return fops->item_release(dev, handle, error);
10767 }
10768 
10769 static void
10770 mlx5_dbg__print_pattern(const struct rte_flow_item *item)
10771 {
10772 	int ret;
10773 	struct rte_flow_error error;
10774 
10775 	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
10776 		char *item_name;
10777 		ret = rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME_PTR, &item_name,
10778 				    sizeof(item_name),
10779 				    (void *)(uintptr_t)item->type, &error);
10780 		if (ret > 0)
10781 			printf("%s ", item_name);
10782 		else
10783 			printf("%d\n", (int)item->type);
10784 	}
10785 	printf("END\n");
10786 }
10787 
10788 static int
10789 mlx5_flow_is_std_vxlan_port(const struct rte_flow_item *udp_item)
10790 {
10791 	const struct rte_flow_item_udp *spec = udp_item->spec;
10792 	const struct rte_flow_item_udp *mask = udp_item->mask;
10793 	uint16_t udp_dport = 0;
10794 
10795 	if (spec != NULL) {
10796 		if (!mask)
10797 			mask = &rte_flow_item_udp_mask;
10798 		udp_dport = rte_be_to_cpu_16(spec->hdr.dst_port &
10799 				mask->hdr.dst_port);
10800 	}
10801 	return (!udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN);
10802 }
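
/*
 * Example (illustrative only): a UDP item whose masked destination port
 * is MLX5_UDP_PORT_VXLAN (4789), or that carries no spec at all, counts
 * as the standard VXLAN port:
 *
 *	struct rte_flow_item_udp spec = {
 *		.hdr.dst_port = RTE_BE16(MLX5_UDP_PORT_VXLAN),
 *	};
 *	struct rte_flow_item udp = {
 *		.type = RTE_FLOW_ITEM_TYPE_UDP,
 *		.spec = &spec,
 *	};
 *
 * mlx5_flow_is_std_vxlan_port(&udp) returns 1 here; a spec matching
 * destination port 4790 would return 0.
 */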
10803 
10804 static const struct mlx5_flow_expand_node *
10805 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
10806 		unsigned int item_idx,
10807 		const struct mlx5_flow_expand_node graph[],
10808 		const struct mlx5_flow_expand_node *node)
10809 {
10810 	const struct rte_flow_item *item = pattern + item_idx, *prev_item;
10811 
10812 	if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN &&
10813 			node != NULL &&
10814 			node->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
10815 		/*
10816 		 * The expansion node is VXLAN and it is also the last
10817 		 * expandable item in the pattern, so need to continue
10818 		 * expansion of the inner tunnel.
10819 		 */
10820 		MLX5_ASSERT(item_idx > 0);
10821 		prev_item = pattern + item_idx - 1;
10822 		MLX5_ASSERT(prev_item->type == RTE_FLOW_ITEM_TYPE_UDP);
10823 		if (mlx5_flow_is_std_vxlan_port(prev_item))
10824 			return &graph[MLX5_EXPANSION_STD_VXLAN];
10825 		return &graph[MLX5_EXPANSION_L3_VXLAN];
10826 	}
10827 	return node;
10828 }
10829 
10830 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
10831 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
10832 	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
10833 };
10834 
10835 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
10836 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
10837 	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
10838 	{ 9, 10, 11 }, { 12, 13, 14 },
10839 };
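
/*
 * Worked example (illustrative only): on a device exposing 16 Verbs
 * priorities priority_map_5 applies, so a rule with base priority 1 and
 * item subpriority 2 lands on Verbs priority priority_map_5[1][2] == 5;
 * with 8 Verbs priorities, priority_map_3[1][2] == 4.
 */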
10840 
10841 /**
10842  * Discover the number of available flow priorities.
10843  *
10844  * @param dev
10845  *   Ethernet device.
10846  *
10847  * @return
10848  *   On success, number of available flow priorities.
10849  *   On failure, a negative errno-style code and rte_errno is set.
10850  */
10851 int
10852 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
10853 {
10854 	static const uint16_t vprio[] = {8, 16};
10855 	const struct mlx5_priv *priv = dev->data->dev_private;
10856 	const struct mlx5_flow_driver_ops *fops;
10857 	enum mlx5_flow_drv_type type;
10858 	int ret;
10859 
10860 	type = mlx5_flow_os_get_type();
10861 	if (type == MLX5_FLOW_TYPE_MAX) {
10862 		type = MLX5_FLOW_TYPE_VERBS;
10863 		if (priv->sh->cdev->config.devx && priv->sh->config.dv_flow_en)
10864 			type = MLX5_FLOW_TYPE_DV;
10865 	}
10866 	fops = flow_get_drv_ops(type);
10867 	if (fops->discover_priorities == NULL) {
10868 		DRV_LOG(ERR, "Priority discovery not supported");
10869 		rte_errno = ENOTSUP;
10870 		return -rte_errno;
10871 	}
10872 	ret = fops->discover_priorities(dev, vprio, RTE_DIM(vprio));
10873 	if (ret < 0)
10874 		return ret;
10875 	switch (ret) {
10876 	case 8:
10877 		ret = RTE_DIM(priority_map_3);
10878 		break;
10879 	case 16:
10880 		ret = RTE_DIM(priority_map_5);
10881 		break;
10882 	default:
10883 		rte_errno = ENOTSUP;
10884 		DRV_LOG(ERR,
10885 			"port %u maximum priority: %d expected 8/16",
10886 			dev->data->port_id, ret);
10887 		return -rte_errno;
10888 	}
10889 	DRV_LOG(INFO, "port %u supported flow priorities:"
10890 		" 0-%d for ingress or egress root table,"
10891 		" 0-%d for non-root table or transfer root table.",
10892 		dev->data->port_id, ret - 2,
10893 		MLX5_NON_ROOT_FLOW_MAX_PRIO - 1);
10894 	return ret;
10895 }
10896 
10897 /**
10898  * Adjust flow priority based on the highest layer and the request priority.
10899  *
10900  * @param[in] dev
10901  *   Pointer to the Ethernet device structure.
10902  * @param[in] priority
10903  *   The rule base priority.
10904  * @param[in] subpriority
10905  *   The priority based on the items.
10906  *
10907  * @return
10908  *   The new priority.
10909  */
10910 uint32_t
10911 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
10912 			  uint32_t subpriority)
10913 {
10914 	uint32_t res = 0;
10915 	struct mlx5_priv *priv = dev->data->dev_private;
10916 
10917 	switch (priv->sh->flow_max_priority) {
10918 	case RTE_DIM(priority_map_3):
10919 		res = priority_map_3[priority][subpriority];
10920 		break;
10921 	case RTE_DIM(priority_map_5):
10922 		res = priority_map_5[priority][subpriority];
10923 		break;
10924 	}
10925 	return  res;
10926 }
10927