/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <stdalign.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <sys/queue.h>

#include <rte_common.h>
#include <rte_ether.h>
#include <ethdev_driver.h>
#include <rte_eal_paging.h>
#include <rte_flow.h>
#include <rte_cycles.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_prm.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_flow_os.h"
#include "mlx5_rx.h"
#include "mlx5_tx.h"
#include "mlx5_common_os.h"
#include "rte_pmd_mlx5.h"

/*
 * Shared array for quick translation between port_id and vport mask/values
 * used for HWS rules.
 */
struct flow_hw_port_info mlx5_flow_hw_port_infos[RTE_MAX_ETHPORTS];

/*
 * A global structure to save the available REG_C_x for tag usage.
 * The meter color REG (ASO) and the last available one are reserved
 * for PMD internal usage.
 * Since there is no "port" concept in the driver, it is assumed that the
 * available tag set is the minimum intersection across all ports:
 * 3 in FDB mode / 5 in legacy mode.
 */
uint32_t mlx5_flow_hw_avl_tags_init_cnt;
enum modify_reg mlx5_flow_hw_avl_tags[MLX5_FLOW_HW_TAGS_MAX] = {REG_NON};
enum modify_reg mlx5_flow_hw_aso_tag;

struct tunnel_default_miss_ctx {
	uint16_t *queue;
	__extension__
	union {
		struct rte_flow_action_rss action_rss;
		struct rte_flow_action_queue miss_queue;
		struct rte_flow_action_jump miss_jump;
		uint8_t raw[0];
	};
};

static int
flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
			     struct rte_flow *flow,
			     const struct rte_flow_attr *attr,
			     const struct rte_flow_action *app_actions,
			     uint32_t flow_idx,
			     const struct mlx5_flow_tunnel *tunnel,
			     struct tunnel_default_miss_ctx *ctx,
			     struct rte_flow_error *error);
static struct mlx5_flow_tunnel *
mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id);
static void
mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel);
static uint32_t
tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
				const struct mlx5_flow_tunnel *tunnel,
				uint32_t group, uint32_t *table,
				struct rte_flow_error *error);

/** Device flow drivers. */
extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;

const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;

const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
#if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
#endif
#ifdef HAVE_MLX5_HWS_SUPPORT
	[MLX5_FLOW_TYPE_HW] = &mlx5_flow_hw_drv_ops,
#endif
	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
};

/** Helper macro to build input graph for mlx5_flow_expand_rss(). */
#define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
	(const int []){ \
		__VA_ARGS__, 0, \
	}
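
/*
 * Illustrative note (not part of the driver): the macro above builds a
 * zero-terminated index list as a compound literal. For hypothetical node
 * indexes A and B,
 *
 *   MLX5_FLOW_EXPAND_RSS_NEXT(A, B)
 *
 * expands to
 *
 *   (const int []){ A, B, 0, }
 *
 * which is why index 0 can serve as the list terminator in the node
 * definitions below.
 */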

/** Node object of input graph for mlx5_flow_expand_rss(). */
struct mlx5_flow_expand_node {
	const int *const next;
	/**<
	 * List of next node indexes. Index 0 is interpreted as a terminator.
	 */
	const enum rte_flow_item_type type;
	/**< Pattern item type of current node. */
	uint64_t rss_types;
	/**<
	 * RSS types bit-field associated with this node
	 * (see RTE_ETH_RSS_* definitions).
	 */
	uint64_t node_flags;
	/**<
	 * Bit-fields that define how the node is used in the expansion
	 * (see MLX5_EXPANSION_NODE_* definitions).
	 */
};

/** Keep the same format as mlx5_flow_expand_rss to share the expansion buffer. */
struct mlx5_flow_expand_sqn {
	uint32_t entries; /**< Number of entries. */
	struct {
		struct rte_flow_item *pattern; /**< Expanded pattern array. */
		uint32_t priority; /**< Priority offset for each expansion. */
	} entry[];
};

/* Optional expand field. The expansion algorithm will not go deeper. */
#define MLX5_EXPANSION_NODE_OPTIONAL (UINT64_C(1) << 0)

/* The node is not added implicitly as expansion to the flow pattern.
 * If the node type does not match the flow pattern item type, the
 * expansion algorithm will go deeper to its next items.
 * In the current implementation, the list of next node indexes can
 * have up to one node with this flag set and it has to be the last
 * node index (before the list terminator).
 */
#define MLX5_EXPANSION_NODE_EXPLICIT (UINT64_C(1) << 1)

/** Object returned by mlx5_flow_expand_rss(). */
struct mlx5_flow_expand_rss {
	uint32_t entries;
	/**< Number of entries in @p entry, i.e. of patterns and priorities. */
	struct {
		struct rte_flow_item *pattern; /**< Expanded pattern array. */
		uint32_t priority; /**< Priority offset for each expansion. */
	} entry[];
};

static void
mlx5_dbg__print_pattern(const struct rte_flow_item *item);

static const struct mlx5_flow_expand_node *
mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
		unsigned int item_idx,
		const struct mlx5_flow_expand_node graph[],
		const struct mlx5_flow_expand_node *node);

static __rte_always_inline int
mlx5_need_cache_flow(const struct mlx5_priv *priv,
		     const struct rte_flow_attr *attr)
{
	return priv->isolated && priv->sh->config.dv_flow_en == 1 &&
		(attr ? !attr->group : true) &&
		priv->mode_info.mode == MLX5_FLOW_ENGINE_MODE_STANDBY &&
		(!priv->sh->config.dv_esw_en || !priv->sh->config.fdb_def_rule);
}

static bool
mlx5_flow_is_rss_expandable_item(const struct rte_flow_item *item)
{
	switch (item->type) {
	case RTE_FLOW_ITEM_TYPE_ETH:
	case RTE_FLOW_ITEM_TYPE_VLAN:
	case RTE_FLOW_ITEM_TYPE_IPV4:
	case RTE_FLOW_ITEM_TYPE_IPV6:
	case RTE_FLOW_ITEM_TYPE_UDP:
	case RTE_FLOW_ITEM_TYPE_TCP:
	case RTE_FLOW_ITEM_TYPE_ESP:
	case RTE_FLOW_ITEM_TYPE_ICMP:
	case RTE_FLOW_ITEM_TYPE_ICMP6:
	case RTE_FLOW_ITEM_TYPE_VXLAN:
	case RTE_FLOW_ITEM_TYPE_NVGRE:
	case RTE_FLOW_ITEM_TYPE_GRE:
	case RTE_FLOW_ITEM_TYPE_GENEVE:
	case RTE_FLOW_ITEM_TYPE_MPLS:
	case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
	case RTE_FLOW_ITEM_TYPE_GRE_KEY:
	case RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT:
	case RTE_FLOW_ITEM_TYPE_GTP:
		return true;
	default:
		break;
	}
	return false;
}

/**
 * Network Service Header (NSH) and its next protocol values
 * are described in RFC-8393.
 */
static enum rte_flow_item_type
mlx5_nsh_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
{
	enum rte_flow_item_type type;

	switch (proto_mask & proto_spec) {
	case 0:
		type = RTE_FLOW_ITEM_TYPE_VOID;
		break;
	case RTE_VXLAN_GPE_TYPE_IPV4:
		type = RTE_FLOW_ITEM_TYPE_IPV4;
		break;
	case RTE_VXLAN_GPE_TYPE_IPV6:
		type = RTE_FLOW_ITEM_TYPE_IPV6;
		break;
	case RTE_VXLAN_GPE_TYPE_ETH:
		type = RTE_FLOW_ITEM_TYPE_ETH;
		break;
	default:
		type = RTE_FLOW_ITEM_TYPE_END;
	}
	return type;
}

static enum rte_flow_item_type
mlx5_inet_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
{
	enum rte_flow_item_type type;

	switch (proto_mask & proto_spec) {
	case 0:
		type = RTE_FLOW_ITEM_TYPE_VOID;
		break;
	case IPPROTO_UDP:
		type = RTE_FLOW_ITEM_TYPE_UDP;
		break;
	case IPPROTO_TCP:
		type = RTE_FLOW_ITEM_TYPE_TCP;
		break;
	case IPPROTO_IPIP:
		type = RTE_FLOW_ITEM_TYPE_IPV4;
		break;
	case IPPROTO_IPV6:
		type = RTE_FLOW_ITEM_TYPE_IPV6;
		break;
	case IPPROTO_ESP:
		type = RTE_FLOW_ITEM_TYPE_ESP;
		break;
	default:
		type = RTE_FLOW_ITEM_TYPE_END;
	}
	return type;
}

static enum rte_flow_item_type
mlx5_ethertype_to_item_type(rte_be16_t type_spec,
			    rte_be16_t type_mask, bool is_tunnel)
{
	enum rte_flow_item_type type;

	switch (rte_be_to_cpu_16(type_spec & type_mask)) {
	case 0:
		type = RTE_FLOW_ITEM_TYPE_VOID;
		break;
	case RTE_ETHER_TYPE_TEB:
		type = is_tunnel ?
		       RTE_FLOW_ITEM_TYPE_ETH : RTE_FLOW_ITEM_TYPE_END;
		break;
	case RTE_ETHER_TYPE_VLAN:
		type = !is_tunnel ?
		       RTE_FLOW_ITEM_TYPE_VLAN : RTE_FLOW_ITEM_TYPE_END;
		break;
	case RTE_ETHER_TYPE_IPV4:
		type = RTE_FLOW_ITEM_TYPE_IPV4;
		break;
	case RTE_ETHER_TYPE_IPV6:
		type = RTE_FLOW_ITEM_TYPE_IPV6;
		break;
	default:
		type = RTE_FLOW_ITEM_TYPE_END;
	}
	return type;
}

static enum rte_flow_item_type
mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
{
#define MLX5_XSET_ITEM_MASK_SPEC(type, fld)                              \
	do {                                                             \
		const void *m = item->mask;                              \
		const void *s = item->spec;                              \
		mask = m ?                                               \
			((const struct rte_flow_item_##type *)m)->fld :  \
			rte_flow_item_##type##_mask.fld;                 \
		spec = ((const struct rte_flow_item_##type *)s)->fld;    \
	} while (0)

	enum rte_flow_item_type ret;
	uint16_t spec, mask;

	if (item == NULL || item->spec == NULL)
		return RTE_FLOW_ITEM_TYPE_VOID;
	switch (item->type) {
	case RTE_FLOW_ITEM_TYPE_ETH:
		MLX5_XSET_ITEM_MASK_SPEC(eth, hdr.ether_type);
		if (!mask)
			return RTE_FLOW_ITEM_TYPE_VOID;
		ret = mlx5_ethertype_to_item_type(spec, mask, false);
		break;
	case RTE_FLOW_ITEM_TYPE_VLAN:
		MLX5_XSET_ITEM_MASK_SPEC(vlan, hdr.eth_proto);
		if (!mask)
			return RTE_FLOW_ITEM_TYPE_VOID;
		ret = mlx5_ethertype_to_item_type(spec, mask, false);
		break;
	case RTE_FLOW_ITEM_TYPE_IPV4:
		MLX5_XSET_ITEM_MASK_SPEC(ipv4, hdr.next_proto_id);
		if (!mask)
			return RTE_FLOW_ITEM_TYPE_VOID;
		ret = mlx5_inet_proto_to_item_type(spec, mask);
		break;
	case RTE_FLOW_ITEM_TYPE_IPV6:
		MLX5_XSET_ITEM_MASK_SPEC(ipv6, hdr.proto);
		if (!mask)
			return RTE_FLOW_ITEM_TYPE_VOID;
		ret = mlx5_inet_proto_to_item_type(spec, mask);
		break;
	case RTE_FLOW_ITEM_TYPE_GENEVE:
		MLX5_XSET_ITEM_MASK_SPEC(geneve, protocol);
		ret = mlx5_ethertype_to_item_type(spec, mask, true);
		break;
	case RTE_FLOW_ITEM_TYPE_GRE:
		MLX5_XSET_ITEM_MASK_SPEC(gre, protocol);
		ret = mlx5_ethertype_to_item_type(spec, mask, true);
		break;
	case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
		MLX5_XSET_ITEM_MASK_SPEC(vxlan_gpe, hdr.proto);
		ret = mlx5_nsh_proto_to_item_type(spec, mask);
		break;
	default:
		ret = RTE_FLOW_ITEM_TYPE_VOID;
		break;
	}
	return ret;
#undef MLX5_XSET_ITEM_MASK_SPEC
}

static const int *
mlx5_flow_expand_rss_skip_explicit(const struct mlx5_flow_expand_node graph[],
		const int *next_node)
{
	const struct mlx5_flow_expand_node *node = NULL;
	const int *next = next_node;

	while (next && *next) {
		/*
		 * Skip the nodes with the MLX5_EXPANSION_NODE_EXPLICIT
		 * flag set, because they were not found in the flow pattern.
		 */
		node = &graph[*next];
		if (!(node->node_flags & MLX5_EXPANSION_NODE_EXPLICIT))
			break;
		next = node->next;
	}
	return next;
}

#define MLX5_RSS_EXP_ELT_N 32

/**
 * Expand RSS flows into several possible flows according to the RSS hash
 * fields requested and the driver capabilities.
 *
 * @param[out] buf
 *   Buffer to store the result expansion.
 * @param[in] size
 *   Buffer size in bytes. If 0, @p buf can be NULL.
 * @param[in] pattern
 *   User flow pattern.
 * @param[in] types
 *   RSS types to expand (see RTE_ETH_RSS_* definitions).
 * @param[in] graph
 *   Input graph to expand @p pattern according to @p types.
 * @param[in] graph_root_index
 *   Index of root node in @p graph, typically 0.
 *
 * @return
 *   A positive value representing the size of @p buf in bytes regardless of
 *   @p size on success, a negative errno value otherwise and rte_errno is
 *   set. The following errors are defined:
 *
 *   -E2BIG: graph @p graph is too deep.
 *   -EINVAL: @p size is not big enough for the expanded pattern.
 */
static int
mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
		     const struct rte_flow_item *pattern, uint64_t types,
		     const struct mlx5_flow_expand_node graph[],
		     int graph_root_index)
{
	const struct rte_flow_item *item;
	const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
	const int *next_node;
	const int *stack[MLX5_RSS_EXP_ELT_N];
	int stack_pos = 0;
	struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N];
	unsigned int i, item_idx, last_expand_item_idx = 0;
	size_t lsize;
	size_t user_pattern_size = 0;
	void *addr = NULL;
	const struct mlx5_flow_expand_node *next = NULL;
	struct rte_flow_item missed_item;
	int missed = 0;
	int elt = 0;
	const struct rte_flow_item *last_expand_item = NULL;

	memset(&missed_item, 0, sizeof(missed_item));
	lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
		MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]);
	if (lsize > size)
		return -EINVAL;
	buf->entry[0].priority = 0;
	buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N];
	buf->entries = 0;
	addr = buf->entry[0].pattern;
	for (item = pattern, item_idx = 0;
			item->type != RTE_FLOW_ITEM_TYPE_END;
			item++, item_idx++) {
		if (!mlx5_flow_is_rss_expandable_item(item)) {
			user_pattern_size += sizeof(*item);
			continue;
		}
		last_expand_item = item;
		last_expand_item_idx = item_idx;
		i = 0;
		while (node->next && node->next[i]) {
			next = &graph[node->next[i]];
			if (next->type == item->type)
				break;
			if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
				node = next;
				i = 0;
			} else {
				++i;
			}
		}
		if (next)
			node = next;
		user_pattern_size += sizeof(*item);
	}
	user_pattern_size += sizeof(*item); /* Handle END item. */
	lsize += user_pattern_size;
	if (lsize > size)
		return -EINVAL;
	/* Copy the user pattern in the first entry of the buffer. */
	rte_memcpy(addr, pattern, user_pattern_size);
	addr = (void *)(((uintptr_t)addr) + user_pattern_size);
	buf->entries = 1;
	/* Start expanding. */
	memset(flow_items, 0, sizeof(flow_items));
	user_pattern_size -= sizeof(*item);
	/*
	 * Check if the last valid item has a spec set; if so, complete the
	 * pattern with the item type the spec implies so that it can be
	 * used for expansion.
	 */
	missed_item.type = mlx5_flow_expand_rss_item_complete(last_expand_item);
	if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
		/* Item type END indicates expansion is not required. */
		return lsize;
	}
	if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
		next = NULL;
		missed = 1;
		i = 0;
		while (node->next && node->next[i]) {
			next = &graph[node->next[i]];
			if (next->type == missed_item.type) {
				flow_items[0].type = missed_item.type;
				flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
				break;
			}
			if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
				node = next;
				i = 0;
			} else {
				++i;
			}
			next = NULL;
		}
	}
	if (next && missed) {
		elt = 2; /* missed item + item end. */
		node = next;
		lsize += elt * sizeof(*item) + user_pattern_size;
		if (lsize > size)
			return -EINVAL;
		if (node->rss_types & types) {
			buf->entry[buf->entries].priority = 1;
			buf->entry[buf->entries].pattern = addr;
			buf->entries++;
			rte_memcpy(addr, buf->entry[0].pattern,
				   user_pattern_size);
			addr = (void *)(((uintptr_t)addr) + user_pattern_size);
			rte_memcpy(addr, flow_items, elt * sizeof(*item));
			addr = (void *)(((uintptr_t)addr) +
					elt * sizeof(*item));
		}
	} else if (last_expand_item != NULL) {
		node = mlx5_flow_expand_rss_adjust_node(pattern,
				last_expand_item_idx, graph, node);
	}
	memset(flow_items, 0, sizeof(flow_items));
	next_node = mlx5_flow_expand_rss_skip_explicit(graph,
			node->next);
	stack[stack_pos] = next_node;
	node = next_node ? &graph[*next_node] : NULL;
	while (node) {
		flow_items[stack_pos].type = node->type;
		if (node->rss_types & types) {
			size_t n;
			/*
			 * Compute the number of items to copy from the
			 * expansion and copy them.
			 * When stack_pos is 0, there is one element in it,
			 * plus the additional END item.
			 */
			elt = stack_pos + 2;
			flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
			lsize += elt * sizeof(*item) + user_pattern_size;
			if (lsize > size)
				return -EINVAL;
			n = elt * sizeof(*item);
			MLX5_ASSERT((buf->entries) < MLX5_RSS_EXP_ELT_N);
			buf->entry[buf->entries].priority =
				stack_pos + 1 + missed;
			buf->entry[buf->entries].pattern = addr;
			buf->entries++;
			rte_memcpy(addr, buf->entry[0].pattern,
				   user_pattern_size);
			addr = (void *)(((uintptr_t)addr) +
					user_pattern_size);
			rte_memcpy(addr, &missed_item,
				   missed * sizeof(*item));
			addr = (void *)(((uintptr_t)addr) +
				missed * sizeof(*item));
			rte_memcpy(addr, flow_items, n);
			addr = (void *)(((uintptr_t)addr) + n);
		}
		/* Go deeper. */
		if (!(node->node_flags & MLX5_EXPANSION_NODE_OPTIONAL) &&
				node->next) {
			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
					node->next);
			if (stack_pos++ == MLX5_RSS_EXP_ELT_N) {
				rte_errno = E2BIG;
				return -rte_errno;
			}
			stack[stack_pos] = next_node;
		} else if (*(next_node + 1)) {
			/* Follow up with the next possibility. */
			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
					++next_node);
		} else if (!stack_pos) {
			/*
			 * Completing the traverse over the different paths.
			 * The next_node is advanced to the terminator.
			 */
			++next_node;
		} else {
			/* Move to the next path. */
			while (stack_pos) {
				next_node = stack[--stack_pos];
				next_node++;
				if (*next_node)
					break;
			}
			next_node = mlx5_flow_expand_rss_skip_explicit(graph,
					next_node);
			stack[stack_pos] = next_node;
		}
		node = next_node && *next_node ? &graph[*next_node] : NULL;
	}
	return lsize;
}
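
/*
 * Illustrative caller sketch (an assumption for documentation purposes,
 * not part of the driver): a caller typically reserves a worst-case
 * buffer and creates one flow per expanded entry:
 *
 *   union {
 *       struct mlx5_flow_expand_rss buf;
 *       uint8_t buffer[4096];
 *   } expand_buffer;
 *   int ret = mlx5_flow_expand_rss(&expand_buffer.buf,
 *                                  sizeof(expand_buffer.buffer),
 *                                  pattern, rss_types,
 *                                  mlx5_support_expansion,
 *                                  MLX5_EXPANSION_ROOT);
 *   if (ret < 0)
 *       return ret; // buffer too small or graph too deep
 *   for (i = 0; i < expand_buffer.buf.entries; ++i)
 *       create_one_flow(expand_buffer.buf.entry[i].pattern,
 *                       expand_buffer.buf.entry[i].priority);
 */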

/**
 * Expand SQN flows into several possible flows according to the Tx queue
 * number.
 *
 * @param[out] buf
 *   Buffer to store the result expansion.
 * @param[in] size
 *   Buffer size in bytes. If 0, @p buf can be NULL.
 * @param[in] pattern
 *   User flow pattern.
 * @param[in] sq_specs
 *   Buffer to store the SQ specs.
 *
 * @return
 *   0 on success, a negative value otherwise.
 */
static int
mlx5_flow_expand_sqn(struct mlx5_flow_expand_sqn *buf, size_t size,
		     const struct rte_flow_item *pattern,
		     struct mlx5_rte_flow_item_sq *sq_specs)
{
	const struct rte_flow_item *item;
	bool port_representor = false;
	size_t user_pattern_size = 0;
	struct rte_eth_dev *dev;
	struct mlx5_priv *priv;
	void *addr = NULL;
	uint16_t port_id;
	size_t lsize;
	int elt = 2;
	uint16_t i;

	buf->entries = 0;
	for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
		if (item->type == RTE_FLOW_ITEM_TYPE_PORT_REPRESENTOR) {
			const struct rte_flow_item_ethdev *pid_v = item->spec;

			if (!pid_v)
				return 0;
			port_id = pid_v->port_id;
			port_representor = true;
		}
		user_pattern_size += sizeof(*item);
	}
	if (!port_representor)
		return 0;
	dev = &rte_eth_devices[port_id];
	priv = dev->data->dev_private;
	buf->entry[0].pattern = (void *)&buf->entry[priv->txqs_n];
	lsize = offsetof(struct mlx5_flow_expand_sqn, entry) +
		sizeof(buf->entry[0]) * priv->txqs_n;
	if (lsize + (user_pattern_size + sizeof(struct rte_flow_item) * elt) * priv->txqs_n > size)
		return -EINVAL;
	addr = buf->entry[0].pattern;
	for (i = 0; i != priv->txqs_n; ++i) {
		struct rte_flow_item pattern_add[] = {
			{
				.type = (enum rte_flow_item_type)
					MLX5_RTE_FLOW_ITEM_TYPE_SQ,
				.spec = &sq_specs[i],
			},
			{
				.type = RTE_FLOW_ITEM_TYPE_END,
			},
		};
		struct mlx5_txq_ctrl *txq = mlx5_txq_get(dev, i);

		if (txq == NULL)
			return -EINVAL;
		buf->entry[i].pattern = addr;
		sq_specs[i].queue = mlx5_txq_get_sqn(txq);
		mlx5_txq_release(dev, i);
		rte_memcpy(addr, pattern, user_pattern_size);
		addr = (void *)(((uintptr_t)addr) + user_pattern_size);
		rte_memcpy(addr, pattern_add, sizeof(struct rte_flow_item) * elt);
		addr = (void *)(((uintptr_t)addr) + sizeof(struct rte_flow_item) * elt);
		buf->entries++;
	}
	return 0;
}
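
/*
 * Illustrative note (an assumption for documentation purposes): for a
 * pattern PORT_REPRESENTOR / END on a port with txqs_n Tx queues, the
 * function above emits one entry per queue, each being the user pattern
 * followed by an internal MLX5_RTE_FLOW_ITEM_TYPE_SQ item carrying that
 * queue's SQ number. With txqs_n == 2 the buffer would hold:
 *
 *   entry[0]: PORT_REPRESENTOR / SQ(sqn of txq 0) / END
 *   entry[1]: PORT_REPRESENTOR / SQ(sqn of txq 1) / END
 */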

enum mlx5_expansion {
	MLX5_EXPANSION_ROOT,
	MLX5_EXPANSION_ROOT_OUTER,
	MLX5_EXPANSION_OUTER_ETH,
	MLX5_EXPANSION_OUTER_VLAN,
	MLX5_EXPANSION_OUTER_IPV4,
	MLX5_EXPANSION_OUTER_IPV4_UDP,
	MLX5_EXPANSION_OUTER_IPV4_TCP,
	MLX5_EXPANSION_OUTER_IPV4_ESP,
	MLX5_EXPANSION_OUTER_IPV4_ICMP,
	MLX5_EXPANSION_OUTER_IPV6,
	MLX5_EXPANSION_OUTER_IPV6_UDP,
	MLX5_EXPANSION_OUTER_IPV6_TCP,
	MLX5_EXPANSION_OUTER_IPV6_ESP,
	MLX5_EXPANSION_OUTER_IPV6_ICMP6,
	MLX5_EXPANSION_VXLAN,
	MLX5_EXPANSION_STD_VXLAN,
	MLX5_EXPANSION_L3_VXLAN,
	MLX5_EXPANSION_VXLAN_GPE,
	MLX5_EXPANSION_GRE,
	MLX5_EXPANSION_NVGRE,
	MLX5_EXPANSION_GRE_KEY,
	MLX5_EXPANSION_MPLS,
	MLX5_EXPANSION_ETH,
	MLX5_EXPANSION_VLAN,
	MLX5_EXPANSION_IPV4,
	MLX5_EXPANSION_IPV4_UDP,
	MLX5_EXPANSION_IPV4_TCP,
	MLX5_EXPANSION_IPV4_ESP,
	MLX5_EXPANSION_IPV4_ICMP,
	MLX5_EXPANSION_IPV6,
	MLX5_EXPANSION_IPV6_UDP,
	MLX5_EXPANSION_IPV6_TCP,
	MLX5_EXPANSION_IPV6_ESP,
	MLX5_EXPANSION_IPV6_ICMP6,
	MLX5_EXPANSION_IPV6_FRAG_EXT,
	MLX5_EXPANSION_GTP,
	MLX5_EXPANSION_GENEVE,
};

/** Supported expansion of items. */
static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
	[MLX5_EXPANSION_ROOT] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
						  MLX5_EXPANSION_OUTER_IPV4,
						  MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_OUTER_ETH] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						  MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
		.node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
	},
	[MLX5_EXPANSION_OUTER_IPV4] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV4_UDP,
			 MLX5_EXPANSION_OUTER_IPV4_TCP,
			 MLX5_EXPANSION_OUTER_IPV4_ESP,
			 MLX5_EXPANSION_OUTER_IPV4_ICMP,
			 MLX5_EXPANSION_GRE,
			 MLX5_EXPANSION_NVGRE,
			 MLX5_EXPANSION_IPV4,
			 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
			RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						  MLX5_EXPANSION_VXLAN_GPE,
						  MLX5_EXPANSION_MPLS,
						  MLX5_EXPANSION_GENEVE,
						  MLX5_EXPANSION_GTP),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_OUTER_IPV4_ESP] = {
		.type = RTE_FLOW_ITEM_TYPE_ESP,
		.rss_types = RTE_ETH_RSS_ESP,
	},
	[MLX5_EXPANSION_OUTER_IPV4_ICMP] = {
		.type = RTE_FLOW_ITEM_TYPE_ICMP,
	},
	[MLX5_EXPANSION_OUTER_IPV6] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV6_UDP,
			 MLX5_EXPANSION_OUTER_IPV6_TCP,
			 MLX5_EXPANSION_OUTER_IPV6_ESP,
			 MLX5_EXPANSION_OUTER_IPV6_ICMP6,
			 MLX5_EXPANSION_IPV4,
			 MLX5_EXPANSION_IPV6,
			 MLX5_EXPANSION_GRE,
			 MLX5_EXPANSION_NVGRE),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
			RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						  MLX5_EXPANSION_VXLAN_GPE,
						  MLX5_EXPANSION_MPLS,
						  MLX5_EXPANSION_GENEVE,
						  MLX5_EXPANSION_GTP),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
	},
	[MLX5_EXPANSION_OUTER_IPV6_ESP] = {
		.type = RTE_FLOW_ITEM_TYPE_ESP,
		.rss_types = RTE_ETH_RSS_ESP,
	},
	[MLX5_EXPANSION_OUTER_IPV6_ICMP6] = {
		.type = RTE_FLOW_ITEM_TYPE_ICMP6,
	},
	[MLX5_EXPANSION_VXLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	},
	[MLX5_EXPANSION_STD_VXLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	},
	[MLX5_EXPANSION_L3_VXLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	},
	[MLX5_EXPANSION_VXLAN_GPE] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
	},
	[MLX5_EXPANSION_GRE] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6,
						  MLX5_EXPANSION_GRE_KEY,
						  MLX5_EXPANSION_MPLS),
		.type = RTE_FLOW_ITEM_TYPE_GRE,
	},
	[MLX5_EXPANSION_GRE_KEY] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6,
						  MLX5_EXPANSION_MPLS),
		.type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
		.node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
	},
	[MLX5_EXPANSION_NVGRE] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
		.type = RTE_FLOW_ITEM_TYPE_NVGRE,
	},
	[MLX5_EXPANSION_MPLS] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6,
						  MLX5_EXPANSION_ETH),
		.type = RTE_FLOW_ITEM_TYPE_MPLS,
		.node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
	},
	[MLX5_EXPANSION_ETH] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
		.node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
	},
	[MLX5_EXPANSION_IPV4] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
						  MLX5_EXPANSION_IPV4_TCP,
						  MLX5_EXPANSION_IPV4_ESP,
						  MLX5_EXPANSION_IPV4_ICMP),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
			RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_IPV4_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_IPV4_ESP] = {
		.type = RTE_FLOW_ITEM_TYPE_ESP,
		.rss_types = RTE_ETH_RSS_ESP,
	},
	[MLX5_EXPANSION_IPV4_ICMP] = {
		.type = RTE_FLOW_ITEM_TYPE_ICMP,
	},
	[MLX5_EXPANSION_IPV6] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
						  MLX5_EXPANSION_IPV6_TCP,
						  MLX5_EXPANSION_IPV6_ESP,
						  MLX5_EXPANSION_IPV6_ICMP6,
						  MLX5_EXPANSION_IPV6_FRAG_EXT),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
			RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_IPV6_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
	},
	[MLX5_EXPANSION_IPV6_ESP] = {
		.type = RTE_FLOW_ITEM_TYPE_ESP,
		.rss_types = RTE_ETH_RSS_ESP,
	},
	[MLX5_EXPANSION_IPV6_FRAG_EXT] = {
		.type = RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
	},
	[MLX5_EXPANSION_IPV6_ICMP6] = {
		.type = RTE_FLOW_ITEM_TYPE_ICMP6,
	},
	[MLX5_EXPANSION_GTP] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_GTP,
	},
	[MLX5_EXPANSION_GENEVE] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_GENEVE,
	},
};
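
/*
 * Illustrative expansion example (an assumption for documentation
 * purposes): given the user pattern "eth / end" and RSS types
 * RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV6_UDP, walking the
 * graph above from MLX5_EXPANSION_ROOT yields entries similar to:
 *
 *   entry[0]: ETH / END                (user pattern, priority 0)
 *   entry[1]: ETH / IPV4 / UDP / END   (priority 2)
 *   entry[2]: ETH / IPV6 / UDP / END   (priority 2)
 *
 * Nodes whose rss_types do not intersect the requested types (the TCP,
 * ESP and ICMP leaves here) contribute no entry of their own, and the
 * explicit VLAN node is skipped because it is absent from the pattern.
 */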

static struct rte_flow_action_handle *
mlx5_action_handle_create(struct rte_eth_dev *dev,
			  const struct rte_flow_indir_action_conf *conf,
			  const struct rte_flow_action *action,
			  struct rte_flow_error *error);
static int mlx5_action_handle_destroy
				(struct rte_eth_dev *dev,
				 struct rte_flow_action_handle *handle,
				 struct rte_flow_error *error);
static int mlx5_action_handle_update
				(struct rte_eth_dev *dev,
				 struct rte_flow_action_handle *handle,
				 const void *update,
				 struct rte_flow_error *error);
static int mlx5_action_handle_query
				(struct rte_eth_dev *dev,
				 const struct rte_flow_action_handle *handle,
				 void *data,
				 struct rte_flow_error *error);
static int
mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
		    struct rte_flow_tunnel *app_tunnel,
		    struct rte_flow_action **actions,
		    uint32_t *num_of_actions,
		    struct rte_flow_error *error);
static int
mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
		       struct rte_flow_tunnel *app_tunnel,
		       struct rte_flow_item **items,
		       uint32_t *num_of_items,
		       struct rte_flow_error *error);
static int
mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
			      struct rte_flow_item *pmd_items,
			      uint32_t num_items, struct rte_flow_error *err);
static int
mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
				struct rte_flow_action *pmd_actions,
				uint32_t num_actions,
				struct rte_flow_error *err);
static int
mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
				  struct rte_mbuf *m,
				  struct rte_flow_restore_info *info,
				  struct rte_flow_error *err);
static struct rte_flow_item_flex_handle *
mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
			   const struct rte_flow_item_flex_conf *conf,
			   struct rte_flow_error *error);
static int
mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
			    const struct rte_flow_item_flex_handle *handle,
			    struct rte_flow_error *error);
static int
mlx5_flow_info_get(struct rte_eth_dev *dev,
		   struct rte_flow_port_info *port_info,
		   struct rte_flow_queue_info *queue_info,
		   struct rte_flow_error *error);
static int
mlx5_flow_port_configure(struct rte_eth_dev *dev,
			 const struct rte_flow_port_attr *port_attr,
			 uint16_t nb_queue,
			 const struct rte_flow_queue_attr *queue_attr[],
			 struct rte_flow_error *err);

static struct rte_flow_pattern_template *
mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
		const struct rte_flow_pattern_template_attr *attr,
		const struct rte_flow_item items[],
		struct rte_flow_error *error);

static int
mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
				   struct rte_flow_pattern_template *template,
				   struct rte_flow_error *error);
static struct rte_flow_actions_template *
mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
			const struct rte_flow_actions_template_attr *attr,
			const struct rte_flow_action actions[],
			const struct rte_flow_action masks[],
			struct rte_flow_error *error);
static int
mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
				   struct rte_flow_actions_template *template,
				   struct rte_flow_error *error);

static struct rte_flow_template_table *
mlx5_flow_table_create(struct rte_eth_dev *dev,
		       const struct rte_flow_template_table_attr *attr,
		       struct rte_flow_pattern_template *item_templates[],
		       uint8_t nb_item_templates,
		       struct rte_flow_actions_template *action_templates[],
		       uint8_t nb_action_templates,
		       struct rte_flow_error *error);
static int
mlx5_flow_table_destroy(struct rte_eth_dev *dev,
			struct rte_flow_template_table *table,
			struct rte_flow_error *error);
static struct rte_flow *
mlx5_flow_async_flow_create(struct rte_eth_dev *dev,
			    uint32_t queue,
			    const struct rte_flow_op_attr *attr,
			    struct rte_flow_template_table *table,
			    const struct rte_flow_item items[],
			    uint8_t pattern_template_index,
			    const struct rte_flow_action actions[],
			    uint8_t action_template_index,
			    void *user_data,
			    struct rte_flow_error *error);
static struct rte_flow *
mlx5_flow_async_flow_create_by_index(struct rte_eth_dev *dev,
			    uint32_t queue,
			    const struct rte_flow_op_attr *attr,
			    struct rte_flow_template_table *table,
			    uint32_t rule_index,
			    const struct rte_flow_action actions[],
			    uint8_t action_template_index,
			    void *user_data,
			    struct rte_flow_error *error);
static int
mlx5_flow_async_flow_update(struct rte_eth_dev *dev,
			     uint32_t queue,
			     const struct rte_flow_op_attr *attr,
			     struct rte_flow *flow,
			     const struct rte_flow_action actions[],
			     uint8_t action_template_index,
			     void *user_data,
			     struct rte_flow_error *error);
static int
mlx5_flow_async_flow_destroy(struct rte_eth_dev *dev,
			     uint32_t queue,
			     const struct rte_flow_op_attr *attr,
			     struct rte_flow *flow,
			     void *user_data,
			     struct rte_flow_error *error);
static int
mlx5_flow_pull(struct rte_eth_dev *dev,
	       uint32_t queue,
	       struct rte_flow_op_result res[],
	       uint16_t n_res,
	       struct rte_flow_error *error);
static int
mlx5_flow_push(struct rte_eth_dev *dev,
	       uint32_t queue,
	       struct rte_flow_error *error);

static struct rte_flow_action_handle *
mlx5_flow_async_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
				 const struct rte_flow_op_attr *attr,
				 const struct rte_flow_indir_action_conf *conf,
				 const struct rte_flow_action *action,
				 void *user_data,
				 struct rte_flow_error *error);

static int
mlx5_flow_async_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
				 const struct rte_flow_op_attr *attr,
				 struct rte_flow_action_handle *handle,
				 const void *update,
				 void *user_data,
				 struct rte_flow_error *error);

static int
mlx5_flow_async_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
				  const struct rte_flow_op_attr *attr,
				  struct rte_flow_action_handle *handle,
				  void *user_data,
				  struct rte_flow_error *error);

static int
mlx5_flow_async_action_handle_query(struct rte_eth_dev *dev, uint32_t queue,
				 const struct rte_flow_op_attr *attr,
				 const struct rte_flow_action_handle *handle,
				 void *data,
				 void *user_data,
				 struct rte_flow_error *error);
static int
mlx5_action_handle_query_update(struct rte_eth_dev *dev,
				struct rte_flow_action_handle *handle,
				const void *update, void *query,
				enum rte_flow_query_update_mode qu_mode,
				struct rte_flow_error *error);
static int
mlx5_flow_async_action_handle_query_update
	(struct rte_eth_dev *dev, uint32_t queue_id,
	 const struct rte_flow_op_attr *op_attr,
	 struct rte_flow_action_handle *action_handle,
	 const void *update, void *query,
	 enum rte_flow_query_update_mode qu_mode,
	 void *user_data, struct rte_flow_error *error);

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.isolate = mlx5_flow_isolate,
	.query = mlx5_flow_query,
	.dev_dump = mlx5_flow_dev_dump,
	.get_q_aged_flows = mlx5_flow_get_q_aged_flows,
	.get_aged_flows = mlx5_flow_get_aged_flows,
	.action_handle_create = mlx5_action_handle_create,
	.action_handle_destroy = mlx5_action_handle_destroy,
	.action_handle_update = mlx5_action_handle_update,
	.action_handle_query = mlx5_action_handle_query,
	.action_handle_query_update = mlx5_action_handle_query_update,
	.tunnel_decap_set = mlx5_flow_tunnel_decap_set,
	.tunnel_match = mlx5_flow_tunnel_match,
	.tunnel_action_decap_release = mlx5_flow_tunnel_action_release,
	.tunnel_item_release = mlx5_flow_tunnel_item_release,
	.get_restore_info = mlx5_flow_tunnel_get_restore_info,
	.flex_item_create = mlx5_flow_flex_item_create,
	.flex_item_release = mlx5_flow_flex_item_release,
	.info_get = mlx5_flow_info_get,
	.pick_transfer_proxy = mlx5_flow_pick_transfer_proxy,
	.configure = mlx5_flow_port_configure,
	.pattern_template_create = mlx5_flow_pattern_template_create,
	.pattern_template_destroy = mlx5_flow_pattern_template_destroy,
	.actions_template_create = mlx5_flow_actions_template_create,
	.actions_template_destroy = mlx5_flow_actions_template_destroy,
	.template_table_create = mlx5_flow_table_create,
	.template_table_destroy = mlx5_flow_table_destroy,
	.async_create = mlx5_flow_async_flow_create,
	.async_create_by_index = mlx5_flow_async_flow_create_by_index,
	.async_destroy = mlx5_flow_async_flow_destroy,
	.pull = mlx5_flow_pull,
	.push = mlx5_flow_push,
	.async_action_handle_create = mlx5_flow_async_action_handle_create,
	.async_action_handle_update = mlx5_flow_async_action_handle_update,
	.async_action_handle_query_update =
		mlx5_flow_async_action_handle_query_update,
	.async_action_handle_query = mlx5_flow_async_action_handle_query,
	.async_action_handle_destroy = mlx5_flow_async_action_handle_destroy,
	.async_actions_update = mlx5_flow_async_flow_update,
};

/* Tunnel information. */
struct mlx5_flow_tunnel_info {
	uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GENEVE,
		.ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GRE,
		.ptype = RTE_PTYPE_TUNNEL_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_NVGRE,
		.ptype = RTE_PTYPE_TUNNEL_NVGRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPIP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GTP,
		.ptype = RTE_PTYPE_TUNNEL_GTPU,
	},
};

/**
 * Translate tag ID to register.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] feature
 *   The feature that requests the register.
 * @param[in] id
 *   The requested register ID.
 * @param[out] error
 *   Error description in case of any.
 *
 * @return
 *   The requested register on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
		     enum mlx5_feature_name feature,
		     uint32_t id,
		     struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_sh_config *config = &priv->sh->config;
	enum modify_reg start_reg;
	bool skip_mtr_reg = false;

	switch (feature) {
	case MLX5_HAIRPIN_RX:
		return REG_B;
	case MLX5_HAIRPIN_TX:
		return REG_A;
	case MLX5_METADATA_RX:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_B;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		case MLX5_XMETA_MODE_META32_HWS:
			return REG_C_1;
		}
		break;
	case MLX5_METADATA_TX:
		if (config->dv_flow_en == 2 && config->dv_xmeta_en == MLX5_XMETA_MODE_META32_HWS) {
			return REG_C_1;
		} else {
			return REG_A;
		}
	case MLX5_METADATA_FDB:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NON;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		case MLX5_XMETA_MODE_META32_HWS:
			return REG_C_1;
		}
		break;
	case MLX5_FLOW_MARK:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
		case MLX5_XMETA_MODE_META32_HWS:
			return REG_NON;
		case MLX5_XMETA_MODE_META16:
			return REG_C_1;
		case MLX5_XMETA_MODE_META32:
			return REG_C_0;
		}
		break;
	case MLX5_MTR_ID:
		/*
		 * If meter color and meter id share one register, flow match
		 * should use the meter color register for match.
		 */
		if (priv->mtr_reg_share)
			return priv->mtr_color_reg;
		else
			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			       REG_C_3;
	case MLX5_MTR_COLOR:
	case MLX5_ASO_FLOW_HIT:
	case MLX5_ASO_CONNTRACK:
	case MLX5_SAMPLE_ID:
		/* All features use the same REG_C. */
		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
		return priv->mtr_color_reg;
	case MLX5_COPY_MARK:
		/*
		 * The metadata COPY_MARK register is used only in the meter
		 * suffix subflow when a meter is present, so it is safe to
		 * share the same register.
		 */
		return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
	case MLX5_APP_TAG:
		/*
		 * If the meter is enabled, it engages a register for color
		 * match and flow match. If the meter color match does not use
		 * REG_C_2, the REG_C_x used by the meter color match must be
		 * skipped.
		 * If the meter is disabled, all available registers are free
		 * to use.
		 */
		start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			    (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
		skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
		if (id > (uint32_t)(REG_C_7 - start_reg))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "invalid tag id");
		if (priv->sh->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		/*
		 * This case means the meter is using a REG_C_x greater
		 * than 2. Take care not to conflict with the meter color
		 * REG_C_x. If the available index REG_C_y >= REG_C_x, skip
		 * the color register.
		 */
		if (skip_mtr_reg && priv->sh->flow_mreg_c
		    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
			if (id >= (uint32_t)(REG_C_7 - start_reg))
				return rte_flow_error_set(error, EINVAL,
						       RTE_FLOW_ERROR_TYPE_ITEM,
							NULL, "invalid tag id");
			if (priv->sh->flow_mreg_c
			    [id + 1 + start_reg - REG_C_0] != REG_NON)
				return priv->sh->flow_mreg_c
					       [id + 1 + start_reg - REG_C_0];
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		}
		return priv->sh->flow_mreg_c[id + start_reg - REG_C_0];
	}
	MLX5_ASSERT(false);
	return rte_flow_error_set(error, EINVAL,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL, "invalid feature name");
}
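
/*
 * Illustrative usage sketch (an assumption, not part of the driver): a
 * validation path translating an application TAG index into a REG_C
 * register would look like:
 *
 *   int reg = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, tag_index, error);
 *   if (reg < 0)
 *       return reg; // rte_errno and *error already set by the helper
 *   // otherwise "reg" identifies one of the REG_C_2..REG_C_7 registers
 *   // not taken by the meter color match.
 */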

/**
 * Check extensive flow metadata register support.
 *
 * @param dev
 *   Pointer to rte_eth_dev structure.
 *
 * @return
 *   True if device supports extensive flow metadata register, otherwise false.
 */
bool
mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	/*
	 * Having an available reg_c can be regarded as supporting
	 * extensive flow metadata registers, which means:
	 * - metadata register copy action by modify header.
	 * - 16 modify header actions are supported.
	 * - reg_c's are preserved across different domains (FDB and NIC) on
	 *   packet loopback by flow lookup miss.
	 */
	return priv->sh->flow_mreg_c[2] != REG_NON;
}

/**
 * Get the lowest priority.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Pointer to device flow rule attributes.
 *
 * @return
 *   The lowest priority value of the flow.
 */
uint32_t
mlx5_get_lowest_priority(struct rte_eth_dev *dev,
			  const struct rte_flow_attr *attr)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	if (!attr->group && !(attr->transfer && priv->fdb_def_rule))
		return priv->sh->flow_max_priority - 2;
	return MLX5_NON_ROOT_FLOW_MAX_PRIO - 1;
}

/**
 * Calculate matcher priority of the flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Pointer to device flow rule attributes.
 * @param[in] subpriority
 *   The priority based on the items.
 * @param[in] external
 *   Flow is user flow.
 * @return
 *   The matcher priority of the flow.
 */
uint16_t
mlx5_get_matcher_priority(struct rte_eth_dev *dev,
			  const struct rte_flow_attr *attr,
			  uint32_t subpriority, bool external)
{
	uint16_t priority = (uint16_t)attr->priority;
	struct mlx5_priv *priv = dev->data->dev_private;

	/* NIC root rules */
	if (!attr->group && !attr->transfer) {
		if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
			priority = priv->sh->flow_max_priority - 1;
		return mlx5_os_flow_adjust_priority(dev, priority, subpriority);
	/* FDB root rules */
	} else if (attr->transfer && (!external || !priv->fdb_def_rule) &&
		   attr->group == 0 &&
		   attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR) {
		return (priv->sh->flow_max_priority - 1) * 3;
	}
	if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
		priority = MLX5_NON_ROOT_FLOW_MAX_PRIO;
	return priority * 3 + subpriority;
}
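
/*
 * Worked example (illustrative): for a non-root user rule with
 * attr->group == 1, attr->priority == 2 and subpriority == 1, the
 * function above returns 2 * 3 + 1 == 7. Each user priority level is
 * spread over three matcher priorities to leave room for the item-based
 * subpriority.
 */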

/**
 * Verify the @p item specifications (spec, last, mask) are compatible with the
 * NIC capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   @p item->mask or flow default bit-masks.
 * @param[in] nic_mask
 *   Bit-masks covering supported fields by the NIC to compare with user mask.
 * @param[in] size
 *   Bit-masks size in bytes.
 * @param[in] range_accepted
 *   True if range of values is accepted for specific fields, false otherwise.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
			  const uint8_t *mask,
			  const uint8_t *nic_mask,
			  unsigned int size,
			  bool range_accepted,
			  struct rte_flow_error *error)
{
	unsigned int i;

	MLX5_ASSERT(nic_mask);
	for (i = 0; i < size; ++i)
		if ((nic_mask[i] | mask[i]) != nic_mask[i])
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "mask enables non supported"
						  " bits");
	if (!item->spec && (item->mask || item->last))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "mask/last without a spec is not"
					  " supported");
	if (item->spec && item->last && !range_accepted) {
		uint8_t spec[size];
		uint8_t last[size];
		unsigned int i;
		int ret;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
		}
		ret = memcmp(spec, last, size);
		if (ret != 0)
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "range is not valid");
	}
	return 0;
}
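
/*
 * Illustrative note (not part of the driver): if the NIC mask byte for a
 * field is 0x0f and the user supplies a mask byte of 0x1f, then
 * (nic_mask | mask) == 0x1f != nic_mask, so the first check above rejects
 * the item with "mask enables non supported bits". The last check rejects
 * ranges such as spec = 10, last = 20 when range matching is not accepted,
 * because the masked spec and last bytes differ.
 */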

/**
 * Adjust the hash fields according to the @p flow information.
 *
 * @param[in] rss_desc
 *   Pointer to the mlx5 RSS descriptor.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   RTE_ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 *
 * @return
 *   The hash fields that should be used.
 */
uint64_t
mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
			    int tunnel __rte_unused, uint64_t layer_types,
			    uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	int rss_request_inner = rss_desc->level >= 2;

	/* Check RSS hash level for tunnel. */
	if (tunnel && rss_request_inner)
		hash_fields |= IBV_RX_HASH_INNER;
	else if (tunnel || rss_request_inner)
		return 0;
#endif
	/* Check if requested layer matches RSS hash fields. */
	if (!(rss_desc->types & layer_types))
		return 0;
	return hash_fields;
}
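
/*
 * Illustrative usage sketch (an assumption, not part of the driver): for
 * an inner UDP layer under a tunnel, with rss_desc->level >= 2 and
 * rss_desc->types including the UDP RSS type, a caller would do roughly:
 *
 *   fields = mlx5_flow_hashfields_adjust(rss_desc, 1,
 *                                        RTE_ETH_RSS_NONFRAG_IPV4_UDP,
 *                                        udp_hash_fields);
 *
 * and receive udp_hash_fields with IBV_RX_HASH_INNER set, while a level-1
 * (outer) request on the same tunnel layer yields 0 instead.
 */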

/**
 * Look up and set the ptype in the data Rx part. A single ptype can be
 * used; if several tunnel rules are used on this queue, the tunnel ptype
 * is cleared.
 *
 * @param rxq_ctrl
 *   Rx queue to update.
 */
static void
flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	unsigned int i;
	uint32_t tunnel_ptype = 0;

	/* Look up for the ptype to use. */
	for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
		if (!rxq_ctrl->flow_tunnels_n[i])
			continue;
		if (!tunnel_ptype) {
			tunnel_ptype = tunnels_info[i].ptype;
		} else {
			tunnel_ptype = 0;
			break;
		}
	}
	rxq_ctrl->rxq.tunnel = tunnel_ptype;
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
 * flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] dev_handle
 *   Pointer to device flow handle structure.
 */
void
flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
		       struct mlx5_flow_handle *dev_handle)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct mlx5_ind_table_obj *ind_tbl = NULL;
	unsigned int i;

	if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
		struct mlx5_hrxq *hrxq;

		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
			      dev_handle->rix_hrxq);
		if (hrxq)
			ind_tbl = hrxq->ind_table;
	} else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
		struct mlx5_shared_action_rss *shared_rss;

		shared_rss = mlx5_ipool_get
			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
			 dev_handle->rix_srss);
		if (shared_rss)
			ind_tbl = shared_rss->ind_tbl;
	}
	if (!ind_tbl)
		return;
	for (i = 0; i != ind_tbl->queues_n; ++i) {
		int idx = ind_tbl->queues[i];
		struct mlx5_rxq_ctrl *rxq_ctrl;

		if (mlx5_is_external_rxq(dev, idx))
			continue;
		rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
		MLX5_ASSERT(rxq_ctrl != NULL);
		if (rxq_ctrl == NULL)
			continue;
		/*
		 * To support metadata register copy on Tx loopback,
		 * this must always be enabled (metadata may arrive
		 * from another port, not only from local flows).
		 */
		if (tunnel) {
			unsigned int j;

			/* Increase the counter matching the flow. */
			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
				if ((tunnels_info[j].tunnel &
				     dev_handle->layers) ==
				    tunnels_info[j].tunnel) {
					rxq_ctrl->flow_tunnels_n[j]++;
					break;
				}
			}
			flow_rxq_tunnel_ptype_update(rxq_ctrl);
		}
	}
}

static void
flow_rxq_mark_flag_set(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_rxq_ctrl *rxq_ctrl;
	uint16_t port_id;

	if (priv->sh->shared_mark_enabled)
		return;
	if (priv->master || priv->representor) {
		MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
			struct mlx5_priv *opriv =
				rte_eth_devices[port_id].data->dev_private;

			if (!opriv ||
			    opriv->sh != priv->sh ||
			    opriv->domain_id != priv->domain_id ||
			    opriv->mark_enabled)
				continue;
			LIST_FOREACH(rxq_ctrl, &opriv->rxqsctrl, next) {
				rxq_ctrl->rxq.mark = 1;
			}
			opriv->mark_enabled = 1;
		}
	} else {
		LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
			rxq_ctrl->rxq.mark = 1;
		}
		priv->mark_enabled = 1;
	}
	priv->sh->shared_mark_enabled = 1;
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] flow
 *   Pointer to flow structure.
 */
static void
flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t handle_idx;
	struct mlx5_flow_handle *dev_handle;
	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();

	MLX5_ASSERT(wks);
	if (wks->mark)
		flow_rxq_mark_flag_set(dev);
	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
		       handle_idx, dev_handle, next)
		flow_drv_rxq_flags_set(dev, dev_handle);
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * device flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] dev_handle
 *   Pointer to the device flow handle structure.
 */
static void
flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
			struct mlx5_flow_handle *dev_handle)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
	struct mlx5_ind_table_obj *ind_tbl = NULL;
	unsigned int i;

	if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
		struct mlx5_hrxq *hrxq;

		hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
			      dev_handle->rix_hrxq);
		if (hrxq)
			ind_tbl = hrxq->ind_table;
	} else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
		struct mlx5_shared_action_rss *shared_rss;

		shared_rss = mlx5_ipool_get
			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
			 dev_handle->rix_srss);
		if (shared_rss)
			ind_tbl = shared_rss->ind_tbl;
	}
	if (!ind_tbl)
		return;
	MLX5_ASSERT(dev->data->dev_started);
	for (i = 0; i != ind_tbl->queues_n; ++i) {
		int idx = ind_tbl->queues[i];
		struct mlx5_rxq_ctrl *rxq_ctrl;

		if (mlx5_is_external_rxq(dev, idx))
			continue;
		rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
		MLX5_ASSERT(rxq_ctrl != NULL);
		if (rxq_ctrl == NULL)
			continue;
		if (tunnel) {
			unsigned int j;

			/* Decrease the counter matching the flow. */
			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
				if ((tunnels_info[j].tunnel &
				     dev_handle->layers) ==
				    tunnels_info[j].tunnel) {
					rxq_ctrl->flow_tunnels_n[j]--;
					break;
				}
			}
			flow_rxq_tunnel_ptype_update(rxq_ctrl);
		}
	}
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * @p flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to the flow.
 */
static void
1772 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1773 {
1774 	struct mlx5_priv *priv = dev->data->dev_private;
1775 	uint32_t handle_idx;
1776 	struct mlx5_flow_handle *dev_handle;
1777 
1778 	SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1779 		       handle_idx, dev_handle, next)
1780 		flow_drv_rxq_flags_trim(dev, dev_handle);
1781 }
1782 
1783 /**
1784  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
1785  *
1786  * @param dev
1787  *   Pointer to Ethernet device.
1788  */
1789 static void
1790 flow_rxq_flags_clear(struct rte_eth_dev *dev)
1791 {
1792 	struct mlx5_priv *priv = dev->data->dev_private;
1793 	unsigned int i;
1794 
1795 	for (i = 0; i != priv->rxqs_n; ++i) {
1796 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1797 		unsigned int j;
1798 
1799 		if (rxq == NULL || rxq->ctrl == NULL)
1800 			continue;
1801 		rxq->ctrl->rxq.mark = 0;
1802 		for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
1803 			rxq->ctrl->flow_tunnels_n[j] = 0;
1804 		rxq->ctrl->rxq.tunnel = 0;
1805 	}
1806 	priv->mark_enabled = 0;
1807 	priv->sh->shared_mark_enabled = 0;
1808 }
1809 
1810 static uint64_t mlx5_restore_info_dynflag;
1811 
1812 int
1813 mlx5_flow_rx_metadata_negotiate(struct rte_eth_dev *dev, uint64_t *features)
1814 {
1815 	struct mlx5_priv *priv = dev->data->dev_private;
1816 	uint64_t supported = 0;
1817 
1818 	if (!is_tunnel_offload_active(dev)) {
1819 		supported |= RTE_ETH_RX_METADATA_USER_FLAG;
1820 		supported |= RTE_ETH_RX_METADATA_USER_MARK;
1821 		if ((*features & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0) {
1822 			DRV_LOG(DEBUG,
1823 				"tunnel offload was not activated, consider setting dv_xmeta_en=%d",
1824 				MLX5_XMETA_MODE_MISS_INFO);
1825 		}
1826 	} else {
1827 		supported |= RTE_ETH_RX_METADATA_TUNNEL_ID;
1828 		if ((*features & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0 &&
1829 				mlx5_restore_info_dynflag == 0)
1830 			mlx5_restore_info_dynflag = rte_flow_restore_info_dynflag();
1831 	}
1832 
1833 	if (((*features & supported) & RTE_ETH_RX_METADATA_TUNNEL_ID) != 0)
1834 		priv->tunnel_enabled = 1;
1835 	else
1836 		priv->tunnel_enabled = 0;
1837 
1838 	*features &= supported;
1839 	return 0;
1840 }
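
/*
 * Illustrative usage sketch (not part of the driver): the application side
 * negotiating Rx metadata delivery before configuring the port; "port_id"
 * is a placeholder.
 *
 *	uint64_t features = RTE_ETH_RX_METADATA_USER_FLAG |
 *			    RTE_ETH_RX_METADATA_USER_MARK |
 *			    RTE_ETH_RX_METADATA_TUNNEL_ID;
 *
 *	// Must be called before rte_eth_dev_configure().
 *	if (rte_eth_rx_metadata_negotiate(port_id, &features) == 0 &&
 *	    (features & RTE_ETH_RX_METADATA_TUNNEL_ID) == 0)
 *		printf("tunnel ID restore info not delivered\n");
 */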
1841 
1842 /**
1843  * Set the Rx queue dynamic metadata (mask and offset) for a flow
1844  *
1845  * @param[in] dev
1846  *   Pointer to the Ethernet device structure.
1847  */
1848 void
1849 mlx5_flow_rxq_dynf_set(struct rte_eth_dev *dev)
1850 {
1851 	struct mlx5_priv *priv = dev->data->dev_private;
1852 	uint64_t mark_flag = RTE_MBUF_F_RX_FDIR_ID;
1853 	unsigned int i;
1854 
1855 	if (priv->tunnel_enabled)
1856 		mark_flag |= mlx5_restore_info_dynflag;
1857 
1858 	for (i = 0; i != priv->rxqs_n; ++i) {
1859 		struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1860 		struct mlx5_rxq_data *data;
1861 
1862 		if (rxq == NULL || rxq->ctrl == NULL)
1863 			continue;
1864 		data = &rxq->ctrl->rxq;
1865 		if (!rte_flow_dynf_metadata_avail()) {
1866 			data->dynf_meta = 0;
1867 			data->flow_meta_mask = 0;
1868 			data->flow_meta_offset = -1;
1869 			data->flow_meta_port_mask = 0;
1870 		} else {
1871 			data->dynf_meta = 1;
1872 			data->flow_meta_mask = rte_flow_dynf_metadata_mask;
1873 			data->flow_meta_offset = rte_flow_dynf_metadata_offs;
1874 			data->flow_meta_port_mask = priv->sh->dv_meta_mask;
1875 		}
1876 		data->mark_flag = mark_flag;
1877 	}
1878 }
1879 
1880 /*
 * Return a pointer to the desired action in the list of actions.
1882  *
1883  * @param[in] actions
1884  *   The list of actions to search the action in.
1885  * @param[in] action
1886  *   The action to find.
1887  *
1888  * @return
1889  *   Pointer to the action in the list, if found. NULL otherwise.
1890  */
1891 const struct rte_flow_action *
1892 mlx5_flow_find_action(const struct rte_flow_action *actions,
1893 		      enum rte_flow_action_type action)
1894 {
1895 	if (actions == NULL)
1896 		return NULL;
1897 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
1898 		if (actions->type == action)
1899 			return actions;
1900 	return NULL;
1901 }
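
/*
 * Illustrative usage sketch (not part of the driver): extracting the RSS
 * configuration from an action list, assuming "actions" is a valid array
 * terminated by RTE_FLOW_ACTION_TYPE_END.
 *
 *	const struct rte_flow_action *act =
 *		mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);
 *	const struct rte_flow_action_rss *rss_conf =
 *		act != NULL ? act->conf : NULL;
 */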
1902 
1903 /*
1904  * Validate the flag action.
1905  *
1906  * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
1908  * @param[in] attr
1909  *   Attributes of flow that includes this action.
1910  * @param[out] error
1911  *   Pointer to error structure.
1912  *
1913  * @return
1914  *   0 on success, a negative errno value otherwise and rte_errno is set.
1915  */
1916 int
1917 mlx5_flow_validate_action_flag(uint64_t action_flags,
1918 			       const struct rte_flow_attr *attr,
1919 			       struct rte_flow_error *error)
1920 {
1921 	if (action_flags & MLX5_FLOW_ACTION_MARK)
1922 		return rte_flow_error_set(error, EINVAL,
1923 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1924 					  "can't mark and flag in same flow");
1925 	if (action_flags & MLX5_FLOW_ACTION_FLAG)
1926 		return rte_flow_error_set(error, EINVAL,
1927 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1928 					  "can't have 2 flag"
1929 					  " actions in same flow");
1930 	if (attr->egress)
1931 		return rte_flow_error_set(error, ENOTSUP,
1932 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1933 					  "flag action not supported for "
1934 					  "egress");
1935 	return 0;
1936 }
1937 
1938 /*
1939  * Validate the mark action.
1940  *
1941  * @param[in] action
 *   Pointer to the mark action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
1945  * @param[in] attr
1946  *   Attributes of flow that includes this action.
1947  * @param[out] error
1948  *   Pointer to error structure.
1949  *
1950  * @return
1951  *   0 on success, a negative errno value otherwise and rte_errno is set.
1952  */
1953 int
1954 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
1955 			       uint64_t action_flags,
1956 			       const struct rte_flow_attr *attr,
1957 			       struct rte_flow_error *error)
1958 {
1959 	const struct rte_flow_action_mark *mark = action->conf;
1960 
1961 	if (!mark)
1962 		return rte_flow_error_set(error, EINVAL,
1963 					  RTE_FLOW_ERROR_TYPE_ACTION,
1964 					  action,
1965 					  "configuration cannot be null");
1966 	if (mark->id >= MLX5_FLOW_MARK_MAX)
1967 		return rte_flow_error_set(error, EINVAL,
1968 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1969 					  &mark->id,
					  "mark id must be in 0 <= id < "
1971 					  RTE_STR(MLX5_FLOW_MARK_MAX));
1972 	if (action_flags & MLX5_FLOW_ACTION_FLAG)
1973 		return rte_flow_error_set(error, EINVAL,
1974 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1975 					  "can't flag and mark in same flow");
1976 	if (action_flags & MLX5_FLOW_ACTION_MARK)
1977 		return rte_flow_error_set(error, EINVAL,
1978 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1979 					  "can't have 2 mark actions in same"
1980 					  " flow");
1981 	if (attr->egress)
1982 		return rte_flow_error_set(error, ENOTSUP,
1983 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1984 					  "mark action not supported for "
1985 					  "egress");
1986 	return 0;
1987 }
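
/*
 * Illustrative sketch (not part of the driver): a mark action accepted by
 * the checks above - the id (42 here is arbitrary) stays below
 * MLX5_FLOW_MARK_MAX and the flow carries neither a flag nor a second
 * mark action.
 *
 *	struct rte_flow_action_mark mark = { .id = 42 };
 *	struct rte_flow_action action = {
 *		.type = RTE_FLOW_ACTION_TYPE_MARK,
 *		.conf = &mark,
 *	};
 */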
1988 
1989 /*
1990  * Validate the drop action.
1991  *
1992  * @param[in] dev
1993  *   Pointer to the Ethernet device structure.
1994  * @param[in] is_root
1995  *   True if flow is validated for root table. False otherwise.
1996  * @param[in] attr
1997  *   Attributes of flow that includes this action.
1998  * @param[out] error
1999  *   Pointer to error structure.
2000  *
2001  * @return
2002  *   0 on success, a negative errno value otherwise and rte_errno is set.
2003  */
2004 int
2005 mlx5_flow_validate_action_drop(struct rte_eth_dev *dev,
2006 			       bool is_root,
2007 			       const struct rte_flow_attr *attr,
2008 			       struct rte_flow_error *error)
2009 {
2010 	struct mlx5_priv *priv = dev->data->dev_private;
2011 
2012 	if (priv->sh->config.dv_flow_en == 0 && attr->egress)
2013 		return rte_flow_error_set(error, ENOTSUP,
2014 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2015 					  "drop action not supported for "
2016 					  "egress");
2017 	if (priv->sh->config.dv_flow_en == 1 && is_root && (attr->egress || attr->transfer) &&
2018 	    !priv->sh->dr_root_drop_action_en) {
2019 		return rte_flow_error_set(error, ENOTSUP,
2020 					  RTE_FLOW_ERROR_TYPE_ATTR, NULL,
2021 					  "drop action not supported for "
2022 					  "egress and transfer on group 0");
2023 	}
2024 	return 0;
2025 }
2026 
2027 /*
2028  * Validate the queue action.
2029  *
2030  * @param[in] action
2031  *   Pointer to the queue action.
2032  * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
2034  * @param[in] dev
2035  *   Pointer to the Ethernet device structure.
2036  * @param[in] attr
2037  *   Attributes of flow that includes this action.
2038  * @param[out] error
2039  *   Pointer to error structure.
2040  *
2041  * @return
2042  *   0 on success, a negative errno value otherwise and rte_errno is set.
2043  */
2044 int
2045 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
2046 				uint64_t action_flags,
2047 				struct rte_eth_dev *dev,
2048 				const struct rte_flow_attr *attr,
2049 				struct rte_flow_error *error)
2050 {
2051 	struct mlx5_priv *priv = dev->data->dev_private;
2052 	const struct rte_flow_action_queue *queue = action->conf;
2053 
2054 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2055 		return rte_flow_error_set(error, EINVAL,
2056 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2057 					  "can't have 2 fate actions in"
2058 					  " same flow");
2059 	if (attr->egress)
2060 		return rte_flow_error_set(error, ENOTSUP,
2061 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2062 					  "queue action not supported for egress.");
2063 	if (mlx5_is_external_rxq(dev, queue->index))
2064 		return 0;
2065 	if (!priv->rxqs_n)
2066 		return rte_flow_error_set(error, EINVAL,
2067 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2068 					  NULL, "No Rx queues configured");
2069 	if (queue->index >= priv->rxqs_n)
2070 		return rte_flow_error_set(error, EINVAL,
2071 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2072 					  &queue->index,
2073 					  "queue index out of range");
2074 	if (mlx5_rxq_get(dev, queue->index) == NULL)
2075 		return rte_flow_error_set(error, EINVAL,
2076 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2077 					  &queue->index,
2078 					  "queue is not configured");
2079 	return 0;
2080 }
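
/*
 * Illustrative sketch (not part of the driver): a queue action accepted by
 * the checks above on an ingress flow, assuming Rx queue 0 is configured.
 *
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action action = {
 *		.type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *		.conf = &queue,
 *	};
 */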
2081 
2082 /**
2083  * Validate queue numbers for device RSS.
2084  *
2085  * @param[in] dev
2086  *   Configured device.
2087  * @param[in] queues
2088  *   Array of queue numbers.
2089  * @param[in] queues_n
2090  *   Size of the @p queues array.
2091  * @param[out] error
2092  *   On error, filled with a textual error description.
2093  * @param[out] queue_idx
2094  *   On error, filled with an offending queue index in @p queues array.
2095  *
2096  * @return
2097  *   0 on success, a negative errno code on error.
2098  */
2099 static int
2100 mlx5_validate_rss_queues(struct rte_eth_dev *dev,
2101 			 const uint16_t *queues, uint32_t queues_n,
2102 			 const char **error, uint32_t *queue_idx)
2103 {
2104 	const struct mlx5_priv *priv = dev->data->dev_private;
2105 	bool is_hairpin = false;
2106 	bool is_ext_rss = false;
2107 	uint32_t i;
2108 
2109 	for (i = 0; i != queues_n; ++i) {
2110 		struct mlx5_rxq_ctrl *rxq_ctrl;
2111 
		if (mlx5_is_external_rxq(dev, queues[i])) {
2113 			is_ext_rss = true;
2114 			continue;
2115 		}
2116 		if (is_ext_rss) {
2117 			*error = "Combining external and regular RSS queues is not supported";
2118 			*queue_idx = i;
2119 			return -ENOTSUP;
2120 		}
2121 		if (queues[i] >= priv->rxqs_n) {
2122 			*error = "queue index out of range";
2123 			*queue_idx = i;
2124 			return -EINVAL;
2125 		}
2126 		rxq_ctrl = mlx5_rxq_ctrl_get(dev, queues[i]);
2127 		if (rxq_ctrl == NULL) {
			*error = "queue is not configured";
2129 			*queue_idx = i;
2130 			return -EINVAL;
2131 		}
2132 		if (i == 0 && rxq_ctrl->is_hairpin)
2133 			is_hairpin = true;
2134 		if (is_hairpin != rxq_ctrl->is_hairpin) {
2135 			*error = "combining hairpin and regular RSS queues is not supported";
2136 			*queue_idx = i;
2137 			return -ENOTSUP;
2138 		}
2139 	}
2140 	return 0;
2141 }
2142 
2143 /*
 * Validate the RSS action.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] action
 *   Pointer to the RSS action.
2150  * @param[out] error
2151  *   Pointer to error structure.
2152  *
2153  * @return
2154  *   0 on success, a negative errno value otherwise and rte_errno is set.
2155  */
2156 int
2157 mlx5_validate_action_rss(struct rte_eth_dev *dev,
2158 			 const struct rte_flow_action *action,
2159 			 struct rte_flow_error *error)
2160 {
2161 	struct mlx5_priv *priv = dev->data->dev_private;
2162 	const struct rte_flow_action_rss *rss = action->conf;
2163 	int ret;
2164 	const char *message;
2165 	uint32_t queue_idx;
2166 
2167 	if (rss->func == RTE_ETH_HASH_FUNCTION_SYMMETRIC_TOEPLITZ) {
2168 		DRV_LOG(WARNING, "port %u symmetric RSS supported with SORT",
2169 			dev->data->port_id);
2170 	} else if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
2171 		   rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
2172 		return rte_flow_error_set(error, ENOTSUP,
2173 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2174 					  &rss->func,
2175 					  "RSS hash function not supported");
2176 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
2177 	if (rss->level > 2)
2178 #else
2179 	if (rss->level > 1)
2180 #endif
2181 		return rte_flow_error_set(error, ENOTSUP,
2182 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2183 					  &rss->level,
2184 					  "tunnel RSS is not supported");
2185 	/* allow RSS key_len 0 in case of NULL (default) RSS key. */
2186 	if (rss->key_len == 0 && rss->key != NULL)
2187 		return rte_flow_error_set(error, ENOTSUP,
2188 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2189 					  &rss->key_len,
2190 					  "RSS hash key length 0");
2191 	if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
2192 		return rte_flow_error_set(error, ENOTSUP,
2193 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2194 					  &rss->key_len,
2195 					  "RSS hash key too small");
2196 	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
2197 		return rte_flow_error_set(error, ENOTSUP,
2198 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2199 					  &rss->key_len,
2200 					  "RSS hash key too large");
2201 	if (rss->queue_num > priv->sh->dev_cap.ind_table_max_size)
2202 		return rte_flow_error_set(error, ENOTSUP,
2203 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2204 					  &rss->queue_num,
2205 					  "number of queues too large");
2206 	if (rss->types & MLX5_RSS_HF_MASK)
2207 		return rte_flow_error_set(error, ENOTSUP,
2208 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2209 					  &rss->types,
2210 					  "some RSS protocols are not"
2211 					  " supported");
2212 	if ((rss->types & (RTE_ETH_RSS_L3_SRC_ONLY | RTE_ETH_RSS_L3_DST_ONLY)) &&
2213 	    !(rss->types & RTE_ETH_RSS_IP))
2214 		return rte_flow_error_set(error, EINVAL,
2215 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2216 					  "L3 partial RSS requested but L3 RSS"
2217 					  " type not specified");
2218 	if ((rss->types & (RTE_ETH_RSS_L4_SRC_ONLY | RTE_ETH_RSS_L4_DST_ONLY)) &&
2219 	    !(rss->types & (RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP)))
2220 		return rte_flow_error_set(error, EINVAL,
2221 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2222 					  "L4 partial RSS requested but L4 RSS"
2223 					  " type not specified");
2224 	if (!priv->rxqs_n && priv->ext_rxqs == NULL)
2225 		return rte_flow_error_set(error, EINVAL,
2226 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2227 					  NULL, "No Rx queues configured");
2228 	if (!rss->queue_num)
2229 		return rte_flow_error_set(error, EINVAL,
2230 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2231 					  NULL, "No queues configured");
2232 	ret = mlx5_validate_rss_queues(dev, rss->queue, rss->queue_num,
2233 				       &message, &queue_idx);
2234 	if (ret != 0) {
2235 		return rte_flow_error_set(error, -ret,
2236 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2237 					  &rss->queue[queue_idx], message);
2238 	}
2239 	return 0;
2240 }
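
/*
 * Illustrative sketch (not part of the driver): a minimal RSS action that
 * satisfies the checks above, assuming Rx queues 0 and 1 are configured.
 * A NULL key with key_len 0 selects the default RSS hash key.
 *
 *	static const uint16_t queues[] = { 0, 1 };
 *	struct rte_flow_action_rss rss = {
 *		.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *		.level = 1,
 *		.types = RTE_ETH_RSS_IP | RTE_ETH_RSS_UDP,
 *		.key = NULL,
 *		.key_len = 0,
 *		.queue = queues,
 *		.queue_num = RTE_DIM(queues),
 *	};
 */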
2241 
2242 /*
 * Validate the RSS action.
 *
 * @param[in] action
 *   Pointer to the RSS action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
2249  * @param[in] dev
2250  *   Pointer to the Ethernet device structure.
2251  * @param[in] attr
2252  *   Attributes of flow that includes this action.
2253  * @param[in] item_flags
2254  *   Items that were detected.
2255  * @param[out] error
2256  *   Pointer to error structure.
2257  *
2258  * @return
2259  *   0 on success, a negative errno value otherwise and rte_errno is set.
2260  */
2261 int
2262 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
2263 			      uint64_t action_flags,
2264 			      struct rte_eth_dev *dev,
2265 			      const struct rte_flow_attr *attr,
2266 			      uint64_t item_flags,
2267 			      struct rte_flow_error *error)
2268 {
2269 	const struct rte_flow_action_rss *rss = action->conf;
2270 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2271 	int ret;
2272 
2273 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2274 		return rte_flow_error_set(error, EINVAL,
2275 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2276 					  "can't have 2 fate actions"
2277 					  " in same flow");
2278 	ret = mlx5_validate_action_rss(dev, action, error);
2279 	if (ret)
2280 		return ret;
2281 	if (attr->egress)
2282 		return rte_flow_error_set(error, ENOTSUP,
2283 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2284 					  "rss action not supported for "
2285 					  "egress");
2286 	if (rss->level > 1 && !tunnel)
2287 		return rte_flow_error_set(error, EINVAL,
2288 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2289 					  "inner RSS is not supported for "
2290 					  "non-tunnel flows");
2291 	if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
2292 	    !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
2293 		return rte_flow_error_set(error, EINVAL,
2294 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
2295 					  "RSS on eCPRI is not supported now");
2296 	}
2297 	if ((item_flags & MLX5_FLOW_LAYER_MPLS) &&
2298 	    !(item_flags &
2299 	      (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3)) &&
2300 	    rss->level > 1)
2301 		return rte_flow_error_set(error, EINVAL,
2302 					  RTE_FLOW_ERROR_TYPE_ITEM, NULL,
2303 					  "MPLS inner RSS needs to specify inner L2/L3 items after MPLS in pattern");
2304 	return 0;
2305 }
2306 
2307 /*
2308  * Validate the default miss action.
2309  *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
2313  *   Pointer to error structure.
2314  *
2315  * @return
2316  *   0 on success, a negative errno value otherwise and rte_errno is set.
2317  */
2318 int
2319 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
2320 				const struct rte_flow_attr *attr,
2321 				struct rte_flow_error *error)
2322 {
2323 	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2324 		return rte_flow_error_set(error, EINVAL,
2325 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2326 					  "can't have 2 fate actions in"
2327 					  " same flow");
2328 	if (attr->egress)
2329 		return rte_flow_error_set(error, ENOTSUP,
2330 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2331 					  "default miss action not supported "
2332 					  "for egress");
2333 	if (attr->group)
2334 		return rte_flow_error_set(error, ENOTSUP,
2335 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
2336 					  "only group 0 is supported");
2337 	if (attr->transfer)
2338 		return rte_flow_error_set(error, ENOTSUP,
2339 					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
2340 					  NULL, "transfer is not supported");
2341 	return 0;
2342 }
2343 
2344 /*
2345  * Validate the count action.
2346  *
2347  * @param[in] dev
2348  *   Pointer to the Ethernet device structure.
2349  * @param[in] attr
2350  *   Attributes of flow that includes this action.
2351  * @param[out] error
2352  *   Pointer to error structure.
2353  *
2354  * @return
2355  *   0 on success, a negative errno value otherwise and rte_errno is set.
2356  */
2357 int
2358 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
2359 				const struct rte_flow_attr *attr,
2360 				struct rte_flow_error *error)
2361 {
2362 	if (attr->egress)
2363 		return rte_flow_error_set(error, ENOTSUP,
2364 					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2365 					  "count action not supported for "
2366 					  "egress");
2367 	return 0;
2368 }
2369 
2370 /*
2371  * Validate the ASO CT action.
2372  *
2373  * @param[in] dev
2374  *   Pointer to the Ethernet device structure.
2375  * @param[in] conntrack
2376  *   Pointer to the CT action profile.
2377  * @param[out] error
2378  *   Pointer to error structure.
2379  *
2380  * @return
2381  *   0 on success, a negative errno value otherwise and rte_errno is set.
2382  */
2383 int
2384 mlx5_validate_action_ct(struct rte_eth_dev *dev,
2385 			const struct rte_flow_action_conntrack *conntrack,
2386 			struct rte_flow_error *error)
2387 {
2388 	RTE_SET_USED(dev);
2389 
2390 	if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
2391 		return rte_flow_error_set(error, EINVAL,
2392 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2393 					  "Invalid CT state");
2394 	if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
2395 		return rte_flow_error_set(error, EINVAL,
2396 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2397 					  "Invalid last TCP packet flag");
2398 	return 0;
2399 }
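
/*
 * Illustrative sketch (not part of the driver): a conntrack action profile
 * whose state and last-packet flag fall inside the ranges accepted above.
 *
 *	struct rte_flow_action_conntrack profile = {
 *		.state = RTE_FLOW_CONNTRACK_STATE_ESTABLISHED,
 *		.last_index = RTE_FLOW_CONNTRACK_FLAG_ACK,
 *	};
 */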
2400 
2401 /**
2402  * Validate the level value for modify field action.
2403  *
2404  * @param[in] data
2405  *   Pointer to the rte_flow_action_modify_data structure either src or dst.
2406  * @param[out] error
2407  *   Pointer to error structure.
2408  *
2409  * @return
2410  *   0 on success, a negative errno value otherwise and rte_errno is set.
2411  */
2412 int
2413 flow_validate_modify_field_level(const struct rte_flow_action_modify_data *data,
2414 				 struct rte_flow_error *error)
2415 {
2416 	if (data->level == 0)
2417 		return 0;
2418 	if (data->field != RTE_FLOW_FIELD_TAG &&
2419 	    data->field != (enum rte_flow_field_id)MLX5_RTE_FLOW_FIELD_META_REG)
2420 		return rte_flow_error_set(error, ENOTSUP,
2421 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2422 					  "inner header fields modification is not supported");
2423 	if (data->tag_index != 0)
2424 		return rte_flow_error_set(error, EINVAL,
2425 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2426 					  "tag array can be provided using 'level' or 'tag_index' fields, not both");
2427 	/*
2428 	 * The tag array for RTE_FLOW_FIELD_TAG type is provided using
2429 	 * 'tag_index' field. In old API, it was provided using 'level' field
2430 	 * and it is still supported for backwards compatibility.
2431 	 */
2432 	DRV_LOG(DEBUG, "tag array provided in 'level' field instead of 'tag_index' field.");
2433 	return 0;
2434 }
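
/*
 * Illustrative sketch (not part of the driver): selecting TAG register 2
 * as a modify-field destination. The current API passes the index in
 * 'tag_index' with 'level' 0; putting it in 'level' is only kept for
 * backwards compatibility.
 *
 *	struct rte_flow_action_modify_data dst = {
 *		.field = RTE_FLOW_FIELD_TAG,
 *		.tag_index = 2,
 *		.level = 0,
 *	};
 */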
2435 
2436 /**
2437  * Validate ICMP6 item.
2438  *
2439  * @param[in] item
2440  *   Item specification.
2441  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
2445  * @param[out] error
2446  *   Pointer to error structure.
2447  *
2448  * @return
2449  *   0 on success, a negative errno value otherwise and rte_errno is set.
2450  */
2451 int
2452 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
2453 			       uint64_t item_flags,
2454 			       uint8_t target_protocol,
2455 			       struct rte_flow_error *error)
2456 {
2457 	const struct rte_flow_item_icmp6 *mask = item->mask;
2458 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2459 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
2460 				      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2461 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2462 				      MLX5_FLOW_LAYER_OUTER_L4;
2463 	int ret;
2464 
2465 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
2466 		return rte_flow_error_set(error, EINVAL,
2467 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2468 					  "protocol filtering not compatible"
2469 					  " with ICMP6 layer");
2470 	if (!(item_flags & l3m))
2471 		return rte_flow_error_set(error, EINVAL,
2472 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2473 					  "IPv6 is mandatory to filter on"
2474 					  " ICMP6");
2475 	if (item_flags & l4m)
2476 		return rte_flow_error_set(error, EINVAL,
2477 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2478 					  "multiple L4 layers not supported");
2479 	if (!mask)
2480 		mask = &rte_flow_item_icmp6_mask;
2481 	ret = mlx5_flow_item_acceptable
2482 		(item, (const uint8_t *)mask,
2483 		 (const uint8_t *)&rte_flow_item_icmp6_mask,
2484 		 sizeof(struct rte_flow_item_icmp6),
2485 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2486 	if (ret < 0)
2487 		return ret;
2488 	return 0;
2489 }
2490 
2491 /**
2492  * Validate ICMP6 echo request/reply item.
2493  *
2494  * @param[in] item
2495  *   Item specification.
2496  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
2500  * @param[out] error
2501  *   Pointer to error structure.
2502  *
2503  * @return
2504  *   0 on success, a negative errno value otherwise and rte_errno is set.
2505  */
2506 int
2507 mlx5_flow_validate_item_icmp6_echo(const struct rte_flow_item *item,
2508 				   uint64_t item_flags,
2509 				   uint8_t target_protocol,
2510 				   struct rte_flow_error *error)
2511 {
2512 	const struct rte_flow_item_icmp6_echo *mask = item->mask;
2513 	const struct rte_flow_item_icmp6_echo nic_mask = {
2514 		.hdr.base.type = 0xff,
2515 		.hdr.base.code = 0xff,
2516 		.hdr.identifier = RTE_BE16(0xffff),
2517 		.hdr.sequence = RTE_BE16(0xffff),
2518 	};
2519 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2520 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
2521 				      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2522 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2523 				      MLX5_FLOW_LAYER_OUTER_L4;
2524 	int ret;
2525 
2526 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
2527 		return rte_flow_error_set(error, EINVAL,
2528 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2529 					  "protocol filtering not compatible"
2530 					  " with ICMP6 layer");
2531 	if (!(item_flags & l3m))
2532 		return rte_flow_error_set(error, EINVAL,
2533 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2534 					  "IPv6 is mandatory to filter on"
2535 					  " ICMP6");
2536 	if (item_flags & l4m)
2537 		return rte_flow_error_set(error, EINVAL,
2538 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2539 					  "multiple L4 layers not supported");
2540 	if (!mask)
2541 		mask = &nic_mask;
2542 	ret = mlx5_flow_item_acceptable
2543 		(item, (const uint8_t *)mask,
2544 		 (const uint8_t *)&nic_mask,
2545 		 sizeof(struct rte_flow_item_icmp6_echo),
2546 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2547 	if (ret < 0)
2548 		return ret;
2549 	return 0;
2550 }
2551 
2552 /**
2553  * Validate ICMP item.
2554  *
2555  * @param[in] item
2556  *   Item specification.
2557  * @param[in] item_flags
2558  *   Bit-fields that holds the items detected until now.
2559  * @param[out] error
2560  *   Pointer to error structure.
2561  *
2562  * @return
2563  *   0 on success, a negative errno value otherwise and rte_errno is set.
2564  */
2565 int
2566 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
2567 			     uint64_t item_flags,
2568 			     uint8_t target_protocol,
2569 			     struct rte_flow_error *error)
2570 {
2571 	const struct rte_flow_item_icmp *mask = item->mask;
2572 	const struct rte_flow_item_icmp nic_mask = {
2573 		.hdr.icmp_type = 0xff,
2574 		.hdr.icmp_code = 0xff,
2575 		.hdr.icmp_ident = RTE_BE16(0xffff),
2576 		.hdr.icmp_seq_nb = RTE_BE16(0xffff),
2577 	};
2578 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2579 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
2580 				      MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2581 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2582 				      MLX5_FLOW_LAYER_OUTER_L4;
2583 	int ret;
2584 
2585 	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
2586 		return rte_flow_error_set(error, EINVAL,
2587 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2588 					  "protocol filtering not compatible"
2589 					  " with ICMP layer");
2590 	if (!(item_flags & l3m))
2591 		return rte_flow_error_set(error, EINVAL,
2592 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2593 					  "IPv4 is mandatory to filter"
2594 					  " on ICMP");
2595 	if (item_flags & l4m)
2596 		return rte_flow_error_set(error, EINVAL,
2597 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2598 					  "multiple L4 layers not supported");
2599 	if (!mask)
2600 		mask = &nic_mask;
2601 	ret = mlx5_flow_item_acceptable
2602 		(item, (const uint8_t *)mask,
2603 		 (const uint8_t *)&nic_mask,
2604 		 sizeof(struct rte_flow_item_icmp),
2605 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2606 	if (ret < 0)
2607 		return ret;
2608 	return 0;
2609 }
2610 
2611 /**
2612  * Validate Ethernet item.
2613  *
2614  * @param[in] item
2615  *   Item specification.
2616  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] ext_vlan_sup
 *   Whether extended VLAN features are supported or not.
 * @param[out] error
2619  *   Pointer to error structure.
2620  *
2621  * @return
2622  *   0 on success, a negative errno value otherwise and rte_errno is set.
2623  */
2624 int
2625 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
2626 			    uint64_t item_flags, bool ext_vlan_sup,
2627 			    struct rte_flow_error *error)
2628 {
2629 	const struct rte_flow_item_eth *mask = item->mask;
2630 	const struct rte_flow_item_eth nic_mask = {
2631 		.hdr.dst_addr.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2632 		.hdr.src_addr.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2633 		.hdr.ether_type = RTE_BE16(0xffff),
2634 		.has_vlan = ext_vlan_sup ? 1 : 0,
2635 	};
2636 	int ret;
2637 	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2638 	const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2	:
2639 				       MLX5_FLOW_LAYER_OUTER_L2;
2640 
2641 	if (item_flags & ethm)
2642 		return rte_flow_error_set(error, ENOTSUP,
2643 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2644 					  "multiple L2 layers not supported");
2645 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
2646 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
2647 		return rte_flow_error_set(error, EINVAL,
2648 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2649 					  "L2 layer should not follow "
2650 					  "L3 layers");
2651 	if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
2652 	    (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
2653 		return rte_flow_error_set(error, EINVAL,
2654 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2655 					  "L2 layer should not follow VLAN");
2656 	if (item_flags & MLX5_FLOW_LAYER_GTP)
2657 		return rte_flow_error_set(error, EINVAL,
2658 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2659 					  "L2 layer should not follow GTP");
2660 	if (!mask)
2661 		mask = &rte_flow_item_eth_mask;
2662 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2663 					(const uint8_t *)&nic_mask,
2664 					sizeof(struct rte_flow_item_eth),
2665 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2666 	return ret;
2667 }
2668 
2669 /**
2670  * Validate VLAN item.
2671  *
2672  * @param[in] item
2673  *   Item specification.
2674  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
2676  * @param[in] dev
2677  *   Ethernet device flow is being created on.
2678  * @param[out] error
2679  *   Pointer to error structure.
2680  *
2681  * @return
2682  *   0 on success, a negative errno value otherwise and rte_errno is set.
2683  */
2684 int
2685 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
2686 			     uint64_t item_flags,
2687 			     struct rte_eth_dev *dev,
2688 			     struct rte_flow_error *error)
2689 {
2690 	const struct rte_flow_item_vlan *spec = item->spec;
2691 	const struct rte_flow_item_vlan *mask = item->mask;
2692 	const struct rte_flow_item_vlan nic_mask = {
2693 		.hdr.vlan_tci = RTE_BE16(UINT16_MAX),
2694 		.hdr.eth_proto = RTE_BE16(UINT16_MAX),
2695 	};
2696 	uint16_t vlan_tag = 0;
2697 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2698 	int ret;
2699 	const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
2700 					MLX5_FLOW_LAYER_INNER_L4) :
2701 				       (MLX5_FLOW_LAYER_OUTER_L3 |
2702 					MLX5_FLOW_LAYER_OUTER_L4);
2703 	const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
2704 					MLX5_FLOW_LAYER_OUTER_VLAN;
2705 
2706 	if (item_flags & vlanm)
2707 		return rte_flow_error_set(error, EINVAL,
2708 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2709 					  "multiple VLAN layers not supported");
2710 	else if ((item_flags & l34m) != 0)
2711 		return rte_flow_error_set(error, EINVAL,
2712 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2713 					  "VLAN cannot follow L3/L4 layer");
2714 	if (!mask)
2715 		mask = &rte_flow_item_vlan_mask;
2716 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2717 					(const uint8_t *)&nic_mask,
2718 					sizeof(struct rte_flow_item_vlan),
2719 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2720 	if (ret)
2721 		return ret;
2722 	if (!tunnel && mask->hdr.vlan_tci != RTE_BE16(0x0fff)) {
2723 		struct mlx5_priv *priv = dev->data->dev_private;
2724 
2725 		if (priv->vmwa_context) {
2726 			/*
			 * Non-NULL context means we have a virtual machine
			 * and SR-IOV enabled. We have to create a VLAN
			 * interface to make the hypervisor set up the
			 * E-Switch vport context correctly. We avoid
			 * creating multiple VLAN interfaces, so we cannot
			 * support a VLAN tag mask.
2732 			 */
2733 			return rte_flow_error_set(error, EINVAL,
2734 						  RTE_FLOW_ERROR_TYPE_ITEM,
2735 						  item,
2736 						  "VLAN tag mask is not"
2737 						  " supported in virtual"
2738 						  " environment");
2739 		}
2740 	}
2741 	if (spec) {
2742 		vlan_tag = spec->hdr.vlan_tci;
2743 		vlan_tag &= mask->hdr.vlan_tci;
2744 	}
2745 	/*
	 * From the verbs perspective, an empty VLAN is equivalent
	 * to a packet without a VLAN layer.
2748 	 */
2749 	if (!vlan_tag)
2750 		return rte_flow_error_set(error, EINVAL,
2751 					  RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2752 					  item->spec,
2753 					  "VLAN cannot be empty");
2754 	return 0;
2755 }
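
/*
 * Illustrative sketch (not part of the driver): a VLAN item that passes the
 * checks above - the spec/mask TCI intersection is non-zero, so the match
 * does not degenerate into "no VLAN layer".
 *
 *	struct rte_flow_item_vlan vlan_spec = {
 *		.hdr.vlan_tci = RTE_BE16(100),
 *	};
 *	struct rte_flow_item_vlan vlan_mask = {
 *		.hdr.vlan_tci = RTE_BE16(0x0fff),
 *	};
 *	struct rte_flow_item item = {
 *		.type = RTE_FLOW_ITEM_TYPE_VLAN,
 *		.spec = &vlan_spec,
 *		.mask = &vlan_mask,
 *	};
 */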
2756 
2757 /**
2758  * Validate IPV4 item.
2759  *
2760  * @param[in] item
2761  *   Item specification.
2762  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
2764  * @param[in] last_item
2765  *   Previous validated item in the pattern items.
2766  * @param[in] ether_type
2767  *   Type in the ethernet layer header (including dot1q).
2768  * @param[in] acc_mask
2769  *   Acceptable mask, if NULL default internal default mask
2770  *   will be used to check whether item fields are supported.
2771  * @param[in] range_accepted
2772  *   True if range of values is accepted for specific fields, false otherwise.
2773  * @param[out] error
2774  *   Pointer to error structure.
2775  *
2776  * @return
2777  *   0 on success, a negative errno value otherwise and rte_errno is set.
2778  */
2779 int
2780 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2781 			     uint64_t item_flags,
2782 			     uint64_t last_item,
2783 			     uint16_t ether_type,
2784 			     const struct rte_flow_item_ipv4 *acc_mask,
2785 			     bool range_accepted,
2786 			     struct rte_flow_error *error)
2787 {
2788 	const struct rte_flow_item_ipv4 *mask = item->mask;
2789 	const struct rte_flow_item_ipv4 *spec = item->spec;
2790 	const struct rte_flow_item_ipv4 nic_mask = {
2791 		.hdr = {
2792 			.src_addr = RTE_BE32(0xffffffff),
2793 			.dst_addr = RTE_BE32(0xffffffff),
2794 			.type_of_service = 0xff,
2795 			.next_proto_id = 0xff,
2796 		},
2797 	};
2798 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2799 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2800 				      MLX5_FLOW_LAYER_OUTER_L3;
2801 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2802 				      MLX5_FLOW_LAYER_OUTER_L4;
2803 	int ret;
2804 	uint8_t next_proto = 0xFF;
2805 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2806 				  MLX5_FLOW_LAYER_OUTER_VLAN |
2807 				  MLX5_FLOW_LAYER_INNER_VLAN);
2808 
2809 	if ((last_item & l2_vlan) && ether_type &&
2810 	    ether_type != RTE_ETHER_TYPE_IPV4)
2811 		return rte_flow_error_set(error, EINVAL,
2812 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "IPv4 cannot follow an L2/VLAN layer "
					  "whose ether type is not IPv4");
2815 	if (item_flags & MLX5_FLOW_LAYER_IPIP) {
2816 		if (mask && spec)
2817 			next_proto = mask->hdr.next_proto_id &
2818 				     spec->hdr.next_proto_id;
2819 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2820 			return rte_flow_error_set(error, EINVAL,
2821 						  RTE_FLOW_ERROR_TYPE_ITEM,
2822 						  item,
2823 						  "multiple tunnel "
2824 						  "not supported");
2825 	}
2826 	if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
2827 		return rte_flow_error_set(error, EINVAL,
2828 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2829 					  "wrong tunnel type - IPv6 specified "
2830 					  "but IPv4 item provided");
2831 	if (item_flags & l3m)
2832 		return rte_flow_error_set(error, ENOTSUP,
2833 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2834 					  "multiple L3 layers not supported");
2835 	else if (item_flags & l4m)
2836 		return rte_flow_error_set(error, EINVAL,
2837 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2838 					  "L3 cannot follow an L4 layer.");
2839 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2840 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2841 		return rte_flow_error_set(error, EINVAL,
2842 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2843 					  "L3 cannot follow an NVGRE layer.");
2844 	if (!mask)
2845 		mask = &rte_flow_item_ipv4_mask;
2846 	else if (mask->hdr.next_proto_id != 0 &&
2847 		 mask->hdr.next_proto_id != 0xff)
2848 		return rte_flow_error_set(error, EINVAL,
2849 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2850 					  "partial mask is not supported"
2851 					  " for protocol");
2852 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2853 					acc_mask ? (const uint8_t *)acc_mask
2854 						 : (const uint8_t *)&nic_mask,
2855 					sizeof(struct rte_flow_item_ipv4),
2856 					range_accepted, error);
2857 	if (ret < 0)
2858 		return ret;
2859 	return 0;
2860 }
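
/*
 * Illustrative sketch (not part of the driver): an IPv4 item matching UDP
 * payloads. The protocol mask must be either 0 or 0xff, since partial
 * protocol masks are rejected above.
 *
 *	struct rte_flow_item_ipv4 ipv4_spec = {
 *		.hdr.next_proto_id = IPPROTO_UDP,
 *	};
 *	struct rte_flow_item_ipv4 ipv4_mask = {
 *		.hdr.next_proto_id = 0xff,
 *	};
 */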
2861 
2862 /**
2863  * Validate IPV6 item.
2864  *
2865  * @param[in] item
2866  *   Item specification.
2867  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
2869  * @param[in] last_item
2870  *   Previous validated item in the pattern items.
2871  * @param[in] ether_type
2872  *   Type in the ethernet layer header (including dot1q).
2873  * @param[in] acc_mask
2874  *   Acceptable mask, if NULL default internal default mask
2875  *   will be used to check whether item fields are supported.
2876  * @param[out] error
2877  *   Pointer to error structure.
2878  *
2879  * @return
2880  *   0 on success, a negative errno value otherwise and rte_errno is set.
2881  */
2882 int
2883 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2884 			     uint64_t item_flags,
2885 			     uint64_t last_item,
2886 			     uint16_t ether_type,
2887 			     const struct rte_flow_item_ipv6 *acc_mask,
2888 			     struct rte_flow_error *error)
2889 {
2890 	const struct rte_flow_item_ipv6 *mask = item->mask;
2891 	const struct rte_flow_item_ipv6 *spec = item->spec;
2892 	const struct rte_flow_item_ipv6 nic_mask = {
2893 		.hdr = {
2894 			.src_addr =
2895 				"\xff\xff\xff\xff\xff\xff\xff\xff"
2896 				"\xff\xff\xff\xff\xff\xff\xff\xff",
2897 			.dst_addr =
2898 				"\xff\xff\xff\xff\xff\xff\xff\xff"
2899 				"\xff\xff\xff\xff\xff\xff\xff\xff",
2900 			.vtc_flow = RTE_BE32(0xffffffff),
2901 			.proto = 0xff,
2902 		},
2903 	};
2904 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2905 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2906 				      MLX5_FLOW_LAYER_OUTER_L3;
2907 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2908 				      MLX5_FLOW_LAYER_OUTER_L4;
2909 	int ret;
2910 	uint8_t next_proto = 0xFF;
2911 	const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2912 				  MLX5_FLOW_LAYER_OUTER_VLAN |
2913 				  MLX5_FLOW_LAYER_INNER_VLAN);
2914 
2915 	if ((last_item & l2_vlan) && ether_type &&
2916 	    ether_type != RTE_ETHER_TYPE_IPV6)
2917 		return rte_flow_error_set(error, EINVAL,
2918 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "IPv6 cannot follow an L2/VLAN layer "
					  "whose ether type is not IPv6");
2921 	if (mask && mask->hdr.proto == UINT8_MAX && spec)
2922 		next_proto = spec->hdr.proto;
2923 	if (item_flags & MLX5_FLOW_LAYER_IPIP) {
2924 		if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2925 			return rte_flow_error_set(error, EINVAL,
2926 						  RTE_FLOW_ERROR_TYPE_ITEM,
2927 						  item,
2928 						  "multiple tunnel "
2929 						  "not supported");
2930 	}
2931 	if (next_proto == IPPROTO_HOPOPTS  ||
2932 	    next_proto == IPPROTO_ROUTING  ||
2933 	    next_proto == IPPROTO_FRAGMENT ||
2934 	    next_proto == IPPROTO_ESP	   ||
2935 	    next_proto == IPPROTO_AH	   ||
2936 	    next_proto == IPPROTO_DSTOPTS)
2937 		return rte_flow_error_set(error, EINVAL,
2938 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2939 					  "IPv6 proto (next header) should "
2940 					  "not be set as extension header");
2941 	if (item_flags & MLX5_FLOW_LAYER_IPIP)
2942 		return rte_flow_error_set(error, EINVAL,
2943 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2944 					  "wrong tunnel type - IPv4 specified "
2945 					  "but IPv6 item provided");
2946 	if (item_flags & l3m)
2947 		return rte_flow_error_set(error, ENOTSUP,
2948 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2949 					  "multiple L3 layers not supported");
2950 	else if (item_flags & l4m)
2951 		return rte_flow_error_set(error, EINVAL,
2952 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2953 					  "L3 cannot follow an L4 layer.");
2954 	else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2955 		  !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2956 		return rte_flow_error_set(error, EINVAL,
2957 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
2958 					  "L3 cannot follow an NVGRE layer.");
2959 	if (!mask)
2960 		mask = &rte_flow_item_ipv6_mask;
2961 	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2962 					acc_mask ? (const uint8_t *)acc_mask
2963 						 : (const uint8_t *)&nic_mask,
2964 					sizeof(struct rte_flow_item_ipv6),
2965 					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2966 	if (ret < 0)
2967 		return ret;
2968 	return 0;
2969 }
2970 
2971 /**
2972  * Validate UDP item.
2973  *
2974  * @param[in] item
2975  *   Item specification.
2976  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
2982  * @param[out] error
2983  *   Pointer to error structure.
2984  *
2985  * @return
2986  *   0 on success, a negative errno value otherwise and rte_errno is set.
2987  */
2988 int
2989 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2990 			    uint64_t item_flags,
2991 			    uint8_t target_protocol,
2992 			    struct rte_flow_error *error)
2993 {
2994 	const struct rte_flow_item_udp *mask = item->mask;
2995 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2996 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2997 				      MLX5_FLOW_LAYER_OUTER_L3;
2998 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2999 				      MLX5_FLOW_LAYER_OUTER_L4;
3000 	int ret;
3001 
3002 	if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
3003 		return rte_flow_error_set(error, EINVAL,
3004 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3005 					  "protocol filtering not compatible"
3006 					  " with UDP layer");
3007 	if (!(item_flags & l3m))
3008 		return rte_flow_error_set(error, EINVAL,
3009 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3010 					  "L3 is mandatory to filter on L4");
3011 	if (item_flags & l4m)
3012 		return rte_flow_error_set(error, EINVAL,
3013 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3014 					  "multiple L4 layers not supported");
3015 	if (!mask)
3016 		mask = &rte_flow_item_udp_mask;
3017 	ret = mlx5_flow_item_acceptable
3018 		(item, (const uint8_t *)mask,
3019 		 (const uint8_t *)&rte_flow_item_udp_mask,
3020 		 sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
3021 		 error);
3022 	if (ret < 0)
3023 		return ret;
3024 	return 0;
3025 }
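
/*
 * Illustrative sketch (not part of the driver): the layer checks above
 * enforce the usual item ordering - matching UDP destination port 4789,
 * for example, needs preceding L2/L3 items.
 *
 *	struct rte_flow_item_udp udp_spec = {
 *		.hdr.dst_port = RTE_BE16(4789),
 *	};
 *	struct rte_flow_item_udp udp_mask = {
 *		.hdr.dst_port = RTE_BE16(0xffff),
 *	};
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP,
 *		  .spec = &udp_spec, .mask = &udp_mask },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */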
3026 
3027 /**
3028  * Validate TCP item.
3029  *
3030  * @param[in] item
3031  *   Item specification.
3032  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
 * @param[in] flow_mask
 *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
3036  * @param[out] error
3037  *   Pointer to error structure.
3038  *
3039  * @return
3040  *   0 on success, a negative errno value otherwise and rte_errno is set.
3041  */
3042 int
3043 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
3044 			    uint64_t item_flags,
3045 			    uint8_t target_protocol,
3046 			    const struct rte_flow_item_tcp *flow_mask,
3047 			    struct rte_flow_error *error)
3048 {
3049 	const struct rte_flow_item_tcp *mask = item->mask;
3050 	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
3051 	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
3052 				      MLX5_FLOW_LAYER_OUTER_L3;
3053 	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
3054 				      MLX5_FLOW_LAYER_OUTER_L4;
3055 	int ret;
3056 
3057 	MLX5_ASSERT(flow_mask);
3058 	if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
3059 		return rte_flow_error_set(error, EINVAL,
3060 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3061 					  "protocol filtering not compatible"
3062 					  " with TCP layer");
3063 	if (!(item_flags & l3m))
3064 		return rte_flow_error_set(error, EINVAL,
3065 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3066 					  "L3 is mandatory to filter on L4");
3067 	if (item_flags & l4m)
3068 		return rte_flow_error_set(error, EINVAL,
3069 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3070 					  "multiple L4 layers not supported");
3071 	if (!mask)
3072 		mask = &rte_flow_item_tcp_mask;
3073 	ret = mlx5_flow_item_acceptable
3074 		(item, (const uint8_t *)mask,
3075 		 (const uint8_t *)flow_mask,
3076 		 sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
3077 		 error);
3078 	if (ret < 0)
3079 		return ret;
3080 	return 0;
3081 }
3082 
3083 /**
3084  * Validate VXLAN item.
3085  *
3086  * @param[in] dev
3087  *   Pointer to the Ethernet device structure.
3088  * @param[in] udp_dport
 *   UDP destination port.
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] root
 *   Whether the rule is created on the root table.
3096  * @param[out] error
3097  *   Pointer to error structure.
3098  *
3099  * @return
3100  *   0 on success, a negative errno value otherwise and rte_errno is set.
3101  */
3102 int
3103 mlx5_flow_validate_item_vxlan(struct rte_eth_dev *dev,
3104 			      uint16_t udp_dport,
3105 			      const struct rte_flow_item *item,
3106 			      uint64_t item_flags,
3107 			      bool root,
3108 			      struct rte_flow_error *error)
3109 {
3110 	const struct rte_flow_item_vxlan *spec = item->spec;
3111 	const struct rte_flow_item_vxlan *mask = item->mask;
3112 	int ret;
3113 	struct mlx5_priv *priv = dev->data->dev_private;
3114 	union vni {
3115 		uint32_t vlan_id;
3116 		uint8_t vni[4];
3117 	} id = { .vlan_id = 0, };
3118 	const struct rte_flow_item_vxlan nic_mask = {
3119 		.hdr.vni = "\xff\xff\xff",
3120 		.hdr.rsvd1 = 0xff,
3121 	};
3122 	const struct rte_flow_item_vxlan *valid_mask;
3123 
3124 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3125 		return rte_flow_error_set(error, ENOTSUP,
3126 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3127 					  "multiple tunnel layers not"
3128 					  " supported");
3129 	valid_mask = &rte_flow_item_vxlan_mask;
3130 	/*
3131 	 * Verify only UDPv4 is present as defined in
3132 	 * https://tools.ietf.org/html/rfc7348
3133 	 */
3134 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
3135 		return rte_flow_error_set(error, EINVAL,
3136 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3137 					  "no outer UDP layer found");
3138 	if (!mask)
3139 		mask = &rte_flow_item_vxlan_mask;
3140 
3141 	if (priv->sh->steering_format_version !=
3142 	    MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
3143 	    !udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN) {
3144 		/* non-root table */
3145 		if (!root && priv->sh->misc5_cap)
3146 			valid_mask = &nic_mask;
3147 		/* Group zero in NIC domain */
3148 		if (!root && priv->sh->tunnel_header_0_1)
3149 			valid_mask = &nic_mask;
3150 	}
3151 	ret = mlx5_flow_item_acceptable
3152 		(item, (const uint8_t *)mask,
3153 		 (const uint8_t *)valid_mask,
3154 		 sizeof(struct rte_flow_item_vxlan),
3155 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3156 	if (ret < 0)
3157 		return ret;
	if (spec) {
		memcpy(&id.vni[1], spec->hdr.vni, 3);
		/* Apply the mask so only the masked VNI bits are kept. */
		id.vni[1] &= mask->hdr.vni[0];
		id.vni[2] &= mask->hdr.vni[1];
		id.vni[3] &= mask->hdr.vni[2];
	}
3162 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
3163 		return rte_flow_error_set(error, ENOTSUP,
3164 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3165 					  "VXLAN tunnel must be fully defined");
3166 	return 0;
3167 }
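
/*
 * Illustrative sketch (not part of the driver): a "fully defined" VXLAN
 * tunnel - the outer L2/L3/L4 items precede the VXLAN item and the outer
 * UDP layer is mandatory per RFC 7348 (VNI 42 here is arbitrary).
 *
 *	struct rte_flow_item_vxlan vxlan_spec = {
 *		.hdr.vni = "\x00\x00\x2a",
 *	};
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &vxlan_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */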
3168 
3169 /**
3170  * Validate VXLAN_GPE item.
3171  *
3172  * @param[in] item
3173  *   Item specification.
3174  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
3180  * @param[out] error
3181  *   Pointer to error structure.
3182  *
3183  * @return
3184  *   0 on success, a negative errno value otherwise and rte_errno is set.
3185  */
3186 int
3187 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
3188 				  uint64_t item_flags,
3189 				  struct rte_eth_dev *dev,
3190 				  struct rte_flow_error *error)
3191 {
3192 	struct mlx5_priv *priv = dev->data->dev_private;
3193 	const struct rte_flow_item_vxlan_gpe *spec = item->spec;
3194 	const struct rte_flow_item_vxlan_gpe *mask = item->mask;
3195 	int ret;
3196 	union vni {
3197 		uint32_t vlan_id;
3198 		uint8_t vni[4];
3199 	} id = { .vlan_id = 0, };
3200 
3201 	if (!priv->sh->config.l3_vxlan_en)
3202 		return rte_flow_error_set(error, ENOTSUP,
3203 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3204 					  "L3 VXLAN is not enabled by device"
3205 					  " parameter and/or not configured in"
3206 					  " firmware");
3207 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3208 		return rte_flow_error_set(error, ENOTSUP,
3209 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3210 					  "multiple tunnel layers not"
3211 					  " supported");
3212 	/*
3213 	 * Verify only UDPv4 is present as defined in
3214 	 * https://tools.ietf.org/html/rfc7348
3215 	 */
3216 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
3217 		return rte_flow_error_set(error, EINVAL,
3218 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3219 					  "no outer UDP layer found");
3220 	if (!mask)
3221 		mask = &rte_flow_item_vxlan_gpe_mask;
3222 	ret = mlx5_flow_item_acceptable
3223 		(item, (const uint8_t *)mask,
3224 		 (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
3225 		 sizeof(struct rte_flow_item_vxlan_gpe),
3226 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3227 	if (ret < 0)
3228 		return ret;
3229 	if (spec) {
3230 		if (spec->hdr.proto)
3231 			return rte_flow_error_set(error, ENOTSUP,
3232 						  RTE_FLOW_ERROR_TYPE_ITEM,
3233 						  item,
3234 						  "VxLAN-GPE protocol"
3235 						  " not supported");
		memcpy(&id.vni[1], spec->hdr.vni, 3);
		/* Apply the mask so only the masked VNI bits are kept. */
		id.vni[1] &= mask->hdr.vni[0];
		id.vni[2] &= mask->hdr.vni[1];
		id.vni[3] &= mask->hdr.vni[2];
3238 	}
3239 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
3240 		return rte_flow_error_set(error, ENOTSUP,
3241 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3242 					  "VXLAN-GPE tunnel must be fully"
3243 					  " defined");
3244 	return 0;
}

3246 /**
3247  * Validate GRE Key item.
3248  *
3249  * @param[in] item
3250  *   Item specification.
3251  * @param[in] item_flags
3252  *   Bit flags to mark detected items.
3253  * @param[in] gre_item
3254  *   Pointer to gre_item
3255  * @param[out] error
3256  *   Pointer to error structure.
3257  *
3258  * @return
3259  *   0 on success, a negative errno value otherwise and rte_errno is set.
3260  */
3261 int
3262 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
3263 				uint64_t item_flags,
3264 				const struct rte_flow_item *gre_item,
3265 				struct rte_flow_error *error)
3266 {
3267 	const rte_be32_t *mask = item->mask;
3268 	int ret = 0;
3269 	rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
3270 	const struct rte_flow_item_gre *gre_spec;
3271 	const struct rte_flow_item_gre *gre_mask;
3272 
3273 	if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
3274 		return rte_flow_error_set(error, ENOTSUP,
3275 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3276 					  "Multiple GRE keys not supported");
3277 	if (!(item_flags & MLX5_FLOW_LAYER_GRE))
3278 		return rte_flow_error_set(error, ENOTSUP,
3279 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3280 					  "No preceding GRE header");
3281 	if (item_flags & MLX5_FLOW_LAYER_INNER)
3282 		return rte_flow_error_set(error, ENOTSUP,
3283 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3284 					  "GRE key following a wrong item");
3285 	gre_mask = gre_item->mask;
3286 	if (!gre_mask)
3287 		gre_mask = &rte_flow_item_gre_mask;
3288 	gre_spec = gre_item->spec;
3289 	if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
3290 			 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
3291 		return rte_flow_error_set(error, EINVAL,
3292 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3293 					  "Key bit must be on");
3294 
3295 	if (!mask)
3296 		mask = &gre_key_default_mask;
3297 	ret = mlx5_flow_item_acceptable
3298 		(item, (const uint8_t *)mask,
3299 		 (const uint8_t *)&gre_key_default_mask,
3300 		 sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3301 	return ret;
3302 }
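
/*
 * Illustrative note (assuming the standard GRE header layout): the bit
 * tests above and below probe flag bits in the big-endian c_rsvd0_ver
 * word -- C (checksum) is 0x8000, K (key) is 0x2000 and S (sequence) is
 * 0x1000, so the 0xB000 nic_mask in mlx5_flow_validate_item_gre() below
 * is exactly C | K | S.
 */
static __rte_unused bool
example_gre_key_bit_is_on(const struct rte_flow_item_gre *gre)
{
	return (gre->c_rsvd0_ver & RTE_BE16(0x2000)) != 0;
}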
3303 
3304 /**
3305  * Validate GRE optional item.
3306  *
3307  * @param[in] dev
3308  *   Pointer to the Ethernet device structure.
3309  * @param[in] item
3310  *   Item specification.
3311  * @param[in] item_flags
3312  *   Bit flags to mark detected items.
3313  * @param[in] attr
3314  *   Flow rule attributes.
3315  * @param[in] gre_item
3316  *   Pointer to gre_item
3317  * @param[out] error
3318  *   Pointer to error structure.
3319  *
3320  * @return
3321  *   0 on success, a negative errno value otherwise and rte_errno is set.
3322  */
3323 int
3324 mlx5_flow_validate_item_gre_option(struct rte_eth_dev *dev,
3325 				   const struct rte_flow_item *item,
3326 				   uint64_t item_flags,
3327 				   const struct rte_flow_attr *attr,
3328 				   const struct rte_flow_item *gre_item,
3329 				   struct rte_flow_error *error)
3330 {
3331 	const struct rte_flow_item_gre *gre_spec = gre_item->spec;
3332 	const struct rte_flow_item_gre *gre_mask = gre_item->mask;
3333 	const struct rte_flow_item_gre_opt *spec = item->spec;
3334 	const struct rte_flow_item_gre_opt *mask = item->mask;
3335 	struct mlx5_priv *priv = dev->data->dev_private;
3336 	int ret = 0;
3337 	struct rte_flow_item_gre_opt nic_mask = {
3338 		.checksum_rsvd = {
3339 			.checksum = RTE_BE16(UINT16_MAX),
3340 			.reserved1 = 0x0,
3341 		},
3342 		.key = {
3343 			.key = RTE_BE32(UINT32_MAX),
3344 		},
3345 		.sequence = {
3346 			.sequence = RTE_BE32(UINT32_MAX),
3347 		},
3348 	};
3349 
3350 	if (!(item_flags & MLX5_FLOW_LAYER_GRE))
3351 		return rte_flow_error_set(error, ENOTSUP,
3352 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3353 					  "No preceding GRE header");
3354 	if (item_flags & MLX5_FLOW_LAYER_INNER)
3355 		return rte_flow_error_set(error, ENOTSUP,
3356 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3357 					  "GRE option following a wrong item");
3358 	if (!spec || !mask)
3359 		return rte_flow_error_set(error, EINVAL,
3360 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3361 					  "At least one GRE option field (checksum/key/sequence) must be specified");
3362 	if (!gre_mask)
3363 		gre_mask = &rte_flow_item_gre_mask;
3364 	if (mask->checksum_rsvd.checksum)
3365 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x8000)) &&
3366 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x8000)))
3367 			return rte_flow_error_set(error, EINVAL,
3368 						  RTE_FLOW_ERROR_TYPE_ITEM,
3369 						  item,
3370 						  "Checksum bit must be on");
3371 	if (mask->key.key)
3372 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
3373 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
3374 			return rte_flow_error_set(error, EINVAL,
3375 						  RTE_FLOW_ERROR_TYPE_ITEM,
3376 						  item, "Key bit must be on");
3377 	if (mask->sequence.sequence)
3378 		if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x1000)) &&
3379 				 !(gre_spec->c_rsvd0_ver & RTE_BE16(0x1000)))
3380 			return rte_flow_error_set(error, EINVAL,
3381 						  RTE_FLOW_ERROR_TYPE_ITEM,
3382 						  item,
3383 						  "Sequence bit must be on");
3384 	if (mask->checksum_rsvd.checksum || mask->sequence.sequence) {
3385 		if (priv->sh->steering_format_version ==
3386 		    MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
3387 		    ((attr->group || (attr->transfer && priv->fdb_def_rule)) &&
3388 		     !priv->sh->misc5_cap) ||
3389 		    (!(priv->sh->tunnel_header_0_1 &&
3390 		       priv->sh->tunnel_header_2_3) &&
3391 		    !attr->group && (!attr->transfer || !priv->fdb_def_rule)))
3392 			return rte_flow_error_set(error, EINVAL,
3393 						  RTE_FLOW_ERROR_TYPE_ITEM,
3394 						  item,
3395 						  "Checksum/Sequence not supported");
3396 	}
3397 	ret = mlx5_flow_item_acceptable
3398 		(item, (const uint8_t *)mask,
3399 		 (const uint8_t *)&nic_mask,
3400 		 sizeof(struct rte_flow_item_gre_opt),
3401 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3402 	return ret;
3403 }
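
/*
 * Illustrative sketch (not part of the driver): a GRE option spec/mask
 * pair that satisfies mlx5_flow_validate_item_gre_option() -- at least
 * one of checksum/key/sequence is masked and the preceding GRE item has
 * the matching K bit set.  The key value is a placeholder.
 */
static __rte_unused const struct rte_flow_item_gre example_gre_with_key = {
	.c_rsvd0_ver = RTE_BE16(0x2000), /* K (key present) bit on. */
};
static __rte_unused const struct rte_flow_item_gre_opt example_gre_opt_spec = {
	.key = { .key = RTE_BE32(0x12345678) },
};
static __rte_unused const struct rte_flow_item_gre_opt example_gre_opt_mask = {
	.key = { .key = RTE_BE32(UINT32_MAX) },
};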
3404 
3405 /**
3406  * Validate GRE item.
3407  *
3408  * @param[in] item
3409  *   Item specification.
3410  * @param[in] item_flags
3411  *   Bit flags to mark detected items.
3412  * @param[in] target_protocol
3413  *   The next protocol in the previous item.
3414  * @param[out] error
3415  *   Pointer to error structure.
3416  *
3417  * @return
3418  *   0 on success, a negative errno value otherwise and rte_errno is set.
3419  */
3420 int
3421 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
3422 			    uint64_t item_flags,
3423 			    uint8_t target_protocol,
3424 			    struct rte_flow_error *error)
3425 {
3426 	const struct rte_flow_item_gre *spec __rte_unused = item->spec;
3427 	const struct rte_flow_item_gre *mask = item->mask;
3428 	int ret;
3429 	const struct rte_flow_item_gre nic_mask = {
3430 		.c_rsvd0_ver = RTE_BE16(0xB000),
3431 		.protocol = RTE_BE16(UINT16_MAX),
3432 	};
3433 
3434 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3435 		return rte_flow_error_set(error, EINVAL,
3436 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3437 					  "protocol filtering not compatible"
3438 					  " with this GRE layer");
3439 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3440 		return rte_flow_error_set(error, ENOTSUP,
3441 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3442 					  "multiple tunnel layers not"
3443 					  " supported");
3444 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3445 		return rte_flow_error_set(error, ENOTSUP,
3446 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3447 					  "L3 Layer is missing");
3448 	if (!mask)
3449 		mask = &rte_flow_item_gre_mask;
3450 	ret = mlx5_flow_item_acceptable
3451 		(item, (const uint8_t *)mask,
3452 		 (const uint8_t *)&nic_mask,
3453 		 sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
3454 		 error);
3455 	if (ret < 0)
3456 		return ret;
3457 #ifndef HAVE_MLX5DV_DR
3458 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
3459 	if (spec && (spec->protocol & mask->protocol))
3460 		return rte_flow_error_set(error, ENOTSUP,
3461 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3462 					  "without MPLS support the"
3463 					  " specification cannot be used for"
3464 					  " filtering");
3465 #endif
3466 #endif
3467 	return 0;
3468 }
3469 
3470 /**
3471  * Validate Geneve item.
3472  *
3473  * @param[in] item
3474  *   Item specification.
3475  * @param[in] item_flags
3476  *   Bit-fields that hold the items detected until now.
3477  * @param[in] dev
3478  *   Pointer to the Ethernet device structure.
3479  * @param[out] error
3480  *   Pointer to error structure.
3481  *
3482  * @return
3483  *   0 on success, a negative errno value otherwise and rte_errno is set.
3484  */
3486 int
3487 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
3488 			       uint64_t item_flags,
3489 			       struct rte_eth_dev *dev,
3490 			       struct rte_flow_error *error)
3491 {
3492 	struct mlx5_priv *priv = dev->data->dev_private;
3493 	const struct rte_flow_item_geneve *spec = item->spec;
3494 	const struct rte_flow_item_geneve *mask = item->mask;
3495 	int ret;
3496 	uint16_t gbhdr;
3497 	uint8_t opt_len = priv->sh->cdev->config.hca_attr.geneve_max_opt_len ?
3498 			  MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
3499 	const struct rte_flow_item_geneve nic_mask = {
3500 		.ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
3501 		.vni = "\xff\xff\xff",
3502 		.protocol = RTE_BE16(UINT16_MAX),
3503 	};
3504 
3505 	if (!priv->sh->cdev->config.hca_attr.tunnel_stateless_geneve_rx)
3506 		return rte_flow_error_set(error, ENOTSUP,
3507 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3508 					  "Geneve is not enabled by device"
3509 					  " parameter and/or not configured in"
3510 					  " firmware");
3511 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3512 		return rte_flow_error_set(error, ENOTSUP,
3513 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3514 					  "multiple tunnel layers not"
3515 					  " supported");
3516 	/*
3517 	 * Verify an outer UDP header is present, as required by the
3518 	 * Geneve specification (RFC 8926).
3519 	 */
3520 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
3521 		return rte_flow_error_set(error, EINVAL,
3522 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3523 					  "no outer UDP layer found");
3524 	if (!mask)
3525 		mask = &rte_flow_item_geneve_mask;
3526 	ret = mlx5_flow_item_acceptable
3527 				  (item, (const uint8_t *)mask,
3528 				   (const uint8_t *)&nic_mask,
3529 				   sizeof(struct rte_flow_item_geneve),
3530 				   MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3531 	if (ret)
3532 		return ret;
3533 	if (spec) {
3534 		gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
3535 		if (MLX5_GENEVE_VER_VAL(gbhdr) ||
3536 		     MLX5_GENEVE_CRITO_VAL(gbhdr) ||
3537 		     MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
3538 			return rte_flow_error_set(error, ENOTSUP,
3539 						  RTE_FLOW_ERROR_TYPE_ITEM,
3540 						  item,
3541 						  "unsupported Geneve header"
3542 						  " fields are being used");
3543 		if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
3544 			return rte_flow_error_set
3545 					(error, ENOTSUP,
3546 					 RTE_FLOW_ERROR_TYPE_ITEM,
3547 					 item,
3548 					 "Unsupported Geneve options length");
3549 	}
3550 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
3551 		return rte_flow_error_set
3552 				    (error, ENOTSUP,
3553 				     RTE_FLOW_ERROR_TYPE_ITEM, item,
3554 				     "Geneve tunnel must be fully defined");
3555 	return 0;
3556 }
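
/*
 * Illustrative sketch (assuming the Geneve header layout of RFC 8926):
 * the first 16-bit word packs Ver(2) | Opt-Len(6) | O(1) | C(1) |
 * Rsvd(6), which is why the nic_mask above exposes 0x3f80 and why the
 * validation extracts fields from the host-order value.
 */
static __rte_unused uint8_t
example_geneve_opt_len(const struct rte_flow_item_geneve *geneve)
{
	/* ver_opt_len_o_c_rsvd0 is big-endian on the wire. */
	uint16_t w = rte_be_to_cpu_16(geneve->ver_opt_len_o_c_rsvd0);

	/* Opt-Len is expressed in 4-byte words, as checked above. */
	return MLX5_GENEVE_OPTLEN_VAL(w);
}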
3557 
3558 /**
3559  * Validate Geneve TLV option item.
3560  *
3561  * @param[in] item
3562  *   Item specification.
3563  * @param[in] last_item
3564  *   Previous validated item in the pattern items.
3565  * @param[in] geneve_item
3566  *   Previous GENEVE item specification.
3567  * @param[in] dev
3568  *   Pointer to the rte_eth_dev structure.
3569  * @param[out] error
3570  *   Pointer to error structure.
3571  *
3572  * @return
3573  *   0 on success, a negative errno value otherwise and rte_errno is set.
3574  */
3575 int
3576 mlx5_flow_validate_item_geneve_opt(const struct rte_flow_item *item,
3577 				   uint64_t last_item,
3578 				   const struct rte_flow_item *geneve_item,
3579 				   struct rte_eth_dev *dev,
3580 				   struct rte_flow_error *error)
3581 {
3582 	struct mlx5_priv *priv = dev->data->dev_private;
3583 	struct mlx5_dev_ctx_shared *sh = priv->sh;
3584 	struct mlx5_geneve_tlv_option_resource *geneve_opt_resource;
3585 	struct mlx5_hca_attr *hca_attr = &sh->cdev->config.hca_attr;
3586 	uint8_t data_max_supported =
3587 			hca_attr->max_geneve_tlv_option_data_len * 4;
3588 	const struct rte_flow_item_geneve *geneve_spec;
3589 	const struct rte_flow_item_geneve *geneve_mask;
3590 	const struct rte_flow_item_geneve_opt *spec = item->spec;
3591 	const struct rte_flow_item_geneve_opt *mask = item->mask;
3592 	unsigned int i;
3593 	unsigned int data_len;
3594 	uint8_t tlv_option_len;
3595 	uint16_t optlen_m, optlen_v;
3596 	const struct rte_flow_item_geneve_opt full_mask = {
3597 		.option_class = RTE_BE16(0xffff),
3598 		.option_type = 0xff,
3599 		.option_len = 0x1f,
3600 	};
3601 
3602 	if (!mask)
3603 		mask = &rte_flow_item_geneve_opt_mask;
3604 	if (!spec)
3605 		return rte_flow_error_set
3606 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3607 			"Geneve TLV opt class/type/length must be specified");
3608 	if ((uint32_t)spec->option_len > MLX5_GENEVE_OPTLEN_MASK)
3609 		return rte_flow_error_set
3610 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3611 			"Geneve TLV opt length exceeds the limit (31)");
3612 	/* Check if class type and length masks are full. */
3613 	if (full_mask.option_class != mask->option_class ||
3614 	    full_mask.option_type != mask->option_type ||
3615 	    full_mask.option_len != (mask->option_len & full_mask.option_len))
3616 		return rte_flow_error_set
3617 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3618 			"Geneve TLV opt class/type/length masks must be full");
3619 	/* Check if length is supported */
3620 	if ((uint32_t)spec->option_len >
3621 			hca_attr->max_geneve_tlv_option_data_len)
3622 		return rte_flow_error_set
3623 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3624 			"Geneve TLV opt length not supported");
3625 	if (hca_attr->max_geneve_tlv_options > 1)
3626 		DRV_LOG(DEBUG,
3627 			"max_geneve_tlv_options supports more than 1 option");
3628 	/* Check GENEVE item preceding. */
3629 	if (!geneve_item || !(last_item & MLX5_FLOW_LAYER_GENEVE))
3630 		return rte_flow_error_set
3631 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3632 			"Geneve opt item must be preceded with Geneve item");
3633 	geneve_spec = geneve_item->spec;
3634 	geneve_mask = geneve_item->mask ? geneve_item->mask :
3635 					  &rte_flow_item_geneve_mask;
3636 	/* Check if GENEVE TLV option size doesn't exceed option length */
3637 	if (geneve_spec && (geneve_mask->ver_opt_len_o_c_rsvd0 ||
3638 			    geneve_spec->ver_opt_len_o_c_rsvd0)) {
3639 		tlv_option_len = spec->option_len & mask->option_len;
3640 		optlen_v = rte_be_to_cpu_16(geneve_spec->ver_opt_len_o_c_rsvd0);
3641 		optlen_v = MLX5_GENEVE_OPTLEN_VAL(optlen_v);
3642 		optlen_m = rte_be_to_cpu_16(geneve_mask->ver_opt_len_o_c_rsvd0);
3643 		optlen_m = MLX5_GENEVE_OPTLEN_VAL(optlen_m);
3644 		if ((optlen_v & optlen_m) <= tlv_option_len)
3645 			return rte_flow_error_set
3646 				(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3647 				 "GENEVE TLV option length exceeds optlen");
3648 	}
3649 	/* Check if length is 0 or data is 0. */
3650 	if (spec->data == NULL || spec->option_len == 0)
3651 		return rte_flow_error_set
3652 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3653 			"Geneve TLV opt with zero data/length not supported");
3654 	/* Check not all data & mask are 0. */
3655 	data_len = spec->option_len * 4;
3656 	if (mask->data == NULL) {
3657 		for (i = 0; i < data_len; i++)
3658 			if (spec->data[i])
3659 				break;
3660 		if (i == data_len)
3661 			return rte_flow_error_set(error, ENOTSUP,
3662 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3663 				"Can't match on Geneve option data 0");
3664 	} else {
3665 		for (i = 0; i < data_len; i++)
3666 			if (spec->data[i] & mask->data[i])
3667 				break;
3668 		if (i == data_len)
3669 			return rte_flow_error_set(error, ENOTSUP,
3670 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3671 				"Can't match on Geneve option data and mask 0");
3672 		/* Check data mask supported. */
3673 		for (i = data_max_supported; i < data_len ; i++)
3674 			if (mask->data[i])
3675 				return rte_flow_error_set(error, ENOTSUP,
3676 					RTE_FLOW_ERROR_TYPE_ITEM, item,
3677 					"Data mask is of unsupported size");
3678 	}
3679 	/* Check GENEVE option is supported in NIC. */
3680 	if (!hca_attr->geneve_tlv_opt)
3681 		return rte_flow_error_set
3682 			(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3683 			"Geneve TLV opt not supported");
3684 	/* Check if we already have geneve option with different type/class. */
3685 	rte_spinlock_lock(&sh->geneve_tlv_opt_sl);
3686 	geneve_opt_resource = sh->geneve_tlv_option_resource;
3687 	if (geneve_opt_resource != NULL)
3688 		if (geneve_opt_resource->option_class != spec->option_class ||
3689 		    geneve_opt_resource->option_type != spec->option_type ||
3690 		    geneve_opt_resource->length != spec->option_len) {
3691 			rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3692 			return rte_flow_error_set(error, ENOTSUP,
3693 				RTE_FLOW_ERROR_TYPE_ITEM, item,
3694 				"Only one Geneve TLV option supported");
3695 		}
3696 	rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3697 	return 0;
3698 }
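
/*
 * Illustrative sketch (not part of the driver): a Geneve TLV option
 * spec/mask pair that passes the checks above -- full class/type/length
 * masks, a non-zero length and non-zero data.  Class, type and data
 * values are made-up placeholders.
 */
static __rte_unused rte_be32_t example_geneve_opt_data[1] = {
	RTE_BE32(0xdeadbeef),
};
static __rte_unused rte_be32_t example_geneve_opt_data_mask[1] = {
	RTE_BE32(UINT32_MAX),
};
static __rte_unused const struct rte_flow_item_geneve_opt
example_geneve_opt_spec = {
	.option_class = RTE_BE16(0x0102),
	.option_type = 0x33,
	.option_len = 1, /* One 4-byte data word. */
	.data = example_geneve_opt_data,
};
static __rte_unused const struct rte_flow_item_geneve_opt
example_geneve_opt_mask = {
	.option_class = RTE_BE16(0xffff),
	.option_type = 0xff,
	.option_len = 0x1f,
	.data = example_geneve_opt_data_mask,
};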
3699 
3700 /**
3701  * Validate MPLS item.
3702  *
3703  * @param[in] dev
3704  *   Pointer to the rte_eth_dev structure.
3705  * @param[in] item
3706  *   Item specification.
3707  * @param[in] item_flags
3708  *   Bit-fields that holds the items detected until now.
3709  * @param[in] prev_layer
3710  *   The protocol layer indicated in previous item.
3711  * @param[out] error
3712  *   Pointer to error structure.
3713  *
3714  * @return
3715  *   0 on success, a negative errno value otherwise and rte_errno is set.
3716  */
3717 int
3718 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
3719 			     const struct rte_flow_item *item __rte_unused,
3720 			     uint64_t item_flags __rte_unused,
3721 			     uint64_t prev_layer __rte_unused,
3722 			     struct rte_flow_error *error)
3723 {
3724 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
3725 	const struct rte_flow_item_mpls *mask = item->mask;
3726 	struct mlx5_priv *priv = dev->data->dev_private;
3727 	int ret;
3728 
3729 	if (!priv->sh->dev_cap.mpls_en)
3730 		return rte_flow_error_set(error, ENOTSUP,
3731 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3732 					  "MPLS not supported or"
3733 					  " disabled in firmware"
3734 					  " configuration.");
3735 	/* MPLS over UDP or GRE is allowed. */
3736 	if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L4_UDP |
3737 			    MLX5_FLOW_LAYER_GRE |
3738 			    MLX5_FLOW_LAYER_GRE_KEY)))
3739 		return rte_flow_error_set(error, EINVAL,
3740 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3741 					  "protocol filtering not compatible"
3742 					  " with MPLS layer");
3743 	/* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
3744 	if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
3745 	    !(item_flags & MLX5_FLOW_LAYER_GRE))
3746 		return rte_flow_error_set(error, ENOTSUP,
3747 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3748 					  "multiple tunnel layers not"
3749 					  " supported");
3750 	if (!mask)
3751 		mask = &rte_flow_item_mpls_mask;
3752 	ret = mlx5_flow_item_acceptable
3753 		(item, (const uint8_t *)mask,
3754 		 (const uint8_t *)&rte_flow_item_mpls_mask,
3755 		 sizeof(struct rte_flow_item_mpls),
3756 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3757 	if (ret < 0)
3758 		return ret;
3759 	return 0;
3760 #else
3761 	return rte_flow_error_set(error, ENOTSUP,
3762 				  RTE_FLOW_ERROR_TYPE_ITEM, item,
3763 				  "MPLS is not supported by Verbs, please"
3764 				  " update.");
3765 #endif
3766 }
3767 
3768 /**
3769  * Validate NVGRE item.
3770  *
3771  * @param[in] item
3772  *   Item specification.
3773  * @param[in] item_flags
3774  *   Bit flags to mark detected items.
3775  * @param[in] target_protocol
3776  *   The next protocol in the previous item.
3777  * @param[out] error
3778  *   Pointer to error structure.
3779  *
3780  * @return
3781  *   0 on success, a negative errno value otherwise and rte_errno is set.
3782  */
3783 int
3784 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
3785 			      uint64_t item_flags,
3786 			      uint8_t target_protocol,
3787 			      struct rte_flow_error *error)
3788 {
3789 	const struct rte_flow_item_nvgre *mask = item->mask;
3790 	int ret;
3791 
3792 	if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3793 		return rte_flow_error_set(error, EINVAL,
3794 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3795 					  "protocol filtering not compatible"
3796 					  " with this GRE layer");
3797 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3798 		return rte_flow_error_set(error, ENOTSUP,
3799 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3800 					  "multiple tunnel layers not"
3801 					  " supported");
3802 	if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3803 		return rte_flow_error_set(error, ENOTSUP,
3804 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3805 					  "L3 Layer is missing");
3806 	if (!mask)
3807 		mask = &rte_flow_item_nvgre_mask;
3808 	ret = mlx5_flow_item_acceptable
3809 		(item, (const uint8_t *)mask,
3810 		 (const uint8_t *)&rte_flow_item_nvgre_mask,
3811 		 sizeof(struct rte_flow_item_nvgre),
3812 		 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3813 	if (ret < 0)
3814 		return ret;
3815 	return 0;
3816 }
3817 
3818 /**
3819  * Validate eCPRI item.
3820  *
3821  * @param[in] item
3822  *   Item specification.
3823  * @param[in] item_flags
3824  *   Bit-fields that holds the items detected until now.
3825  * @param[in] last_item
3826  *   Previous validated item in the pattern items.
3827  * @param[in] ether_type
3828  *   Type in the ethernet layer header (including dot1q).
3829  * @param[in] acc_mask
3830  *   Acceptable mask; if NULL, the internal default mask
3831  *   will be used to check whether item fields are supported.
3832  * @param[out] error
3833  *   Pointer to error structure.
3834  *
3835  * @return
3836  *   0 on success, a negative errno value otherwise and rte_errno is set.
3837  */
3838 int
3839 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
3840 			      uint64_t item_flags,
3841 			      uint64_t last_item,
3842 			      uint16_t ether_type,
3843 			      const struct rte_flow_item_ecpri *acc_mask,
3844 			      struct rte_flow_error *error)
3845 {
3846 	const struct rte_flow_item_ecpri *mask = item->mask;
3847 	const struct rte_flow_item_ecpri nic_mask = {
3848 		.hdr = {
3849 			.common = {
3850 				.u32 =
3851 				RTE_BE32(((const struct rte_ecpri_common_hdr) {
3852 					.type = 0xFF,
3853 					}).u32),
3854 			},
3855 			.dummy[0] = 0xFFFFFFFF,
3856 		},
3857 	};
3858 	const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
3859 					MLX5_FLOW_LAYER_OUTER_VLAN);
3860 	struct rte_flow_item_ecpri mask_lo;
3861 
3862 	if (!(last_item & outer_l2_vlan) &&
3863 	    last_item != MLX5_FLOW_LAYER_OUTER_L4_UDP)
3864 		return rte_flow_error_set(error, EINVAL,
3865 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3866 					  "eCPRI can only follow L2/VLAN layer or UDP layer");
3867 	if ((last_item & outer_l2_vlan) && ether_type &&
3868 	    ether_type != RTE_ETHER_TYPE_ECPRI)
3869 		return rte_flow_error_set(error, EINVAL,
3870 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3871 					  "eCPRI cannot follow an L2/VLAN layer whose ether type is not 0xAEFE");
3872 	if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3873 		return rte_flow_error_set(error, EINVAL,
3874 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3875 					  "eCPRI with tunnel is not supported right now");
3876 	if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
3877 		return rte_flow_error_set(error, ENOTSUP,
3878 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3879 					  "multiple L3 layers not supported");
3880 	else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
3881 		return rte_flow_error_set(error, EINVAL,
3882 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3883 					  "eCPRI cannot coexist with a TCP layer");
3884 	/* In specification, eCPRI could be over UDP layer. */
3885 	else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
3886 		return rte_flow_error_set(error, EINVAL,
3887 					  RTE_FLOW_ERROR_TYPE_ITEM, item,
3888 					  "eCPRI over UDP layer is not supported right now");
3889 	/* Mask for type field in common header could be zero. */
3890 	if (!mask)
3891 		mask = &rte_flow_item_ecpri_mask;
3892 	mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
3893 	/* Input mask is in big-endian format. */
3894 	if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
3895 		return rte_flow_error_set(error, EINVAL,
3896 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3897 					  "partial mask is not supported for protocol");
3898 	else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
3899 		return rte_flow_error_set(error, EINVAL,
3900 					  RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3901 					  "message header mask must be after a type mask");
3902 	return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
3903 					 acc_mask ? (const uint8_t *)acc_mask
3904 						  : (const uint8_t *)&nic_mask,
3905 					 sizeof(struct rte_flow_item_ecpri),
3906 					 MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3907 }
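
/*
 * Illustrative sketch (not part of the driver): calling the eCPRI
 * validator with placeholder layer flags.  The flags assume the item
 * directly follows an outer L2 header whose ether type is 0xAEFE; a
 * real pattern walk computes item_flags/last_item incrementally.
 */
static __rte_unused int
example_validate_ecpri(const struct rte_flow_item *item,
		       struct rte_flow_error *error)
{
	return mlx5_flow_validate_item_ecpri(item, MLX5_FLOW_LAYER_OUTER_L2,
					     MLX5_FLOW_LAYER_OUTER_L2,
					     RTE_ETHER_TYPE_ECPRI, NULL,
					     error);
}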
3908 
3909 static int
3910 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
3911 		   const struct rte_flow_attr *attr __rte_unused,
3912 		   const struct rte_flow_item items[] __rte_unused,
3913 		   const struct rte_flow_action actions[] __rte_unused,
3914 		   bool external __rte_unused,
3915 		   int hairpin __rte_unused,
3916 		   struct rte_flow_error *error)
3917 {
3918 	return rte_flow_error_set(error, ENOTSUP,
3919 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3920 }
3921 
3922 static struct mlx5_flow *
3923 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
3924 		  const struct rte_flow_attr *attr __rte_unused,
3925 		  const struct rte_flow_item items[] __rte_unused,
3926 		  const struct rte_flow_action actions[] __rte_unused,
3927 		  struct rte_flow_error *error)
3928 {
3929 	rte_flow_error_set(error, ENOTSUP,
3930 			   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3931 	return NULL;
3932 }
3933 
3934 static int
3935 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
3936 		    struct mlx5_flow *dev_flow __rte_unused,
3937 		    const struct rte_flow_attr *attr __rte_unused,
3938 		    const struct rte_flow_item items[] __rte_unused,
3939 		    const struct rte_flow_action actions[] __rte_unused,
3940 		    struct rte_flow_error *error)
3941 {
3942 	return rte_flow_error_set(error, ENOTSUP,
3943 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3944 }
3945 
3946 static int
3947 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
3948 		struct rte_flow *flow __rte_unused,
3949 		struct rte_flow_error *error)
3950 {
3951 	return rte_flow_error_set(error, ENOTSUP,
3952 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3953 }
3954 
3955 static void
3956 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
3957 		 struct rte_flow *flow __rte_unused)
3958 {
3959 }
3960 
3961 static void
3962 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
3963 		  struct rte_flow *flow __rte_unused)
3964 {
3965 }
3966 
3967 static int
3968 flow_null_query(struct rte_eth_dev *dev __rte_unused,
3969 		struct rte_flow *flow __rte_unused,
3970 		const struct rte_flow_action *actions __rte_unused,
3971 		void *data __rte_unused,
3972 		struct rte_flow_error *error)
3973 {
3974 	return rte_flow_error_set(error, ENOTSUP,
3975 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3976 }
3977 
3978 static int
3979 flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused,
3980 		      uint32_t domains __rte_unused,
3981 		      uint32_t flags __rte_unused)
3982 {
3983 	return 0;
3984 }
3985 
3986 int
3987 flow_null_get_aged_flows(struct rte_eth_dev *dev,
3988 		    void **context __rte_unused,
3989 		    uint32_t nb_contexts __rte_unused,
3990 		    struct rte_flow_error *error __rte_unused)
3991 {
3992 	DRV_LOG(ERR, "port %u get aged flows is not supported.",
3993 		dev->data->port_id);
3994 	return -ENOTSUP;
3995 }
3996 
3997 uint32_t
3998 flow_null_counter_allocate(struct rte_eth_dev *dev)
3999 {
4000 	DRV_LOG(ERR, "port %u counter allocate is not supported.",
4001 		dev->data->port_id);
4002 	return 0;
4003 }
4004 
4005 void
4006 flow_null_counter_free(struct rte_eth_dev *dev,
4007 			uint32_t counter __rte_unused)
4008 {
4009 	DRV_LOG(ERR, "port %u counter free is not supported.",
4010 		 dev->data->port_id);
4011 }
4012 
4013 int
4014 flow_null_counter_query(struct rte_eth_dev *dev,
4015 			uint32_t counter __rte_unused,
4016 			bool clear __rte_unused,
4017 			uint64_t *pkts __rte_unused,
4018 			uint64_t *bytes __rte_unused,
4019 			void **action __rte_unused)
4020 {
4021 	DRV_LOG(ERR, "port %u counter query is not supported.",
4022 		 dev->data->port_id);
4023 	return -ENOTSUP;
4024 }
4025 
4026 /* Void driver to protect against NULL pointer dereference. */
4027 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
4028 	.validate = flow_null_validate,
4029 	.prepare = flow_null_prepare,
4030 	.translate = flow_null_translate,
4031 	.apply = flow_null_apply,
4032 	.remove = flow_null_remove,
4033 	.destroy = flow_null_destroy,
4034 	.query = flow_null_query,
4035 	.sync_domain = flow_null_sync_domain,
4036 	.get_aged_flows = flow_null_get_aged_flows,
4037 	.counter_alloc = flow_null_counter_allocate,
4038 	.counter_free = flow_null_counter_free,
4039 	.counter_query = flow_null_counter_query
4040 };
4041 
4042 /**
4043  * Select flow driver type according to flow attributes and device
4044  * configuration.
4045  *
4046  * @param[in] dev
4047  *   Pointer to the dev structure.
4048  * @param[in] attr
4049  *   Pointer to the flow attributes.
4050  *
4051  * @return
4052  *   Flow driver type on success, MLX5_FLOW_TYPE_MAX otherwise.
4053  */
4054 static enum mlx5_flow_drv_type
4055 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
4056 {
4057 	struct mlx5_priv *priv = dev->data->dev_private;
4058 	/* The OS may determine a specific flow type (DV, VERBS) first. */
4059 	enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
4060 
4061 	if (type != MLX5_FLOW_TYPE_MAX)
4062 		return type;
4063 	/*
4064 	 * Currently when dv_flow_en == 2, only HW steering engine is
4065 	 * supported. New engines can also be chosen here if ready.
4066 	 */
4067 	if (priv->sh->config.dv_flow_en == 2)
4068 		return MLX5_FLOW_TYPE_HW;
4069 	if (!attr)
4070 		return MLX5_FLOW_TYPE_MIN;
4071 	/* If no OS specific type - continue with DV/VERBS selection */
4072 	if (attr->transfer && priv->sh->config.dv_esw_en)
4073 		type = MLX5_FLOW_TYPE_DV;
4074 	if (!attr->transfer)
4075 		type = priv->sh->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
4076 						     MLX5_FLOW_TYPE_VERBS;
4077 	return type;
4078 }
4079 
4080 #define flow_get_drv_ops(type) flow_drv_ops[type]
4081 
4082 /**
4083  * Flow driver validation API. This abstracts calling driver specific functions.
4084  * The type of flow driver is determined according to flow attributes.
4085  *
4086  * @param[in] dev
4087  *   Pointer to the dev structure.
4088  * @param[in] attr
4089  *   Pointer to the flow attributes.
4090  * @param[in] items
4091  *   Pointer to the list of items.
4092  * @param[in] actions
4093  *   Pointer to the list of actions.
4094  * @param[in] external
4095  *   This flow rule is created by a request external to the PMD.
4096  * @param[in] hairpin
4097  *   Number of hairpin TX actions, 0 means classic flow.
4098  * @param[out] error
4099  *   Pointer to the error structure.
4100  *
4101  * @return
4102  *   0 on success, a negative errno value otherwise and rte_errno is set.
4103  */
4104 static inline int
4105 flow_drv_validate(struct rte_eth_dev *dev,
4106 		  const struct rte_flow_attr *attr,
4107 		  const struct rte_flow_item items[],
4108 		  const struct rte_flow_action actions[],
4109 		  bool external, int hairpin, struct rte_flow_error *error)
4110 {
4111 	const struct mlx5_flow_driver_ops *fops;
4112 	enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
4113 
4114 	fops = flow_get_drv_ops(type);
4115 	return fops->validate(dev, attr, items, actions, external,
4116 			      hairpin, error);
4117 }
4118 
4119 /**
4120  * Flow driver preparation API. This abstracts calling driver specific
4121  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
4122  * calculates the size of memory required for device flow, allocates the memory,
4123  * initializes the device flow and returns the pointer.
4124  *
4125  * @note
4126  *   This function initializes device flow structure such as dv or verbs in
4127  *   struct mlx5_flow. However, it is caller's responsibility to initialize the
4128  *   rest. For example, adding returning device flow to flow->dev_flow list and
4129  *   setting backward reference to the flow should be done out of this function.
4130  *   layers field is not filled either.
4131  *
4132  * @param[in] dev
4133  *   Pointer to the dev structure.
4134  * @param[in] attr
4135  *   Pointer to the flow attributes.
4136  * @param[in] items
4137  *   Pointer to the list of items.
4138  * @param[in] actions
4139  *   Pointer to the list of actions.
4140  * @param[in] flow_idx
4141  *   The memory pool index of the flow.
4142  * @param[out] error
4143  *   Pointer to the error structure.
4144  *
4145  * @return
4146  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
4147  */
4148 static inline struct mlx5_flow *
4149 flow_drv_prepare(struct rte_eth_dev *dev,
4150 		 const struct rte_flow *flow,
4151 		 const struct rte_flow_attr *attr,
4152 		 const struct rte_flow_item items[],
4153 		 const struct rte_flow_action actions[],
4154 		 uint32_t flow_idx,
4155 		 struct rte_flow_error *error)
4156 {
4157 	const struct mlx5_flow_driver_ops *fops;
4158 	enum mlx5_flow_drv_type type = flow->drv_type;
4159 	struct mlx5_flow *mlx5_flow = NULL;
4160 
4161 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
4162 	fops = flow_get_drv_ops(type);
4163 	mlx5_flow = fops->prepare(dev, attr, items, actions, error);
4164 	if (mlx5_flow)
4165 		mlx5_flow->flow_idx = flow_idx;
4166 	return mlx5_flow;
4167 }
4168 
4169 /**
4170  * Flow driver translation API. This abstracts calling driver specific
4171  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
4172  * translates a generic flow into a driver flow. flow_drv_prepare() must
4173  * precede.
4174  *
4175  * @note
4176  *   dev_flow->layers could be filled as a result of parsing during translation
4177  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
4178  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
4179  *   flow->actions could be overwritten even though all the expanded dev_flows
4180  *   have the same actions.
4181  *
4182  * @param[in] dev
4183  *   Pointer to the rte dev structure.
4184  * @param[in, out] dev_flow
4185  *   Pointer to the mlx5 flow.
4186  * @param[in] attr
4187  *   Pointer to the flow attributes.
4188  * @param[in] items
4189  *   Pointer to the list of items.
4190  * @param[in] actions
4191  *   Pointer to the list of actions.
4192  * @param[out] error
4193  *   Pointer to the error structure.
4194  *
4195  * @return
4196  *   0 on success, a negative errno value otherwise and rte_errno is set.
4197  */
4198 static inline int
4199 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
4200 		   const struct rte_flow_attr *attr,
4201 		   const struct rte_flow_item items[],
4202 		   const struct rte_flow_action actions[],
4203 		   struct rte_flow_error *error)
4204 {
4205 	const struct mlx5_flow_driver_ops *fops;
4206 	enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
4207 
4208 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
4209 	fops = flow_get_drv_ops(type);
4210 	return fops->translate(dev, dev_flow, attr, items, actions, error);
4211 }
4212 
4213 /**
4214  * Flow driver apply API. This abstracts calling driver specific functions.
4215  * Parent flow (rte_flow) should have driver type (drv_type). It applies
4216  * translated driver flows on to device. flow_drv_translate() must precede.
4217  *
4218  * @param[in] dev
4219  *   Pointer to Ethernet device structure.
4220  * @param[in, out] flow
4221  *   Pointer to flow structure.
4222  * @param[out] error
4223  *   Pointer to error structure.
4224  *
4225  * @return
4226  *   0 on success, a negative errno value otherwise and rte_errno is set.
4227  */
4228 static inline int
4229 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
4230 	       struct rte_flow_error *error)
4231 {
4232 	const struct mlx5_flow_driver_ops *fops;
4233 	enum mlx5_flow_drv_type type = flow->drv_type;
4234 
4235 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
4236 	fops = flow_get_drv_ops(type);
4237 	return fops->apply(dev, flow, error);
4238 }
4239 
4240 /**
4241  * Flow driver destroy API. This abstracts calling driver specific functions.
4242  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
4243  * on device and releases resources of the flow.
4244  *
4245  * @param[in] dev
4246  *   Pointer to Ethernet device.
4247  * @param[in, out] flow
4248  *   Pointer to flow structure.
4249  */
4250 static inline void
4251 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
4252 {
4253 	const struct mlx5_flow_driver_ops *fops;
4254 	enum mlx5_flow_drv_type type = flow->drv_type;
4255 
4256 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
4257 	fops = flow_get_drv_ops(type);
4258 	fops->destroy(dev, flow);
4259 }
4260 
4261 /**
4262  * Flow driver find RSS policy tbl API. This abstracts calling driver
4263  * specific functions. Parent flow (rte_flow) should have driver
4264  * type (drv_type). It will find the RSS policy table that has the rss_desc.
4265  *
4266  * @param[in] dev
4267  *   Pointer to Ethernet device.
4268  * @param[in, out] flow
4269  *   Pointer to flow structure.
4270  * @param[in] policy
4271  *   Pointer to meter policy table.
4272  * @param[in] rss_desc
4273  *   Pointer to the rss_desc array, one entry per meter color.
4274  */
4275 static struct mlx5_flow_meter_sub_policy *
4276 flow_drv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
4277 		struct rte_flow *flow,
4278 		struct mlx5_flow_meter_policy *policy,
4279 		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS])
4280 {
4281 	const struct mlx5_flow_driver_ops *fops;
4282 	enum mlx5_flow_drv_type type = flow->drv_type;
4283 
4284 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
4285 	fops = flow_get_drv_ops(type);
4286 	return fops->meter_sub_policy_rss_prepare(dev, policy, rss_desc);
4287 }
4288 
4289 /**
4290  * Flow driver color tag rule API. This abstracts calling driver
4291  * specific functions. Parent flow (rte_flow) should have driver
4292  * type (drv_type). It will create the color tag rules in hierarchy meter.
4293  *
4294  * @param[in] dev
4295  *   Pointer to Ethernet device.
4296  * @param[in, out] flow
4297  *   Pointer to flow structure.
4298  * @param[in] fm
4299  *   Pointer to flow meter structure.
4300  * @param[in] src_port
4301  *   The src port this extra rule should use.
4302  * @param[in] item
4303  *   The src port id match item.
4304  * @param[out] error
4305  *   Pointer to error structure.
4306  */
4307 static int
4308 flow_drv_mtr_hierarchy_rule_create(struct rte_eth_dev *dev,
4309 		struct rte_flow *flow,
4310 		struct mlx5_flow_meter_info *fm,
4311 		int32_t src_port,
4312 		const struct rte_flow_item *item,
4313 		struct rte_flow_error *error)
4314 {
4315 	const struct mlx5_flow_driver_ops *fops;
4316 	enum mlx5_flow_drv_type type = flow->drv_type;
4317 
4318 	MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
4319 	fops = flow_get_drv_ops(type);
4320 	return fops->meter_hierarchy_rule_create(dev, fm,
4321 						src_port, item, error);
4322 }
4323 
4324 /**
4325  * Get RSS action from the action list.
4326  *
4327  * @param[in] dev
4328  *   Pointer to Ethernet device.
4329  * @param[in] actions
4330  *   Pointer to the list of actions.
4331  * @param[in] flow
4332  *   Parent flow structure pointer.
4333  *
4334  * @return
4335  *   Pointer to the RSS action if exist, else return NULL.
4336  */
4337 static const struct rte_flow_action_rss*
4338 flow_get_rss_action(struct rte_eth_dev *dev,
4339 		    const struct rte_flow_action actions[])
4340 {
4341 	struct mlx5_priv *priv = dev->data->dev_private;
4342 	const struct rte_flow_action_rss *rss = NULL;
4343 	struct mlx5_meter_policy_action_container *acg;
4344 	struct mlx5_meter_policy_action_container *acy;
4345 
4346 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4347 		switch (actions->type) {
4348 		case RTE_FLOW_ACTION_TYPE_RSS:
4349 			rss = actions->conf;
4350 			break;
4351 		case RTE_FLOW_ACTION_TYPE_SAMPLE:
4352 		{
4353 			const struct rte_flow_action_sample *sample =
4354 								actions->conf;
4355 			const struct rte_flow_action *act = sample->actions;
4356 			for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++)
4357 				if (act->type == RTE_FLOW_ACTION_TYPE_RSS)
4358 					rss = act->conf;
4359 			break;
4360 		}
4361 		case RTE_FLOW_ACTION_TYPE_METER:
4362 		{
4363 			uint32_t mtr_idx;
4364 			struct mlx5_flow_meter_info *fm;
4365 			struct mlx5_flow_meter_policy *policy;
4366 			const struct rte_flow_action_meter *mtr = actions->conf;
4367 
4368 			fm = mlx5_flow_meter_find(priv, mtr->mtr_id, &mtr_idx);
4369 			if (fm && !fm->def_policy) {
4370 				policy = mlx5_flow_meter_policy_find(dev,
4371 						fm->policy_id, NULL);
4372 				MLX5_ASSERT(policy);
4373 				if (policy->is_hierarchy) {
4374 					policy =
4375 				mlx5_flow_meter_hierarchy_get_final_policy(dev,
4376 									policy);
4377 					if (!policy)
4378 						return NULL;
4379 				}
4380 				if (policy->is_rss) {
4381 					acg =
4382 					&policy->act_cnt[RTE_COLOR_GREEN];
4383 					acy =
4384 					&policy->act_cnt[RTE_COLOR_YELLOW];
4385 					if (acg->fate_action ==
4386 					    MLX5_FLOW_FATE_SHARED_RSS)
4387 						rss = acg->rss->conf;
4388 					else if (acy->fate_action ==
4389 						 MLX5_FLOW_FATE_SHARED_RSS)
4390 						rss = acy->rss->conf;
4391 				}
4392 			}
4393 			break;
4394 		}
4395 		default:
4396 			break;
4397 		}
4398 	}
4399 	return rss;
4400 }
4401 
4402 /**
4403  * Get ASO age action by index.
4404  *
4405  * @param[in] dev
4406  *   Pointer to the Ethernet device structure.
4407  * @param[in] age_idx
4408  *   Index to the ASO age action.
4409  *
4410  * @return
4411  *   The specified ASO age action.
4412  */
4413 struct mlx5_aso_age_action*
4414 flow_aso_age_get_by_idx(struct rte_eth_dev *dev, uint32_t age_idx)
4415 {
4416 	uint16_t pool_idx = age_idx & UINT16_MAX;
4417 	uint16_t offset = (age_idx >> 16) & UINT16_MAX;
4418 	struct mlx5_priv *priv = dev->data->dev_private;
4419 	struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
4420 	struct mlx5_aso_age_pool *pool;
4421 
4422 	rte_rwlock_read_lock(&mng->resize_rwl);
4423 	pool = mng->pools[pool_idx];
4424 	rte_rwlock_read_unlock(&mng->resize_rwl);
4425 	return &pool->actions[offset - 1];
4426 }
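
/*
 * Illustrative sketch (not part of the driver): the 32-bit age_idx used
 * above packs a 1-based action offset in the high 16 bits and the pool
 * index in the low 16 bits.
 */
static __rte_unused uint32_t
example_aso_age_idx_make(uint16_t pool_idx, uint16_t offset_1based)
{
	return ((uint32_t)offset_1based << 16) | pool_idx;
}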
4427 
4428 /* maps indirect action to translated direct in some actions array */
4429 struct mlx5_translated_action_handle {
4430 	struct rte_flow_action_handle *action; /**< Indirect action handle. */
4431 	int index; /**< Index in related array of rte_flow_action. */
4432 };
4433 
4434 /**
4435  * Translates actions of type RTE_FLOW_ACTION_TYPE_INDIRECT to related
4436  * direct action if translation is possible.
4437  * This functionality is used to run the same execution path for both direct
4438  * and indirect actions on flow create. All necessary preparations for
4439  * indirect action handling should be performed on the *handle* actions list
4440  * returned from this call.
4441  *
4442  * @param[in] dev
4443  *   Pointer to Ethernet device.
4444  * @param[in] actions
4445  *   List of actions to translate.
4446  * @param[out] handle
4447  *   List to store translated indirect action object handles.
4448  * @param[in, out] indir_n
4449  *   Size of *handle* array. On return should be updated with number of
4450  *   indirect actions retrieved from the *actions* list.
4451  * @param[out] translated_actions
4452  *   List of actions where all indirect actions were translated to direct
4453  *   if possible. NULL if no translation took place.
4454  * @param[out] error
4455  *   Pointer to the error structure.
4456  *
4457  * @return
4458  *   0 on success, a negative errno value otherwise and rte_errno is set.
4459  */
4460 static int
4461 flow_action_handles_translate(struct rte_eth_dev *dev,
4462 			      const struct rte_flow_action actions[],
4463 			      struct mlx5_translated_action_handle *handle,
4464 			      int *indir_n,
4465 			      struct rte_flow_action **translated_actions,
4466 			      struct rte_flow_error *error)
4467 {
4468 	struct mlx5_priv *priv = dev->data->dev_private;
4469 	struct rte_flow_action *translated = NULL;
4470 	size_t actions_size;
4471 	int n;
4472 	int copied_n = 0;
4473 	struct mlx5_translated_action_handle *handle_end = NULL;
4474 
4475 	for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) {
4476 		if (actions[n].type != RTE_FLOW_ACTION_TYPE_INDIRECT)
4477 			continue;
4478 		if (copied_n == *indir_n) {
4479 			return rte_flow_error_set
4480 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM,
4481 				 NULL, "too many shared actions");
4482 		}
4483 		rte_memcpy(&handle[copied_n].action, &actions[n].conf,
4484 			   sizeof(actions[n].conf));
4485 		handle[copied_n].index = n;
4486 		copied_n++;
4487 	}
4488 	n++; /* Account for the terminating END action. */
4489 	*indir_n = copied_n;
4490 	if (!copied_n)
4491 		return 0;
4492 	actions_size = sizeof(struct rte_flow_action) * n;
4493 	translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY);
4494 	if (!translated) {
4495 		rte_errno = ENOMEM;
4496 		return -ENOMEM;
4497 	}
4498 	memcpy(translated, actions, actions_size);
4499 	for (handle_end = handle + copied_n; handle < handle_end; handle++) {
4500 		struct mlx5_shared_action_rss *shared_rss;
4501 		uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
4502 		uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
4503 		uint32_t idx = act_idx &
4504 			       ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
4505 
4506 		switch (type) {
4507 		case MLX5_INDIRECT_ACTION_TYPE_RSS:
4508 			shared_rss = mlx5_ipool_get
4509 			  (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx);
4510 			translated[handle->index].type =
4511 				RTE_FLOW_ACTION_TYPE_RSS;
4512 			translated[handle->index].conf =
4513 				&shared_rss->origin;
4514 			break;
4515 		case MLX5_INDIRECT_ACTION_TYPE_COUNT:
4516 			translated[handle->index].type =
4517 						(enum rte_flow_action_type)
4518 						MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
4519 			translated[handle->index].conf = (void *)(uintptr_t)idx;
4520 			break;
4521 		case MLX5_INDIRECT_ACTION_TYPE_METER_MARK:
4522 			translated[handle->index].type =
4523 						(enum rte_flow_action_type)
4524 						MLX5_RTE_FLOW_ACTION_TYPE_METER_MARK;
4525 			translated[handle->index].conf = (void *)(uintptr_t)idx;
4526 			break;
4527 		case MLX5_INDIRECT_ACTION_TYPE_AGE:
4528 			if (priv->sh->flow_hit_aso_en) {
4529 				translated[handle->index].type =
4530 					(enum rte_flow_action_type)
4531 					MLX5_RTE_FLOW_ACTION_TYPE_AGE;
4532 				translated[handle->index].conf =
4533 							 (void *)(uintptr_t)idx;
4534 				break;
4535 			}
4536 			/* Fall-through */
4537 		case MLX5_INDIRECT_ACTION_TYPE_CT:
4538 			if (priv->sh->ct_aso_en) {
4539 				translated[handle->index].type =
4540 					RTE_FLOW_ACTION_TYPE_CONNTRACK;
4541 				translated[handle->index].conf =
4542 							 (void *)(uintptr_t)idx;
4543 				break;
4544 			}
4545 			/* Fall-through */
4546 		default:
4547 			mlx5_free(translated);
4548 			return rte_flow_error_set
4549 				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
4550 				 NULL, "invalid indirect action type");
4551 		}
4552 	}
4553 	*translated_actions = translated;
4554 	return 0;
4555 }
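
/*
 * Illustrative sketch (not part of the driver): an indirect action
 * handle is a pointer-sized integer packing the action type above
 * MLX5_INDIRECT_ACTION_TYPE_OFFSET and the ipool index below it,
 * mirroring the decode in the translation loop above.
 */
static __rte_unused void
example_decode_handle(const struct rte_flow_action_handle *handle,
		      uint32_t *type, uint32_t *idx)
{
	uint32_t act_idx = (uint32_t)(uintptr_t)handle;

	*type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
	*idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
}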
4556 
4557 /**
4558  * Get Shared RSS action from the action list.
4559  *
4560  * @param[in] dev
4561  *   Pointer to Ethernet device.
4562  * @param[in] handle
4563  *   Pointer to the list of translated action handles.
4564  * @param[in] shared_n
4565  *   Length of the handle list.
4566  *
4567  * @return
4568  *   The MLX5 RSS action ID if it exists, 0 otherwise.
4569  */
4570 static uint32_t
4571 flow_get_shared_rss_action(struct rte_eth_dev *dev,
4572 			   struct mlx5_translated_action_handle *handle,
4573 			   int shared_n)
4574 {
4575 	struct mlx5_translated_action_handle *handle_end;
4576 	struct mlx5_priv *priv = dev->data->dev_private;
4577 	struct mlx5_shared_action_rss *shared_rss;
4578 
4579 
4581 		uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
4582 		uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
4583 		uint32_t idx = act_idx &
4584 			       ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
4585 		switch (type) {
4586 		case MLX5_INDIRECT_ACTION_TYPE_RSS:
4587 			shared_rss = mlx5_ipool_get
4588 				(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
4589 									   idx);
4590 			__atomic_fetch_add(&shared_rss->refcnt, 1,
4591 					   __ATOMIC_RELAXED);
4592 			return idx;
4593 		default:
4594 			break;
4595 		}
4596 	}
4597 	return 0;
4598 }
4599 
4600 static unsigned int
4601 find_graph_root(uint32_t rss_level)
4602 {
4603 	return rss_level < 2 ? MLX5_EXPANSION_ROOT :
4604 			       MLX5_EXPANSION_ROOT_OUTER;
4605 }
4606 
4607 /**
4608  *  Get layer flags from the prefix flow.
4609  *
4610  *  Some flows may be split into several subflows: the prefix subflow gets the
4611  *  match items and the suffix subflow gets the actions.
4612  *  Some actions need the user-defined match item flags to get the details for
4613  *  the action.
4614  *  This function helps the suffix flow to get the item layer flags from the
4615  *  prefix subflow.
4616  *
4617  * @param[in] dev_flow
4618  *   Pointer to the created prefix subflow.
4619  *
4620  * @return
4621  *   The layers obtained from the prefix subflow.
4622  */
4623 static inline uint64_t
4624 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
4625 {
4626 	uint64_t layers = 0;
4627 
4628 	/*
4629 	 * The layer bits could be cached in a local variable, but usually
4630 	 * the compiler does that optimization for us.
4631 	 * If there is no decap action, use the layers directly.
4632 	 */
4633 	if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
4634 		return dev_flow->handle->layers;
4635 	/* Convert L3 layers with decap action. */
4636 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
4637 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
4638 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
4639 		layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
4640 	/* Convert L4 layers with decap action.  */
4641 	if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
4642 		layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
4643 	else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
4644 		layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
4645 	return layers;
4646 }
4647 
4648 /**
4649  * Get metadata split action information.
4650  *
4651  * @param[in] actions
4652  *   Pointer to the list of actions.
4653  * @param[out] qrss
4654  *   Pointer to the return pointer.
4655  * @param[out] qrss_type
4656  *   Pointer to the action type to return. RTE_FLOW_ACTION_TYPE_END is returned
4657  *   if no QUEUE/RSS is found.
4658  * @param[out] encap_idx
4659  *   Pointer to the index of the encap action if exists, otherwise the last
4660  *   action index.
4661  *
4662  * @return
4663  *   Total number of actions.
4664  */
4665 static int
4666 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
4667 				       const struct rte_flow_action **qrss,
4668 				       int *encap_idx)
4669 {
4670 	const struct rte_flow_action_raw_encap *raw_encap;
4671 	int actions_n = 0;
4672 	int raw_decap_idx = -1;
4673 
4674 	*encap_idx = -1;
4675 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4676 		switch (actions->type) {
4677 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4678 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4679 			*encap_idx = actions_n;
4680 			break;
4681 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4682 			raw_decap_idx = actions_n;
4683 			break;
4684 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4685 			raw_encap = actions->conf;
4686 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4687 				*encap_idx = raw_decap_idx != -1 ?
4688 						      raw_decap_idx : actions_n;
4689 			break;
4690 		case RTE_FLOW_ACTION_TYPE_QUEUE:
4691 		case RTE_FLOW_ACTION_TYPE_RSS:
4692 			*qrss = actions;
4693 			break;
4694 		default:
4695 			break;
4696 		}
4697 		actions_n++;
4698 	}
4699 	if (*encap_idx == -1)
4700 		*encap_idx = actions_n;
4701 	/* Count RTE_FLOW_ACTION_TYPE_END. */
4702 	return actions_n + 1;
4703 }
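
/*
 * Illustrative sketch (not part of the driver): for the action list
 * below, flow_parse_metadata_split_actions_info() points *qrss at the
 * QUEUE action and, because the big RAW_ENCAP is preceded by RAW_DECAP,
 * reports encap_idx = 0 (the decap position).  The encap size is a
 * placeholder just above the decision threshold.
 */
static __rte_unused void
example_metadata_split_parse(void)
{
	const struct rte_flow_action_raw_encap big_encap = {
		.size = MLX5_ENCAPSULATION_DECISION_SIZE + 1,
	};
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_RAW_DECAP },
		{ .type = RTE_FLOW_ACTION_TYPE_RAW_ENCAP, .conf = &big_encap },
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	const struct rte_flow_action *qrss = NULL;
	int encap_idx;
	int n = flow_parse_metadata_split_actions_info(actions, &qrss,
						       &encap_idx);

	(void)n; /* n == 4: three actions plus the END terminator. */
}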
4704 
4705 /**
4706  * Check if the action will change packet.
4707  *
4708  * @param dev
4709  *   Pointer to Ethernet device.
4710  * @param[in] type
4711  *   Action type.
4712  *
4713  * @return
4714  *   true if action will change packet, false otherwise.
4715  */
4716 static bool flow_check_modify_action_type(struct rte_eth_dev *dev,
4717 					  enum rte_flow_action_type type)
4718 {
4719 	struct mlx5_priv *priv = dev->data->dev_private;
4720 
4721 	switch (type) {
4722 	case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
4723 	case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
4724 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
4725 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
4726 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
4727 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
4728 	case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
4729 	case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
4730 	case RTE_FLOW_ACTION_TYPE_DEC_TTL:
4731 	case RTE_FLOW_ACTION_TYPE_SET_TTL:
4732 	case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
4733 	case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
4734 	case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
4735 	case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
4736 	case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
4737 	case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
4738 	case RTE_FLOW_ACTION_TYPE_SET_META:
4739 	case RTE_FLOW_ACTION_TYPE_SET_TAG:
4740 	case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
4741 	case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4742 	case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4743 	case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4744 	case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4745 	case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4746 	case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4747 	case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4748 	case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4749 	case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4750 	case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
4751 		return true;
4752 	case RTE_FLOW_ACTION_TYPE_FLAG:
4753 	case RTE_FLOW_ACTION_TYPE_MARK:
4754 		if (priv->sh->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
4755 		    priv->sh->config.dv_xmeta_en != MLX5_XMETA_MODE_META32_HWS)
4756 			return true;
4757 		else
4758 			return false;
4759 	default:
4760 		return false;
4761 	}
4762 }
4763 
4764 /**
4765  * Check meter action from the action list.
4766  *
4767  * @param dev
4768  *   Pointer to Ethernet device.
4769  * @param[in] actions
4770  *   Pointer to the list of actions.
4771  * @param[out] has_mtr
4772  *   Pointer to the meter exist flag.
4773  * @param[out] has_modify
4774  *   Pointer to the flag indicating the presence of a packet-modifying action.
4775  * @param[out] meter_id
4776  *   Pointer to the meter id.
4777  *
4778  * @return
4779  *   Total number of actions.
4780  */
4781 static int
4782 flow_check_meter_action(struct rte_eth_dev *dev,
4783 			const struct rte_flow_action actions[],
4784 			bool *has_mtr, bool *has_modify, uint32_t *meter_id)
4785 {
4786 	const struct rte_flow_action_meter *mtr = NULL;
4787 	int actions_n = 0;
4788 
4789 	MLX5_ASSERT(has_mtr);
4790 	*has_mtr = false;
4791 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4792 		switch (actions->type) {
4793 		case RTE_FLOW_ACTION_TYPE_METER:
4794 			mtr = actions->conf;
4795 			*meter_id = mtr->mtr_id;
4796 			*has_mtr = true;
4797 			break;
4798 		default:
4799 			break;
4800 		}
4801 		if (!*has_mtr)
4802 			*has_modify |= flow_check_modify_action_type(dev,
4803 								actions->type);
4804 		actions_n++;
4805 	}
4806 	/* Count RTE_FLOW_ACTION_TYPE_END. */
4807 	return actions_n + 1;
4808 }
4809 
4810 /**
4811  * Check if the flow should be split due to hairpin.
4812  * The reason for the split is that in current HW we can't
4813  * support encap and push-vlan on Rx, so if a flow contains
4814  * these actions we move it to Tx.
4815  *
4816  * @param dev
4817  *   Pointer to Ethernet device.
4818  * @param[in] attr
4819  *   Flow rule attributes.
4820  * @param[in] actions
4821  *   Associated actions (list terminated by the END action).
4822  *
4823  * @return
4824  *   > 0 the number of actions and the flow should be split,
4825  *   0 when no split required.
4826  */
4827 static int
4828 flow_check_hairpin_split(struct rte_eth_dev *dev,
4829 			 const struct rte_flow_attr *attr,
4830 			 const struct rte_flow_action actions[])
4831 {
4832 	int queue_action = 0;
4833 	int action_n = 0;
4834 	int split = 0;
4835 	int push_vlan = 0;
4836 	const struct rte_flow_action_queue *queue;
4837 	const struct rte_flow_action_rss *rss;
4838 	const struct rte_flow_action_raw_encap *raw_encap;
4839 	const struct rte_eth_hairpin_conf *conf;
4840 
4841 	if (!attr->ingress)
4842 		return 0;
4843 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4844 		if (actions->type == RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN)
4845 			push_vlan = 1;
4846 		switch (actions->type) {
4847 		case RTE_FLOW_ACTION_TYPE_QUEUE:
4848 			queue = actions->conf;
4849 			if (queue == NULL)
4850 				return 0;
4851 			conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
4852 			if (conf == NULL || conf->tx_explicit != 0)
4853 				return 0;
4854 			queue_action = 1;
4855 			action_n++;
4856 			break;
4857 		case RTE_FLOW_ACTION_TYPE_RSS:
4858 			rss = actions->conf;
4859 			if (rss == NULL || rss->queue_num == 0)
4860 				return 0;
4861 			conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
4862 			if (conf == NULL || conf->tx_explicit != 0)
4863 				return 0;
4864 			queue_action = 1;
4865 			action_n++;
4866 			break;
4867 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4868 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4869 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4870 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4871 			split++;
4872 			action_n++;
4873 			break;
4874 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4875 			if (push_vlan)
4876 				split++;
4877 			action_n++;
4878 			break;
4879 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4880 			raw_encap = actions->conf;
4881 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4882 				split++;
4883 			action_n++;
4884 			break;
4885 		default:
4886 			action_n++;
4887 			break;
4888 		}
4889 	}
4890 	if (split && queue_action)
4891 		return action_n;
4892 	return 0;
4893 }
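
/*
 * Decision sketch (hypothetical, for illustration only): an ingress
 * flow with actions { VXLAN_ENCAP, QUEUE, END } returns 2 (split
 * required) when the queue is a hairpin queue with implicit Tx rules
 * (tx_explicit == 0), while the same actions on a regular Rx queue
 * return 0 because mlx5_rxq_get_hairpin_conf() yields NULL there.
 */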
4894 
4895 /* Declare flow create/destroy prototype in advance. */
4896 static uint32_t
4897 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4898 		 const struct rte_flow_attr *attr,
4899 		 const struct rte_flow_item items[],
4900 		 const struct rte_flow_action actions[],
4901 		 bool external, struct rte_flow_error *error);
4902 
4903 static void
4904 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4905 		  uint32_t flow_idx);
4906 
4907 int
4908 flow_dv_mreg_match_cb(void *tool_ctx __rte_unused,
4909 		      struct mlx5_list_entry *entry, void *cb_ctx)
4910 {
4911 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4912 	struct mlx5_flow_mreg_copy_resource *mcp_res =
4913 			       container_of(entry, typeof(*mcp_res), hlist_ent);
4914 
4915 	return mcp_res->mark_id != *(uint32_t *)(ctx->data);
4916 }
4917 
4918 struct mlx5_list_entry *
4919 flow_dv_mreg_create_cb(void *tool_ctx, void *cb_ctx)
4920 {
4921 	struct rte_eth_dev *dev = tool_ctx;
4922 	struct mlx5_priv *priv = dev->data->dev_private;
4923 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4924 	struct mlx5_flow_mreg_copy_resource *mcp_res;
4925 	struct rte_flow_error *error = ctx->error;
4926 	uint32_t idx = 0;
4927 	int ret;
4928 	uint32_t mark_id = *(uint32_t *)(ctx->data);
4929 	struct rte_flow_attr attr = {
4930 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4931 		.ingress = 1,
4932 	};
4933 	struct mlx5_rte_flow_item_tag tag_spec = {
4934 		.data = mark_id,
4935 	};
4936 	struct rte_flow_item items[] = {
4937 		[1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
4938 	};
4939 	struct rte_flow_action_mark ftag = {
4940 		.id = mark_id,
4941 	};
4942 	struct mlx5_flow_action_copy_mreg cp_mreg = {
4943 		.dst = REG_B,
4944 		.src = REG_NON,
4945 	};
4946 	struct rte_flow_action_jump jump = {
4947 		.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4948 	};
4949 	struct rte_flow_action actions[] = {
4950 		[3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
4951 	};
4952 
4953 	/* Fill the register fields in the flow. */
4954 	ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
4955 	if (ret < 0)
4956 		return NULL;
4957 	tag_spec.id = ret;
4958 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4959 	if (ret < 0)
4960 		return NULL;
4961 	cp_mreg.src = ret;
4962 	/* Provide the full width of FLAG specific value. */
4963 	if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
4964 		tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
4965 	/* Build a new flow. */
4966 	if (mark_id != MLX5_DEFAULT_COPY_ID) {
4967 		items[0] = (struct rte_flow_item){
4968 			.type = (enum rte_flow_item_type)
4969 				MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4970 			.spec = &tag_spec,
4971 		};
4972 		items[1] = (struct rte_flow_item){
4973 			.type = RTE_FLOW_ITEM_TYPE_END,
4974 		};
4975 		actions[0] = (struct rte_flow_action){
4976 			.type = (enum rte_flow_action_type)
4977 				MLX5_RTE_FLOW_ACTION_TYPE_MARK,
4978 			.conf = &ftag,
4979 		};
4980 		actions[1] = (struct rte_flow_action){
4981 			.type = (enum rte_flow_action_type)
4982 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4983 			.conf = &cp_mreg,
4984 		};
4985 		actions[2] = (struct rte_flow_action){
4986 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
4987 			.conf = &jump,
4988 		};
4989 		actions[3] = (struct rte_flow_action){
4990 			.type = RTE_FLOW_ACTION_TYPE_END,
4991 		};
4992 	} else {
4993 		/* Default rule, wildcard match. */
4994 		attr.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR;
4995 		items[0] = (struct rte_flow_item){
4996 			.type = RTE_FLOW_ITEM_TYPE_END,
4997 		};
4998 		actions[0] = (struct rte_flow_action){
4999 			.type = (enum rte_flow_action_type)
5000 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5001 			.conf = &cp_mreg,
5002 		};
5003 		actions[1] = (struct rte_flow_action){
5004 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
5005 			.conf = &jump,
5006 		};
5007 		actions[2] = (struct rte_flow_action){
5008 			.type = RTE_FLOW_ACTION_TYPE_END,
5009 		};
5010 	}
5011 	/* Build a new entry. */
5012 	mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
5013 	if (!mcp_res) {
5014 		rte_errno = ENOMEM;
5015 		return NULL;
5016 	}
5017 	mcp_res->idx = idx;
5018 	mcp_res->mark_id = mark_id;
5019 	/*
5020 	 * The copy flows are not included in any list. They are
5021 	 * referenced from other flows and cannot be applied,
5022 	 * removed or deleted in arbitrary order by list
5023 	 * traversal.
5024 	 */
5025 	mcp_res->rix_flow = flow_list_create(dev, MLX5_FLOW_TYPE_MCP,
5026 					&attr, items, actions, false, error);
5027 	if (!mcp_res->rix_flow) {
5028 		mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], idx);
5029 		return NULL;
5030 	}
5031 	return &mcp_res->hlist_ent;
5032 }
5033 
5034 struct mlx5_list_entry *
5035 flow_dv_mreg_clone_cb(void *tool_ctx, struct mlx5_list_entry *oentry,
5036 		      void *cb_ctx __rte_unused)
5037 {
5038 	struct rte_eth_dev *dev = tool_ctx;
5039 	struct mlx5_priv *priv = dev->data->dev_private;
5040 	struct mlx5_flow_mreg_copy_resource *mcp_res;
5041 	uint32_t idx = 0;
5042 
5043 	mcp_res = mlx5_ipool_malloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
5044 	if (!mcp_res) {
5045 		rte_errno = ENOMEM;
5046 		return NULL;
5047 	}
5048 	memcpy(mcp_res, oentry, sizeof(*mcp_res));
5049 	mcp_res->idx = idx;
5050 	return &mcp_res->hlist_ent;
5051 }
5052 
5053 void
5054 flow_dv_mreg_clone_free_cb(void *tool_ctx, struct mlx5_list_entry *entry)
5055 {
5056 	struct mlx5_flow_mreg_copy_resource *mcp_res =
5057 			       container_of(entry, typeof(*mcp_res), hlist_ent);
5058 	struct rte_eth_dev *dev = tool_ctx;
5059 	struct mlx5_priv *priv = dev->data->dev_private;
5060 
5061 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
5062 }
5063 
5064 /**
5065  * Add a flow of copying flow metadata registers in RX_CP_TBL.
5066  *
5067  * As mark_id is unique, if there's already a registered flow for the mark_id,
5068  * return by increasing the reference counter of the resource. Otherwise, create
5069  * the resource (mcp_res) and flow.
5070  *
5071  * Flow looks like,
5072  *   - If ingress port is ANY and reg_c[1] is mark_id,
5073  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
5074  *
5075  * For default flow (zero mark_id), flow is like,
5076  *   - If ingress port is ANY,
5077  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
5078  *
5079  * @param dev
5080  *   Pointer to Ethernet device.
5081  * @param mark_id
5082  *   ID of MARK action, zero means default flow for META.
5083  * @param[out] error
5084  *   Perform verbose error reporting if not NULL.
5085  *
5086  * @return
5087  *   Associated resource on success, NULL otherwise and rte_errno is set.
5088  */
5089 static struct mlx5_flow_mreg_copy_resource *
5090 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
5091 			  struct rte_flow_error *error)
5092 {
5093 	struct mlx5_priv *priv = dev->data->dev_private;
5094 	struct mlx5_list_entry *entry;
5095 	struct mlx5_flow_cb_ctx ctx = {
5096 		.dev = dev,
5097 		.error = error,
5098 		.data = &mark_id,
5099 	};
5100 
5101 	/* Check if already registered. */
5102 	MLX5_ASSERT(priv->mreg_cp_tbl);
5103 	entry = mlx5_hlist_register(priv->mreg_cp_tbl, mark_id, &ctx);
5104 	if (!entry)
5105 		return NULL;
5106 	return container_of(entry, struct mlx5_flow_mreg_copy_resource,
5107 			    hlist_ent);
5108 }
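
/*
 * Reuse sketch (for illustration only, mark ID 0x1234 is arbitrary):
 * the hash list implements the reference counting described above,
 * so two registrations with the same mark ID share one copy flow:
 *
 *   mcp = flow_mreg_add_copy_action(dev, 0x1234, &err);
 *   dup = flow_mreg_add_copy_action(dev, 0x1234, &err);
 *   MLX5_ASSERT(dup == mcp); (same entry, reference counter bumped)
 */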
5109 
5110 void
5111 flow_dv_mreg_remove_cb(void *tool_ctx, struct mlx5_list_entry *entry)
5112 {
5113 	struct mlx5_flow_mreg_copy_resource *mcp_res =
5114 			       container_of(entry, typeof(*mcp_res), hlist_ent);
5115 	struct rte_eth_dev *dev = tool_ctx;
5116 	struct mlx5_priv *priv = dev->data->dev_private;
5117 
5118 	MLX5_ASSERT(mcp_res->rix_flow);
5119 	flow_list_destroy(dev, MLX5_FLOW_TYPE_MCP, mcp_res->rix_flow);
5120 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
5121 }
5122 
5123 /**
5124  * Release flow in RX_CP_TBL.
5125  *
5126  * @param dev
5127  *   Pointer to Ethernet device.
5128  * @param flow
5129  *   Parent flow for which copying is provided.
5130  */
5131 static void
5132 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
5133 			  struct rte_flow *flow)
5134 {
5135 	struct mlx5_flow_mreg_copy_resource *mcp_res;
5136 	struct mlx5_priv *priv = dev->data->dev_private;
5137 
5138 	if (!flow->rix_mreg_copy)
5139 		return;
5140 	mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
5141 				 flow->rix_mreg_copy);
5142 	if (!mcp_res || !priv->mreg_cp_tbl)
5143 		return;
5144 	MLX5_ASSERT(mcp_res->rix_flow);
5145 	mlx5_hlist_unregister(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
5146 	flow->rix_mreg_copy = 0;
5147 }
5148 
5149 /**
5150  * Remove the default copy action from RX_CP_TBL.
5151  *
5152  * This function is called in mlx5_dev_start(). Thread safety
5153  * is not guaranteed.
5154  *
5155  * @param dev
5156  *   Pointer to Ethernet device.
5157  */
5158 static void
5159 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
5160 {
5161 	struct mlx5_list_entry *entry;
5162 	struct mlx5_priv *priv = dev->data->dev_private;
5163 	struct mlx5_flow_cb_ctx ctx;
5164 	uint32_t mark_id;
5165 
5166 	/* Check if default flow is registered. */
5167 	if (!priv->mreg_cp_tbl)
5168 		return;
5169 	mark_id = MLX5_DEFAULT_COPY_ID;
5170 	ctx.data = &mark_id;
5171 	entry = mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx);
5172 	if (!entry)
5173 		return;
5174 	mlx5_hlist_unregister(priv->mreg_cp_tbl, entry);
5175 }
5176 
5177 /**
5178  * Add the default copy action in RX_CP_TBL.
5179  *
5180  * This function is called in mlx5_dev_start(). Thread safety
5181  * is not guaranteed.
5182  *
5183  * @param dev
5184  *   Pointer to Ethernet device.
5185  * @param[out] error
5186  *   Perform verbose error reporting if not NULL.
5187  *
5188  * @return
5189  *   0 for success, negative value otherwise and rte_errno is set.
5190  */
5191 static int
5192 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
5193 				  struct rte_flow_error *error)
5194 {
5195 	struct mlx5_priv *priv = dev->data->dev_private;
5196 	struct mlx5_flow_mreg_copy_resource *mcp_res;
5197 	struct mlx5_flow_cb_ctx ctx;
5198 	uint32_t mark_id;
5199 
5200 	/* Check whether extensive metadata feature is engaged. */
5201 	if (!priv->sh->config.dv_flow_en ||
5202 	    priv->sh->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
5203 	    !mlx5_flow_ext_mreg_supported(dev) ||
5204 	    !priv->sh->dv_regc0_mask)
5205 		return 0;
5206 	/*
5207 	 * Adding the default mreg copy flow may happen multiple times,
5208 	 * but it is removed only once in stop. Avoid registering it twice.
5209 	 */
5210 	mark_id = MLX5_DEFAULT_COPY_ID;
5211 	ctx.data = &mark_id;
5212 	if (mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx))
5213 		return 0;
5214 	mcp_res = flow_mreg_add_copy_action(dev, mark_id, error);
5215 	if (!mcp_res)
5216 		return -rte_errno;
5217 	return 0;
5218 }
5219 
5220 /**
5221  * Add a flow of copying flow metadata registers in RX_CP_TBL.
5222  *
5223  * All the flows having a Q/RSS action should be split by
5224  * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL
5225  * performs the following,
5226  *   - CQE->flow_tag := reg_c[1] (MARK)
5227  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
5228  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
5229  * but there should be a flow per each MARK ID set by MARK action.
5230  *
5231  * For the aforementioned reason, if there's a MARK action in flow's action
5232  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
5233  * the MARK ID to CQE's flow_tag like,
5234  *   - If reg_c[1] is mark_id,
5235  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
5236  *
5237  * For SET_META action which stores value in reg_c[0], as the destination is
5238  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
5239  * MARK ID means the default flow. The default flow looks like,
5240  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
5241  *
5242  * @param dev
5243  *   Pointer to Ethernet device.
5244  * @param flow
5245  *   Pointer to flow structure.
5246  * @param[in] actions
5247  *   Pointer to the list of actions.
5248  * @param[out] error
5249  *   Perform verbose error reporting if not NULL.
5250  *
5251  * @return
5252  *   0 on success, negative value otherwise and rte_errno is set.
5253  */
5254 static int
5255 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
5256 			    struct rte_flow *flow,
5257 			    const struct rte_flow_action *actions,
5258 			    struct rte_flow_error *error)
5259 {
5260 	struct mlx5_priv *priv = dev->data->dev_private;
5261 	struct mlx5_sh_config *config = &priv->sh->config;
5262 	struct mlx5_flow_mreg_copy_resource *mcp_res;
5263 	const struct rte_flow_action_mark *mark;
5264 
5265 	/* Check whether extensive metadata feature is engaged. */
5266 	if (!config->dv_flow_en ||
5267 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
5268 	    !mlx5_flow_ext_mreg_supported(dev) ||
5269 	    !priv->sh->dv_regc0_mask)
5270 		return 0;
5271 	/* Find MARK action. */
5272 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5273 		switch (actions->type) {
5274 		case RTE_FLOW_ACTION_TYPE_FLAG:
5275 			mcp_res = flow_mreg_add_copy_action
5276 				(dev, MLX5_FLOW_MARK_DEFAULT, error);
5277 			if (!mcp_res)
5278 				return -rte_errno;
5279 			flow->rix_mreg_copy = mcp_res->idx;
5280 			return 0;
5281 		case RTE_FLOW_ACTION_TYPE_MARK:
5282 			mark = (const struct rte_flow_action_mark *)
5283 				actions->conf;
5284 			mcp_res =
5285 				flow_mreg_add_copy_action(dev, mark->id, error);
5286 			if (!mcp_res)
5287 				return -rte_errno;
5288 			flow->rix_mreg_copy = mcp_res->idx;
5289 			return 0;
5290 		default:
5291 			break;
5292 		}
5293 	}
5294 	return 0;
5295 }
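
/*
 * Example (hypothetical application rule, for illustration only):
 * creating an NIC Rx flow with a MARK action in extended metadata
 * mode makes this routine register a copy flow for the mark ID in
 * RX_CP_TBL and record its index in flow->rix_mreg_copy. A sketch
 * of such a rule in function-body context, assuming port 0 and Rx
 * queue 0 exist (the mark value 0x1234 is arbitrary):
 */
#if 0
	struct rte_flow_error err;
	struct rte_flow_attr attr = { .ingress = 1 };
	struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	struct rte_flow_action_mark mark = { .id = 0x1234 };
	struct rte_flow_action_queue queue = { .index = 0 };
	struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow *f = rte_flow_create(0, &attr, pattern, actions, &err);
#endif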
5296 
5297 #define MLX5_MAX_SPLIT_ACTIONS 24
5298 #define MLX5_MAX_SPLIT_ITEMS 24
5299 
5300 /**
5301  * Split the hairpin flow.
5302  * Since HW can't support encap and push-vlan on Rx, we move these
5303  * actions to Tx.
5304  * If the count action is after the encap then we also
5305  * move the count action. In this case the count will also measure
5306  * the outer bytes.
5307  *
5308  * @param dev
5309  *   Pointer to Ethernet device.
5310  * @param[in] actions
5311  *   Associated actions (list terminated by the END action).
5312  * @param[out] actions_rx
5313  *   Rx flow actions.
5314  * @param[out] actions_tx
5315  *   Tx flow actions.
5316  * @param[out] pattern_tx
5317  *   The pattern items for the Tx flow.
5318  * @param[out] flow_id
5319  *   The flow ID connected to this flow.
5320  *
5321  * @return
5322  *   0 on success.
5323  */
5324 static int
5325 flow_hairpin_split(struct rte_eth_dev *dev,
5326 		   const struct rte_flow_action actions[],
5327 		   struct rte_flow_action actions_rx[],
5328 		   struct rte_flow_action actions_tx[],
5329 		   struct rte_flow_item pattern_tx[],
5330 		   uint32_t flow_id)
5331 {
5332 	const struct rte_flow_action_raw_encap *raw_encap;
5333 	const struct rte_flow_action_raw_decap *raw_decap;
5334 	struct mlx5_rte_flow_action_set_tag *set_tag;
5335 	struct rte_flow_action *tag_action;
5336 	struct mlx5_rte_flow_item_tag *tag_item;
5337 	struct rte_flow_item *item;
5338 	char *addr;
5339 	int push_vlan = 0;
5340 	int encap = 0;
5341 
5342 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5343 		if (actions->type == RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN)
5344 			push_vlan = 1;
5345 		switch (actions->type) {
5346 		case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
5347 		case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
5348 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5349 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
5350 			rte_memcpy(actions_tx, actions,
5351 			       sizeof(struct rte_flow_action));
5352 			actions_tx++;
5353 			break;
5354 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5355 			if (push_vlan) {
5356 				rte_memcpy(actions_tx, actions,
5357 					   sizeof(struct rte_flow_action));
5358 				actions_tx++;
5359 			} else {
5360 				rte_memcpy(actions_rx, actions,
5361 					   sizeof(struct rte_flow_action));
5362 				actions_rx++;
5363 			}
5364 			break;
5365 		case RTE_FLOW_ACTION_TYPE_COUNT:
5366 			if (encap) {
5367 				rte_memcpy(actions_tx, actions,
5368 					   sizeof(struct rte_flow_action));
5369 				actions_tx++;
5370 			} else {
5371 				rte_memcpy(actions_rx, actions,
5372 					   sizeof(struct rte_flow_action));
5373 				actions_rx++;
5374 			}
5375 			break;
5376 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5377 			raw_encap = actions->conf;
5378 			if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) {
5379 				memcpy(actions_tx, actions,
5380 				       sizeof(struct rte_flow_action));
5381 				actions_tx++;
5382 				encap = 1;
5383 			} else {
5384 				rte_memcpy(actions_rx, actions,
5385 					   sizeof(struct rte_flow_action));
5386 				actions_rx++;
5387 			}
5388 			break;
5389 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5390 			raw_decap = actions->conf;
5391 			if (raw_decap->size < MLX5_ENCAPSULATION_DECISION_SIZE) {
5392 				memcpy(actions_tx, actions,
5393 				       sizeof(struct rte_flow_action));
5394 				actions_tx++;
5395 			} else {
5396 				rte_memcpy(actions_rx, actions,
5397 					   sizeof(struct rte_flow_action));
5398 				actions_rx++;
5399 			}
5400 			break;
5401 		default:
5402 			rte_memcpy(actions_rx, actions,
5403 				   sizeof(struct rte_flow_action));
5404 			actions_rx++;
5405 			break;
5406 		}
5407 	}
5408 	/* Add the set tag action and the end action for the Rx flow. */
5409 	tag_action = actions_rx;
5410 	tag_action->type = (enum rte_flow_action_type)
5411 			   MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5412 	actions_rx++;
5413 	rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
5414 	actions_rx++;
5415 	set_tag = (void *)actions_rx;
5416 	*set_tag = (struct mlx5_rte_flow_action_set_tag) {
5417 		.id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL),
5418 		.data = flow_id,
5419 	};
5420 	MLX5_ASSERT(set_tag->id > REG_NON);
5421 	tag_action->conf = set_tag;
5422 	/* Create Tx item list. */
5423 	rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
5424 	addr = (void *)&pattern_tx[2];
5425 	item = pattern_tx;
5426 	item->type = (enum rte_flow_item_type)
5427 		     MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5428 	tag_item = (void *)addr;
5429 	tag_item->data = flow_id;
5430 	tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
5431 	MLX5_ASSERT(tag_item->id > REG_NON);
5432 	item->spec = tag_item;
5433 	addr += sizeof(struct mlx5_rte_flow_item_tag);
5434 	tag_item = (void *)addr;
5435 	tag_item->data = UINT32_MAX;
5436 	tag_item->id = UINT16_MAX;
5437 	item->mask = tag_item;
5438 	item->last = NULL;
5439 	item++;
5440 	item->type = RTE_FLOW_ITEM_TYPE_END;
5441 	return 0;
5442 }
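
/*
 * Split sketch (hypothetical, for illustration only): for hairpin
 * actions { RSS, RAW_ENCAP(size > decision size), END } the routine
 * builds
 *   Rx actions: { RSS, TAG(HAIRPIN_RX reg, data = flow_id), END }
 *   Tx actions: { RAW_ENCAP, END }
 *   Tx pattern: { TAG(HAIRPIN_TX reg, data = flow_id), END }
 * so the Tx subflow handles exactly the packets tagged on Rx.
 */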
5443 
5444 /**
5445  * The last stage of splitting chain, just creates the subflow
5446  * without any modification.
5447  *
5448  * @param[in] dev
5449  *   Pointer to Ethernet device.
5450  * @param[in] flow
5451  *   Parent flow structure pointer.
5452  * @param[in, out] sub_flow
5453  *   Pointer to return the created subflow, may be NULL.
5454  * @param[in] attr
5455  *   Flow rule attributes.
5456  * @param[in] items
5457  *   Pattern specification (list terminated by the END pattern item).
5458  * @param[in] actions
5459  *   Associated actions (list terminated by the END action).
5460  * @param[in] flow_split_info
5461  *   Pointer to flow split info structure.
5462  * @param[out] error
5463  *   Perform verbose error reporting if not NULL.
5464  * @return
5465  *   0 on success, negative value otherwise
5466  */
5467 static int
5468 flow_create_split_inner(struct rte_eth_dev *dev,
5469 			struct rte_flow *flow,
5470 			struct mlx5_flow **sub_flow,
5471 			const struct rte_flow_attr *attr,
5472 			const struct rte_flow_item items[],
5473 			const struct rte_flow_action actions[],
5474 			struct mlx5_flow_split_info *flow_split_info,
5475 			struct rte_flow_error *error)
5476 {
5477 	struct mlx5_flow *dev_flow;
5478 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
5479 
5480 	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
5481 				    flow_split_info->flow_idx, error);
5482 	if (!dev_flow)
5483 		return -rte_errno;
5484 	dev_flow->flow = flow;
5485 	dev_flow->external = flow_split_info->external;
5486 	dev_flow->skip_scale = flow_split_info->skip_scale;
5487 	/* Subflow object was created, we must include it in the list. */
5488 	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
5489 		      dev_flow->handle, next);
5490 	/*
5491 	 * If dev_flow is one of the suffix flows, some actions in the
5492 	 * suffix flow may need user defined item layer flags; pass the
5493 	 * metadata rxq mark flag to the suffix flow as well.
5494 	 */
5495 	if (flow_split_info->prefix_layers)
5496 		dev_flow->handle->layers = flow_split_info->prefix_layers;
5497 	if (flow_split_info->prefix_mark) {
5498 		MLX5_ASSERT(wks);
5499 		wks->mark = 1;
5500 	}
5501 	if (sub_flow)
5502 		*sub_flow = dev_flow;
5503 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5504 	dev_flow->dv.table_id = flow_split_info->table_id;
5505 #endif
5506 	return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
5507 }
5508 
5509 /**
5510  * Get the sub policy of a meter.
5511  *
5512  * @param[in] dev
5513  *   Pointer to Ethernet device.
5514  * @param[in] flow
5515  *   Parent flow structure pointer.
5516  * @param wks
5517  *   Pointer to thread flow work space.
5518  * @param[in] attr
5519  *   Flow rule attributes.
5520  * @param[in] items
5521  *   Pattern specification (list terminated by the END pattern item).
5522  * @param[out] error
5523  *   Perform verbose error reporting if not NULL.
5524  *
5525  * @return
5526  *   Pointer to the meter sub policy, NULL otherwise and rte_errno is set.
5527  */
5528 static struct mlx5_flow_meter_sub_policy *
5529 get_meter_sub_policy(struct rte_eth_dev *dev,
5530 		     struct rte_flow *flow,
5531 		     struct mlx5_flow_workspace *wks,
5532 		     const struct rte_flow_attr *attr,
5533 		     const struct rte_flow_item items[],
5534 		     struct rte_flow_error *error)
5535 {
5536 	struct mlx5_flow_meter_policy *policy;
5537 	struct mlx5_flow_meter_policy *final_policy;
5538 	struct mlx5_flow_meter_sub_policy *sub_policy = NULL;
5539 
5540 	policy = wks->policy;
5541 	final_policy = policy->is_hierarchy ? wks->final_policy : policy;
5542 	if (final_policy->is_rss || final_policy->is_queue) {
5543 		struct mlx5_flow_rss_desc rss_desc_v[MLX5_MTR_RTE_COLORS];
5544 		struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS] = {0};
5545 		uint32_t i;
5546 
5547 		/*
5548 		 * This is a temporary dev_flow,
5549 		 * no need to register any matcher for it during translate.
5550 		 */
5551 		wks->skip_matcher_reg = 1;
5552 		for (i = 0; i < MLX5_MTR_RTE_COLORS; i++) {
5553 			struct mlx5_flow dev_flow = {0};
5554 			struct mlx5_flow_handle dev_handle = { {0} };
5555 			uint8_t fate = final_policy->act_cnt[i].fate_action;
5556 
5557 			if (fate == MLX5_FLOW_FATE_SHARED_RSS) {
5558 				const struct rte_flow_action_rss *rss_act =
5559 					final_policy->act_cnt[i].rss->conf;
5560 				struct rte_flow_action rss_actions[2] = {
5561 					[0] = {
5562 					.type = RTE_FLOW_ACTION_TYPE_RSS,
5563 					.conf = rss_act,
5564 					},
5565 					[1] = {
5566 					.type = RTE_FLOW_ACTION_TYPE_END,
5567 					.conf = NULL,
5568 					}
5569 				};
5570 
5571 				dev_flow.handle = &dev_handle;
5572 				dev_flow.ingress = attr->ingress;
5573 				dev_flow.flow = flow;
5574 				dev_flow.external = 0;
5575 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5576 				dev_flow.dv.transfer = attr->transfer;
5577 #endif
5578 				/**
5579 				 * Translate RSS action to get rss hash fields.
5580 				 */
5581 				if (flow_drv_translate(dev, &dev_flow, attr,
5582 						items, rss_actions, error))
5583 					goto exit;
5584 				rss_desc_v[i] = wks->rss_desc;
5585 				rss_desc_v[i].symmetric_hash_function =
5586 						dev_flow.symmetric_hash_function;
5587 				rss_desc_v[i].key_len = MLX5_RSS_HASH_KEY_LEN;
5588 				rss_desc_v[i].hash_fields =
5589 						dev_flow.hash_fields;
5590 				rss_desc_v[i].queue_num =
5591 						rss_desc_v[i].hash_fields ?
5592 						rss_desc_v[i].queue_num : 1;
5593 				rss_desc_v[i].tunnel =
5594 						!!(dev_flow.handle->layers &
5595 						   MLX5_FLOW_LAYER_TUNNEL);
5596 				/* Use the RSS queues in the containers. */
5597 				rss_desc_v[i].queue =
5598 					(uint16_t *)(uintptr_t)rss_act->queue;
5599 				rss_desc[i] = &rss_desc_v[i];
5600 			} else if (fate == MLX5_FLOW_FATE_QUEUE) {
5601 				/* This is queue action. */
5602 				rss_desc_v[i] = wks->rss_desc;
5603 				rss_desc_v[i].key_len = 0;
5604 				rss_desc_v[i].hash_fields = 0;
5605 				rss_desc_v[i].queue =
5606 					&final_policy->act_cnt[i].queue;
5607 				rss_desc_v[i].queue_num = 1;
5608 				rss_desc[i] = &rss_desc_v[i];
5609 			} else {
5610 				rss_desc[i] = NULL;
5611 			}
5612 		}
5613 		sub_policy = flow_drv_meter_sub_policy_rss_prepare(dev,
5614 						flow, policy, rss_desc);
5615 	} else {
5616 		enum mlx5_meter_domain mtr_domain =
5617 			attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5618 				(attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5619 						MLX5_MTR_DOMAIN_INGRESS);
5620 		sub_policy = policy->sub_policys[mtr_domain][0];
5621 	}
5622 	if (!sub_policy)
5623 		rte_flow_error_set(error, EINVAL,
5624 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5625 				   "Failed to get meter sub-policy.");
5626 exit:
5627 	return sub_policy;
5628 }
5629 
5630 /**
5631  * Split the meter flow.
5632  *
5633  * As the meter flow will be split into three sub flows, the
5634  * actions other than the meter action only make sense if the
5635  * meter accepts the packet. If it needs to be dropped, no
5636  * additional actions should be taken.
5637  *
5638  * One kind of special action which decapsulates the L3 tunnel
5639  * header will be in the prefix sub flow, so as not to take the
5640  * L3 tunnel header into account.
5641  *
5642  * @param[in] dev
5643  *   Pointer to Ethernet device.
5644  * @param[in] flow
5645  *   Parent flow structure pointer.
5646  * @param wks
5647  *   Pointer to thread flow work space.
5648  * @param[in] attr
5649  *   Flow rule attributes.
5650  * @param[in] items
5651  *   Pattern specification (list terminated by the END pattern item).
5652  * @param[out] sfx_items
5653  *   Suffix flow match items (list terminated by the END pattern item).
5654  * @param[in] actions
5655  *   Associated actions (list terminated by the END action).
5656  * @param[out] actions_sfx
5657  *   Suffix flow actions.
5658  * @param[out] actions_pre
5659  *   Prefix flow actions.
5660  * @param[out] mtr_flow_id
5661  *   Pointer to meter flow id.
5662  * @param[out] error
5663  *   Perform verbose error reporting if not NULL.
5664  *
5665  * @return
5666  *   0 on success, a negative errno value otherwise and rte_errno is set.
5667  */
5668 static int
5669 flow_meter_split_prep(struct rte_eth_dev *dev,
5670 		      struct rte_flow *flow,
5671 		      struct mlx5_flow_workspace *wks,
5672 		      const struct rte_flow_attr *attr,
5673 		      const struct rte_flow_item items[],
5674 		      struct rte_flow_item sfx_items[],
5675 		      const struct rte_flow_action actions[],
5676 		      struct rte_flow_action actions_sfx[],
5677 		      struct rte_flow_action actions_pre[],
5678 		      uint32_t *mtr_flow_id,
5679 		      struct rte_flow_error *error)
5680 {
5681 	struct mlx5_priv *priv = dev->data->dev_private;
5682 	struct mlx5_flow_meter_info *fm = wks->fm;
5683 	struct rte_flow_action *tag_action = NULL;
5684 	struct rte_flow_item *tag_item;
5685 	struct mlx5_rte_flow_action_set_tag *set_tag;
5686 	const struct rte_flow_action_raw_encap *raw_encap;
5687 	const struct rte_flow_action_raw_decap *raw_decap;
5688 	struct mlx5_rte_flow_item_tag *tag_item_spec;
5689 	struct mlx5_rte_flow_item_tag *tag_item_mask;
5690 	uint32_t tag_id = 0;
5691 	struct rte_flow_item *vlan_item_dst = NULL;
5692 	const struct rte_flow_item *vlan_item_src = NULL;
5693 	const struct rte_flow_item *orig_items = items;
5694 	struct rte_flow_action *hw_mtr_action;
5695 	struct rte_flow_action *action_pre_head = NULL;
5696 	uint16_t flow_src_port = priv->representor_id;
5697 	bool mtr_first;
5698 	uint8_t mtr_id_offset = priv->mtr_reg_share ? MLX5_MTR_COLOR_BITS : 0;
5699 	uint8_t mtr_reg_bits = priv->mtr_reg_share ?
5700 				MLX5_MTR_IDLE_BITS_IN_COLOR_REG : MLX5_REG_BITS;
5701 	uint32_t flow_id = 0;
5702 	uint32_t flow_id_reversed = 0;
5703 	uint8_t flow_id_bits = 0;
5704 	bool after_meter = false;
5705 	int shift;
5706 
5707 	/* Prepare the suffix subflow items. */
5708 	tag_item = sfx_items++;
5709 	for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
5710 		int item_type = items->type;
5711 
5712 		switch (item_type) {
5713 		case RTE_FLOW_ITEM_TYPE_PORT_ID:
5714 		case RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT:
5715 		case RTE_FLOW_ITEM_TYPE_PORT_REPRESENTOR:
5716 			if (mlx5_flow_get_item_vport_id(dev, items, &flow_src_port, NULL, error))
5717 				return -rte_errno;
5718 			if (!fm->def_policy && wks->policy->hierarchy_match_port &&
5719 			    flow_src_port != priv->representor_id) {
5720 				if (flow_drv_mtr_hierarchy_rule_create(dev,
5721 								flow, fm,
5722 								flow_src_port,
5723 								items,
5724 								error))
5725 					return -rte_errno;
5726 			}
5727 			memcpy(sfx_items, items, sizeof(*sfx_items));
5728 			sfx_items++;
5729 			break;
5730 		case RTE_FLOW_ITEM_TYPE_VLAN:
5731 			/* Determine whether to copy the VLAN item below. */
5732 			vlan_item_src = items;
5733 			vlan_item_dst = sfx_items++;
5734 			vlan_item_dst->type = RTE_FLOW_ITEM_TYPE_VOID;
5735 			break;
5736 		default:
5737 			break;
5738 		}
5739 	}
5740 	sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
5741 	sfx_items++;
5742 	mtr_first = priv->sh->meter_aso_en &&
5743 		(attr->egress || (attr->transfer && flow_src_port != UINT16_MAX));
5744 	/* For ASO meter, meter must be before tag in TX direction. */
5745 	if (mtr_first) {
5746 		action_pre_head = actions_pre++;
5747 		/* Leave space for tag action. */
5748 		tag_action = actions_pre++;
5749 	}
5750 	/* Prepare the actions for prefix and suffix flow. */
5751 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5752 		struct rte_flow_action *action_cur = NULL;
5753 
5754 		switch (actions->type) {
5755 		case RTE_FLOW_ACTION_TYPE_METER:
5756 			if (mtr_first) {
5757 				action_cur = action_pre_head;
5758 			} else {
5759 				/* Leave space for tag action. */
5760 				tag_action = actions_pre++;
5761 				action_cur = actions_pre++;
5762 			}
5763 			after_meter = true;
5764 			break;
5765 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5766 		case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5767 			action_cur = actions_pre++;
5768 			break;
5769 		case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5770 			raw_encap = actions->conf;
5771 			if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
5772 				action_cur = actions_pre++;
5773 			break;
5774 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5775 			raw_decap = actions->conf;
5776 			if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
5777 				action_cur = actions_pre++;
5778 			break;
5779 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5780 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5781 			if (vlan_item_dst && vlan_item_src) {
5782 				memcpy(vlan_item_dst, vlan_item_src,
5783 					sizeof(*vlan_item_dst));
5784 				/*
5785 				 * Convert to internal match item, it is used
5786 				 * for vlan push and set vid.
5787 				 */
5788 				vlan_item_dst->type = (enum rte_flow_item_type)
5789 						MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
5790 			}
5791 			break;
5792 		case RTE_FLOW_ACTION_TYPE_COUNT:
5793 			if (fm->def_policy)
5794 				action_cur = after_meter ?
5795 						actions_sfx++ : actions_pre++;
5796 			break;
5797 		default:
5798 			break;
5799 		}
5800 		if (!action_cur)
5801 			action_cur = (fm->def_policy) ?
5802 					actions_sfx++ : actions_pre++;
5803 		memcpy(action_cur, actions, sizeof(struct rte_flow_action));
5804 	}
5805 	/* Add end action to the actions. */
5806 	actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
5807 	if (priv->sh->meter_aso_en) {
5808 		/**
5809 		 * For ASO meter, an extra jump action needs to be added
5810 		 * explicitly, to jump from the meter to the policer table.
5811 		 */
5812 		struct mlx5_flow_meter_sub_policy *sub_policy;
5813 		struct mlx5_flow_tbl_data_entry *tbl_data;
5814 
5815 		if (!fm->def_policy) {
5816 			sub_policy = get_meter_sub_policy(dev, flow, wks,
5817 							  attr, orig_items,
5818 							  error);
5819 			if (!sub_policy)
5820 				return -rte_errno;
5821 		} else {
5822 			enum mlx5_meter_domain mtr_domain =
5823 			attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5824 				(attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5825 						MLX5_MTR_DOMAIN_INGRESS);
5826 
5827 			sub_policy =
5828 			&priv->sh->mtrmng->def_policy[mtr_domain]->sub_policy;
5829 		}
5830 		tbl_data = container_of(sub_policy->tbl_rsc,
5831 					struct mlx5_flow_tbl_data_entry, tbl);
5832 		hw_mtr_action = actions_pre++;
5833 		hw_mtr_action->type = (enum rte_flow_action_type)
5834 				      MLX5_RTE_FLOW_ACTION_TYPE_JUMP;
5835 		hw_mtr_action->conf = tbl_data->jump.action;
5836 	}
5837 	actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
5838 	actions_pre++;
5839 	if (!tag_action)
5840 		return rte_flow_error_set(error, ENOMEM,
5841 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5842 					  NULL, "No tag action space.");
5843 	if (!mtr_flow_id) {
5844 		tag_action->type = RTE_FLOW_ACTION_TYPE_VOID;
5845 		goto exit;
5846 	}
5847 	/* Only default-policy Meter creates mtr flow id. */
5848 	if (fm->def_policy) {
5849 		mlx5_ipool_malloc(fm->flow_ipool, &tag_id);
5850 		if (!tag_id)
5851 			return rte_flow_error_set(error, ENOMEM,
5852 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5853 					"Failed to allocate meter flow id.");
5854 		flow_id = tag_id - 1;
5855 		flow_id_bits = (!flow_id) ? 1 :
5856 				(MLX5_REG_BITS - rte_clz32(flow_id));
5857 		if ((flow_id_bits + priv->sh->mtrmng->max_mtr_bits) >
5858 		    mtr_reg_bits) {
5859 			mlx5_ipool_free(fm->flow_ipool, tag_id);
5860 			return rte_flow_error_set(error, EINVAL,
5861 					RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5862 					"Meter flow id exceeds max limit.");
5863 		}
5864 		if (flow_id_bits > priv->sh->mtrmng->max_mtr_flow_bits)
5865 			priv->sh->mtrmng->max_mtr_flow_bits = flow_id_bits;
5866 	}
5867 	/* Build tag actions and items for meter_id/meter flow_id. */
5868 	set_tag = (struct mlx5_rte_flow_action_set_tag *)actions_pre;
5869 	tag_item_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
5870 	tag_item_mask = tag_item_spec + 1;
5871 	/* Both flow_id and meter_id share the same register. */
5872 	*set_tag = (struct mlx5_rte_flow_action_set_tag) {
5873 		.id = (enum modify_reg)mlx5_flow_get_reg_id(dev, MLX5_MTR_ID,
5874 							    0, error),
5875 		.offset = mtr_id_offset,
5876 		.length = mtr_reg_bits,
5877 		.data = flow->meter,
5878 	};
5879 	/*
5880 	 * The color register bits used by flow_id grow from msb to
5881 	 * lsb, so the flow_id value must be bit-reversed in RegC.
5882 	 */
5883 	for (shift = 0; shift < flow_id_bits; shift++)
5884 		flow_id_reversed = (flow_id_reversed << 1) |
5885 				((flow_id >> shift) & 0x1);
5886 	set_tag->data |=
5887 		flow_id_reversed << (mtr_reg_bits - flow_id_bits);
5888 	tag_item_spec->id = set_tag->id;
5889 	tag_item_spec->data = set_tag->data << mtr_id_offset;
5890 	tag_item_mask->data = UINT32_MAX << mtr_id_offset;
5891 	tag_action->type = (enum rte_flow_action_type)
5892 				MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5893 	tag_action->conf = set_tag;
5894 	tag_item->type = (enum rte_flow_item_type)
5895 				MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5896 	tag_item->spec = tag_item_spec;
5897 	tag_item->last = NULL;
5898 	tag_item->mask = tag_item_mask;
5899 exit:
5900 	if (mtr_flow_id)
5901 		*mtr_flow_id = tag_id;
5902 	return 0;
5903 }
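
/*
 * Bit-reversal sketch (values are illustrative): with flow_id = 0x6
 * (110b) the code above computes flow_id_bits = 3 and
 * flow_id_reversed = 011b, which is then shifted left by
 * (mtr_reg_bits - flow_id_bits) so that the flow ID occupies the
 * register msb side while the meter ID keeps the lsb side.
 */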
5904 
5905 /**
5906  * Split action list having QUEUE/RSS for metadata register copy.
5907  *
5908  * Once Q/RSS action is detected in user's action list, the flow action
5909  * should be split in order to copy metadata registers, which will happen in
5910  * RX_CP_TBL like,
5911  *   - CQE->flow_tag := reg_c[1] (MARK)
5912  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
5913  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
5914  * This is because the last action of each flow must be a terminal action
5915  * (QUEUE, RSS or DROP).
5916  *
5917  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
5918  * stored and kept in the mlx5_flow structure per each sub_flow.
5919  *
5920  * The Q/RSS action is replaced with,
5921  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
5922  * And the following JUMP action is added at the end,
5923  *   - JUMP, to RX_CP_TBL.
5924  *
5925  * A flow to perform remained Q/RSS action will be created in RX_ACT_TBL by
5926  * flow_create_split_metadata() routine. The flow will look like,
5927  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
5928  *
5929  * @param dev
5930  *   Pointer to Ethernet device.
5931  * @param[out] split_actions
5932  *   Pointer to store split actions to jump to CP_TBL.
5933  * @param[in] actions
5934  *   Pointer to the list of original flow actions.
5935  * @param[in] qrss
5936  *   Pointer to the Q/RSS action.
5937  * @param[in] actions_n
5938  *   Number of original actions.
5939  * @param[in] mtr_sfx
5940  *   Check if it is in meter suffix table.
5941  * @param[out] error
5942  *   Perform verbose error reporting if not NULL.
5943  *
5944  * @return
5945  *   non-zero unique flow_id on success, otherwise 0 and
5946  *   error/rte_errno are set.
5947  */
5948 static uint32_t
5949 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
5950 			  struct rte_flow_action *split_actions,
5951 			  const struct rte_flow_action *actions,
5952 			  const struct rte_flow_action *qrss,
5953 			  int actions_n, int mtr_sfx,
5954 			  struct rte_flow_error *error)
5955 {
5956 	struct mlx5_priv *priv = dev->data->dev_private;
5957 	struct mlx5_rte_flow_action_set_tag *set_tag;
5958 	struct rte_flow_action_jump *jump;
5959 	const int qrss_idx = qrss - actions;
5960 	uint32_t flow_id = 0;
5961 	int ret = 0;
5962 
5963 	/*
5964 	 * Given actions will be split
5965 	 * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
5966 	 * - Add jump to mreg CP_TBL.
5967 	 * As a result, there will be one more action.
5968 	 */
5969 	memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
5970 	/* Count MLX5_RTE_FLOW_ACTION_TYPE_TAG. */
5971 	++actions_n;
5972 	set_tag = (void *)(split_actions + actions_n);
5973 	/*
5974 	 * If we are not the meter suffix flow, add the tag action,
5975 	 * since the meter suffix flow already has the tag added.
5976 	 */
5977 	if (!mtr_sfx) {
5978 		/*
5979 		 * Allocate the new subflow ID. This one is unique within
5980 		 * device and not shared with representors. Otherwise,
5981 		 * we would have to resolve multi-thread access synch
5982 		 * issue. Each flow on the shared device is appended
5983 		 * with source vport identifier, so the resulting
5984 		 * flows will be unique in the shared (by master and
5985 		 * representors) domain even if they have coinciding
5986 		 * IDs.
5987 		 */
5988 		mlx5_ipool_malloc(priv->sh->ipool
5989 				  [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &flow_id);
5990 		if (!flow_id)
5991 			return rte_flow_error_set(error, ENOMEM,
5992 						  RTE_FLOW_ERROR_TYPE_ACTION,
5993 						  NULL, "can't allocate id "
5994 						  "for split Q/RSS subflow");
5995 		/* Internal SET_TAG action to set flow ID. */
5996 		*set_tag = (struct mlx5_rte_flow_action_set_tag){
5997 			.data = flow_id,
5998 		};
5999 		ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
6000 		if (ret < 0)
6001 			return ret;
6002 		set_tag->id = ret;
6003 		/* Construct new actions array. */
6004 		/* Replace QUEUE/RSS action. */
6005 		split_actions[qrss_idx] = (struct rte_flow_action){
6006 			.type = (enum rte_flow_action_type)
6007 				MLX5_RTE_FLOW_ACTION_TYPE_TAG,
6008 			.conf = set_tag,
6009 		};
6010 	} else {
6011 		/*
6012 		 * If we are the meter suffix flow, the tag already exists.
6013 		 * Set the QUEUE/RSS action to void.
6014 		 */
6015 		split_actions[qrss_idx].type = RTE_FLOW_ACTION_TYPE_VOID;
6016 	}
6017 	/* JUMP action to jump to mreg copy table (CP_TBL). */
6018 	jump = (void *)(set_tag + 1);
6019 	*jump = (struct rte_flow_action_jump){
6020 		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
6021 	};
6022 	split_actions[actions_n - 2] = (struct rte_flow_action){
6023 		.type = RTE_FLOW_ACTION_TYPE_JUMP,
6024 		.conf = jump,
6025 	};
6026 	split_actions[actions_n - 1] = (struct rte_flow_action){
6027 		.type = RTE_FLOW_ACTION_TYPE_END,
6028 	};
6029 	return flow_id;
6030 }
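
/*
 * Split sketch (hypothetical action list, for illustration only):
 * for a non meter-suffix flow with actions { MARK, RSS, END }
 * (actions_n = 3) the routine emits
 *   { MARK, SET_TAG(flow ID into reg_c[2]), JUMP(CP_TBL), END }
 * and flow_create_split_metadata() later adds the RX_ACT_TBL flow
 * matching reg_c[2] == flow_id that performs the original RSS.
 */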
6031 
6032 /**
6033  * Extend the given action list for Tx metadata copy.
6034  *
6035  * Copy the given action list to the ext_actions and add flow metadata register
6036  * copy action in order to copy reg_a set by WQE to reg_c[0].
6037  *
6038  * @param[out] ext_actions
6039  *   Pointer to the extended action list.
6040  * @param[in] actions
6041  *   Pointer to the list of actions.
6042  * @param[in] actions_n
6043  *   Number of actions in the list.
6044  * @param[out] error
6045  *   Perform verbose error reporting if not NULL.
6046  * @param[in] encap_idx
6047  *   The encap action index.
6048  *
6049  * @return
6050  *   0 on success, negative value otherwise
6051  */
6052 static int
6053 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
6054 		       struct rte_flow_action *ext_actions,
6055 		       const struct rte_flow_action *actions,
6056 		       int actions_n, struct rte_flow_error *error,
6057 		       int encap_idx)
6058 {
6059 	struct mlx5_flow_action_copy_mreg *cp_mreg =
6060 		(struct mlx5_flow_action_copy_mreg *)
6061 			(ext_actions + actions_n + 1);
6062 	int ret;
6063 
6064 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
6065 	if (ret < 0)
6066 		return ret;
6067 	cp_mreg->dst = ret;
6068 	ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
6069 	if (ret < 0)
6070 		return ret;
6071 	cp_mreg->src = ret;
6072 	if (encap_idx != 0)
6073 		memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
6074 	if (encap_idx == actions_n - 1) {
6075 		ext_actions[actions_n - 1] = (struct rte_flow_action){
6076 			.type = (enum rte_flow_action_type)
6077 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
6078 			.conf = cp_mreg,
6079 		};
6080 		ext_actions[actions_n] = (struct rte_flow_action){
6081 			.type = RTE_FLOW_ACTION_TYPE_END,
6082 		};
6083 	} else {
6084 		ext_actions[encap_idx] = (struct rte_flow_action){
6085 			.type = (enum rte_flow_action_type)
6086 				MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
6087 			.conf = cp_mreg,
6088 		};
6089 		memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
6090 				sizeof(*ext_actions) * (actions_n - encap_idx));
6091 	}
6092 	return 0;
6093 }
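
/*
 * Layout sketch (hypothetical, for illustration only): for Tx
 * actions { VXLAN_ENCAP, END } (actions_n = 2, encap_idx = 0) the
 * extended list becomes
 *   { COPY_MREG(reg_c[0] := reg_a), VXLAN_ENCAP, END }
 * while with no encap action (encap_idx == actions_n - 1) the copy
 * action is appended just before the END action instead.
 */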
6094 
6095 /**
6096  * Check the match action from the action list.
6097  *
6098  * @param[in] actions
6099  *   Pointer to the list of actions.
6100  * @param[in] attr
6101  *   Flow rule attributes.
6102  * @param[in] action
6103  *   The action whose presence in the list is to be checked.
6104  * @param[out] match_action_pos
6105  *   Pointer to the position of the matched action if it exists, otherwise -1.
6106  * @param[out] qrss_action_pos
6107  *   Pointer to the position of the Queue/RSS action if it exists, otherwise -1.
6108  * @param[out] modify_after_mirror
6109  *   Pointer to the flag of modify action after FDB mirroring.
6110  *
6111  * @return
6112  *   > 0 the total number of actions.
6113  *   0 if the match action is not found in the action list.
6114  */
6115 static int
6116 flow_check_match_action(const struct rte_flow_action actions[],
6117 			const struct rte_flow_attr *attr,
6118 			enum rte_flow_action_type action,
6119 			int *match_action_pos, int *qrss_action_pos,
6120 			int *modify_after_mirror)
6121 {
6122 	const struct rte_flow_action_sample *sample;
6123 	const struct rte_flow_action_raw_decap *decap;
6124 	int actions_n = 0;
6125 	uint32_t ratio = 0;
6126 	int sub_type = 0;
6127 	int flag = 0;
6128 	int fdb_mirror = 0;
6129 
6130 	*match_action_pos = -1;
6131 	*qrss_action_pos = -1;
6132 	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
6133 		if (actions->type == action) {
6134 			flag = 1;
6135 			*match_action_pos = actions_n;
6136 		}
6137 		switch (actions->type) {
6138 		case RTE_FLOW_ACTION_TYPE_QUEUE:
6139 		case RTE_FLOW_ACTION_TYPE_RSS:
6140 			*qrss_action_pos = actions_n;
6141 			break;
6142 		case RTE_FLOW_ACTION_TYPE_SAMPLE:
6143 			sample = actions->conf;
6144 			ratio = sample->ratio;
6145 			sub_type = ((const struct rte_flow_action *)
6146 					(sample->actions))->type;
6147 			if (ratio == 1 && attr->transfer &&
6148 			    sub_type != RTE_FLOW_ACTION_TYPE_END)
6149 				fdb_mirror = 1;
6150 			break;
6151 		case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
6152 		case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
6153 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
6154 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
6155 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
6156 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
6157 		case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
6158 		case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
6159 		case RTE_FLOW_ACTION_TYPE_DEC_TTL:
6160 		case RTE_FLOW_ACTION_TYPE_SET_TTL:
6161 		case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
6162 		case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
6163 		case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
6164 		case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
6165 		case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
6166 		case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
6167 		case RTE_FLOW_ACTION_TYPE_FLAG:
6168 		case RTE_FLOW_ACTION_TYPE_MARK:
6169 		case RTE_FLOW_ACTION_TYPE_SET_META:
6170 		case RTE_FLOW_ACTION_TYPE_SET_TAG:
6171 		case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
6172 		case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
6173 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
6174 		case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
6175 		case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
6176 		case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
6177 		case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
6178 		case RTE_FLOW_ACTION_TYPE_METER:
6179 			if (fdb_mirror)
6180 				*modify_after_mirror = 1;
6181 			break;
6182 		case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
6183 			decap = actions->conf;
6184 			while ((++actions)->type == RTE_FLOW_ACTION_TYPE_VOID)
6185 				;
6186 			actions_n++;
6187 			if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
6188 				const struct rte_flow_action_raw_encap *encap =
6189 								actions->conf;
6190 				if (decap->size <=
6191 					MLX5_ENCAPSULATION_DECISION_SIZE &&
6192 				    encap->size >
6193 					MLX5_ENCAPSULATION_DECISION_SIZE)
6194 					/* L3 encap. */
6195 					break;
6196 			}
6197 			if (fdb_mirror)
6198 				*modify_after_mirror = 1;
6199 			break;
6200 		default:
6201 			break;
6202 		}
6203 		actions_n++;
6204 	}
6205 	if (flag && fdb_mirror && !*modify_after_mirror) {
6206 		/* FDB mirroring is implemented with the destination array
6207 		 * instead of the FLOW_SAMPLER object.
6208 		 */
6209 		if (sub_type != RTE_FLOW_ACTION_TYPE_END)
6210 			flag = 0;
6211 	}
6212 	/* Count RTE_FLOW_ACTION_TYPE_END. */
6213 	return flag ? actions_n + 1 : 0;
6214 }
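
/*
 * Example (hypothetical transfer rule, for illustration only):
 * checking actions { MARK, SAMPLE(ratio = 1, sub action PORT_ID),
 * SET_MAC_SRC, END } for RTE_FLOW_ACTION_TYPE_SAMPLE reports
 * *match_action_pos = 1, detects FDB mirroring (ratio 1 with a
 * non-END sub action on transfer), sets *modify_after_mirror = 1
 * for the trailing SET_MAC_SRC and returns 4 (three actions plus
 * END).
 */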
6215 
6216 #define SAMPLE_SUFFIX_ITEM 3
6217 
6218 /**
6219  * Split the sample flow.
6220  *
6221  * As the sample flow will be split into two sub flows, the prefix
6222  * flow keeps the sample action and the other actions move to a
6223  * new suffix flow.
6224  * A unique tag ID is also set by a tag action in the sample flow;
6225  * the same tag ID is used as the match in the suffix flow.
6226  *
6227  * @param dev
6228  *   Pointer to Ethernet device.
6229  * @param[in] add_tag
6230  *   Add extra tag action flag.
6231  * @param[out] sfx_items
6232  *   Suffix flow match items (list terminated by the END pattern item).
6233  * @param[in] actions
6234  *   Associated actions (list terminated by the END action).
6235  * @param[out] actions_sfx
6236  *   Suffix flow actions.
6237  * @param[out] actions_pre
6238  *   Prefix flow actions.
6239  * @param[in] actions_n
6240  *  The total number of actions.
6241  * @param[in] sample_action_pos
6242  *   The sample action position.
6243  * @param[in] qrss_action_pos
6244  *   The Queue/RSS action position.
6245  * @param[in] jump_table
6246  *   Add extra jump action flag.
6247  * @param[out] error
6248  *   Perform verbose error reporting if not NULL.
6249  *
6250  * @return
6251  *   0 or a unique non-zero flow_id on success, a negative errno
6252  *   value otherwise and rte_errno is set.
6253  */
6254 static int
6255 flow_sample_split_prep(struct rte_eth_dev *dev,
6256 		       int add_tag,
6257 		       const struct rte_flow_item items[],
6258 		       struct rte_flow_item sfx_items[],
6259 		       const struct rte_flow_action actions[],
6260 		       struct rte_flow_action actions_sfx[],
6261 		       struct rte_flow_action actions_pre[],
6262 		       int actions_n,
6263 		       int sample_action_pos,
6264 		       int qrss_action_pos,
6265 		       int jump_table,
6266 		       struct rte_flow_error *error)
6267 {
6268 	struct mlx5_priv *priv = dev->data->dev_private;
6269 	struct mlx5_rte_flow_action_set_tag *set_tag;
6270 	struct mlx5_rte_flow_item_tag *tag_spec;
6271 	struct mlx5_rte_flow_item_tag *tag_mask;
6272 	struct rte_flow_action_jump *jump_action;
6273 	uint32_t tag_id = 0;
6274 	int append_index = 0;
6275 	int set_tag_idx = -1;
6276 	int index;
6277 	int ret;
6278 
6279 	if (sample_action_pos < 0)
6280 		return rte_flow_error_set(error, EINVAL,
6281 					  RTE_FLOW_ERROR_TYPE_ACTION,
6282 					  NULL, "invalid position of sample "
6283 					  "action in list");
6284 	/* Prepare the actions for prefix and suffix flow. */
6285 	if (add_tag) {
6286 		/* Update the index of the newly added tag action so that it
6287 		 * precedes the PUSH_VLAN or ENCAP action.
6288 		 */
6289 		const struct rte_flow_action_raw_encap *raw_encap;
6290 		const struct rte_flow_action *action = actions;
6291 		int encap_idx;
6292 		int action_idx = 0;
6293 		int raw_decap_idx = -1;
6294 		int push_vlan_idx = -1;
6295 		for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
6296 			switch (action->type) {
6297 			case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
6298 				raw_decap_idx = action_idx;
6299 				break;
6300 			case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
6301 				raw_encap = action->conf;
6302 				if (raw_encap->size >
6303 					MLX5_ENCAPSULATION_DECISION_SIZE) {
6304 					encap_idx = raw_decap_idx != -1 ?
6305 						    raw_decap_idx : action_idx;
6306 					if (encap_idx < sample_action_pos &&
6307 					    push_vlan_idx == -1)
6308 						set_tag_idx = encap_idx;
6309 				}
6310 				break;
6311 			case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
6312 			case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
6313 				encap_idx = action_idx;
6314 				if (encap_idx < sample_action_pos &&
6315 				    push_vlan_idx == -1)
6316 					set_tag_idx = encap_idx;
6317 				break;
6318 			case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
6319 			case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
6320 				if (action_idx < sample_action_pos &&
6321 				    push_vlan_idx == -1) {
6322 					set_tag_idx = action_idx;
6323 					push_vlan_idx = action_idx;
6324 				}
6325 				break;
6326 			default:
6327 				break;
6328 			}
6329 			action_idx++;
6330 		}
6331 	}
6332 	/* Prepare the actions for prefix and suffix flow. */
6333 	if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
6334 		index = qrss_action_pos;
6335 		/* Put the actions preceding the Queue/RSS action into the prefix flow. */
6336 		if (index != 0)
6337 			memcpy(actions_pre, actions,
6338 			       sizeof(struct rte_flow_action) * index);
6339 		/* Put the other actions preceding the sample action into the prefix flow. */
6340 		if (sample_action_pos > index + 1)
6341 			memcpy(actions_pre + index, actions + index + 1,
6342 			       sizeof(struct rte_flow_action) *
6343 			       (sample_action_pos - index - 1));
6344 		index = sample_action_pos - 1;
6345 		/* Put Queue/RSS action into Suffix flow. */
6346 		memcpy(actions_sfx, actions + qrss_action_pos,
6347 		       sizeof(struct rte_flow_action));
6348 		actions_sfx++;
6349 	} else if (add_tag && set_tag_idx >= 0) {
6350 		if (set_tag_idx > 0)
6351 			memcpy(actions_pre, actions,
6352 			       sizeof(struct rte_flow_action) * set_tag_idx);
6353 		memcpy(actions_pre + set_tag_idx + 1, actions + set_tag_idx,
6354 		       sizeof(struct rte_flow_action) *
6355 		       (sample_action_pos - set_tag_idx));
6356 		index = sample_action_pos;
6357 	} else {
6358 		index = sample_action_pos;
6359 		if (index != 0)
6360 			memcpy(actions_pre, actions,
6361 			       sizeof(struct rte_flow_action) * index);
6362 	}
6363 	/* For CX5, add an extra tag action for NIC-RX and E-Switch ingress.
6364 	 * For CX6DX and above, metadata registers Cx preserve their value;
6365 	 * add an extra tag action for NIC-RX and the E-Switch domain.
6366 	 */
6367 	if (add_tag) {
6368 		/* Prepare the prefix tag action. */
6369 		append_index++;
6370 		set_tag = (void *)(actions_pre + actions_n + append_index);
6371 		/* Trusted VF/SF on CX5 does not support meter, so the
6372 		 * reserved metadata regC is REG_NON; fall back to using
6373 		 * application tag index 0.
6374 		 */
6375 		if (unlikely(priv->mtr_color_reg == REG_NON))
6376 			ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
6377 		else
6378 			ret = mlx5_flow_get_reg_id(dev, MLX5_SAMPLE_ID, 0, error);
6379 		if (ret < 0)
6380 			return ret;
6381 		mlx5_ipool_malloc(priv->sh->ipool
6382 				  [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id);
6383 		*set_tag = (struct mlx5_rte_flow_action_set_tag) {
6384 			.id = ret,
6385 			.data = tag_id,
6386 		};
6387 		/* Prepare the suffix subflow items. */
6388 		tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
6389 		tag_spec->data = tag_id;
6390 		tag_spec->id = set_tag->id;
6391 		tag_mask = tag_spec + 1;
6392 		tag_mask->data = UINT32_MAX;
6393 		for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
6394 			if (items->type == RTE_FLOW_ITEM_TYPE_PORT_ID ||
6395 			    items->type == RTE_FLOW_ITEM_TYPE_PORT_REPRESENTOR ||
6396 			    items->type == RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT) {
6397 				memcpy(sfx_items, items, sizeof(*sfx_items));
6398 				sfx_items++;
6399 				break;
6400 			}
6401 		}
6402 		sfx_items[0] = (struct rte_flow_item){
6403 			.type = (enum rte_flow_item_type)
6404 				MLX5_RTE_FLOW_ITEM_TYPE_TAG,
6405 			.spec = tag_spec,
6406 			.last = NULL,
6407 			.mask = tag_mask,
6408 		};
6409 		sfx_items[1] = (struct rte_flow_item){
6410 			.type = (enum rte_flow_item_type)
6411 				RTE_FLOW_ITEM_TYPE_END,
6412 		};
6413 		/* Prepare the tag action in prefix subflow. */
6414 		set_tag_idx = (set_tag_idx == -1) ? index : set_tag_idx;
6415 		actions_pre[set_tag_idx] =
6416 			(struct rte_flow_action){
6417 			.type = (enum rte_flow_action_type)
6418 				MLX5_RTE_FLOW_ACTION_TYPE_TAG,
6419 			.conf = set_tag,
6420 		};
6421 		/* Update the next sample position since one tag action was added. */
6422 		index += 1;
6423 	}
6424 	/* Copy the sample action into prefix flow. */
6425 	memcpy(actions_pre + index, actions + sample_action_pos,
6426 	       sizeof(struct rte_flow_action));
6427 	index += 1;
6428 	/* For a modify action after the sample action in E-Switch mirroring,
6429 	 * add an extra jump action in the prefix subflow to jump into the
6430 	 * next table, then do the modify action in the new table.
6431 	 */
6432 	if (jump_table) {
6433 		/* Prepare the prefix jump action. */
6434 		append_index++;
6435 		jump_action = (void *)(actions_pre + actions_n + append_index);
6436 		jump_action->group = jump_table;
6437 		actions_pre[index++] =
6438 			(struct rte_flow_action){
6439 			.type = (enum rte_flow_action_type)
6440 				RTE_FLOW_ACTION_TYPE_JUMP,
6441 			.conf = jump_action,
6442 		};
6443 	}
6444 	actions_pre[index] = (struct rte_flow_action){
6445 		.type = (enum rte_flow_action_type)
6446 			RTE_FLOW_ACTION_TYPE_END,
6447 	};
6448 	/* Put the actions after sample into Suffix flow. */
6449 	memcpy(actions_sfx, actions + sample_action_pos + 1,
6450 	       sizeof(struct rte_flow_action) *
6451 	       (actions_n - sample_action_pos - 1));
6452 	return tag_id;
6453 }
6454 
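/*
 * Illustrative sketch, not driver code: the action-list rewrite performed
 * by flow_sample_split_prep() above for a NIC-Rx rule. Given an application
 * list such as [COUNT, SAMPLE, QUEUE, END], the prefix keeps everything up
 * to and including SAMPLE plus an internal tag write, while the suffix
 * matches that tag and carries the Queue/RSS action. The concrete action
 * lists below are assumptions for illustration only.
 */
static __rte_unused void
flow_sample_split_layout_sketch(void)
{
	static const struct rte_flow_action prefix[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_COUNT },
		/* An internal MLX5_RTE_FLOW_ACTION_TYPE_TAG carrying the
		 * unique flow id is inserted before SAMPLE.
		 */
		{ .type = RTE_FLOW_ACTION_TYPE_SAMPLE },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	static const struct rte_flow_action suffix[] = {
		/* Matched via MLX5_RTE_FLOW_ITEM_TYPE_TAG on the same id. */
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	RTE_SET_USED(prefix);
	RTE_SET_USED(suffix);
}
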
6455 /**
6456  * The splitting for metadata feature.
6457  *
6458  * - Q/RSS action on NIC Rx should be split in order to pass by
6459  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
6460  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
6461  *
6462  * - All the actions on NIC Tx should have a mreg copy action to
6463  *   copy reg_a from WQE to reg_c[0].
6464  *
6465  * @param dev
6466  *   Pointer to Ethernet device.
6467  * @param[in] flow
6468  *   Parent flow structure pointer.
6469  * @param[in] attr
6470  *   Flow rule attributes.
6471  * @param[in] items
6472  *   Pattern specification (list terminated by the END pattern item).
6473  * @param[in] actions
6474  *   Associated actions (list terminated by the END action).
6475  * @param[in] flow_split_info
6476  *   Pointer to flow split info structure.
6477  * @param[out] error
6478  *   Perform verbose error reporting if not NULL.
6479  * @return
6480  *   0 on success, negative value otherwise
6481  */
6482 static int
6483 flow_create_split_metadata(struct rte_eth_dev *dev,
6484 			   struct rte_flow *flow,
6485 			   const struct rte_flow_attr *attr,
6486 			   const struct rte_flow_item items[],
6487 			   const struct rte_flow_action actions[],
6488 			   struct mlx5_flow_split_info *flow_split_info,
6489 			   struct rte_flow_error *error)
6490 {
6491 	struct mlx5_priv *priv = dev->data->dev_private;
6492 	struct mlx5_sh_config *config = &priv->sh->config;
6493 	const struct rte_flow_action *qrss = NULL;
6494 	struct rte_flow_action *ext_actions = NULL;
6495 	struct mlx5_flow *dev_flow = NULL;
6496 	uint32_t qrss_id = 0;
6497 	int mtr_sfx = 0;
6498 	size_t act_size;
6499 	int actions_n;
6500 	int encap_idx;
6501 	int ret;
6502 
6503 	/* Check whether extensive metadata feature is engaged. */
6504 	if (!config->dv_flow_en ||
6505 	    config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
6506 	    !mlx5_flow_ext_mreg_supported(dev))
6507 		return flow_create_split_inner(dev, flow, NULL, attr, items,
6508 					       actions, flow_split_info, error);
6509 	actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
6510 							   &encap_idx);
6511 	if (qrss) {
6512 		/* Exclude hairpin flows from splitting. */
6513 		if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
6514 			const struct rte_flow_action_queue *queue;
6515 
6516 			queue = qrss->conf;
6517 			if (mlx5_rxq_is_hairpin(dev, queue->index))
6518 				qrss = NULL;
6519 		} else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
6520 			const struct rte_flow_action_rss *rss;
6521 
6522 			rss = qrss->conf;
6523 			if (mlx5_rxq_is_hairpin(dev, rss->queue[0]))
6524 				qrss = NULL;
6525 		}
6526 	}
6527 	if (qrss) {
6528 		/* Check if it is in meter suffix table. */
6529 		mtr_sfx = attr->group ==
6530 			  ((attr->transfer && priv->fdb_def_rule) ?
6531 			  (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6532 			  MLX5_FLOW_TABLE_LEVEL_METER);
6533 		/*
6534 		 * Q/RSS action on NIC Rx should be split in order to pass by
6535 		 * the mreg copy table (RX_CP_TBL) and then it jumps to the
6536 		 * action table (RX_ACT_TBL) which has the split Q/RSS action.
6537 		 */
6538 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
6539 			   sizeof(struct rte_flow_action_set_tag) +
6540 			   sizeof(struct rte_flow_action_jump);
6541 		ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
6542 					  SOCKET_ID_ANY);
6543 		if (!ext_actions)
6544 			return rte_flow_error_set(error, ENOMEM,
6545 						  RTE_FLOW_ERROR_TYPE_ACTION,
6546 						  NULL, "no memory to split "
6547 						  "metadata flow");
6548 		/*
6549 		 * Create the new actions list with removed Q/RSS action
6550 		 * and appended set tag and jump to register copy table
6551 		 * (RX_CP_TBL). The unique tag ID must be preallocated here,
6552 		 * because it is needed for the set tag action.
6553 		 */
6554 		qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
6555 						    qrss, actions_n,
6556 						    mtr_sfx, error);
6557 		if (!mtr_sfx && !qrss_id) {
6558 			ret = -rte_errno;
6559 			goto exit;
6560 		}
6561 	} else if (attr->egress) {
6562 		/*
6563 		 * All the actions on NIC Tx should have a metadata register
6564 		 * copy action to copy reg_a from WQE to reg_c[meta]
6565 		 */
6566 		act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
6567 			   sizeof(struct mlx5_flow_action_copy_mreg);
6568 		ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
6569 					  SOCKET_ID_ANY);
6570 		if (!ext_actions)
6571 			return rte_flow_error_set(error, ENOMEM,
6572 						  RTE_FLOW_ERROR_TYPE_ACTION,
6573 						  NULL, "no memory to split "
6574 						  "metadata flow");
6575 		/* Create the action list appended with copy register. */
6576 		ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
6577 					     actions_n, error, encap_idx);
6578 		if (ret < 0)
6579 			goto exit;
6580 	}
6581 	/* Add the unmodified original or prefix subflow. */
6582 	ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6583 				      items, ext_actions ? ext_actions :
6584 				      actions, flow_split_info, error);
6585 	if (ret < 0)
6586 		goto exit;
6587 	MLX5_ASSERT(dev_flow);
6588 	if (qrss) {
6589 		const struct rte_flow_attr q_attr = {
6590 			.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
6591 			.ingress = 1,
6592 		};
6593 		/* Internal PMD action to set register. */
6594 		struct mlx5_rte_flow_item_tag q_tag_spec = {
6595 			.data = qrss_id,
6596 			.id = REG_NON,
6597 		};
6598 		struct rte_flow_item q_items[] = {
6599 			{
6600 				.type = (enum rte_flow_item_type)
6601 					MLX5_RTE_FLOW_ITEM_TYPE_TAG,
6602 				.spec = &q_tag_spec,
6603 				.last = NULL,
6604 				.mask = NULL,
6605 			},
6606 			{
6607 				.type = RTE_FLOW_ITEM_TYPE_END,
6608 			},
6609 		};
6610 		struct rte_flow_action q_actions[] = {
6611 			{
6612 				.type = qrss->type,
6613 				.conf = qrss->conf,
6614 			},
6615 			{
6616 				.type = RTE_FLOW_ACTION_TYPE_END,
6617 			},
6618 		};
6619 		uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
6620 
6621 		/*
6622 		 * Configure the tag item only if there is no meter subflow.
6623 		 * Since tag is already marked in the meter suffix subflow
6624 		 * we can just use the meter suffix items as is.
6625 		 */
6626 		if (qrss_id) {
6627 			/* Not meter subflow. */
6628 			MLX5_ASSERT(!mtr_sfx);
6629 			/*
6630 			 * Put the unique id in the prefix flow because it is
6631 			 * destroyed after the suffix flow. The id is freed
6632 			 * once no actual flows use it, at which point
6633 			 * identifier reallocation becomes possible (for
6634 			 * example, for other flows in other threads).
6635 			 */
6636 			dev_flow->handle->split_flow_id = qrss_id;
6637 			ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
6638 						   error);
6639 			if (ret < 0)
6640 				goto exit;
6641 			q_tag_spec.id = ret;
6642 		}
6643 		dev_flow = NULL;
6644 		/* Add suffix subflow to execute Q/RSS. */
6645 		flow_split_info->prefix_layers = layers;
6646 		flow_split_info->prefix_mark = 0;
6647 		flow_split_info->table_id = 0;
6648 		ret = flow_create_split_inner(dev, flow, &dev_flow,
6649 					      &q_attr, mtr_sfx ? items :
6650 					      q_items, q_actions,
6651 					      flow_split_info, error);
6652 		if (ret < 0)
6653 			goto exit;
6654 		/* Reset qrss_id; it is freed in the exit path only on failure. */
6655 		qrss_id = 0;
6656 		MLX5_ASSERT(dev_flow);
6657 	}
6658 
6659 exit:
6660 	/*
6661 	 * We do not destroy the partially created sub_flows in case of error.
6662 	 * These ones are included into parent flow list and will be destroyed
6663 	 * by flow_drv_destroy.
6664 	 */
6665 	mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
6666 			qrss_id);
6667 	mlx5_free(ext_actions);
6668 	return ret;
6669 }
6670 
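/*
 * Illustrative sketch, not driver code: the prefix action list built by
 * flow_mreg_split_qrss_prep() for an ingress rule when extensive metadata
 * is enabled. The original Q/RSS action is removed and replaced by an
 * internal set-tag (carrying the preallocated qrss_id) plus a jump to the
 * register copy table; the Q/RSS action itself runs in the suffix subflow
 * matching that tag. The MARK action stands in for arbitrary preceding
 * actions and is an assumption.
 */
static __rte_unused void
flow_metadata_split_layout_sketch(void)
{
	static const struct rte_flow_action_jump to_rx_cp_tbl = {
		.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
	};
	static const struct rte_flow_action prefix[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_MARK },
		/* Internal MLX5_RTE_FLOW_ACTION_TYPE_TAG with qrss_id here. */
		{ .type = RTE_FLOW_ACTION_TYPE_JUMP, .conf = &to_rx_cp_tbl },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	RTE_SET_USED(prefix);
}
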
6671 /**
6672  * Create meter internal drop flow with the original pattern.
6673  *
6674  * @param dev
6675  *   Pointer to Ethernet device.
6676  * @param[in] flow
6677  *   Parent flow structure pointer.
6678  * @param[in] attr
6679  *   Flow rule attributes.
6680  * @param[in] items
6681  *   Pattern specification (list terminated by the END pattern item).
6682  * @param[in] flow_split_info
6683  *   Pointer to flow split info structure.
6684  * @param[in] fm
6685  *   Pointer to flow meter structure.
6686  * @param[out] error
6687  *   Perform verbose error reporting if not NULL.
6688  * @return
6689  *   0 on success, negative value otherwise
6690  */
6691 static uint32_t
6692 flow_meter_create_drop_flow_with_org_pattern(struct rte_eth_dev *dev,
6693 			struct rte_flow *flow,
6694 			const struct rte_flow_attr *attr,
6695 			const struct rte_flow_item items[],
6696 			struct mlx5_flow_split_info *flow_split_info,
6697 			struct mlx5_flow_meter_info *fm,
6698 			struct rte_flow_error *error)
6699 {
6700 	struct mlx5_flow *dev_flow = NULL;
6701 	struct rte_flow_attr drop_attr = *attr;
6702 	struct rte_flow_action drop_actions[3];
6703 	struct mlx5_flow_split_info drop_split_info = *flow_split_info;
6704 
6705 	MLX5_ASSERT(fm->drop_cnt);
6706 	drop_actions[0].type =
6707 		(enum rte_flow_action_type)MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
6708 	drop_actions[0].conf = (void *)(uintptr_t)fm->drop_cnt;
6709 	drop_actions[1].type = RTE_FLOW_ACTION_TYPE_DROP;
6710 	drop_actions[1].conf = NULL;
6711 	drop_actions[2].type = RTE_FLOW_ACTION_TYPE_END;
6712 	drop_actions[2].conf = NULL;
6713 	drop_split_info.external = false;
6714 	drop_split_info.skip_scale |= 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6715 	drop_split_info.table_id = MLX5_MTR_TABLE_ID_DROP;
6716 	drop_attr.group = MLX5_FLOW_TABLE_LEVEL_METER;
6717 	return flow_create_split_inner(dev, flow, &dev_flow,
6718 				&drop_attr, items, drop_actions,
6719 				&drop_split_info, error);
6720 }
6721 
6722 /**
6723  * The splitting for meter feature.
6724  *
6725  * - The meter flow will be split into two flows: a prefix and a
6726  *   suffix flow. Packets are meaningful only if they pass the prefix
6727  *   meter action.
6728  *
6729  * - Reg_C_5 is used to match a packet between the prefix and
6730  *   suffix flows.
6731  *
6732  * @param dev
6733  *   Pointer to Ethernet device.
6734  * @param[in] flow
6735  *   Parent flow structure pointer.
6736  * @param[in] attr
6737  *   Flow rule attributes.
6738  * @param[in] items
6739  *   Pattern specification (list terminated by the END pattern item).
6740  * @param[in] actions
6741  *   Associated actions (list terminated by the END action).
6742  * @param[in] flow_split_info
6743  *   Pointer to flow split info structure.
6744  * @param[out] error
6745  *   Perform verbose error reporting if not NULL.
6746  * @return
6747  *   0 on success, negative value otherwise
6748  */
6749 static int
6750 flow_create_split_meter(struct rte_eth_dev *dev,
6751 			struct rte_flow *flow,
6752 			const struct rte_flow_attr *attr,
6753 			const struct rte_flow_item items[],
6754 			const struct rte_flow_action actions[],
6755 			struct mlx5_flow_split_info *flow_split_info,
6756 			struct rte_flow_error *error)
6757 {
6758 	struct mlx5_priv *priv = dev->data->dev_private;
6759 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6760 	struct rte_flow_action *sfx_actions = NULL;
6761 	struct rte_flow_action *pre_actions = NULL;
6762 	struct rte_flow_item *sfx_items = NULL;
6763 	struct mlx5_flow *dev_flow = NULL;
6764 	struct rte_flow_attr sfx_attr = *attr;
6765 	struct mlx5_flow_meter_info *fm = NULL;
6766 	uint8_t skip_scale_restore;
6767 	bool has_mtr = false;
6768 	bool has_modify = false;
6769 	bool set_mtr_reg = true;
6770 	bool is_mtr_hierarchy = false;
6771 	uint32_t meter_id = 0;
6772 	uint32_t mtr_idx = 0;
6773 	uint32_t mtr_flow_id = 0;
6774 	size_t act_size;
6775 	size_t item_size;
6776 	int actions_n = 0;
6777 	int ret = 0;
6778 
6779 	if (priv->mtr_en)
6780 		actions_n = flow_check_meter_action(dev, actions, &has_mtr,
6781 						    &has_modify, &meter_id);
6782 	if (has_mtr) {
6783 		if (flow->meter) {
6784 			fm = flow_dv_meter_find_by_idx(priv, flow->meter);
6785 			if (!fm)
6786 				return rte_flow_error_set(error, EINVAL,
6787 						RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6788 						NULL, "Meter not found.");
6789 		} else {
6790 			fm = mlx5_flow_meter_find(priv, meter_id, &mtr_idx);
6791 			if (!fm)
6792 				return rte_flow_error_set(error, EINVAL,
6793 						RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6794 						NULL, "Meter not found.");
6795 			ret = mlx5_flow_meter_attach(priv, fm,
6796 						     &sfx_attr, error);
6797 			if (ret)
6798 				return -rte_errno;
6799 			flow->meter = mtr_idx;
6800 		}
6801 		MLX5_ASSERT(wks);
6802 		wks->fm = fm;
6803 		if (!fm->def_policy) {
6804 			wks->policy = mlx5_flow_meter_policy_find(dev,
6805 								  fm->policy_id,
6806 								  NULL);
6807 			MLX5_ASSERT(wks->policy);
6808 			if (wks->policy->mark)
6809 				wks->mark = 1;
6810 			if (wks->policy->is_hierarchy) {
6811 				wks->final_policy =
6812 				mlx5_flow_meter_hierarchy_get_final_policy(dev,
6813 								wks->policy);
6814 				if (!wks->final_policy)
6815 					return rte_flow_error_set(error,
6816 					EINVAL,
6817 					RTE_FLOW_ERROR_TYPE_ACTION, NULL,
6818 				"Failed to find terminal policy of hierarchy.");
6819 				is_mtr_hierarchy = true;
6820 			}
6821 		}
6822 		/*
6823 		 * If it isn't default-policy Meter, and
6824 		 * 1. Not meter hierarchy and there's no action in flow to change
6825 		 *    packet (modify/encap/decap etc.), OR
6826 		 * 2. No drop count needed for this meter.
6827 		 * Then no need to use regC to save meter id anymore.
6828 		 */
6829 		if (!fm->def_policy && ((!has_modify && !is_mtr_hierarchy) || !fm->drop_cnt))
6830 			set_mtr_reg = false;
6831 		/* Prefix actions: meter, decap, encap, tag, jump, end, cnt. */
6832 #define METER_PREFIX_ACTION 7
6833 		act_size = (sizeof(struct rte_flow_action) *
6834 			    (actions_n + METER_PREFIX_ACTION)) +
6835 			   sizeof(struct mlx5_rte_flow_action_set_tag);
6836 		/* Suffix items: tag, vlan, port id, end. */
6837 #define METER_SUFFIX_ITEM 4
6838 		item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
6839 			    sizeof(struct mlx5_rte_flow_item_tag) * 2;
6840 		sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
6841 					  0, SOCKET_ID_ANY);
6842 		if (!sfx_actions)
6843 			return rte_flow_error_set(error, ENOMEM,
6844 						  RTE_FLOW_ERROR_TYPE_ACTION,
6845 						  NULL, "no memory to split "
6846 						  "meter flow");
6847 		sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
6848 			     act_size);
6849 		/* There's no suffix flow for a meter with a non-default policy. */
6850 		if (!fm->def_policy)
6851 			pre_actions = sfx_actions + 1;
6852 		else
6853 			pre_actions = sfx_actions + actions_n;
6854 		ret = flow_meter_split_prep(dev, flow, wks, &sfx_attr,
6855 					    items, sfx_items, actions,
6856 					    sfx_actions, pre_actions,
6857 					    (set_mtr_reg ? &mtr_flow_id : NULL),
6858 					    error);
6859 		if (ret) {
6860 			ret = -rte_errno;
6861 			goto exit;
6862 		}
6863 		/* Add the prefix subflow. */
6864 		skip_scale_restore = flow_split_info->skip_scale;
6865 		flow_split_info->skip_scale |=
6866 			1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6867 		ret = flow_create_split_inner(dev, flow, &dev_flow,
6868 					      attr, items, pre_actions,
6869 					      flow_split_info, error);
6870 		flow_split_info->skip_scale = skip_scale_restore;
6871 		if (ret) {
6872 			if (mtr_flow_id)
6873 				mlx5_ipool_free(fm->flow_ipool, mtr_flow_id);
6874 			ret = -rte_errno;
6875 			goto exit;
6876 		}
6877 		if (mtr_flow_id) {
6878 			dev_flow->handle->split_flow_id = mtr_flow_id;
6879 			dev_flow->handle->is_meter_flow_id = 1;
6880 		}
6881 		if (!fm->def_policy) {
6882 			if (!set_mtr_reg && fm->drop_cnt)
6883 				ret =
6884 			flow_meter_create_drop_flow_with_org_pattern(dev, flow,
6885 							&sfx_attr, items,
6886 							flow_split_info,
6887 							fm, error);
6888 			goto exit;
6889 		}
6890 		/* Set the suffix group attr. */
6891 		sfx_attr.group = sfx_attr.transfer ?
6892 				(MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6893 				 MLX5_FLOW_TABLE_LEVEL_METER;
6894 		flow_split_info->prefix_layers =
6895 				flow_get_prefix_layer_flags(dev_flow);
6896 		flow_split_info->prefix_mark |= wks->mark;
6897 		flow_split_info->table_id = MLX5_MTR_TABLE_ID_SUFFIX;
6898 	}
6899 	/* Add the suffix subflow (or the original flow when there is no meter). */
6900 	ret = flow_create_split_metadata(dev, flow,
6901 					 &sfx_attr, sfx_items ?
6902 					 sfx_items : items,
6903 					 sfx_actions ? sfx_actions : actions,
6904 					 flow_split_info, error);
6905 exit:
6906 	if (sfx_actions)
6907 		mlx5_free(sfx_actions);
6908 	return ret;
6909 }
6910 
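/*
 * Illustrative sketch, not driver code: the single-allocation carve-up
 * used by flow_create_split_meter() above. Suffix actions, prefix actions
 * and suffix items share one zeroed buffer at fixed offsets, so a single
 * mlx5_free() in the exit path releases everything. The action count is an
 * assumption for illustration.
 */
static __rte_unused void
flow_meter_split_buffer_sketch(void)
{
	const int actions_n = 4; /* Assumed parsed action count. */
	const size_t act_size = sizeof(struct rte_flow_action) *
				(actions_n + METER_PREFIX_ACTION) +
				sizeof(struct mlx5_rte_flow_action_set_tag);
	const size_t item_size = sizeof(struct rte_flow_item) *
				 METER_SUFFIX_ITEM +
				 sizeof(struct mlx5_rte_flow_item_tag) * 2;
	struct rte_flow_action *sfx_actions = mlx5_malloc(MLX5_MEM_ZERO,
							  act_size + item_size,
							  0, SOCKET_ID_ANY);
	struct rte_flow_item *sfx_items;

	if (!sfx_actions)
		return;
	/* The suffix item array lives right behind both action arrays. */
	sfx_items = (struct rte_flow_item *)((char *)sfx_actions + act_size);
	RTE_SET_USED(sfx_items);
	mlx5_free(sfx_actions);
}
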
6911 /**
6912  * The splitting for sample feature.
6913  *
6914  * Once Sample action is detected in the action list, the flow actions should
6915  * be split into prefix sub flow and suffix sub flow.
6916  *
6917  * The original items remain in the prefix sub flow; all actions preceding
6918  * the sample action, and the sample action itself, are copied to the prefix
6919  * sub flow, while the actions following the sample action are copied to the
6920  * suffix sub flow. The Queue action is always located in the suffix sub flow.
6921  *
6922  * In order to match packets from the prefix sub flow in the suffix sub
6923  * flow, an extra tag action is added to the prefix sub flow, and the suffix
6924  * sub flow uses a tag item with the unique flow id.
6925  *
6926  * @param dev
6927  *   Pointer to Ethernet device.
6928  * @param[in] flow
6929  *   Parent flow structure pointer.
6930  * @param[in] attr
6931  *   Flow rule attributes.
6932  * @param[in] items
6933  *   Pattern specification (list terminated by the END pattern item).
6934  * @param[in] actions
6935  *   Associated actions (list terminated by the END action).
6936  * @param[in] flow_split_info
6937  *   Pointer to flow split info structure.
6938  * @param[out] error
6939  *   Perform verbose error reporting if not NULL.
6940  * @return
6941  *   0 on success, negative value otherwise
6942  */
6943 static int
6944 flow_create_split_sample(struct rte_eth_dev *dev,
6945 			 struct rte_flow *flow,
6946 			 const struct rte_flow_attr *attr,
6947 			 const struct rte_flow_item items[],
6948 			 const struct rte_flow_action actions[],
6949 			 struct mlx5_flow_split_info *flow_split_info,
6950 			 struct rte_flow_error *error)
6951 {
6952 	struct mlx5_priv *priv = dev->data->dev_private;
6953 	struct rte_flow_action *sfx_actions = NULL;
6954 	struct rte_flow_action *pre_actions = NULL;
6955 	struct rte_flow_item *sfx_items = NULL;
6956 	struct mlx5_flow *dev_flow = NULL;
6957 	struct rte_flow_attr sfx_attr = *attr;
6958 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6959 	struct mlx5_flow_dv_sample_resource *sample_res;
6960 	struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
6961 	struct mlx5_flow_tbl_resource *sfx_tbl;
6962 	struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6963 #endif
6964 	size_t act_size;
6965 	size_t item_size;
6966 	uint32_t fdb_tx = 0;
6967 	int32_t tag_id = 0;
6968 	int actions_n = 0;
6969 	int sample_action_pos;
6970 	int qrss_action_pos;
6971 	int add_tag = 0;
6972 	int modify_after_mirror = 0;
6973 	uint16_t jump_table = 0;
6974 	const uint32_t next_ft_step = 1;
6975 	int ret = 0;
6976 	struct mlx5_priv *item_port_priv = NULL;
6977 	const struct rte_flow_item *item;
6978 
6979 	if (priv->sampler_en)
6980 		actions_n = flow_check_match_action(actions, attr,
6981 					RTE_FLOW_ACTION_TYPE_SAMPLE,
6982 					&sample_action_pos, &qrss_action_pos,
6983 					&modify_after_mirror);
6984 	if (actions_n) {
6985 		/* The prefix actions must include sample, tag, end. */
6986 		act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
6987 			   + sizeof(struct mlx5_rte_flow_action_set_tag);
6988 		item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
6989 			    sizeof(struct mlx5_rte_flow_item_tag) * 2;
6990 		sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
6991 					  item_size), 0, SOCKET_ID_ANY);
6992 		if (!sfx_actions)
6993 			return rte_flow_error_set(error, ENOMEM,
6994 						  RTE_FLOW_ERROR_TYPE_ACTION,
6995 						  NULL, "no memory to split "
6996 						  "sample flow");
6997 		for (item = items; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
6998 			if (item->type == RTE_FLOW_ITEM_TYPE_PORT_ID) {
6999 				const struct rte_flow_item_port_id *spec;
7000 
7001 				spec = (const struct rte_flow_item_port_id *)item->spec;
7002 				if (spec)
7003 					item_port_priv =
7004 						mlx5_port_to_eswitch_info(spec->id, true);
7005 				break;
7006 			} else if (item->type == RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT) {
7007 				const struct rte_flow_item_ethdev *spec;
7008 
7009 				spec = (const struct rte_flow_item_ethdev *)item->spec;
7010 				if (spec)
7011 					item_port_priv =
7012 						mlx5_port_to_eswitch_info(spec->port_id, true);
7013 				break;
7014 			} else if (item->type == RTE_FLOW_ITEM_TYPE_PORT_REPRESENTOR) {
7015 				const struct rte_flow_item_ethdev *spec;
7016 
7017 				spec = (const struct rte_flow_item_ethdev *)item->spec;
7018 				if (spec)
7019 					item_port_priv =
7020 						mlx5_port_to_eswitch_info(spec->port_id, true);
7021 				break;
7022 			}
7023 		}
7024 		/* The representor_id is UINT16_MAX for uplink. */
7025 		fdb_tx = (attr->transfer &&
7026 			  flow_source_vport_representor(priv, item_port_priv));
7027 		/*
7028 		 * When reg_c_preserve is set, metadata registers Cx preserve
7029 		 * their value even through packet duplication.
7030 		 */
7031 		add_tag = (!fdb_tx ||
7032 			   priv->sh->cdev->config.hca_attr.reg_c_preserve);
7033 		if (add_tag)
7034 			sfx_items = (struct rte_flow_item *)((char *)sfx_actions
7035 					+ act_size);
7036 		if (modify_after_mirror)
7037 			jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
7038 				     next_ft_step;
7039 		pre_actions = sfx_actions + actions_n;
7040 		tag_id = flow_sample_split_prep(dev, add_tag, items, sfx_items,
7041 						actions, sfx_actions,
7042 						pre_actions, actions_n,
7043 						sample_action_pos,
7044 						qrss_action_pos, jump_table,
7045 						error);
7046 		if (tag_id < 0 || (add_tag && !tag_id)) {
7047 			ret = -rte_errno;
7048 			goto exit;
7049 		}
7050 		if (modify_after_mirror)
7051 			flow_split_info->skip_scale =
7052 					1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
7053 		/* Add the prefix subflow. */
7054 		ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
7055 					      items, pre_actions,
7056 					      flow_split_info, error);
7057 		if (ret) {
7058 			ret = -rte_errno;
7059 			goto exit;
7060 		}
7061 		dev_flow->handle->split_flow_id = tag_id;
7062 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
7063 		if (!modify_after_mirror) {
7064 			/* Set the sfx group attr. */
7065 			sample_res = (struct mlx5_flow_dv_sample_resource *)
7066 						dev_flow->dv.sample_res;
7067 			sfx_tbl = (struct mlx5_flow_tbl_resource *)
7068 						sample_res->normal_path_tbl;
7069 			sfx_tbl_data = container_of(sfx_tbl,
7070 						struct mlx5_flow_tbl_data_entry,
7071 						tbl);
7072 			sfx_attr.group = sfx_attr.transfer ?
7073 			(sfx_tbl_data->level - 1) : sfx_tbl_data->level;
7074 		} else {
7075 			MLX5_ASSERT(attr->transfer);
7076 			sfx_attr.group = jump_table;
7077 		}
7078 		flow_split_info->prefix_layers =
7079 				flow_get_prefix_layer_flags(dev_flow);
7080 		MLX5_ASSERT(wks);
7081 		flow_split_info->prefix_mark |= wks->mark;
7082 		/* The suffix group level has already been scaled with the
7083 		 * factor; set MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1 to
7084 		 * avoid scaling again in translation.
7085 		 */
7086 		flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
7087 #endif
7088 	}
7089 	/* Add the suffix subflow. */
7090 	ret = flow_create_split_meter(dev, flow, &sfx_attr,
7091 				      sfx_items ? sfx_items : items,
7092 				      sfx_actions ? sfx_actions : actions,
7093 				      flow_split_info, error);
7094 exit:
7095 	if (sfx_actions)
7096 		mlx5_free(sfx_actions);
7097 	return ret;
7098 }
7099 
7100 /**
7101  * Split the flow to subflow set. The splitters might be linked
7102  * in the chain, like this:
7103  * flow_create_split_outer() calls:
7104  *   flow_create_split_meter() calls:
7105  *     flow_create_split_metadata(meter_subflow_0) calls:
7106  *       flow_create_split_inner(metadata_subflow_0)
7107  *       flow_create_split_inner(metadata_subflow_1)
7108  *       flow_create_split_inner(metadata_subflow_2)
7109  *     flow_create_split_metadata(meter_subflow_1) calls:
7110  *       flow_create_split_inner(metadata_subflow_0)
7111  *       flow_create_split_inner(metadata_subflow_1)
7112  *       flow_create_split_inner(metadata_subflow_2)
7113  *
7114  * This provides a flexible way to add new levels of flow splitting.
7115  * All successfully created subflows are included in the parent flow
7116  * dev_flow list.
7117  *
7118  * @param dev
7119  *   Pointer to Ethernet device.
7120  * @param[in] flow
7121  *   Parent flow structure pointer.
7122  * @param[in] attr
7123  *   Flow rule attributes.
7124  * @param[in] items
7125  *   Pattern specification (list terminated by the END pattern item).
7126  * @param[in] actions
7127  *   Associated actions (list terminated by the END action).
7128  * @param[in] flow_split_info
7129  *   Pointer to flow split info structure.
7130  * @param[out] error
7131  *   Perform verbose error reporting if not NULL.
7132  * @return
7133  *   0 on success, negative value otherwise
7134  */
7135 static int
7136 flow_create_split_outer(struct rte_eth_dev *dev,
7137 			struct rte_flow *flow,
7138 			const struct rte_flow_attr *attr,
7139 			const struct rte_flow_item items[],
7140 			const struct rte_flow_action actions[],
7141 			struct mlx5_flow_split_info *flow_split_info,
7142 			struct rte_flow_error *error)
7143 {
7144 	int ret;
7145 
7146 	ret = flow_create_split_sample(dev, flow, attr, items,
7147 				       actions, flow_split_info, error);
7148 	MLX5_ASSERT(ret <= 0);
7149 	return ret;
7150 }
7151 
7152 static inline struct mlx5_flow_tunnel *
7153 flow_tunnel_from_rule(const struct mlx5_flow *flow)
7154 {
7155 	struct mlx5_flow_tunnel *tunnel;
7156 
7157 #pragma GCC diagnostic push
7158 #pragma GCC diagnostic ignored "-Wcast-qual"
7159 	tunnel = (typeof(tunnel))flow->tunnel;
7160 #pragma GCC diagnostic pop
7161 
7162 	return tunnel;
7163 }
7164 
7165 /**
7166  * Create a flow and add it to @p list.
7167  *
7168  * @param dev
7169  *   Pointer to Ethernet device.
7170  * @param list
7171  *   Pointer to a TAILQ flow list. If this parameter is NULL,
7172  *   no list insertion occurs, the flow is just created, and
7173  *   it is the caller's responsibility to track the
7174  *   created flow.
7175  * @param[in] attr
7176  *   Flow rule attributes.
7177  * @param[in] items
7178  *   Pattern specification (list terminated by the END pattern item).
7179  * @param[in] actions
7180  *   Associated actions (list terminated by the END action).
7181  * @param[in] external
7182  *   This flow rule is created by request external to PMD.
7183  * @param[out] error
7184  *   Perform verbose error reporting if not NULL.
7185  *
7186  * @return
7187  *   A flow index on success, 0 otherwise and rte_errno is set.
7188  */
7189 static uint32_t
7190 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7191 		 const struct rte_flow_attr *attr,
7192 		 const struct rte_flow_item items[],
7193 		 const struct rte_flow_action original_actions[],
7194 		 bool external, struct rte_flow_error *error)
7195 {
7196 	struct mlx5_priv *priv = dev->data->dev_private;
7197 	struct rte_flow *flow = NULL;
7198 	struct mlx5_flow *dev_flow;
7199 	const struct rte_flow_action_rss *rss = NULL;
7200 	struct mlx5_translated_action_handle
7201 		indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
7202 	int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
7203 	union {
7204 		struct mlx5_flow_expand_rss buf;
7205 		uint8_t buffer[8192];
7206 	} expand_buffer;
7207 	union {
7208 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
7209 		uint8_t buffer[2048];
7210 	} actions_rx;
7211 	union {
7212 		struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
7213 		uint8_t buffer[2048];
7214 	} actions_hairpin_tx;
7215 	union {
7216 		struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
7217 		uint8_t buffer[2048];
7218 	} items_tx;
7219 	struct mlx5_rte_flow_item_sq sq_specs[RTE_MAX_QUEUES_PER_PORT];
7220 	struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
7221 	struct mlx5_flow_rss_desc *rss_desc;
7222 	const struct rte_flow_action *p_actions_rx;
7223 	uint32_t i;
7224 	uint32_t idx = 0;
7225 	int hairpin_flow;
7226 	struct rte_flow_attr attr_tx = { .priority = 0 };
7227 	const struct rte_flow_action *actions;
7228 	struct rte_flow_action *translated_actions = NULL;
7229 	struct mlx5_flow_tunnel *tunnel;
7230 	struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
7231 	struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
7232 	struct mlx5_flow_split_info flow_split_info = {
7233 		.external = !!external,
7234 		.skip_scale = 0,
7235 		.flow_idx = 0,
7236 		.prefix_mark = 0,
7237 		.prefix_layers = 0,
7238 		.table_id = 0
7239 	};
7240 	int ret;
7241 
7242 	MLX5_ASSERT(wks);
7243 	rss_desc = &wks->rss_desc;
7244 	ret = flow_action_handles_translate(dev, original_actions,
7245 					    indir_actions,
7246 					    &indir_actions_n,
7247 					    &translated_actions, error);
7248 	if (ret < 0) {
7249 		MLX5_ASSERT(translated_actions == NULL);
7250 		return 0;
7251 	}
7252 	actions = translated_actions ? translated_actions : original_actions;
7253 	p_actions_rx = actions;
7254 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
7255 	ret = flow_drv_validate(dev, attr, items, p_actions_rx,
7256 				external, hairpin_flow, error);
7257 	if (ret < 0)
7258 		goto error_before_hairpin_split;
7259 	flow = mlx5_ipool_zmalloc(priv->flows[type], &idx);
7260 	if (!flow) {
7261 		rte_errno = ENOMEM;
7262 		goto error_before_hairpin_split;
7263 	}
7264 	if (hairpin_flow > 0) {
7265 		if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
7266 			rte_errno = EINVAL;
7267 			goto error_before_hairpin_split;
7268 		}
7269 		flow_hairpin_split(dev, actions, actions_rx.actions,
7270 				   actions_hairpin_tx.actions, items_tx.items,
7271 				   idx);
7272 		p_actions_rx = actions_rx.actions;
7273 	}
7274 	flow_split_info.flow_idx = idx;
7275 	flow->drv_type = flow_get_drv_type(dev, attr);
7276 	MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
7277 		    flow->drv_type < MLX5_FLOW_TYPE_MAX);
7278 	memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
7279 	/* RSS Action only works on NIC RX domain */
7280 	if (attr->ingress)
7281 		rss = flow_get_rss_action(dev, p_actions_rx);
7282 	if (rss) {
7283 		MLX5_ASSERT(rss->queue_num <= RTE_ETH_RSS_RETA_SIZE_512);
7284 		rss_desc->symmetric_hash_function = MLX5_RSS_IS_SYMM(rss->func);
7285 		/*
7286 		 * The following information is required by
7287 		 * mlx5_flow_hashfields_adjust() in advance.
7288 		 */
7289 		rss_desc->level = rss->level;
7290 		/* RSS type 0 indicates default RSS type (RTE_ETH_RSS_IP). */
7291 		rss_desc->types = !rss->types ? RTE_ETH_RSS_IP : rss->types;
7292 	}
7293 	flow->dev_handles = 0;
7294 	if (rss && rss->types) {
7295 		unsigned int graph_root;
7296 
7297 		graph_root = find_graph_root(rss->level);
7298 		ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
7299 					   items, rss->types,
7300 					   mlx5_support_expansion, graph_root);
7301 		MLX5_ASSERT(ret > 0 &&
7302 		       (unsigned int)ret < sizeof(expand_buffer.buffer));
7303 		if (rte_log_can_log(mlx5_logtype, RTE_LOG_DEBUG)) {
7304 			for (i = 0; i < buf->entries; ++i)
7305 				mlx5_dbg__print_pattern(buf->entry[i].pattern);
7306 		}
7307 	} else {
7308 		ret = mlx5_flow_expand_sqn((struct mlx5_flow_expand_sqn *)buf,
7309 					   sizeof(expand_buffer.buffer),
7310 					   items, sq_specs);
7311 		if (ret) {
7312 			rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
7313 					   NULL, "not enough memory for rte_flow");
7314 			goto error;
7315 		}
7316 		if (buf->entries == 0) {
7317 			buf->entries = 1;
7318 			buf->entry[0].pattern = (void *)(uintptr_t)items;
7319 		}
7320 	}
7321 	rss_desc->shared_rss = flow_get_shared_rss_action(dev, indir_actions,
7322 						      indir_actions_n);
7323 	for (i = 0; i < buf->entries; ++i) {
7324 		/* Initialize flow split data. */
7325 		flow_split_info.prefix_layers = 0;
7326 		flow_split_info.prefix_mark = 0;
7327 		flow_split_info.skip_scale = 0;
7328 		/*
7329 		 * The splitter may create multiple dev_flows,
7330 		 * depending on configuration. In the simplest
7331 		 * case it just creates unmodified original flow.
7332 		 */
7333 		ret = flow_create_split_outer(dev, flow, attr,
7334 					      buf->entry[i].pattern,
7335 					      p_actions_rx, &flow_split_info,
7336 					      error);
7337 		if (ret < 0)
7338 			goto error;
7339 		if (is_flow_tunnel_steer_rule(wks->flows[0].tof_type)) {
7340 			ret = flow_tunnel_add_default_miss(dev, flow, attr,
7341 							   p_actions_rx,
7342 							   idx,
7343 							   wks->flows[0].tunnel,
7344 							   &default_miss_ctx,
7345 							   error);
7346 			if (ret < 0) {
7347 				mlx5_free(default_miss_ctx.queue);
7348 				goto error;
7349 			}
7350 		}
7351 	}
7352 	/* Create the tx flow. */
7353 	if (hairpin_flow) {
7354 		attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
7355 		attr_tx.ingress = 0;
7356 		attr_tx.egress = 1;
7357 		dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
7358 					 actions_hairpin_tx.actions,
7359 					 idx, error);
7360 		if (!dev_flow)
7361 			goto error;
7362 		dev_flow->flow = flow;
7363 		dev_flow->external = 0;
7364 		SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
7365 			      dev_flow->handle, next);
7366 		ret = flow_drv_translate(dev, dev_flow, &attr_tx,
7367 					 items_tx.items,
7368 					 actions_hairpin_tx.actions, error);
7369 		if (ret < 0)
7370 			goto error;
7371 	}
7372 	/*
7373 	 * Update the metadata register copy table. If extensive
7374 	 * metadata feature is enabled and registers are supported
7375 	 * we might create the extra rte_flow for each unique
7376 	 * MARK/FLAG action ID.
7377 	 *
7378 	 * The table is updated for ingress and transfer flows only, because
7379 	 * the egress Flows belong to the different device and
7380 	 * copy table should be updated in peer NIC Rx domain.
7381 	 */
7382 	if ((attr->ingress || attr->transfer) &&
7383 	    (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
7384 		ret = flow_mreg_update_copy_table(dev, flow, actions, error);
7385 		if (ret)
7386 			goto error;
7387 	}
7388 	/*
7389 	 * If the flow is external (from application) OR device is started,
7390 	 * OR mreg discover, then apply immediately.
7391 	 */
7392 	if (external || dev->data->dev_started ||
7393 	    (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
7394 	     attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
7395 		ret = flow_drv_apply(dev, flow, error);
7396 		if (ret < 0)
7397 			goto error;
7398 	}
7399 	flow->type = type;
7400 	flow_rxq_flags_set(dev, flow);
7401 	rte_free(translated_actions);
7402 	tunnel = flow_tunnel_from_rule(wks->flows);
7403 	if (tunnel) {
7404 		flow->tunnel = 1;
7405 		flow->tunnel_id = tunnel->tunnel_id;
7406 		__atomic_fetch_add(&tunnel->refctn, 1, __ATOMIC_RELAXED);
7407 		mlx5_free(default_miss_ctx.queue);
7408 	}
7409 	mlx5_flow_pop_thread_workspace();
7410 	return idx;
7411 error:
7412 	MLX5_ASSERT(flow);
7413 	ret = rte_errno; /* Save rte_errno before cleanup. */
7414 	flow_mreg_del_copy_action(dev, flow);
7415 	flow_drv_destroy(dev, flow);
7416 	if (rss_desc->shared_rss)
7417 		__atomic_fetch_sub(&((struct mlx5_shared_action_rss *)
7418 			mlx5_ipool_get
7419 			(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
7420 			rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
7421 	mlx5_ipool_free(priv->flows[type], idx);
7422 	rte_errno = ret; /* Restore rte_errno. */
7425 error_before_hairpin_split:
7426 	mlx5_flow_pop_thread_workspace();
7427 	rte_free(translated_actions);
7428 	return 0;
7429 }
7430 
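/*
 * Illustrative sketch with a hypothetical helper: the errno-preserving
 * cleanup pattern used in the error path of flow_list_create() above.
 * Cleanup calls may clobber rte_errno, so the error code is saved before
 * cleanup and restored afterwards.
 */
static __rte_unused void
flow_cleanup_preserving_errno_sketch(void (*cleanup)(void))
{
	int ret = rte_errno; /* Save rte_errno before cleanup. */

	cleanup();
	rte_errno = ret; /* Restore rte_errno. */
}
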
7431 /**
7432  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
7433  * incoming packets to table 1.
7434  *
7435  * Other flow rules, requested for group n, will be created in
7436  * e-switch table n+1.
7437  * Jump action to e-switch group n will be created to group n+1.
7438  *
7439  * Used when working in switchdev mode, to utilise advantages of table 1
7440  * and above.
7441  *
7442  * @param dev
7443  *   Pointer to Ethernet device.
7444  *
7445  * @return
7446  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
7447  */
7448 struct rte_flow *
7449 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
7450 {
7451 	const struct rte_flow_attr attr = {
7452 		.group = 0,
7453 		.priority = 0,
7454 		.ingress = 0,
7455 		.egress = 0,
7456 		.transfer = 1,
7457 	};
7458 	const struct rte_flow_item pattern = {
7459 		.type = RTE_FLOW_ITEM_TYPE_END,
7460 	};
7461 	struct rte_flow_action_jump jump = {
7462 		.group = 1,
7463 	};
7464 	const struct rte_flow_action actions[] = {
7465 		{
7466 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
7467 			.conf = &jump,
7468 		},
7469 		{
7470 			.type = RTE_FLOW_ACTION_TYPE_END,
7471 		},
7472 	};
7473 	struct rte_flow_error error;
7474 
7475 	return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7476 						   &attr, &pattern,
7477 						   actions, false, &error);
7478 }
7479 
7480 /**
7481  * Create a dedicated flow rule on e-switch table 1, matches ESW manager
7482  * and sq number, directs all packets to peer vport.
7483  *
7484  * @param dev
7485  *   Pointer to Ethernet device.
7486  * @param sq_num
7487  *   SQ number.
7488  *
7489  * @return
7490  *   Flow ID on success, 0 otherwise and rte_errno is set.
7491  */
7492 uint32_t
7493 mlx5_flow_create_devx_sq_miss_flow(struct rte_eth_dev *dev, uint32_t sq_num)
7494 {
7495 	struct rte_flow_attr attr = {
7496 		.group = 0,
7497 		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7498 		.ingress = 0,
7499 		.egress = 0,
7500 		.transfer = 1,
7501 	};
7502 	struct rte_flow_item_port_id port_spec = {
7503 		.id = MLX5_PORT_ESW_MGR,
7504 	};
7505 	struct mlx5_rte_flow_item_sq sq_spec = {
7506 		.queue = sq_num,
7507 	};
7508 	struct rte_flow_item pattern[] = {
7509 		{
7510 			.type = RTE_FLOW_ITEM_TYPE_PORT_ID,
7511 			.spec = &port_spec,
7512 		},
7513 		{
7514 			.type = (enum rte_flow_item_type)
7515 				MLX5_RTE_FLOW_ITEM_TYPE_SQ,
7516 			.spec = &sq_spec,
7517 		},
7518 		{
7519 			.type = RTE_FLOW_ITEM_TYPE_END,
7520 		},
7521 	};
7522 	struct rte_flow_action_jump jump = {
7523 		.group = 1,
7524 	};
7525 	struct rte_flow_action_port_id port = {
7526 		.id = dev->data->port_id,
7527 	};
7528 	struct rte_flow_action actions[] = {
7529 		{
7530 			.type = RTE_FLOW_ACTION_TYPE_JUMP,
7531 			.conf = &jump,
7532 		},
7533 		{
7534 			.type = RTE_FLOW_ACTION_TYPE_END,
7535 		},
7536 	};
7537 	struct rte_flow_error error;
7538 
7539 	/*
7540 	 * Creates group 0, highest priority jump flow.
7541 	 * Matches txq to bypass kernel packets.
7542 	 */
7543 	if (flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern, actions,
7544 			     false, &error) == 0)
7545 		return 0;
7546 	/* Create group 1, lowest priority redirect flow for txq. */
7547 	attr.group = 1;
7548 	actions[0].conf = &port;
7549 	actions[0].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
7550 	return flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern,
7551 				actions, false, &error);
7552 }
7553 
7554 /**
7555  * Validate a flow supported by the NIC.
7556  *
7557  * @see rte_flow_validate()
7558  * @see rte_flow_ops
7559  */
7560 int
7561 mlx5_flow_validate(struct rte_eth_dev *dev,
7562 		   const struct rte_flow_attr *attr,
7563 		   const struct rte_flow_item items[],
7564 		   const struct rte_flow_action original_actions[],
7565 		   struct rte_flow_error *error)
7566 {
7567 	int hairpin_flow;
7568 	struct mlx5_translated_action_handle
7569 		indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
7570 	int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
7571 	const struct rte_flow_action *actions;
7572 	struct rte_flow_action *translated_actions = NULL;
7573 	int ret = flow_action_handles_translate(dev, original_actions,
7574 						indir_actions,
7575 						&indir_actions_n,
7576 						&translated_actions, error);
7577 
7578 	if (ret)
7579 		return ret;
7580 	actions = translated_actions ? translated_actions : original_actions;
7581 	hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
7582 	ret = flow_drv_validate(dev, attr, items, actions,
7583 				true, hairpin_flow, error);
7584 	rte_free(translated_actions);
7585 	return ret;
7586 }
7587 
7588 static int
7589 mlx5_flow_cache_flow_info(struct rte_eth_dev *dev,
7590 			  const struct rte_flow_attr *attr,
7591 			  const uint32_t orig_prio,
7592 			  const struct rte_flow_item *items,
7593 			  const struct rte_flow_action *actions,
7594 			  uint32_t flow_idx)
7595 {
7596 	struct mlx5_priv *priv = dev->data->dev_private;
7597 	struct mlx5_flow_engine_mode_info *mode_info = &priv->mode_info;
7598 	struct mlx5_dv_flow_info *flow_info, *tmp_info;
7599 	struct rte_flow_error error;
7600 	int len, ret;
7601 
7602 	flow_info = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*flow_info), 0, SOCKET_ID_ANY);
7603 	if (!flow_info) {
7604 		DRV_LOG(ERR, "Not enough memory for flow_info caching.");
7605 		return -1;
7606 	}
7607 	flow_info->orig_prio = orig_prio;
7608 	flow_info->attr = *attr;
7609 	/* Standby mode always saves the rule in the low-priority entry. */
7610 	flow_info->flow_idx_low_prio = flow_idx;
7611 
7612 	/* Store matching items. */
7613 	ret = rte_flow_conv(RTE_FLOW_CONV_OP_PATTERN, NULL, 0, items, &error);
7614 	if (ret <= 0) {
7615 		DRV_LOG(ERR, "Can't get items length.");
7616 		goto end;
7617 	}
7618 	len = RTE_ALIGN(ret, 16);
7619 	flow_info->items = mlx5_malloc(MLX5_MEM_ZERO, len, 0, SOCKET_ID_ANY);
7620 	if (!flow_info->items) {
7621 		DRV_LOG(ERR, "Not enough memory for items caching.");
7622 		goto end;
7623 	}
7624 	ret = rte_flow_conv(RTE_FLOW_CONV_OP_PATTERN, flow_info->items, ret, items, &error);
7625 	if (ret <= 0) {
7626 		DRV_LOG(ERR, "Can't duplicate items.");
7627 		goto end;
7628 	}
7629 
7630 	/* Store flow actions. */
7631 	ret = rte_flow_conv(RTE_FLOW_CONV_OP_ACTIONS, NULL, 0, actions, &error);
7632 	if (ret <= 0) {
7633 		DRV_LOG(ERR, "Can't get actions length.");
7634 		goto end;
7635 	}
7636 	len = RTE_ALIGN(ret, 16);
7637 	flow_info->actions = mlx5_malloc(MLX5_MEM_ZERO, len, 0, SOCKET_ID_ANY);
7638 	if (!flow_info->actions) {
7639 		DRV_LOG(ERR, "Not enough memory for actions caching.");
7640 		goto end;
7641 	}
7642 	ret = rte_flow_conv(RTE_FLOW_CONV_OP_ACTIONS, flow_info->actions, ret, actions, &error);
7643 	if (ret <= 0) {
7644 		DRV_LOG(ERR, "Can't duplicate actions.");
7645 		goto end;
7646 	}
7647 
7648 	/* Insert to the list end. */
7649 	if (LIST_EMPTY(&mode_info->hot_upgrade)) {
7650 		LIST_INSERT_HEAD(&mode_info->hot_upgrade, flow_info,  next);
7651 	} else {
7652 		tmp_info = LIST_FIRST(&mode_info->hot_upgrade);
7653 		while (LIST_NEXT(tmp_info, next))
7654 			tmp_info = LIST_NEXT(tmp_info, next);
7655 		LIST_INSERT_AFTER(tmp_info, flow_info, next);
7656 	}
7657 	return 0;
7658 end:
7659 	if (flow_info->items)
7660 		mlx5_free(flow_info->items);
7661 	if (flow_info->actions)
7662 		mlx5_free(flow_info->actions);
7663 	mlx5_free(flow_info);
7664 	return -1;
7665 }
7666 
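/*
 * Illustrative sketch with a hypothetical helper: the two-pass
 * rte_flow_conv() idiom used by mlx5_flow_cache_flow_info() above. A first
 * call with a NULL destination returns the required byte count; a second
 * call into a buffer of at least that size performs the deep copy.
 */
static __rte_unused struct rte_flow_item *
flow_dup_pattern_sketch(const struct rte_flow_item *items,
			struct rte_flow_error *error)
{
	struct rte_flow_item *copy;
	int len = rte_flow_conv(RTE_FLOW_CONV_OP_PATTERN, NULL, 0, items,
				error);

	if (len <= 0)
		return NULL;
	copy = mlx5_malloc(MLX5_MEM_ZERO, RTE_ALIGN(len, 16), 0,
			   SOCKET_ID_ANY);
	if (!copy)
		return NULL;
	if (rte_flow_conv(RTE_FLOW_CONV_OP_PATTERN, copy, len, items,
			  error) <= 0) {
		mlx5_free(copy);
		return NULL;
	}
	return copy;
}
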
7667 static int
7668 mlx5_flow_cache_flow_toggle(struct rte_eth_dev *dev, bool orig_prio)
7669 {
7670 	struct mlx5_priv *priv = dev->data->dev_private;
7671 	struct mlx5_flow_engine_mode_info *mode_info = &priv->mode_info;
7672 	struct mlx5_dv_flow_info *flow_info;
7673 	struct rte_flow_attr attr;
7674 	struct rte_flow_error error;
7675 	struct rte_flow *high, *low;
7676 
7677 	flow_info = LIST_FIRST(&mode_info->hot_upgrade);
7678 	while (flow_info) {
7679 		/* DUP flow may have the same priority. */
7680 		if (flow_info->orig_prio != flow_info->attr.priority) {
7681 			attr = flow_info->attr;
7682 			if (orig_prio)
7683 				attr.priority = flow_info->orig_prio;
7684 			flow_info->flow_idx_high_prio = flow_list_create(dev, MLX5_FLOW_TYPE_GEN,
7685 					&attr, flow_info->items, flow_info->actions,
7686 					true, &error);
7687 			if (!flow_info->flow_idx_high_prio) {
7688 				DRV_LOG(ERR, "Priority toggle failed internally.");
7689 				goto err;
7690 			}
7691 		}
7692 		flow_info = LIST_NEXT(flow_info, next);
7693 	}
7694 	/* Delete the low priority rules and swap the flow handle. */
7695 	flow_info = LIST_FIRST(&mode_info->hot_upgrade);
7696 	while (flow_info) {
7697 		MLX5_ASSERT(flow_info->flow_idx_low_prio);
7698 		if (flow_info->orig_prio != flow_info->attr.priority) {
7699 			high = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
7700 					flow_info->flow_idx_high_prio);
7701 			low = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
7702 					flow_info->flow_idx_low_prio);
7703 			if (high && low) {
7704 				RTE_SWAP(*low, *high);
7705 				flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN,
7706 						  flow_info->flow_idx_low_prio);
7707 				flow_info->flow_idx_high_prio = 0;
7708 			}
7709 		}
7710 		flow_info = LIST_NEXT(flow_info, next);
7711 	}
7712 	return 0;
7713 err:
7714 	/* Destroy preceding successful high priority rules. */
7715 	flow_info = LIST_FIRST(&mode_info->hot_upgrade);
7716 	while (flow_info) {
7717 		if (flow_info->orig_prio != flow_info->attr.priority) {
7718 			if (flow_info->flow_idx_high_prio)
7719 				flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN,
7720 						  flow_info->flow_idx_high_prio);
7721 			else
7722 				break;
7723 			flow_info->flow_idx_high_prio = 0;
7724 		}
7725 		flow_info = LIST_NEXT(flow_info, next);
7726 	}
7727 	return -1;
7728 }
7729 
7730 /**
7731  * Set the mode of the flow engine of a process to active or standby during live migration.
7732  *
7733  * @param[in] mode
7734  *   MLX5 flow engine mode, @see `enum mlx5_flow_engine_mode`.
7735  * @param[in] flags
7736  *   Flow engine mode specific flags.
7737  *
7738  * @return
7739  *   Negative value on error, positive on success.
7740  */
7741 int
7742 rte_pmd_mlx5_flow_engine_set_mode(enum mlx5_flow_engine_mode mode, uint32_t flags)
7743 {
7744 	struct mlx5_priv *priv;
7745 	struct mlx5_flow_engine_mode_info *mode_info;
7746 	struct mlx5_dv_flow_info *flow_info, *tmp_info;
7747 	uint16_t port, port_id;
7748 	uint16_t toggle_num = 0;
7749 	struct rte_eth_dev *dev;
7750 	enum mlx5_flow_engine_mode orig_mode;
7751 	uint32_t orig_flags;
7752 	bool need_toggle = false;
7753 
7754 	/* Check if flags combinations are supported. */
7755 	if (flags && flags != MLX5_FLOW_ENGINE_FLAG_STANDBY_DUP_INGRESS) {
7756 		DRV_LOG(ERR, "Unsupported flags: %u", flags);
7757 		return -1;
7758 	}
7759 	MLX5_ETH_FOREACH_DEV(port, NULL) {
7760 		dev = &rte_eth_devices[port];
7761 		priv = dev->data->dev_private;
7762 		mode_info = &priv->mode_info;
7763 		/* No mode change. Assume all devices hold the same mode. */
7764 		if (mode_info->mode == mode) {
7765 			DRV_LOG(INFO, "Process flow engine has been in mode %u", mode);
7766 			if (mode_info->mode_flag != flags && !LIST_EMPTY(&mode_info->hot_upgrade)) {
7767 				DRV_LOG(ERR, "Port %u has rule cache with different flag %u\n",
7768 						port, mode_info->mode_flag);
7769 				orig_mode = mode_info->mode;
7770 				orig_flags = mode_info->mode_flag;
7771 				goto err;
7772 			}
7773 			mode_info->mode_flag = flags;
7774 			toggle_num++;
7775 			continue;
7776 		}
7777 		/* Active -> standby. */
7778 		if (mode == MLX5_FLOW_ENGINE_MODE_STANDBY) {
7779 			if (!LIST_EMPTY(&mode_info->hot_upgrade)) {
7780 				DRV_LOG(ERR, "Cached rule existed");
7781 				orig_mode = mode_info->mode;
7782 				orig_flags = mode_info->mode_flag;
7783 				goto err;
7784 			}
7785 			mode_info->mode_flag = flags;
7786 			mode_info->mode = mode;
7787 			toggle_num++;
7788 		/* Standby -> active. */
7789 		} else if (mode == MLX5_FLOW_ENGINE_MODE_ACTIVE) {
7790 			if (LIST_EMPTY(&mode_info->hot_upgrade)) {
7791 				DRV_LOG(INFO, "No cached rule existed");
7792 			} else {
7793 				if (mlx5_flow_cache_flow_toggle(dev, true)) {
7794 					orig_mode = mode_info->mode;
7795 					orig_flags = mode_info->mode_flag;
7796 					need_toggle = true;
7797 					goto err;
7798 				}
7799 			}
7800 			toggle_num++;
7801 		}
7802 	}
7803 	if (mode == MLX5_FLOW_ENGINE_MODE_ACTIVE) {
7804 		/* Clear cache flow rules. */
7805 		MLX5_ETH_FOREACH_DEV(port, NULL) {
7806 			priv = rte_eth_devices[port].data->dev_private;
7807 			mode_info = &priv->mode_info;
7808 			flow_info = LIST_FIRST(&mode_info->hot_upgrade);
7809 			while (flow_info) {
7810 				tmp_info = LIST_NEXT(flow_info, next);
7811 				LIST_REMOVE(flow_info, next);
7812 				mlx5_free(flow_info->actions);
7813 				mlx5_free(flow_info->items);
7814 				mlx5_free(flow_info);
7815 				flow_info = tmp_info;
7816 			}
7817 			MLX5_ASSERT(LIST_EMPTY(&mode_info->hot_upgrade));
7818 		}
7819 	}
7820 	return toggle_num;
7821 err:
7822 	/* Rollback all preceding successful ports. */
7823 	MLX5_ETH_FOREACH_DEV(port_id, NULL) {
7824 		if (port_id == port)
7825 			break;
7826 		priv = rte_eth_devices[port_id].data->dev_private;
7827 		mode_info = &priv->mode_info;
7828 		if (need_toggle && !LIST_EMPTY(&mode_info->hot_upgrade) &&
7829 		    mlx5_flow_cache_flow_toggle(dev, false))
7830 			return -EPERM;
7831 		mode_info->mode = orig_mode;
7832 		mode_info->mode_flag = orig_flags;
7833 	}
7834 	return -EINVAL;
7835 }
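
/*
 * Illustrative usage sketch from the application side, not driver code: a
 * standby process enables standby mode with ingress rule duplication before
 * taking over, then switches to active once the old process quits. Error
 * handling is reduced to bare checks.
 */
static __rte_unused int
flow_engine_live_migration_sketch(void)
{
	int ret;

	ret = rte_pmd_mlx5_flow_engine_set_mode
		(MLX5_FLOW_ENGINE_MODE_STANDBY,
		 MLX5_FLOW_ENGINE_FLAG_STANDBY_DUP_INGRESS);
	if (ret < 0)
		return ret;
	/* ... insert rules, wait for the active process to quit ... */
	return rte_pmd_mlx5_flow_engine_set_mode(MLX5_FLOW_ENGINE_MODE_ACTIVE,
						 0);
}
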
7836 /**
7837  * Create a flow.
7838  *
7839  * @see rte_flow_create()
7840  * @see rte_flow_ops
7841  */
7842 struct rte_flow *
7843 mlx5_flow_create(struct rte_eth_dev *dev,
7844 		 const struct rte_flow_attr *attr,
7845 		 const struct rte_flow_item items[],
7846 		 const struct rte_flow_action actions[],
7847 		 struct rte_flow_error *error)
7848 {
7849 	struct mlx5_priv *priv = dev->data->dev_private;
7850 	struct rte_flow_attr *new_attr = (void *)(uintptr_t)attr;
7851 	uint32_t prio = attr->priority;
7852 	uint32_t flow_idx;
7853 
7854 	if (priv->sh->config.dv_flow_en == 2) {
7855 		rte_flow_error_set(error, ENOTSUP,
7856 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7857 			  NULL,
7858 			  "Flow non-Q creation not supported");
7859 		return NULL;
7860 	}
7861 	/*
7862 	 * If the device is not started yet, it is not allowed to create a
7863 	 * flow from the application. PMD default flows and traffic control flows
7864 	 * are not affected.
7865 	 */
7866 	if (unlikely(!dev->data->dev_started)) {
7867 		DRV_LOG(DEBUG, "port %u is not started when "
7868 			"inserting a flow", dev->data->port_id);
7869 		rte_flow_error_set(error, ENODEV,
7870 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7871 				   NULL,
7872 				   "port not started");
7873 		return NULL;
7874 	}
7875 	if (unlikely(mlx5_need_cache_flow(priv, attr))) {
7876 		if (attr->transfer ||
7877 		    (attr->ingress &&
7878 		    !(priv->mode_info.mode_flag & MLX5_FLOW_ENGINE_FLAG_STANDBY_DUP_INGRESS)))
7879 			new_attr->priority += 1;
7880 	}
7881 	flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_GEN, attr, items, actions, true, error);
7882 	if (!flow_idx)
7883 		return NULL;
7884 	if (unlikely(mlx5_need_cache_flow(priv, attr))) {
7885 		if (mlx5_flow_cache_flow_info(dev, attr, prio, items, actions, flow_idx)) {
7886 			flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN, flow_idx);
7887 			flow_idx = 0;
7888 		}
7889 	}
7890 	return (void *)(uintptr_t)flow_idx;
7891 }
7892 
7893 /**
7894  * Destroy a flow in a list.
7895  *
7896  * @param dev
7897  *   Pointer to Ethernet device.
7898  * @param[in] flow_idx
7899  *   Index of flow to destroy.
7900  */
7901 static void
7902 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7903 		  uint32_t flow_idx)
7904 {
7905 	struct mlx5_priv *priv = dev->data->dev_private;
7906 	struct rte_flow *flow = mlx5_ipool_get(priv->flows[type], flow_idx);
7907 
7908 	if (!flow)
7909 		return;
7910 	MLX5_ASSERT(flow->type == type);
7911 	/*
7912 	 * Update RX queue flags only if port is started, otherwise it is
7913 	 * already clean.
7914 	 */
7915 	if (dev->data->dev_started)
7916 		flow_rxq_flags_trim(dev, flow);
7917 	flow_drv_destroy(dev, flow);
7918 	if (flow->tunnel) {
7919 		struct mlx5_flow_tunnel *tunnel;
7920 
7921 		tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
7922 		RTE_VERIFY(tunnel);
7923 		if (!(__atomic_fetch_sub(&tunnel->refctn, 1, __ATOMIC_RELAXED) - 1))
7924 			mlx5_flow_tunnel_free(dev, tunnel);
7925 	}
7926 	flow_mreg_del_copy_action(dev, flow);
7927 	mlx5_ipool_free(priv->flows[type], flow_idx);
7928 }
7929 
7930 /**
7931  * Destroy all flows.
7932  *
7933  * @param dev
7934  *   Pointer to Ethernet device.
7935  * @param type
7936  *   Flow type to be flushed.
7937  * @param active
7938  *   If flushing is called actively.
7939  */
7940 void
7941 mlx5_flow_list_flush(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7942 		     bool active)
7943 {
7944 	struct mlx5_priv *priv = dev->data->dev_private;
7945 	uint32_t num_flushed = 0, fidx = 1;
7946 	struct rte_flow *flow;
7947 	struct mlx5_flow_engine_mode_info *mode_info = &priv->mode_info;
7948 	struct mlx5_dv_flow_info *flow_info;
7949 
7950 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
7951 	if (priv->sh->config.dv_flow_en == 2 &&
7952 	    type == MLX5_FLOW_TYPE_GEN) {
7953 		flow_hw_q_flow_flush(dev, NULL);
7954 		return;
7955 	}
7956 #endif
7957 
7958 	MLX5_IPOOL_FOREACH(priv->flows[type], fidx, flow) {
7959 		flow_list_destroy(dev, type, fidx);
7960 		if (unlikely(mlx5_need_cache_flow(priv, NULL) && type == MLX5_FLOW_TYPE_GEN)) {
7961 			flow_info = LIST_FIRST(&mode_info->hot_upgrade);
7962 			while (flow_info) {
7963 				/* Remove the cached flow info. */
7964 				if (flow_info->flow_idx_low_prio == (uint32_t)(uintptr_t)fidx) {
7965 					MLX5_ASSERT(!flow_info->flow_idx_high_prio);
7966 					LIST_REMOVE(flow_info, next);
7967 					mlx5_free(flow_info->items);
7968 					mlx5_free(flow_info->actions);
7969 					mlx5_free(flow_info);
7970 					break;
7971 				}
7972 				flow_info = LIST_NEXT(flow_info, next);
7973 			}
7974 		}
7975 		num_flushed++;
7976 	}
7977 	if (active) {
7978 		DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
7979 			dev->data->port_id, num_flushed);
7980 	}
7981 }
7982 
7983 /**
7984  * Stop all default actions for flows.
7985  *
7986  * @param dev
7987  *   Pointer to Ethernet device.
7988  */
7989 void
7990 mlx5_flow_stop_default(struct rte_eth_dev *dev)
7991 {
7992 	flow_mreg_del_default_copy_action(dev);
7993 	flow_rxq_flags_clear(dev);
7994 }
7995 
7996 /**
7997  * Set rxq flag.
7998  *
7999  * @param[in] dev
8000  *   Pointer to the rte_eth_dev structure.
8001  * @param[in] enable
8002  *   Flag to enable or not.
8003  */
8004 void
8005 flow_hw_rxq_flag_set(struct rte_eth_dev *dev, bool enable)
8006 {
8007 	struct mlx5_priv *priv = dev->data->dev_private;
8008 	unsigned int i;
8009 
8010 	if ((!priv->mark_enabled && !enable) ||
8011 	    (priv->mark_enabled && enable))
8012 		return;
8013 	for (i = 0; i < priv->rxqs_n; ++i) {
8014 		struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, i);
8015 
8016 		/* With RXQ start/stop feature, RXQ might be stopped. */
8017 		if (!rxq_ctrl)
8018 			continue;
8019 		rxq_ctrl->rxq.mark = enable;
8020 	}
8021 	priv->mark_enabled = enable;
8022 }
8023 
8024 /**
8025  * Start all default actions for flows.
8026  *
8027  * @param dev
8028  *   Pointer to Ethernet device.
8029  * @return
8030  *   0 on success, a negative errno value otherwise and rte_errno is set.
8031  */
8032 int
8033 mlx5_flow_start_default(struct rte_eth_dev *dev)
8034 {
8035 	struct rte_flow_error error;
8036 
8037 	/* Make sure default copy action (reg_c[0] -> reg_b) is created. */
8038 	return flow_mreg_add_default_copy_action(dev, &error);
8039 }
8040 
8041 /**
8042  * Release key of thread specific flow workspace data.
8043  */
8044 void
8045 flow_release_workspace(void *data)
8046 {
8047 	struct mlx5_flow_workspace *wks = data;
8048 	struct mlx5_flow_workspace *next;
8049 
8050 	while (wks) {
8051 		next = wks->next;
8052 		free(wks);
8053 		wks = next;
8054 	}
8055 }
8056 
8057 static struct mlx5_flow_workspace *gc_head;
8058 static rte_spinlock_t mlx5_flow_workspace_lock = RTE_SPINLOCK_INITIALIZER;
8059 
8060 static void
8061 mlx5_flow_workspace_gc_add(struct mlx5_flow_workspace *ws)
8062 {
8063 	rte_spinlock_lock(&mlx5_flow_workspace_lock);
8064 	ws->gc = gc_head;
8065 	gc_head = ws;
8066 	rte_spinlock_unlock(&mlx5_flow_workspace_lock);
8067 }
8068 
8069 void
8070 mlx5_flow_workspace_gc_release(void)
8071 {
8072 	while (gc_head) {
8073 		struct mlx5_flow_workspace *wks = gc_head;
8074 
8075 		gc_head = wks->gc;
8076 		flow_release_workspace(wks);
8077 	}
8078 }
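
/*
 * Lifetime sketch (not part of the driver): every allocated workspace is
 * also chained on the global gc_head list, so the thread-local chains
 * are torn down by one final single-threaded walk.
 *
 *     mlx5_flow_workspace_gc_add(data);      <- at allocation time
 *     ...
 *     mlx5_flow_workspace_gc_release();      <- once, during global cleanup
 */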
8079 
8080 /**
8081  * Get thread specific current flow workspace.
8082  *
8083  * @return pointer to thread specific flow workspace data, NULL on error.
8084  */
8085 struct mlx5_flow_workspace*
8086 mlx5_flow_get_thread_workspace(void)
8087 {
8088 	struct mlx5_flow_workspace *data;
8089 
8090 	data = mlx5_flow_os_get_specific_workspace();
8091 	MLX5_ASSERT(data && data->inuse);
8092 	if (!data || !data->inuse)
8093 		DRV_LOG(ERR, "Flow workspace not initialized.");
8094 	return data;
8095 }
8096 
8097 /**
8098  * Allocate and init new flow workspace.
8099  *
8100  * @return pointer to flow workspace data, NULL on error.
8101  */
8102 static struct mlx5_flow_workspace*
8103 flow_alloc_thread_workspace(void)
8104 {
8105 	size_t data_size = RTE_ALIGN(sizeof(struct mlx5_flow_workspace), sizeof(long));
8106 	size_t rss_queue_array_size = sizeof(uint16_t) * RTE_ETH_RSS_RETA_SIZE_512;
8107 	struct mlx5_flow_workspace *data = calloc(1, data_size +
8108 						     rss_queue_array_size);
8109 
8110 	if (!data) {
8111 		DRV_LOG(ERR, "Failed to allocate flow workspace memory.");
8112 		return NULL;
8113 	}
8114 	data->rss_desc.queue = RTE_PTR_ADD(data, data_size);
8115 	return data;
8116 }
8117 
8118 /**
8119  * Get new thread specific flow workspace.
8120  *
8121  * If the current workspace is in use, create a new one and set it as current.
8122  *
8123  * @return pointer to thread specific flow workspace data, NULL on error.
8124  */
8125 struct mlx5_flow_workspace*
8126 mlx5_flow_push_thread_workspace(void)
8127 {
8128 	struct mlx5_flow_workspace *curr;
8129 	struct mlx5_flow_workspace *data;
8130 
8131 	curr = mlx5_flow_os_get_specific_workspace();
8132 	if (!curr) {
8133 		data = flow_alloc_thread_workspace();
8134 		if (!data)
8135 			return NULL;
8136 		mlx5_flow_workspace_gc_add(data);
8137 	} else if (!curr->inuse) {
8138 		data = curr;
8139 	} else if (curr->next) {
8140 		data = curr->next;
8141 	} else {
8142 		data = flow_alloc_thread_workspace();
8143 		if (!data)
8144 			return NULL;
8145 		curr->next = data;
8146 		data->prev = curr;
8147 	}
8148 	data->inuse = 1;
8149 	data->flow_idx = 0;
8150 	/* Set as current workspace */
8151 	if (mlx5_flow_os_set_specific_workspace(data))
8152 		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
8153 	return data;
8154 }
8155 
8156 /**
8157  * Close the current thread-specific flow workspace.
8158  *
8159  * If a previous workspace is available, set it as current; the closed
8160  * workspace is only marked unused, not freed.
8162  */
8163 void
8164 mlx5_flow_pop_thread_workspace(void)
8165 {
8166 	struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();
8167 
8168 	if (!data)
8169 		return;
8170 	if (!data->inuse) {
8171 		DRV_LOG(ERR, "Failed to close unused flow workspace.");
8172 		return;
8173 	}
8174 	data->inuse = 0;
8175 	if (!data->prev)
8176 		return;
8177 	if (mlx5_flow_os_set_specific_workspace(data->prev))
8178 		DRV_LOG(ERR, "Failed to set flow workspace to thread.");
8179 }
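
/*
 * Usage sketch for the push/pop pair (hypothetical caller, not part of
 * the driver): a flow-creation path brackets its translation work so
 * that nested calls on the same thread each get a private workspace.
 *
 *     struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
 *
 *     if (!wks)
 *         return -ENOMEM;
 *     ... fill and use wks->rss_desc ...
 *     mlx5_flow_pop_thread_workspace();
 */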
8180 
8181 /**
8182  * Verify that the flow list is empty.
8183  *
8184  * @param dev
8185  *  Pointer to Ethernet device.
8186  *
8187  * @return the number of flows not released.
8188  */
8189 int
8190 mlx5_flow_verify(struct rte_eth_dev *dev __rte_unused)
8191 {
8192 	struct mlx5_priv *priv = dev->data->dev_private;
8193 	struct rte_flow *flow;
8194 	uint32_t idx = 0;
8195 	int ret = 0, i;
8196 
8197 	for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
8198 		MLX5_IPOOL_FOREACH(priv->flows[i], idx, flow) {
8199 			DRV_LOG(DEBUG, "port %u flow %p still referenced",
8200 				dev->data->port_id, (void *)flow);
8201 			ret++;
8202 		}
8203 	}
8204 	return ret;
8205 }
8206 
8207 /**
8208  * Enable default hairpin egress flow.
8209  *
8210  * @param dev
8211  *   Pointer to Ethernet device.
8212  * @param sq_num
8213  *   The SQ HW number.
8214  *
8215  * @return
8216  *   0 on success, a negative errno value otherwise and rte_errno is set.
8217  */
8218 int
8219 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
8220 			    uint32_t sq_num)
8221 {
8222 	const struct rte_flow_attr attr = {
8223 		.egress = 1,
8224 		.priority = 0,
8225 	};
8226 	struct mlx5_rte_flow_item_sq queue_spec = {
8227 		.queue = sq_num,
8228 	};
8229 	struct mlx5_rte_flow_item_sq queue_mask = {
8230 		.queue = UINT32_MAX,
8231 	};
8232 	struct rte_flow_item items[] = {
8233 		{
8234 			.type = (enum rte_flow_item_type)
8235 				MLX5_RTE_FLOW_ITEM_TYPE_SQ,
8236 			.spec = &queue_spec,
8237 			.last = NULL,
8238 			.mask = &queue_mask,
8239 		},
8240 		{
8241 			.type = RTE_FLOW_ITEM_TYPE_END,
8242 		},
8243 	};
8244 	struct rte_flow_action_jump jump = {
8245 		.group = MLX5_HAIRPIN_TX_TABLE,
8246 	};
8247 	struct rte_flow_action actions[2];
8248 	uint32_t flow_idx;
8249 	struct rte_flow_error error;
8250 
8251 	actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
8252 	actions[0].conf = &jump;
8253 	actions[1].type = RTE_FLOW_ACTION_TYPE_END;
8254 	flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
8255 				    &attr, items, actions, false, &error);
8256 	if (!flow_idx) {
8257 		DRV_LOG(DEBUG,
8258 			"Failed to create ctrl flow: rte_errno(%d),"
8259 			" type(%d), message(%s)",
8260 			rte_errno, error.type,
8261 			error.message ? error.message : " (no stated reason)");
8262 		return -rte_errno;
8263 	}
8264 	return 0;
8265 }
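
/*
 * Rule sketch (not part of the driver): the control flow created above
 * is conceptually
 *
 *     attr:    egress, priority 0
 *     match:   internal SQ item, queue == sq_num (mask UINT32_MAX)
 *     actions: JUMP to group MLX5_HAIRPIN_TX_TABLE / END
 */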
8266 
8267 /**
8268  * Enable a control flow configured from the control plane.
8269  *
8270  * @param dev
8271  *   Pointer to Ethernet device.
8272  * @param eth_spec
8273  *   An Ethernet flow spec to apply.
8274  * @param eth_mask
8275  *   An Ethernet flow mask to apply.
8276  * @param vlan_spec
8277  *   A VLAN flow spec to apply.
8278  * @param vlan_mask
8279  *   A VLAN flow mask to apply.
8280  *
8281  * @return
8282  *   0 on success, a negative errno value otherwise and rte_errno is set.
8283  */
8284 int
8285 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
8286 		    struct rte_flow_item_eth *eth_spec,
8287 		    struct rte_flow_item_eth *eth_mask,
8288 		    struct rte_flow_item_vlan *vlan_spec,
8289 		    struct rte_flow_item_vlan *vlan_mask)
8290 {
8291 	struct mlx5_priv *priv = dev->data->dev_private;
8292 	const struct rte_flow_attr attr = {
8293 		.ingress = 1,
8294 		.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
8295 	};
8296 	struct rte_flow_item items[] = {
8297 		{
8298 			.type = RTE_FLOW_ITEM_TYPE_ETH,
8299 			.spec = eth_spec,
8300 			.last = NULL,
8301 			.mask = eth_mask,
8302 		},
8303 		{
8304 			.type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
8305 					      RTE_FLOW_ITEM_TYPE_END,
8306 			.spec = vlan_spec,
8307 			.last = NULL,
8308 			.mask = vlan_mask,
8309 		},
8310 		{
8311 			.type = RTE_FLOW_ITEM_TYPE_END,
8312 		},
8313 	};
8314 	uint16_t queue[priv->reta_idx_n];
8315 	struct rte_flow_action_rss action_rss = {
8316 		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
8317 		.level = 0,
8318 		.types = priv->rss_conf.rss_hf,
8319 		.key_len = priv->rss_conf.rss_key_len,
8320 		.queue_num = priv->reta_idx_n,
8321 		.key = priv->rss_conf.rss_key,
8322 		.queue = queue,
8323 	};
8324 	struct rte_flow_action actions[] = {
8325 		{
8326 			.type = RTE_FLOW_ACTION_TYPE_RSS,
8327 			.conf = &action_rss,
8328 		},
8329 		{
8330 			.type = RTE_FLOW_ACTION_TYPE_END,
8331 		},
8332 	};
8333 	uint32_t flow_idx;
8334 	struct rte_flow_error error;
8335 	unsigned int i;
8336 
8337 	if (!priv->reta_idx_n || !priv->rxqs_n)
8338 		return 0;
8340 	if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
8341 		action_rss.types = 0;
8342 	for (i = 0; i != priv->reta_idx_n; ++i)
8343 		queue[i] = (*priv->reta_idx)[i];
8344 	flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
8345 				    &attr, items, actions, false, &error);
8346 	if (!flow_idx)
8347 		return -rte_errno;
8348 	return 0;
8349 }
8350 
8351 /**
8352  * Enable a control flow configured from the control plane.
8353  *
8354  * @param dev
8355  *   Pointer to Ethernet device.
8356  * @param eth_spec
8357  *   An Ethernet flow spec to apply.
8358  * @param eth_mask
8359  *   An Ethernet flow mask to apply.
8360  *
8361  * @return
8362  *   0 on success, a negative errno value otherwise and rte_errno is set.
8363  */
8364 int
8365 mlx5_ctrl_flow(struct rte_eth_dev *dev,
8366 	       struct rte_flow_item_eth *eth_spec,
8367 	       struct rte_flow_item_eth *eth_mask)
8368 {
8369 	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
8370 }
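
/*
 * Hypothetical caller sketch (not part of the driver): enabling a
 * broadcast control flow with this helper could look like
 *
 *     struct rte_flow_item_eth bcast = {
 *         .hdr.dst_addr.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *     };
 *
 *     if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *         return -rte_errno;
 */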
8371 
8372 /**
8373  * Create a default miss flow rule matching LACP traffic.
8374  *
8375  * @param dev
8376  *   Pointer to Ethernet device.
8379  *
8380  * @return
8381  *   0 on success, a negative errno value otherwise and rte_errno is set.
8382  */
8383 int
8384 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
8385 {
8386 	/*
8387 	 * The LACP matching is done by only using ether type since using
8388 	 * a multicast dst mac causes the kernel to give low priority to this flow.
8389 	 */
8390 	static const struct rte_flow_item_eth lacp_spec = {
8391 		.hdr.ether_type = RTE_BE16(0x8809),
8392 	};
8393 	static const struct rte_flow_item_eth lacp_mask = {
8394 		.hdr.ether_type = RTE_BE16(0xffff),
8395 	};
8396 	const struct rte_flow_attr attr = {
8397 		.ingress = 1,
8398 	};
8399 	struct rte_flow_item items[] = {
8400 		{
8401 			.type = RTE_FLOW_ITEM_TYPE_ETH,
8402 			.spec = &lacp_spec,
8403 			.mask = &lacp_mask,
8404 		},
8405 		{
8406 			.type = RTE_FLOW_ITEM_TYPE_END,
8407 		},
8408 	};
8409 	struct rte_flow_action actions[] = {
8410 		{
8411 			.type = (enum rte_flow_action_type)
8412 				MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
8413 		},
8414 		{
8415 			.type = RTE_FLOW_ACTION_TYPE_END,
8416 		},
8417 	};
8418 	struct rte_flow_error error;
8419 	uint32_t flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
8420 					&attr, items, actions,
8421 					false, &error);
8422 
8423 	if (!flow_idx)
8424 		return -rte_errno;
8425 	return 0;
8426 }
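
/*
 * Rule sketch (not part of the driver): the default-miss rule created
 * above is conceptually
 *
 *     attr:    ingress
 *     match:   ETH, ether_type == 0x8809 (slow protocols / LACP)
 *     actions: internal DEFAULT_MISS / END
 */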
8427 
8428 /**
8429  * Destroy a flow.
8430  *
8431  * @see rte_flow_destroy()
8432  * @see rte_flow_ops
8433  */
8434 int
8435 mlx5_flow_destroy(struct rte_eth_dev *dev,
8436 		  struct rte_flow *flow,
8437 		  struct rte_flow_error *error __rte_unused)
8438 {
8439 	struct mlx5_priv *priv = dev->data->dev_private;
8440 	struct mlx5_flow_engine_mode_info *mode_info = &priv->mode_info;
8441 	struct mlx5_dv_flow_info *flow_info;
8442 
8443 	if (priv->sh->config.dv_flow_en == 2)
8444 		return rte_flow_error_set(error, ENOTSUP,
8445 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8446 			  NULL,
8447 			  "Flow non-Q destruction not supported");
8448 	flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN,
8449 				(uintptr_t)(void *)flow);
8450 	if (unlikely(mlx5_need_cache_flow(priv, NULL))) {
8451 		flow_info = LIST_FIRST(&mode_info->hot_upgrade);
8452 		while (flow_info) {
8453 			/* Remove the cache flow info. */
8454 			if (flow_info->flow_idx_low_prio == (uint32_t)(uintptr_t)flow) {
8455 				MLX5_ASSERT(!flow_info->flow_idx_high_prio);
8456 				LIST_REMOVE(flow_info, next);
8457 				mlx5_free(flow_info->items);
8458 				mlx5_free(flow_info->actions);
8459 				mlx5_free(flow_info);
8460 				break;
8461 			}
8462 			flow_info = LIST_NEXT(flow_info, next);
8463 		}
8464 	}
8465 	return 0;
8466 }
8467 
8468 /**
8469  * Destroy all flows.
8470  *
8471  * @see rte_flow_flush()
8472  * @see rte_flow_ops
8473  */
8474 int
8475 mlx5_flow_flush(struct rte_eth_dev *dev,
8476 		struct rte_flow_error *error __rte_unused)
8477 {
8478 	mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, false);
8479 	return 0;
8480 }
8481 
8482 /**
8483  * Isolated mode.
8484  *
8485  * @see rte_flow_isolate()
8486  * @see rte_flow_ops
8487  */
8488 int
8489 mlx5_flow_isolate(struct rte_eth_dev *dev,
8490 		  int enable,
8491 		  struct rte_flow_error *error)
8492 {
8493 	struct mlx5_priv *priv = dev->data->dev_private;
8494 
8495 	if (dev->data->dev_started) {
8496 		rte_flow_error_set(error, EBUSY,
8497 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8498 				   NULL,
8499 				   "port must be stopped first");
8500 		return -rte_errno;
8501 	}
8502 	if (!enable && !priv->sh->config.repr_matching)
8503 		return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
8504 					  "isolated mode cannot be disabled when "
8505 					  "representor matching is disabled");
8506 	priv->isolated = !!enable;
8507 	if (enable)
8508 		dev->dev_ops = &mlx5_dev_ops_isolate;
8509 	else
8510 		dev->dev_ops = &mlx5_dev_ops;
8511 
8512 	dev->rx_descriptor_status = mlx5_rx_descriptor_status;
8513 	dev->tx_descriptor_status = mlx5_tx_descriptor_status;
8514 
8515 	return 0;
8516 }
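
/*
 * Application-side sketch (assumes <rte_flow.h>, not part of the
 * driver): isolated mode can only be toggled while the port is stopped.
 *
 *     struct rte_flow_error error;
 *
 *     rte_eth_dev_stop(port_id);
 *     if (rte_flow_isolate(port_id, 1, &error) < 0)
 *         printf("isolate: %s\n", error.message);
 *     rte_eth_dev_start(port_id);
 */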
8517 
8518 /**
8519  * Query a flow.
8520  *
8521  * @see rte_flow_query()
8522  * @see rte_flow_ops
8523  */
8524 static int
8525 flow_drv_query(struct rte_eth_dev *dev,
8526 	       struct rte_flow *eflow,
8527 	       const struct rte_flow_action *actions,
8528 	       void *data,
8529 	       struct rte_flow_error *error)
8530 {
8531 	struct mlx5_priv *priv = dev->data->dev_private;
8532 	const struct mlx5_flow_driver_ops *fops;
8533 	struct rte_flow *flow = NULL;
8534 	enum mlx5_flow_drv_type ftype = MLX5_FLOW_TYPE_MIN;
8535 
8536 	if (priv->sh->config.dv_flow_en == 2) {
8537 #ifdef HAVE_MLX5_HWS_SUPPORT
8538 		flow = eflow;
8539 		ftype = MLX5_FLOW_TYPE_HW;
8540 #endif
8541 	} else {
8542 		flow = (struct rte_flow *)mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
8543 				(uintptr_t)(void *)eflow);
8544 	}
8545 	if (!flow) {
8546 		return rte_flow_error_set(error, ENOENT,
8547 			  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8548 			  NULL,
8549 			  "invalid flow handle");
8550 	}
8551 	if (ftype == MLX5_FLOW_TYPE_MIN)
8552 		ftype = flow->drv_type;
8553 	MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
8554 	fops = flow_get_drv_ops(ftype);
8555 
8556 	return fops->query(dev, flow, actions, data, error);
8557 }
8558 
8559 /**
8560  * Query a flow.
8561  *
8562  * @see rte_flow_query()
8563  * @see rte_flow_ops
8564  */
8565 int
8566 mlx5_flow_query(struct rte_eth_dev *dev,
8567 		struct rte_flow *flow,
8568 		const struct rte_flow_action *actions,
8569 		void *data,
8570 		struct rte_flow_error *error)
8571 {
8572 	int ret;
8573 
8574 	ret = flow_drv_query(dev, flow, actions, data,
8575 			     error);
8576 	if (ret < 0)
8577 		return ret;
8578 	return 0;
8579 }
8580 
8581 /**
8582  * Get rte_flow callbacks.
8583  *
8584  * @param dev
8585  *   Pointer to Ethernet device structure.
8586  * @param ops
8587  *   Pointer to operation-specific structure.
8588  *
8589  * @return 0
8590  */
8591 int
8592 mlx5_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
8593 		  const struct rte_flow_ops **ops)
8594 {
8595 	*ops = &mlx5_flow_ops;
8596 	return 0;
8597 }
8598 
8599 /**
8600  * Validate meter policy actions.
8601  * Dispatcher for action type specific validation.
8602  *
8603  * @param[in] dev
8604  *   Pointer to the Ethernet device structure.
8605  * @param[in] action
8606  *   The meter policy action object to validate.
8607  * @param[in] attr
8608  *   Attributes of flow to determine steering domain.
8609  * @param[out] is_rss
8610  *   Is RSS or not.
8611  * @param[out] domain_bitmap
8612  *   Domain bitmap.
8613  * @param[out] policy_mode
8614  *   Meter policy mode.
8615  * @param[out] error
8616  *   Perform verbose error reporting if not NULL. Initialized in case of
8617  *   error only.
8618  *
8619  * @return
8620  *   0 on success, otherwise negative errno value.
8621  */
8622 int
8623 mlx5_flow_validate_mtr_acts(struct rte_eth_dev *dev,
8624 			const struct rte_flow_action *actions[RTE_COLORS],
8625 			struct rte_flow_attr *attr,
8626 			bool *is_rss,
8627 			uint8_t *domain_bitmap,
8628 			uint8_t *policy_mode,
8629 			struct rte_mtr_error *error)
8630 {
8631 	const struct mlx5_flow_driver_ops *fops;
8632 
8633 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8634 	return fops->validate_mtr_acts(dev, actions, attr, is_rss,
8635 				       domain_bitmap, policy_mode, error);
8636 }
8637 
8638 /**
8639  * Destroy the meter table set.
8640  *
8641  * @param[in] dev
8642  *   Pointer to Ethernet device.
8643  * @param[in] mtr_policy
8644  *   Meter policy struct.
8645  */
8646 void
8647 mlx5_flow_destroy_mtr_acts(struct rte_eth_dev *dev,
8648 		      struct mlx5_flow_meter_policy *mtr_policy)
8649 {
8650 	const struct mlx5_flow_driver_ops *fops;
8651 
8652 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8653 	fops->destroy_mtr_acts(dev, mtr_policy);
8654 }
8655 
8656 /**
8657  * Create policy action, lock free
8658  * (mutex should be acquired by caller).
8659  * Dispatcher for action type specific call.
8660  *
8661  * @param[in] dev
8662  *   Pointer to the Ethernet device structure.
8663  * @param[in] mtr_policy
8664  *   Meter policy struct.
8665  * @param[in] action
8666  *   Action specification used to create meter actions.
8667  * @param[in] attr
8668  *   Flow rule attributes.
8669  * @param[out] error
8670  *   Perform verbose error reporting if not NULL. Initialized in case of
8671  *   error only.
8672  *
8673  * @return
8674  *   0 on success, otherwise negative errno value.
8675  */
8676 int
8677 mlx5_flow_create_mtr_acts(struct rte_eth_dev *dev,
8678 		      struct mlx5_flow_meter_policy *mtr_policy,
8679 		      const struct rte_flow_action *actions[RTE_COLORS],
8680 		      struct rte_flow_attr *attr,
8681 		      struct rte_mtr_error *error)
8682 {
8683 	const struct mlx5_flow_driver_ops *fops;
8684 
8685 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8686 	return fops->create_mtr_acts(dev, mtr_policy, actions, attr, error);
8687 }
8688 
8689 /**
8690  * Create policy rules, lock free
8691  * (mutex should be acquired by caller).
8692  * Dispatcher for action type specific call.
8693  *
8694  * @param[in] dev
8695  *   Pointer to the Ethernet device structure.
8696  * @param[in] mtr_policy
8697  *   Meter policy struct.
8698  *
8699  * @return
8700  *   0 on success, -1 otherwise.
8701  */
8702 int
8703 mlx5_flow_create_policy_rules(struct rte_eth_dev *dev,
8704 			     struct mlx5_flow_meter_policy *mtr_policy)
8705 {
8706 	const struct mlx5_flow_driver_ops *fops;
8707 
8708 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8709 	return fops->create_policy_rules(dev, mtr_policy);
8710 }
8711 
8712 /**
8713  * Destroy policy rules, lock free,
8714  * (mutex should be acquired by caller).
8715  * Dispatcher for action type specific call.
8716  *
8717  * @param[in] dev
8718  *   Pointer to the Ethernet device structure.
8719  * @param[in] mtr_policy
8720  *   Meter policy struct.
8721  */
8722 void
8723 mlx5_flow_destroy_policy_rules(struct rte_eth_dev *dev,
8724 			     struct mlx5_flow_meter_policy *mtr_policy)
8725 {
8726 	const struct mlx5_flow_driver_ops *fops;
8727 
8728 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8729 	fops->destroy_policy_rules(dev, mtr_policy);
8730 }
8731 
8732 /**
8733  * Destroy the default policy table set.
8734  *
8735  * @param[in] dev
8736  *   Pointer to Ethernet device.
8737  */
8738 void
8739 mlx5_flow_destroy_def_policy(struct rte_eth_dev *dev)
8740 {
8741 	const struct mlx5_flow_driver_ops *fops;
8742 
8743 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8744 	fops->destroy_def_policy(dev);
8745 }
8746 
8747 /**
8748  * Create the default policy table set.
8749  *
8750  * @param[in] dev
8751  *   Pointer to Ethernet device.
8752  *
8753  * @return
8754  *   0 on success, -1 otherwise.
8755  */
8756 int
8757 mlx5_flow_create_def_policy(struct rte_eth_dev *dev)
8758 {
8759 	const struct mlx5_flow_driver_ops *fops;
8760 
8761 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8762 	return fops->create_def_policy(dev);
8763 }
8764 
8765 /**
8766  * Create the needed meter and suffix tables.
8767  *
8768  * @param[in] dev
8769  *   Pointer to Ethernet device.
8770  *
8771  * @return
8772  *   0 on success, -1 otherwise.
8773  */
8774 int
8775 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
8776 			struct mlx5_flow_meter_info *fm,
8777 			uint32_t mtr_idx,
8778 			uint8_t domain_bitmap)
8779 {
8780 	const struct mlx5_flow_driver_ops *fops;
8781 
8782 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8783 	return fops->create_mtr_tbls(dev, fm, mtr_idx, domain_bitmap);
8784 }
8785 
8786 /**
8787  * Destroy the meter table set.
8788  *
8789  * @param[in] dev
8790  *   Pointer to Ethernet device.
8791  * @param[in] fm
8792  *   Pointer to the flow meter info.
8793  */
8794 void
8795 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
8796 			   struct mlx5_flow_meter_info *fm)
8797 {
8798 	const struct mlx5_flow_driver_ops *fops;
8799 
8800 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8801 	fops->destroy_mtr_tbls(dev, fm);
8802 }
8803 
8804 /**
8805  * Destroy the global meter drop table.
8806  *
8807  * @param[in] dev
8808  *   Pointer to Ethernet device.
8809  */
8810 void
8811 mlx5_flow_destroy_mtr_drop_tbls(struct rte_eth_dev *dev)
8812 {
8813 	const struct mlx5_flow_driver_ops *fops;
8814 
8815 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8816 	fops->destroy_mtr_drop_tbls(dev);
8817 }
8818 
8819 /**
8820  * Destroy the sub policy table with RX queue.
8821  *
8822  * @param[in] dev
8823  *   Pointer to Ethernet device.
8824  * @param[in] mtr_policy
8825  *   Pointer to meter policy table.
8826  */
8827 void
8828 mlx5_flow_destroy_sub_policy_with_rxq(struct rte_eth_dev *dev,
8829 		struct mlx5_flow_meter_policy *mtr_policy)
8830 {
8831 	const struct mlx5_flow_driver_ops *fops;
8832 
8833 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8834 	fops->destroy_sub_policy_with_rxq(dev, mtr_policy);
8835 }
8836 
8837 /**
8838  * Allocate the needed ASO flow meter ID.
8839  *
8840  * @param[in] dev
8841  *   Pointer to Ethernet device.
8842  *
8843  * @return
8844  *   Index to the ASO flow meter on success, 0 otherwise.
8845  */
8846 uint32_t
8847 mlx5_flow_mtr_alloc(struct rte_eth_dev *dev)
8848 {
8849 	const struct mlx5_flow_driver_ops *fops;
8850 
8851 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8852 	return fops->create_meter(dev);
8853 }
8854 
8855 /**
8856  * Free the ASO flow meter ID.
8857  *
8858  * @param[in] dev
8859  *   Pointer to Ethernet device.
8860  * @param[in] mtr_idx
8861  *   Index to the ASO flow meter to be freed.
8865  */
8866 void
8867 mlx5_flow_mtr_free(struct rte_eth_dev *dev, uint32_t mtr_idx)
8868 {
8869 	const struct mlx5_flow_driver_ops *fops;
8870 
8871 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8872 	fops->free_meter(dev, mtr_idx);
8873 }
8874 
8875 /**
8876  * Allocate a counter.
8877  *
8878  * @param[in] dev
8879  *   Pointer to Ethernet device structure.
8880  *
8881  * @return
8882  *   Index to the allocated counter on success, 0 otherwise.
8883  */
8884 uint32_t
8885 mlx5_counter_alloc(struct rte_eth_dev *dev)
8886 {
8887 	struct rte_flow_attr attr = { .transfer = 0 };
8888 
8889 	return flow_get_drv_ops(flow_get_drv_type(dev, &attr))->counter_alloc
8890 		(dev);
8891 }
8892 
8893 /**
8894  * Free a counter.
8895  *
8896  * @param[in] dev
8897  *   Pointer to Ethernet device structure.
8898  * @param[in] cnt
8899  *   Index to the counter to be freed.
8900  */
8901 void
8902 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
8903 {
8904 	struct rte_flow_attr attr = { .transfer = 0 };
8905 
8906 	flow_get_drv_ops(flow_get_drv_type(dev, &attr))->counter_free(dev, cnt);
8907 }
8908 
8909 /**
8910  * Query counter statistics.
8911  *
8912  * @param[in] dev
8913  *   Pointer to Ethernet device structure.
8914  * @param[in] cnt
8915  *   Index to counter to query.
8916  * @param[in] clear
8917  *   Set to clear counter statistics.
8918  * @param[out] pkts
8919  *   The counter hits packets number to save.
8920  * @param[out] bytes
8921  *   The counter hits bytes number to save.
8922  *
8923  * @return
8924  *   0 on success, a negative errno value otherwise.
8925  */
8926 int
8927 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
8928 		   bool clear, uint64_t *pkts, uint64_t *bytes, void **action)
8929 {
8930 	struct rte_flow_attr attr = { .transfer = 0 };
8931 
8932 	return flow_get_drv_ops(flow_get_drv_type(dev, &attr))->counter_query
8933 		(dev, cnt, clear, pkts, bytes, action);
8934 }
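
/*
 * Usage sketch (hypothetical caller, not part of the driver): reading
 * and clearing a counter obtained from mlx5_counter_alloc().
 *
 *     uint64_t pkts, bytes;
 *     void *action;
 *
 *     if (!mlx5_counter_query(dev, cnt, true, &pkts, &bytes, &action))
 *         DRV_LOG(DEBUG, "counter %u: %" PRIu64 " pkts, %" PRIu64
 *                 " bytes", cnt, pkts, bytes);
 */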
8935 
8936 /**
8937  * Get information about HWS pre-configurable resources.
8938  *
8939  * @param[in] dev
8940  *   Pointer to the rte_eth_dev structure.
8941  * @param[out] port_info
8942  *   Pointer to port information.
8943  * @param[out] queue_info
8944  *   Pointer to queue information.
8945  * @param[out] error
8946  *   Pointer to error structure.
8947  *
8948  * @return
8949  *   0 on success, a negative errno value otherwise and rte_errno is set.
8950  */
8951 static int
8952 mlx5_flow_info_get(struct rte_eth_dev *dev,
8953 		   struct rte_flow_port_info *port_info,
8954 		   struct rte_flow_queue_info *queue_info,
8955 		   struct rte_flow_error *error)
8956 {
8957 	const struct mlx5_flow_driver_ops *fops;
8958 	struct rte_flow_attr attr = {0};
8959 
8960 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
8961 		return rte_flow_error_set(error, ENOTSUP,
8962 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8963 				NULL,
8964 				"info get with incorrect steering mode");
8965 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8966 	return fops->info_get(dev, port_info, queue_info, error);
8967 }
8968 
8969 /**
8970  * Configure port HWS resources.
8971  *
8972  * @param[in] dev
8973  *   Pointer to the rte_eth_dev structure.
8974  * @param[in] port_attr
8975  *   Port configuration attributes.
8976  * @param[in] nb_queue
8977  *   Number of queue.
8978  * @param[in] queue_attr
8979  *   Array that holds attributes for each flow queue.
8980  * @param[out] error
8981  *   Pointer to error structure.
8982  *
8983  * @return
8984  *   0 on success, a negative errno value otherwise and rte_errno is set.
8985  */
8986 static int
8987 mlx5_flow_port_configure(struct rte_eth_dev *dev,
8988 			 const struct rte_flow_port_attr *port_attr,
8989 			 uint16_t nb_queue,
8990 			 const struct rte_flow_queue_attr *queue_attr[],
8991 			 struct rte_flow_error *error)
8992 {
8993 	const struct mlx5_flow_driver_ops *fops;
8994 	struct rte_flow_attr attr = {0};
8995 
8996 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
8997 		return rte_flow_error_set(error, ENOTSUP,
8998 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8999 				NULL,
9000 				"port configure with incorrect steering mode");
9001 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9002 	return fops->configure(dev, port_attr, nb_queue, queue_attr, error);
9003 }
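
/*
 * Application-side sketch (assumes <rte_flow.h>; the attribute values
 * are illustrative, not part of the driver). The two HWS entry points
 * above back rte_flow_info_get() and rte_flow_configure():
 *
 *     struct rte_flow_port_attr port_attr = { .nb_counters = 1 << 12 };
 *     struct rte_flow_queue_attr qattr = { .size = 256 };
 *     const struct rte_flow_queue_attr *qattrs[] = { &qattr };
 *     struct rte_flow_error error;
 *
 *     if (rte_flow_configure(port_id, &port_attr, 1, qattrs, &error) < 0)
 *         printf("HWS configure: %s\n", error.message);
 */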
9004 
9005 /**
9006  * Validate item template.
9007  *
9008  * @param[in] dev
9009  *   Pointer to the rte_eth_dev structure.
9010  * @param[in] attr
9011  *   Pointer to the item template attributes.
9012  * @param[in] items
9013  *   The template item pattern.
9014  * @param[out] error
9015  *   Pointer to error structure.
9016  *
9017  * @return
9018  *   0 on success, a negative errno value otherwise and rte_errno is set.
9019  */
9020 int
9021 mlx5_flow_pattern_validate(struct rte_eth_dev *dev,
9022 		const struct rte_flow_pattern_template_attr *attr,
9023 		const struct rte_flow_item items[],
9024 		struct rte_flow_error *error)
9025 {
9026 	const struct mlx5_flow_driver_ops *fops;
9027 	struct rte_flow_attr fattr = {0};
9028 
9029 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
9030 		rte_flow_error_set(error, ENOTSUP,
9031 			RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
9032 			"pattern validate with incorrect steering mode");
9033 		return -ENOTSUP;
9034 	}
9035 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9036 	return fops->pattern_validate(dev, attr, items, error);
9037 }
9038 
9039 /**
9040  * Create flow item template.
9041  *
9042  * @param[in] dev
9043  *   Pointer to the rte_eth_dev structure.
9044  * @param[in] attr
9045  *   Pointer to the item template attributes.
9046  * @param[in] items
9047  *   The template item pattern.
9048  * @param[out] error
9049  *   Pointer to error structure.
9050  *
9051  * @return
9052  *   0 on success, a negative errno value otherwise and rte_errno is set.
9053  */
9054 static struct rte_flow_pattern_template *
9055 mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
9056 		const struct rte_flow_pattern_template_attr *attr,
9057 		const struct rte_flow_item items[],
9058 		struct rte_flow_error *error)
9059 {
9060 	const struct mlx5_flow_driver_ops *fops;
9061 	struct rte_flow_attr fattr = {0};
9062 
9063 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
9064 		rte_flow_error_set(error, ENOTSUP,
9065 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9066 				NULL,
9067 				"pattern create with incorrect steering mode");
9068 		return NULL;
9069 	}
9070 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9071 	return fops->pattern_template_create(dev, attr, items, error);
9072 }
9073 
9074 /**
9075  * Destroy flow item template.
9076  *
9077  * @param[in] dev
9078  *   Pointer to the rte_eth_dev structure.
9079  * @param[in] template
9080  *   Pointer to the item template to be destroyed.
9081  * @param[out] error
9082  *   Pointer to error structure.
9083  *
9084  * @return
9085  *   0 on success, a negative errno value otherwise and rte_errno is set.
9086  */
9087 static int
9088 mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
9089 				   struct rte_flow_pattern_template *template,
9090 				   struct rte_flow_error *error)
9091 {
9092 	const struct mlx5_flow_driver_ops *fops;
9093 	struct rte_flow_attr attr = {0};
9094 
9095 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
9096 		return rte_flow_error_set(error, ENOTSUP,
9097 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9098 				NULL,
9099 				"pattern destroy with incorrect steering mode");
9100 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9101 	return fops->pattern_template_destroy(dev, template, error);
9102 }
9103 
9104 /**
9105  * Validate flow actions template.
9106  *
9107  * @param[in] dev
9108  *   Pointer to the rte_eth_dev structure.
9109  * @param[in] attr
9110  *   Pointer to the action template attributes.
9111  * @param[in] actions
9112  *   Associated actions (list terminated by the END action).
9113  * @param[in] masks
9114  *   List of actions that marks which of the action's members are constant.
9115  * @param[out] error
9116  *   Pointer to error structure.
9117  *
9118  * @return
9119  *   0 on success, a negative errno value otherwise and rte_errno is set.
9120  */
9121 int
9122 mlx5_flow_actions_validate(struct rte_eth_dev *dev,
9123 			const struct rte_flow_actions_template_attr *attr,
9124 			const struct rte_flow_action actions[],
9125 			const struct rte_flow_action masks[],
9126 			struct rte_flow_error *error)
9127 {
9128 	const struct mlx5_flow_driver_ops *fops;
9129 	struct rte_flow_attr fattr = {0};
9130 
9131 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
9132 		rte_flow_error_set(error, ENOTSUP,
9133 			RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
9134 			"actions validate with incorrect steering mode");
9135 		return -ENOTSUP;
9136 	}
9137 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9138 	return fops->actions_validate(dev, attr, actions, masks, error);
9139 }
9140 
9141 /**
9142  * Create flow actions template.
9143  *
9144  * @param[in] dev
9145  *   Pointer to the rte_eth_dev structure.
9146  * @param[in] attr
9147  *   Pointer to the action template attributes.
9148  * @param[in] actions
9149  *   Associated actions (list terminated by the END action).
9150  * @param[in] masks
9151  *   List of actions that marks which of the action's members are constant.
9152  * @param[out] error
9153  *   Pointer to error structure.
9154  *
9155  * @return
9156  *   0 on success, a negative errno value otherwise and rte_errno is set.
9157  */
9158 static struct rte_flow_actions_template *
9159 mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
9160 			const struct rte_flow_actions_template_attr *attr,
9161 			const struct rte_flow_action actions[],
9162 			const struct rte_flow_action masks[],
9163 			struct rte_flow_error *error)
9164 {
9165 	const struct mlx5_flow_driver_ops *fops;
9166 	struct rte_flow_attr fattr = {0};
9167 
9168 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
9169 		rte_flow_error_set(error, ENOTSUP,
9170 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9171 				NULL,
9172 				"action create with incorrect steering mode");
9173 		return NULL;
9174 	}
9175 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9176 	return fops->actions_template_create(dev, attr, actions, masks, error);
9177 }
9178 
9179 /**
9180  * Destroy flow action template.
9181  *
9182  * @param[in] dev
9183  *   Pointer to the rte_eth_dev structure.
9184  * @param[in] template
9185  *   Pointer to the action template to be destroyed.
9186  * @param[out] error
9187  *   Pointer to error structure.
9188  *
9189  * @return
9190  *   0 on success, a negative errno value otherwise and rte_errno is set.
9191  */
9192 static int
9193 mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
9194 				   struct rte_flow_actions_template *template,
9195 				   struct rte_flow_error *error)
9196 {
9197 	const struct mlx5_flow_driver_ops *fops;
9198 	struct rte_flow_attr attr = {0};
9199 
9200 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
9201 		return rte_flow_error_set(error, ENOTSUP,
9202 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9203 				NULL,
9204 				"action destroy with incorrect steering mode");
9205 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9206 	return fops->actions_template_destroy(dev, template, error);
9207 }
9208 
9209 /**
9210  * Create flow table.
9211  *
9212  * @param[in] dev
9213  *   Pointer to the rte_eth_dev structure.
9214  * @param[in] attr
9215  *   Pointer to the table attributes.
9216  * @param[in] item_templates
9217  *   Item template array to be bound to the table.
9218  * @param[in] nb_item_templates
9219  *   Number of item templates.
9220  * @param[in] action_templates
9221  *   Action template array to be bound to the table.
9222  * @param[in] nb_action_templates
9223  *   Number of action templates.
9224  * @param[out] error
9225  *   Pointer to error structure.
9226  *
9227  * @return
9228  *    Table on success, NULL otherwise and rte_errno is set.
9229  */
9230 static struct rte_flow_template_table *
9231 mlx5_flow_table_create(struct rte_eth_dev *dev,
9232 		       const struct rte_flow_template_table_attr *attr,
9233 		       struct rte_flow_pattern_template *item_templates[],
9234 		       uint8_t nb_item_templates,
9235 		       struct rte_flow_actions_template *action_templates[],
9236 		       uint8_t nb_action_templates,
9237 		       struct rte_flow_error *error)
9238 {
9239 	const struct mlx5_flow_driver_ops *fops;
9240 	struct rte_flow_attr fattr = {0};
9241 
9242 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
9243 		rte_flow_error_set(error, ENOTSUP,
9244 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9245 				NULL,
9246 				"table create with incorrect steering mode");
9247 		return NULL;
9248 	}
9249 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9250 	return fops->template_table_create(dev,
9251 					   attr,
9252 					   item_templates,
9253 					   nb_item_templates,
9254 					   action_templates,
9255 					   nb_action_templates,
9256 					   error);
9257 }
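
/*
 * Pipeline sketch (application side, assumes <rte_flow.h>; the
 * pattern_tmpl/actions_tmpl handles are hypothetical, not part of the
 * driver). A template table binds previously created pattern and
 * actions templates:
 *
 *     struct rte_flow_pattern_template *pt[] = { pattern_tmpl };
 *     struct rte_flow_actions_template *at[] = { actions_tmpl };
 *     struct rte_flow_template_table_attr tattr = {
 *         .flow_attr = { .ingress = 1 },
 *         .nb_flows = 1 << 16,
 *     };
 *
 *     table = rte_flow_template_table_create(port_id, &tattr,
 *                                            pt, 1, at, 1, &error);
 */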
9258 
9259 /**
9260  * PMD destroy flow table.
9261  *
9262  * @param[in] dev
9263  *   Pointer to the rte_eth_dev structure.
9264  * @param[in] table
9265  *   Pointer to the table to be destroyed.
9266  * @param[out] error
9267  *   Pointer to error structure.
9268  *
9269  * @return
9270  *   0 on success, a negative errno value otherwise and rte_errno is set.
9271  */
9272 static int
9273 mlx5_flow_table_destroy(struct rte_eth_dev *dev,
9274 			struct rte_flow_template_table *table,
9275 			struct rte_flow_error *error)
9276 {
9277 	const struct mlx5_flow_driver_ops *fops;
9278 	struct rte_flow_attr attr = {0};
9279 
9280 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
9281 		return rte_flow_error_set(error, ENOTSUP,
9282 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9283 				NULL,
9284 				"table destroy with incorrect steering mode");
9285 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9286 	return fops->template_table_destroy(dev, table, error);
9287 }
9288 
9289 /**
9290  * Enqueue flow creation.
9291  *
9292  * @param[in] dev
9293  *   Pointer to the rte_eth_dev structure.
9294  * @param[in] queue_id
9295  *   The queue to create the flow.
9296  * @param[in] attr
9297  *   Pointer to the flow operation attributes.
9298  * @param[in] items
9299  *   Items with flow spec value.
9300  * @param[in] pattern_template_index
9301  *   Index of the table's pattern template that the flow follows.
9302  * @param[in] actions
9303  *   Action with flow spec value.
9304  * @param[in] action_template_index
9305  *   Index of the table's actions template that the flow follows.
9306  * @param[in] user_data
9307  *   Pointer to the user_data.
9308  * @param[out] error
9309  *   Pointer to error structure.
9310  *
9311  * @return
9312  *    Flow pointer on success, NULL otherwise and rte_errno is set.
9313  */
9314 static struct rte_flow *
9315 mlx5_flow_async_flow_create(struct rte_eth_dev *dev,
9316 			    uint32_t queue_id,
9317 			    const struct rte_flow_op_attr *attr,
9318 			    struct rte_flow_template_table *table,
9319 			    const struct rte_flow_item items[],
9320 			    uint8_t pattern_template_index,
9321 			    const struct rte_flow_action actions[],
9322 			    uint8_t action_template_index,
9323 			    void *user_data,
9324 			    struct rte_flow_error *error)
9325 {
9326 	const struct mlx5_flow_driver_ops *fops;
9327 	struct rte_flow_attr fattr = {0};
9328 
9329 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
9330 		rte_flow_error_set(error, ENOTSUP,
9331 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9332 				NULL,
9333 				"flow_q create with incorrect steering mode");
9334 		return NULL;
9335 	}
9336 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9337 	return fops->async_flow_create(dev, queue_id, attr, table,
9338 				       items, pattern_template_index,
9339 				       actions, action_template_index,
9340 				       user_data, error);
9341 }
9342 
9343 /**
9344  * Enqueue flow creation by index.
9345  *
9346  * @param[in] dev
9347  *   Pointer to the rte_eth_dev structure.
9348  * @param[in] queue_id
9349  *   The queue to create the flow.
9350  * @param[in] attr
9351  *   Pointer to the flow operation attributes.
9352  * @param[in] rule_index
9353  *   Index of the rule in the table.
9354  * @param[in] actions
9355  *   Action with flow spec value.
9356  * @param[in] action_template_index
9357  *   Index of the table's actions template that the flow follows.
9358  * @param[in] user_data
9359  *   Pointer to the user_data.
9360  * @param[out] error
9361  *   Pointer to error structure.
9362  *
9363  * @return
9364  *    Flow pointer on success, NULL otherwise and rte_errno is set.
9365  */
9366 static struct rte_flow *
9367 mlx5_flow_async_flow_create_by_index(struct rte_eth_dev *dev,
9368 			    uint32_t queue_id,
9369 			    const struct rte_flow_op_attr *attr,
9370 			    struct rte_flow_template_table *table,
9371 			    uint32_t rule_index,
9372 			    const struct rte_flow_action actions[],
9373 			    uint8_t action_template_index,
9374 			    void *user_data,
9375 			    struct rte_flow_error *error)
9376 {
9377 	const struct mlx5_flow_driver_ops *fops;
9378 	struct rte_flow_attr fattr = {0};
9379 
9380 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW) {
9381 		rte_flow_error_set(error, ENOTSUP,
9382 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9383 				NULL,
9384 				"flow_q create with incorrect steering mode");
9385 		return NULL;
9386 	}
9387 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9388 	return fops->async_flow_create_by_index(dev, queue_id, attr, table,
9389 				       rule_index, actions, action_template_index,
9390 				       user_data, error);
9391 }
9392 
9393 /**
9394  * Enqueue flow update.
9395  *
9396  * @param[in] dev
9397  *   Pointer to the rte_eth_dev structure.
9398  * @param[in] queue
9399  *   The queue to update the flow.
9400  * @param[in] attr
9401  *   Pointer to the flow operation attributes.
9402  * @param[in] flow
9403  *   Pointer to the flow to be updated.
9404  * @param[in] actions
9405  *   Action with flow spec value.
9406  * @param[in] action_template_index
9407  *   Index of the table's actions template that the flow follows.
9408  * @param[in] user_data
9409  *   Pointer to the user_data.
9410  * @param[out] error
9411  *   Pointer to error structure.
9412  *
9413  * @return
9414  *    0 on success, negative value otherwise and rte_errno is set.
9415  */
9416 static int
9417 mlx5_flow_async_flow_update(struct rte_eth_dev *dev,
9418 			     uint32_t queue,
9419 			     const struct rte_flow_op_attr *attr,
9420 			     struct rte_flow *flow,
9421 			     const struct rte_flow_action actions[],
9422 			     uint8_t action_template_index,
9423 			     void *user_data,
9424 			     struct rte_flow_error *error)
9425 {
9426 	const struct mlx5_flow_driver_ops *fops;
9427 	struct rte_flow_attr fattr = {0};
9428 
9429 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW)
9430 		return rte_flow_error_set(error, ENOTSUP,
9431 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9432 				NULL,
9433 				"flow_q update with incorrect steering mode");
9434 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9435 	return fops->async_flow_update(dev, queue, attr, flow,
9436 					actions, action_template_index, user_data, error);
9437 }
9438 
9439 /**
9440  * Enqueue flow destruction.
9441  *
9442  * @param[in] dev
9443  *   Pointer to the rte_eth_dev structure.
9444  * @param[in] queue
9445  *   The queue to destroy the flow.
9446  * @param[in] attr
9447  *   Pointer to the flow operation attributes.
9448  * @param[in] flow
9449  *   Pointer to the flow to be destroyed.
9450  * @param[in] user_data
9451  *   Pointer to the user_data.
9452  * @param[out] error
9453  *   Pointer to error structure.
9454  *
9455  * @return
9456  *    0 on success, negative value otherwise and rte_errno is set.
9457  */
9458 static int
9459 mlx5_flow_async_flow_destroy(struct rte_eth_dev *dev,
9460 			     uint32_t queue,
9461 			     const struct rte_flow_op_attr *attr,
9462 			     struct rte_flow *flow,
9463 			     void *user_data,
9464 			     struct rte_flow_error *error)
9465 {
9466 	const struct mlx5_flow_driver_ops *fops;
9467 	struct rte_flow_attr fattr = {0};
9468 
9469 	if (flow_get_drv_type(dev, &fattr) != MLX5_FLOW_TYPE_HW)
9470 		return rte_flow_error_set(error, ENOTSUP,
9471 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9472 				NULL,
9473 				"flow_q destroy with incorrect steering mode");
9474 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9475 	return fops->async_flow_destroy(dev, queue, attr, flow,
9476 					user_data, error);
9477 }
9478 
9479 /**
9480  * Pull the results of the enqueued flow operations.
9481  *
9482  * @param[in] dev
9483  *   Pointer to the rte_eth_dev structure.
9484  * @param[in] queue
9485  *   The queue to pull the result.
9486  * @param[in, out] res
9487  *   Array to save the results.
9488  * @param[in] n_res
9489  *   Number of result entries available in the array.
9490  * @param[out] error
9491  *   Pointer to error structure.
9492  *
9493  * @return
9494  *    Result number on success, negative value otherwise and rte_errno is set.
9495  */
9496 static int
9497 mlx5_flow_pull(struct rte_eth_dev *dev,
9498 	       uint32_t queue,
9499 	       struct rte_flow_op_result res[],
9500 	       uint16_t n_res,
9501 	       struct rte_flow_error *error)
9502 {
9503 	const struct mlx5_flow_driver_ops *fops;
9504 	struct rte_flow_attr attr = {0};
9505 
9506 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
9507 		return rte_flow_error_set(error, ENOTSUP,
9508 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9509 				NULL,
9510 				"flow_q pull with incorrect steering mode");
9511 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9512 	return fops->pull(dev, queue, res, n_res, error);
9513 }
9514 
9515 /**
9516  * Push the enqueued flows.
9517  *
9518  * @param[in] dev
9519  *   Pointer to the rte_eth_dev structure.
9520  * @param[in] queue
9521  *   The queue to push the flows.
9522  * @param[out] error
9523  *   Pointer to error structure.
9524  *
9525  * @return
9526  *    0 on success, negative value otherwise and rte_errno is set.
9527  */
9528 static int
9529 mlx5_flow_push(struct rte_eth_dev *dev,
9530 	       uint32_t queue,
9531 	       struct rte_flow_error *error)
9532 {
9533 	const struct mlx5_flow_driver_ops *fops;
9534 	struct rte_flow_attr attr = {0};
9535 
9536 	if (flow_get_drv_type(dev, &attr) != MLX5_FLOW_TYPE_HW)
9537 		return rte_flow_error_set(error, ENOTSUP,
9538 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9539 				NULL,
9540 				"flow_q push with incorrect steering mode");
9541 	fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9542 	return fops->push(dev, queue, error);
9543 }
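
/*
 * Enqueue/push/pull cycle sketch (application side, assumes
 * <rte_flow.h>; table/pattern/actions are hypothetical, not part of the
 * driver). Postponed operations reach the HW only after a push, and
 * their completions must be pulled:
 *
 *     struct rte_flow_op_attr op = { .postpone = 1 };
 *     struct rte_flow_op_result res[64];
 *
 *     flow = rte_flow_async_create(port_id, 0, &op, table,
 *                                  pattern, 0, actions, 0, NULL, &error);
 *     rte_flow_push(port_id, 0, &error);
 *     n = rte_flow_pull(port_id, 0, res, RTE_DIM(res), &error);
 */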
9544 
9545 /**
9546  * Create shared action.
9547  *
9548  * @param[in] dev
9549  *   Pointer to the rte_eth_dev structure.
9550  * @param[in] queue
9551  *   The queue to be used.
9552  * @param[in] attr
9553  *   Operation attribute.
9554  * @param[in] conf
9555  *   Indirect action configuration.
9556  * @param[in] action
9557  *   rte_flow action detail.
9558  * @param[in] user_data
9559  *   Pointer to the user_data.
9560  * @param[out] error
9561  *   Pointer to error structure.
9562  *
9563  * @return
9564  *   Action handle on success, NULL otherwise and rte_errno is set.
9565  */
9566 static struct rte_flow_action_handle *
9567 mlx5_flow_async_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
9568 				 const struct rte_flow_op_attr *attr,
9569 				 const struct rte_flow_indir_action_conf *conf,
9570 				 const struct rte_flow_action *action,
9571 				 void *user_data,
9572 				 struct rte_flow_error *error)
9573 {
9574 	const struct mlx5_flow_driver_ops *fops =
9575 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9576 
9577 	return fops->async_action_create(dev, queue, attr, conf, action,
9578 					 user_data, error);
9579 }
9580 
9581 /**
9582  * Update shared action.
9583  *
9584  * @param[in] dev
9585  *   Pointer to the rte_eth_dev structure.
9586  * @param[in] queue
9587  *   The queue to be used.
9588  * @param[in] attr
9589  *   Operation attribute.
9590  * @param[in] handle
9591  *   Action handle to be updated.
9592  * @param[in] update
9593  *   Update value.
9594  * @param[in] user_data
9595  *   Pointer to the user_data.
9596  * @param[out] error
9597  *   Pointer to error structure.
9598  *
9599  * @return
9600  *   0 on success, negative value otherwise and rte_errno is set.
9601  */
9602 static int
9603 mlx5_flow_async_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
9604 				     const struct rte_flow_op_attr *attr,
9605 				     struct rte_flow_action_handle *handle,
9606 				     const void *update,
9607 				     void *user_data,
9608 				     struct rte_flow_error *error)
9609 {
9610 	const struct mlx5_flow_driver_ops *fops =
9611 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9612 
9613 	return fops->async_action_update(dev, queue, attr, handle,
9614 					 update, user_data, error);
9615 }
9616 
9617 static int
9618 mlx5_flow_async_action_handle_query_update
9619 	(struct rte_eth_dev *dev, uint32_t queue_id,
9620 	 const struct rte_flow_op_attr *op_attr,
9621 	 struct rte_flow_action_handle *action_handle,
9622 	 const void *update, void *query,
9623 	 enum rte_flow_query_update_mode qu_mode,
9624 	 void *user_data, struct rte_flow_error *error)
9625 {
9626 	const struct mlx5_flow_driver_ops *fops =
9627 		flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9628 
9629 	if (!fops || !fops->async_action_query_update)
9630 		return rte_flow_error_set(error, ENOTSUP,
9631 					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
9632 					  "async query_update not supported");
9633 	return fops->async_action_query_update
9634 			   (dev, queue_id, op_attr, action_handle,
9635 			    update, query, qu_mode, user_data, error);
9636 }
9637 
9638 /**
9639  * Query shared action.
9640  *
9641  * @param[in] dev
9642  *   Pointer to the rte_eth_dev structure.
9643  * @param[in] queue
9644  *   The queue to be used.
9645  * @param[in] attr
9646  *   Operation attribute.
9647  * @param[in] handle
9648  *   Action handle to be queried.
9649  * @param[out] data
9650  *   Pointer to the query result data.
9651  * @param[in] user_data
9652  *   Pointer to the user_data.
9653  * @param[out] error
9654  *   Pointer to error structure.
9655  *
9656  * @return
9657  *   0 on success, negative value otherwise and rte_errno is set.
9658  */
9659 static int
9660 mlx5_flow_async_action_handle_query(struct rte_eth_dev *dev, uint32_t queue,
9661 				    const struct rte_flow_op_attr *attr,
9662 				    const struct rte_flow_action_handle *handle,
9663 				    void *data,
9664 				    void *user_data,
9665 				    struct rte_flow_error *error)
9666 {
9667 	const struct mlx5_flow_driver_ops *fops =
9668 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9669 
9670 	return fops->async_action_query(dev, queue, attr, handle,
9671 					data, user_data, error);
9672 }
9673 
9674 /**
9675  * Destroy shared action.
9676  *
9677  * @param[in] dev
9678  *   Pointer to the rte_eth_dev structure.
9679  * @param[in] queue
9680  *   The queue to be used.
9681  * @param[in] attr
9682  *   Operation attribute.
9683  * @param[in] handle
9684  *   Action handle to be destroyed.
9685  * @param[in] user_data
9686  *   Pointer to the user_data.
9687  * @param[out] error
9688  *   Pointer to error structure.
9689  *
9690  * @return
9691  *   0 on success, negative value otherwise and rte_errno is set.
9692  */
9693 static int
9694 mlx5_flow_async_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
9695 				      const struct rte_flow_op_attr *attr,
9696 				      struct rte_flow_action_handle *handle,
9697 				      void *user_data,
9698 				      struct rte_flow_error *error)
9699 {
9700 	const struct mlx5_flow_driver_ops *fops =
9701 			flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
9702 
9703 	return fops->async_action_destroy(dev, queue, attr, handle,
9704 					  user_data, error);
9705 }
9706 
9707 /**
9708  * Allocate new memory for the counter values, wrapped by all the needed
9709  * management structures.
9710  *
9711  * @param[in] sh
9712  *   Pointer to mlx5_dev_ctx_shared object.
9713  *
9714  * @return
9715  *   0 on success, a negative errno value otherwise.
9716  */
9717 static int
9718 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
9719 {
9720 	struct mlx5_counter_stats_mem_mng *mem_mng;
9721 	volatile struct flow_counter_stats *raw_data;
9722 	int raws_n = MLX5_CNT_MR_ALLOC_BULK + MLX5_MAX_PENDING_QUERIES;
9723 	int size = (sizeof(struct flow_counter_stats) *
9724 			MLX5_COUNTERS_PER_POOL +
9725 			sizeof(struct mlx5_counter_stats_raw)) * raws_n +
9726 			sizeof(struct mlx5_counter_stats_mem_mng);
9727 	size_t pgsize = rte_mem_page_size();
9728 	uint8_t *mem;
9729 	int ret;
9730 	int i;
9731 
9732 	if (pgsize == (size_t)-1) {
9733 		DRV_LOG(ERR, "Failed to get mem page size");
9734 		rte_errno = ENOMEM;
9735 		return -ENOMEM;
9736 	}
9737 	mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
9738 	if (!mem) {
9739 		rte_errno = ENOMEM;
9740 		return -ENOMEM;
9741 	}
9742 	mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
9743 	size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
9744 	ret = mlx5_os_wrapped_mkey_create(sh->cdev->ctx, sh->cdev->pd,
9745 					  sh->cdev->pdn, mem, size,
9746 					  &mem_mng->wm);
9747 	if (ret) {
9748 		rte_errno = errno;
9749 		mlx5_free(mem);
9750 		return -rte_errno;
9751 	}
9752 	mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
9753 	raw_data = (volatile struct flow_counter_stats *)mem;
9754 	for (i = 0; i < raws_n; ++i) {
9755 		mem_mng->raws[i].mem_mng = mem_mng;
9756 		mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
9757 	}
9758 	for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
9759 		LIST_INSERT_HEAD(&sh->sws_cmng.free_stat_raws,
9760 				 mem_mng->raws + MLX5_CNT_MR_ALLOC_BULK + i,
9761 				 next);
9762 	LIST_INSERT_HEAD(&sh->sws_cmng.mem_mngs, mem_mng, next);
9763 	sh->sws_cmng.mem_mng = mem_mng;
9764 	return 0;
9765 }
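
/*
 * Layout sketch derived from the code above (not normative): a single
 * allocation carries the raw counter data, the raw descriptors and the
 * management structure itself.
 *
 *     mem ............... raws_n * MLX5_COUNTERS_PER_POOL counter stats
 *     mem + size ........ raws_n * struct mlx5_counter_stats_raw
 *     end of buffer ..... struct mlx5_counter_stats_mem_mng
 */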
9766 
9767 /**
9768  * Set the statistic memory to the new counter pool.
9769  *
9770  * @param[in] sh
9771  *   Pointer to mlx5_dev_ctx_shared object.
9772  * @param[in] pool
9773  *   Pointer to the pool to set the statistic memory.
9774  *
9775  * @return
9776  *   0 on success, a negative errno value otherwise.
9777  */
9778 static int
9779 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
9780 			       struct mlx5_flow_counter_pool *pool)
9781 {
9782 	struct mlx5_flow_counter_mng *cmng = &sh->sws_cmng;
9783 	/* Resize statistic memory once used out. */
9784 	if (!(pool->index % MLX5_CNT_MR_ALLOC_BULK) &&
9785 	    mlx5_flow_create_counter_stat_mem_mng(sh)) {
9786 		DRV_LOG(ERR, "Cannot resize counter stat mem.");
9787 		return -1;
9788 	}
9789 	rte_spinlock_lock(&pool->sl);
9790 	pool->raw = cmng->mem_mng->raws + pool->index % MLX5_CNT_MR_ALLOC_BULK;
9791 	rte_spinlock_unlock(&pool->sl);
9792 	pool->raw_hw = NULL;
9793 	return 0;
9794 }
9795 
9796 #define MLX5_POOL_QUERY_FREQ_US 1000000
9797 
9798 /**
9799  * Set the periodic procedure for triggering asynchronous batch queries for all
9800  * the counter pools.
9801  *
9802  * @param[in] sh
9803  *   Pointer to mlx5_dev_ctx_shared object.
9804  */
9805 void
9806 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
9807 {
9808 	uint32_t pools_n, us;
9809 
9810 	pools_n = __atomic_load_n(&sh->sws_cmng.n_valid, __ATOMIC_RELAXED);
9811 	us = MLX5_POOL_QUERY_FREQ_US / pools_n;
9812 	DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
9813 	if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
9814 		sh->sws_cmng.query_thread_on = 0;
9815 		DRV_LOG(ERR, "Cannot reinitialize query alarm");
9816 	} else {
9817 		sh->sws_cmng.query_thread_on = 1;
9818 	}
9819 }
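
/*
 * Scheduling arithmetic derived from the code above: the query period is
 * divided evenly among the valid pools and each alarm expiry queries one
 * pool, so every pool is visited roughly once per MLX5_POOL_QUERY_FREQ_US.
 *
 *     4 pools  -> alarm every 250000 us
 *     16 pools -> alarm every  62500 us
 */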
9820 
9821 /**
9822  * The periodic procedure for triggering asynchronous batch queries for all the
9823  * counter pools. This function is expected to be called from the host thread.
9824  *
9825  * @param[in] arg
9826  *   The parameter for the alarm process.
9827  */
9828 void
9829 mlx5_flow_query_alarm(void *arg)
9830 {
9831 	struct mlx5_dev_ctx_shared *sh = arg;
9832 	struct mlx5_flow_counter_mng *cmng = &sh->sws_cmng;
9833 	uint16_t pool_index = cmng->pool_index;
9834 	struct mlx5_flow_counter_pool *pool;
9835 	uint16_t n_valid;
9836 	int ret;
9837 
9838 	if (cmng->pending_queries >= MLX5_MAX_PENDING_QUERIES)
9839 		goto set_alarm;
9840 	rte_spinlock_lock(&cmng->pool_update_sl);
9841 	pool = cmng->pools[pool_index];
9842 	n_valid = cmng->n_valid;
9843 	rte_spinlock_unlock(&cmng->pool_update_sl);
9844 	/* Set the statistic memory to the new created pool. */
9845 	if (!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool))
9846 		goto set_alarm;
9847 	if (pool->raw_hw)
9848 		/* There is a pool query in progress. */
9849 		goto set_alarm;
9850 	pool->raw_hw = LIST_FIRST(&cmng->free_stat_raws);
9851 	if (!pool->raw_hw)
9852 		/* No free counter statistics raw memory. */
9853 		goto set_alarm;
9854 	/*
9855 	 * Identify more efficiently the counters released between the query
9856 	 * trigger and the query handling. A counter released in this gap
9857 	 * period should wait for a new query round, since newly arrived
9858 	 * packets will not be taken into account yet.
9859 	 */
9860 	pool->query_gen++;
9861 	ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
9862 					       MLX5_COUNTERS_PER_POOL,
9863 					       NULL, NULL,
9864 					       pool->raw_hw->mem_mng->wm.lkey,
9865 					       (void *)(uintptr_t)
9866 					       pool->raw_hw->data,
9867 					       sh->devx_comp,
9868 					       (uint64_t)(uintptr_t)pool);
9869 	if (ret) {
9870 		DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
9871 			" %d", pool->min_dcs->id);
9872 		pool->raw_hw = NULL;
9873 		goto set_alarm;
9874 	}
9875 	LIST_REMOVE(pool->raw_hw, next);
9876 	cmng->pending_queries++;
9877 	pool_index++;
9878 	if (pool_index >= n_valid)
9879 		pool_index = 0;
9880 set_alarm:
9881 	cmng->pool_index = pool_index;
9882 	mlx5_set_query_alarm(sh);
9883 }
9884 
9885 /**
9886  * Check and callback event for new aged flow in the counter pool
9887  *
9888  * @param[in] sh
9889  *   Pointer to mlx5_dev_ctx_shared object.
9890  * @param[in] pool
9891  *   Pointer to the current counter pool.
9892  */
9893 static void
9894 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
9895 		   struct mlx5_flow_counter_pool *pool)
9896 {
9897 	struct mlx5_priv *priv;
9898 	struct mlx5_flow_counter *cnt;
9899 	struct mlx5_age_info *age_info;
9900 	struct mlx5_age_param *age_param;
9901 	struct mlx5_counter_stats_raw *cur = pool->raw_hw;
9902 	struct mlx5_counter_stats_raw *prev = pool->raw;
9903 	const uint64_t curr_time = MLX5_CURR_TIME_SEC;
9904 	const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
9905 	uint16_t expected = AGE_CANDIDATE;
9906 	uint32_t i;
9907 
9908 	pool->time_of_last_age_check = curr_time;
9909 	for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
9910 		cnt = MLX5_POOL_GET_CNT(pool, i);
9911 		age_param = MLX5_CNT_TO_AGE(cnt);
9912 		if (__atomic_load_n(&age_param->state,
9913 				    __ATOMIC_RELAXED) != AGE_CANDIDATE)
9914 			continue;
9915 		if (cur->data[i].hits != prev->data[i].hits) {
9916 			__atomic_store_n(&age_param->sec_since_last_hit, 0,
9917 					 __ATOMIC_RELAXED);
9918 			continue;
9919 		}
9920 		if (__atomic_fetch_add(&age_param->sec_since_last_hit,
9921 				       time_delta,
9922 				       __ATOMIC_RELAXED) + time_delta <= age_param->timeout)
9923 			continue;
9924 		/*
9925 		 * Hold the lock first: if a release happens between
9926 		 * setting the AGE_TMOUT state and the tailq insertion,
9927 		 * the release procedure could otherwise try to remove
9928 		 * a tailq node that does not exist yet.
9929 		 */
9930 		priv = rte_eth_devices[age_param->port_id].data->dev_private;
9931 		age_info = GET_PORT_AGE_INFO(priv);
9932 		rte_spinlock_lock(&age_info->aged_sl);
9933 		if (__atomic_compare_exchange_n(&age_param->state, &expected,
9934 						AGE_TMOUT, false,
9935 						__ATOMIC_RELAXED,
9936 						__ATOMIC_RELAXED)) {
9937 			TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
9938 			MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
9939 		}
9940 		rte_spinlock_unlock(&age_info->aged_sl);
9941 	}
9942 	mlx5_age_event_prepare(sh);
9943 }
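
/*
 * Illustrative sketch, not part of the driver: the aging check above uses
 * a compare-and-swap from AGE_CANDIDATE to AGE_TMOUT so that only one
 * context (the aging check or a concurrent counter release) moves a
 * counter to the aged list. The core idiom, with hypothetical naming and
 * guarded so it is never compiled:
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static bool
example_try_age_out(uint16_t *state)
{
	uint16_t expected = AGE_CANDIDATE;

	/* Succeeds for exactly one caller; the loser observes the new state. */
	return __atomic_compare_exchange_n(state, &expected, AGE_TMOUT,
					   false, __ATOMIC_RELAXED,
					   __ATOMIC_RELAXED);
}
#endif /* MLX5_FLOW_DOC_EXAMPLES */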
9944 
9945 /**
9946  * Handler for the HW response carrying ready values from an asynchronous batch
9947  * query. This function is called from the host thread.
9948  *
9949  * @param[in] sh
9950  *   The pointer to the shared device context.
9951  * @param[in] async_id
9952  *   The Devx async ID.
9953  * @param[in] status
9954  *   The status of the completion.
9955  */
9956 void
9957 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
9958 				  uint64_t async_id, int status)
9959 {
9960 	struct mlx5_flow_counter_pool *pool =
9961 		(struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
9962 	struct mlx5_counter_stats_raw *raw_to_free;
9963 	uint8_t query_gen = pool->query_gen ^ 1;
9964 	struct mlx5_flow_counter_mng *cmng = &sh->sws_cmng;
9965 	enum mlx5_counter_type cnt_type =
9966 		pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
9967 				MLX5_COUNTER_TYPE_ORIGIN;
9968 
9969 	if (unlikely(status)) {
9970 		raw_to_free = pool->raw_hw;
9971 	} else {
9972 		raw_to_free = pool->raw;
9973 		if (pool->is_aged)
9974 			mlx5_flow_aging_check(sh, pool);
9975 		rte_spinlock_lock(&pool->sl);
9976 		pool->raw = pool->raw_hw;
9977 		rte_spinlock_unlock(&pool->sl);
9978 		/* Be sure the new raw counters data is updated in memory. */
9979 		rte_io_wmb();
9980 		if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
9981 			rte_spinlock_lock(&cmng->csl[cnt_type]);
9982 			TAILQ_CONCAT(&cmng->counters[cnt_type],
9983 				     &pool->counters[query_gen], next);
9984 			rte_spinlock_unlock(&cmng->csl[cnt_type]);
9985 		}
9986 	}
9987 	LIST_INSERT_HEAD(&sh->sws_cmng.free_stat_raws, raw_to_free, next);
9988 	pool->raw_hw = NULL;
9989 	sh->sws_cmng.pending_queries--;
9990 }
9991 
9992 static int
9993 flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
9994 		    const struct flow_grp_info *grp_info,
9995 		    struct rte_flow_error *error)
9996 {
9997 	if (grp_info->transfer && grp_info->external &&
9998 	    grp_info->fdb_def_rule) {
9999 		if (group == UINT32_MAX)
10000 			return rte_flow_error_set
10001 						(error, EINVAL,
10002 						 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
10003 						 NULL,
10004 						 "group index not supported");
10005 		*table = group + 1;
10006 	} else {
10007 		*table = group;
10008 	}
10009 	DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
10010 	return 0;
10011 }
10012 
10013 /**
10014  * Translate the rte_flow group index to HW table value.
10015  *
10016  * If tunnel offload is disabled, all group ids are converted to flow table
10017  * ids using the standard method.
10018  * If tunnel offload is enabled, a group id can be converted using either the
10019  * standard or the tunnel conversion method. The conversion method
10020  * selection depends on flags in the `grp_info` parameter:
10021  * - Internal (grp_info.external == 0) groups are converted using the
10022  *   standard method.
10023  * - Group ids in a JUMP action are converted with the tunnel method.
10024  * - Conversion of a group id in a rule attribute depends on the rule type
10025  *   and the group id value:
10026  *   ** non-zero group attributes are converted with the tunnel method
10027  *   ** a zero group attribute in a non-tunnel rule is converted using the
10028  *      standard method - there is only one root table
10029  *   ** a zero group attribute in a steer tunnel rule is converted with the
10030  *      standard method - single root table
10031  *   ** a zero group attribute in a match tunnel rule is a special OvS
10032  *      case: that value is used for portability reasons. That group
10033  *      id is converted with the tunnel conversion method.
10034  *
10035  * @param[in] dev
10036  *   Port device
10037  * @param[in] tunnel
10038  *   PMD tunnel offload object
10039  * @param[in] group
10040  *   rte_flow group index value.
10041  * @param[out] table
10042  *   HW table value.
10043  * @param[in] grp_info
10044  *   flags used for conversion
10045  * @param[out] error
10046  *   Pointer to error structure.
10047  *
10048  * @return
10049  *   0 on success, a negative errno value otherwise and rte_errno is set.
10050  */
10051 int
10052 mlx5_flow_group_to_table(struct rte_eth_dev *dev,
10053 			 const struct mlx5_flow_tunnel *tunnel,
10054 			 uint32_t group, uint32_t *table,
10055 			 const struct flow_grp_info *grp_info,
10056 			 struct rte_flow_error *error)
10057 {
10058 	int ret;
10059 	bool standard_translation;
10060 
10061 	if (!grp_info->skip_scale && grp_info->external &&
10062 	    group < MLX5_MAX_TABLES_EXTERNAL)
10063 		group *= MLX5_FLOW_TABLE_FACTOR;
10064 	if (is_tunnel_offload_active(dev)) {
10065 		standard_translation = !grp_info->external ||
10066 					grp_info->std_tbl_fix;
10067 	} else {
10068 		standard_translation = true;
10069 	}
10070 	DRV_LOG(DEBUG,
10071 		"port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
10072 		dev->data->port_id, group, grp_info->transfer,
10073 		grp_info->external, grp_info->fdb_def_rule,
10074 		standard_translation ? "STANDARD" : "TUNNEL");
10075 	if (standard_translation)
10076 		ret = flow_group_to_table(dev->data->port_id, group, table,
10077 					  grp_info, error);
10078 	else
10079 		ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
10080 						      table, error);
10081 
10082 	return ret;
10083 }
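
/*
 * Illustrative sketch, not part of the driver: translating an external
 * transfer-domain group the way an FDB rule would. The grp_info flags are
 * assumptions chosen for the example; the snippet is guarded so it is
 * never compiled.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static int
example_translate_group(struct rte_eth_dev *dev, uint32_t group,
			uint32_t *table, struct rte_flow_error *error)
{
	struct flow_grp_info grp_info = {
		.external = 1,
		.transfer = 1,
		.fdb_def_rule = 1,
	};

	/*
	 * With these flags the group is first scaled by
	 * MLX5_FLOW_TABLE_FACTOR (external groups below the limit) and
	 * then shifted by one so table 0 stays reserved for the FDB
	 * default rule.
	 */
	return mlx5_flow_group_to_table(dev, NULL, group, table,
					&grp_info, error);
}
#endif /* MLX5_FLOW_DOC_EXAMPLES */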
10084 
10085 /**
10086  * Discover availability of metadata reg_c's.
10087  *
10088  * Iteratively use test flows to check availability.
10089  *
10090  * @param[in] dev
10091  *   Pointer to the Ethernet device structure.
10092  *
10093  * @return
10094  *   0 on success, a negative errno value otherwise and rte_errno is set.
10095  */
10096 int
10097 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
10098 {
10099 	struct mlx5_priv *priv = dev->data->dev_private;
10100 	enum modify_reg idx;
10101 	int n = 0;
10102 
10103 	/* reg_c[0] and reg_c[1] are reserved. */
10104 	priv->sh->flow_mreg_c[n++] = REG_C_0;
10105 	priv->sh->flow_mreg_c[n++] = REG_C_1;
10106 	/* Discover availability of other reg_c's. */
10107 	for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
10108 		struct rte_flow_attr attr = {
10109 			.group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
10110 			.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
10111 			.ingress = 1,
10112 		};
10113 		struct rte_flow_item items[] = {
10114 			[0] = {
10115 				.type = RTE_FLOW_ITEM_TYPE_END,
10116 			},
10117 		};
10118 		struct rte_flow_action actions[] = {
10119 			[0] = {
10120 				.type = (enum rte_flow_action_type)
10121 					MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
10122 				.conf = &(struct mlx5_flow_action_copy_mreg){
10123 					.src = REG_C_1,
10124 					.dst = idx,
10125 				},
10126 			},
10127 			[1] = {
10128 				.type = RTE_FLOW_ACTION_TYPE_JUMP,
10129 				.conf = &(struct rte_flow_action_jump){
10130 					.group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
10131 				},
10132 			},
10133 			[2] = {
10134 				.type = RTE_FLOW_ACTION_TYPE_END,
10135 			},
10136 		};
10137 		uint32_t flow_idx;
10138 		struct rte_flow *flow;
10139 		struct rte_flow_error error;
10140 
10141 		if (!priv->sh->config.dv_flow_en)
10142 			break;
10143 		/* Create internal flow, validation skips copy action. */
10144 		flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_GEN, &attr,
10145 					items, actions, false, &error);
10146 		flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
10147 				      flow_idx);
10148 		if (!flow)
10149 			continue;
10150 		priv->sh->flow_mreg_c[n++] = idx;
10151 		flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN, flow_idx);
10152 	}
10153 	for (; n < MLX5_MREG_C_NUM; ++n)
10154 		priv->sh->flow_mreg_c[n] = REG_NON;
10155 	priv->sh->metadata_regc_check_flag = 1;
10156 	return 0;
10157 }
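
/*
 * Illustrative sketch, not part of the driver: after the discovery above,
 * priv->sh->flow_mreg_c[] holds the usable registers with the remaining
 * slots set to REG_NON. A hypothetical helper counting the available
 * registers, guarded so it is never compiled:
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static int
example_count_mreg_c(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	int n;

	for (n = 0; n < MLX5_MREG_C_NUM; n++)
		if (priv->sh->flow_mreg_c[n] == REG_NON)
			break;
	return n;
}
#endif /* MLX5_FLOW_DOC_EXAMPLES */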
10158 
10159 int
10160 save_dump_file(const uint8_t *data, uint32_t size,
10161 	uint32_t type, uint64_t id, void *arg, FILE *file)
10162 {
10163 	char line[BUF_SIZE];
10164 	uint32_t out = 0;
10165 	uint32_t k;
10166 	uint32_t actions_num;
10167 	struct rte_flow_query_count *count;
10168 
10169 	memset(line, 0, BUF_SIZE);
10170 	switch (type) {
10171 	case DR_DUMP_REC_TYPE_PMD_MODIFY_HDR:
10172 		actions_num = *(uint32_t *)(arg);
10173 		out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",%d,",
10174 				type, id, actions_num);
10175 		break;
10176 	case DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT:
10177 		out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",",
10178 				type, id);
10179 		break;
10180 	case DR_DUMP_REC_TYPE_PMD_COUNTER:
10181 		count = (struct rte_flow_query_count *)arg;
10182 		fprintf(file,
10183 			"%d,0x%" PRIx64 ",%" PRIu64 ",%" PRIu64 "\n",
10184 			type, id, count->hits, count->bytes);
10185 		return 0;
10186 	default:
10187 		return -1;
10188 	}
10189 
10190 	for (k = 0; k < size; k++) {
10191 		/* Make sure we do not overrun the line buffer length. */
10192 		if (out >= BUF_SIZE - 4) {
10193 			line[out] = '\0';
10194 			break;
10195 		}
10196 		out += snprintf(line + out, BUF_SIZE - out, "%02x",
10197 				(data[k]) & 0xff);
10198 	}
10199 	fprintf(file, "%s\n", line);
10200 	return 0;
10201 }
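
/*
 * Illustrative sketch, not part of the driver: a counter record written
 * through save_dump_file() comes out as one CSV line of the form
 * "<type>,<id in hex>,<hits>,<bytes>". The id and statistics below are
 * made-up values; the snippet is guarded so it is never compiled.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static void
example_dump_counter_record(FILE *file)
{
	struct rte_flow_query_count count = {
		.hits = 1000,
		.bytes = 64000,
	};

	/* Emits a line such as "<type>,0x2a,1000,64000". */
	save_dump_file(NULL, 0, DR_DUMP_REC_TYPE_PMD_COUNTER,
		       0x2a, &count, file);
}
#endif /* MLX5_FLOW_DOC_EXAMPLES */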
10202 
10203 int
10204 mlx5_flow_query_counter(struct rte_eth_dev *dev, struct rte_flow *flow,
10205 	struct rte_flow_query_count *count, struct rte_flow_error *error)
10206 {
10207 	struct rte_flow_action action[2];
10208 	enum mlx5_flow_drv_type ftype;
10209 	const struct mlx5_flow_driver_ops *fops;
10210 
10211 	if (!flow) {
10212 		return rte_flow_error_set(error, ENOENT,
10213 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
10214 				NULL,
10215 				"invalid flow handle");
10216 	}
10217 	action[0].type = RTE_FLOW_ACTION_TYPE_COUNT;
10218 	action[1].type = RTE_FLOW_ACTION_TYPE_END;
10219 	if (flow->counter) {
10220 		memset(count, 0, sizeof(struct rte_flow_query_count));
10221 		ftype = (enum mlx5_flow_drv_type)(flow->drv_type);
10222 		MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN &&
10223 						ftype < MLX5_FLOW_TYPE_MAX);
10224 		fops = flow_get_drv_ops(ftype);
10225 		return fops->query(dev, flow, action, count, error);
10226 	}
10227 	return -1;
10228 }
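
/*
 * Illustrative sketch, not part of the driver: querying hit/byte counts
 * for a flow that carries a COUNT action via the helper above, guarded
 * so it is never compiled.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static void
example_query_flow_counter(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct rte_flow_query_count count;
	struct rte_flow_error error;

	if (!mlx5_flow_query_counter(dev, flow, &count, &error))
		DRV_LOG(DEBUG, "hits=%" PRIu64 " bytes=%" PRIu64,
			count.hits, count.bytes);
}
#endif /* MLX5_FLOW_DOC_EXAMPLES */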
10229 
10230 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
10231 /**
10232  * Dump flow ipool data to a file.
10233  *
10234  * @param[in] dev
10235  *   The pointer to Ethernet device.
10236  * @param[in] flow
10237  *   The pointer to the flow to dump.
10238  * @param[in] file
10239  *   A pointer to a file for output.
10238  * @param[out] error
10239  *   Perform verbose error reporting if not NULL. PMDs initialize this
10240  *   structure in case of error only.
10241  * @return
10242  *   0 on success, a negative value otherwise.
10243  */
10244 int
10245 mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev,
10246 	struct rte_flow *flow, FILE *file,
10247 	struct rte_flow_error *error)
10248 {
10249 	struct mlx5_priv *priv = dev->data->dev_private;
10250 	struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
10251 	struct mlx5_flow_dv_encap_decap_resource *encap_decap;
10252 	uint32_t handle_idx;
10253 	struct mlx5_flow_handle *dh;
10254 	struct rte_flow_query_count count;
10255 	uint32_t actions_num;
10256 	const uint8_t *data;
10257 	size_t size;
10258 	uint64_t id;
10259 	uint32_t type;
10260 	void *action = NULL;
10261 
10262 	if (!flow) {
10263 		return rte_flow_error_set(error, ENOENT,
10264 				RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
10265 				NULL,
10266 				"invalid flow handle");
10267 	}
10268 	handle_idx = flow->dev_handles;
10269 	/* query counter */
10270 	if (flow->counter &&
10271 	(!mlx5_counter_query(dev, flow->counter, false,
10272 	&count.hits, &count.bytes, &action)) && action) {
10273 		id = (uint64_t)(uintptr_t)action;
10274 		type = DR_DUMP_REC_TYPE_PMD_COUNTER;
10275 		save_dump_file(NULL, 0, type,
10276 			id, (void *)&count, file);
10277 	}
10278 
10279 	while (handle_idx) {
10280 		dh = mlx5_ipool_get(priv->sh->ipool
10281 				[MLX5_IPOOL_MLX5_FLOW], handle_idx);
10282 		if (!dh)
10283 			break; /* Broken handle chain, stop to avoid an endless loop. */
10284 		handle_idx = dh->next.next;
10285 
10286 		/* Get modify_hdr and encap_decap buf from ipools. */
10287 		encap_decap = NULL;
10288 		modify_hdr = dh->dvh.modify_hdr;
10289 
10290 		if (dh->dvh.rix_encap_decap) {
10291 			encap_decap = mlx5_ipool_get(priv->sh->ipool
10292 						[MLX5_IPOOL_DECAP_ENCAP],
10293 						dh->dvh.rix_encap_decap);
10294 		}
10295 		if (modify_hdr) {
10296 			data = (const uint8_t *)modify_hdr->actions;
10297 			size = (size_t)(modify_hdr->actions_num) * 8;
10298 			id = (uint64_t)(uintptr_t)modify_hdr->action;
10299 			actions_num = modify_hdr->actions_num;
10300 			type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
10301 			save_dump_file(data, size, type, id,
10302 						(void *)(&actions_num), file);
10303 		}
10304 		if (encap_decap) {
10305 			data = encap_decap->buf;
10306 			size = encap_decap->size;
10307 			id = (uint64_t)(uintptr_t)encap_decap->action;
10308 			type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
10309 			save_dump_file(data, size, type,
10310 						id, NULL, file);
10311 		}
10312 	}
10313 	return 0;
10314 }
10315 
10316 /**
10317  * Dump all flows' encap_decap/modify_hdr/counter data to a file.
10318  *
10319  * @param[in] dev
10320  *   The pointer to Ethernet device.
10321  * @param[in] file
10322  *   A pointer to a file for output.
10323  * @param[out] error
10324  *   Perform verbose error reporting if not NULL. PMDs initialize this
10325  *   structure in case of error only.
10326  * @return
10327  *   0 on success, a negative value otherwise.
10328  */
10329 static int
10330 mlx5_flow_dev_dump_sh_all(struct rte_eth_dev *dev,
10331 	FILE *file, struct rte_flow_error *error __rte_unused)
10332 {
10333 	struct mlx5_priv *priv = dev->data->dev_private;
10334 	struct mlx5_dev_ctx_shared *sh = priv->sh;
10335 	struct mlx5_hlist *h;
10336 	struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
10337 	struct mlx5_flow_dv_encap_decap_resource *encap_decap;
10338 	struct rte_flow_query_count count;
10339 	uint32_t actions_num;
10340 	const uint8_t *data;
10341 	size_t size;
10342 	uint64_t id;
10343 	uint32_t type;
10344 	uint32_t i;
10345 	uint32_t j;
10346 	struct mlx5_list_inconst *l_inconst;
10347 	struct mlx5_list_entry *e;
10348 	int lcore_index;
10349 	struct mlx5_flow_counter_mng *cmng = &priv->sh->sws_cmng;
10350 	uint32_t max;
10351 	void *action;
10352 
10353 	/* encap_decap hlist is lcore_share, get global core cache. */
10354 	/* The encap_decap hlist is lcore-shared; use the global core cache. */
10355 	h = sh->encaps_decaps;
10356 	if (h) {
10357 		for (j = 0; j <= h->mask; j++) {
10358 			l_inconst = &h->buckets[j].l;
10359 			if (!l_inconst || !l_inconst->cache[i])
10360 				continue;
10361 
10362 			e = LIST_FIRST(&l_inconst->cache[i]->h);
10363 			while (e) {
10364 				encap_decap =
10365 				(struct mlx5_flow_dv_encap_decap_resource *)e;
10366 				data = encap_decap->buf;
10367 				size = encap_decap->size;
10368 				id = (uint64_t)(uintptr_t)encap_decap->action;
10369 				type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
10370 				save_dump_file(data, size, type,
10371 					id, NULL, file);
10372 				e = LIST_NEXT(e, next);
10373 			}
10374 		}
10375 	}
10376 
10377 	/* get modify_hdr */
10378 	h = sh->modify_cmds;
10379 	if (h) {
10380 		lcore_index = rte_lcore_index(rte_lcore_id());
10381 		if (unlikely(lcore_index == -1)) {
10382 			lcore_index = MLX5_LIST_NLCORE;
10383 			rte_spinlock_lock(&h->l_const.lcore_lock);
10384 		}
10385 		i = lcore_index;
10386 
10387 		if (lcore_index == MLX5_LIST_NLCORE) {
10388 			for (i = 0; i <= (uint32_t)lcore_index; i++) {
10389 				for (j = 0; j <= h->mask; j++) {
10390 					l_inconst = &h->buckets[j].l;
10391 					if (!l_inconst || !l_inconst->cache[i])
10392 						continue;
10393 
10394 					e = LIST_FIRST(&l_inconst->cache[i]->h);
10395 					while (e) {
10396 						modify_hdr =
10397 						(struct mlx5_flow_dv_modify_hdr_resource *)e;
10398 						data = (const uint8_t *)modify_hdr->actions;
10399 						size = (size_t)(modify_hdr->actions_num) * 8;
10400 						actions_num = modify_hdr->actions_num;
10401 						id = (uint64_t)(uintptr_t)modify_hdr->action;
10402 						type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
10403 						save_dump_file(data, size, type, id,
10404 								(void *)(&actions_num), file);
10405 						e = LIST_NEXT(e, next);
10406 					}
10407 				}
10408 			}
10409 		} else {
10410 			for (j = 0; j <= h->mask; j++) {
10411 				l_inconst = &h->buckets[j].l;
10412 				if (!l_inconst || !l_inconst->cache[i])
10413 					continue;
10414 
10415 				e = LIST_FIRST(&l_inconst->cache[i]->h);
10416 				while (e) {
10417 					modify_hdr =
10418 					(struct mlx5_flow_dv_modify_hdr_resource *)e;
10419 					data = (const uint8_t *)modify_hdr->actions;
10420 					size = (size_t)(modify_hdr->actions_num) * 8;
10421 					actions_num = modify_hdr->actions_num;
10422 					id = (uint64_t)(uintptr_t)modify_hdr->action;
10423 					type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
10424 					save_dump_file(data, size, type, id,
10425 							(void *)(&actions_num), file);
10426 					e = LIST_NEXT(e, next);
10427 				}
10428 			}
10429 		}
10430 
10431 		if (unlikely(lcore_index == MLX5_LIST_NLCORE))
10432 			rte_spinlock_unlock(&h->l_const.lcore_lock);
10433 	}
10434 
10435 	/* get counter */
10436 	MLX5_ASSERT(cmng->n_valid <= MLX5_COUNTER_POOLS_MAX_NUM);
10437 	max = MLX5_COUNTERS_PER_POOL * cmng->n_valid;
10438 	for (j = 1; j <= max; j++) {
10439 		action = NULL;
10440 		if ((!mlx5_counter_query(dev, j, false, &count.hits,
10441 		&count.bytes, &action)) && action) {
10442 			id = (uint64_t)(uintptr_t)action;
10443 			type = DR_DUMP_REC_TYPE_PMD_COUNTER;
10444 			save_dump_file(NULL, 0, type,
10445 					id, (void *)&count, file);
10446 		}
10447 	}
10448 	return 0;
10449 }
10450 #endif
10451 
10452 /**
10453  * Dump raw HW flow data to a file.
10454  *
10455  * @param[in] dev
10456  *   The pointer to Ethernet device.
10457  * @param[in] file
10458  *   A pointer to a file for output.
10459  * @param[out] error
10460  *   Perform verbose error reporting if not NULL. PMDs initialize this
10461  *   structure in case of error only.
10462  * @return
10463  *   0 on success, a negative value otherwise.
10464  */
10465 int
10466 mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow_idx,
10467 		   FILE *file,
10468 		   struct rte_flow_error *error __rte_unused)
10469 {
10470 	struct mlx5_priv *priv = dev->data->dev_private;
10471 	struct mlx5_dev_ctx_shared *sh = priv->sh;
10472 	uint32_t handle_idx;
10473 	int ret;
10474 	struct mlx5_flow_handle *dh;
10475 	struct rte_flow *flow;
10476 
10477 	if (!sh->config.dv_flow_en) {
10478 		if (fputs("device dv flow disabled\n", file) <= 0)
10479 			return -errno;
10480 		return -ENOTSUP;
10481 	}
10482 
10483 	/* dump all */
10484 	if (!flow_idx) {
10485 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
10486 		if (mlx5_flow_dev_dump_sh_all(dev, file, error))
10487 			return -EINVAL;
10488 
10489 		if (sh->config.dv_flow_en == 2)
10490 			return mlx5dr_debug_dump(priv->dr_ctx, file);
10491 #endif
10492 		return mlx5_devx_cmd_flow_dump(sh->fdb_domain,
10493 					       sh->rx_domain,
10494 					       sh->tx_domain, file);
10495 	}
10496 	/* dump one */
10497 	flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
10498 			(uintptr_t)(void *)flow_idx);
10499 	if (!flow)
10500 		return -EINVAL;
10501 
10502 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
10503 	mlx5_flow_dev_dump_ipool(dev, flow, file, error);
10504 #endif
10505 	handle_idx = flow->dev_handles;
10506 	while (handle_idx) {
10507 		dh = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
10508 				handle_idx);
10509 		if (!dh)
10510 			return -ENOENT;
10511 		if (dh->drv_flow) {
10512 			if (sh->config.dv_flow_en == 2)
10513 				return -ENOTSUP;
10514 
10515 			ret = mlx5_devx_cmd_flow_single_dump(dh->drv_flow,
10516 							     file);
10517 			if (ret)
10518 				return -ENOENT;
10519 		}
10520 		handle_idx = dh->next.next;
10521 	}
10522 	return 0;
10523 }
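
/*
 * Illustrative sketch, not part of the driver: applications reach the
 * dump entry point above through the generic rte_flow_dev_dump() API.
 * A NULL flow pointer dumps all flows, a valid handle dumps one flow.
 * Guarded so it is never compiled.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static int
example_dump_all_flows(uint16_t port_id, const char *path)
{
	struct rte_flow_error error;
	FILE *file = fopen(path, "w");
	int ret;

	if (file == NULL)
		return -errno;
	ret = rte_flow_dev_dump(port_id, NULL, file, &error);
	fclose(file);
	return ret;
}
#endif /* MLX5_FLOW_DOC_EXAMPLES */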
10524 
10525 /**
10526  * Get aged-out flows.
10527  *
10528  * @param[in] dev
10529  *   Pointer to the Ethernet device structure.
10530  * @param[in] contexts
10531  *   The address of an array of pointers to the aged-out flow contexts.
10532  * @param[in] nb_contexts
10533  *   The length of the context array.
10534  * @param[out] error
10535  *   Perform verbose error reporting if not NULL. Initialized in case of
10536  *   error only.
10537  *
10538  * @return
10539  *   The number of contexts retrieved on success, otherwise a negative
10540  *   errno value. If nb_contexts is 0, return the total number of aged
10541  *   contexts. If nb_contexts is not 0, return the number of aged flows
10542  *   reported in the context array.
10543  */
10544 int
10545 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
10546 			uint32_t nb_contexts, struct rte_flow_error *error)
10547 {
10548 	struct rte_flow_attr attr = { .transfer = 0 };
10549 
10550 	return flow_get_drv_ops(flow_get_drv_type(dev, &attr))->get_aged_flows
10551 		(dev, contexts, nb_contexts, error);
10552 }
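
/*
 * Illustrative sketch, not part of the driver: the usual two-step pattern
 * for rte_flow_get_aged_flows() - first query the number of aged-out
 * flows with nb_contexts == 0, then fetch the contexts themselves.
 * Guarded so it is never compiled.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static int
example_drain_aged_flows(uint16_t port_id)
{
	struct rte_flow_error error;
	void **contexts;
	int n, ret;

	n = rte_flow_get_aged_flows(port_id, NULL, 0, &error);
	if (n <= 0)
		return n;
	contexts = mlx5_malloc(MLX5_MEM_ZERO, sizeof(void *) * n,
			       0, SOCKET_ID_ANY);
	if (contexts == NULL)
		return -ENOMEM;
	ret = rte_flow_get_aged_flows(port_id, contexts, n, &error);
	mlx5_free(contexts);
	return ret;
}
#endif /* MLX5_FLOW_DOC_EXAMPLES */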
10553 
10554 /**
10555  * Get aged-out flows per HWS queue.
10556  *
10557  * @param[in] dev
10558  *   Pointer to the Ethernet device structure.
10559  * @param[in] queue_id
10560  *   Flow queue to query.
10561  * @param[in] contexts
10562  *   The address of an array of pointers to the aged-out flow contexts.
10563  * @param[in] nb_contexts
10564  *   The length of the context array.
10565  * @param[out] error
10566  *   Perform verbose error reporting if not NULL. Initialized in case of
10567  *   error only.
10568  *
10569  * @return
10570  *   The number of contexts retrieved on success, otherwise a negative
10571  *   errno value. If nb_contexts is 0, return the total number of aged
10572  *   contexts. If nb_contexts is not 0, return the number of aged flows
10573  *   reported in the context array.
10574  */
10575 int
10576 mlx5_flow_get_q_aged_flows(struct rte_eth_dev *dev, uint32_t queue_id,
10577 			   void **contexts, uint32_t nb_contexts,
10578 			   struct rte_flow_error *error)
10579 {
10580 	const struct mlx5_flow_driver_ops *fops;
10581 	struct rte_flow_attr attr = { 0 };
10582 
10583 	if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_HW) {
10584 		fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
10585 		return fops->get_q_aged_flows(dev, queue_id, contexts,
10586 					      nb_contexts, error);
10587 	}
10588 	DRV_LOG(ERR, "port %u queue %u get aged flows is not supported.",
10589 		dev->data->port_id, queue_id);
10590 	return rte_flow_error_set(error, ENOTSUP,
10591 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10592 				  "get Q aged flows with incorrect steering mode");
10593 }
10594 
10595 /* Wrapper for driver action_validate op callback */
10596 static int
10597 flow_drv_action_validate(struct rte_eth_dev *dev,
10598 			 const struct rte_flow_indir_action_conf *conf,
10599 			 const struct rte_flow_action *action,
10600 			 const struct mlx5_flow_driver_ops *fops,
10601 			 struct rte_flow_error *error)
10602 {
10603 	static const char err_msg[] = "indirect action validation unsupported";
10604 
10605 	if (!fops->action_validate) {
10606 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10607 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10608 				   NULL, err_msg);
10609 		return -rte_errno;
10610 	}
10611 	return fops->action_validate(dev, conf, action, error);
10612 }
10613 
10614 /**
10615  * Destroys the shared action by handle.
10616  *
10617  * @param dev
10618  *   Pointer to Ethernet device structure.
10619  * @param[in] handle
10620  *   Handle for the indirect action object to be destroyed.
10621  * @param[out] error
10622  *   Perform verbose error reporting if not NULL. PMDs initialize this
10623  *   structure in case of error only.
10624  *
10625  * @return
10626  *   0 on success, a negative errno value otherwise and rte_errno is set.
10627  *
10628  * @note: wrapper for driver action_destroy op callback.
10629  */
10630 static int
10631 mlx5_action_handle_destroy(struct rte_eth_dev *dev,
10632 			   struct rte_flow_action_handle *handle,
10633 			   struct rte_flow_error *error)
10634 {
10635 	static const char err_msg[] = "indirect action destruction unsupported";
10636 	struct rte_flow_attr attr = { .transfer = 0 };
10637 	const struct mlx5_flow_driver_ops *fops =
10638 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10639 
10640 	if (!fops->action_destroy) {
10641 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10642 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10643 				   NULL, err_msg);
10644 		return -rte_errno;
10645 	}
10646 	return fops->action_destroy(dev, handle, error);
10647 }
10648 
10649 /* Wrapper for driver action_update op callback */
10650 static int
10651 flow_drv_action_update(struct rte_eth_dev *dev,
10652 		       struct rte_flow_action_handle *handle,
10653 		       const void *update,
10654 		       const struct mlx5_flow_driver_ops *fops,
10655 		       struct rte_flow_error *error)
10656 {
10657 	static const char err_msg[] = "indirect action update unsupported";
10658 
10659 	if (!fops->action_update) {
10660 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10661 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10662 				   NULL, err_msg);
10663 		return -rte_errno;
10664 	}
10665 	return fops->action_update(dev, handle, update, error);
10666 }
10667 
10668 /* Wrapper for driver action_query op callback */
10669 static int
10670 flow_drv_action_query(struct rte_eth_dev *dev,
10671 		      const struct rte_flow_action_handle *handle,
10672 		      void *data,
10673 		      const struct mlx5_flow_driver_ops *fops,
10674 		      struct rte_flow_error *error)
10675 {
10676 	static const char err_msg[] = "indirect action query unsupported";
10677 
10678 	if (!fops->action_query) {
10679 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10680 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10681 				   NULL, err_msg);
10682 		return -rte_errno;
10683 	}
10684 	return fops->action_query(dev, handle, data, error);
10685 }
10686 
10687 /**
10688  * Create indirect action for reuse in multiple flow rules.
10689  *
10690  * @param dev
10691  *   Pointer to Ethernet device structure.
10692  * @param conf
10693  *   Pointer to indirect action object configuration.
10694  * @param[in] action
10695  *   Action configuration for indirect action object creation.
10696  * @param[out] error
10697  *   Perform verbose error reporting if not NULL. PMDs initialize this
10698  *   structure in case of error only.
10699  * @return
10700  *   A valid handle in case of success, NULL otherwise and rte_errno is set.
10701  */
10702 static struct rte_flow_action_handle *
10703 mlx5_action_handle_create(struct rte_eth_dev *dev,
10704 			  const struct rte_flow_indir_action_conf *conf,
10705 			  const struct rte_flow_action *action,
10706 			  struct rte_flow_error *error)
10707 {
10708 	static const char err_msg[] = "indirect action creation unsupported";
10709 	struct rte_flow_attr attr = { .transfer = 0 };
10710 	const struct mlx5_flow_driver_ops *fops =
10711 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10712 
10713 	if (flow_drv_action_validate(dev, conf, action, fops, error))
10714 		return NULL;
10715 	if (!fops->action_create) {
10716 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10717 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10718 				   NULL, err_msg);
10719 		return NULL;
10720 	}
10721 	return fops->action_create(dev, conf, action, error);
10722 }
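
/*
 * Illustrative sketch, not part of the driver: creating a shared COUNT
 * action through the generic rte_flow_action_handle_create() API, which
 * lands in the wrapper above. Guarded so it is never compiled.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static struct rte_flow_action_handle *
example_create_shared_count(uint16_t port_id)
{
	struct rte_flow_error error;
	const struct rte_flow_indir_action_conf conf = {
		.ingress = 1,
	};
	const struct rte_flow_action action = {
		.type = RTE_FLOW_ACTION_TYPE_COUNT,
	};

	/* The returned handle can be reused by multiple flow rules. */
	return rte_flow_action_handle_create(port_id, &conf, &action,
					     &error);
}
#endif /* MLX5_FLOW_DOC_EXAMPLES */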
10723 
10724 /**
10725  * Updates in place the indirect action configuration pointed to by *handle*
10726  * with the configuration provided as the *update* argument.
10727  * The update of the indirect action configuration affects all flow rules
10728  * reusing the action via the handle.
10729  *
10730  * @param dev
10731  *   Pointer to Ethernet device structure.
10732  * @param[in] handle
10733  *   Handle for the indirect action to be updated.
10734  * @param[in] update
10735  *   Action specification used to modify the action pointed by handle.
10736  *   *update* could be of the same type as the action pointed to by the
10737  *   *handle* argument, or some other structure like a wrapper, depending on
10738  *   the indirect action type.
10739  * @param[out] error
10740  *   Perform verbose error reporting if not NULL. PMDs initialize this
10741  *   structure in case of error only.
10742  *
10743  * @return
10744  *   0 on success, a negative errno value otherwise and rte_errno is set.
10745  */
10746 static int
10747 mlx5_action_handle_update(struct rte_eth_dev *dev,
10748 		struct rte_flow_action_handle *handle,
10749 		const void *update,
10750 		struct rte_flow_error *error)
10751 {
10752 	struct rte_flow_attr attr = { .transfer = 0 };
10753 	const struct mlx5_flow_driver_ops *fops =
10754 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10755 	int ret;
10756 	uint32_t act_idx = (uint32_t)(uintptr_t)handle;
10757 	uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
10758 
10759 	switch (type) {
10760 	case MLX5_INDIRECT_ACTION_TYPE_CT:
10761 	case MLX5_INDIRECT_ACTION_TYPE_METER_MARK:
10762 		ret = 0;
10763 		break;
10764 	default:
10765 		ret = flow_drv_action_validate(dev, NULL,
10766 				(const struct rte_flow_action *)update,
10767 				fops, error);
10768 	}
10769 	if (ret)
10770 		return ret;
10771 	return flow_drv_action_update(dev, handle, update, fops,
10772 				      error);
10773 }
10774 
10775 /**
10776  * Query the indirect action by handle.
10777  *
10778  * This function allows retrieving action-specific data such as counters.
10779  * Data is gathered by a special action which may be present/referenced in
10780  * more than one flow rule definition.
10781  *
10782  * see @RTE_FLOW_ACTION_TYPE_COUNT
10783  *
10784  * @param dev
10785  *   Pointer to Ethernet device structure.
10786  * @param[in] handle
10787  *   Handle for the indirect action to query.
10788  * @param[in, out] data
10789  *   Pointer to storage for the associated query data type.
10790  * @param[out] error
10791  *   Perform verbose error reporting if not NULL. PMDs initialize this
10792  *   structure in case of error only.
10793  *
10794  * @return
10795  *   0 on success, a negative errno value otherwise and rte_errno is set.
10796  */
10797 static int
10798 mlx5_action_handle_query(struct rte_eth_dev *dev,
10799 			 const struct rte_flow_action_handle *handle,
10800 			 void *data,
10801 			 struct rte_flow_error *error)
10802 {
10803 	struct rte_flow_attr attr = { .transfer = 0 };
10804 	const struct mlx5_flow_driver_ops *fops =
10805 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10806 
10807 	return flow_drv_action_query(dev, handle, data, fops, error);
10808 }
10809 
10810 static int
10811 mlx5_action_handle_query_update(struct rte_eth_dev *dev,
10812 				struct rte_flow_action_handle *handle,
10813 				const void *update, void *query,
10814 				enum rte_flow_query_update_mode qu_mode,
10815 				struct rte_flow_error *error)
10816 {
10817 	struct rte_flow_attr attr = { .transfer = 0 };
10818 	enum mlx5_flow_drv_type drv_type = flow_get_drv_type(dev, &attr);
10819 	const struct mlx5_flow_driver_ops *fops;
10820 
10821 	if (drv_type == MLX5_FLOW_TYPE_MIN || drv_type == MLX5_FLOW_TYPE_MAX)
10822 		return rte_flow_error_set(error, ENOTSUP,
10823 					  RTE_FLOW_ERROR_TYPE_ACTION,
10824 					  NULL, "invalid driver type");
10825 	fops = flow_get_drv_ops(drv_type);
10826 	if (!fops || !fops->action_query_update)
10827 		return rte_flow_error_set(error, ENOTSUP,
10828 					  RTE_FLOW_ERROR_TYPE_ACTION,
10829 					  NULL, "no query_update handler");
10830 	return fops->action_query_update(dev, handle, update,
10831 					 query, qu_mode, error);
10832 }
10833 
10834 /**
10835  * Destroy all indirect actions (shared RSS).
10836  *
10837  * @param dev
10838  *   Pointer to Ethernet device.
10839  *
10840  * @return
10841  *   0 on success, a negative errno value otherwise and rte_errno is set.
10842  */
10843 int
10844 mlx5_action_handle_flush(struct rte_eth_dev *dev)
10845 {
10846 	struct rte_flow_error error;
10847 	struct mlx5_priv *priv = dev->data->dev_private;
10848 	struct mlx5_shared_action_rss *shared_rss;
10849 	int ret = 0;
10850 	uint32_t idx;
10851 
10852 	ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
10853 		      priv->rss_shared_actions, idx, shared_rss, next) {
10854 		ret |= mlx5_action_handle_destroy(dev,
10855 		       (struct rte_flow_action_handle *)(uintptr_t)idx, &error);
10856 	}
10857 	return ret;
10858 }
10859 
10860 /**
10861  * Validate existing indirect actions against current device configuration
10862  * and attach them to device resources.
10863  *
10864  * @param dev
10865  *   Pointer to Ethernet device.
10866  *
10867  * @return
10868  *   0 on success, a negative errno value otherwise and rte_errno is set.
10869  */
10870 int
10871 mlx5_action_handle_attach(struct rte_eth_dev *dev)
10872 {
10873 	struct mlx5_priv *priv = dev->data->dev_private;
10874 	int ret = 0;
10875 	struct mlx5_ind_table_obj *ind_tbl, *ind_tbl_last;
10876 
10877 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
10878 		const char *message;
10879 		uint32_t queue_idx;
10880 
10881 		ret = mlx5_validate_rss_queues(dev, ind_tbl->queues,
10882 					       ind_tbl->queues_n,
10883 					       &message, &queue_idx);
10884 		if (ret != 0) {
10885 			DRV_LOG(ERR, "Port %u cannot use queue %u in RSS: %s",
10886 				dev->data->port_id, ind_tbl->queues[queue_idx],
10887 				message);
10888 			break;
10889 		}
10890 	}
10891 	if (ret != 0)
10892 		return ret;
10893 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
10894 		ret = mlx5_ind_table_obj_attach(dev, ind_tbl);
10895 		if (ret != 0) {
10896 			DRV_LOG(ERR, "Port %u could not attach "
10897 				"indirection table obj %p",
10898 				dev->data->port_id, (void *)ind_tbl);
10899 			goto error;
10900 		}
10901 	}
10902 
10903 	return 0;
10904 error:
10905 	ind_tbl_last = ind_tbl;
10906 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
10907 		if (ind_tbl == ind_tbl_last)
10908 			break;
10909 		if (mlx5_ind_table_obj_detach(dev, ind_tbl) != 0)
10910 			DRV_LOG(CRIT, "Port %u could not detach "
10911 				"indirection table obj %p on rollback",
10912 				dev->data->port_id, (void *)ind_tbl);
10913 	}
10914 	return ret;
10915 }
10916 
10917 /**
10918  * Detach indirect actions of the device from its resources.
10919  *
10920  * @param dev
10921  *   Pointer to Ethernet device.
10922  *
10923  * @return
10924  *   0 on success, a negative errno value otherwise and rte_errno is set.
10925  */
10926 int
10927 mlx5_action_handle_detach(struct rte_eth_dev *dev)
10928 {
10929 	struct mlx5_priv *priv = dev->data->dev_private;
10930 	int ret = 0;
10931 	struct mlx5_ind_table_obj *ind_tbl, *ind_tbl_last;
10932 
10933 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
10934 		ret = mlx5_ind_table_obj_detach(dev, ind_tbl);
10935 		if (ret != 0) {
10936 			DRV_LOG(ERR, "Port %u could not detach "
10937 				"indirection table obj %p",
10938 				dev->data->port_id, (void *)ind_tbl);
10939 			goto error;
10940 		}
10941 	}
10942 	return 0;
10943 error:
10944 	ind_tbl_last = ind_tbl;
10945 	LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
10946 		if (ind_tbl == ind_tbl_last)
10947 			break;
10948 		if (mlx5_ind_table_obj_attach(dev, ind_tbl) != 0)
10949 			DRV_LOG(CRIT, "Port %u could not attach "
10950 				"indirection table obj %p on rollback",
10951 				dev->data->port_id, (void *)ind_tbl);
10952 	}
10953 	return ret;
10954 }
10955 
10956 #ifndef HAVE_MLX5DV_DR
10957 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
10958 #else
10959 #define MLX5_DOMAIN_SYNC_FLOW \
10960 	(MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
10961 #endif
10962 
10963 int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
10964 {
10965 	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
10966 	const struct mlx5_flow_driver_ops *fops;
10967 	int ret;
10968 	struct rte_flow_attr attr = { .transfer = 0 };
10969 
10970 	fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10971 	ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
10972 	if (ret > 0)
10973 		ret = -ret;
10974 	return ret;
10975 }
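
/*
 * Illustrative sketch, not part of the driver: requesting a SW/HW cache
 * synchronization of the NIC Rx domain. The MLX5_DOMAIN_BIT_* flags come
 * from rte_pmd_mlx5.h. Guarded so it is never compiled.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static int
example_sync_rx_domain(uint16_t port_id)
{
	return rte_pmd_mlx5_sync_flow(port_id, MLX5_DOMAIN_BIT_NIC_RX);
}
#endif /* MLX5_FLOW_DOC_EXAMPLES */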
10976 
10977 const struct mlx5_flow_tunnel *
10978 mlx5_get_tof(const struct rte_flow_item *item,
10979 	     const struct rte_flow_action *action,
10980 	     enum mlx5_tof_rule_type *rule_type)
10981 {
10982 	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
10983 		if (item->type == (typeof(item->type))
10984 				  MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL) {
10985 			*rule_type = MLX5_TUNNEL_OFFLOAD_MATCH_RULE;
10986 			return flow_items_to_tunnel(item);
10987 		}
10988 	}
10989 	for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
10990 		if (action->type == (typeof(action->type))
10991 				    MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET) {
10992 			*rule_type = MLX5_TUNNEL_OFFLOAD_SET_RULE;
10993 			return flow_actions_to_tunnel(action);
10994 		}
10995 	}
10996 	return NULL;
10997 }
10998 
10999 /**
11000  * Tunnel offload functionality is defined for the DV environment only.
11001  */
11002 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
11003 __extension__
11004 union tunnel_offload_mark {
11005 	uint32_t val;
11006 	struct {
11007 		uint32_t app_reserve:8;
11008 		uint32_t table_id:15;
11009 		uint32_t transfer:1;
11010 		uint32_t _unused_:8;
11011 	};
11012 };
11013 
11014 static bool
11015 mlx5_access_tunnel_offload_db
11016 	(struct rte_eth_dev *dev,
11017 	 bool (*match)(struct rte_eth_dev *,
11018 		       struct mlx5_flow_tunnel *, const void *),
11019 	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
11020 	 void (*miss)(struct rte_eth_dev *, void *),
11021 	 void *ctx, bool lock_op);
11022 
11023 static int
11024 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
11025 			     struct rte_flow *flow,
11026 			     const struct rte_flow_attr *attr,
11027 			     const struct rte_flow_action *app_actions,
11028 			     uint32_t flow_idx,
11029 			     const struct mlx5_flow_tunnel *tunnel,
11030 			     struct tunnel_default_miss_ctx *ctx,
11031 			     struct rte_flow_error *error)
11032 {
11033 	struct mlx5_priv *priv = dev->data->dev_private;
11034 	struct mlx5_flow *dev_flow;
11035 	struct rte_flow_attr miss_attr = *attr;
11036 	const struct rte_flow_item miss_items[2] = {
11037 		{
11038 			.type = RTE_FLOW_ITEM_TYPE_ETH,
11039 			.spec = NULL,
11040 			.last = NULL,
11041 			.mask = NULL
11042 		},
11043 		{
11044 			.type = RTE_FLOW_ITEM_TYPE_END,
11045 			.spec = NULL,
11046 			.last = NULL,
11047 			.mask = NULL
11048 		}
11049 	};
11050 	union tunnel_offload_mark mark_id;
11051 	struct rte_flow_action_mark miss_mark;
11052 	struct rte_flow_action miss_actions[3] = {
11053 		[0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
11054 		[2] = { .type = RTE_FLOW_ACTION_TYPE_END,  .conf = NULL }
11055 	};
11056 	const struct rte_flow_action_jump *jump_data;
11057 	uint32_t i, flow_table = 0; /* prevent compilation warning */
11058 	struct flow_grp_info grp_info = {
11059 		.external = 1,
11060 		.transfer = attr->transfer,
11061 		.fdb_def_rule = !!priv->fdb_def_rule,
11062 		.std_tbl_fix = 0,
11063 	};
11064 	int ret;
11065 
11066 	if (!attr->transfer) {
11067 		uint32_t q_size;
11068 
11069 		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
11070 		q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
11071 		ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
11072 					 0, SOCKET_ID_ANY);
11073 		if (!ctx->queue)
11074 			return rte_flow_error_set
11075 				(error, ENOMEM,
11076 				RTE_FLOW_ERROR_TYPE_ACTION_CONF,
11077 				NULL, "invalid default miss RSS");
11078 		ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT;
11079 		ctx->action_rss.level = 0;
11080 		ctx->action_rss.types = priv->rss_conf.rss_hf;
11081 		ctx->action_rss.key_len = priv->rss_conf.rss_key_len;
11082 		ctx->action_rss.queue_num = priv->reta_idx_n;
11083 		ctx->action_rss.key = priv->rss_conf.rss_key;
11084 		ctx->action_rss.queue = ctx->queue;
11085 		if (!priv->reta_idx_n || !priv->rxqs_n)
11086 			return rte_flow_error_set
11087 				(error, EINVAL,
11088 				RTE_FLOW_ERROR_TYPE_ACTION_CONF,
11089 				NULL, "invalid port configuration");
11090 		if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
11091 			ctx->action_rss.types = 0;
11092 		for (i = 0; i != priv->reta_idx_n; ++i)
11093 			ctx->queue[i] = (*priv->reta_idx)[i];
11094 	} else {
11095 		miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
11096 		ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
11097 	}
11098 	miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
11099 	for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++);
11100 	jump_data = app_actions->conf;
11101 	miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
11102 	miss_attr.group = jump_data->group;
11103 	ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
11104 				       &flow_table, &grp_info, error);
11105 	if (ret)
11106 		return rte_flow_error_set(error, EINVAL,
11107 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
11108 					  NULL, "invalid tunnel id");
11109 	mark_id.app_reserve = 0;
11110 	mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
11111 	mark_id.transfer = !!attr->transfer;
11112 	mark_id._unused_ = 0;
11113 	miss_mark.id = mark_id.val;
11114 	dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
11115 				    miss_items, miss_actions, flow_idx, error);
11116 	if (!dev_flow)
11117 		return -rte_errno;
11118 	dev_flow->flow = flow;
11119 	dev_flow->external = true;
11120 	dev_flow->tunnel = tunnel;
11121 	dev_flow->tof_type = MLX5_TUNNEL_OFFLOAD_MISS_RULE;
11122 	/* Subflow object was created, we must include it in the list. */
11123 	SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
11124 		      dev_flow->handle, next);
11125 	DRV_LOG(DEBUG,
11126 		"port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
11127 		dev->data->port_id, tunnel->app_tunnel.type,
11128 		tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
11129 	ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
11130 				  miss_actions, error);
11131 	if (!ret)
11132 		ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
11133 						  error);
11134 
11135 	return ret;
11136 }
11137 
11138 static const struct mlx5_flow_tbl_data_entry  *
11139 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
11140 {
11141 	struct mlx5_priv *priv = dev->data->dev_private;
11142 	struct mlx5_dev_ctx_shared *sh = priv->sh;
11143 	struct mlx5_list_entry *he;
11144 	union tunnel_offload_mark mbits = { .val = mark };
11145 	union mlx5_flow_tbl_key table_key = {
11146 		{
11147 			.level = tunnel_id_to_flow_tbl(mbits.table_id),
11148 			.id = 0,
11149 			.reserved = 0,
11150 			.dummy = 0,
11151 			.is_fdb = !!mbits.transfer,
11152 			.is_egress = 0,
11153 		}
11154 	};
11155 	struct mlx5_flow_cb_ctx ctx = {
11156 		.data = &table_key.v64,
11157 	};
11158 
11159 	he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, &ctx);
11160 	return he ?
11161 	       container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL;
11162 }
11163 
11164 static void
11165 mlx5_flow_tunnel_grp2tbl_remove_cb(void *tool_ctx,
11166 				   struct mlx5_list_entry *entry)
11167 {
11168 	struct mlx5_dev_ctx_shared *sh = tool_ctx;
11169 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
11170 
11171 	mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
11172 			tunnel_flow_tbl_to_id(tte->flow_table));
11173 	mlx5_free(tte);
11174 }
11175 
11176 static int
11177 mlx5_flow_tunnel_grp2tbl_match_cb(void *tool_ctx __rte_unused,
11178 				  struct mlx5_list_entry *entry, void *cb_ctx)
11179 {
11180 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
11181 	union tunnel_tbl_key tbl = {
11182 		.val = *(uint64_t *)(ctx->data),
11183 	};
11184 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
11185 
11186 	return tbl.tunnel_id != tte->tunnel_id || tbl.group != tte->group;
11187 }
11188 
11189 static struct mlx5_list_entry *
11190 mlx5_flow_tunnel_grp2tbl_create_cb(void *tool_ctx, void *cb_ctx)
11191 {
11192 	struct mlx5_dev_ctx_shared *sh = tool_ctx;
11193 	struct mlx5_flow_cb_ctx *ctx = cb_ctx;
11194 	struct tunnel_tbl_entry *tte;
11195 	union tunnel_tbl_key tbl = {
11196 		.val = *(uint64_t *)(ctx->data),
11197 	};
11198 
11199 	tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
11200 			  sizeof(*tte), 0,
11201 			  SOCKET_ID_ANY);
11202 	if (!tte)
11203 		goto err;
11204 	mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
11205 			  &tte->flow_table);
11206 	if (tte->flow_table >= MLX5_MAX_TABLES) {
11207 		DRV_LOG(ERR, "Tunnel TBL ID %u exceeds the max limit.",
11208 			tte->flow_table);
11209 		mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
11210 				tte->flow_table);
11211 		goto err;
11212 	} else if (!tte->flow_table) {
11213 		goto err;
11214 	}
11215 	tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table);
11216 	tte->tunnel_id = tbl.tunnel_id;
11217 	tte->group = tbl.group;
11218 	return &tte->hash;
11219 err:
11220 	if (tte)
11221 		mlx5_free(tte);
11222 	return NULL;
11223 }
11224 
11225 static struct mlx5_list_entry *
11226 mlx5_flow_tunnel_grp2tbl_clone_cb(void *tool_ctx __rte_unused,
11227 				  struct mlx5_list_entry *oentry,
11228 				  void *cb_ctx __rte_unused)
11229 {
11230 	struct tunnel_tbl_entry *tte = mlx5_malloc(MLX5_MEM_SYS, sizeof(*tte),
11231 						   0, SOCKET_ID_ANY);
11232 
11233 	if (!tte)
11234 		return NULL;
11235 	memcpy(tte, oentry, sizeof(*tte));
11236 	return &tte->hash;
11237 }
11238 
11239 static void
11240 mlx5_flow_tunnel_grp2tbl_clone_free_cb(void *tool_ctx __rte_unused,
11241 				       struct mlx5_list_entry *entry)
11242 {
11243 	struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
11244 
11245 	mlx5_free(tte);
11246 }
11247 
11248 static uint32_t
11249 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
11250 				const struct mlx5_flow_tunnel *tunnel,
11251 				uint32_t group, uint32_t *table,
11252 				struct rte_flow_error *error)
11253 {
11254 	struct mlx5_list_entry *he;
11255 	struct tunnel_tbl_entry *tte;
11256 	union tunnel_tbl_key key = {
11257 		.tunnel_id = tunnel ? tunnel->tunnel_id : 0,
11258 		.group = group
11259 	};
11260 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
11261 	struct mlx5_hlist *group_hash;
11262 	struct mlx5_flow_cb_ctx ctx = {
11263 		.data = &key.val,
11264 	};
11265 
11266 	group_hash = tunnel ? tunnel->groups : thub->groups;
11267 	he = mlx5_hlist_register(group_hash, key.val, &ctx);
11268 	if (!he)
11269 		return rte_flow_error_set(error, EINVAL,
11270 					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
11271 					  NULL,
11272 					  "tunnel group index not supported");
11273 	tte = container_of(he, typeof(*tte), hash);
11274 	*table = tte->flow_table;
11275 	DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
11276 		dev->data->port_id, key.tunnel_id, group, *table);
11277 	return 0;
11278 }
11279 
11280 static void
11281 mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
11282 		      struct mlx5_flow_tunnel *tunnel)
11283 {
11284 	struct mlx5_priv *priv = dev->data->dev_private;
11285 	struct mlx5_indexed_pool *ipool;
11286 
11287 	DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
11288 		dev->data->port_id, tunnel->tunnel_id);
11289 	LIST_REMOVE(tunnel, chain);
11290 	mlx5_hlist_destroy(tunnel->groups);
11291 	ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
11292 	mlx5_ipool_free(ipool, tunnel->tunnel_id);
11293 }
11294 
11295 static bool
11296 mlx5_access_tunnel_offload_db
11297 	(struct rte_eth_dev *dev,
11298 	 bool (*match)(struct rte_eth_dev *,
11299 		       struct mlx5_flow_tunnel *, const void *),
11300 	 void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
11301 	 void (*miss)(struct rte_eth_dev *, void *),
11302 	 void *ctx, bool lock_op)
11303 {
11304 	bool verdict = false;
11305 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
11306 	struct mlx5_flow_tunnel *tunnel;
11307 
11308 	rte_spinlock_lock(&thub->sl);
11309 	LIST_FOREACH(tunnel, &thub->tunnels, chain) {
11310 		verdict = match(dev, tunnel, (const void *)ctx);
11311 		if (verdict)
11312 			break;
11313 	}
11314 	if (!lock_op)
11315 		rte_spinlock_unlock(&thub->sl);
11316 	if (verdict && hit)
11317 		hit(dev, tunnel, ctx);
11318 	if (!verdict && miss)
11319 		miss(dev, ctx);
11320 	if (lock_op)
11321 		rte_spinlock_unlock(&thub->sl);
11322 
11323 	return verdict;
11324 }
11325 
11326 struct tunnel_db_find_tunnel_id_ctx {
11327 	uint32_t tunnel_id;
11328 	struct mlx5_flow_tunnel *tunnel;
11329 };
11330 
11331 static bool
11332 find_tunnel_id_match(struct rte_eth_dev *dev,
11333 		     struct mlx5_flow_tunnel *tunnel, const void *x)
11334 {
11335 	const struct tunnel_db_find_tunnel_id_ctx *ctx = x;
11336 
11337 	RTE_SET_USED(dev);
11338 	return tunnel->tunnel_id == ctx->tunnel_id;
11339 }
11340 
11341 static void
11342 find_tunnel_id_hit(struct rte_eth_dev *dev,
11343 		   struct mlx5_flow_tunnel *tunnel, void *x)
11344 {
11345 	struct tunnel_db_find_tunnel_id_ctx *ctx = x;
11346 	RTE_SET_USED(dev);
11347 	ctx->tunnel = tunnel;
11348 }
11349 
11350 static struct mlx5_flow_tunnel *
11351 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
11352 {
11353 	struct tunnel_db_find_tunnel_id_ctx ctx = {
11354 		.tunnel_id = id,
11355 	};
11356 
11357 	mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match,
11358 				      find_tunnel_id_hit, NULL, &ctx, true);
11359 
11360 	return ctx.tunnel;
11361 }
11362 
11363 static struct mlx5_flow_tunnel *
11364 mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
11365 			  const struct rte_flow_tunnel *app_tunnel)
11366 {
11367 	struct mlx5_priv *priv = dev->data->dev_private;
11368 	struct mlx5_indexed_pool *ipool;
11369 	struct mlx5_flow_tunnel *tunnel;
11370 	uint32_t id;
11371 
11372 	ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
11373 	tunnel = mlx5_ipool_zmalloc(ipool, &id);
11374 	if (!tunnel)
11375 		return NULL;
11376 	if (id >= MLX5_MAX_TUNNELS) {
11377 		mlx5_ipool_free(ipool, id);
11378 		DRV_LOG(ERR, "Tunnel ID %u exceeds the max limit.", id);
11379 		return NULL;
11380 	}
11381 	tunnel->groups = mlx5_hlist_create("tunnel groups", 64, false, true,
11382 					   priv->sh,
11383 					   mlx5_flow_tunnel_grp2tbl_create_cb,
11384 					   mlx5_flow_tunnel_grp2tbl_match_cb,
11385 					   mlx5_flow_tunnel_grp2tbl_remove_cb,
11386 					   mlx5_flow_tunnel_grp2tbl_clone_cb,
11387 					mlx5_flow_tunnel_grp2tbl_clone_free_cb);
11388 	if (!tunnel->groups) {
11389 		mlx5_ipool_free(ipool, id);
11390 		return NULL;
11391 	}
11392 	/* Initialize the new PMD tunnel. */
11393 	memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
11394 	tunnel->tunnel_id = id;
11395 	tunnel->action.type = (typeof(tunnel->action.type))
11396 			      MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
11397 	tunnel->action.conf = tunnel;
11398 	tunnel->item.type = (typeof(tunnel->item.type))
11399 			    MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
11400 	tunnel->item.spec = tunnel;
11401 	tunnel->item.last = NULL;
11402 	tunnel->item.mask = NULL;
11403 
11404 	DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
11405 		dev->data->port_id, tunnel->tunnel_id);
11406 
11407 	return tunnel;
11408 }
11409 
11410 struct tunnel_db_get_tunnel_ctx {
11411 	const struct rte_flow_tunnel *app_tunnel;
11412 	struct mlx5_flow_tunnel *tunnel;
11413 };
11414 
11415 static bool get_tunnel_match(struct rte_eth_dev *dev,
11416 			     struct mlx5_flow_tunnel *tunnel, const void *x)
11417 {
11418 	const struct tunnel_db_get_tunnel_ctx *ctx = x;
11419 
11420 	RTE_SET_USED(dev);
11421 	return !memcmp(ctx->app_tunnel, &tunnel->app_tunnel,
11422 		       sizeof(*ctx->app_tunnel));
11423 }
11424 
11425 static void get_tunnel_hit(struct rte_eth_dev *dev,
11426 			   struct mlx5_flow_tunnel *tunnel, void *x)
11427 {
11428 	/* called under tunnel spinlock protection */
11429 	struct tunnel_db_get_tunnel_ctx *ctx = x;
11430 
11431 	RTE_SET_USED(dev);
11432 	tunnel->refctn++;
11433 	ctx->tunnel = tunnel;
11434 }
11435 
11436 static void get_tunnel_miss(struct rte_eth_dev *dev, void *x)
11437 {
11438 	/* called under tunnel spinlock protection */
11439 	struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
11440 	struct tunnel_db_get_tunnel_ctx *ctx = x;
11441 
11442 	rte_spinlock_unlock(&thub->sl);
11443 	ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel);
11444 	rte_spinlock_lock(&thub->sl);
11445 	if (ctx->tunnel) {
11446 		ctx->tunnel->refctn = 1;
11447 		LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain);
11448 	}
11449 }
11450 
11451 
11452 static int
11453 mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
11454 		     const struct rte_flow_tunnel *app_tunnel,
11455 		     struct mlx5_flow_tunnel **tunnel)
11456 {
11457 	struct tunnel_db_get_tunnel_ctx ctx = {
11458 		.app_tunnel = app_tunnel,
11459 	};
11460 
11461 	mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit,
11462 				      get_tunnel_miss, &ctx, true);
11463 	*tunnel = ctx.tunnel;
11464 	return ctx.tunnel ? 0 : -ENOMEM;
11465 }
11466 
11467 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
11468 {
11469 	struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
11470 
11471 	if (!thub)
11472 		return;
11473 	if (!LIST_EMPTY(&thub->tunnels))
11474 		DRV_LOG(WARNING, "port %u tunnels present", port_id);
11475 	mlx5_hlist_destroy(thub->groups);
11476 	mlx5_free(thub);
11477 }
11478 
11479 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
11480 {
11481 	int err;
11482 	struct mlx5_flow_tunnel_hub *thub;
11483 
11484 	thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
11485 			   0, SOCKET_ID_ANY);
11486 	if (!thub)
11487 		return -ENOMEM;
11488 	LIST_INIT(&thub->tunnels);
11489 	rte_spinlock_init(&thub->sl);
11490 	thub->groups = mlx5_hlist_create("flow groups", 64,
11491 					 false, true, sh,
11492 					 mlx5_flow_tunnel_grp2tbl_create_cb,
11493 					 mlx5_flow_tunnel_grp2tbl_match_cb,
11494 					 mlx5_flow_tunnel_grp2tbl_remove_cb,
11495 					 mlx5_flow_tunnel_grp2tbl_clone_cb,
11496 					mlx5_flow_tunnel_grp2tbl_clone_free_cb);
11497 	if (!thub->groups) {
11498 		err = -rte_errno;
11499 		goto err;
11500 	}
11501 	sh->tunnel_hub = thub;
11502 
11503 	return 0;
11504 
11505 err:
11506 	/* Only reached when hlist creation failed: thub is valid, groups is NULL. */
11507 	mlx5_free(thub);
11510 	return err;
11511 }
11512 
11513 static inline int
11514 mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
11515 			  struct rte_flow_tunnel *tunnel,
11516 			  struct rte_flow_error *error)
11517 {
11518 	struct mlx5_priv *priv = dev->data->dev_private;
11519 
11520 	if (!priv->sh->config.dv_flow_en)
11521 		return rte_flow_error_set(error, ENOTSUP,
11522 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
11523 					  "flow DV interface is off");
11524 	if (!is_tunnel_offload_active(dev))
11525 		return rte_flow_error_set(error, ENOTSUP,
11526 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
11527 					  "tunnel offload was not activated, consider setting dv_xmeta_en=3");
11528 	if (!tunnel)
11529 		return rte_flow_error_set(error, EINVAL,
11530 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
11531 					  "no application tunnel");
11532 	switch (tunnel->type) {
11533 	default:
11534 		return rte_flow_error_set(error, EINVAL,
11535 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
11536 					  "unsupported tunnel type");
11537 	case RTE_FLOW_ITEM_TYPE_VXLAN:
11538 	case RTE_FLOW_ITEM_TYPE_GRE:
11539 	case RTE_FLOW_ITEM_TYPE_NVGRE:
11540 	case RTE_FLOW_ITEM_TYPE_GENEVE:
11541 		break;
11542 	}
11543 	return 0;
11544 }
11545 
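/**
 * PMD callback behind rte_flow_tunnel_decap_set(): validates the
 * application tunnel, finds or creates the PMD tunnel object and
 * exposes its single pre-built decap action to the application.
 *
 * Illustrative application-side sketch (not part of the driver;
 * error handling omitted, 'port_id' and 'error' are assumed to be
 * defined by the caller):
 *
 *	struct rte_flow_tunnel app_tunnel = {
 *		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
 *	};
 *	struct rte_flow_action *pmd_actions;
 *	uint32_t num_of_actions;
 *
 *	rte_flow_tunnel_decap_set(port_id, &app_tunnel, &pmd_actions,
 *				  &num_of_actions, &error);
 *
 * The application then prepends 'pmd_actions' to the actions of the
 * tunnel rule it creates.
 */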
11546 static int
11547 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
11548 			   struct rte_flow_tunnel *app_tunnel,
11549 			   struct rte_flow_action **actions,
11550 			   uint32_t *num_of_actions,
11551 			   struct rte_flow_error *error)
11552 {
11553 	struct mlx5_flow_tunnel *tunnel;
11554 	int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
11555 
11556 	if (ret)
11557 		return ret;
11558 	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
11559 	if (ret < 0) {
11560 		return rte_flow_error_set(error, -ret,
11561 					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
11562 					  "failed to initialize pmd tunnel");
11563 	}
11564 	*actions = &tunnel->action;
11565 	*num_of_actions = 1;
11566 	return 0;
11567 }
11568 
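/**
 * PMD callback behind rte_flow_tunnel_match(): same lookup as the
 * decap-set path, but exposes the tunnel's single pre-built pattern
 * item, which the application prepends to rules matching on the
 * restored tunnel.
 */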
11569 static int
11570 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
11571 		       struct rte_flow_tunnel *app_tunnel,
11572 		       struct rte_flow_item **items,
11573 		       uint32_t *num_of_items,
11574 		       struct rte_flow_error *error)
11575 {
11576 	struct mlx5_flow_tunnel *tunnel;
11577 	int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
11578 
11579 	if (ret)
11580 		return ret;
11581 	ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
11582 	if (ret < 0) {
11583 		return rte_flow_error_set(error, -ret,
11584 					  RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
11585 					  "failed to initialize pmd tunnel");
11586 	}
11587 	*items = &tunnel->item;
11588 	*num_of_items = 1;
11589 	return 0;
11590 }
11591 
11592 struct tunnel_db_element_release_ctx {
11593 	struct rte_flow_item *items;
11594 	struct rte_flow_action *actions;
11595 	uint32_t num_elements;
11596 	struct rte_flow_error *error;
11597 	int ret;
11598 };
11599 
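/*
 * The release callbacks below locate a tunnel by pointer identity:
 * the items/actions handed out by the decap-set and match callbacks
 * point into the tunnel object itself, so exactly one element equal
 * to &tunnel->item or &tunnel->action identifies its owner.
 */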
11600 static bool
11601 tunnel_element_release_match(struct rte_eth_dev *dev,
11602 			     struct mlx5_flow_tunnel *tunnel, const void *x)
11603 {
11604 	const struct tunnel_db_element_release_ctx *ctx = x;
11605 
11606 	RTE_SET_USED(dev);
11607 	if (ctx->num_elements != 1)
11608 		return false;
11609 	else if (ctx->items)
11610 		return ctx->items == &tunnel->item;
11611 	else if (ctx->actions)
11612 		return ctx->actions == &tunnel->action;
11613 
11614 	return false;
11615 }
11616 
11617 static void
11618 tunnel_element_release_hit(struct rte_eth_dev *dev,
11619 			   struct mlx5_flow_tunnel *tunnel, void *x)
11620 {
11621 	struct tunnel_db_element_release_ctx *ctx = x;
11622 	ctx->ret = 0;
11623 	if (__atomic_fetch_sub(&tunnel->refctn, 1, __ATOMIC_RELAXED) == 1)
11624 		mlx5_flow_tunnel_free(dev, tunnel);
11625 }
11626 
11627 static void
11628 tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
11629 {
11630 	struct tunnel_db_element_release_ctx *ctx = x;
11631 	RTE_SET_USED(dev);
11632 	ctx->ret = rte_flow_error_set(ctx->error, EINVAL,
11633 				      RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
11634 				      "invalid argument");
11635 }
11636 
11637 static int
11638 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
11639 			      struct rte_flow_item *pmd_items,
11640 			      uint32_t num_items, struct rte_flow_error *err)
11641 {
11642 	struct tunnel_db_element_release_ctx ctx = {
11643 		.items = pmd_items,
11644 		.actions = NULL,
11645 		.num_elements = num_items,
11646 		.error = err,
11647 	};
11648 
11649 	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
11650 				      tunnel_element_release_hit,
11651 				      tunnel_element_release_miss, &ctx, false);
11652 
11653 	return ctx.ret;
11654 }
11655 
11656 static int
11657 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
11658 				struct rte_flow_action *pmd_actions,
11659 				uint32_t num_actions, struct rte_flow_error *err)
11660 {
11661 	struct tunnel_db_element_release_ctx ctx = {
11662 		.items = NULL,
11663 		.actions = pmd_actions,
11664 		.num_elements = num_actions,
11665 		.error = err,
11666 	};
11667 
11668 	mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
11669 				      tunnel_element_release_hit,
11670 				      tunnel_element_release_miss, &ctx, false);
11671 
11672 	return ctx.ret;
11673 }
11674 
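/**
 * PMD callback behind rte_flow_get_restore_info(). A packet that
 * missed on the tunnel group carries an FDIR mark which encodes the
 * tunnel table entry; the mark is decoded back to the application
 * tunnel and the group id registered for it.
 *
 * Illustrative application-side sketch (not part of the driver;
 * 'port_id', 'mbuf' and 'error' are assumed to be defined by the
 * caller, and handle_tunnel_miss() is a hypothetical application
 * handler):
 *
 *	struct rte_flow_restore_info info;
 *
 *	if (rte_flow_get_restore_info(port_id, mbuf, &info, &error) == 0 &&
 *	    (info.flags & RTE_FLOW_RESTORE_INFO_TUNNEL))
 *		handle_tunnel_miss(&info.tunnel);
 */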
11675 static int
11676 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
11677 				  struct rte_mbuf *m,
11678 				  struct rte_flow_restore_info *info,
11679 				  struct rte_flow_error *err)
11680 {
11681 	uint64_t ol_flags = m->ol_flags;
11682 	const struct mlx5_flow_tbl_data_entry *tble;
11683 	const uint64_t mask = RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
11684 	struct mlx5_priv *priv = dev->data->dev_private;
11685 
11686 	if (priv->tunnel_enabled == 0)
11687 		goto err;
11688 	if ((ol_flags & mask) != mask)
11689 		goto err;
11690 	tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
11691 	if (!tble) {
11692 		DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
11693 			dev->data->port_id, m->hash.fdir.hi);
11694 		goto err;
11695 	}
11696 	MLX5_ASSERT(tble->tunnel);
11697 	memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel));
11698 	info->group_id = tble->group_id;
11699 	info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
11700 		      RTE_FLOW_RESTORE_INFO_GROUP_ID |
11701 		      RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
11702 
11703 	return 0;
11704 
11705 err:
11706 	return rte_flow_error_set(err, EINVAL,
11707 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
11708 				  "failed to get restore info");
11709 }
11710 
11711 #else /* HAVE_IBV_FLOW_DV_SUPPORT */
11712 static int
11713 mlx5_flow_tunnel_decap_set(__rte_unused struct rte_eth_dev *dev,
11714 			   __rte_unused struct rte_flow_tunnel *app_tunnel,
11715 			   __rte_unused struct rte_flow_action **actions,
11716 			   __rte_unused uint32_t *num_of_actions,
11717 			   __rte_unused struct rte_flow_error *error)
11718 {
11719 	return -ENOTSUP;
11720 }
11721 
11722 static int
11723 mlx5_flow_tunnel_match(__rte_unused struct rte_eth_dev *dev,
11724 		       __rte_unused struct rte_flow_tunnel *app_tunnel,
11725 		       __rte_unused struct rte_flow_item **items,
11726 		       __rte_unused uint32_t *num_of_items,
11727 		       __rte_unused struct rte_flow_error *error)
11728 {
11729 	return -ENOTSUP;
11730 }
11731 
11732 static int
11733 mlx5_flow_tunnel_item_release(__rte_unused struct rte_eth_dev *dev,
11734 			      __rte_unused struct rte_flow_item *pmd_items,
11735 			      __rte_unused uint32_t num_items,
11736 			      __rte_unused struct rte_flow_error *err)
11737 {
11738 	return -ENOTSUP;
11739 }
11740 
11741 static int
11742 mlx5_flow_tunnel_action_release(__rte_unused struct rte_eth_dev *dev,
11743 				__rte_unused struct rte_flow_action *pmd_action,
11744 				__rte_unused uint32_t num_actions,
11745 				__rte_unused struct rte_flow_error *err)
11746 {
11747 	return -ENOTSUP;
11748 }
11749 
11750 static int
11751 mlx5_flow_tunnel_get_restore_info(__rte_unused struct rte_eth_dev *dev,
11752 				  __rte_unused struct rte_mbuf *m,
11753 				  __rte_unused struct rte_flow_restore_info *i,
11754 				  __rte_unused struct rte_flow_error *err)
11755 {
11756 	return -ENOTSUP;
11757 }
11758 
11759 static int
11760 flow_tunnel_add_default_miss(__rte_unused struct rte_eth_dev *dev,
11761 			     __rte_unused struct rte_flow *flow,
11762 			     __rte_unused const struct rte_flow_attr *attr,
11763 			     __rte_unused const struct rte_flow_action *actions,
11764 			     __rte_unused uint32_t flow_idx,
11765 			     __rte_unused const struct mlx5_flow_tunnel *tunnel,
11766 			     __rte_unused struct tunnel_default_miss_ctx *ctx,
11767 			     __rte_unused struct rte_flow_error *error)
11768 {
11769 	return -ENOTSUP;
11770 }
11771 
11772 static struct mlx5_flow_tunnel *
11773 mlx5_find_tunnel_id(__rte_unused struct rte_eth_dev *dev,
11774 		    __rte_unused uint32_t id)
11775 {
11776 	return NULL;
11777 }
11778 
11779 static void
11780 mlx5_flow_tunnel_free(__rte_unused struct rte_eth_dev *dev,
11781 		      __rte_unused struct mlx5_flow_tunnel *tunnel)
11782 {
11783 }
11784 
11785 static uint32_t
11786 tunnel_flow_group_to_flow_table(__rte_unused struct rte_eth_dev *dev,
11787 				__rte_unused const struct mlx5_flow_tunnel *t,
11788 				__rte_unused uint32_t group,
11789 				__rte_unused uint32_t *table,
11790 				struct rte_flow_error *error)
11791 {
11792 	return rte_flow_error_set(error, ENOTSUP,
11793 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
11794 				  "tunnel offload requires DV support");
11795 }
11796 
11797 void
11798 mlx5_release_tunnel_hub(__rte_unused struct mlx5_dev_ctx_shared *sh,
11799 			__rte_unused uint16_t port_id)
11800 {
11801 }
11802 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
11803 
11804 /* Flex flow item API */
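/**
 * Create a flex item handle. Flex items are only supported on
 * BlueField-2/BlueField-3 PF ports and require driver support for
 * the item_create operation.
 */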
11805 static struct rte_flow_item_flex_handle *
11806 mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
11807 			   const struct rte_flow_item_flex_conf *conf,
11808 			   struct rte_flow_error *error)
11809 {
11810 	static const char err_msg[] = "flex item creation unsupported";
11811 	struct mlx5_priv *priv = dev->data->dev_private;
11812 	struct rte_flow_attr attr = { .transfer = 0 };
11813 	const struct mlx5_flow_driver_ops *fops =
11814 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
11815 
11816 	if (!priv->pci_dev) {
11817 		rte_flow_error_set(error, ENOTSUP,
11818 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
11819 				   "create flex item on PF only");
11820 		return NULL;
11821 	}
11822 	switch (priv->pci_dev->id.device_id) {
11823 	case PCI_DEVICE_ID_MELLANOX_BLUEFIELD2:
11824 	case PCI_DEVICE_ID_MELLANOX_BLUEFIELD3:
11825 		break;
11826 	default:
11827 		rte_flow_error_set(error, ENOTSUP,
11828 				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
11829 				   "flex item available on BlueField ports only");
11830 		return NULL;
11831 	}
11832 	if (!fops->item_create) {
11833 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
11834 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
11835 				   NULL, err_msg);
11836 		return NULL;
11837 	}
11838 	return fops->item_create(dev, conf, error);
11839 }
11840 
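/**
 * Release a flex item handle previously returned by
 * mlx5_flow_flex_item_create().
 */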
11841 static int
11842 mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
11843 			    const struct rte_flow_item_flex_handle *handle,
11844 			    struct rte_flow_error *error)
11845 {
11846 	static const char err_msg[] = "flex item release unsupported";
11847 	struct rte_flow_attr attr = { .transfer = 0 };
11848 	const struct mlx5_flow_driver_ops *fops =
11849 			flow_get_drv_ops(flow_get_drv_type(dev, &attr));
11850 
11851 	if (!fops->item_release) {
11852 		DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
11853 		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
11854 				   NULL, err_msg);
11855 		return -rte_errno;
11856 	}
11857 	return fops->item_release(dev, handle, error);
11858 }
11859 
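/* Debug helper: print a flow pattern as a space-separated item list. */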
11860 static void
11861 mlx5_dbg__print_pattern(const struct rte_flow_item *item)
11862 {
11863 	int ret;
11864 	struct rte_flow_error error;
11865 
11866 	for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
11867 		char *item_name;
11868 		ret = rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME_PTR, &item_name,
11869 				    sizeof(item_name),
11870 				    (void *)(uintptr_t)item->type, &error);
11871 		if (ret > 0)
11872 			printf("%s ", item_name);
11873 		else
11874 			printf("%d ", (int)item->type);
11875 	}
11876 	printf("END\n");
11877 }
11878 
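/**
 * Check whether a UDP item can only match the standard VXLAN port.
 *
 * @return
 *   1 if the destination port is unspecified or equals
 *   MLX5_UDP_PORT_VXLAN (4789), 0 otherwise.
 */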
11879 static int
11880 mlx5_flow_is_std_vxlan_port(const struct rte_flow_item *udp_item)
11881 {
11882 	const struct rte_flow_item_udp *spec = udp_item->spec;
11883 	const struct rte_flow_item_udp *mask = udp_item->mask;
11884 	uint16_t udp_dport = 0;
11885 
11886 	if (spec != NULL) {
11887 		if (!mask)
11888 			mask = &rte_flow_item_udp_mask;
11889 		udp_dport = rte_be_to_cpu_16(spec->hdr.dst_port &
11890 				mask->hdr.dst_port);
11891 	}
11892 	return (!udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN);
11893 }
11894 
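/**
 * Adjust the RSS expansion node for a trailing VXLAN item: patterns
 * using the standard VXLAN UDP port keep the standard VXLAN
 * expansion, any other port switches to the L3 VXLAN expansion.
 */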
11895 static const struct mlx5_flow_expand_node *
11896 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
11897 		unsigned int item_idx,
11898 		const struct mlx5_flow_expand_node graph[],
11899 		const struct mlx5_flow_expand_node *node)
11900 {
11901 	const struct rte_flow_item *item = pattern + item_idx, *prev_item;
11902 
11903 	if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN &&
11904 			node != NULL &&
11905 			node->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
11906 		/*
11907 		 * The expansion node is VXLAN and it is also the last
11908 		 * expandable item in the pattern, so need to continue
11909 		 * expansion of the inner tunnel.
11910 		 */
11911 		MLX5_ASSERT(item_idx > 0);
11912 		prev_item = pattern + item_idx - 1;
11913 		MLX5_ASSERT(prev_item->type == RTE_FLOW_ITEM_TYPE_UDP);
11914 		if (mlx5_flow_is_std_vxlan_port(prev_item))
11915 			return &graph[MLX5_EXPANSION_STD_VXLAN];
11916 		return &graph[MLX5_EXPANSION_L3_VXLAN];
11917 	}
11918 	return node;
11919 }
11920 
11921 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
11922 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
11923 	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
11924 };
11925 
11926 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
11927 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
11928 	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
11929 	{ 9, 10, 11 }, { 12, 13, 14 },
11930 };
11931 
11932 /**
11933  * Discover the number of available flow priorities.
11934  *
11935  * @param dev
11936  *   Ethernet device.
11937  *
11938  * @return
11939  *   On success, number of available flow priorities.
11940  *   On failure, a negative errno value is returned and rte_errno is set.
11941  */
11942 int
11943 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
11944 {
11945 	static const uint16_t vprio[] = {8, 16};
11946 	const struct mlx5_priv *priv = dev->data->dev_private;
11947 	const struct mlx5_flow_driver_ops *fops;
11948 	enum mlx5_flow_drv_type type;
11949 	int ret;
11950 
11951 	type = mlx5_flow_os_get_type();
11952 	if (type == MLX5_FLOW_TYPE_MAX) {
11953 		type = MLX5_FLOW_TYPE_VERBS;
11954 		if (priv->sh->cdev->config.devx && priv->sh->config.dv_flow_en)
11955 			type = MLX5_FLOW_TYPE_DV;
11956 	}
11957 	fops = flow_get_drv_ops(type);
11958 	if (fops->discover_priorities == NULL) {
11959 		DRV_LOG(ERR, "Priority discovery not supported");
11960 		rte_errno = ENOTSUP;
11961 		return -rte_errno;
11962 	}
11963 	ret = fops->discover_priorities(dev, vprio, RTE_DIM(vprio));
11964 	if (ret < 0)
11965 		return ret;
11966 	switch (ret) {
11967 	case 8:
11968 		ret = RTE_DIM(priority_map_3);
11969 		break;
11970 	case 16:
11971 		ret = RTE_DIM(priority_map_5);
11972 		break;
11973 	default:
11974 		rte_errno = ENOTSUP;
11975 		DRV_LOG(ERR,
11976 			"port %u maximum priority: %d expected 8/16",
11977 			dev->data->port_id, ret);
11978 		return -rte_errno;
11979 	}
11980 	DRV_LOG(INFO, "port %u supported flow priorities:"
11981 		" 0-%d for ingress or egress root table,"
11982 		" 0-%d for non-root table or transfer root table.",
11983 		dev->data->port_id, ret - 2,
11984 		MLX5_NON_ROOT_FLOW_MAX_PRIO - 1);
11985 	return ret;
11986 }
11987 
11988 /**
11989  * Adjust flow priority based on the highest layer and the request priority.
11990  *
11991  * @param[in] dev
11992  *   Pointer to the Ethernet device structure.
11993  * @param[in] priority
11994  *   The rule base priority.
11995  * @param[in] subpriority
11996  *   The priority based on the items.
11997  *
11998  * @return
11999  *   The new priority.
12000  */
12001 uint32_t
12002 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
12003 			  uint32_t subpriority)
12004 {
12005 	uint32_t res = 0;
12006 	struct mlx5_priv *priv = dev->data->dev_private;
12007 
12008 	switch (priv->sh->flow_max_priority) {
12009 	case RTE_DIM(priority_map_3):
12010 		res = priority_map_3[priority][subpriority];
12011 		break;
12012 	case RTE_DIM(priority_map_5):
12013 		res = priority_map_5[priority][subpriority];
12014 		break;
12015 	}
12016 	return res;
12017 }
12018 
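/*
 * Worked example (assuming 16 verbs priorities, i.e. priority_map_5):
 * a rule with base priority 1 and item subpriority 2 is placed at
 * verbs priority priority_map_5[1][2] == 5.
 */
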
12019 /**
12020  * Get the priority for sending traffic to kernel table.
12021  *
12022  * @param[in] dev
12023  *   Pointer to the Ethernet device structure.
12024  *
12025  * @return
12026  *   On success: the value of priority for sending traffic to kernel table
12027  *   On failure: -1
12028  */
12029 uint32_t
12030 mlx5_get_send_to_kernel_priority(struct rte_eth_dev *dev)
12031 {
12032 	struct mlx5_priv *priv = dev->data->dev_private;
12033 	uint32_t res;
12034 
12035 	switch (priv->sh->flow_max_priority) {
12036 	case RTE_DIM(priority_map_5):
12037 		res = 15;
12038 		break;
12039 	case RTE_DIM(priority_map_3):
12040 		res = 7;
12041 		break;
12042 	default:
12043 		DRV_LOG(ERR,
12044 			"port %u maximum priority: %d expected 8/16",
12045 			dev->data->port_id, priv->sh->flow_max_priority);
12046 		res = (uint32_t)-1;
12047 	}
12048 	return res;
12049 }
12050 
12051 /**
12052  * Get the E-Switch Manager vport id.
12053  *
12054  * @param[in] dev
12055  *   Pointer to the Ethernet device structure.
12056  *
12057  * @return
12058  *   The vport id.
12059  */
12060 int16_t mlx5_flow_get_esw_manager_vport_id(struct rte_eth_dev *dev)
12061 {
12062 	struct mlx5_priv *priv = dev->data->dev_private;
12063 	struct mlx5_common_device *cdev = priv->sh->cdev;
12064 
12065 	/* New FW exposes E-Switch Manager vport ID, can use it directly. */
12066 	if (cdev->config.hca_attr.esw_mgr_vport_id_valid)
12067 		return (int16_t)cdev->config.hca_attr.esw_mgr_vport_id;
12068 
12069 	if (priv->pci_dev == NULL)
12070 		return 0;
12071 	switch (priv->pci_dev->id.device_id) {
12072 	case PCI_DEVICE_ID_MELLANOX_BLUEFIELD:
12073 	case PCI_DEVICE_ID_MELLANOX_BLUEFIELD2:
12074 	case PCI_DEVICE_ID_MELLANOX_BLUEFIELD3:
12075 	/*
12076 	 * In old FW which doesn't expose the E-Switch Manager vport ID in the capability,
12077 	 * only the BF embedded CPUs control the E-Switch Manager port. Hence,
12078 	 * ECPF vport ID is selected and not the host port (0) in any BF case.
12079 	 */
12080 		return (int16_t)MLX5_ECPF_VPORT_ID;
12081 	default:
12082 		return MLX5_PF_VPORT_ID;
12083 	}
12084 }
12085 
12086 /**
12087  * Parse item to get the vport id.
12088  *
12089  * @param[in] dev
12090  *   Pointer to the Ethernet device structure.
12091  * @param[in] item
12092  *   The src port id match item.
12093  * @param[out] vport_id
12094  *   Pointer to put the vport id.
12095  * @param[out] all_ports
12096  *   Indicate if the item matches all ports.
12097  * @param[out] error
12098  *   Pointer to error structure.
12099  *
12100  * @return
12101  *   0 on success, a negative errno value otherwise and rte_errno is set.
12102  */
12103 int mlx5_flow_get_item_vport_id(struct rte_eth_dev *dev,
12104 				const struct rte_flow_item *item,
12105 				uint16_t *vport_id,
12106 				bool *all_ports,
12107 				struct rte_flow_error *error)
12108 {
12109 	struct mlx5_priv *port_priv;
12110 	const struct rte_flow_item_port_id *pid_v = NULL;
12111 	const struct rte_flow_item_ethdev *dev_v = NULL;
12112 	uint32_t esw_mgr_port;
12113 	uint32_t src_port;
12114 
12115 	if (all_ports)
12116 		*all_ports = false;
12117 	switch (item->type) {
12118 	case RTE_FLOW_ITEM_TYPE_PORT_ID:
12119 		pid_v = item->spec;
12120 		if (!pid_v)
12121 			return 0;
12122 		src_port = pid_v->id;
12123 		esw_mgr_port = MLX5_PORT_ESW_MGR;
12124 		break;
12125 	case RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT:
12126 		dev_v = item->spec;
12127 		if (!dev_v) {
12128 			if (all_ports)
12129 				*all_ports = true;
12130 			return 0;
12131 		}
12132 		src_port = dev_v->port_id;
12133 		esw_mgr_port = MLX5_REPRESENTED_PORT_ESW_MGR;
12134 		break;
12135 	case RTE_FLOW_ITEM_TYPE_PORT_REPRESENTOR:
12136 		src_port = MLX5_REPRESENTED_PORT_ESW_MGR;
12137 		esw_mgr_port = MLX5_REPRESENTED_PORT_ESW_MGR;
12138 		break;
12139 	default:
12140 		return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
12141 					  NULL, "Incorrect item type.");
12142 	}
12143 	if (src_port == esw_mgr_port) {
12144 		*vport_id = mlx5_flow_get_esw_manager_vport_id(dev);
12145 	} else {
12146 		port_priv = mlx5_port_to_eswitch_info(src_port, false);
12147 		if (!port_priv)
12148 			return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
12149 						  NULL, "Failed to get port info.");
12150 		*vport_id = port_priv->representor_id;
12151 	}
12152 
12153 	return 0;
12154 }
12155 
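/**
 * Select the proxy port for transfer flow rules: the port itself when
 * it is an E-Switch master, otherwise the master port sharing the
 * same switch domain.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */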
12156 int
12157 mlx5_flow_pick_transfer_proxy(struct rte_eth_dev *dev,
12158 			      uint16_t *proxy_port_id,
12159 			      struct rte_flow_error *error)
12160 {
12161 	const struct mlx5_priv *priv = dev->data->dev_private;
12162 	uint16_t port_id;
12163 
12164 	if (!priv->sh->config.dv_esw_en)
12165 		return rte_flow_error_set(error, EINVAL,
12166 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
12167 					  NULL,
12168 					  "unable to provide a proxy port"
12169 					  " without E-Switch configured");
12170 	if (!priv->master && !priv->representor)
12171 		return rte_flow_error_set(error, EINVAL,
12172 					  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
12173 					  NULL,
12174 					  "unable to provide a proxy port"
12175 					  " for port which is not a master"
12176 					  " or a representor port");
12177 	if (priv->master) {
12178 		*proxy_port_id = dev->data->port_id;
12179 		return 0;
12180 	}
12181 	MLX5_ETH_FOREACH_DEV(port_id, dev->device) {
12182 		const struct rte_eth_dev *port_dev = &rte_eth_devices[port_id];
12183 		const struct mlx5_priv *port_priv = port_dev->data->dev_private;
12184 
12185 		if (port_priv->master &&
12186 		    port_priv->domain_id == priv->domain_id) {
12187 			*proxy_port_id = port_id;
12188 			return 0;
12189 		}
12190 	}
12191 	return rte_flow_error_set(error, ENODEV,
12192 				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
12193 				  NULL, "unable to find a proxy port");
12194 }
12195